Virtual register problem in X86 backend

Hi,

I'm having trouble using virtual registers in the X86 backend.

I implemented a new intrinsic and I use a custom inserter. The goal of
the intrinsic is to set the content of the stack to zero at the end of
each function.

Here is my code:

// Custom inserter for the BURNSTACK pseudo-instruction.
//
// Replaces MI with a loop that walks an address register upwards from RSP
// until it compares equal to RBP, storing zero at each visited address.
// The instructions that followed MI in MBB are moved into a new block
// (MBB_end). Resulting CFG, as built below:
//
//   MBB -> MBB_cond -> MBB_erase -> MBB_cond (back edge)
//              \-> MBB_end
//
// MI  - the pseudo-instruction being expanded; it is erased before returning.
// MBB - the block that contains MI.
// Returns MBB_erase (see the review note at the return statement).
MachineBasicBlock *
X86TargetLowering::EmitBURNSTACKWithCustomInserter(
                     MachineInstr *MI,
                     MachineBasicBlock *MBB) const {
    DebugLoc db = MI->getDebugLoc();
    const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
    const BasicBlock *LLVM_BB = MBB->getBasicBlock();
    MachineFunction *F = MBB->getParent();

    // Create the three new basic blocks: loop header (condition check),
    // loop body (erase) and the continuation (end). All are associated with
    // the same LLVM IR basic block as MBB.
    MachineBasicBlock *MBB_cond = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *MBB_erase = F->CreateMachineBasicBlock(LLVM_BB);
    MachineBasicBlock *MBB_end = F->CreateMachineBasicBlock(LLVM_BB);

    // Insert the new basic blocks into the function (final position is
    // fixed up by the moveAfter calls below).
    F->insert(MBB, MBB_cond);
    F->insert(MBB, MBB_erase);
    F->insert(MBB, MBB_end);

    // Split MBB in two: everything after MI moves to MBB_end, which also
    // inherits MBB's successors.
    MBB_end->splice(MBB_end->begin(), MBB,
next(MachineBasicBlock::iterator(MI)), MBB->end());
    MBB_end->transferSuccessorsAndUpdatePHIs(MBB);

    // Lay the blocks out as MBB, MBB_cond, MBB_erase, MBB_end. Each call
    // places its block directly after MBB, hence the reverse order.
    MBB_end->moveAfter(MBB);
    MBB_erase->moveAfter(MBB);
    MBB_cond->moveAfter(MBB);

    // Set the new successors
    MBB->addSuccessor(MBB_cond);
    MBB_cond->addSuccessor(MBB_erase);
    MBB_cond->addSuccessor(MBB_end);
    MBB_erase->addSuccessor(MBB_cond);
    // NOTE(review): MBB_erase ends with an unconditional JMP_4 to MBB_cond
    // (built below), so MBB_end does not look like a real successor of
    // MBB_erase -- confirm whether this edge is intended.
    MBB_erase->addSuccessor(MBB_end);

    // Three 64-bit virtual registers:
    //   regA - initial address (meant to hold a copy of RSP)
    //   regB - current address inside the loop (result of the PHI)
    //   regC - next address (regB + 8), fed back into the PHI
    MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
    const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
    unsigned regA = MRI.createVirtualRegister(AddrRegClass);
    unsigned regB = MRI.createVirtualRegister(AddrRegClass);
    unsigned regC = MRI.createVirtualRegister(AddrRegClass);

    // Initialise the address register from RSP, inserted right before MI.
    // NOTE(review): regA is appended with a plain addReg(), so it is added
    // as a *use* operand rather than as the definition (the machine-code
    // dump shows "MOV64rr %vreg7, %RSP" without <def>). Unlike the PHI and
    // the ADD64ri32 below, the destination register is not passed to
    // BuildMI here.
    BuildMI(*MBB, MI, db,
TII->get(X86::MOV64rr)).addReg(regA).addReg(X86::RSP);

    // Loop header: regB = PHI(regA from MBB, regC from MBB_erase);
    // leave the loop (jump to MBB_end) once regB equals RBP.
    BuildMI(*MBB_cond, MBB_cond->end(), db, TII->get(X86::PHI),
regB).addReg(regA).addMBB(MBB).addReg(regC).addMBB(MBB_erase);
    BuildMI(*MBB_cond, MBB_cond->end(), db,
TII->get(X86::CMP64rr)).addReg(regB).addReg(X86::RBP);
    BuildMI(*MBB_cond, MBB_cond->end(), db,
TII->get(X86::JE_4)).addMBB(MBB_end);

    // Loop body: mov dword [regB], 0x0 ; regC = regB + 8 ; jmp MBB_cond.
    // The memory operand is (base=regB, scale=1, no index, disp=0,
    // no segment), followed by the immediate 0 to store.
    // NOTE(review): the store is 32 bits wide but the address advances by
    // 8 bytes per iteration -- confirm this is intended.
    BuildMI(*MBB_erase, MBB_erase->end(), db,
TII->get(X86::MOV32mi)).addReg(regB).addImm(1).addReg(0).addImm(0).addReg(0).addImm(0);
   BuildMI(*MBB_erase, MBB_erase->end(), db, TII->get(X86::ADD64ri32),
regC).addReg(regB).addImm(8);
    // NOTE(review): the block argument is MBB_cond while the insertion
    // iterator comes from MBB_erase -- presumably *MBB_erase was meant;
    // verify.
    BuildMI(*MBB_cond, MBB_erase->end(), db,
TII->get(X86::JMP_4)).addMBB(MBB_cond);

    // Erase intrinsic
    MI->eraseFromParent();
    // Debug output: dump the whole machine function after the expansion.
    MBB->getParent()->dump();
    // NOTE(review): the instructions that followed MI now live in MBB_end,
    // yet MBB_erase is returned -- confirm which block the caller expects.
    return MBB_erase;
}

I run it on this sample code:

#include <stdio.h>

/* Minimal test program: prints "MAIN" followed by a newline and returns 0.
 * argc and argv are not used. */
int main(int argc, char **argv) {
  printf("MAIN\n");
  return 0;
}

And it generates this X86 assembly:

/ (fcn) sym.main 115

         0x004004f0 b 55 push rbp
         0x004004f1 4889e5 mov rbp, rsp
         0x004004f4 4883ec30 sub rsp, 0x30
         0x004004f8 48b8f405400. mov rax, str.MAIN ; 0x004005f4
         0x00400502 c745fc00000. mov dword [rbp-0x4], 0x0
         0x00400509 897df8 mov [rbp-0x8], edi
         0x0040050c 488975f0 mov [rbp-0x10], rsi
         0x00400510 4889c7 mov rdi, rax
         0x00400513 b000 mov al, 0x0
         0x00400515 e8a6feffff call sym.imp.printf
            sym.imp.printf(unk)
         0x0040051a b900000000 mov ecx, 0x0
         0x0040051f 488b75e8 mov rsi, [rbp-0x18]
         0x00400523 4889e6 mov rsi, rsp
         0x00400526 8945e4 mov [rbp-0x1c], eax
         0x00400529 894de0 mov [rbp-0x20], ecx
         0x0040052c 48897dd8 mov [rbp-0x28], rdi
    . ; CODE (CALL) XREF from 0x00400555 (fcn.004004bc)
- loc.00400530 51
    .--> 0x00400530 488b45d8 mov rax, [rbp-0x28]
    > 0x00400534 4839e8 cmp rax, rbp
    > 0x00400537 488945d0 mov [rbp-0x30], rax
    >,=< 0x0040053b 0f8419000000 je 0x40055a
    >> 0x00400541 488b45d0 mov rax, [rbp-0x30]
    >> 0x00400545 c70000000000 mov dword [rax], 0x0
    >> 0x0040054b 480508000000 add rax, 0x8
    >> 0x00400551 488945d8 mov [rbp-0x28], rax
    > ; CODE (CALL) XREF from 0x00400530 (fcn.004004bc)
    `==< 0x00400555 e9d6ffffff jmp loc.00400530
     `-> 0x0040055a 8b45e0 mov eax, [rbp-0x20]
         0x0040055d 4883c430 add rsp, 0x30
         0x00400561 5d pop rbp

\ 0x00400562 c3 ret

As we can see, it moves RSP into RSI, but then generates the rest of the
code using RAX, so it fails.

Am I missing something?

Cheers

Hi Julien,

I have to admit that the way you build the virtual registers looks correct to me.

Could you attach the machine IR right before and after the insertion, as well as the final assembly (i.e., not just the binary), so that I can see whether I can help you further?

Thanks,
-Quentin

Hi,

Thx for your help...

Here is the IR code:

; ModuleID = 'foo_bar.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [6 x i8] c"MAIN\0A\00", align 1

; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %retval = alloca i32, align 4
  %argc.addr = alloca i32, align 4
  %argv.addr = alloca i8**, align 8
  store i32 0, i32* %retval
  store i32 %argc, i32* %argc.addr, align 4
  store i8** %argv, i8*** %argv.addr, align 8
  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x
i8]* @.str, i32 0, i32 0))
  call void @llvm.burnstack()
  ret i32 0
}

declare i32 @printf(i8*, ...) #1

declare void @llvm.va_start(i8*)

declare void @llvm.va_end(i8*)

; Function Attrs: nounwind
declare void @llvm.burnstack() #2

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.ident = !{!0}

!0 = metadata !{metadata !"clang version 3.5.0 (tags/RELEASE_350/final) ()"}

Here is the assembly generated by llc (I'm not sure that's what you
meant by 'final assembly'):

        .text
        .file "foo_bar.ll"
        .globl main
        .align 16, 0x90
        .type main,@function
main: # @main
        .cfi_startproc
# BB#0: # %entry
        pushq %rbp
.Ltmp0:
        .cfi_def_cfa_offset 16
.Ltmp1:
        .cfi_offset %rbp, -16
        movq %rsp, %rbp
.Ltmp2:
        .cfi_def_cfa_register %rbp
        subq $16, %rsp
        movl $0, -4(%rbp)
        movl %edi, -8(%rbp)
        movq %rsi, -16(%rbp)
        movl $.L.str, %edi
        xorl %eax, %eax
        callq printf
        callq llvm.burnstack
        xorl %eax, %eax
        addq $16, %rsp
        popq %rbp
        retq
.Ltmp3:
        .size main, .Ltmp3-main
        .cfi_endproc

        .type .L.str,@object # @.str
        .section .rodata.str1.1,"aMS",@progbits,1
.L.str:
        .asciz "MAIN\n"
        .size .L.str, 6

        .ident "clang version 3.5.0 (tags/RELEASE_350/final) ()"
        .section ".note.GNU-stack","",@progbits

And here is the machine code in X86IselLowering.cpp generated by my
custom inserter:

# Machine code for function main: SSA
Frame Objects:
  fi#0: size=4, align=4, at location [SP+8]
  fi#1: size=4, align=4, at location [SP+8]
  fi#2: size=8, align=8, at location [SP+8]
Function Live Ins: %EDI in %vreg0, %RSI in %vreg2

BB#0: derived from LLVM BB %entry
    Live Ins: %EDI %RSI
        %vreg2<def> = COPY %RSI; GR64:%vreg2
        %vreg0<def> = COPY %EDI; GR32:%vreg0
        %vreg1<def> = COPY %vreg0<kill>; GR32:%vreg1,%vreg0
        %vreg3<def> = COPY %vreg2<kill>; GR64:%vreg3,%vreg2
        %vreg5<def> = MOV64ri <ga:@.str>; GR64:%vreg5
        MOV32mi <fi#0>, 1, %noreg, 0, %noreg, 0; mem:ST4[%retval]
        MOV32mr <fi#1>, 1, %noreg, 0, %noreg, %vreg1;
mem:ST4[%argc.addr] GR32:%vreg1
        MOV64mr <fi#2>, 1, %noreg, 0, %noreg, %vreg3;
mem:ST8[%argv.addr] GR64:%vreg3
        ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def>, %RSP<imp-use>
        %RDI<def> = COPY %vreg5; GR64:%vreg5
        %AL<def> = MOV8ri 0
        CALL64pcrel32 <ga:@printf>, <regmask>, %RSP<imp-use>,
%AL<imp-use>, %RDI<imp-use>, %EAX<imp-def>
        ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def>,
%RSP<imp-use>
        %vreg6<def> = COPY %EAX; GR32:%vreg6
        %vreg4<def> = MOV32ri 0; GR32:%vreg4
        MOV64rr %vreg7, %RSP; GR64:%vreg7
    Successors according to CFG: BB#1

BB#1: derived from LLVM BB %entry
    Predecessors according to CFG: BB#0 BB#2
        %vreg8<def> = PHI %vreg7, <BB#0>, %vreg9, <BB#2>;
GR64:%vreg8,%vreg7,%vreg9
        CMP64rr %vreg8, %RBP, %EFLAGS<imp-def>; GR64:%vreg8
        JE_4 <BB#3>, %EFLAGS<imp-use>
    Successors according to CFG: BB#2 BB#3

BB#2: derived from LLVM BB %entry
    Predecessors according to CFG: BB#1
        MOV32mi %vreg8, 1, %noreg, 0, %noreg, 0; GR64:%vreg8
        %vreg9<def,tied1> = ADD64ri32 %vreg8<tied0>, 8,
%EFLAGS<imp-def>; GR64:%vreg9,%vreg8
        JMP_4 <BB#1>
    Successors according to CFG: BB#1 BB#3

BB#3: derived from LLVM BB %entry
    Predecessors according to CFG: BB#1 BB#2
        %EAX<def> = COPY %vreg4; GR32:%vreg4
        RETQ %EAX<imp-use>

# End machine code for function main.

Cheers

Hi Julien,

Thanks for the input, I think I know what is going on.

Hi,

Thx for your help...

Here is the IR code:

; ModuleID = 'foo_bar.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

@.str = private unnamed_addr constant [6 x i8] c"MAIN\0A\00", align 1

; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
%retval = alloca i32, align 4
%argc.addr = alloca i32, align 4
%argv.addr = alloca i8**, align 8
store i32 0, i32* %retval
store i32 %argc, i32* %argc.addr, align 4
store i8** %argv, i8*** %argv.addr, align 8
%call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([6 x
i8]* @.str, i32 0, i32 0))
call void @llvm.burnstack()
ret i32 0
}

declare i32 @printf(i8*, ...) #1

declare void @llvm.va_start(i8*)

declare void @llvm.va_end(i8*)

; Function Attrs: nounwind
declare void @llvm.burnstack() #2

attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #1 = { "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"stack-protector-buffer-size"="8" "unsafe-fp-math"="false"
"use-soft-float"="false" }
attributes #2 = { nounwind }

!llvm.ident = !{!0}

!0 = metadata !{metadata !"clang version 3.5.0 (tags/RELEASE_350/final) ()"}

Here is the assembly generated by llc (I'm not sure that's what you
meant by 'final assembly'):

       .text
       .file "foo_bar.ll"
       .globl main
       .align 16, 0x90
       .type main,@function
main: # @main
       .cfi_startproc
# BB#0: # %entry
       pushq %rbp
.Ltmp0:
       .cfi_def_cfa_offset 16
.Ltmp1:
       .cfi_offset %rbp, -16
       movq %rsp, %rbp
.Ltmp2:
       .cfi_def_cfa_register %rbp
       subq $16, %rsp
       movl $0, -4(%rbp)
       movl %edi, -8(%rbp)
       movq %rsi, -16(%rbp)
       movl $.L.str, %edi
       xorl %eax, %eax
       callq printf
       callq llvm.burnstack
       xorl %eax, %eax
       addq $16, %rsp
       popq %rbp
       retq
.Ltmp3:
       .size main, .Ltmp3-main
       .cfi_endproc

       .type .L.str,@object # @.str
       .section .rodata.str1.1,"aMS",@progbits,1
.L.str:
       .asciz "MAIN\n"
       .size .L.str, 6

       .ident "clang version 3.5.0 (tags/RELEASE_350/final) ()"
       .section ".note.GNU-stack","",@progbits

And here is the machine code in X86IselLowering.cpp generated by my
custom inserter:

# Machine code for function main: SSA
Frame Objects:
fi#0: size=4, align=4, at location [SP+8]
fi#1: size=4, align=4, at location [SP+8]
fi#2: size=8, align=8, at location [SP+8]
Function Live Ins: %EDI in %vreg0, %RSI in %vreg2

BB#0: derived from LLVM BB %entry
   Live Ins: %EDI %RSI
       %vreg2<def> = COPY %RSI; GR64:%vreg2
       %vreg0<def> = COPY %EDI; GR32:%vreg0
       %vreg1<def> = COPY %vreg0<kill>; GR32:%vreg1,%vreg0
       %vreg3<def> = COPY %vreg2<kill>; GR64:%vreg3,%vreg2
       %vreg5<def> = MOV64ri <ga:@.str>; GR64:%vreg5
       MOV32mi <fi#0>, 1, %noreg, 0, %noreg, 0; mem:ST4[%retval]
       MOV32mr <fi#1>, 1, %noreg, 0, %noreg, %vreg1;
mem:ST4[%argc.addr] GR32:%vreg1
       MOV64mr <fi#2>, 1, %noreg, 0, %noreg, %vreg3;
mem:ST8[%argv.addr] GR64:%vreg3
       ADJCALLSTACKDOWN64 0, %RSP<imp-def>, %EFLAGS<imp-def>, %RSP<imp-use>
       %RDI<def> = COPY %vreg5; GR64:%vreg5
       %AL<def> = MOV8ri 0
       CALL64pcrel32 <ga:@printf>, <regmask>, %RSP<imp-use>,
%AL<imp-use>, %RDI<imp-use>, %EAX<imp-def>
       ADJCALLSTACKUP64 0, 0, %RSP<imp-def>, %EFLAGS<imp-def>,
%RSP<imp-use>
       %vreg6<def> = COPY %EAX; GR32:%vreg6
       %vreg4<def> = MOV32ri 0; GR32:%vreg4
       MOV64rr %vreg7, %RSP; GR64:%vreg7

Here %vreg7 should be marked <def>, since this MOV64rr is supposed to define it. It is not, because of a mistake in the way the instruction is built in your code (see below).

   Successors according to CFG: BB#1

BB#1: derived from LLVM BB %entry
   Predecessors according to CFG: BB#0 BB#2
       %vreg8<def> = PHI %vreg7, <BB#0>, %vreg9, <BB#2>;
GR64:%vreg8,%vreg7,%vreg9
       CMP64rr %vreg8, %RBP, %EFLAGS<imp-def>; GR64:%vreg8
       JE_4 <BB#3>, %EFLAGS<imp-use>
   Successors according to CFG: BB#2 BB#3

BB#2: derived from LLVM BB %entry
   Predecessors according to CFG: BB#1
       MOV32mi %vreg8, 1, %noreg, 0, %noreg, 0; GR64:%vreg8
       %vreg9<def,tied1> = ADD64ri32 %vreg8<tied0>, 8,
%EFLAGS<imp-def>; GR64:%vreg9,%vreg8
       JMP_4 <BB#1>
   Successors according to CFG: BB#1 BB#3

BB#3: derived from LLVM BB %entry
   Predecessors according to CFG: BB#1 BB#2
       %EAX<def> = COPY %vreg4; GR32:%vreg4
       RETQ %EAX<imp-use>

# End machine code for function main.

Cheers

Hi Julien,

I have to admit that the way you build the virtual registers looks correct to me.

Could you attach the machine IR right before and after the insertion, as well as the final assembly (i.e., not just the binary), so that I can see whether I can help you further?

Thanks,
-Quentin

Hi,

I'm having trouble using virtual registers in the X86 backend.

I implemented a new intrinsic and I use a custom inserter. The goal of
the intrinsic is to set the content of the stack to zero at the end of
each function.

Here is my code:

MachineBasicBlock *
X86TargetLowering::EmitBURNSTACKWithCustomInserter(
                   MachineInstr *MI,
                   MachineBasicBlock *MBB) const {
  DebugLoc db = MI->getDebugLoc();
  const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
  const BasicBlock *LLVM_BB = MBB->getBasicBlock();
  MachineFunction *F = MBB->getParent();

  // Create all the basicblocks
  MachineBasicBlock *MBB_cond = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *MBB_erase = F->CreateMachineBasicBlock(LLVM_BB);
  MachineBasicBlock *MBB_end = F->CreateMachineBasicBlock(LLVM_BB);

  // Insert the new basicblocks
  F->insert(MBB, MBB_cond);
  F->insert(MBB, MBB_erase);
  F->insert(MBB, MBB_end);

  // Split the last MBB in two
  MBB_end->splice(MBB_end->begin(), MBB,
next(MachineBasicBlock::iterator(MI)), MBB->end());
  MBB_end->transferSuccessorsAndUpdatePHIs(MBB);

  // Move MBB at the right place
  MBB_end->moveAfter(MBB);
  MBB_erase->moveAfter(MBB);
  MBB_cond->moveAfter(MBB);

  // Set the new successors
  MBB->addSuccessor(MBB_cond);
  MBB_cond->addSuccessor(MBB_erase);
  MBB_cond->addSuccessor(MBB_end);
  MBB_erase->addSuccessor(MBB_cond);
  MBB_erase->addSuccessor(MBB_end);

  MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo();
  const TargetRegisterClass *AddrRegClass = getRegClassFor(MVT::i64);
  unsigned regA = MRI.createVirtualRegister(AddrRegClass);
  unsigned regB = MRI.createVirtualRegister(AddrRegClass);
  unsigned regC = MRI.createVirtualRegister(AddrRegClass);

  // Set the indice
  BuildMI(*MBB, MI, db,
TII->get(X86::MOV64rr)).addReg(regA).addReg(X86::RSP);

Either pass regA as the last argument of BuildMI (as you did for the PHI, for instance), so that it is added as the destination register, or add the Define flag when adding regA, i.e. addReg(regA, RegState::Define).

Hopefully, that should fix your issue :).

Cheers,
-Quentin

Either pass regA as the last argument of BuildMI (as you did for the PHI, for instance), so that it is added as the destination register, or add the Define flag when adding regA, i.e. addReg(regA, RegState::Define).

Hopefully, that should fix your issue :).

It worked!

Thank you very much.

Cheers