llc barfing

I was playing with optimization switches to llc to see how fast I could get it
to compile, but it keeps barfing. Tinkering indicates that llc barfs
particularly when -tailcallopt is given in combination with other flags. For
example, llc works without -tailcallopt in both of these configurations:

$ llc -O0 -f aout.bc -o aout.s

$ llc -O0 --regalloc=local -f aout.bc -o aout.s

But it fails once -tailcallopt is added, with a different error message in each case:

$ llc -O0 -tailcallopt -f aout.bc -o aout.s
llc: LiveVariables.cpp:125: void
llvm::LiveVariables::HandleVirtRegUse(unsigned int, llvm::MachineBasicBlock*,
llvm::MachineInstr*): Assertion `MRI->getVRegDef(reg) && "Register use before
def!"' failed.
0 llc 0x08b8efe8
Stack dump:
0. Program arguments: llc -O0 -tailcallopt -f aout.bc -o aout.s
1. Running pass 'Live Variable Analysis' on
function '@"visit_array_aux<`Reference>"'
Aborted

$ llc -tailcallopt -O0 --regalloc=local -f aout.bc -o aout.s
llc: RegAllocLocal.cpp:855:
void<unnamed>::RALocal::AllocateBasicBlock(llvm::MachineBasicBlock&):
Assertion `(!PhysRegsUsed[PhysReg] || PhysRegsUsed[PhysReg] == -1)
&& "Silently clearing a virtual register?"' failed.
0 llc 0x08b8efe8
Stack dump:
0. Program arguments: llc -tailcallopt -O0 --regalloc=local -f aout.bc -o
aout.s
1. Running pass 'Local Register Allocator' on
function '@"fill<`Reference>"'
Aborted

Are these barfs due to bugs in my generated code, or are parts of LLVM not
honoring tail calls?

Here's the IR of my relevant functions:

; Recursive walk over the array carried by the first argument, starting at the
; index given by the second argument. The arguments are unnamed, so they are
; numbered %0 (an aggregate of type %0) and %1 (i32); "%0 %0" therefore reads
; as "value %0 of type %0".
;
; NOTE(review): field 1 of type %0 is used as the exclusive upper bound for
; indices and field 2 as an i8* that is cast to the element storage --
; presumably "length" and "data pointer"; confirm against the definition of
; type %0, which is not part of this excerpt.
;
; For each index: the element is loaded; if its field-2 pointer is non-null it
; is stored into @visit_stack at position @n_visit and @n_visit is incremented.
; The function then tail-calls itself with index + 1. It returns undef once the
; index equals field 1 of the first argument.
define fastcc i8* @"visit_array_aux<`Reference>"(%0, i32) {
entry:
  ; The loaded value is never used (#uses=0).
  %2 = load i32* @shadow_stack_depth ; <i32> [#uses=0]
  br label %start

start: ; preds = %entry
  ; Termination test: index == field 1 of the array argument.
  %3 = extractvalue %0 %0, 1 ; <i32> [#uses=1]
  %4 = icmp eq i32 %1, %3 ; <i1> [#uses=1]
  br i1 %4, label %pass, label %fail

fail: ; preds = %start
  ; Bounds check, first half: index >= 0.
  %5 = icmp sge i32 %1, 0 ; <i1> [#uses=1]
  br i1 %5, label %pass2, label %fail1

pass: ; preds = %start
  ret i8* undef

fail1: ; preds = %fail
  br label %cont

pass2: ; preds = %fail
  ; Bounds check, second half: index < field 1.
  %6 = extractvalue %0 %0, 1 ; <i32> [#uses=1]
  %7 = icmp slt i32 %1, %6 ; <i1> [#uses=1]
  br label %cont

cont: ; preds = %pass2, %fail1
  ; %8 is true only when both halves of the bounds check succeeded.
  %8 = phi i1 [ %7, %pass2 ], [ false, %fail1 ] ; <i1> [#uses=1]
  br i1 %8, label %pass4, label %fail3

fail3: ; preds = %cont
  ; Bounds check failed: print the string in @buf10 and call exit(1).
  %9 = call i32 (i8*, ...)* @printf(i8* getelementptr ([27 x i8]* @buf10, i32 0, i32 0)) ; <i32> [#uses=0]
  call void @exit(i32 1)
  br label %cont5

pass4: ; preds = %cont
  br label %cont5

cont5: ; preds = %pass4, %fail3
  ; Load element [index] from the storage pointed to by field 2.
  %10 = extractvalue %0 %0, 2 ; <i8*> [#uses=1]
  %11 = bitcast i8* %10 to %0* ; <%0*> [#uses=1]
  %12 = getelementptr %0* %11, i32 %1 ; <%0*> [#uses=1]
  %13 = load %0* %12 ; <%0> [#uses=2]
  ; Null test on the element's own field-2 pointer.
  %14 = extractvalue %0 %13, 2 ; <i8*> [#uses=1]
  %15 = ptrtoint i8* %14 to i32 ; <i32> [#uses=1]
  %16 = icmp eq i32 %15, 0 ; <i1> [#uses=1]
  br i1 %16, label %pass7, label %fail6

fail6: ; preds = %cont5
  ; Non-null element: append it to @visit_stack at position @n_visit.
  ; Start with the bounds check on @n_visit (>= 0 here, < field 1 in %pass10).
  %17 = load %0* @visit_stack ; <%0> [#uses=2]
  %18 = load i32* @n_visit ; <i32> [#uses=4]
  %19 = icmp sge i32 %18, 0 ; <i1> [#uses=1]
  br i1 %19, label %pass10, label %fail9

pass7: ; preds = %cont5
  br label %cont8

cont8: ; preds = %cont14, %pass7
  ; Recurse on the next index; the call is in tail position (its result is
  ; returned directly).
  %20 = add i32 %1, 1 ; <i32> [#uses=1]
  %21 = tail call fastcc i8* @"visit_array_aux<`Reference>"(%0 %0, i32 %20) ; <i8*> [#uses=1]
  ret i8* %21

fail9: ; preds = %fail6
  br label %cont11

pass10: ; preds = %fail6
  %22 = extractvalue %0 %17, 1 ; <i32> [#uses=1]
  %23 = icmp slt i32 %18, %22 ; <i1> [#uses=1]
  br label %cont11

cont11: ; preds = %pass10, %fail9
  ; %24 is true only when @n_visit is within the bounds of @visit_stack.
  %24 = phi i1 [ %23, %pass10 ], [ false, %fail9 ] ; <i1> [#uses=1]
  br i1 %24, label %pass13, label %fail12

fail12: ; preds = %cont11
  ; Bounds check failed: print the string in @buf10 and call exit(1).
  %25 = call i32 (i8*, ...)* @printf(i8* getelementptr ([27 x i8]* @buf10, i32 0, i32 0)) ; <i32> [#uses=0]
  call void @exit(i32 1)
  br label %cont14

pass13: ; preds = %cont11
  br label %cont14

cont14: ; preds = %pass13, %fail12
  ; visit_stack.storage[n_visit] = element; n_visit = n_visit + 1.
  %26 = extractvalue %0 %17, 2 ; <i8*> [#uses=1]
  %27 = bitcast i8* %26 to %0* ; <%0*> [#uses=1]
  %28 = getelementptr %0* %27, i32 %18 ; <%0*> [#uses=1]
  store %0 %13, %0* %28
  %29 = add i32 %18, 1 ; <i32> [#uses=1]
  store i32 %29, i32* @n_visit
  br label %cont8
}

; Recursive fill: stores the second argument into every slot of the array
; carried by the first argument, from the index given by the third argument
; upward. The arguments are unnamed, so they are numbered %0 (array, type %0),
; %1 (value to store, type %0) and %2 (i32 index); "%0 %0" reads as "value %0
; of type %0".
;
; NOTE(review): field 1 of type %0 is used as the exclusive upper bound for
; indices and field 2 as an i8* that is cast to the element storage --
; presumably "length" and "data pointer"; confirm against the definition of
; type %0, which is not part of this excerpt.
;
; Returns undef once the index is no longer less than field 1; otherwise it
; stores the value and tail-calls itself with index + 1.
define fastcc i8* @"fill<`Reference>"(%0, %0, i32) {
entry:
  ; The loaded value is never used (#uses=0).
  %3 = load i32* @shadow_stack_depth ; <i32> [#uses=0]
  br label %start

start: ; preds = %entry
  ; Continue only while index < field 1 of the array argument.
  %4 = extractvalue %0 %0, 1 ; <i32> [#uses=1]
  %5 = icmp slt i32 %2, %4 ; <i1> [#uses=1]
  br i1 %5, label %pass, label %fail

fail: ; preds = %start
  ret i8* undef

pass: ; preds = %start
  ; Bounds check, first half: index >= 0.
  %6 = icmp sge i32 %2, 0 ; <i1> [#uses=1]
  br i1 %6, label %pass2, label %fail1

fail1: ; preds = %pass
  br label %cont

pass2: ; preds = %pass
  ; Bounds check, second half: index < field 1.
  %7 = extractvalue %0 %0, 1 ; <i32> [#uses=1]
  %8 = icmp slt i32 %2, %7 ; <i1> [#uses=1]
  br label %cont

cont: ; preds = %pass2, %fail1
  ; %9 is true only when both halves of the bounds check succeeded.
  %9 = phi i1 [ %8, %pass2 ], [ false, %fail1 ] ; <i1> [#uses=1]
  br i1 %9, label %pass4, label %fail3

fail3: ; preds = %cont
  ; Bounds check failed: print the string in @buf10 and call exit(1).
  %10 = call i32 (i8*, ...)* @printf(i8* getelementptr ([27 x i8]* @buf10, i32 0, i32 0)) ; <i32> [#uses=0]
  call void @exit(i32 1)
  br label %cont5

pass4: ; preds = %cont
  br label %cont5

cont5: ; preds = %pass4, %fail3
  ; array.storage[index] = value.
  %11 = extractvalue %0 %0, 2 ; <i8*> [#uses=1]
  %12 = bitcast i8* %11 to %0* ; <%0*> [#uses=1]
  %13 = getelementptr %0* %12, i32 %2 ; <%0*> [#uses=1]
  store %0 %1, %0* %13
  ; Recurse on the next index; the call is in tail position (its result is
  ; returned directly).
  %14 = add i32 %2, 1 ; <i32> [#uses=1]
  %15 = tail call fastcc i8* @"fill<`Reference>"(%0 %0, %0 %1, i32 %14) ; <i8*> [#uses=1]
  ret i8* %15
}

It was a bug in LLVM, with emitting tail calls at -O0. It's now
fixed, as of r89444.

Dan

Awesome, thanks. What about this one where the simple register allocator seems
to have an internal error:

$ llc --regalloc=simple -f aout.bc -o aout.s
llc: X86FloatingPoint.cpp:963:
void<unnamed>::FPS::handleSpecialFP(llvm::ilist_iterator<llvm::MachineInstr>&):
Assertion `StackTop == 0 && "Stack should be empty after a call!"' failed.
0 llc 0x08b8efe8
Stack dump:
0. Program arguments: llc --regalloc=simple -f aout.bc -o aout.s
1. Running pass 'X86 FP Stackifier' on function '@gc'
Aborted

regalloc=simple was removed in trunk because it was unmaintained.

Dan