The following function compiles with -O3 into the LLVM IR shown below it. The description of the SimplifyCFG pass at http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg says that it:
- Eliminates a basic block that only contains an unconditional branch.
However, the first and third blocks in the compiled function (overflow.checked and vector.body10.preheader) contain only an unconditional branch, so I would have expected them to be eliminated. What am I missing?
// Doubles a[0] .. a[999] in place twice (two separate 1000-iteration passes)
// and returns a[0] + a[1]. Both loops index a[0] through a[999], so the
// caller must supply at least 1000 doubles.
double f(double *a) {
for (int i = 0; i < 1000; i++) // first pass over a[0..999]
a[i] *= 2;
for (int i = 0; i < 1000; i++) // second, identical pass over the same range
a[i] *= 2;
return a[0] + a[1]; // read after both passes have run
}
; Compiled form of the C function f above: a[0..999] is multiplied by 2.0 in
; place by each of two loops, then a[0] + a[1] is returned.
;
; Each loop body handles 8 consecutive doubles per trip as four <2 x double>
; load / fmul / store groups, and its induction variable steps by 8 until it
; equals 1000.
;
; The attribute group #1 and the !dbg / !llvm.loop metadata nodes referenced
; below are defined elsewhere in the module and are not part of this listing.
; Function Attrs: nounwind uwtable
define double @"\01?f@@YANPEAN@Z"(double* nocapture %a) #1 {
; Entry block: contains only an unconditional branch into the first loop.
; NOTE(review): LLVM does not allow a function's entry block to have
; predecessors, and %vector.body is the target of its own back edge, which is
; presumably why this block is not merged into the loop header -- confirm
; against the SimplifyCFG implementation.
overflow.checked:
  br label %vector.body, !dbg !18

; First loop. %index starts at 0 and advances by 8 each trip.
vector.body:                                      ; preds = %vector.body, %overflow.checked
  %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, %vector.body ], !dbg !18
  ; Elements [index, index+4): two <2 x double> loads at %0 and %0 + 2.
  %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19
  %1 = bitcast double* %0 to <2 x double>*, !dbg !20
  %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20
  %2 = getelementptr double, double* %0, i64 2, !dbg !20
  %3 = bitcast double* %2 to <2 x double>*, !dbg !20
  %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20
  %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  ; Store the doubled values back to the same addresses they were loaded from.
  %6 = bitcast double* %0 to <2 x double>*, !dbg !20
  store <2 x double> %4, <2 x double>* %6, align 8, !dbg !20
  %7 = bitcast double* %2 to <2 x double>*, !dbg !20
  store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20
  ; Elements [index+4, index+8). %index is always a multiple of 8 here (starts
  ; at 0, steps by 8), so `or 4` yields the same value as adding 4.
  %index.next = or i64 %index, 4, !dbg !18
  %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg !19
  %9 = bitcast double* %8 to <2 x double>*, !dbg !20
  %wide.load.1 = load <2 x double>, <2 x double>* %9, align 8, !dbg !20
  %10 = getelementptr double, double* %8, i64 2, !dbg !20
  %11 = bitcast double* %10 to <2 x double>*, !dbg !20
  %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20
  %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %14 = bitcast double* %8 to <2 x double>*, !dbg !20
  store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20
  %15 = bitcast double* %10 to <2 x double>*, !dbg !20
  store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20
  ; Advance by 8 elements; leave the loop once all 1000 have been processed.
  %index.next.1 = add nsw i64 %index, 8, !dbg !18
  %16 = icmp eq i64 %index.next.1, 1000, !dbg !18
  br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg !18, !llvm.loop !21

; Contains only an unconditional branch into the second loop. It is the
; incoming block for the initial value 0 of %index13.
vector.body10.preheader:                          ; preds = %vector.body
  br label %vector.body10, !dbg !24

; Second loop: same shape as the first, over the same 1000 elements.
vector.body10:                                    ; preds = %vector.body10, %vector.body10.preheader
  %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, %vector.body10 ], !dbg !25
  ; Elements [index13, index13+4).
  %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24
  %18 = bitcast double* %17 to <2 x double>*, !dbg !26
  %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26
  %19 = getelementptr double, double* %17, i64 2, !dbg !26
  %20 = bitcast double* %19 to <2 x double>*, !dbg !26
  %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26
  %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %22 = fmul <2 x double> %wide.load27, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %23 = bitcast double* %17 to <2 x double>*, !dbg !26
  store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26
  %24 = bitcast double* %19 to <2 x double>*, !dbg !26
  store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26
  ; Elements [index13+4, index13+8); `or 4` equals `+ 4` because %index13 is
  ; always a multiple of 8.
  %index.next21 = or i64 %index13, 4, !dbg !25
  %25 = getelementptr inbounds double, double* %a, i64 %index.next21, !dbg !24
  %26 = bitcast double* %25 to <2 x double>*, !dbg !26
  %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26
  %27 = getelementptr double, double* %25, i64 2, !dbg !26
  %28 = bitcast double* %27 to <2 x double>*, !dbg !26
  %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26
  %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %31 = bitcast double* %25 to <2 x double>*, !dbg !26
  store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26
  %32 = bitcast double* %27 to <2 x double>*, !dbg !26
  store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26
  %index.next21.1 = add nsw i64 %index13, 8, !dbg !25
  %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25
  br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, !llvm.loop !27

; After both loops: load a[0] and a[1] and return their sum.
middle.block11:                                   ; preds = %vector.body10
  %34 = load double, double* %a, align 8, !dbg !28
  %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29
  %36 = load double, double* %35, align 8, !dbg !29
  %37 = fadd double %34, %36, !dbg !30
  ret double %37, !dbg !31
}