The function below, compiled with -O3, produces the IR shown after it. The SimplifyCFG documentation (http://llvm.org/docs/Passes.html#simplifycfg-simplify-the-cfg) says that the pass:

- Eliminates a basic block that only contains an unconditional branch.

but the first and third blocks in the compiled function only contain an unconditional branch; I would have expected them to be eliminated. What am I missing?

/* Doubles each of the first 1000 elements of `a` twice, in two separate
   loops, then returns the sum of the first two elements. */
double f(double *a) {

/* First pass: multiply a[0..999] by 2 in place. */
for (int i = 0; i < 1000; i++)

a[i] *= 2;

/* Second pass: same operation over the same range. */
for (int i = 0; i < 1000; i++)

a[i] *= 2;

/* Reads the already-updated a[0] and a[1]. */
return a[0] + a[1];

}

; Function Attrs: nounwind uwtable
;
; Optimized IR for f(double *a). Both source loops appear here as vector
; loops over <2 x double>, each unrolled so that one iteration handles
; elements [index, index+8): two vectors at index and index+2, then two
; more at index+4 and index+6.
define double @"\01?f@@YANPEAN@Z"(double* nocapture %a) #1 {
; Entry block: contains only an unconditional branch into the first loop.
overflow.checked:
  br label %vector.body, !dbg !18

; First loop: a[index .. index+7] *= 2.0, for index = 0, 8, ..., 992.
vector.body:                                      ; preds = %vector.body, %overflow.checked
  %index = phi i64 [ 0, %overflow.checked ], [ %index.next.1, %vector.body ], !dbg !18
  %0 = getelementptr inbounds double, double* %a, i64 %index, !dbg !19
  %1 = bitcast double* %0 to <2 x double>*, !dbg !20
  %wide.load = load <2 x double>, <2 x double>* %1, align 8, !dbg !20
  %2 = getelementptr double, double* %0, i64 2, !dbg !20
  %3 = bitcast double* %2 to <2 x double>*, !dbg !20
  %wide.load8 = load <2 x double>, <2 x double>* %3, align 8, !dbg !20
  %4 = fmul <2 x double> %wide.load, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %5 = fmul <2 x double> %wide.load8, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %6 = bitcast double* %0 to <2 x double>*, !dbg !20
  store <2 x double> %4, <2 x double>* %6, align 8, !dbg !20
  %7 = bitcast double* %2 to <2 x double>*, !dbg !20
  store <2 x double> %5, <2 x double>* %7, align 8, !dbg !20
  ; %index starts at 0 and advances by 8, so `or 4` yields index + 4 here.
  %index.next = or i64 %index, 4, !dbg !18
  %8 = getelementptr inbounds double, double* %a, i64 %index.next, !dbg !19
  %9 = bitcast double* %8 to <2 x double>*, !dbg !20
  %wide.load.1 = load <2 x double>, <2 x double>* %9, align 8, !dbg !20
  %10 = getelementptr double, double* %8, i64 2, !dbg !20
  %11 = bitcast double* %10 to <2 x double>*, !dbg !20
  %wide.load8.1 = load <2 x double>, <2 x double>* %11, align 8, !dbg !20
  %12 = fmul <2 x double> %wide.load.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %13 = fmul <2 x double> %wide.load8.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !20
  %14 = bitcast double* %8 to <2 x double>*, !dbg !20
  store <2 x double> %12, <2 x double>* %14, align 8, !dbg !20
  %15 = bitcast double* %10 to <2 x double>*, !dbg !20
  store <2 x double> %13, <2 x double>* %15, align 8, !dbg !20
  %index.next.1 = add nsw i64 %index, 8, !dbg !18
  %16 = icmp eq i64 %index.next.1, 1000, !dbg !18
  br i1 %16, label %vector.body10.preheader, label %vector.body, !dbg !18, !llvm.loop !21

; Contains only an unconditional branch into the second loop.
vector.body10.preheader:                          ; preds = %vector.body
  br label %vector.body10, !dbg !24

; Second loop: same computation as the first, again over a[0 .. 999].
vector.body10:                                    ; preds = %vector.body10, %vector.body10.preheader
  %index13 = phi i64 [ 0, %vector.body10.preheader ], [ %index.next21.1, %vector.body10 ], !dbg !25
  %17 = getelementptr inbounds double, double* %a, i64 %index13, !dbg !24
  %18 = bitcast double* %17 to <2 x double>*, !dbg !26
  %wide.load26 = load <2 x double>, <2 x double>* %18, align 8, !dbg !26
  %19 = getelementptr double, double* %17, i64 2, !dbg !26
  %20 = bitcast double* %19 to <2 x double>*, !dbg !26
  %wide.load27 = load <2 x double>, <2 x double>* %20, align 8, !dbg !26
  %21 = fmul <2 x double> %wide.load26, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %22 = fmul <2 x double> %wide.load27, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %23 = bitcast double* %17 to <2 x double>*, !dbg !26
  store <2 x double> %21, <2 x double>* %23, align 8, !dbg !26
  %24 = bitcast double* %19 to <2 x double>*, !dbg !26
  store <2 x double> %22, <2 x double>* %24, align 8, !dbg !26
  ; %index13 starts at 0 and advances by 8, so `or 4` yields index13 + 4 here.
  %index.next21 = or i64 %index13, 4, !dbg !25
  %25 = getelementptr inbounds double, double* %a, i64 %index.next21, !dbg !24
  %26 = bitcast double* %25 to <2 x double>*, !dbg !26
  %wide.load26.1 = load <2 x double>, <2 x double>* %26, align 8, !dbg !26
  %27 = getelementptr double, double* %25, i64 2, !dbg !26
  %28 = bitcast double* %27 to <2 x double>*, !dbg !26
  %wide.load27.1 = load <2 x double>, <2 x double>* %28, align 8, !dbg !26
  %29 = fmul <2 x double> %wide.load26.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %30 = fmul <2 x double> %wide.load27.1, <double 2.000000e+00, double 2.000000e+00>, !dbg !26
  %31 = bitcast double* %25 to <2 x double>*, !dbg !26
  store <2 x double> %29, <2 x double>* %31, align 8, !dbg !26
  %32 = bitcast double* %27 to <2 x double>*, !dbg !26
  store <2 x double> %30, <2 x double>* %32, align 8, !dbg !26
  %index.next21.1 = add nsw i64 %index13, 8, !dbg !25
  %33 = icmp eq i64 %index.next21.1, 1000, !dbg !25
  br i1 %33, label %middle.block11, label %vector.body10, !dbg !25, !llvm.loop !27

; Function exit: return a[0] + a[1], reloaded from memory after both loops.
middle.block11:                                   ; preds = %vector.body10
  %34 = load double, double* %a, align 8, !dbg !28
  %35 = getelementptr inbounds double, double* %a, i64 1, !dbg !29
  %36 = load double, double* %35, align 8, !dbg !29
  %37 = fadd double %34, %36, !dbg !30
  ret double %37, !dbg !31
}