Assertion failed: (!CheckingMode && "CallGraphSCCPass did not update the CallGraph correctly!")

Hi,

I am trying to merge OpenMP for loops, e.g.:

#pragma omp parallel
{
    /* Four separate worksharing loops inside a single parallel region.
       g() and f() are not shown in this post. */
    #pragma omp for
    for (int i=0; i < 100; i++)
        printf("Before--> %d\n", g(i));

    #pragma omp for
    for (int j=0; j < 10; j++)
        printf("After-->%d\n", f(j));

    #pragma omp for
    for (int i=0; i < 10; i++)
        printf("Middle-->%d\n", g(i));

    #pragma omp for
    for (int i=0; i < 10; i++)
        printf("Last-->%d\n", g(i));
}

The last three OpenMP for loops can be merged by wrapping them in a single pair of "call void @__kmpc_for_static_init_4" and "call void @__kmpc_for_static_fini" calls. This also involves some additional cleanup: removing the store instructions for the registers that become redundant once the loops are merged.

The state of the IR before merging:

; Outlined body of the parallel region. It contains four worksharing loops in
; sequence. Each loop follows the same shape:
;   1. initialise its own lb/ub/stride/is_last stack slots,
;   2. call @__kmpc_for_static_init_4 with pointers to those slots,
;   3. clamp the returned upper bound to the loop's original upper bound,
;   4. run the inner loop from lb to ub (inclusive), calling printf per iteration,
;   5. call @__kmpc_for_static_fini, end the slot lifetimes, call @__kmpc_barrier.
; Function Attrs: norecurse nounwind ssp uwtable
define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid.) #1 {
entry:
  ; Loop-bound bookkeeping slots, one set of four per worksharing loop.
  %.omp.lb = alloca i32, align 4
  %.omp.ub = alloca i32, align 4
  %.omp.stride = alloca i32, align 4
  %.omp.is_last = alloca i32, align 4
  %.omp.lb6 = alloca i32, align 4
  %.omp.ub7 = alloca i32, align 4
  %.omp.stride8 = alloca i32, align 4
  %.omp.is_last9 = alloca i32, align 4
  %.omp.lb30 = alloca i32, align 4
  %.omp.ub31 = alloca i32, align 4
  %.omp.stride32 = alloca i32, align 4
  %.omp.is_last33 = alloca i32, align 4
  %.omp.lb55 = alloca i32, align 4
  %.omp.ub56 = alloca i32, align 4
  %.omp.stride57 = alloca i32, align 4
  %.omp.is_last58 = alloca i32, align 4

  ; ---- Loop 1: lb = 0, ub = 99, stride = 1, is_last = 0 ----
  %0 = bitcast i32* %.omp.lb to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #5
  store i32 0, i32* %.omp.lb, align 4, !tbaa !4
  %1 = bitcast i32* %.omp.ub to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %1) #5
  store i32 99, i32* %.omp.ub, align 4, !tbaa !4
  %2 = bitcast i32* %.omp.stride to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %2) #5
  store i32 1, i32* %.omp.stride, align 4, !tbaa !4
  %3 = bitcast i32* %.omp.is_last to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %3) #5
  store i32 0, i32* %.omp.is_last, align 4, !tbaa !4
  ; %4 = *global_tid; reused as the thread-id argument of every runtime call below.
  %4 = load i32, i32* %.global_tid., align 4, !tbaa !4
  call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %4, i32 34, i32* nonnull %.omp.is_last, i32* nonnull %.omp.lb, i32* nonnull %.omp.ub, i32* nonnull %.omp.stride, i32 1, i32 1) #5
  ; ub = min(ub, 99)
  %5 = load i32, i32* %.omp.ub, align 4, !tbaa !4
  %6 = icmp slt i32 %5, 99
  %cond = select i1 %6, i32 %5, i32 99
  store i32 %cond, i32* %.omp.ub, align 4, !tbaa !4
  %7 = load i32, i32* %.omp.lb, align 4, !tbaa !4
  br label %omp.inner.for.cond

omp.inner.for.cond:                               ; preds = %omp.inner.for.body, %entry
  %.omp.iv.0 = phi i32 [ %7, %entry ], [ %add3, %omp.inner.for.body ]
  ; Exit once iv > ub (ub is reloaded from its slot on every iteration).
  %8 = load i32, i32* %.omp.ub, align 4, !tbaa !4
  %cmp1 = icmp sgt i32 %.omp.iv.0, %8
  br i1 %cmp1, label %omp.loop.exit, label %omp.inner.for.body

omp.inner.for.body:                               ; preds = %omp.inner.for.cond
  %call2 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([14 x i8], [14 x i8]* @.str.1, i64 0, i64 0), i32 %.omp.iv.0)
  %add3 = add nsw i32 %.omp.iv.0, 1
  br label %omp.inner.for.cond

omp.loop.exit:                                    ; preds = %omp.inner.for.cond
  call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %4) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %3) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %2) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %1) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #5
  call void @__kmpc_barrier(%struct.ident_t* nonnull @1, i32 %4) #5

  ; ---- Loop 2: lb = 0, ub = 9, stride = 1, is_last = 0 ----
  %9 = bitcast i32* %.omp.lb6 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %9) #5
  store i32 0, i32* %.omp.lb6, align 4, !tbaa !4
  %10 = bitcast i32* %.omp.ub7 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %10) #5
  store i32 9, i32* %.omp.ub7, align 4, !tbaa !4
  %11 = bitcast i32* %.omp.stride8 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %11) #5
  store i32 1, i32* %.omp.stride8, align 4, !tbaa !4
  %12 = bitcast i32* %.omp.is_last9 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %12) #5
  store i32 0, i32* %.omp.is_last9, align 4, !tbaa !4
  call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %4, i32 34, i32* nonnull %.omp.is_last9, i32* nonnull %.omp.lb6, i32* nonnull %.omp.ub7, i32* nonnull %.omp.stride8, i32 1, i32 1) #5
  ; ub = min(ub, 9)
  %13 = load i32, i32* %.omp.ub7, align 4, !tbaa !4
  %14 = icmp slt i32 %13, 9
  %cond14 = select i1 %14, i32 %13, i32 9
  store i32 %cond14, i32* %.omp.ub7, align 4, !tbaa !4
  %15 = load i32, i32* %.omp.lb6, align 4, !tbaa !4
  br label %omp.inner.for.cond15

omp.inner.for.cond15:                             ; preds = %omp.inner.for.body18, %omp.loop.exit
  %.omp.iv4.0 = phi i32 [ %15, %omp.loop.exit ], [ %add25, %omp.inner.for.body18 ]
  %16 = load i32, i32* %.omp.ub7, align 4, !tbaa !4
  %cmp16 = icmp sgt i32 %.omp.iv4.0, %16
  br i1 %cmp16, label %omp.loop.exit27, label %omp.inner.for.body18

omp.inner.for.body18:                             ; preds = %omp.inner.for.cond15
  %call22 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([12 x i8], [12 x i8]* @.str.2, i64 0, i64 0), i32 %.omp.iv4.0)
  %add25 = add nsw i32 %.omp.iv4.0, 1
  br label %omp.inner.for.cond15

omp.loop.exit27:                                  ; preds = %omp.inner.for.cond15
  call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %4) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %12) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %11) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %10) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %9) #5
  call void @__kmpc_barrier(%struct.ident_t* nonnull @1, i32 %4) #5

  ; ---- Loop 3: lb = 0, ub = 9, stride = 1, is_last = 0 ----
  %17 = bitcast i32* %.omp.lb30 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %17) #5
  store i32 0, i32* %.omp.lb30, align 4, !tbaa !4
  %18 = bitcast i32* %.omp.ub31 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %18) #5
  store i32 9, i32* %.omp.ub31, align 4, !tbaa !4
  %19 = bitcast i32* %.omp.stride32 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %19) #5
  store i32 1, i32* %.omp.stride32, align 4, !tbaa !4
  %20 = bitcast i32* %.omp.is_last33 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %20) #5
  store i32 0, i32* %.omp.is_last33, align 4, !tbaa !4
  call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %4, i32 34, i32* nonnull %.omp.is_last33, i32* nonnull %.omp.lb30, i32* nonnull %.omp.ub31, i32* nonnull %.omp.stride32, i32 1, i32 1) #5
  ; ub = min(ub, 9)
  %21 = load i32, i32* %.omp.ub31, align 4, !tbaa !4
  %22 = icmp slt i32 %21, 9
  %cond39 = select i1 %22, i32 %21, i32 9
  store i32 %cond39, i32* %.omp.ub31, align 4, !tbaa !4
  %23 = load i32, i32* %.omp.lb30, align 4, !tbaa !4
  br label %omp.inner.for.cond40

omp.inner.for.cond40:                             ; preds = %omp.inner.for.body43, %omp.loop.exit27
  %.omp.iv28.0 = phi i32 [ %23, %omp.loop.exit27 ], [ %add50, %omp.inner.for.body43 ]
  %24 = load i32, i32* %.omp.ub31, align 4, !tbaa !4
  %cmp41 = icmp sgt i32 %.omp.iv28.0, %24
  br i1 %cmp41, label %omp.loop.exit52, label %omp.inner.for.body43

omp.inner.for.body43:                             ; preds = %omp.inner.for.cond40
  %call47 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([13 x i8], [13 x i8]* @.str.3, i64 0, i64 0), i32 %.omp.iv28.0)
  %add50 = add nsw i32 %.omp.iv28.0, 1
  br label %omp.inner.for.cond40

omp.loop.exit52:                                  ; preds = %omp.inner.for.cond40
  call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %4) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %20) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %19) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %18) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %17) #5
  call void @__kmpc_barrier(%struct.ident_t* nonnull @1, i32 %4) #5

  ; ---- Loop 4: lb = 0, ub = 9, stride = 1, is_last = 0 ----
  %25 = bitcast i32* %.omp.lb55 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %25) #5
  store i32 0, i32* %.omp.lb55, align 4, !tbaa !4
  %26 = bitcast i32* %.omp.ub56 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %26) #5
  store i32 9, i32* %.omp.ub56, align 4, !tbaa !4
  %27 = bitcast i32* %.omp.stride57 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %27) #5
  store i32 1, i32* %.omp.stride57, align 4, !tbaa !4
  %28 = bitcast i32* %.omp.is_last58 to i8*
  call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %28) #5
  store i32 0, i32* %.omp.is_last58, align 4, !tbaa !4
  call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull @0, i32 %4, i32 34, i32* nonnull %.omp.is_last58, i32* nonnull %.omp.lb55, i32* nonnull %.omp.ub56, i32* nonnull %.omp.stride57, i32 1, i32 1) #5
  ; ub = min(ub, 9)
  %29 = load i32, i32* %.omp.ub56, align 4, !tbaa !4
  %30 = icmp slt i32 %29, 9
  %cond64 = select i1 %30, i32 %29, i32 9
  store i32 %cond64, i32* %.omp.ub56, align 4, !tbaa !4
  %31 = load i32, i32* %.omp.lb55, align 4, !tbaa !4
  br label %omp.inner.for.cond65

omp.inner.for.cond65:                             ; preds = %omp.inner.for.body68, %omp.loop.exit52
  %.omp.iv53.0 = phi i32 [ %31, %omp.loop.exit52 ], [ %add75, %omp.inner.for.body68 ]
  %32 = load i32, i32* %.omp.ub56, align 4, !tbaa !4
  %cmp66 = icmp sgt i32 %.omp.iv53.0, %32
  br i1 %cmp66, label %omp.loop.exit77, label %omp.inner.for.body68

omp.inner.for.body68:                             ; preds = %omp.inner.for.cond65
  %call72 = call i32 (i8*, ...) @printf(i8* nonnull dereferenceable(1) getelementptr inbounds ([11 x i8], [11 x i8]* @.str.4, i64 0, i64 0), i32 %.omp.iv53.0)
  %add75 = add nsw i32 %.omp.iv53.0, 1
  br label %omp.inner.for.cond65

omp.loop.exit77:                                  ; preds = %omp.inner.for.cond65
  call void @__kmpc_for_static_fini(%struct.ident_t* nonnull @0, i32 %4) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %28) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %27) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %26) #5
  call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %25) #5
  call void @__kmpc_barrier(%struct.ident_t* nonnull @1, i32 %4) #5
  ret void
}

=================>

After merging, I am getting