Hi all, I’m trying to unroll the following loop:
#define M 327

/*
 * Reproducer: on each of the M outer iterations, advance a, b and c by
 * len elements, then store a[j] + b[j] into c[j] for every j in [0, len).
 * The pointers are advanced before the inner loop runs, so the first len
 * elements of each array are never touched. Always returns 0.
 */
int foo(int * restrict a, int * restrict b, int * restrict c, int len) {
    for (int i = 0; i < M; ++i) {
        /* Step all three pointers to the next len-element row. */
        a += len;
        b += len;
        c += len;
        for (int j = 0; j < len; ++j) {
            c[j] = a[j] + b[j];
        }
    }
    return 0;
}
but it fails because there are two AftBlocks, for.cond.cleanup5.loopexit and for.cond.cleanup5:
*** IR Dump After Loop-Closed SSA Form Pass (lcssa) ***
; Function Attrs: nofree nosync nounwind
; IR for the C function foo shown above. The outer loop is the cycle
; for.body -> for.cond.cleanup5 -> for.body; the inner loop is the
; self-looping block for.body6.
define dso_local i32 @foo(ptr noalias nocapture noundef readonly %a, ptr noalias nocapture noundef readonly %b, ptr noalias nocapture noundef writeonly %c, i32 noundef %len) local_unnamed_addr #0 {
entry:
%0 = tail call ptr @llvm.noalias.decl.p0.p0.i64(ptr null, i64 0, metadata !2)
%1 = tail call ptr @llvm.noalias.decl.p0.p0.i64(ptr null, i64 0, metadata !5)
%2 = tail call ptr @llvm.noalias.decl.p0.p0.i64(ptr null, i64 0, metadata !7)
; Inner-loop guard (len > 0): computed once here, branched on at the end of for.body.
%cmp423 = icmp sgt i32 %len, 0
br label %for.body
; Function exit, reached only from for.cond.cleanup5.
for.cond.cleanup: ; preds = %for.cond.cleanup5
ret i32 0
; Outer-loop header: the phis carry i and the a/b/c pointers (plus their
; prov.* counterparts) from one outer iteration to the next.
for.body: ; preds = %for.cond.cleanup5, %entry
%i.031 = phi i32 [ 0, %entry ], [ %inc10, %for.cond.cleanup5 ]
%a.addr.030 = phi ptr [ %a, %entry ], [ %add.ptr, %for.cond.cleanup5 ]
%prov.a.addr.029 = phi ptr [ %a, %entry ], [ %3, %for.cond.cleanup5 ]
%c.addr.028 = phi ptr [ %c, %entry ], [ %add.ptr2, %for.cond.cleanup5 ]
%prov.c.addr.027 = phi ptr [ %c, %entry ], [ %5, %for.cond.cleanup5 ]
%b.addr.026 = phi ptr [ %b, %entry ], [ %add.ptr1, %for.cond.cleanup5 ]
%prov.b.addr.025 = phi ptr [ %b, %entry ], [ %4, %for.cond.cleanup5 ]
; The three getelementptr instructions below advance a, b and c by len elements.
%3 = tail call ptr @llvm.provenance.noalias.p0.p0.p0.p0.i64(ptr %prov.a.addr.029, ptr %0, ptr null, ptr undef, i64 0, metadata !2), !tbaa !9, !noalias !13
%add.ptr = getelementptr inbounds i32, ptr %a.addr.030, i32 %len
%4 = tail call ptr @llvm.provenance.noalias.p0.p0.p0.p0.i64(ptr %prov.b.addr.025, ptr %1, ptr null, ptr undef, i64 0, metadata !5), !tbaa !9, !noalias !13
%add.ptr1 = getelementptr inbounds i32, ptr %b.addr.026, i32 %len
%5 = tail call ptr @llvm.provenance.noalias.p0.p0.p0.p0.i64(ptr %prov.c.addr.027, ptr %2, ptr null, ptr undef, i64 0, metadata !7), !tbaa !9, !noalias !13
%add.ptr2 = getelementptr inbounds i32, ptr %c.addr.028, i32 %len
; When the guard is false the inner loop is skipped; this edge makes for.body
; a direct predecessor of for.cond.cleanup5.
br i1 %cmp423, label %for.body6.preheader, label %for.cond.cleanup5
for.body6.preheader: ; preds = %for.body
br label %for.body6
; Exit block of the inner loop; it only forwards to for.cond.cleanup5.
for.cond.cleanup5.loopexit: ; preds = %for.body6
br label %for.cond.cleanup5
; Outer-loop latch: increments i and branches back to for.body until i reaches
; 327. It has two predecessors: the inner-loop exit block above and the guard
; edge from for.body.
for.cond.cleanup5: ; preds = %for.cond.cleanup5.loopexit, %for.body
%inc10 = add nuw nsw i32 %i.031, 1
%exitcond32.not = icmp eq i32 %inc10, 327
br i1 %exitcond32.not, label %for.cond.cleanup, label %for.body, !llvm.loop !14, !nonzero_loop !18
; Inner loop body: c[j] = a[j] + b[j] using the pointers advanced in for.body,
; then j is incremented and the loop exits once j equals len.
for.body6: ; preds = %for.body6.preheader, %for.body6
%j.024 = phi i32 [ %inc, %for.body6 ], [ 0, %for.body6.preheader ]
%arrayidx = getelementptr inbounds i32, ptr %add.ptr, i32 %j.024
%6 = load i32, ptr %arrayidx, ptr_provenance ptr %3, align 4, !tbaa !19, !noalias !13
%arrayidx7 = getelementptr inbounds i32, ptr %add.ptr1, i32 %j.024
%7 = load i32, ptr %arrayidx7, ptr_provenance ptr %4, align 4, !tbaa !19, !noalias !13
%add = add nsw i32 %7, %6
%arrayidx8 = getelementptr inbounds i32, ptr %add.ptr2, i32 %j.024
store i32 %add, ptr %arrayidx8, ptr_provenance ptr %5, align 4, !tbaa !19, !noalias !13
%inc = add nuw nsw i32 %j.024, 1
%exitcond.not = icmp eq i32 %inc, %len
br i1 %exitcond.not, label %for.cond.cleanup5.loopexit, label %for.body6, !llvm.loop !21, !has_loop_guard !23
}