Hello everybody, I have a question about how the following code is optimized:
// Plain aggregate of two signed long fields; 16 bytes on the target shown
// in the IR below (dereferenceable(16)).
struct T {
  long a;
  long b;
};
__attribute__((noinline))long test1(const T *t) {
long x = t->a + t->b;
return x;
}
// Makes two independent by-value copies of `t`, calls test1 on each copy,
// and prints the sum of the two results followed by a newline.
// Marked noinline so the function is kept as its own definition.
__attribute__((noinline)) void func(T &t) {
  T first_copy = t;
  T second_copy = t;

  long first_sum = test1(&first_copy);
  long second_sum = test1(&second_copy);

  long total = first_sum + second_sum;
  std::cout << total << "\n";
}
With `clang++ -O2`, the IR of `func` is as follows:
; IR for func as pasted by the poster (symbol names appear demangled).
; %t is the incoming reference: readonly, nocapture, 16 dereferenceable bytes.
define dso_local void @func(T&)(ptr nocapture noundef nonnull readonly align 8 dereferenceable(16) %t) local_unnamed_addr #2 {
entry:
; Two separate stack slots of type %struct.T (presumably the locals t1 and t2).
%t1 = alloca %struct.T, align 8, !DIAssignID !894
%t2 = alloca %struct.T, align 8, !DIAssignID !896
; Copy 16 bytes from %t into %t1.
call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %t1) #8
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %t1, ptr noundef nonnull align 8 dereferenceable(16) %t, i64 16, i1 false), !tbaa.struct !899, !DIAssignID !901
; Copy 16 bytes from %t into %t2.
call void @llvm.lifetime.start.p0(i64 16, ptr nonnull %t2) #8
call void @llvm.memcpy.p0.p0.i64(ptr noundef nonnull align 8 dereferenceable(16) %t2, ptr noundef nonnull align 8 dereferenceable(16) %t, i64 16, i1 false), !tbaa.struct !899, !DIAssignID !904
; test1 is called once per copy; each call receives the address of a stack slot, not %t.
%call = call noundef i64 @test1(T const*)(ptr noundef nonnull %t1)
%call1 = call noundef i64 @test1(T const*)(ptr noundef nonnull %t2)
; Sum of the two call results.
%add = add nsw i64 %call1, %call
; Remainder of the body was elided by the poster.
...
ret void
}
The attributes of `test1` are as follows:
attributes #0 = { mustprogress nofree noinline norecurse nosync nounwind willreturn memory(argmem: read) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
Why doesn't the optimizer just pass `%t` directly as the argument to both calls to `test1` in `func`, given that `test1` has no side effects and does not modify its argument (it is marked `memory(argmem: read)`)? Does anyone know why it is not optimized like this? Thank you very much.