I'm trying to get this simple function to compile to decent code. Failing to do so is making the -O0 codegen monstrous:
// Minimal reproducer: an aggregate with a single unsigned field.
struct DeclGroup {
unsigned NumDecls;
};
// Takes the aggregate by value; only declared here, never defined.
int foo(DeclGroup D);
// Dereferences D and passes the pointee to foo() by value, discarding the
// result. This by-value argument is the call under discussion.
void bar(DeclGroup *D) {
foo(*D);
}
The current issue that I'm fighting is that we get an extra pointless temporary and memcpy, which llvm-gcc isn't generating:
$ clang t.cc -S -o - -emit-llvm
...
define void @_Z3barP9DeclGroup(%struct.DeclGroup* %D) ssp {
entry:
%D.addr = alloca %struct.DeclGroup*, align 8 ; <%struct.DeclGroup**> [#uses=2]
%agg.tmp = alloca %struct.DeclGroup, align 4 ; <%struct.DeclGroup*> [#uses=2]
store %struct.DeclGroup* %D, %struct.DeclGroup** %D.addr
%tmp = load %struct.DeclGroup** %D.addr ; <%struct.DeclGroup*> [#uses=1]
%tmp1 = bitcast %struct.DeclGroup* %agg.tmp to i8* ; <i8*> [#uses=1]
%tmp2 = bitcast %struct.DeclGroup* %tmp to i8* ; <i8*> [#uses=1]
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %tmp1, i8* %tmp2, i64 4, i32 4, i1 false)
%coerce.dive = getelementptr %struct.DeclGroup* %agg.tmp, i32 0, i32 0 ; <i32*> [#uses=1]
%0 = load i32* %coerce.dive ; <i32> [#uses=1]
%coerce.val.ii = zext i32 %0 to i64 ; <i64> [#uses=1]
%call = call i32 @_Z3foo9DeclGroup(i64 %coerce.val.ii) ; <i32> [#uses=0]
ret void
}
After diving into it, I found that the memcpy is being emitted by CodeGenFunction::EmitCXXConstructorCall, so I came up with this patch:
cxxctorcall.patch (746 Bytes)