Hi Tim,
I agree this should be "solved" in LLVM.
For reference, here is the test source code and the assembly generated for my target architecture (more comments below):
struct AA
{
char n;
char m;
char j;
};
struct BB
{
char n;
char m;
char j;
char k;
};
extern void convertA( struct AA *a);
extern void convertB( struct BB *b);
void callConvertA()
{
struct AA a = {3, 4};
convertA( &a );
}
void callConvertB()
{
struct BB b = {3, 4};
convertB( &b );
}
callConvertA: ; @callConvertA
; %bb.0: ; %entry
sub SP, #4, SP
mov &.LcallConvertA.a, r0
ld.sb [r0, #2], r1
st.b r1, [SP, #2]
ld.sb [r0, #0], r1
zext r1, r1
ld.sb [r0, #1], r0
zext r0, r0
bswap r0, r0
or r0, r1, r0
st.w r0, [SP, #0]
mov SP, r0
call &convertA
add SP, #4, SP
ret
.Lfunc_end0:
.size callConvertA, .Lfunc_end0-callConvertA
; -- End function
.globl callConvertB ; -- Begin function callConvertB
.p2align 1
.type callConvertB,@function
callConvertB: ; @callConvertB
; %bb.0: ; %entry
sub SP, #4, SP
mov #0, r0
st.w r0, [SP, #2]
mov #1027, r0
st.w r0, [SP, #0]
mov SP, r0
call &convertB
add SP, #4, SP
ret
.Lfunc_end1:
.size callConvertB, .Lfunc_end1-callConvertB
; -- End function
.type .LcallConvertA.a,@object ; @callConvertA.a
.section .rodata,"a",@progbits
.LcallConvertA.a:
.byte 3 ; 0x3
.byte 4 ; 0x4
.byte 0 ; 0x0
.size .LcallConvertA.a, 3
Please note that on this architecture the destination operand is on the RIGHT-HAND SIDE; this is important for reading the assembly correctly.
"ld.sb" are sign-extending 8-bit loads
"st.b" are 8-bit truncating stores
"ld.w" are 16-bit word loads
"st.w" are 16-bit word stores
The generated code is functionally correct, but:
- For callConvertA, i8, i8, i8 loads + i8, i16 stores are generated. To combine the values of two of the i8 loads into a single i16 store, the "zext" + "bswap" (equivalent to a shift by 8 bits) + "or" trick is performed.
- For callConvertB, i16, i16 constant loads (immediate moves) + i16, i16 stores are generated.
The desired behaviour would be to have i16, i8 loads + i16, i8 stores for callConvertA.
The only difference that I can spot by debugging the LLVM source code is that getMemcpyLoadsAndStores is called with align = 1 for callConvertA, but it is NOT called for callConvertB.
The Clang-generated IR is this:
; ModuleID = 'add.c'
source_filename = "add.c"
target datalayout = "e-m:e-p:16:16-i32:16-i64:16-f32:16-f64:16-a:8-n8:16-S16"
target triple = "cpu74"
target triple = “cpu74”
%struct.AA = type { i8, i8, i8 }
%struct.BB = type { i8, i8, i8, i8 }
@callConvertA.a = private unnamed_addr constant %struct.AA { i8 3, i8 4, i8 0 }, align 1
; Function Attrs: minsize nounwind optsize
define dso_local void @callConvertA() local_unnamed_addr #0 {
entry:
%a = alloca %struct.AA, align 1
%0 = getelementptr inbounds %struct.AA, %struct.AA* %a, i16 0, i32 0
call void @llvm.lifetime.start.p0i8(i64 3, i8* nonnull %0) #3
call void @llvm.memcpy.p0i8.p0i8.i16(i8* nonnull align 1 %0, i8* align 1 getelementptr inbounds (%struct.AA, %struct.AA* @callConvertA.a, i16 0, i32 0), i16 3, i1 false)
call void @convertA(%struct.AA* nonnull %a) #4
call void @llvm.lifetime.end.p0i8(i64 3, i8* nonnull %0) #3
ret void
}
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64, i8* nocapture) #1
; Function Attrs: argmemonly nounwind
declare void @llvm.memcpy.p0i8.p0i8.i16(i8* nocapture writeonly, i8* nocapture readonly, i16, i1) #1
; Function Attrs: minsize optsize
declare dso_local void @convertA(%struct.AA*) local_unnamed_addr #2
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64, i8* nocapture) #1
; Function Attrs: minsize nounwind optsize
define dso_local void @callConvertB() local_unnamed_addr #0 {
entry:
%b = alloca i32, align 2
%tmpcast = bitcast i32* %b to %struct.BB*
%0 = bitcast i32* %b to i8*
call void @llvm.lifetime.start.p0i8(i64 4, i8* nonnull %0) #3
store i32 1027, i32* %b, align 2
call void @convertB(%struct.BB* nonnull %tmpcast) #4
call void @llvm.lifetime.end.p0i8(i64 4, i8* nonnull %0) #3
ret void
}
; Function Attrs: minsize optsize
declare dso_local void @convertB(%struct.BB*) local_unnamed_addr #2
attributes #0 = { minsize nounwind optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-jump-tables"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { argmemonly nounwind }
attributes #2 = { minsize optsize "correctly-rounded-divide-sqrt-fp-math"="false" "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-signed-zeros-fp-math"="false" "no-trapping-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind }
attributes #4 = { minsize nounwind optsize }
!llvm.module.flags = !{!0}
!llvm.ident = !{!1}
!0 = !{i32 1, !"wchar_size", i32 2}
!1 = !{!"clang version 7.0.1 (tags/RELEASE_701/final)"}
John
Tel: 620 28 45 13