Hi! I’m using the LLVM C API as a backend for a toy programming language. I’m running into a problem where the optimizer changes the result of my code, which suggests there is UB in the generated IR, and I’d like to understand where it’s wrong.
Here’s the high-level version (hopefully semantics are clear enough by context):
// Repro for the miscompile: builds a doubly-nested struct, reinterprets its
// base address as a byte pointer, and reads the 8 bytes at offset 8 as a u64.
// NOTE(review): this presumes `nested.long` lives at byte offset 8 of
// `padded2` (i.e. a { u8, pad[7], u64, u8, pad } layout) — confirm against
// the frontend's actual struct-layout rules.
fn test_padding() -> i64 {
let padded2 : PaddedAf2 = PaddedAf2 {
nested: PaddedAf2Nest {
short: 42 as u8, // inner field 0
long: 43 as u64, // inner field 1 — the value the assert expects to read back
short2: 44 as u8, // inner field 2
},
short: 45 as u8, // outer trailing field
};
// Reinterpret the struct's address as a raw byte pointer, step 8 bytes in,
// and load that location as a 64-bit integer.
let ptr : RawPtr<u8> = &padded2 as RawPtr<u8>;
assert_eq(*((ptr + 8) as RawPtr<u64>) as i64, 43);
return 0;
}
This is casting the base address of the struct as a pointer to u8, adding an offset of 8 bytes, and reading that location as a 64-bit integer. This is the unoptimized IR that I generate:
; Unoptimized IR: build the inner struct in %0, memcpy it into the outer
; struct %4, store the trailing byte, then load an i64 at byte offset 8.
; NOTE(review): no `target datalayout` is visible here; the offset comments
; below assume the common layout of { i8, i64, i8 } = i8 @0, pad, i64 @8,
; i8 @16, size 24, align 8 — verify against the module's datalayout.
define i64 @test_padding() {
"ir_block_IrBasicBlockId(0)":
%0 = alloca { i8, i64, i8 }, align 8 ; temporary for the inner struct
%1 = getelementptr { i8, i64, i8 }, ptr %0, i32 0, i32 0 ; -> field `short`
store i8 42, ptr %1, align 1
%2 = getelementptr { i8, i64, i8 }, ptr %0, i32 0, i32 1 ; -> field `long`
store i64 43, ptr %2, align 4 ; NOTE(review): align 4 on an i64 field of an align-8 alloca — matches the questionable load alignment below
%3 = getelementptr { i8, i64, i8 }, ptr %0, i32 0, i32 2 ; -> field `short2`
store i8 44, ptr %3, align 1
%4 = alloca { { i8, i64, i8 }, i8 }, align 8 ; the outer struct
%5 = getelementptr { { i8, i64, i8 }, i8 }, ptr %4, i32 0, i32 0 ; -> nested inner struct
call void @llvm.memcpy.p0.p0.i32(ptr align 8 %5, ptr align 8 %0, i32 24, i1 false) ; copy all 24 bytes, padding included
%6 = getelementptr { { i8, i64, i8 }, i8 }, ptr %4, i32 0, i32 1 ; -> outer `short`
store i8 45, ptr %6, align 1
%7 = alloca ptr, align 8 ; stack slot for the raw-pointer local
store ptr %4, ptr %7, align 8
%8 = load ptr, ptr %7, align 8
%9 = getelementptr i8, ptr %8, i64 8 ; byte offset 8 from the outer struct's base
%10 = load i64, ptr %9, align 4 ; the load the question is about (align 4, not 8)
%11 = call i64 @assert_eq(i64 %10, i64 43)
br label %"ir_block_IrBasicBlockId(1)"
"ir_block_IrBasicBlockId(1)": ; preds = %"ir_block_IrBasicBlockId(0)"
ret i64 0
}
And this is the optimized IR
; Optimized IR: the allocas were split (SROA-style names); only an
; { i64, i8 } slice of the aggregate survives, and the final load is taken
; at byte offset 4 of that slice.
; NOTE(review): offset 8 into an { i64, i8 } slice is the trailing i8, and
; the i64 load at offset 4 reads bytes 4..11 — straddling the i64, the i8,
; and padding — which is presumably why the value passed to assert_eq looks
; random.
define i64 @test_padding() local_unnamed_addr {
"ir_block_IrBasicBlockId(0)":
%.sroa.23 = alloca { i64, i8 }, align 8 ; surviving slice of the split structs
store i64 43, ptr %.sroa.23, align 8 ; `long` at slice offset 0
%.sroa.23.8..sroa_idx6 = getelementptr inbounds i8, ptr %.sroa.23, i64 8 ; slice offset 8 (the trailing i8 field)
store i8 44, ptr %.sroa.23.8..sroa_idx6, align 8 ; `short2`
%.sroa.23.4..sroa_idx = getelementptr inbounds i8, ptr %.sroa.23, i64 4 ; slice offset 4 — mid-i64
%.sroa.23.4..sroa.23.4..sroa.23.4..sroa.23.8..sroa.1.0.copyload = load i64, ptr %.sroa.23.4..sroa_idx, align 4 ; reads bytes 4..11 of the slice
%0 = tail call i64 @assert_eq(i64 %.sroa.23.4..sroa.23.4..sroa.23.4..sroa.23.8..sroa.1.0.copyload, i64 43)
ret i64 0
}
I’m having trouble understanding what even went on in the optimized version. It just screams UB to me, and when I run the code, the value that ends up being passed to `assert_eq` is essentially random.
So, I’m focusing on the unoptimized code. Something that looks fishy in there is this instruction: `%10 = load i64, ptr %9, align 4`. I’m not sure whether an alignment of 4 is correct here. The thing is, I’m using `LLVMBuildLoad2` and passing it the type of a 64-bit integer; I never picked an explicit alignment, so I’d expect the load’s alignment to be set to 8. I’m not sure if this is what’s wrong, however — I just can’t spot anything else that looks odd.
Thanks in advance!