load with alignment of 1 crashes from being unaligned

A full IR module that reproduces this issue is attached below, but the part to notice is this:

; Packed struct (the <{ ... }> form) with four i24 fields.
%Foo96Bits = type <{ i24, i24, i24, i24 }>

; Excerpt of the function that performs the align-1 load; the same function
; appears again in the full module listed under "test.ll:".
define internal fastcc i16 @main.0.1() unnamed_addr #2 !dbg !113 {

Entry:
; Stack slot for the struct, requested with alignment 1.
%value = alloca %Foo96Bits, align 1
; Stack slot for the loaded i24 value, requested with alignment 4.
%b = alloca i24, align 4
%0 = bitcast %Foo96Bits* %value to i8*, !dbg !129
; Copy 16 bytes from the constant @4 into %value.
; NOTE(review): the i32 1 operand is presumably the alignment argument of this
; form of the memcpy intrinsic -- confirm against the LangRef for the LLVM version used.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%Foo96Bits* @4 to i8*), i64 16, i32 1, i1 false), !dbg !129
call void @llvm.dbg.declare(metadata %Foo96Bits* %value, metadata !118, metadata !75), !dbg !129
; Address of field 0 of %value (same address as %value itself).
%1 = getelementptr inbounds %Foo96Bits, %Foo96Bits* %value, i32 0, i32 0, !dbg !130
; The load under discussion: an i24 load marked align 1.
%2 = load i24, i24* %1, align 1, !dbg !130
store i24 %2, i24* %b, align 4, !dbg !131
call void @llvm.dbg.declare(metadata i24* %b, metadata !127, metadata !75), !dbg !131
ret i16 0, !dbg !132

}

Notice that the Foo96Bits struct is allocated with an alignment of 1. Later on there is a load that also has an alignment of 1.

LLVM docs for alignment say: “An alignment of 1 is always safe.” (http://llvm.org/docs/LangRef.html#id189)

This program when run crashes with a segfault.

However, if I change the allocation

%value = alloca %Foo96Bits, align 1

to

%value = alloca %Foo96Bits, align 32

and change the corresponding load to

%2 = load i24, i24* %1, align 32

Then the program runs fine.

I was unable to reproduce the issue with this IR code and clang, because clang doesn’t support cross compiling as seamlessly as my frontend:

$ clang -o test.exe test.ll -target x86_64-unknown-windows-msvc -fuse-ld=lld -Wl,-entry:_start

clang-5.0: warning: unable to find a Visual Studio installation; try running Clang from a developer command prompt [-Wmsvc-not-found]

/nix/store/jxm0qg5cazlv8h7f7kp6cgk7ka7ag5kr-lld-5.0.0rc3/bin/lld-link: error: could not open libcmt.lib: No such file or directory
clang-5.0: error: linker command failed with exit code 1 (use -v to see invocation)

But if you manage to get it linked, I am sure this IR would trigger the issue. It causes the segfault under wine64 as well as on native Windows.

Can someone confirm whether this is a bug in LLVM? Or is the documentation incorrect?

test.ll:

; Module header: source name, data layout string and target triple.
; String values use plain ASCII quotes so the module can be parsed.
; ModuleID = 'test'
source_filename = "test"
target datalayout = "e-m:w-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-windows-msvc"

; Two-field struct: an i8* followed by an i64 (see @2, which pairs a pointer to @1 with 24).
%"[]u8" = type { i8*, i64 }
; Packed struct (the <{ ... }> form) with four i24 fields.
%Foo96Bits = type <{ i24, i24, i24, i24 }>

; Internal constants and globals of the module.
@link_libc = internal unnamed_addr constant i1 false, align 1
@want_start_symbol = internal unnamed_addr constant i1 true, align 1
; 15 equals the value of the "windows" enumerator (!23) in the debug-info enum !4.
@os = internal unnamed_addr constant i6 15, align 1
@is_windows = internal unnamed_addr constant i1 true, align 1
; Function-pointer slot; @windowsCallMainAndExit stores @main.0.1 into it.
@user_main_fn = internal unnamed_addr global i16 ()* null, align 8
; Pointer to the first byte of @1.
@0 = internal unnamed_addr constant i8* getelementptr inbounds ([24 x i8], [24 x i8]* @1, i64 0, i64 0), align 8
; 24-byte message text, stored without a trailing NUL.
@1 = internal unnamed_addr constant [24 x i8] c"reached unreachable code", align 1
; %"[]u8" value made of a pointer to @1 and the count 24; passed to @panic by @main.0.
@2 = internal unnamed_addr constant %"[]u8" { i8* getelementptr inbounds ([24 x i8], [24 x i8]* @1, i64 0, i64 0), i64 24 }, align 8
; Pointer to @user_main_fn.
@3 = internal unnamed_addr constant i16 ()** @user_main_fn, align 8
; Zero-initialized %Foo96Bits with alignment 1; the memcpy source in @main.0.1.
@4 = internal unnamed_addr constant %Foo96Bits zeroinitializer, align 1

; Function Attrs: nobuiltin noreturn nounwind
; @panic receives one %"[]u8" argument by value (byval) and never returns:
; after the debug declaration it enters a block that only branches to itself.
define internal fastcc void @panic(%"[]u8"* byval nonnull readonly) unnamed_addr #0 !dbg !60 {
Entry:
; Bind the unnamed parameter (%0) to debug variable !74 ("msg").
call void @llvm.dbg.declare(metadata %"[]u8"* %0, metadata !74, metadata !75), !dbg !76
br label %WhileCond, !dbg !77

; Infinite loop: WhileCond's only successor is WhileCond.
WhileCond: ; preds = %WhileCond, %Entry
br label %WhileCond, !dbg !77
}

; Function Attrs: naked nobuiltin noreturn nounwind
; Externally visible entry symbol (the clang command in the report links with
; -entry:_start). It only calls @windowsCallMainAndExit and is then unreachable.
; NOTE(review): attribute group #1 includes `naked`. If that suppresses the
; prologue here, nothing in this function adjusts the stack pointer before the
; call -- worth checking against the reply's hint about a misaligned stack.
define void @_start() #1 !dbg !80 {
Entry:
call fastcc void @windowsCallMainAndExit(), !dbg !84
; The callee is declared noreturn (#0), so control is not expected to come back.
unreachable, !dbg !84
}

; Function Attrs: nobuiltin nounwind
; @main.0 takes (i32, i8**, i8**), spills each argument to its own stack slot,
; attaches debug declarations, then calls @panic with @2 and ends in unreachable.
; No call to @main.0 appears in the IR shown in this module.
define internal i32 @main.0(i32, i8** nonnull, i8** nonnull) unnamed_addr #2 !dbg !87 {
Entry:
; One stack slot per incoming argument (%0, %1, %2).
%c_argc = alloca i32, align 4
%c_argv = alloca i8**, align 8
%c_envp = alloca i8**, align 8
store i32 %0, i32* %c_argc, align 4
call void @llvm.dbg.declare(metadata i32* %c_argc, metadata !94, metadata !75), !dbg !99
store i8** %1, i8*** %c_argv, align 8
call void @llvm.dbg.declare(metadata i8*** %c_argv, metadata !95, metadata !75), !dbg !100
store i8** %2, i8*** %c_envp, align 8
call void @llvm.dbg.declare(metadata i8*** %c_envp, metadata !97, metadata !75), !dbg !101
; @2 pairs a pointer to the text "reached unreachable code" with the count 24.
call fastcc void @panic(%"[]u8"* @2), !dbg !102
unreachable, !dbg !102
}

; Function Attrs: nobuiltin noreturn nounwind
; Records @main.0.1 in @user_main_fn, calls it, and passes an exit code to
; @ExitProcess: 1 when the returned i16 is non-zero, 0 otherwise.
define internal fastcc void @windowsCallMainAndExit() unnamed_addr #0 !dbg !106 {
Entry:
; Stack slot holding the i16 result of @main.0.1.
%0 = alloca i16, align 2
store i16 ()* @main.0.1, i16 ()** @user_main_fn, align 8, !dbg !107
; Direct call to @main.0.1 (the function containing the align-1 load).
%1 = call fastcc i16 @main.0.1(), !dbg !109
store i16 %1, i16* %0, align 2, !dbg !109
%2 = load i16, i16* %0, align 2, !dbg !110
; Non-zero result takes the error path.
%3 = icmp ne i16 %2, 0, !dbg !110
br i1 %3, label %UnwrapErrError, label %UnwrapErrOk, !dbg !110

UnwrapErrError: ; preds = %Entry
call void @ExitProcess(i32 1), !dbg !111
unreachable, !dbg !111

UnwrapErrOk: ; preds = %Entry
; %4 is loaded but not used by any later instruction in this function.
%4 = load i16, i16* %0, align 2, !dbg !110
br label %UnwrapErrEnd, !dbg !110

UnwrapErrEnd: ; preds = %UnwrapErrOk
call void @ExitProcess(i32 0), !dbg !112
unreachable, !dbg !112
}

; Function Attrs: nobuiltin nounwind
; Reproducer body: copies the zero-initialized constant @4 into a 1-byte-aligned
; stack slot, loads field 0 as an i24 with align 1, stores it to %b, returns 0.
define internal fastcc i16 @main.0.1() unnamed_addr #2 !dbg !113 {
Entry:
; Stack slot for the packed struct, requested with alignment 1.
%value = alloca %Foo96Bits, align 1
; Stack slot for the loaded i24 value, requested with alignment 4.
%b = alloca i24, align 4
%0 = bitcast %Foo96Bits* %value to i8*, !dbg !129
; Copy 16 bytes from @4 into %value; 16 bytes matches the size of 128 bits
; recorded for Foo96Bits in debug-info node !120.
; NOTE(review): the i32 1 operand is presumably the alignment argument of this
; form of the memcpy intrinsic -- confirm against the LangRef for the LLVM version used.
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %0, i8* bitcast (%Foo96Bits* @4 to i8*), i64 16, i32 1, i1 false), !dbg !129
call void @llvm.dbg.declare(metadata %Foo96Bits* %value, metadata !118, metadata !75), !dbg !129
; Address of field 0 of %value (same address as %value itself).
%1 = getelementptr inbounds %Foo96Bits, %Foo96Bits* %value, i32 0, i32 0, !dbg !130
; The load the report is about: i24 load marked align 1.
%2 = load i24, i24* %1, align 1, !dbg !130
store i24 %2, i24* %b, align 4, !dbg !131
call void @llvm.dbg.declare(metadata i24* %b, metadata !127, metadata !75), !dbg !131
ret i16 0, !dbg !132
}

; Function Attrs: nobuiltin noreturn nounwind
; External function taking one i32; called with 1 or 0 from @windowsCallMainAndExit.
declare void @ExitProcess(i32) #0

; Function Attrs: nounwind readnone speculatable
; Debug-info intrinsic used to bind stack slots and parameters to DILocalVariable nodes.
declare void @llvm.dbg.declare(metadata, metadata, metadata) #3

; Function Attrs: argmemonly nounwind
; Five-operand memcpy intrinsic (i8* dest, i8* source, i64, i32, i1) as used in @main.0.1.
declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture writeonly, i8* nocapture readonly, i64, i32, i1) #4

; Attribute groups. String attributes use plain ASCII quotes so the module parses.
; #0: @panic, @windowsCallMainAndExit and the @ExitProcess declaration.
attributes #0 = { nobuiltin noreturn nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
; #1: @_start (the only group that includes naked).
attributes #1 = { naked nobuiltin noreturn nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
; #2: @main.0 and @main.0.1.
attributes #2 = { nobuiltin nounwind "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" }
; #3: the @llvm.dbg.declare declaration.
attributes #3 = { nounwind readnone speculatable }
; #4: the @llvm.memcpy.p0i8.p0i8.i64 declaration.
attributes #4 = { argmemonly nounwind }

; Named metadata: module flags (!0) and the list of compile units (!1).
; Named metadata identifiers start with '!'.
!llvm.module.flags = !{!0}
!llvm.dbg.cu = !{!1}

; Module flag, compile unit, and the two enumeration types ("Os", "GlobalLinkage")
; referenced by the compile unit. Metadata strings use plain ASCII quotes.
!0 = !{i32 2, !"Debug Info Version", i32 3}
!1 = distinct !DICompileUnit(language: DW_LANG_C99, file: !2, producer: "zig 0.0.0", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !3, globals: !47)
!2 = !DIFile(filename: "test", directory: ".")
!3 = !{!4, !40}
!4 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "Os", scope: !5, file: !5, line: 1, baseType: !6, size: 8, align: 8, elements: !7)
!5 = !DIFile(filename: "builtin.zig", directory: "/home/andy/dev/zig/build/zig-cache")
!6 = !DIBasicType(name: "u6", size: 8, encoding: DW_ATE_unsigned)
!7 = !{!8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39}
!8 = !DIEnumerator(name: "freestanding", value: 0)
!9 = !DIEnumerator(name: "ananas", value: 1)
!10 = !DIEnumerator(name: "cloudabi", value: 2)
!11 = !DIEnumerator(name: "darwin", value: 3)
!12 = !DIEnumerator(name: "dragonfly", value: 4)
!13 = !DIEnumerator(name: "freebsd", value: 5)
!14 = !DIEnumerator(name: "fuchsia", value: 6)
!15 = !DIEnumerator(name: "ios", value: 7)
!16 = !DIEnumerator(name: "kfreebsd", value: 8)
!17 = !DIEnumerator(name: "linux", value: 9)
!18 = !DIEnumerator(name: "lv2", value: 10)
!19 = !DIEnumerator(name: "macosx", value: 11)
!20 = !DIEnumerator(name: "netbsd", value: 12)
!21 = !DIEnumerator(name: "openbsd", value: 13)
!22 = !DIEnumerator(name: "solaris", value: 14)
!23 = !DIEnumerator(name: "windows", value: 15)
!24 = !DIEnumerator(name: "haiku", value: 16)
!25 = !DIEnumerator(name: "minix", value: 17)
!26 = !DIEnumerator(name: "rtems", value: 18)
!27 = !DIEnumerator(name: "nacl", value: 19)
!28 = !DIEnumerator(name: "cnk", value: 20)
!29 = !DIEnumerator(name: "bitrig", value: 21)
!30 = !DIEnumerator(name: "aix", value: 22)
!31 = !DIEnumerator(name: "cuda", value: 23)
!32 = !DIEnumerator(name: "nvcl", value: 24)
!33 = !DIEnumerator(name: "amdhsa", value: 25)
!34 = !DIEnumerator(name: "ps4", value: 26)
!35 = !DIEnumerator(name: "elfiamcu", value: 27)
!36 = !DIEnumerator(name: "tvos", value: 28)
!37 = !DIEnumerator(name: "watchos", value: 29)
!38 = !DIEnumerator(name: "mesa3d", value: 30)
!39 = !DIEnumerator(name: "contiki", value: 31)
!40 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "GlobalLinkage", scope: !5, file: !5, line: 138, baseType: !41, size: 8, align: 8, elements: !42)
!41 = !DIBasicType(name: "u3", size: 8, encoding: DW_ATE_unsigned)
!42 = !{!43, !44, !45, !46}
!43 = !DIEnumerator(name: "Internal", value: 0)
!44 = !DIEnumerator(name: "Strong", value: 1)
!45 = !DIEnumerator(name: "Weak", value: 2)
!46 = !DIEnumerator(name: "LinkOnce", value: 3)
!47 = !{!48, !51, !54, !56, !58}
; Debug descriptions of the global variables listed in !47.
; Metadata strings use plain ASCII quotes.
!48 = !DIGlobalVariableExpression(var: !49)
!49 = distinct !DIGlobalVariable(name: "link_libc", linkageName: "link_libc", scope: !5, file: !5, line: 200, type: !50, isLocal: true, isDefinition: true)
!50 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean)
!51 = !DIGlobalVariableExpression(var: !52)
!52 = distinct !DIGlobalVariable(name: "want_main_symbol", linkageName: "want_main_symbol", scope: !53, file: !53, line: 8, type: !50, isLocal: true, isDefinition: true)
!53 = !DIFile(filename: "bootstrap.zig", directory: "/home/andy/dev/zig/build/lib/zig/std/special")
!54 = !DIGlobalVariableExpression(var: !55)
!55 = distinct !DIGlobalVariable(name: "want_start_symbol", linkageName: "want_start_symbol", scope: !53, file: !53, line: 9, type: !50, isLocal: true, isDefinition: true)
!56 = !DIGlobalVariableExpression(var: !57)
!57 = distinct !DIGlobalVariable(name: "os", linkageName: "os", scope: !5, file: !5, line: 195, type: !4, isLocal: true, isDefinition: true)
!58 = !DIGlobalVariableExpression(var: !59)
!59 = distinct !DIGlobalVariable(name: "is_windows", linkageName: "is_windows", scope: !53, file: !53, line: 13, type: !50, isLocal: true, isDefinition: true)
; Subprograms, their types, local variables, lexical blocks and source locations
; for @panic (!60), @_start (!80), @main.0 (!87), @windowsCallMainAndExit (!106)
; and @main.0.1 (!113). Metadata strings use plain ASCII quotes.
!60 = distinct !DISubprogram(name: "panic", scope: !61, file: !61, line: 47, type: !62, isLocal: true, isDefinition: true, scopeLine: 47, isOptimized: false, unit: !1, variables: !73)
!61 = !DIFile(filename: "test.zig", directory: "/home/andy/dev/zig/build")
!62 = !DISubroutineType(types: !63)
!63 = !{!64, !65}
!64 = !DIBasicType(name: "void", encoding: DW_ATE_unsigned)
!65 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&[]const u8", baseType: !66, size: 64, align: 64)
!66 = !DICompositeType(tag: DW_TAG_structure_type, name: "[]u8", size: 128, align: 64, elements: !67)
!67 = !{!68, !71}
!68 = !DIDerivedType(tag: DW_TAG_member, name: "ptr", scope: !66, baseType: !69, size: 64, align: 64)
!69 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&u8", baseType: !70, size: 64, align: 64)
!70 = !DIBasicType(name: "u8", size: 8, encoding: DW_ATE_unsigned_char)
!71 = !DIDerivedType(tag: DW_TAG_member, name: "len", scope: !66, baseType: !72, size: 64, align: 64, offset: 64)
!72 = !DIBasicType(name: "usize", size: 64, encoding: DW_ATE_unsigned)
!73 = !{!74}
!74 = !DILocalVariable(name: "msg", arg: 1, scope: !60, file: !61, line: 47, type: !66)
!75 = !DIExpression()
!76 = !DILocation(line: 47, column: 14, scope: !60)
!77 = !DILocation(line: 47, column: 45, scope: !78)
!78 = distinct !DILexicalBlock(scope: !79, file: !61, line: 47, column: 43)
!79 = distinct !DILexicalBlock(scope: !60, file: !61, line: 47, column: 14)
!80 = distinct !DISubprogram(name: "_start", scope: !53, file: !53, line: 15, type: !81, isLocal: false, isDefinition: true, scopeLine: 15, isOptimized: false, unit: !1, variables: !83)
!81 = !DISubroutineType(types: !82)
!82 = !{!64}
!83 = !{}
!84 = !DILocation(line: 22, column: 31, scope: !85)
!85 = distinct !DILexicalBlock(scope: !86, file: !53, line: 21, column: 21)
!86 = distinct !DILexicalBlock(scope: !80, file: !53, line: 15, column: 40)
!87 = distinct !DISubprogram(name: "main", scope: !53, file: !53, line: 63, type: !88, isLocal: true, isDefinition: true, scopeLine: 63, isOptimized: false, unit: !1, variables: !93)
!88 = !DISubroutineType(types: !89)
!89 = !{!90, !90, !91, !92}
!90 = !DIBasicType(name: "i32", size: 32, encoding: DW_ATE_signed)
!91 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&&u8", baseType: !69, size: 64, align: 64)
!92 = !DIDerivedType(tag: DW_TAG_pointer_type, name: "&?&u8", baseType: !69, size: 64, align: 64)
!93 = !{!94, !95, !97}
!94 = !DILocalVariable(name: "c_argc", arg: 1, scope: !87, file: !53, line: 63, type: !90)
!95 = !DILocalVariable(name: "c_argv", arg: 2, scope: !96, file: !53, line: 63, type: !91)
!96 = distinct !DILexicalBlock(scope: !87, file: !53, line: 63, column: 16)
!97 = !DILocalVariable(name: "c_envp", arg: 3, scope: !98, file: !53, line: 63, type: !92)
!98 = distinct !DILexicalBlock(scope: !96, file: !53, line: 63, column: 29)
!99 = !DILocation(line: 63, column: 16, scope: !87)
!100 = !DILocation(line: 63, column: 29, scope: !96)
!101 = !DILocation(line: 63, column: 43, scope: !98)
!102 = !DILocation(line: 66, column: 9, scope: !103)
!103 = distinct !DILexicalBlock(scope: !104, file: !53, line: 64, column: 28)
!104 = distinct !DILexicalBlock(scope: !105, file: !53, line: 63, column: 65)
!105 = distinct !DILexicalBlock(scope: !98, file: !53, line: 63, column: 43)
!106 = distinct !DISubprogram(name: "windowsCallMainAndExit", scope: !53, file: !53, line: 37, type: !81, isLocal: true, isDefinition: true, scopeLine: 37, isOptimized: false, unit: !1, variables: !83)
!107 = !DILocation(line: 38, column: 28, scope: !108)
!108 = distinct !DILexicalBlock(scope: !106, file: !53, line: 37, column: 41)
!109 = !DILocation(line: 39, column: 14, scope: !108)
!110 = !DILocation(line: 39, column: 17, scope: !108)
!111 = !DILocation(line: 39, column: 46, scope: !108)
!112 = !DILocation(line: 40, column: 31, scope: !108)
!113 = distinct !DISubprogram(name: "main", scope: !61, file: !61, line: 14, type: !114, isLocal: true, isDefinition: true, scopeLine: 14, isOptimized: false, unit: !1, variables: !117)
!114 = !DISubroutineType(types: !115)
!115 = !{!116}
!116 = !DIBasicType(name: "u16", size: 16, encoding: DW_ATE_unsigned)
!117 = !{!118, !127}
!118 = !DILocalVariable(name: "value", scope: !119, file: !61, line: 16, type: !120)
!119 = distinct !DILexicalBlock(scope: !113, file: !61, line: 14, column: 24)
!120 = !DICompositeType(tag: DW_TAG_structure_type, name: "Foo96Bits", scope: !61, file: !61, line: 7, size: 128, align: 128, elements: !121)
!121 = !{!122, !124, !125, !126}
!122 = !DIDerivedType(tag: DW_TAG_member, name: "a", scope: !120, file: !61, line: 8, baseType: !123, size: 24, align: 1)
!123 = !DIBasicType(name: "u24", size: 24, encoding: DW_ATE_unsigned)
!124 = !DIDerivedType(tag: DW_TAG_member, name: "b", scope: !120, file: !61, line: 9, baseType: !123, size: 24, align: 1, offset: 32)
!125 = !DIDerivedType(tag: DW_TAG_member, name: "c", scope: !120, file: !61, line: 10, baseType: !123, size: 24, align: 1, offset: 64)
!126 = !DIDerivedType(tag: DW_TAG_member, name: "d", scope: !120, file: !61, line: 11, baseType: !123, size: 24, align: 1, offset: 96)
!127 = !DILocalVariable(name: "b", scope: !128, file: !61, line: 22, type: !123)
!128 = distinct !DILexicalBlock(scope: !119, file: !61, line: 16, column: 5)
!129 = !DILocation(line: 16, column: 5, scope: !119)
!130 = !DILocation(line: 22, column: 20, scope: !128)
!131 = !DILocation(line: 22, column: 5, scope: !128)
!132 = !DILocation(line: 14, column: 24, scope: !113)

I forgot to mention: this same IR works fine on Linux and macOS on the same CPU architecture.

The documentation is correct; LLVM should always find some way to lower an unaligned load to a correct sequence. So probably an LLVM bug, unless you've done something weird like call the function with a misaligned stack.

-Eli