Type based alias analysis
Now, I may not have done this correctly, but here is the generated LLVM IR showing the TBAA information, and showing that the three stores that precede loads are still there:
; ModuleID = 'testtbaa!'
source_filename = "testtbaa!"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-generic-linux"
%jl_value_t = type opaque
define double @"julia_testtbaa!_17792"(%jl_value_t addrspace(10)* nonnull align 8 dereferenceable(256), %jl_value_t addrspace(10)* nonnull align 8 dereferenceable(256), %jl_value_t addrspace(10)* nonnull align 8 dereferenceable(256)) !dbg !5 {
top:
%3 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*, !dbg !7
%4 = addrspacecast %jl_value_t addrspace(11)* %3 to %jl_value_t*
%5 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)*, !dbg !16
%6 = addrspacecast %jl_value_t addrspace(11)* %5 to %jl_value_t*
%7 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !20
%8 = addrspacecast %jl_value_t addrspace(11)* %7 to %jl_value_t*
%ptr.i = bitcast %jl_value_t* %8 to i8*, !dbg !24
call void @llvm.lifetime.start.p0i8(i64 256, i8* %ptr.i), !dbg !24
%9 = bitcast %jl_value_t* %4 to i8*, !dbg !30
%10 = bitcast %jl_value_t* %6 to i8*, !dbg !35
%ptr.i18 = bitcast %jl_value_t* %4 to <8 x double>*, !dbg !38
%res.i19 = load <8 x double>, <8 x double>* %ptr.i18, align 8, !dbg !38, !tbaa !40
%ptr.i16 = bitcast %jl_value_t* %6 to <8 x double>*, !dbg !44
%res.i17 = load <8 x double>, <8 x double>* %ptr.i16, align 8, !dbg !44, !tbaa !40
%res.i15 = fmul fast <8 x double> %res.i17, %res.i19, !dbg !46
%ptr.i14 = bitcast %jl_value_t* %8 to <8 x double>*, !dbg !59
store <8 x double> %res.i15, <8 x double>* %ptr.i14, align 8, !dbg !59, !tbaa !63
%11 = getelementptr i8, i8* %9, i64 64, !dbg !65
%ptr.i18.1 = bitcast i8* %11 to <8 x double>*, !dbg !38
%res.i19.1 = load <8 x double>, <8 x double>* %ptr.i18.1, align 8, !dbg !38, !tbaa !40
%12 = getelementptr i8, i8* %10, i64 64, !dbg !66
%ptr.i16.1 = bitcast i8* %12 to <8 x double>*, !dbg !44
%res.i17.1 = load <8 x double>, <8 x double>* %ptr.i16.1, align 8, !dbg !44, !tbaa !40
%res.i15.1 = fmul fast <8 x double> %res.i17.1, %res.i19.1, !dbg !46
%13 = getelementptr i8, i8* %ptr.i, i64 64, !dbg !67
%ptr.i14.1 = bitcast i8* %13 to <8 x double>*, !dbg !59
store <8 x double> %res.i15.1, <8 x double>* %ptr.i14.1, align 8, !dbg !59, !tbaa !63
%14 = getelementptr i8, i8* %9, i64 128, !dbg !65
%ptr.i18.2 = bitcast i8* %14 to <8 x double>*, !dbg !38
%res.i19.2 = load <8 x double>, <8 x double>* %ptr.i18.2, align 8, !dbg !38, !tbaa !40
%15 = getelementptr i8, i8* %10, i64 128, !dbg !66
%ptr.i16.2 = bitcast i8* %15 to <8 x double>*, !dbg !44
%res.i17.2 = load <8 x double>, <8 x double>* %ptr.i16.2, align 8, !dbg !44, !tbaa !40
%res.i15.2 = fmul fast <8 x double> %res.i17.2, %res.i19.2, !dbg !46
%16 = getelementptr i8, i8* %ptr.i, i64 128, !dbg !67
%ptr.i14.2 = bitcast i8* %16 to <8 x double>*, !dbg !59
store <8 x double> %res.i15.2, <8 x double>* %ptr.i14.2, align 8, !dbg !59, !tbaa !63
%17 = getelementptr i8, i8* %9, i64 192, !dbg !65
%ptr.i18.3 = bitcast i8* %17 to <8 x double>*, !dbg !38
%res.i19.3 = load <8 x double>, <8 x double>* %ptr.i18.3, align 8, !dbg !38, !tbaa !40
%18 = getelementptr i8, i8* %10, i64 192, !dbg !66
%ptr.i16.3 = bitcast i8* %18 to <8 x double>*, !dbg !44
%res.i17.3 = load <8 x double>, <8 x double>* %ptr.i16.3, align 8, !dbg !44, !tbaa !40
%res.i15.3 = fmul fast <8 x double> %res.i17.3, %res.i19.3, !dbg !46
%res.i11.1 = fadd fast <8 x double> %res.i15.1, %res.i15, !dbg !68
%res.i11.2 = fadd fast <8 x double> %res.i15.2, %res.i11.1, !dbg !68
%res.i11.3 = fadd fast <8 x double> %res.i15.3, %res.i11.2, !dbg !68
call void @llvm.lifetime.end.p0i8(i64 256, i8* %ptr.i), !dbg !75
%res.i = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.000000e+00, <8 x double> %res.i11.3), !dbg !79
ret double %res.i, !dbg !82
}
define nonnull %jl_value_t addrspace(10)* @"jfptr_testtbaa!_17793"(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) #0 {
top:
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"()
%ptls_i8 = getelementptr i8, i8* %thread_ptr, i64 -15712
%ptls = bitcast i8* %ptls_i8 to %jl_value_t***
%3 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, align 8, !nonnull !4, !dereferenceable !83, !align !84
%4 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i64 1
%5 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %4, align 8, !nonnull !4, !dereferenceable !83, !align !84
%6 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i64 2
%7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %6, align 8, !nonnull !4, !dereferenceable !83, !align !84
%8 = call double @"julia_testtbaa!_17792"(%jl_value_t addrspace(10)* %3, %jl_value_t addrspace(10)* %5, %jl_value_t addrspace(10)* %7)
%9 = bitcast %jl_value_t*** %ptls to i8*
%10 = call noalias nonnull %jl_value_t addrspace(10)* @jl_gc_pool_alloc(i8* %9, i32 1400, i32 16) #1
%11 = bitcast %jl_value_t addrspace(10)* %10 to %jl_value_t addrspace(10)* addrspace(10)*
%12 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(10)* %11, i64 -1
store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 140017571623568 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspace(10)* %12, !tbaa !85
%13 = bitcast %jl_value_t addrspace(10)* %10 to double addrspace(10)*
store double %8, double addrspace(10)* %13, align 8, !tbaa !88
ret %jl_value_t addrspace(10)* %10
}
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8*, i64, %jl_value_t addrspace(10)*) #1
; Function Attrs: nounwind readnone
declare %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)*) #2
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
; Function Attrs: nounwind readnone
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double, <8 x double>) #2
; Function Attrs: inaccessiblemem_or_argmemonly
declare void @jl_gc_queue_root(%jl_value_t addrspace(10)*) #4
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #1
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @jl_gc_big_alloc(i8*, i64) #1
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #1
attributes #0 = { "thunk" }
attributes #1 = { allocsize(1) }
attributes #2 = { nounwind readnone }
attributes #3 = { argmemonly nounwind }
attributes #4 = { inaccessiblemem_or_argmemonly }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 1, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, nameTableKind: GNU)
!3 = !DIFile(filename: "REPL[14]", directory: ".")
!4 = !{}
!5 = distinct !DISubprogram(name: "testtbaa!", linkageName: "julia_testtbaa!_17893", scope: null, file: !3, line: 6, type: !6, scopeLine: 6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!6 = !DISubroutineType(types: !4)
!7 = !DILocation(line: 147, scope: !8, inlinedAt: !10)
!8 = distinct !DISubprogram(name: "pointer_from_objref;", linkageName: "pointer_from_objref", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!9 = !DIFile(filename: "pointer.jl", directory: ".")
!10 = !DILocation(line: 341, scope: !11, inlinedAt: !13)
!11 = distinct !DISubprogram(name: "pointer;", linkageName: "pointer", scope: !12, file: !12, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!12 = !DIFile(filename: "/home/chriselrod/.julia/packages/PaddedMatrices/PU1f3/src/mutable_fs_padded_array.jl", directory: ".")
!13 = !DILocation(line: 346, scope: !14, inlinedAt: !15)
!14 = distinct !DISubprogram(name: "vectorizable;", linkageName: "vectorizable", scope: !12, file: !12, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!15 = !DILocation(line: 6, scope: !5)
!16 = !DILocation(line: 147, scope: !8, inlinedAt: !17)
!17 = !DILocation(line: 341, scope: !11, inlinedAt: !18)
!18 = !DILocation(line: 346, scope: !14, inlinedAt: !19)
!19 = !DILocation(line: 7, scope: !5)
!20 = !DILocation(line: 147, scope: !8, inlinedAt: !21)
!21 = !DILocation(line: 341, scope: !11, inlinedAt: !22)
!22 = !DILocation(line: 346, scope: !14, inlinedAt: !23)
!23 = !DILocation(line: 8, scope: !5)
!24 = !DILocation(line: 752, scope: !25, inlinedAt: !27)
!25 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!26 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/memory.jl", directory: ".")
!27 = !DILocation(line: 743, scope: !28, inlinedAt: !29)
!28 = distinct !DISubprogram(name: "lifetime_start!;", linkageName: "lifetime_start!", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!29 = !DILocation(line: 9, scope: !5)
!30 = !DILocation(line: 0, scope: !31, inlinedAt: !32)
!31 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!32 = !DILocation(line: 514, scope: !33, inlinedAt: !34)
!33 = distinct !DISubprogram(name: "vloadconstant;", linkageName: "vloadconstant", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!34 = !DILocation(line: 14, scope: !5)
!35 = !DILocation(line: 0, scope: !31, inlinedAt: !36)
!36 = !DILocation(line: 514, scope: !33, inlinedAt: !37)
!37 = !DILocation(line: 15, scope: !5)
!38 = !DILocation(line: 502, scope: !25, inlinedAt: !39)
!39 = !DILocation(line: 472, scope: !33, inlinedAt: !32)
!40 = !{!41, !41, i64 0}
!41 = !{!"jtbaa_const", !42, i64 0}
!42 = !{!"jtbaa", !43, i64 0}
!43 = !{!"jtbaa"}
!44 = !DILocation(line: 502, scope: !25, inlinedAt: !45)
!45 = !DILocation(line: 472, scope: !33, inlinedAt: !36)
!46 = !DILocation(line: 90, scope: !47, inlinedAt: !49)
!47 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !48, file: !48, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!48 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/llvmwrap.jl", directory: ".")
!49 = !DILocation(line: 67, scope: !50, inlinedAt: !51)
!50 = distinct !DISubprogram(name: "llvmwrap;", linkageName: "llvmwrap", scope: !48, file: !48, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!51 = !DILocation(line: 67, scope: !50, inlinedAt: !52)
!52 = !DILocation(line: 58, scope: !53, inlinedAt: !55)
!53 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !54, file: !54, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!54 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/floating_point_arithmetic.jl", directory: ".")
!55 = !DILocation(line: 36, scope: !56, inlinedAt: !58)
!56 = distinct !DISubprogram(name: "vmul;", linkageName: "vmul", scope: !57, file: !57, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!57 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/SIMDPirates.jl", directory: ".")
!58 = !DILocation(line: 16, scope: !5)
!59 = !DILocation(line: 257, scope: !25, inlinedAt: !60)
!60 = !DILocation(line: 226, scope: !61, inlinedAt: !62)
!61 = distinct !DISubprogram(name: "vstore!;", linkageName: "vstore!", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!62 = !DILocation(line: 371, scope: !61, inlinedAt: !58)
!63 = !{!64, !64, i64 0}
!64 = !{!"tbaa_mutab", !42, i64 0}
!65 = !DILocation(line: 159, scope: !31, inlinedAt: !32)
!66 = !DILocation(line: 159, scope: !31, inlinedAt: !36)
!67 = !DILocation(line: 159, scope: !31, inlinedAt: !62)
!68 = !DILocation(line: 90, scope: !47, inlinedAt: !69)
!69 = !DILocation(line: 67, scope: !50, inlinedAt: !70)
!70 = !DILocation(line: 67, scope: !50, inlinedAt: !71)
!71 = !DILocation(line: 58, scope: !53, inlinedAt: !72)
!72 = !DILocation(line: 36, scope: !73, inlinedAt: !74)
!73 = distinct !DISubprogram(name: "vadd;", linkageName: "vadd", scope: !57, file: !57, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!74 = !DILocation(line: 22, scope: !5)
!75 = !DILocation(line: 768, scope: !25, inlinedAt: !76)
!76 = !DILocation(line: 759, scope: !77, inlinedAt: !78)
!77 = distinct !DISubprogram(name: "lifetime_end!;", linkageName: "lifetime_end!", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!78 = !DILocation(line: 25, scope: !5)
!79 = !DILocation(line: 395, scope: !53, inlinedAt: !80)
!80 = !DILocation(line: 384, scope: !81, inlinedAt: !82)
!81 = distinct !DISubprogram(name: "vsum;", linkageName: "vsum", scope: !54, file: !54, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!82 = !DILocation(line: 26, scope: !5)
!83 = !{i64 256}
!84 = !{i64 8}
!85 = !{!86, !86, i64 0}
!86 = !{!"jtbaa_tag", !87, i64 0}
!87 = !{!"jtbaa_data", !42, i64 0}
!88 = !{!89, !89, i64 0}
!89 = !{!"jtbaa_immut", !90, i64 0}
!90 = !{!"jtbaa_value", !87, i64 0}
The loads (from b and c) are tagged `!tbaa !40`, while the stores are tagged `!tbaa !63`. The module also shows the following hierarchy:
!40 = !{!41, !41, i64 0}
!41 = !{!"jtbaa_const", !42, i64 0}
!42 = !{!"jtbaa", !43, i64 0}
!43 = !{!"jtbaa"}
!63 = !{!64, !64, i64 0}
!64 = !{!"tbaa_mutab", !42, i64 0}
so that `jtbaa_const` and `tbaa_mutab` (note that the metadata spells it without the leading `j`) are both subtypes of the root `jtbaa`, with `!40` and `!63` being the respective tags.
Did I specify this incorrectly?
If I did actually do this correctly, then why didn’t LLVM deduce that the loads and stores don’t alias?
Alias scopes
LLVM module:
; ModuleID = 'testscope!'
source_filename = "testscope!"
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128-ni:10:11:12:13"
target triple = "x86_64-generic-linux"
%jl_value_t = type opaque
define double @"julia_testscope!_17730"(%jl_value_t addrspace(10)* nonnull align 8 dereferenceable(256), %jl_value_t addrspace(10)* nonnull align 8 dereferenceable(256), %jl_value_t addrspace(10)* nonnull align 8 dereferenceable(256)) !dbg !5 {
top:
%3 = addrspacecast %jl_value_t addrspace(10)* %1 to %jl_value_t addrspace(11)*, !dbg !7
%4 = addrspacecast %jl_value_t addrspace(11)* %3 to %jl_value_t*
%5 = addrspacecast %jl_value_t addrspace(10)* %2 to %jl_value_t addrspace(11)*, !dbg !16
%6 = addrspacecast %jl_value_t addrspace(11)* %5 to %jl_value_t*
%7 = addrspacecast %jl_value_t addrspace(10)* %0 to %jl_value_t addrspace(11)*, !dbg !20
%8 = addrspacecast %jl_value_t addrspace(11)* %7 to %jl_value_t*
%ptr.i = bitcast %jl_value_t* %8 to i8*, !dbg !24
call void @llvm.lifetime.start.p0i8(i64 256, i8* %ptr.i), !dbg !24
%9 = bitcast %jl_value_t* %4 to i8*, !dbg !30
%10 = bitcast %jl_value_t* %6 to i8*, !dbg !36
%ptr.i18 = bitcast %jl_value_t* %4 to <8 x double>*, !dbg !39
%res.i19 = load <8 x double>, <8 x double>* %ptr.i18, align 8, !dbg !39, !alias.scope !44
%ptr.i16 = bitcast %jl_value_t* %6 to <8 x double>*, !dbg !47
%res.i17 = load <8 x double>, <8 x double>* %ptr.i16, align 8, !dbg !47, !alias.scope !44
%res.i15 = fmul reassoc nnan ninf nsz arcp contract <8 x double> %res.i19, %res.i17, !dbg !49
%ptr.i14 = bitcast %jl_value_t* %8 to <8 x double>*, !dbg !62
store <8 x double> %res.i15, <8 x double>* %ptr.i14, align 8, !dbg !62, !noalias !44
%11 = getelementptr i8, i8* %9, i64 64, !dbg !67
%ptr.i18.1 = bitcast i8* %11 to <8 x double>*, !dbg !39
%res.i19.1 = load <8 x double>, <8 x double>* %ptr.i18.1, align 8, !dbg !39, !alias.scope !44
%12 = getelementptr i8, i8* %10, i64 64, !dbg !68
%ptr.i16.1 = bitcast i8* %12 to <8 x double>*, !dbg !47
%res.i17.1 = load <8 x double>, <8 x double>* %ptr.i16.1, align 8, !dbg !47, !alias.scope !44
%13 = getelementptr i8, i8* %ptr.i, i64 64, !dbg !69
%res.i15.1 = fmul reassoc nnan ninf nsz arcp contract <8 x double> %res.i19.1, %res.i17.1, !dbg !49
%ptr.i14.1 = bitcast i8* %13 to <8 x double>*, !dbg !62
store <8 x double> %res.i15.1, <8 x double>* %ptr.i14.1, align 8, !dbg !62, !noalias !44
%14 = getelementptr i8, i8* %9, i64 128, !dbg !67
%ptr.i18.2 = bitcast i8* %14 to <8 x double>*, !dbg !39
%res.i19.2 = load <8 x double>, <8 x double>* %ptr.i18.2, align 8, !dbg !39, !alias.scope !44
%15 = getelementptr i8, i8* %10, i64 128, !dbg !68
%ptr.i16.2 = bitcast i8* %15 to <8 x double>*, !dbg !47
%res.i17.2 = load <8 x double>, <8 x double>* %ptr.i16.2, align 8, !dbg !47, !alias.scope !44
%16 = getelementptr i8, i8* %ptr.i, i64 128, !dbg !69
%res.i15.2 = fmul reassoc nnan ninf nsz arcp contract <8 x double> %res.i19.2, %res.i17.2, !dbg !49
%ptr.i14.2 = bitcast i8* %16 to <8 x double>*, !dbg !62
store <8 x double> %res.i15.2, <8 x double>* %ptr.i14.2, align 8, !dbg !62, !noalias !44
%17 = getelementptr i8, i8* %9, i64 192, !dbg !67
%ptr.i18.3 = bitcast i8* %17 to <8 x double>*, !dbg !39
%res.i19.3 = load <8 x double>, <8 x double>* %ptr.i18.3, align 8, !dbg !39, !alias.scope !44
%18 = getelementptr i8, i8* %10, i64 192, !dbg !68
%ptr.i16.3 = bitcast i8* %18 to <8 x double>*, !dbg !47
%res.i17.3 = load <8 x double>, <8 x double>* %ptr.i16.3, align 8, !dbg !47, !alias.scope !44
%res.i15.3 = fmul reassoc nnan ninf nsz arcp contract <8 x double> %res.i19.3, %res.i17.3, !dbg !49
%res.i11.1 = fadd reassoc nnan ninf nsz arcp contract <8 x double> %res.i15, %res.i15.1, !dbg !71
%res.i11.2 = fadd reassoc nnan ninf nsz arcp contract <8 x double> %res.i11.1, %res.i15.2, !dbg !71
%res.i11.3 = fadd reassoc nnan ninf nsz arcp contract <8 x double> %res.i11.2, %res.i15.3, !dbg !71
call void @llvm.lifetime.end.p0i8(i64 256, i8* %ptr.i), !dbg !78
%res.i = call fast double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double 0.000000e+00, <8 x double> %res.i11.3), !dbg !82
ret double %res.i, !dbg !85
}
define nonnull %jl_value_t addrspace(10)* @"jfptr_testscope!_17731"(%jl_value_t addrspace(10)*, %jl_value_t addrspace(10)**, i32) #0 {
top:
%thread_ptr = call i8* asm "movq %fs:0, $0", "=r"()
%ptls_i8 = getelementptr i8, i8* %thread_ptr, i64 -15712
%ptls = bitcast i8* %ptls_i8 to %jl_value_t***
%3 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, align 8, !nonnull !4, !dereferenceable !86, !align !87
%4 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i64 1
%5 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %4, align 8, !nonnull !4, !dereferenceable !86, !align !87
%6 = getelementptr inbounds %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %1, i64 2
%7 = load %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)** %6, align 8, !nonnull !4, !dereferenceable !86, !align !87
%8 = call double @"julia_testscope!_17730"(%jl_value_t addrspace(10)* %3, %jl_value_t addrspace(10)* %5, %jl_value_t addrspace(10)* %7)
%9 = bitcast %jl_value_t*** %ptls to i8*
%10 = call noalias nonnull %jl_value_t addrspace(10)* @jl_gc_pool_alloc(i8* %9, i32 1400, i32 16) #1
%11 = bitcast %jl_value_t addrspace(10)* %10 to %jl_value_t addrspace(10)* addrspace(10)*
%12 = getelementptr %jl_value_t addrspace(10)*, %jl_value_t addrspace(10)* addrspace(10)* %11, i64 -1
store %jl_value_t addrspace(10)* addrspacecast (%jl_value_t* inttoptr (i64 139735451586816 to %jl_value_t*) to %jl_value_t addrspace(10)*), %jl_value_t addrspace(10)* addrspace(10)* %12, !tbaa !88
%13 = bitcast %jl_value_t addrspace(10)* %10 to double addrspace(10)*
store double %8, double addrspace(10)* %13, align 8, !tbaa !93
ret %jl_value_t addrspace(10)* %10
}
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @julia.gc_alloc_obj(i8*, i64, %jl_value_t addrspace(10)*) #1
; Function Attrs: nounwind readnone
declare %jl_value_t* @julia.pointer_from_objref(%jl_value_t addrspace(11)*) #2
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.start.p0i8(i64 immarg, i8* nocapture) #3
; Function Attrs: argmemonly nounwind
declare void @llvm.lifetime.end.p0i8(i64 immarg, i8* nocapture) #3
; Function Attrs: nounwind readnone
declare double @llvm.experimental.vector.reduce.v2.fadd.f64.v8f64(double, <8 x double>) #2
; Function Attrs: inaccessiblemem_or_argmemonly
declare void @jl_gc_queue_root(%jl_value_t addrspace(10)*) #4
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @jl_gc_pool_alloc(i8*, i32, i32) #1
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @jl_gc_big_alloc(i8*, i64) #1
; Function Attrs: allocsize(1)
declare noalias nonnull %jl_value_t addrspace(10)* @julia.gc_alloc_bytes(i8*, i64) #1
attributes #0 = { "thunk" }
attributes #1 = { allocsize(1) }
attributes #2 = { nounwind readnone }
attributes #3 = { argmemonly nounwind }
attributes #4 = { inaccessiblemem_or_argmemonly }
!llvm.module.flags = !{!0, !1}
!llvm.dbg.cu = !{!2}
!0 = !{i32 2, !"Dwarf Version", i32 4}
!1 = !{i32 1, !"Debug Info Version", i32 3}
!2 = distinct !DICompileUnit(language: DW_LANG_Julia, file: !3, producer: "julia", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, nameTableKind: GNU)
!3 = !DIFile(filename: "REPL[7]", directory: ".")
!4 = !{}
!5 = distinct !DISubprogram(name: "testscope!", linkageName: "julia_testscope!_17776", scope: null, file: !3, line: 6, type: !6, scopeLine: 6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!6 = !DISubroutineType(types: !4)
!7 = !DILocation(line: 147, scope: !8, inlinedAt: !10)
!8 = distinct !DISubprogram(name: "pointer_from_objref;", linkageName: "pointer_from_objref", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!9 = !DIFile(filename: "pointer.jl", directory: ".")
!10 = !DILocation(line: 341, scope: !11, inlinedAt: !13)
!11 = distinct !DISubprogram(name: "pointer;", linkageName: "pointer", scope: !12, file: !12, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!12 = !DIFile(filename: "/home/chriselrod/.julia/dev/PaddedMatrices/src/mutable_fs_padded_array.jl", directory: ".")
!13 = !DILocation(line: 346, scope: !14, inlinedAt: !15)
!14 = distinct !DISubprogram(name: "vectorizable;", linkageName: "vectorizable", scope: !12, file: !12, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!15 = !DILocation(line: 6, scope: !5)
!16 = !DILocation(line: 147, scope: !8, inlinedAt: !17)
!17 = !DILocation(line: 341, scope: !11, inlinedAt: !18)
!18 = !DILocation(line: 346, scope: !14, inlinedAt: !19)
!19 = !DILocation(line: 7, scope: !5)
!20 = !DILocation(line: 147, scope: !8, inlinedAt: !21)
!21 = !DILocation(line: 341, scope: !11, inlinedAt: !22)
!22 = !DILocation(line: 346, scope: !14, inlinedAt: !23)
!23 = !DILocation(line: 8, scope: !5)
!24 = !DILocation(line: 714, scope: !25, inlinedAt: !27)
!25 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!26 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/memory.jl", directory: ".")
!27 = !DILocation(line: 705, scope: !28, inlinedAt: !29)
!28 = distinct !DISubprogram(name: "lifetime_start!;", linkageName: "lifetime_start!", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!29 = !DILocation(line: 9, scope: !5)
!30 = !DILocation(line: 0, scope: !31, inlinedAt: !32)
!31 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !9, file: !9, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!32 = !DILocation(line: 229, scope: !33, inlinedAt: !35)
!33 = distinct !DISubprogram(name: "+;", linkageName: "+", scope: !34, file: !34, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!34 = !DIFile(filename: "/home/chriselrod/.julia/dev/VectorizationBase/src/vectorizable.jl", directory: ".")
!35 = !DILocation(line: 14, scope: !5)
!36 = !DILocation(line: 0, scope: !31, inlinedAt: !37)
!37 = !DILocation(line: 229, scope: !33, inlinedAt: !38)
!38 = !DILocation(line: 15, scope: !5)
!39 = !DILocation(line: 32, scope: !40, inlinedAt: !42)
!40 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !41, file: !41, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!41 = !DIFile(filename: "REPL[5]", directory: ".")
!42 = !DILocation(line: 5, scope: !43, inlinedAt: !35)
!43 = distinct !DISubprogram(name: "vloadscope;", linkageName: "vloadscope", scope: !41, file: !41, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!44 = !{!45}
!45 = !{!"waterbuffalo", !46}
!46 = !{!"salmon"}
!47 = !DILocation(line: 32, scope: !40, inlinedAt: !48)
!48 = !DILocation(line: 5, scope: !43, inlinedAt: !38)
!49 = !DILocation(line: 90, scope: !50, inlinedAt: !52)
!50 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !51, file: !51, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!51 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/llvmwrap.jl", directory: ".")
!52 = !DILocation(line: 67, scope: !53, inlinedAt: !54)
!53 = distinct !DISubprogram(name: "llvmwrap;", linkageName: "llvmwrap", scope: !51, file: !51, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!54 = !DILocation(line: 67, scope: !53, inlinedAt: !55)
!55 = !DILocation(line: 59, scope: !56, inlinedAt: !58)
!56 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !57, file: !57, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!57 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/floating_point_arithmetic.jl", directory: ".")
!58 = !DILocation(line: 46, scope: !59, inlinedAt: !61)
!59 = distinct !DISubprogram(name: "vmul;", linkageName: "vmul", scope: !60, file: !60, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!60 = !DIFile(filename: "/home/chriselrod/.julia/dev/SIMDPirates/src/SIMDPirates.jl", directory: ".")
!61 = !DILocation(line: 16, scope: !5)
!62 = !DILocation(line: 33, scope: !63, inlinedAt: !65)
!63 = distinct !DISubprogram(name: "macro expansion;", linkageName: "macro expansion", scope: !64, file: !64, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!64 = !DIFile(filename: "REPL[6]", directory: ".")
!65 = !DILocation(line: 6, scope: !66, inlinedAt: !61)
!66 = distinct !DISubprogram(name: "vstorescope!;", linkageName: "vstorescope!", scope: !64, file: !64, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!67 = !DILocation(line: 159, scope: !31, inlinedAt: !32)
!68 = !DILocation(line: 159, scope: !31, inlinedAt: !37)
!69 = !DILocation(line: 159, scope: !31, inlinedAt: !70)
!70 = !DILocation(line: 229, scope: !33, inlinedAt: !61)
!71 = !DILocation(line: 90, scope: !50, inlinedAt: !72)
!72 = !DILocation(line: 67, scope: !53, inlinedAt: !73)
!73 = !DILocation(line: 67, scope: !53, inlinedAt: !74)
!74 = !DILocation(line: 59, scope: !56, inlinedAt: !75)
!75 = !DILocation(line: 46, scope: !76, inlinedAt: !77)
!76 = distinct !DISubprogram(name: "vadd;", linkageName: "vadd", scope: !60, file: !60, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!77 = !DILocation(line: 23, scope: !5)
!78 = !DILocation(line: 730, scope: !25, inlinedAt: !79)
!79 = !DILocation(line: 721, scope: !80, inlinedAt: !81)
!80 = distinct !DISubprogram(name: "lifetime_end!;", linkageName: "lifetime_end!", scope: !26, file: !26, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!81 = !DILocation(line: 26, scope: !5)
!82 = !DILocation(line: 396, scope: !56, inlinedAt: !83)
!83 = !DILocation(line: 385, scope: !84, inlinedAt: !85)
!84 = distinct !DISubprogram(name: "vsum;", linkageName: "vsum", scope: !57, file: !57, type: !6, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !4)
!85 = !DILocation(line: 27, scope: !5)
!86 = !{i64 256}
!87 = !{i64 8}
!88 = !{!89, !89, i64 0}
!89 = !{!"jtbaa_tag", !90, i64 0}
!90 = !{!"jtbaa_data", !91, i64 0}
!91 = !{!"jtbaa", !92, i64 0}
!92 = !{!"jtbaa"}
!93 = !{!94, !94, i64 0}
!94 = !{!"jtbaa_immut", !95, i64 0}
!95 = !{!"jtbaa_value", !90, i64 0}
The LLVM Language Reference Manual gives this example:
; These two instructions don't alias:
%0 = load float, float* %c, align 4, !alias.scope !5
store float %0, float* %arrayidx.i, align 4, !noalias !5
Yet, the optimized LLVM above still shows
%res.i19 = load <8 x double>, <8 x double>* %ptr.i18, align 8, !dbg !39, !alias.scope !44
%ptr.i16 = bitcast %jl_value_t* %6 to <8 x double>*, !dbg !47
%res.i17 = load <8 x double>, <8 x double>* %ptr.i16, align 8, !dbg !47, !alias.scope !44
%res.i15 = fmul reassoc nnan ninf nsz arcp contract <8 x double> %res.i19, %res.i17, !dbg !49
%ptr.i14 = bitcast %jl_value_t* %8 to <8 x double>*, !dbg !62
store <8 x double> %res.i15, <8 x double>* %ptr.i14, align 8, !dbg !62, !noalias !44
With the scope definitions:
!44 = !{!45}
!45 = !{!"waterbuffalo", !46}
!46 = !{!"salmon"}
I would have again thought this is enough, but it doesn’t seem like noalias information was actually used. Did I do something wrong (again)?
Finally, something I tried that did work is declaring a `noinline` LLVM function that simply returns its argument, with the `noalias` attribute on the return value. Allowing the LLVM function to be inlined caused the noalias information to be lost.
define noalias $typ* @noalias($typ *%a) noinline { ret $typ* %a }
where `$typ` is the interpolated type of the pointer.
For this particular test problem, the function call overhead was actually higher than the benefit of eliminating three stores, meaning this is far from a general solution.
For what it’s worth, I’m on LLVM 9.0, and started Julia with `-O3` to use the following alias analysis passes:
PM->add(createScopedNoAliasAAWrapperPass());
PM->add(createTypeBasedAAWrapperPass());
if (opt_level >= 3) {
PM->add(createBasicAAWrapperPass());
}