Hi all,
For the following snippet (Compiler Explorer):
#include <iostream>
// Declaration only: this snippet contains no definition of bar, so the calls
// below target a function whose body is not visible here. The argument is
// taken by value.
template <typename T>
void bar(T arg);
// Pointer variant. Loops forever: each iteration converts ptr to uintptr_t,
// passes its four least-significant bits to bar, then advances ptr by 16 chars.
void foo0(char *ptr) {
while (true) {
// Keep only the low four bits of the pointer's integer value.
const auto mask = (uintptr_t)ptr & 0xf;
bar(mask);
// The step is 16 (0x10), whose low four bits are all zero.
// NOTE(review): assuming the usual flat pointer-to-integer mapping, mask is
// therefore the same on every iteration -- confirm for the target.
ptr += 16;
}
}
// Integer variant of foo0. Loops forever: each iteration passes the four
// least-significant bits of ptr to bar, then adds 16 to ptr.
void foo1(int64_t ptr) {
while (true) {
// Keep only the low four bits of the integer.
const auto mask = ptr & 0xf;
bar(mask);
// Adding 16 (0x10) does not alter the low four bits, so mask has the same
// value on every iteration.
ptr += 16;
}
}
In both cases, the computation of `mask` can be moved outside the loop: `ptr` is incremented by `0x10`, which is greater than the value of the mask `0xf` (more precisely, `0x10` has its four low bits clear), so the 4 LSBs of `ptr` remain the same on every iteration. `bar` receives `mask` by value, so it does not modify these bits either. However, in the first case `mask` is always computed within the loop. Taking a look at the IR:
; IR for foo0 (pointer argument): a single block (%2) that branches back to
; itself unconditionally, so the loop has no exit.
define dso_local void @_Z4foo0Pc(ptr noundef %0) local_unnamed_addr #3 !dbg !908 {
call void @llvm.dbg.value(metadata ptr %0, metadata !912, metadata !DIExpression()), !dbg !916
br label %2, !dbg !917
2: ; preds = %1, %2
; %3 is the current pointer: the incoming argument on entry, %6 on the back edge.
%3 = phi ptr [ %0, %1 ], [ %6, %2 ]
call void @llvm.dbg.value(metadata ptr %3, metadata !912, metadata !DIExpression()), !dbg !916
; Both the pointer-to-integer conversion and the mask sit inside the loop
; block, so they are evaluated again on every iteration.
%4 = ptrtoint ptr %3 to i64, !dbg !918
%5 = and i64 %4, 15, !dbg !919
call void @llvm.dbg.value(metadata i64 %5, metadata !913, metadata !DIExpression()), !dbg !920
tail call void @_Z3barImEvT_(i64 noundef %5), !dbg !921
; Advance the pointer by 16 bytes for the next iteration.
%6 = getelementptr inbounds i8, ptr %3, i64 16, !dbg !922
call void @llvm.dbg.value(metadata ptr %6, metadata !912, metadata !DIExpression()), !dbg !916
br label %2, !dbg !917, !llvm.loop !923
}
; IR for foo1 (integer argument): the mask is computed once in the entry
; block, before the loop is entered.
define dso_local void @_Z4foo1l(i64 noundef %0) local_unnamed_addr #3 !dbg !932 {
call void @llvm.dbg.value(metadata i64 %0, metadata !936, metadata !DIExpression()), !dbg !940
; Hoisted mask: depends only on the incoming argument %0.
%2 = and i64 %0, 15, !dbg !941
br label %3, !dbg !941
; Loop block: apart from debug-info intrinsics, only the call to bar remains.
; The increment of the integer no longer exists as an instruction; it is
; referenced only by llvm.dbg.value calls whose value operand is poison.
3: ; preds = %1, %3
call void @llvm.dbg.value(metadata i64 poison, metadata !936, metadata !DIExpression()), !dbg !940
call void @llvm.dbg.value(metadata i64 %2, metadata !937, metadata !DIExpression()), !dbg !942
tail call void @_Z3barIlEvT_(i64 noundef %2), !dbg !943
call void @llvm.dbg.value(metadata i64 poison, metadata !936, metadata !DIExpression(DW_OP_plus_uconst, 16, DW_OP_stack_value)), !dbg !940
br label %3, !dbg !941, !llvm.loop !944
}
For `foo0`, even though the cast `%4 = ptrtoint ptr %3 to i64, !dbg !918` is necessary, couldn't LLVM still infer that the masked value computed from it is loop invariant? What else am I missing here? I thought the opaque pointer representation could help here.
Many thanks!
PS: I have also asked this on GitHub, in the issue "Loop invariant expression using pointers not being optimized" (Issue #59633, llvm/llvm-project).