Hi,
The function reciprocal below fails to optimize as expected. In particular is still includes the check if num is 1 or -1 and then a call to the reduce_in_place method, when that should be optimized out. GCC optimization this code correctly. Godbolt link: Compiler Explorer
#include <cstdlib>
#include <limits>
class Frac {
protected:
int num, den;
public:
Frac(int n, int d) {
if (d < 0) {
if (d == 0) abort();
if (n == std::numeric_limits<int>::lowest()) abort();
num = -n;
if (d == std::numeric_limits<int>::lowest()) abort();
den = -d;
} else {
num = n;
den = d;
}
}
};
class Reduced : public Frac {
void reduce_in_place();
public:
Reduced(int n, int d) : Frac(n,d) {
if (den == 1 || num == 1 || num == -1) return;
reduce_in_place();
}
};
Reduced int_value(int x) {
return Reduced(x,1);
}
Reduced reciprocal(int x) {
return Reduced(1,x);
}
LLVM IR: (from godbolt)
define dso_local i64 @reciprocal(int)(i32 noundef %x) local_unnamed_addr {
entry:
%retval = alloca %class.Reduced, align 8
%cmp.i.i = icmp slt i32 %x, 0
br i1 %cmp.i.i, label %if.end.i.i, label %if.else.i.i
if.end.i.i:
store i32 -1, ptr %retval, align 8
%cmp8.i.i = icmp eq i32 %x, -2147483648
br i1 %cmp8.i.i, label %if.then9.i.i, label %if.end10.i.i
if.then9.i.i:
tail call void @abort() #4
unreachable
if.end10.i.i:
%sub11.i.i = sub nsw i32 0, %x
br label %_ZN4FracC2Eii.exit.i
if.else.i.i:
store i32 1, ptr %retval, align 8
br label %_ZN4FracC2Eii.exit.i
_ZN4FracC2Eii.exit.i:
%0 = phi i32 [ 1, %if.else.i.i ], [ -1, %if.end10.i.i ]
%d.sink.i.i = phi i32 [ %x, %if.else.i.i ], [ %sub11.i.i, %if.end10.i.i ]
%den13.i.i = getelementptr inbounds nuw i8, ptr %retval, i64 4
store i32 %d.sink.i.i, ptr %den13.i.i, align 4
%cmp.i = icmp eq i32 %d.sink.i.i, 1
br i1 %cmp.i, label %_ZN7ReducedC2Eii.exit, label %lor.lhs.false.i
lor.lhs.false.i:
switch i32 %0, label %if.end.i [
i32 1, label %_ZN7ReducedC2Eii.exit
i32 -1, label %_ZN7ReducedC2Eii.exit
]
if.end.i:
call void @Reduced::reduce_in_place()(ptr noundef nonnull align 4 dereferenceable(8) %retval)
br label %_ZN7ReducedC2Eii.exit
_ZN7ReducedC2Eii.exit:
%1 = load i64, ptr %retval, align 8
ret i64 %1
}
I am only vaguely familiar with clangs IR, but it seams obvious from %0 = phi i32 [ 1, %if.else.i.i ], [ -1, %if.end10.i.i ] that %0 can only be 1 or -1, and yet the check is still included. If I remove the overflow checks, reciprocal optimizes as expected. Also if I instead test if n (the parameter to the constructor) is 1 or -1 instead of num the function optimized as expected.
Is this a known issue?
Thanks,
Kevin