`fp128` math functions: strange results

We are experimenting with adding f128 support to the Rust frontend, but some weird results are popping up:

10.0: 0x40024000000000000000000000000000
 1.0: 0x3fff0000000000000000000000000000
 3.4: 0x4000b333333333333333333333333333
 4.5: 0x40012000000000000000000000000000
-inf: 0xffff0000000000000000000000000000
log10(10) = 0      0x0000000000000000fff0000000000000
3.4^4.5 = 3.4      0x4000b333333333333333333333333333
sqrt(-inf) = -inf  0xffff0000000000000000000000000000

The results should be more like 1.0, 246.4, and NaN.

The IR (below) looks like it should be ok, so I am a bit confused as to what is going on. Is this a form of unsupported math showing up from this table (Math Functions — The LLVM C Library)? Or a weird x87 extended precision artifact? Or am I just overlooking something in codegen?

(in the above example log10(10) = 0 isn’t actually 0 but I’m using double’s printing).

; ModuleID = 'test_f128.7c43ebc7dc3975e3-cgu.0'
source_filename = "test_f128.7c43ebc7dc3975e3-cgu.0"
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

; IR for the Rust `log10_f128` wrapper: forwards %a unchanged to the
; llvm.log10.f128 intrinsic (as a tail call) and returns its fp128 result.
; Function Attrs: mustprogress nofree nosync nounwind nonlazybind willreturn memory(none) uwtable
define noundef fp128 @log10_f128(fp128 noundef %a) unnamed_addr #0 {
start:
  %0 = tail call fp128 @llvm.log10.f128(fp128 %a)
  ret fp128 %0
}

; IR for the Rust `powf_f128` wrapper: forwards %a (base) and %b (exponent)
; unchanged to the llvm.pow.f128 intrinsic (as a tail call) and returns its
; fp128 result.
; Function Attrs: mustprogress nofree nosync nounwind nonlazybind willreturn memory(none) uwtable
define noundef fp128 @powf_f128(fp128 noundef %a, fp128 noundef %b) unnamed_addr #0 {
start:
  %0 = tail call fp128 @llvm.pow.f128(fp128 %a, fp128 %b)
  ret fp128 %0
}

; IR for the Rust `sqrt_f128` wrapper: forwards %a unchanged to the
; llvm.sqrt.f128 intrinsic (as a tail call) and returns its fp128 result.
; Function Attrs: mustprogress nofree nosync nounwind nonlazybind willreturn memory(none) uwtable
define noundef fp128 @sqrt_f128(fp128 noundef %a) unnamed_addr #0 {
start:
  %0 = tail call fp128 @llvm.sqrt.f128(fp128 %a)
  ret fp128 %0
}

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare fp128 @llvm.log10.f128(fp128) #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare fp128 @llvm.pow.f128(fp128, fp128) #1

; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare fp128 @llvm.sqrt.f128(fp128) #1

attributes #0 = { mustprogress nofree nosync nounwind nonlazybind willreturn memory(none) uwtable "probe-stack"="inline-asm" "target-cpu"="x86-64" }
attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) }

!llvm.module.flags = !{!0, !1}
!llvm.ident = !{!2}

!0 = !{i32 8, !"PIC Level", i32 2}
!1 = !{i32 2, !"RtLibUseGOT", i32 1}
!2 = !{!"rustc version 1.73.0-dev"}

For further reference:

The generated assembly
	.text
	.file	"test_f128.7c43ebc7dc3975e3-cgu.0"
	.section	.text.log10_f128,"ax",@progbits
	.globl	log10_f128
	.p2align	4, 0x90
	.type	log10_f128,@function
log10_f128:
	.cfi_startproc
	# The whole function is one indirect tail jump through the GOT entry for
	# `log10l` (the long double variant); no fp128-specific symbol is referenced.
	jmpq	*log10l@GOTPCREL(%rip)
.Lfunc_end0:
	.size	log10_f128, .Lfunc_end0-log10_f128
	.cfi_endproc

	.section	.text.powf_f128,"ax",@progbits
	.globl	powf_f128
	.p2align	4, 0x90
	.type	powf_f128,@function
powf_f128:
	.cfi_startproc
	# The whole function is one indirect tail jump through the GOT entry for
	# `powl` (the long double variant); no fp128-specific symbol is referenced.
	jmpq	*powl@GOTPCREL(%rip)
.Lfunc_end1:
	.size	powf_f128, .Lfunc_end1-powf_f128
	.cfi_endproc

	.section	.text.sqrt_f128,"ax",@progbits
	.globl	sqrt_f128
	.p2align	4, 0x90
	.type	sqrt_f128,@function
sqrt_f128:
	.cfi_startproc
	# The whole function is one indirect tail jump through the GOT entry for
	# `sqrtl` (the long double variant); no fp128-specific symbol is referenced.
	jmpq	*sqrtl@GOTPCREL(%rip)
.Lfunc_end2:
	.size	sqrt_f128, .Lfunc_end2-sqrt_f128
	.cfi_endproc

	.ident	"rustc version 1.73.0-dev"
	.section	".note.GNU-stack","",@progbits

The rust source

I excluded main and built as a lib to generate the above code.

You will need my branch from Add `f16` and `f128` by tgross35 · Pull Request #114607 · rust-lang/rust · GitHub if you want to build this

#![feature(f128)]

/// Returns `a.log10()` for an `f128` argument.
///
/// Marked `#[no_mangle]` so the symbol appears as plain `log10_f128` in the
/// generated IR and assembly.
#[no_mangle]
pub fn log10_f128(a: f128) -> f128 {
    a.log10()
}

/// Returns `a.powf(b)`, i.e. `a` raised to the `f128` power `b`.
///
/// Marked `#[no_mangle]` so the symbol appears as plain `powf_f128` in the
/// generated IR and assembly.
#[no_mangle]
pub fn powf_f128(a: f128, b: f128) -> f128 {
    a.powf(b)
}

/// Returns `a.sqrt()` for an `f128` argument.
///
/// Marked `#[no_mangle]` so the symbol appears as plain `sqrt_f128` in the
/// generated IR and assembly.
#[no_mangle]
pub fn sqrt_f128(a: f128) -> f128 {
    a.sqrt()
}

// Reproducer driver: builds a few `f128` inputs, runs them through the three
// wrappers above, and prints both the values and their raw bit patterns.
fn main() {
    let ten: f128 = 10.0;
    // `one` is only printed below; it is not passed to any of the wrappers.
    let one: f128 = 1.0;
    let three_four: f128 = 3.4;
    let four_five: f128 = 4.5;
    // Negative infinity, produced by dividing -1.0 by zero.
    let neg_inf: f128 = -1.0 / 0.0;

    let log10ten = log10_f128(ten);
    // Despite the name, this holds 3.4 raised to the power 4.5.
    let three_pow_four = powf_f128(three_four, four_five);
    let sqrt_neginf = sqrt_f128(neg_inf);
    
    // `{:#034x}` prints a `0x` prefix and zero-pads to 34 characters in total,
    // i.e. 32 hex digits: the full 128-bit pattern returned by `to_bits()`.
    println!(
        "10.0: {:#034x}\n 1.0: {:#034x}\n 3.4: {:#034x}\n 4.5: {:#034x}\n-inf: {:#034x}",
        ten.to_bits(), one.to_bits(), three_four.to_bits(), four_five.to_bits(), neg_inf.to_bits(), 
    );
    // Each line below shows the input(s), the result formatted with `{}`, and
    // the result's raw bits.
    println!("log10({}) = {}      {:#034x}", ten, log10ten, log10ten.to_bits());
    println!("{}^{} = {}      {:#034x}", three_four, four_five, three_pow_four, three_pow_four.to_bits());
    println!("sqrt({}) = {}  {:#034x}", neg_inf, sqrt_neginf, sqrt_neginf.to_bits());
}

Full IR with main (opt-level=3): test-f128-main-opt3.ll - Pastebin.com

Full assembly with main (opt-level=3): test-f128-main-opt3.s - Pastebin.com

From the assembly, it looks like it’s calling the wrong functions. log10l expects an 80-bit long double. I think recent versions of glibc provide log10f128, but maybe some code is missing to make LLVM call that.

Oh, that is interesting. It looks like glibc >= 2.26 supports these (link) (I am linking against 2.35)

But it looks like clang emits calls to log10l both in IR and in assembly rather than log10f128, even with -mlong-double-128. Somehow it gets the correct result Compiler Explorer. There is currently no way to directly call log10f128 and similar functions with clang right? GCC exposes the functions via math.h but I think that’s filled with libquadmath

If you declare it by hand, it should work. I think the glibc headers don’t declare it by default because they don’t detect clang’s support.

That’s casting from __float128_t to long double. If you actually pass a __float128_t to log10l, it’ll break.

If you declare it by hand, it should work. I think the glibc headers don’t declare it by default because they don’t detect clang’s support.

It seems they are available under __builtin_log10f128, which calls log10f128 in IR directly (link)

That’s casting from __float128_t to long double. If you actually pass a __float128_t to log10l, it’ll break.

Missed that call, thanks.

With that in mind I think there is no reason that llvm.log10.f128 should get lowered to a log10l call, right? I.e. this seems like a bug. A minified example does the same (Compiler Explorer). I know very little about LLVM’s internal structure, but would changing https://github.com/llvm/llvm-project/blob/899b840ff2f3d9b278b26fe5d196072c9124d121/llvm/include/llvm/IR/RuntimeLibcalls.def#L158 and similar entries to log10f128 instead resolve this issue? (I will dig a bit more but just wondering if that happens to be an easy find).

I guess this is actually written up already: Invalid lowering of llvm.*.f128 intrinsics · Issue #44744 · llvm/llvm-project · GitHub