I’m curious about the x87 floating-point instruction (`fldl`) that clang generates for x86-64 at the end of the assembly listing below.
I reckon somebody got confused.
Ubuntu clang version 15.0.7
Target: x86_64-pc-linux-gnu
Thread model: posix
InstalledDir: /usr/bin
clang++-15 -S tmp.ll -march=native -O3
# Notation: a[k] / b[k] are the doubles at offset 8*k from %rdi / %rsi.
# NOTE(review): on the x86_64-pc-linux-gnu target shown above, %rdi/%rsi are
# presumably the first/second pointer argument of multiply -- confirm.
# Broadcast b[0] into both lanes: xmm1 = {b[0], b[0]}.
vmovddup (%rsi), %xmm1 # xmm1 = mem[0,0]
# Broadcast a[0] into both lanes: xmm2 = {a[0], a[0]}.
vmovddup (%rdi), %xmm2 # xmm2 = mem[0,0]
# Result element 0: low lane of xmm0 = a[0] * b[0].
vmulsd %xmm1, %xmm2, %xmm0
# xmm1 = {b[0] * a[1], b[0] * a[2]}.
vmulpd 8(%rdi), %xmm1, %xmm1
# xmm2 = {a[0] * b[1], a[0] * b[2]}.
vmulpd 8(%rsi), %xmm2, %xmm2
# xmm1 = {a[0]*b[1] + a[1]*b[0], a[0]*b[2] + a[2]*b[0]}, i.e. result elements 1 and 2.
vaddpd %xmm2, %xmm1, %xmm1
# Spill the high lane of xmm1 (result element 2) to the stack ...
vmovhpd %xmm1, -8(%rsp)
# ... and push it onto the x87 register stack, so it is left in st(0) at return.
fldl -8(%rsp)
# At this point: element 0 is in xmm0, element 1 in the low lane of xmm1, element 2 in st(0).
retq
From my C++ code I’m getting the following IR code:
; ModuleID = 'example'
source_filename = "example"
%DualNumber = type { [3 x double] }
; Multiplies the two %DualNumber values pointed to by %0 and %1 and returns the
; product by value. Writing x for *%0 and y for *%1 (arrays of 3 doubles):
;   result[0] = x[0] * y[0]
;   result[i] = x[0] * y[i] + x[i] * y[0]   for i = 1, 2
define %DualNumber @multiply(ptr %0, ptr %1) {
entry:
; Load both operands as whole aggregates and pull out element 0 of each.
%2 = load %DualNumber, ptr %0, align 8
%3 = extractvalue %DualNumber %2, 0, 0
%4 = load %DualNumber, ptr %1, align 8
%5 = extractvalue %DualNumber %4, 0, 0
; result[0] = x[0] * y[0], written into the stack temporary %7.
%6 = fmul double %3, %5
%7 = alloca %DualNumber, align 8
%8 = getelementptr %DualNumber, ptr %7, i32 0, i32 0, i32 0
store double %6, ptr %8, align 8
; Loop counter lives in a stack slot and starts at 1.
%9 = alloca i32, align 4
store i32 1, ptr %9, align 4
br label %cond
cond: ; preds = %body, %entry
; Continue while counter < 3 (unsigned comparison).
%10 = load i32, ptr %9, align 4
%11 = icmp ult i32 %10, 3
br i1 %11, label %body, label %end
body: ; preds = %cond
; %12 = address of result[i]
%12 = getelementptr %DualNumber, ptr %7, i32 0, i32 0, i32 %10
; %15 = x[i] * y[0]
%13 = getelementptr %DualNumber, ptr %0, i32 0, i32 0, i32 %10
%14 = load double, ptr %13, align 8
%15 = fmul double %14, %5
; %18 = x[0] * y[i]
%16 = getelementptr %DualNumber, ptr %1, i32 0, i32 0, i32 %10
%17 = load double, ptr %16, align 8
%18 = fmul double %3, %17
; result[i] = x[0] * y[i] + x[i] * y[0]
%19 = fadd double %18, %15
store double %19, ptr %12, align 8
; Increment the counter and re-test.
%20 = add i32 %10, 1
store i32 %20, ptr %9, align 4
br label %cond
end: ; preds = %cond
; Return the whole temporary by value.
%21 = load %DualNumber, ptr %7, align 8
ret %DualNumber %21
}
which in turn was generated by the following C++ code:
#include <iostream>
#include "llvm/ADT/ArrayRef.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Verifier.h"
using namespace llvm;
static constexpr std::size_t SIZE = 3;
int main(int, char**)
{
LLVMContext sContext;
Module sModule("example", sContext);
IRBuilder<> sBuilder(sContext);
const auto pDualNumberType = StructType::create(
sContext,
{ ArrayType::get(Type::getDoubleTy(sContext), SIZE)
},
"DualNumber"
);
// Define a function that multiplies two dual numbers.
const auto pMultiplyType = FunctionType::get(
pDualNumberType,
{ pDualNumberType->getPointerTo(),
pDualNumberType->getPointerTo()
},
false
);
const auto pFunction = Function::Create(
pMultiplyType,
Function::ExternalLinkage,
"multiply",
sModule
);
sBuilder.SetInsertPoint(
BasicBlock::Create(
sContext,
"entry",
pFunction
)
);
const auto pX = pFunction->args().begin();
const auto pXValue = sBuilder.CreateExtractValue(
sBuilder.CreateLoad(
pDualNumberType,
pX
),
{ 0,
0
}
);
const auto pY = std::next(pFunction->args().begin());
const auto pYValue = sBuilder.CreateExtractValue(
sBuilder.CreateLoad(
pDualNumberType,
pY
),
{ 0,
0
}
);
const auto pRetValue = sBuilder.CreateFMul(pXValue, pYValue);
const auto pResult = sBuilder.CreateAlloca(pDualNumberType);
const auto pZero = ConstantInt::get(Type::getInt32Ty(sContext), 0);
sBuilder.CreateStore(
pRetValue,
sBuilder.CreateGEP(
pDualNumberType,
pResult,
{ pZero,
pZero,
pZero,
}
)
);
/// loop variable
const auto i = sBuilder.CreateAlloca(Type::getInt32Ty(sContext));
/// initialization of the loop variable
sBuilder.CreateStore(ConstantInt::get(Type::getInt32Ty(sContext), 1), i);
const auto pConditionBlock = BasicBlock::Create(sContext, "cond", pFunction);
const auto pLoopBody = BasicBlock::Create(sContext, "body", pFunction);
//const auto incBB = BasicBlock::Create(sContext, "inc", pFunction);
const auto pLoopExit = BasicBlock::Create(sContext, "end", pFunction);
sBuilder.CreateBr(pConditionBlock);
sBuilder.SetInsertPoint(pConditionBlock);
const auto iVal = sBuilder.CreateLoad(Type::getInt32Ty(sContext), i);
sBuilder.CreateCondBr(
sBuilder.CreateICmpULT(iVal, ConstantInt::get(Type::getInt32Ty(sContext), SIZE)),
pLoopBody,
pLoopExit
);
sBuilder.SetInsertPoint(pLoopBody);
sBuilder.CreateStore(
sBuilder.CreateFAdd(
sBuilder.CreateFMul(
pXValue,
sBuilder.CreateLoad(Type::getDoubleTy(sContext), sBuilder.CreateGEP(pDualNumberType, pY, {pZero, pZero, iVal}))
),
sBuilder.CreateFMul(
sBuilder.CreateLoad(Type::getDoubleTy(sContext), sBuilder.CreateGEP(pDualNumberType, pX, {pZero, pZero, iVal})),
pYValue
)
),
sBuilder.CreateGEP(
pDualNumberType,
pResult,
{ pZero,
pZero,
iVal
}
)
);
sBuilder.CreateStore(
sBuilder.CreateAdd(iVal, ConstantInt::get(Type::getInt32Ty(sContext), 1)),
i
);
sBuilder.CreateBr(pConditionBlock);
sBuilder.SetInsertPoint(pLoopExit);
sBuilder.CreateRet(sBuilder.CreateLoad(pDualNumberType, pResult));
verifyFunction(*pFunction);
sModule.print(outs(), nullptr);
}