Hello,
This is my first question here, so I apologize if anything is incorrect. I'll try to describe the question in as much detail as possible.
I’m working on a task that involves writing an LLVM pass that replaces the combination of or
and not
instructions with a call to the orn
intrinsic function in LLVM IR. Additionally, I aim to map this intrinsic call to the RISC-V orn instruction.
To achieve this, I followed these steps:
- Defined
int_orn
intrinsic in llvm/include/llvm/IR/Intrinsics.td:
// llvm.orn: produces an i32 from two i32 operands. The operation is a pure
// bitwise computation, so it is marked IntrNoMem (no memory access or other
// side effects), IntrSpeculatable and IntrWillReturn; with an empty attribute
// list LLVM has to assume every call may read or write memory.
def int_orn : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty],
                        [IntrNoMem, IntrSpeculatable, IntrWillReturn]>;
- Next, I implemented a pass that transforms combinations of
or
and not
instructions into calls to the defined intrinsic function. I modified the existing Hello
pass from the examples (llvm/lib/Transforms/Hello/Hello.cpp
).
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;
#define DEBUG_TYPE "hello"
namespace {
struct Hello : public FunctionPass {
static char ID;
Hello() : FunctionPass(ID) {}
virtual bool runOnFunction(Function &F) override
{
bool modified = false;
std::vector<Instruction*> to_delete;
if (F.getName() == "main")
{
outs() << F << "\n";
Module *M = F.getParent();
for (auto &B : F)
{
for (auto &I : B)
{
// NOT is implemented as XOR with -1
if (I.getOpcode() == Instruction::Xor)
{
if (auto *c = dyn_cast<ConstantInt>(I.getOperand(1)))
{
if (c->getSExtValue() == -1)
{
// If found a NOT instruction
Instruction *not_inst = &I;
// Find if one of the users is a OR instruction
for (User *U : not_inst->users())
{
if (Instruction *or_inst = dyn_cast<Instruction>(U))
{
if (or_inst->getOpcode() == Instruction::Or && or_inst->getOperand(0) == not_inst)
{
auto it = or_inst->getIterator();
it = std::next(it);
auto *next_inst = &(*it);
IRBuilder<> Builder(next_inst);
Value *operand1 = not_inst->getOperand(0);
Value *operand2 = or_inst->getOperand(1);
FunctionType *fTy = FunctionType::get(or_inst->getType(), {operand1->getType(), operand2->getType()}, false);
auto orn_func = Intrinsic::getDeclaration(M, Intrinsic::orn, fTy);
Value *args[] = {operand1, operand2};
auto *orn_result = Builder.CreateCall(orn_func, args);
or_inst->replaceAllUsesWith(orn_result);
modified = true;
to_delete.push_back(not_inst);
to_delete.push_back(or_inst);
}
}
}
}
}
}
}
}
}
while(!to_delete.empty())
{
Instruction* inst = to_delete.back();
to_delete.pop_back();
inst->eraseFromParent();
}
outs() << "Modified function:\n";
outs() << F << "\n";
return modified;
}
};
}
char Hello::ID = 0;
static RegisterPass<Hello> X("hello", "Hello World Pass");
- I used this file
testorn.c
as source code to generate the IR file to run the pass.
#include <stdio.h>
/* Helper that main() never calls; it only returns the constant 9. */
int nothing()
{
return 9;
}
/* Computes ~operand1 | operand2 (a bitwise NOT feeding a bitwise OR) and
   prints the result. */
int main() {
unsigned int operand1 = 5;
unsigned int operand2 = 10;
unsigned int result;
/* Unary ~ binds tighter than |, so this is (~operand1) | operand2. */
result = ~operand1 | operand2;
printf("Result: %u\n", result);
return 0;
}
- I generated the LLVM IR file
testorn.ll
with clang testorn.c -S -emit-llvm -o testorn.ll
. The output:
; ModuleID = 'testorn.c'
source_filename = "testorn.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-unknown-elf"
@.str = private unnamed_addr constant [12 x i8] c"Result: %u\0A\00", align 1
; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @nothing() #0 {
entry:
ret i32 9
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%operand1 = alloca i32, align 4
%operand2 = alloca i32, align 4
%result = alloca i32, align 4
store i32 0, ptr %retval, align 4
store i32 5, ptr %operand1, align 4
store i32 10, ptr %operand2, align 4
%0 = load i32, ptr %operand1, align 4
%not = xor i32 %0, -1
%1 = load i32, ptr %operand2, align 4
%or = or i32 %not, %1
store i32 %or, ptr %result, align 4
%2 = load i32, ptr %result, align 4
%call = call signext i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef signext %2)
ret i32 0
}
declare dso_local signext i32 @printf(ptr noundef, ...) #1
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64"}
!2 = !{i32 7, !"uwtable", i32 2}
!3 = !{i32 7, !"frame-pointer", i32 2}
!4 = !{i32 8, !"SmallDataLimit", i32 8}
!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git fbdeb8cbc147f8f49fbd4bf23fae01bd142f0f5d)"}
- Finally, I ran the pass using the following command:
~/riscv/_install/bin/opt -enable-new-pm=0 -load ~/riscv/riscv-llvm/_build/lib/LLVMHello.so -hello -S ./testorn.ll -o ./testornoutput.ll
. The output:
; ModuleID = 'testorn.ll'
source_filename = "testorn.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-unknown-elf"
@.str = private unnamed_addr constant [12 x i8] c"Result: %u\0A\00", align 1
; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @nothing() #0 {
entry:
ret i32 9
}
; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @main() #0 {
entry:
%retval = alloca i32, align 4
%operand1 = alloca i32, align 4
%operand2 = alloca i32, align 4
%result = alloca i32, align 4
store i32 0, ptr %retval, align 4
store i32 5, ptr %operand1, align 4
store i32 10, ptr %operand2, align 4
%0 = load i32, ptr %operand1, align 4
%1 = load i32, ptr %operand2, align 4
%2 = call i32 @llvm.orn.f_i32i32i32f(i32 %0, i32 %1)
store i32 %2, ptr %result, align 4
%3 = load i32, ptr %result, align 4
%call = call signext i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef signext %3)
ret i32 0
}
declare dso_local signext i32 @printf(ptr noundef, ...) #1
declare i32 @llvm.orn.f_i32i32i32f(i32, i32)
attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}
!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64"}
!2 = !{i32 7, !"uwtable", i32 2}
!3 = !{i32 7, !"frame-pointer", i32 2}
!4 = !{i32 8, !"SmallDataLimit", i32 8}
!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git fbdeb8cbc147f8f49fbd4bf23fae01bd142f0f5d)"}
- Then, I edited
llvm/include/llvm/CodeGen/ISDOpcodes.h
to add the ORN operation.
...
/// Bitwise operators - logical and, logical or, logical xor.
AND,
OR,
XOR,
ORN, // CUSTOM ORN INSTRUCTION -- JOSE
...
- Finally, in
llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
, I added
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
unsigned Intrinsic) {
const TargetLowering &TLI = DAG.getTargetLoweringInfo();
SDLoc sdl = getCurSDLoc();
DebugLoc dl = getCurDebugLoc();
SDValue Res;
SDNodeFlags Flags;
if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
Flags.copyFMF(*FPOp);
switch (Intrinsic) {
...
// Custom case : ORN Jose
case Intrinsic::orn:
{
SDValue Ops[2];
Ops[0] = getValue(I.getArgOperand(0));
Ops[1] = getValue(I.getArgOperand(1));
SDValue Op = DAG.getNode(ISD::ORN, sdl, Ops[0].getValueType(), Ops[0], Ops[1]);
setValue(&I, Op);
return;
}
...
Could someone please guide me through the next steps? I believe I need to perform legalization and SDNode conversion to machine instruction, but I’m feeling a bit lost about it. Any assistance would be greatly appreciated.