Mapping intrinsic function to RISC-V instruction

Hello,

This is my first question here, so I apologize if anything is out of place. I have tried to make the question as detailed as possible.

I’m working on a task that involves writing an LLVM pass that replaces the combination of or and not instructions with a call to the orn intrinsic function in LLVM IR. Additionally, I aim to map this intrinsic call to the RISC-V orn instruction.

To achieve this, I followed these steps:

  1. Defined int_orn intrinsic in llvm/include/llvm/IR/Intrinsics.td:
def int_orn : Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], []>;
  2. Next, I implemented a pass that transforms combinations of or and not instructions into calls to the defined intrinsic function. I modified the existing Hello pass from the examples (llvm/lib/Transforms/Hello/Hello.cpp).
#include "llvm/Pass.h"
#include "llvm/IR/Function.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/InstIterator.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Intrinsics.h"
using namespace llvm;

#define DEBUG_TYPE "hello"


namespace {
  struct Hello : public FunctionPass {
    static char ID;
    Hello() : FunctionPass(ID) {}
    virtual bool runOnFunction(Function &F) override
    {
      bool modified = false;
      std::vector<Instruction*> to_delete;
      if (F.getName() == "main")
      {
        outs() << F << "\n";
        
        Module *M = F.getParent();
        for (auto &B : F)
        {
          for (auto &I : B)
          {
            // NOT is implemented as XOR with -1
            if (I.getOpcode() == Instruction::Xor)
            {
              if (auto *c = dyn_cast<ConstantInt>(I.getOperand(1)))
              {
                if (c->getSExtValue() == -1)
                {
                  // If found a NOT instruction
                  Instruction *not_inst = &I;
                  // Find if one of the users is a OR instruction
                  for (User *U : not_inst->users())
                  {
                    if (Instruction *or_inst = dyn_cast<Instruction>(U))
                    {
                      if (or_inst->getOpcode() == Instruction::Or && or_inst->getOperand(0) == not_inst)
                      {
                        auto it = or_inst->getIterator();
                        it = std::next(it);
                        auto *next_inst = &(*it);
                        IRBuilder<> Builder(next_inst);
                        Value *operand1 = not_inst->getOperand(0);
                        Value *operand2 = or_inst->getOperand(1);
                        FunctionType *fTy = FunctionType::get(or_inst->getType(), {operand1->getType(), operand2->getType()}, false);
                        auto orn_func = Intrinsic::getDeclaration(M, Intrinsic::orn, fTy);
                        Value *args[] = {operand1, operand2};
                        auto *orn_result = Builder.CreateCall(orn_func, args);
                        or_inst->replaceAllUsesWith(orn_result);
                        modified = true;
                        to_delete.push_back(not_inst);
                        to_delete.push_back(or_inst);
                      }
                    }
                  }
                }
              }
                
            }
          }
        }

      }
      while(!to_delete.empty())
      {
        Instruction* inst = to_delete.back();
        to_delete.pop_back();
        inst->eraseFromParent();
      }
      
      outs() << "Modified function:\n";
      outs() << F << "\n";

      return modified;
    }
  };
}

char Hello::ID = 0;
static RegisterPass<Hello> X("hello", "Hello World Pass");
  3. I used this file testorn.c as source code to generate the IR file to run the pass.
#include <stdio.h>

/* Trivial helper that always returns 9. It is never called from main();
 * presumably it exists so the module contains a second function that the
 * pass should leave untouched -- confirm. */
int nothing()
{
    return 9;
}

/* Test driver: evaluates ~operand1 | operand2 (the NOT-then-OR pattern the
 * pass looks for) and prints the result. Always returns 0. */
int main() {
    unsigned int operand1 = 5;
    unsigned int operand2 = 10;
    unsigned int result;

    /* The expression under test: bitwise NOT of operand1, OR-ed with operand2. */
    result = ~operand1 | operand2;

    printf("Result: %u\n", result);

    return 0;
}
  4. I generated the LLVM IR file testorn.ll with clang testorn.c -S -emit-llvm -o testorn.ll. The output:
; ModuleID = 'testorn.c'
source_filename = "testorn.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-unknown-elf"

@.str = private unnamed_addr constant [12 x i8] c"Result: %u\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @nothing() #0 {
entry:
  ret i32 9
}

; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @main() #0 {
entry:
  %retval = alloca i32, align 4
  %operand1 = alloca i32, align 4
  %operand2 = alloca i32, align 4
  %result = alloca i32, align 4
  store i32 0, ptr %retval, align 4
  store i32 5, ptr %operand1, align 4
  store i32 10, ptr %operand2, align 4
  %0 = load i32, ptr %operand1, align 4
  %not = xor i32 %0, -1
  %1 = load i32, ptr %operand2, align 4
  %or = or i32 %not, %1
  store i32 %or, ptr %result, align 4
  %2 = load i32, ptr %result, align 4
  %call = call signext i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef signext %2)
  ret i32 0
}

declare dso_local signext i32 @printf(ptr noundef, ...) #1

attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64"}
!2 = !{i32 7, !"uwtable", i32 2}
!3 = !{i32 7, !"frame-pointer", i32 2}
!4 = !{i32 8, !"SmallDataLimit", i32 8}
!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git fbdeb8cbc147f8f49fbd4bf23fae01bd142f0f5d)"}
  5. Next, I ran the pass using the following command: ~/riscv/_install/bin/opt -enable-new-pm=0 -load ~/riscv/riscv-llvm/_build/lib/LLVMHello.so -hello -S ./testorn.ll -o ./testornoutput.ll. The output:
; ModuleID = 'testorn.ll'
source_filename = "testorn.c"
target datalayout = "e-m:e-p:64:64-i64:64-i128:128-n32:64-S128"
target triple = "riscv64-unknown-unknown-elf"

@.str = private unnamed_addr constant [12 x i8] c"Result: %u\0A\00", align 1

; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @nothing() #0 {
entry:
  ret i32 9
}

; Function Attrs: noinline nounwind optnone uwtable
define dso_local signext i32 @main() #0 {
entry:
  %retval = alloca i32, align 4
  %operand1 = alloca i32, align 4
  %operand2 = alloca i32, align 4
  %result = alloca i32, align 4
  store i32 0, ptr %retval, align 4
  store i32 5, ptr %operand1, align 4
  store i32 10, ptr %operand2, align 4
  %0 = load i32, ptr %operand1, align 4
  %1 = load i32, ptr %operand2, align 4
  %2 = call i32 @llvm.orn.f_i32i32i32f(i32 %0, i32 %1)
  store i32 %2, ptr %result, align 4
  %3 = load i32, ptr %result, align 4
  %call = call signext i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef signext %3)
  ret i32 0
}

declare dso_local signext i32 @printf(ptr noundef, ...) #1

declare i32 @llvm.orn.f_i32i32i32f(i32, i32)

attributes #0 = { noinline nounwind optnone uwtable "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic-rv64" "target-features"="+64bit,+a,+c,+m,+relax,-d,-e,-experimental-smaia,-experimental-ssaia,-experimental-zca,-experimental-zcb,-experimental-zcd,-experimental-zcf,-experimental-zcmp,-experimental-zcmt,-experimental-zfa,-experimental-zfbfmin,-experimental-zicond,-experimental-zihintntl,-experimental-ztso,-experimental-zvbb,-experimental-zvbc,-experimental-zvfbfmin,-experimental-zvfbfwma,-experimental-zvfh,-experimental-zvkg,-experimental-zvkn,-experimental-zvknc,-experimental-zvkned,-experimental-zvkng,-experimental-zvknha,-experimental-zvknhb,-experimental-zvks,-experimental-zvksc,-experimental-zvksed,-experimental-zvksg,-experimental-zvksh,-experimental-zvkt,-f,-h,-save-restore,-svinval,-svnapot,-svpbmt,-v,-xsfvcp,-xtheadba,-xtheadbb,-xtheadbs,-xtheadcmo,-xtheadcondmov,-xtheadfmemidx,-xtheadmac,-xtheadmemidx,-xtheadmempair,-xtheadsync,-xtheadvdot,-xventanacondops,-zawrs,-zba,-zbb,-zbc,-zbkb,-zbkc,-zbkx,-zbs,-zdinx,-zfh,-zfhmin,-zfinx,-zhinx,-zhinxmin,-zicbom,-zicbop,-zicboz,-zicntr,-zicsr,-zifencei,-zihintpause,-zihpm,-zk,-zkn,-zknd,-zkne,-zknh,-zkr,-zks,-zksed,-zksh,-zkt,-zmmul,-zve32f,-zve32x,-zve64d,-zve64f,-zve64x,-zvl1024b,-zvl128b,-zvl16384b,-zvl2048b,-zvl256b,-zvl32768b,-zvl32b,-zvl4096b,-zvl512b,-zvl64b,-zvl65536b,-zvl8192b" }

!llvm.module.flags = !{!0, !1, !2, !3, !4}
!llvm.ident = !{!5}

!0 = !{i32 1, !"wchar_size", i32 4}
!1 = !{i32 1, !"target-abi", !"lp64"}
!2 = !{i32 7, !"uwtable", i32 2}
!3 = !{i32 7, !"frame-pointer", i32 2}
!4 = !{i32 8, !"SmallDataLimit", i32 8}
!5 = !{!"clang version 17.0.0 (https://github.com/llvm/llvm-project.git fbdeb8cbc147f8f49fbd4bf23fae01bd142f0f5d)"}
  6. Then, I edited llvm/include/llvm/CodeGen/ISDOpcodes.h to add the ORN operation.
...
  /// Bitwise operators - logical and, logical or, logical xor.
  AND,
  OR,
  XOR,
  ORN, // CUSTOM ORN INSTRUCTION -- JOSE
...
  7. Finally, in llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp, I added a case for the new intrinsic to visitIntrinsicCall:
/// Lower the call to the specified intrinsic function.
void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                                             unsigned Intrinsic) {
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  SDLoc sdl = getCurSDLoc();
  DebugLoc dl = getCurDebugLoc();
  SDValue Res;

  SDNodeFlags Flags;
  if (auto *FPOp = dyn_cast<FPMathOperator>(&I))
    Flags.copyFMF(*FPOp);

  switch (Intrinsic) {
  
  ... 
  // Custom case : ORN Jose
  case Intrinsic::orn:
  {
    SDValue Ops[2];
    Ops[0] = getValue(I.getArgOperand(0));
    Ops[1] = getValue(I.getArgOperand(1));
    SDValue Op = DAG.getNode(ISD::ORN, sdl, Ops[0].getValueType(), Ops[0], Ops[1]);
    setValue(&I, Op);
    return; 
  }
  ...

Could someone please guide me through the next steps? I believe I need to perform legalization and SDNode conversion to machine instruction, but I’m feeling a bit lost about it. Any assistance would be greatly appreciated.

Hello. I solved this issue using a different approach. If anyone’s interested, check this link: Medium Post