How to run the backend module of LLVM independently?

I want to perform the following:

  1. Grab a simple program’s LLVM IR (from Compiler Explorer)
  2. Save it to an .ll file on disk
  3. Use some LLVM tool to ONLY perform codegen for RISCV
  4. Run the executable code on a RISCV simulator

Could you help me understand how to run just the codegen module on its own, so that I can generate target binaries directly from LLVM IR?

For example, for this simple C/C++ program:

#include <iostream>

int main(void)
{
    int a = 10;
    int b = 21;
    int c = a + b;
    printf("\nThe result is %d\n", c);
    return 0;
}

The LLVM IR emitted will be as follows:


%"class.std::ios_base::Init" = type { i8 }

@std::__ioinit = internal global %"class.std::ios_base::Init" zeroinitializer, align 1, !dbg !0
@__dso_handle = external hidden global i8
@.str = private unnamed_addr constant [19 x i8] c"\0AThe result is %d\0A\00", align 1, !dbg !7
@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 65535, ptr @_GLOBAL__sub_I_example.cpp, ptr null }]

; Function Attrs: noinline uwtable
define internal void @__cxx_global_var_init() #0 section ".text.startup" !dbg !864 {
  call void @std::ios_base::Init::Init()(ptr noundef nonnull align 1 dereferenceable(1) @std::__ioinit), !dbg !866
  %1 = call i32 @__cxa_atexit(ptr @std::ios_base::Init::~Init(), ptr @std::__ioinit, ptr @__dso_handle) #3, !dbg !868
  ret void, !dbg !866
}

declare void @std::ios_base::Init::Init()(ptr noundef nonnull align 1 dereferenceable(1)) unnamed_addr #1

; Function Attrs: nounwind
declare void @std::ios_base::Init::~Init()(ptr noundef nonnull align 1 dereferenceable(1)) unnamed_addr #2

; Function Attrs: nounwind
declare i32 @__cxa_atexit(ptr, ptr, ptr) #3

; Function Attrs: mustprogress noinline norecurse optnone uwtable
define dso_local noundef i32 @main() #4 !dbg !869 {
  %1 = alloca i32, align 4
  %2 = alloca i32, align 4
  %3 = alloca i32, align 4
  %4 = alloca i32, align 4
  store i32 0, ptr %1, align 4
  call void @llvm.dbg.declare(metadata ptr %2, metadata !870, metadata !DIExpression()), !dbg !871
  store i32 10, ptr %2, align 4, !dbg !871
  call void @llvm.dbg.declare(metadata ptr %3, metadata !872, metadata !DIExpression()), !dbg !873
  store i32 21, ptr %3, align 4, !dbg !873
  call void @llvm.dbg.declare(metadata ptr %4, metadata !874, metadata !DIExpression()), !dbg !875
  %5 = load i32, ptr %2, align 4, !dbg !876
  %6 = load i32, ptr %3, align 4, !dbg !877
  %7 = add nsw i32 %5, %6, !dbg !878
  store i32 %7, ptr %4, align 4, !dbg !875
  %8 = load i32, ptr %4, align 4, !dbg !879
  %9 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %8), !dbg !880
  ret i32 0, !dbg !881
}

; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
declare void @llvm.dbg.declare(metadata, metadata, metadata) #5

declare i32 @printf(ptr noundef, ...) #1

; Function Attrs: noinline uwtable
define internal void @_GLOBAL__sub_I_example.cpp() #0 section ".text.startup" !dbg !882 {
  call void @__cxx_global_var_init(), !dbg !884
  ret void
}

attributes #0 = { noinline uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #2 = { nounwind "frame-pointer"="all" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #3 = { nounwind }
attributes #4 = { mustprogress noinline norecurse optnone uwtable "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #5 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

What you are looking for is `llc`, the LLVM static compiler. See "llc - LLVM static compiler" in the LLVM documentation: https://llvm.org/docs/CommandGuide/llc.html

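You will need to give it command line options to target RISC-V (for example `-mtriple=riscv64-unknown-linux-gnu`), unless you use the RISC-V clang on Godbolt, which may put the target options in the IR for you. Note that the IR you posted was generated for x86-64 (see the "target-cpu"="x86-64" function attributes), and IR is not fully target independent, so regenerating it with a RISC-V target is the more reliable route; x86-specific attributes may cause warnings or errors when compiling for RISC-V.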

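Note that llc runs only the LLVM backend (code generation) passes. It does not run the clang frontend, and it does not run the IR-level optimization pipeline; for that you want opt. There will likely still be some generic, target-independent codegen passes run before it gets to the RISC-V backend proper. llc has options to control which passes are run, so you could take a look at those to narrow it down.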

An AArch64 example:

$ ./bin/clang -target aarch64-unknown-linux-gnu /tmp/test.cpp -S -emit-llvm -o /tmp/test.i
$ ./bin/llc /tmp/test.i -o -
	.text
	.file	"test.cpp"
	.globl	main                            // -- Begin function main
	.p2align	2
	.type	main,@function
main:                                   // @main
	.cfi_startproc
// %bb.0:
<...>

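Also, in Godbolt remember to turn off all the output filtering options if you are copy-pasting IR. I’ve found that the filters can remove some important things, and demangling rewrites symbol names (e.g. `@std::__ioinit` in the IR above), which leaves IR that llc cannot parse. If there is an IR-specific view, use that; I’ve sometimes just added -emit-llvm to the clang options there instead.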

2 Likes

Thanks for the detailed answer. Just one more question: using the above, I am able to get the assembly as a text file, but what I want is the executable ELF file. How do I generate that?

https://llvm.org/docs/CommandGuide/llc.html#cmdoption-llc-filetype

$ ./bin/llc /tmp/test.i -o /tmp/test.o -filetype=obj

And if you have an asm file already, you could put it through the llvm-mc tool to get the same result.

That file isn’t fully linked:

$ file /tmp/test.o
/tmp/test.o: ELF 64-bit LSB relocatable, ARM aarch64, version 1 (SYSV), not stripped

So you need to call the linker but it’s easier to have clang do that for you:

$ clang /tmp/test.o -o /tmp/test_linked.o

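Then you can run that (despite the `.o` suffix used in this example, the output is a fully linked executable, not an object file). If you are cross-compiling, e.g. for RISC-V, pass the same `-target` option to clang for the link step as well, and make sure a linker and C library for that target are available. Obviously make sure your IR examples have a main function to serve as the entry point.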

1 Like