How to add NOP?

I would like to convert a C file that uses OpenMP library to X86 code with a condition: I want to insert a NOP before each instruction that does a private memory access.
I have written an optimization pass that inserts an inline assembly call (NOP) but it is applied at IR level, and I cannot determine where to insert it. How can I use LLVM to add NOPs accordingly?

Thanks.

Hi Erdem,

I would write a MachineFunction pass. You can check each load instruction to see whether it is a private memory access and insert what you want by using the BuildMI(…) function.

Volkan

Use the MachineInstr::memoperands() function to get the memory operands; then you can get the address space of each one by calling MachineMemOperand::getAddrSpace().

Volkan

This seems to be what I am looking for. That was very helpful. Thank you.

Erdem

I wrote the pass but when I try to build LLVM, I get this error:
"/home/erdem/llvm/lib/CodeGen/CodeGen.cpp:80: error: undefined reference to 'llvm::initializeNoopInserterPass(llvm::PassRegistry&)'"
I need to modify the Passes.h, InitializePasses.h, and CodeGen.cpp files to register my pass, right?

Erdem

Hi Erdem,

Since it is a target-specific pass, you should put your pass inside the target’s folder (e.g. lib/Target/X86/) and declare your pass in the target’s header (e.g. X86.h). Then you need to modify the target’s TargetPassConfig (http://llvm.org/docs/doxygen/html/classllvm_1_1TargetPassConfig.html) to enable your pass.

You can check out the existing MachineFunction passes (e.g. X86ExpandPseudo.cpp) to get an idea.

Volkan

Thanks. I enabled my pass. I have one unresolved issue though: MachineMemOperand::getAddrSpace() always returns zero. How can I use it to distinguish private memory accesses?

Erdem

Can you send the IR you are using?

Volkan

Here it is:

; ModuleID = 'omp_hello.c'
target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-unknown-linux-gnu"

%ident_t = type { i32, i32, i32, i32, i8* }
%struct.anon = type { i32* }

@.str = private unnamed_addr constant [30 x i8] c"Hello World from thread = %d\0A\00", align 1
@.str.1 = private unnamed_addr constant [24 x i8] c"Number of threads = %d\0A\00", align 1
@.str.2 = private unnamed_addr constant [23 x i8] c";unknown;unknown;0;0;;\00", align 1
@0 = private unnamed_addr constant %ident_t { i32 0, i32 66, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i32 0, i32 0) }
@1 = private unnamed_addr constant %ident_t { i32 0, i32 2, i32 0, i32 0, i8* getelementptr inbounds ([23 x i8], [23 x i8]* @.str.2, i32 0, i32 0) }

; Function Attrs: nounwind optsize uwtable
define i32 @main(i32 %argc, i8** nocapture readnone %argv) #0 {
entry:
%a = alloca i32, align 4
%agg.captured = alloca %struct.anon, align 8
tail call void @omp_set_num_threads(i32 4) #4
%0 = bitcast i32* %a to i8*
call void @llvm.lifetime.start(i64 4, i8* %0) #1
store i32 5, i32* %a, align 4, !tbaa !1
%1 = getelementptr inbounds %struct.anon, %struct.anon* %agg.captured, i64 0, i32 0
store i32* %a, i32** %1, align 8, !tbaa !5
call void (%ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%ident_t* nonnull @1, i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, %struct.anon*)* @.omp_outlined. to void (i32*, i32*, ...)*), %struct.anon* nonnull %agg.captured) #1
call void @llvm.lifetime.end(i64 4, i8* %0) #1
ret i32 0
}

; Function Attrs: nounwind
declare void @llvm.lifetime.start(i64, i8* nocapture) #1

; Function Attrs: optsize
declare void @omp_set_num_threads(i32) #2

; Function Attrs: nounwind optsize uwtable
define internal void @.omp_outlined.(i32* nocapture readonly %.global_tid., i32* nocapture readnone %.bound_tid., %struct.anon* nocapture readonly %__context) #0 {
entry:
%call = tail call i32 @omp_get_thread_num() #4
%0 = getelementptr inbounds %struct.anon, %struct.anon* %__context, i64 0, i32 0
%ref = load i32*, i32** %0, align 8, !tbaa !5
%1 = load i32, i32* %ref, align 4, !tbaa !1
%call1 = tail call i32 (i8*, …) @printf(i8* getelementptr inbounds ([30 x i8], [30 x i8]* @.str, i64 0, i64 0), i32 %1) #4
%cmp = icmp eq i32 %call, 0
br i1 %cmp, label %if.then, label %if.end

if.then: ; preds = %entry
call void asm "negl %eax;", ""()
%call2 = tail call i32 @omp_get_num_threads() #4
%call3 = tail call i32 (i8*, …) @printf(i8* getelementptr inbounds ([24 x i8], [24 x i8]* @.str.1, i64 0, i64 0), i32 %call2) #4
br label %if.end

if.end: ; preds = %if.then, %entry
%2 = load i32, i32* %.global_tid., align 4, !tbaa !1
%3 = tail call i32 @__kmpc_cancel_barrier(%ident_t* nonnull @0, i32 %2) #1
ret void
}

; Function Attrs: optsize
declare i32 @omp_get_thread_num() #2

; Function Attrs: nounwind optsize
declare i32 @printf(i8* nocapture readonly, …) #3

; Function Attrs: optsize
declare i32 @omp_get_num_threads() #2

declare i32 @__kmpc_cancel_barrier(%ident_t*, i32)

; Function Attrs: nounwind
declare void @llvm.lifetime.end(i64, i8* nocapture) #1

declare void @__kmpc_fork_call(%ident_t*, i32, void (i32*, i32*, …)*, …)

attributes #0 = { nounwind optsize uwtable "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #1 = { nounwind }
attributes #2 = { optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #3 = { nounwind optsize "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+sse,+sse2" "unsafe-fp-math"="false" "use-soft-float"="false" }
attributes #4 = { nounwind optsize }

!llvm.ident = !{!0}

!0 = !{!"clang version 3.8.0 (trunk 243832)"}
!1 = !{!2, !2, i64 0}
!2 = !{!"int", !3, i64 0}
!3 = !{!"omnipotent char", !4, i64 0}
!4 = !{!"Simple C/C++ TBAA"}
!5 = !{!3, !3, i64 0}

Erdem

According to the LLVM Language Reference Manual (http://llvm.org/docs/LangRef.html#global-variables), the address space is 0 for those global variables.

“A global variable may be declared to reside in a target-specific numbered address space. For targets that support them, address spaces may affect how optimizations are performed and/or what target instructions are used to access the variable. The default address space is zero. The address space qualifier must precede any other attributes.”

Volkan