Strange verifier error with LLVM 3.9

Hello,

I’m new to LLVM and I’m trying to write a string obfuscation pass that encrypts strings using simple XOR encryption; however, I’ve run into a strange issue. Consider the following code:

#include <stdio.h>

int main() {
printf("Hello, World!\n");
return 0;
}

When I try to compile it into an executable using:

clang -o test test.c -mllvm -se

I get an error:

Global is referenced by parentless instruction!
[15 x i8]* @.e_954013943
; ModuleID = ‘test.c’
= getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i32 0, i32 0
fatal error: error in backend: Broken module found, compilation aborted!
clang-3.9: error: clang frontend command failed with exit code 70 (use -v to see invocation)
clang version 3.9.0 (https://github.com/llvm-mirror/clang f339de408790ba9a321810b9486538e4f04459ed) (https://github.com/llvm-mirror/llvm.git 3e9b31a2093ea41e1a4d42903d115b736ed66d67)
Target: x86_64-unknown-linux-gnu
Thread model: posix

When compiled without my obfuscation pass, it produces the following IR:

; ModuleID = ‘test.c’
source_filename = “test.c”
target datalayout = “e-m:e-i64:64-f80:128-n8:16:32:64-S128”
target triple = “x86_64-unknown-linux-gnu”

@.str = private unnamed_addr constant [15 x i8] c"Hello, World!\0A\00", align 1

; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%call = call i32 (i8*, …) @printf(i8* getelementptr inbounds ([15 x i8], [15 x i8]* @.str, i32 0, i32 0))
ret i32 0
}

declare i32 @printf(i8*, …) #1

When compiled with my obfuscation pass, the following IR is generated.

Note that all getelementptr instructions are generated with i64 indices, while in the error report above the instruction is `= getelementptr inbounds [15 x i8], [15 x i8]* @.e_58295325, i32 0, i32 0`, which is not present in the output IR bytecode.

Note that if I manually compile the IR bytecode into an executable, it compiles and works just fine. It only outputs an error if I try to compile and link in one command using `clang -o test test.c`.

The full execution command line is:

clang-3.9 “-cc1” “-triple” “x86_64-unknown-linux-gnu” “-emit-obj” “-mrelax-all” “-disable-free” “-main-file-name” “test.c” “-mrelocation-model” “static” “-mthread-model” “posix” “-mdisable-fp-elim” “-fmath-errno” “-masm-verbose” “-mconstructor-aliases” “-munwind-tables” “-fuse-init-array” “-target-cpu” “x86-64” “-dwarf-column-info” “-debugger-tuning=gdb” “-ferror-limit” “19” “-fmessage-length” “271” “-fobjc-runtime=gcc” “-fdiagnostics-show-option” “-fcolor-diagnostics” “-mllvm” “-se” “-x” “c” “test-42d5b1.c”

Could someone please advise where I should look for the mistake?

Thanks!
Sergey

; ModuleID = ‘test.c’
source_filename = “test.c”
target datalayout = “e-m:e-i64:64-f80:128-n8:16:32:64-S128”
target triple = “x86_64-unknown-linux-gnu”

@.e_954013943 = private constant [15 x i8] c"{2\0B\07*^J \0D\1B\09R\0ChP"

; Function Attrs: nounwind uwtable
define i32 @main() #0 {
entry:
%retval = alloca i32, align 4
store i32 0, i32* %retval, align 4
%0 = alloca i8, i64 15
%1 = getelementptr inbounds i8, i8* %0, i64 0
%2 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 0
%3 = load i8, i8* %2, align 8
%4 = xor i8 %3, 51
store i8 %4, i8* %1, align 8
%5 = getelementptr inbounds i8, i8* %0, i64 1
%6 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 1
%7 = load i8, i8* %6, align 8
%8 = xor i8 %7, 87
store i8 %8, i8* %5, align 8
%9 = getelementptr inbounds i8, i8* %0, i64 2
%10 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 2
%11 = load i8, i8* %10, align 8
%12 = xor i8 %11, 103
store i8 %12, i8* %9, align 8
%13 = getelementptr inbounds i8, i8* %0, i64 3
%14 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 3
%15 = load i8, i8* %14, align 8
%16 = xor i8 %15, 107
store i8 %16, i8* %13, align 8
%17 = getelementptr inbounds i8, i8* %0, i64 4
%18 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 4
%19 = load i8, i8* %18, align 8
%20 = xor i8 %19, 69
store i8 %20, i8* %17, align 8
%21 = getelementptr inbounds i8, i8* %0, i64 5
%22 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 5
%23 = load i8, i8* %22, align 8
%24 = xor i8 %23, 114
store i8 %24, i8* %21, align 8
%25 = getelementptr inbounds i8, i8* %0, i64 6
%26 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 6
%27 = load i8, i8* %26, align 8
%28 = xor i8 %27, 106
store i8 %28, i8* %25, align 8
%29 = getelementptr inbounds i8, i8* %0, i64 7
%30 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 7
%31 = load i8, i8* %30, align 8
%32 = xor i8 %31, 119
store i8 %32, i8* %29, align 8
%33 = getelementptr inbounds i8, i8* %0, i64 8
%34 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 8
%35 = load i8, i8* %34, align 8
%36 = xor i8 %35, 98
store i8 %36, i8* %33, align 8
%37 = getelementptr inbounds i8, i8* %0, i64 9
%38 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 9
%39 = load i8, i8* %38, align 8
%40 = xor i8 %39, 105
store i8 %40, i8* %37, align 8
%41 = getelementptr inbounds i8, i8* %0, i64 10
%42 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 10
%43 = load i8, i8* %42, align 8
%44 = xor i8 %43, 101
store i8 %44, i8* %41, align 8
%45 = getelementptr inbounds i8, i8* %0, i64 11
%46 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 11
%47 = load i8, i8* %46, align 8
%48 = xor i8 %47, 54
store i8 %48, i8* %45, align 8
%49 = getelementptr inbounds i8, i8* %0, i64 12
%50 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 12
%51 = load i8, i8* %50, align 8
%52 = xor i8 %51, 45
store i8 %52, i8* %49, align 8
%53 = getelementptr inbounds i8, i8* %0, i64 13
%54 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 13
%55 = load i8, i8* %54, align 8
%56 = xor i8 %55, 98
store i8 %56, i8* %53, align 8
%57 = getelementptr inbounds i8, i8* %0, i64 14
%58 = getelementptr inbounds [15 x i8], [15 x i8]* @.e_954013943, i64 0, i64 14
%59 = load i8, i8* %58, align 8
%60 = xor i8 %59, 80
store i8 %60, i8* %57, align 8
%call = call i32 (i8*, …) @printf(i8* %0)
ret i32 0
}

declare i32 @printf(i8*, …) #1

If you really do create all your GEPs with i64, it could be an
orphaned mutation of the original one (after you've called
ReplaceAllUsesWith on @.str). The one wrinkle there is that the
original was a Constant, not an Instruction; something you did might
have promoted it to an Instruction without putting it into any basic
block, but I can't think of what that would be right now.

I'd start by using a much shorter string, stopping the debugger on
your pass, and setting a breakpoint on
GetElementPtrInst::GetElementPtrInst. Hopefully the context will
suggest what you need to do to remove the instruction entirely.

Tim.