Question About Function Cloning

Hello all;

I have been writing an LLVM pass that extracts the loops in a function
into separate functions and then duplicates each extracted function so
that the loop is executed twice. The pass runs fine, but the bitcode it
produces does not behave as it should: both the original and the
duplicated loop function are called, but from the output it seems that
only one of them is executed.

Here is the source:

#include<stdio.h>

/* Test input for the pass: runs one counted loop that increments val and
 * prints a line on every iteration, then prints the final value of val. */
int main()

{

   /* Neither variable is used below; the for loop declares its own i. */
   int i,j;

   int num_iter = 2;

   int val = 56;

/* This i shadows the i declared at the top of the function. */
for(int i = 0; i<num_iter; i++)

{

val++;

printf("Hello World\n");

}

/* With num_iter == 2 and val starting at 56 this prints "val is 58". */
printf("val is %d\n",val);
/* The process exit status is 1. */
return 1;

}

Here's the bit code my pass produces:

; ModuleID = '<stdin>'

target datalayout =
"e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux-gnu"

; Format string passed to printf inside the loop body.
@.str = private unnamed_addr constant [13 x i8] c"Hello World\0A\00", align 1

; Format string passed to printf after the loop, together with the loaded %val.
@.str1 = private unnamed_addr constant [11 x i8] c"val is %d\0A\00", align 1

; Function Attrs: nounwind uwtable

; @main after the pass has run: the loop is gone and %codeRepl instead calls
; the cloned loop function followed by the original extracted loop function.
define i32 @main() #0 {

entry:

  %retval = alloca i32, align 4

  %i = alloca i32, align 4

  %j = alloca i32, align 4

  %num_iter = alloca i32, align 4

  %val = alloca i32, align 4

  ; Stack slot for the loop counter used by both loop functions.
  %i1 = alloca i32, align 4

  store i32 0, i32* %retval

  store i32 2, i32* %num_iter, align 4

  store i32 56, i32* %val, align 4

  ; The counter is set to 0 here and nowhere else in @main.
  store i32 0, i32* %i1, align 4

  br label %codeRepl

codeRepl: ; preds = %entry

  ; Both calls receive the same three pointers, so both loops read and write
  ; the same counter (%i1), bound (%num_iter) and accumulator (%val).
  call void @main_for.condarnamoy(i32* %i1, i32* %num_iter, i32* %val)

  ; There is no store to %i1 between the two calls, so this call starts with
  ; whatever counter value the previous call left behind.
  call void @main_for.cond(i32* %i1, i32* %num_iter, i32* %val)

  br label %for.end

for.end: ; preds = %codeRepl

  ; Print the final value of %val.
  %0 = load i32* %val, align 4

  %call3 = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds
([11 x i8]* @.str1, i32 0, i32 0), i32 %0)

  ret i32 1

}

; External variadic printf; called from @main and from both loop functions.
declare i32 @printf(i8*, ...) #1

; Function Attrs: nounwind

; The loop extracted from @main. All loop state is accessed through the three
; pointer arguments: %i1 is the counter, %num_iter the bound, %val the value
; incremented on every iteration. Returns nothing.
define internal void @main_for.cond(i32* %i1, i32* %num_iter, i32* %val) #2 {

newFuncRoot:

  br label %for.cond

for.end.exitStub: ; preds = %for.cond

  ret void

for.cond: ; preds = %for.inc,
%newFuncRoot

  ; Loop test: continue while *%i1 < *%num_iter (signed), otherwise return.
  ; The counter is loaded as-is; this function never resets it.
  %0 = load i32* %i1, align 4

  %1 = load i32* %num_iter, align 4

  %cmp = icmp slt i32 %0, %1

  br i1 %cmp, label %for.body, label %for.end.exitStub

for.body: ; preds = %for.cond

  ; *%val += 1, then print the "Hello World" line.
  %2 = load i32* %val, align 4

  %inc = add nsw i32 %2, 1

  store i32 %inc, i32* %val, align 4

  %call = call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([13
x i8]* @.str, i32 0, i32 0))

  br label %for.inc

for.inc: ; preds = %for.body

  ; *%i1 += 1, written back through the pointer, so the caller's counter slot
  ; keeps the updated value after this function returns.
  %3 = load i32* %i1, align 4

  %inc2 = add nsw i32 %3, 1

  store i32 %inc2, i32* %i1, align 4

  br label %for.cond

}

; Function Attrs: nounwind

; Clone of @main_for.cond: the same instructions with "_arnamoy" appended to
; the block and value names. It takes the same three pointers (counter, bound,
; accumulator) and is the first of the two calls made from @main.
define internal void @main_for.condarnamoy(i32* %i1, i32* %num_iter,
i32* %val) #2 {

newFuncRoot_arnamoy:

  br label %for.cond_arnamoy

for.end.exitStub_arnamoy: ; preds = %for.cond_arnamoy

  ret void

for.cond_arnamoy: ; preds =
%for.inc_arnamoy, %newFuncRoot_arnamoy

  ; Loop test: continue while *%i1 < *%num_iter (signed), otherwise return.
  %0 = load i32* %i1, align 4

  %1 = load i32* %num_iter, align 4

  %cmp_arnamoy = icmp slt i32 %0, %1

  br i1 %cmp_arnamoy, label %for.body_arnamoy, label %for.end.exitStub_arnamoy

for.body_arnamoy: ; preds = %for.cond_arnamoy

  ; *%val += 1, then print the "Hello World" line.
  %2 = load i32* %val, align 4

  %inc_arnamoy = add nsw i32 %2, 1

  store i32 %inc_arnamoy, i32* %val, align 4

  %call_arnamoy = call i32 (i8*, ...)* @printf(i8* getelementptr
inbounds ([13 x i8]* @.str, i32 0, i32 0))
    br label %for.inc_arnamoy

for.inc_arnamoy: ; preds = %for.body_arnamoy

  ; *%i1 += 1, stored through the pointer: the incremented counter remains in
  ; the caller's slot when this function returns.
  %3 = load i32* %i1, align 4
  %inc2_arnamoy = add nsw i32 %3, 1
  store i32 %inc2_arnamoy, i32* %i1, align 4
  br label %for.cond_arnamoy

}

; Attribute group #0 is attached to the definition of @main.
attributes #0 = { nounwind uwtable "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"unsafe-fp-math"="false" "use-soft-float"="false" }

; Attribute group #1 is attached to the declaration of @printf.
attributes #1 = { "less-precise-fpmad"="false"
"no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf"="true"
"no-infs-fp-math"="false" "no-nans-fp-math"="false"
"unsafe-fp-math"="false" "use-soft-float"="false" }

; Attribute group #2 is attached to both extracted loop functions.
attributes #2 = { nounwind }

Hi Arnamoy,

Any idea why the output looks like this:

Hello World
Hello World
val is 58

when it should print "Hello World" 4 times?

Both your loops are sharing the counter variable (%i1) without
resetting it to 0 in between calls. I think that accounts for the
behaviour.

Cheers.

Tim.

Thanks Tim for the pointer.

Now I am trying to create two global variables and call the functions
with pointers to them.

Though I can create two global integers, could anyone please tell me
how to create a pointer to a global variable (not an array) in the
IR? I know GetElementPtr can be used for arrays, but what if I want a
pointer to just an integer, so that I can call my function with it?

Sorry, I used to rely on the LLVM demo page a lot for getting these things
done, but it is down now, and -march=cpp does not work for me either :frowning:

Any help will be appreciated.

Thanks a lot;

Hi Arnamoy,

If the loop iteration variables are local (probably the case most of the time),
run the mem2reg pass before your cloning pass. If they are not (or you don't
like the mem2reg solution), you could introduce loads in front of the
'original function' which then serve as inputs for the 'duplicated function'.
However, you have to consider loop-variant upper bounds and strides too.

Best regards,
  Johannes

Though I can create two global integers, could anyone please tell me
how to create a pointer to a global variable (not an array) in the
IR? I know GetElementPtr can be used for arrays, but what if I want a
pointer to just an integer, so that I can call my function with it?

You should be able to just use the GlobalValue/GlobalVariable without
messing around with GetElementPtr (it'll already have type i32* or
whatever).

If you write "GV = Module->getOrInsertGlobal("whatever", MyType)" then
GV will actually already have type "MyType *" as it were, and you
access the actual value by just loading from and storing to GV. In your
case it should be directly usable in the CreateCall function.

Cheers.

Tim.