Memory accesses to struct variables in LLVM IR

Hi,

I’m using clang 3.4 to generate the bitcode of a C source file.
The source file is the following:

typedef struct __attribute__ ((__packed__)) { float x, y; } myType;
myType make_float2(float x, float y) { myType f = { x, y }; return f; }

int main(int argc, char* argv)
{
myType myVar[5];

for(int i=0;i<5;i++)
myVar[i] = make_float2(i,i);

return(myVar[1].x);
}

The bitcode is generated using the following command:

clang -c -emit-llvm -O0 -fno-vectorize -fno-slp-vectorize -fno-lax-vector-conversions main.c -o main.bc

target triple = "x86_64-unknown-linux-gnu"

%struct.myType = type <{ float, float }>

; Function Attrs: nounwind uwtable
define <2 x float> @_Z11make_float2ff(float %x, float %y) #0 {
entry:
%retval = alloca %struct.myType, align 1
%x1 = getelementptr inbounds %struct.myType* %retval, i32 0, i32 0
store float %x, float* %x1, align 1
%y2 = getelementptr inbounds %struct.myType* %retval, i32 0, i32 1
store float %y, float* %y2, align 1
%0 = bitcast %struct.myType* %retval to <2 x float>*
%1 = load <2 x float>* %0, align 1
ret <2 x float> %1
}

; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
%myVar = alloca [100 x %struct.myType], align 16
%ref.tmp = alloca %struct.myType, align 1
br label %for.cond

for.cond: ; preds = %for.inc, %entry
%i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
%cmp = icmp slt i32 %i.0, 5
br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
%idxprom = sext i32 %i.0 to i64
%arrayidx = getelementptr inbounds [100 x %struct.myType]* %myVar, i32 0, i64 %idxprom
%conv = sitofp i32 %i.0 to float
%conv1 = sitofp i32 %i.0 to float
%call = call <2 x float> @_Z11make_float2ff(float %conv, float %conv1)
%0 = bitcast %struct.myType* %ref.tmp to <2 x float>*
store <2 x float> %call, <2 x float>* %0, align 1
%1 = bitcast %struct.myType* %arrayidx to i8*
%2 = bitcast %struct.myType* %ref.tmp to i8*
call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 8, i32 1, i1 false)
br label %for.inc

for.inc: ; preds = %for.body
%inc = add nsw i32 %i.0, 1
br label %for.cond

for.end: ; preds = %for.cond
%arrayidx2 = getelementptr inbounds [100 x %struct.myType]* %myVar, i32 0, i64 1
%x = getelementptr inbounds %struct.myType* %arrayidx2, i32 0, i32 0
%3 = load float* %x, align 1
%conv3 = fptosi float %3 to i32
ret i32 %conv3
}

Looking at the C source code, there should be 5 store instructions corresponding to the 5 assignments of myVar[0], myVar[1], myVar[2], myVar[3] and myVar[4].
When I look at the bitcode, however, I see 5 instances of store <2 x float> %call, <2 x float>* %0, align 1, which correspond to 5 stores to the same address,
%0 (which is actually %ref.tmp, defined as %ref.tmp = alloca %struct.myType, align 1).

I would appreciate it if anyone could let me know how the 5 memory accesses at the 5 different memory addresses are implemented in the bitcode.

Thanks,

Simona

Hi,

I'm using clang 3.4 to generate the bitcode of a C source file.
The source file is the following:

typedef struct __attribute__ ((__packed__)) { float x, y; } myType;
myType make_float2(float x, float y) { myType f = { x, y }; return f; }

int main(int argc, char* argv)
{
        myType myVar[5];

        for(int i=0;i<5;i++)
                myVar[i] = make_float2(i,i);

        return(myVar[1].x);
}

The bitcode is generated using the following command:
clang -c -emit-llvm -O0 -fno-vectorize -fno-slp-vectorize -fno-lax-vector-conversions main.c -o main.bc

target triple = "x86_64-unknown-linux-gnu"

%struct.myType = type <{ float, float }>

; Function Attrs: nounwind uwtable
define <2 x float> @_Z11make_float2ff(float %x, float %y) #0 {
entry:
  %retval = alloca %struct.myType, align 1
  %x1 = getelementptr inbounds %struct.myType* %retval, i32 0, i32 0
  store float %x, float* %x1, align 1
  %y2 = getelementptr inbounds %struct.myType* %retval, i32 0, i32 1
  store float %y, float* %y2, align 1
  %0 = bitcast %struct.myType* %retval to <2 x float>*
  %1 = load <2 x float>* %0, align 1
  ret <2 x float> %1
}

; Function Attrs: nounwind uwtable
define i32 @main(i32 %argc, i8** %argv) #0 {
entry:
  %myVar = alloca [100 x %struct.myType], align 16

Looks like your IR corresponds to an array of length 100, not 5 as in your
source, but that's not too important

  %ref.tmp = alloca %struct.myType, align 1
  br label %for.cond

for.cond: ; preds = %for.inc, %entry
  %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.inc ]
  %cmp = icmp slt i32 %i.0, 5
  br i1 %cmp, label %for.body, label %for.end

for.body: ; preds = %for.cond
  %idxprom = sext i32 %i.0 to i64
  %arrayidx = getelementptr inbounds [100 x %struct.myType]* %myVar, i32 0, i64 %idxprom
  %conv = sitofp i32 %i.0 to float
  %conv1 = sitofp i32 %i.0 to float
  %call = call <2 x float> @_Z11make_float2ff(float %conv, float %conv1)
  %0 = bitcast %struct.myType* %ref.tmp to <2 x float>*
  store <2 x float> %call, <2 x float>* %0, align 1
  %1 = bitcast %struct.myType* %arrayidx to i8*
  %2 = bitcast %struct.myType* %ref.tmp to i8*
  call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 8, i32 1, i1 false)

Here is the store ^ into your array (%1 is the destination, a bitcast of
%arrayidx, which is the pointer into your array at index %idxprom, which is
%i.0, etc) using the memcpy intrinsic, rather than a store instruction.

Thanks, David, this is useful.

So sometimes the front-end generates llvm.memcpy instead of store instructions.
Is there a rule for when llvm.memcpy is generated instead of store instructions? I would have the same question for other intrinsics, such as memset and memmove.

Thanks,
Simona

There probably is a rule, but I don’t know what it is - I would imagine memcpy is used when storing a whole aggregate (but then you’ll get into ABI issues, etc - maybe if the struct contains only a single primitive type it just switches to a store, etc).

Thanks, David, I understand. Then, is there a way to disable generation of the llvm.* intrinsics? opt seems to have an option called -disable-simplify-libcalls. However, in my case, it does not remove the llvm.memcpy instruction from the bitcode.

I shouldn’t think there’s a way to remove/disable them. It would mean rewriting the memcpies as loops, since the ‘store’ instruction is only for first class types ( http://llvm.org/docs/LangRef.html#store-instruction http://llvm.org/docs/LangRef.html#t-firstclass )

SimplifyLibCalls is for simplifying library calls - such as changing printf("foo") to puts("foo"), etc:

/// LibCallSimplifier - This class implements a collection of optimizations
/// that replace well formed calls to library functions with a more optimal
/// form. For example, replacing 'printf("Hello!")' with 'puts("Hello!")'.