Consider the following C code:

typedef __attribute__(( ext_vector_type(2) )) float float2;

typedef __attribute__(( ext_vector_type(2) )) __attribute__(( aligned(4) )) float float2_align2;

void foo(void)

{

const float * p;

size_t offset;

float2 tmp = *((float2_align2 *)(p+offset));

}

When compiled with clang -emit-llvm I get:

define void @foo() {

entry:

%p = alloca float*, align 4 ; <float**> [#uses=1]

%offset = alloca i32, align 4 ; <i32*> [#uses=1]

%tmp = alloca <2 x float>, align 8 ; <<2 x float>*> [#uses=1]
%tmp1 = load float** %p ; <float*> [#uses=1]

%tmp2 = load i32* %offset ; <i32> [#uses=1]

%add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*> [#uses=1]

%conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*> [#uses=1]

%tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]

store <2 x float> %tmp3, <2 x float>* %tmp

ret void

}

The problem is that the load into %tmp3 seems to have lost the information that %conv should be treated as aligned to only 4 bytes rather than 8. Of course, GCC only states that the alignment attribute will try to enforce a minimum alignment, so the above code generated by clang is valid. But what if the following code had been generated instead (note that %tmp is now allocated with align 4 rather than align 8):

define void @foo() {

entry:

%p = alloca float*, align 4 ; <float**> [#uses=1]

%offset = alloca i32, align 4 ; <i32*> [#uses=1]

%tmp = alloca <2 x float>, align 4 ; <<2 x float>*> [#uses=1]
%tmp1 = load float** %p ; <float*> [#uses=1]

%tmp2 = load i32* %offset ; <i32> [#uses=1]

%add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*> [#uses=1]

%conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*> [#uses=1]

%tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]

store <2 x float> %tmp3, <2 x float>* %tmp

ret void

}

I’m assuming that, in general, this is not correct as the code:

%tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]

lacks the now-necessary information that %conv is under-aligned; instead, it must be written as:

%tmp3 = load <2 x float>* %conv, align 2 ; <<2 x float>>, align2 [#uses=1]

Of course, it is then up to the backend to ensure that an unaligned load is handled correctly in hardware. Assuming that is OK, is my understanding above correct?

Thanks,

Ben