Alignment of vectors

Consider the following C code:

typedef __attribute__(( ext_vector_type(2) )) float float2;
typedef __attribute__(( ext_vector_type(2) )) __attribute__(( aligned(4) )) float float2_align2;

void foo(void)
{
const float * p;
size_t offset;
float2 tmp = *((float2_align2 *)(p+offset));
}

When compiled with clang -emit-llvm I get:

define void @foo() {
entry:
%p = alloca float*, align 4 ; <float**> [#uses=1]
%offset = alloca i32, align 4 ; <i32*> [#uses=1]
%tmp = alloca <2 x float>, align 8 ; <<2 x float>*> [#uses=1]
%tmp1 = load float** %p ; <float*> [#uses=1]
%tmp2 = load i32* %offset ; <i32> [#uses=1]
%add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*> [#uses=1]
%conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*> [#uses=1]
%tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]
store <2 x float> %tmp3, <2 x float>* %tmp
ret void
}

The problem is that the load into tmp3 seems to have lost any information that %conv should not be aligned to 8 bytes but rather 4. Of course, GCC only states that the alignment attribute will try and enforce a minimal alignment and so the above code generated by clang is valid but what about if the following code had been generated:

define void @foo() {
entry:
%p = alloca float*, align 4 ; <float**> [#uses=1]
%offset = alloca i32, align 4 ; <i32*> [#uses=1]
%tmp = alloca <2 x float>, align 4 ; <<2 x float>*> [#uses=1]
%tmp1 = load float** %p ; <float*> [#uses=1]
%tmp2 = load i32* %offset ; <i32> [#uses=1]
%add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*> [#uses=1]
%conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*> [#uses=1]
%tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]
store <2 x float> %tmp3, <2 x float>* %tmp
ret void
}

I’m assuming that, in general, this is not correct as the code:

%tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]

lacks the now necessary information that conv is unaligned and instead must be written as:

%tmp3 = load <2 x float>* %conv, align 2 ; <<2 x float>>, align 2 [#uses=1]

Of course, it is now up to the backend to ensure that an unaligned load is handled correctly in hardware, but assuming this is ok, then is this correct?

Thanks,

Ben

Consider the following C code:

typedef __attribute__(( ext_vector_type(2) )) float float2;
typedef __attribute__(( ext_vector_type(2) )) __attribute__(( aligned(4) ))

AFAIK, the aligned attribute doesn't do anything on a typedef of
anything other than a struct/union type in either gcc or clang. It
would be possible to implement something like this, but someone would
need to spec out exactly what it would do in various situations and
send the proposal past the gcc devs.

float float2_align2;

void foo(void)
{
  const float * p;
  size_t offset;
  float2 tmp = *((float2_align2 *)(p+offset));
}

When compiled with clang -emit-llvm I get:

define void @foo() {
entry:
    %p = alloca float*, align 4 ; <float**> [#uses=1]
    %offset = alloca i32, align 4 ; <i32*> [#uses=1]
    %tmp = alloca <2 x float>, align 8 ; <<2 x float>*> [#uses=1]
    %tmp1 = load float** %p ; <float*> [#uses=1]
    %tmp2 = load i32* %offset ; <i32> [#uses=1]
    %add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*>
[#uses=1]
    %conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*>
[#uses=1]
    %tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]
    store <2 x float> %tmp3, <2 x float>* %tmp
    ret void
}

The problem is that the load into tmp3 seems to have lost any information
that %conv should not be aligned to 8 bytes but rather 4. Of course, GCC
only states that the alignment attribute will try and enforce a minimal
alignment and so the above code generated by clang is valid

Right.

but what about
if the following code had been generated:

define void @foo() {
entry:
    %p = alloca float*, align 4 ; <float**> [#uses=1]
    %offset = alloca i32, align 4 ; <i32*> [#uses=1]
    %tmp = alloca <2 x float>, align 4 ; <<2 x float>*> [#uses=1]
    %tmp1 = load float** %p ; <float*> [#uses=1]
    %tmp2 = load i32* %offset ; <i32> [#uses=1]
    %add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*>
[#uses=1]
    %conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*>
[#uses=1]
    %tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]
    store <2 x float> %tmp3, <2 x float>* %tmp
    ret void
}

clang shouldn't be generating code like that. Per the current gcc
documentation, the aligned attribute isn't allowed to decrease
alignment. What would happen if the code subsequently passed a
pointer to the variable with the aligned attribute to another
function?

Of course, it is now up to the backend to ensure that an unaligned load is
handled correctly in hardware, but assuming this is ok, then is this
correct?

Unaligned loads should codegen correctly in LLVM; if they don't,
please file a bug.

-Eli

Hi,

Comments inline.

Ben

Consider the following C code:

typedef __attribute__(( ext_vector_type(2) )) float float2;
typedef __attribute__(( ext_vector_type(2) )) __attribute__(( aligned(4) ))

AFAIK, the aligned attribute doesn't do anything on a typedef of
anything other than a struct/union type in either gcc or clang. It
would be possible to implement something like this, but someone would
need to spec out exactly what it would do in various situations and
send the proposal past the gcc devs.

[bg] Thanks, I missed that.

float float2_align2;

void foo(void)
{
  const float * p;
  size_t offset;
  float2 tmp = *((float2_align2 *)(p+offset));
}

When compiled with clang -emit-llvm I get:

define void @foo() {
entry:
    %p = alloca float*, align 4 ; <float**> [#uses=1]
    %offset = alloca i32, align 4 ; <i32*> [#uses=1]
    %tmp = alloca <2 x float>, align 8 ; <<2 x float>*> [#uses=1]
    %tmp1 = load float** %p ; <float*> [#uses=1]
    %tmp2 = load i32* %offset ; <i32> [#uses=1]
    %add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*>
[#uses=1]
    %conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*>
[#uses=1]
    %tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]
    store <2 x float> %tmp3, <2 x float>* %tmp
    ret void
}

The problem is that the load into tmp3 seems to have lost any information
that %conv should not be aligned to 8 bytes but rather 4. Of course, GCC
only states that the alignment attribute will try and enforce a minimal
alignment and so the above code generated by clang is valid

Right.

but what about
if the following code had been generated:

define void @foo() {
entry:
    %p = alloca float*, align 4 ; <float**> [#uses=1]
    %offset = alloca i32, align 4 ; <i32*> [#uses=1]
    %tmp = alloca <2 x float>, align 4 ; <<2 x float>*> [#uses=1]
    %tmp1 = load float** %p ; <float*> [#uses=1]
    %tmp2 = load i32* %offset ; <i32> [#uses=1]
    %add.ptr = getelementptr float* %tmp1, i32 %tmp2 ; <float*>
[#uses=1]
    %conv = bitcast float* %add.ptr to <2 x float>* ; <<2 x float>*>
[#uses=1]
    %tmp3 = load <2 x float>* %conv ; <<2 x float>> [#uses=1]
    store <2 x float> %tmp3, <2 x float>* %tmp
    ret void
}

clang shouldn't be generating code like that. Per the current gcc
documentation, the aligned attribute isn't allowed to decrease
alignment. What would happen if the code subsequently passed a
pointer to the variable with the aligned attribute to another
function?

[bg] Sorry, I did not mean to imply that Clang generated this code; it was
written by hand. What I was trying to understand was whether it is valid LLVM
IR. I agree that any frontend generating this code must carefully ensure
that restricted-alignment pointers are not passed to functions that don't
expect them.

It has undefined behavior; all loads and stores must have an alignment
which is at most the alignment of the relevant pointer.

-Eli