instcombine adds alignment info

Is it correct behavior that 'instcombine' not only combines instructions but also adds alignment information, and if so, why? (In my case I wish it wouldn't do that.) The input and output modules are attached below.

Thanks,
Frank

; ModuleID = '<stdin>'

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux-gnu"

; Input to `opt -instcombine`.
; Element-wise add of 16 floats, written as four unrolled steps: each step
; loads one <4 x float> from %arg1 and one from %arg2, adds them, and stores
; the sum to %arg0 at the same float offset (0, 4, 8, 12).
; Note that none of the loads or stores below carries an explicit `align`.
define void @main(float* noalias %arg0, float* noalias %arg1, float* noalias %arg2) {

entrypoint:

   ; Step 0: floats [0, 4). Every pointer is cast float* -> <2 x float>* ->
   ; <4 x float>*; the intermediate <2 x float>* type is never dereferenced.
   %0 = bitcast float* %arg1 to <2 x float>*

   %1 = bitcast float* %arg2 to <2 x float>*

   %2 = bitcast <2 x float>* %0 to <4 x float>*

   %3 = bitcast <2 x float>* %1 to <4 x float>*

   %4 = bitcast float* %arg0 to <2 x float>*

   ; Loads without `align`.
   %5 = load <4 x float>* %2

   %6 = load <4 x float>* %3

   %7 = fadd <4 x float> %6, %5

   %8 = bitcast <2 x float>* %4 to <4 x float>*

   ; Store without `align`.
   store <4 x float> %7, <4 x float>* %8

   ; Step 1: floats [4, 8). Same pattern, with the base pointers advanced by
   ; 4 floats via getelementptr (i32 index).
   %9 = getelementptr float* %arg1, i32 4

   %10 = bitcast float* %9 to <2 x float>*

   %11 = getelementptr float* %arg2, i32 4

   %12 = bitcast float* %11 to <2 x float>*

   %13 = getelementptr float* %arg0, i32 4

   %14 = bitcast <2 x float>* %10 to <4 x float>*

   %15 = bitcast <2 x float>* %12 to <4 x float>*

   %16 = bitcast float* %13 to <2 x float>*

   %17 = load <4 x float>* %14

   %18 = load <4 x float>* %15

   %19 = fadd <4 x float> %18, %17

   %20 = bitcast <2 x float>* %16 to <4 x float>*

   store <4 x float> %19, <4 x float>* %20

   ; Step 2: floats [8, 12).
   %21 = getelementptr float* %arg1, i32 8

   %22 = bitcast float* %21 to <2 x float>*

   %23 = getelementptr float* %arg2, i32 8

   %24 = bitcast float* %23 to <2 x float>*

   %25 = getelementptr float* %arg0, i32 8

   %26 = bitcast <2 x float>* %22 to <4 x float>*

   %27 = bitcast <2 x float>* %24 to <4 x float>*

   %28 = bitcast float* %25 to <2 x float>*

   %29 = load <4 x float>* %26

   %30 = load <4 x float>* %27

   %31 = fadd <4 x float> %30, %29

   %32 = bitcast <2 x float>* %28 to <4 x float>*

   store <4 x float> %31, <4 x float>* %32

   ; Step 3: floats [12, 16).
   %33 = getelementptr float* %arg1, i32 12

   %34 = bitcast float* %33 to <2 x float>*

   %35 = getelementptr float* %arg2, i32 12

   %36 = bitcast float* %35 to <2 x float>*

   %37 = getelementptr float* %arg0, i32 12

   %38 = bitcast <2 x float>* %34 to <4 x float>*

   %39 = bitcast <2 x float>* %36 to <4 x float>*

   %40 = bitcast float* %37 to <2 x float>*

   %41 = load <4 x float>* %38

   %42 = load <4 x float>* %39

   %43 = fadd <4 x float> %42, %41

   %44 = bitcast <2 x float>* %40 to <4 x float>*

   store <4 x float> %43, <4 x float>* %44

   ret void

}

Output after llvm-3.6/bin/opt -instcombine -S < vec_add.ll

; ModuleID = '<stdin>'

target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

target triple = "x86_64-unknown-linux-gnu"

; Result of running -instcombine on the input module; it performs the same
; four load/load/fadd/store steps. Visible differences from the input:
;   * the intermediate <2 x float>* casts are gone (float* is cast straight
;     to <4 x float>*),
;   * getelementptr indices are i64 instead of i32,
;   * every load and store now carries an explicit `align 16`.
define void @main(float* noalias %arg0, float* noalias %arg1, float* noalias %arg2) {

entrypoint:

   ; Step 0: floats [0, 4).
   %0 = bitcast float* %arg1 to <4 x float>*

   %1 = bitcast float* %arg2 to <4 x float>*

   ; `align 16` was not present in the input.
   %2 = load <4 x float>* %0, align 16

   %3 = load <4 x float>* %1, align 16

   %4 = fadd <4 x float> %3, %2

   %5 = bitcast float* %arg0 to <4 x float>*

   store <4 x float> %4, <4 x float>* %5, align 16

   ; Step 1: floats [4, 8). The three GEPs are now grouped ahead of the casts.
   %6 = getelementptr float* %arg1, i64 4

   %7 = getelementptr float* %arg2, i64 4

   %8 = getelementptr float* %arg0, i64 4

   %9 = bitcast float* %6 to <4 x float>*

   %10 = bitcast float* %7 to <4 x float>*

   %11 = load <4 x float>* %9, align 16

   %12 = load <4 x float>* %10, align 16

   %13 = fadd <4 x float> %12, %11

   %14 = bitcast float* %8 to <4 x float>*

   store <4 x float> %13, <4 x float>* %14, align 16

   ; Step 2: floats [8, 12).
   %15 = getelementptr float* %arg1, i64 8

   %16 = getelementptr float* %arg2, i64 8

   %17 = getelementptr float* %arg0, i64 8

   %18 = bitcast float* %15 to <4 x float>*

   %19 = bitcast float* %16 to <4 x float>*

   %20 = load <4 x float>* %18, align 16

   %21 = load <4 x float>* %19, align 16

   %22 = fadd <4 x float> %21, %20

   %23 = bitcast float* %17 to <4 x float>*

   store <4 x float> %22, <4 x float>* %23, align 16

   ; Step 3: floats [12, 16).
   %24 = getelementptr float* %arg1, i64 12

   %25 = getelementptr float* %arg2, i64 12

   %26 = getelementptr float* %arg0, i64 12

   %27 = bitcast float* %24 to <4 x float>*

   %28 = bitcast float* %25 to <4 x float>*

   %29 = load <4 x float>* %27, align 16

   %30 = load <4 x float>* %28, align 16

   %31 = fadd <4 x float> %30, %29

   %32 = bitcast float* %26 to <4 x float>*

   store <4 x float> %31, <4 x float>* %32, align 16

   ret void

}

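Your load and store instructions have no alignment attribute on them, but this isn’t the same as saying that they are unaligned. Instead, an omitted alignment means that LLVM assumes the ABI alignment for that type, which for <4 x float> under your data layout is 16 bytes. InstCombine is following the rules by making that implicit alignment explicit on your instructions. If your pointers are only guaranteed to be float-aligned, say so in the input by writing the alignment explicitly, e.g. `load <4 x float>* %2, align 4`.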