When I compile the attached IR with LLVM 3.6 using
llc -march=x86-64 -o f.S f.ll
it generates an ADDPS whose memory operand must be 16-byte aligned, but the address it uses is unaligned. See the attached f.S; here is an extract:
addq $12, %r9 # 12 is not a multiple of 16, so %r9 is no longer 16-byte aligned (required by the addps below)
xorl %esi, %esi
.align 16, 0x90
.LBB0_1: # %loop2
# =>This Inner Loop Header: Depth=1
movq offset_array3(,%rsi,8), %rdi
movq offset_array2(,%rsi,8), %r10
movss -28(%rax), %xmm0
movss -8(%rax), %xmm1
movss -4(%rax), %xmm2
unpcklps %xmm0, %xmm2 # xmm2 = xmm2[0],xmm0[0],xmm2[1],xmm0[1]
movss (%rax), %xmm0
unpcklps %xmm0, %xmm1 # xmm1 = xmm1[0],xmm0[0],xmm1[1],xmm0[1]
unpcklps %xmm2, %xmm1 # xmm1 = xmm1[0],xmm2[0],xmm1[1],xmm2[1]
addps (%r9), %xmm1 # here the unaligned (%r9) is used as a memory operand, which causes a segfault
Frank
f.ll (3.48 KB)
f.S (1.9 KB)