How can I compile a C source file so that it uses SSE2 data movement instructions?

I wrote a small function and tested it with both clang and gcc.

File test.c:
double X[100]; double Y[100]; double DA = 0.3;
int f()
{
int i;
for (i = 0; i < 100; i++)
Y[i] = Y[i] - DA * X[i];
return 0;
}

clang -S -O3 -o test.s test.c -march=native -ccc-echo
result:
"D:/work/trunk/bin/Release/clang.exe" -cc1 -triple i686-pc-win32 -S -disable-free
-disable-llvm-verifier -main-file-name test.c -mrelocation-model static
-mdisable-fp-elim -masm-verbose -mconstructor-aliases -target-cpu corei7
-momit-leaf-frame-pointer -coverage-file test.s
-resource-dir "D:/work/trunk/bin/Release\..\lib\clang\3.1"
-fmodule-cache-path "C:\DOCUME~1\ADMINI~1\LOCALS~1\Temp\clang-module-cache"
-internal-isystem D:/work/trunk/bin/Release/../lib/clang/3.1/include
-internal-isystem "C:\Program Files\Microsoft Visual Studio 9.0\VC\include"
-internal-isystem "C:\Program Files\Microsoft SDKs\Windows\v6.0A\include"
-O3 -ferror-limit 19 -fmessage-length 80 -mstackrealign -fms-extensions
-fms-compatibility -fmsc-version=1300 -fdelayed-template-parsing -fgnu-runtime
-fobjc-runtime-has-arc -fobjc-runtime-has-weak -fobjc-fragile-abi
-fdiagnostics-show-option -fcolor-diagnostics -o test.s -x c test.c

.def _f;
.scl 2;
.type 32;
.endef
.text
.globl _f
.align 16, 0x90
_f: # @f

# BB#0:

movl $-800, %eax # imm = 0xFFFFFFFFFFFFFCE0
movsd _DA, %xmm0
.align 16, 0x90
LBB0_1: # =>This Inner Loop Header: Depth=1
movsd _X+800(%eax), %xmm1
mulsd %xmm0, %xmm1
movsd _Y+800(%eax), %xmm2
subsd %xmm1, %xmm2
movsd %xmm2, _Y+800(%eax)
addl $8, %eax
jne LBB0_1

# BB#2:

xorl %eax, %eax
ret
.data
.globl _DA # @DA
.align 8
_DA:
.quad 4599075939470750515 # double 3.000000e-01
.comm _Y,800,3 # @Y
.comm _X,800,3 # @X

gcc -S -O3 -o test2.s test.c -march=native
result:
.file "test.c"
.text
.p2align 4,15
.globl _f
.def _f; .scl 2; .type 32; .endef
_f:
pushl %ebp
movddup _DA, %xmm2
movl %esp, %ebp
xorl %eax, %eax
.p2align 4,10
L2:
movapd _Y(%eax), %xmm0
movapd _X(%eax), %xmm1
mulpd %xmm2, %xmm1
subpd %xmm1, %xmm0
movapd %xmm0, _Y(%eax)
addl $16, %eax
cmpl $800, %eax
jne L2
xorw %ax, %ax
leave
ret
.globl _DA
.data
.align 16
_DA:
.long 858993459
.long 1070805811
.comm _X, 800, 5
.comm _Y, 800, 5

It seems gcc emits more efficient instructions. Are there any clang command-line arguments that would produce a similar result?

Hi lixiong hz, clang doesn't have an autovectorizer yet, so for the moment you
have to use vectors explicitly in your C code when compiling with clang.
However, dragonegg can produce vector code if you pass it the
-fplugin-arg-dragonegg-enable-gcc-optzns option (in that case it is actually
the gcc optimizers that do the vectorizing).

Ciao, Duncan.