Hi Serg,
Next, I disassembled the executable file and did not find any SSE instructions.
I know that LLVM supports SSE.
So my questions:
1. Does this occur only on my computer?
2. If it is not just a problem on my side, are there no SSE optimizations in LLVM?
3. Has anyone already worked on this problem?
The gcc-4.5 tree vectorizer vectorizes this (see the LLVM IR below), but LLVM does
not yet have an auto-vectorizer that can do this.
Ciao, Duncan.
IR produced by dragonegg using -O3 and -fplugin-arg-dragonegg-enable-gcc-optzns:
; Target description: data layout string and target triple for x86-64 Linux.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-f128:128:128-n8:16:32:64"
target triple = "x86_64-unknown-linux-gnu"
; Module-level inline assembly: emits an .ident directive naming the producing GCC/LLVM versions.
module asm "\09.ident\09\22GCC: (GNU) 4.5.4 20110506 (prerelease) LLVM: 131851M\22"
; @v1: zero-initialized global array of 10000 i32 values, 32-byte aligned.
@v1 = common global [10000 x i32] zeroinitializer, align 32
; @.cst: the NUL-terminated format string "%d " passed to printf in @main.
@.cst = private constant [4 x i8] c"%d \00", align 8
; @main: fills @v1 so that v1[i] == i for i in [0, 10000) using a vectorized
; loop (4 x i32 per iteration), then prints every element with printf("%d ")
; in a scalar loop, and returns 0.
define i32 @main() nounwind {
entry:
br label %"<bb 3>"
; Vectorized initialization loop: 2500 iterations, each storing 4 consecutive i32 values.
"<bb 3>": ; preds = %"<bb 3>", %entry
; %indvar2: iteration counter, starts at 0 and is incremented by 1 per iteration.
%indvar2 = phi i64 [ %indvar.next3, %"<bb 3>" ], [ 0, %entry ]
; Vector induction variable: starts as <0, 1, 2, 3> and advances by 4 in every lane.
%vect_vec_iv_.8_10 = phi <4 x i32> [ %vect_vec_iv_.8_24, %"<bb 3>" ], [ <i32 0, i32 1, i32 2, i32 3>, %entry ]
; %tmp6 = %indvar2 * 4: element index of the first of the 4 elements written this iteration.
%tmp6 = shl i64 %indvar2, 2
; Address of v1[%tmp6], reinterpreted as a pointer to a 4 x i32 vector.
%scevgep7 = getelementptr [10000 x i32]* @v1, i64 0, i64 %tmp6
%scevgep78 = bitcast i32* %scevgep7 to <4 x i32>*
; Next value of the vector induction variable (each lane + 4).
%vect_vec_iv_.8_24 = add nsw <4 x i32> %vect_vec_iv_.8_10, <i32 4, i32 4, i32 4, i32 4>
; Store the current (pre-increment) vector into v1[%tmp6 .. %tmp6+3].
store <4 x i32> %vect_vec_iv_.8_10, <4 x i32>* %scevgep78, align 16
%indvar.next3 = add i64 %indvar2, 1
; Leave the loop once 2500 vectors (2500 * 4 = 10000 elements) have been written.
%exitcond4 = icmp eq i64 %indvar.next3, 2500
br i1 %exitcond4, label %"<bb 5>", label %"<bb 3>"
; Scalar print loop: one element of @v1 per iteration, 10000 iterations.
"<bb 5>": ; preds = %"<bb 3>", %"<bb 5>"
; %indvar: element index, starts at 0 and is incremented by 1 per iteration.
%indvar = phi i64 [ %indvar.next, %"<bb 5>" ], [ 0, %"<bb 3>" ]
; Load v1[%indvar] and pass it to printf with the "%d " format string.
%scevgep = getelementptr [10000 x i32]* @v1, i64 0, i64 %indvar
%D.3943_6 = load i32* %scevgep, align 4
%0 = tail call i32 (i8*, ...)* @printf(i8* getelementptr inbounds ([4 x i8]* @.cst, i64 0, i64 0), i32 %D.3943_6) nounwind
%indvar.next = add i64 %indvar, 1
; Leave the loop after all 10000 elements have been printed.
%exitcond = icmp eq i64 %indvar.next, 10000
br i1 %exitcond, label %"<bb 6>", label %"<bb 5>"
; Function exit: return 0.
"<bb 6>": ; preds = %"<bb 5>"
ret i32 0
}
; External declaration of the variadic printf called from @main.
declare i32 @printf(i8* nocapture, ...) nounwind