Transforming wide integer computations back to vector computations

Hi Matt,

It seems that one of the optimization passes (probably SROA) sometimes transforms computations on vectors of ints into computations on wide integer types; for example, I’m seeing code like the following after optimizations (*):

%0 = bitcast <16 x i8> %float2uint to i128

%1 = shl i128 %0, 8

%ins = or i128 %1, 255

%2 = bitcast i128 %ins to <16 x i8>

This would probably be better expressed as a vector shuffle. What’s the testcase?

The bitcode below, then run through “opt -scalarrepl-ssa”, shows the behavior. The original computation was setting a small array of i8s to 0xff, then storing a vector value to elements 2-10 of the array, then loading elements 1-9 of the array and storing them into the %RET pointer. After optimization it had eliminated the array (and the load/store to/from it) entirely, and directly computes the combination of 0xff in the low element of the vector and then a shifted version of the original value to store in %RET.

Thanks,
-matt

; Module-level target description for the reproducer (x86-64 Darwin triple).
; String literals in LLVM assembly must be delimited by plain ASCII double
; quotes; typographic quotes make the module unparseable.
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128"
target triple = "x86_64-apple-darwin11.2.0"

; Reproducer for the wide-integer rewrite discussed above.
;
; Fills an 8-byte stack slot with 0xff bytes, overwrites bytes 2..5 of it with
; four i8 values converted from the four floats at %aFOO, reads bytes 1..4 back
; as a <4 x i8>, converts those to float and stores the result through %RET.
; The value read back is therefore one 0xff byte followed by the first three
; lanes of %float2uint, i.e. the stored vector shifted up by one lane.
;
; %RET  - destination; written as a <4 x i32> (16 bytes) with align 4.
; %aFOO - source; read as a <4 x i32> (16 bytes) with align 4.
; %b    - not referenced anywhere in the body.
define void @f_fu(float* nocapture %RET, float* nocapture %aFOO, float %b) nounwind {
for_exit:
; 8-byte scratch slot, viewed both as an i64 and as [8 x i8].
%x = alloca i64, align 16
%tmpcast = bitcast i64* %x to [8 x i8]*
; i64 -1 sets every byte of the slot to 0xff.
store i64 -1, i64* %x, align 16
; Load four 32-bit lanes from %aFOO and reinterpret them as floats.
%ptr_cast_for_load = bitcast float* %aFOO to <4 x i32>*
%masked_load202 = load <4 x i32>* %ptr_cast_for_load, align 4
%gather_bitcast = bitcast <4 x i32> %masked_load202 to <4 x float>
; Convert each float lane to an unsigned 8-bit integer.
%float2uint = fptoui <4 x float> %gather_bitcast to <4 x i8>
; Store the four bytes at byte offset 2 of the slot (bytes 2..5).
%ptr190 = getelementptr [8 x i8]* %tmpcast, i64 0, i64 2
%ptrcast = bitcast i8* %ptr190 to <4 x i8>*
store <4 x i8> %float2uint, <4 x i8>* %ptrcast, align 2
; Load four bytes at byte offset 1 (bytes 1..4): byte 1 still holds 0xff from
; the initial store, bytes 2..4 hold the first three lanes stored above.
%ptr194 = getelementptr [8 x i8]* %tmpcast, i64 0, i64 1
%ptr_cast_for_load203 = bitcast i8* %ptr194 to <4 x i8>*
%masked_load195204 = load <4 x i8>* %ptr_cast_for_load203, align 1
; Convert the bytes back to float and store their bit pattern through %RET.
%uint2float = uitofp <4 x i8> %masked_load195204 to <4 x float>
%value2int = bitcast <4 x float> %uint2float to <4 x i32>
%ptrcast200 = bitcast float* %RET to <4 x i32>*
store <4 x i32> %value2int, <4 x i32>* %ptrcast200, align 4
ret void
}