Hi,
I was looking at the loop vectorizer instruction costs and found that a vector load that was scalarized was getting a cost of 2 * VF. This was because the cost was computed as 1 for each scalar load plus 1 for each extracted operand. However, that operand was itself scalarized, so there was actually no cost for any operand extraction.
Since this gives a considerable difference for a small loop with high VFs, I wanted to make a patch that calls getOperandsScalarizationOverhead() only with non-scalar (vectorized) operands. So I modified getScalarizationOverhead() as shown below. However, when using it I hit the assert "Scalar values are not calculated for VF".
I wonder whether this is just too difficult to implement right now, or whether there is a way to do it. Basically, I think setCostBasedWideningDecision() would have to be called after collectLoopScalars(), but it seems there are some dependencies there that would make this difficult?
/Jonas
/// Estimate the overhead of scalarizing an instruction. This is a
/// convenience wrapper for the type-based getScalarizationOverhead API.
-static unsigned getScalarizationOverhead(Instruction *I, unsigned VF,
- const TargetTransformInfo &TTI) {
+unsigned LoopVectorizationCostModel::
+getScalarizationOverhead(Instruction *I, unsigned VF,
+ const TargetTransformInfo &TTI) {
if (VF == 1)
return 0;
unsigned Cost = 0;
Type *RetTy = ToVectorTy(I->getType(), VF);
if (!RetTy->isVoidTy() &&
(!isa<LoadInst>(I) ||
!TTI.supportsEfficientVectorElementLoadStore()))
Cost += TTI.getScalarizationOverhead(RetTy, true, false);
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- SmallVector<const Value *, 4> Operands(CI->arg_operands());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
- }
+ SmallVector<Value *, 4> Operands;
+ if (CallInst *CI = dyn_cast<CallInst>(I))
+ Operands.assign(CI->op_begin(), CI->op_end());
else if (!isa<StoreInst>(I) ||
- !TTI.supportsEfficientVectorElementLoadStore()) {
- SmallVector<const Value *, 4> Operands(I->operand_values());
- Cost += TTI.getOperandsScalarizationOverhead(Operands, VF);
+ !TTI.supportsEfficientVectorElementLoadStore())
+ Operands.assign(I->value_op_begin(), I->value_op_end());
+ SmallVector<Value *, 4> NonScalarOperands;
+ for (Value *Op : Operands) {
+ if (auto *I = dyn_cast<Instruction>(Op))
+ if (isScalarAfterVectorization(I, VF) || isProfitableToScalarize(I, VF))
+ continue;
+ NonScalarOperands.push_back(Op);
}
+ Cost += TTI.getOperandsScalarizationOverhead(NonScalarOperands, VF);
return Cost;
}