Hello, everyone.
I'm new to MLIR, and I'm currently trying to tile a linalg.batch_matmul over a big tensor into several batch_matmuls over small tensors, then sum or concatenate the results of the small batch_matmuls.
That is: [b, m, k] · [b, k, n] = [b, m, n] is tiled into pieces [b/x, m/y, k/z] · [b/x, k/z, n/q] = [b/x, m/y, n/q], which are then combined (summed over the k tiles, concatenated along the other tiled dimensions) back into [b, m, n].
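To spell out my understanding of the decomposition (my own notation, not from any MLIR documentation): tiles of b, m, and n land in disjoint slices of the output and can simply be concatenated, while tiling k splits the reduction, so the per-tile results must be summed:

$$
C[b, m, n] \;=\; \sum_{t=0}^{k/z - 1} \;\; \sum_{k' = tz}^{(t+1)z - 1} A[b, m, k'] \, B[b, k', n]
$$

i.e. each k-tile of size z contributes one partial [b, m, n] product.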
For example:
```mlir
func.func @main(%arg0 : tensor<1x50x16xf32>, %arg1 : tensor<1x16x32xf32>, %arg2 : tensor<1x50x32xf32>) -> tensor<1x50x32xf32> {
  %1 = linalg.batch_matmul ins(%arg0, %arg1 : tensor<1x50x16xf32>, tensor<1x16x32xf32>)
                           outs(%arg2 : tensor<1x50x32xf32>) -> tensor<1x50x32xf32>
  return %1 : tensor<1x50x32xf32>
}
```
```mlir
module attributes {transform.with_named_sequence} {
  transform.named_sequence @__transform_main(%arg1: !transform.any_op {transform.readonly}) {
    %func = transform.structured.match ops{["func.func"]} in %arg1 : (!transform.any_op) -> !transform.op<"func.func">
    %0 = transform.structured.match ops{["linalg.batch_matmul"]} in %func : (!transform.op<"func.func">) -> !transform.any_op
    // Tile sizes follow the iteration space order (b, m, n, k); the results
    // are the fill, the partial-reduction op, the merge op, and the loop.
    %1, %2, %3, %loop = transform.structured.tile_reduction_using_for %0
        by tile_sizes = [1, 25, 8, 4]
        : (!transform.any_op) -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
    transform.yield
  }
}
```
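(I run this script with `mlir-opt input.mlir --transform-interpreter`; the input file name is just a placeholder, and the pass name assumes a reasonably recent MLIR build.)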
I've already tried transform.structured.tile_using_for with the expected tile sizes, but the result seems to be incorrect, as discussed in the earlier forum thread "Question about linalg matmul tile method".
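For reference, that first attempt looked roughly like this (a sketch from memory; the exact assembly of transform.structured.tile_using_for has changed across MLIR versions, so treat the spelling as an assumption):

```mlir
// Plain tiling of all four dimensions (b, m, n, k), reduction included;
// one loop handle is returned per non-zero tile size.
%tiled, %loops:4 = transform.structured.tile_using_for %0 tile_sizes [1, 25, 8, 4]
    : (!transform.any_op)
    -> (!transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op, !transform.any_op)
```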
Then I tried transform.structured.tile_reduction_using_for instead, and got this:
```mlir
#map = affine_map<(d0, d1, d2, d3) -> (d0, d1, d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d3, d2)>
#map2 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module {
  func.func @main(%arg0: tensor<1x50x16xf32>, %arg1: tensor<1x16x32xf32>, %arg2: tensor<1x50x32xf32>) -> tensor<1x50x32xf32> {
    %c4 = arith.constant 4 : index
    %c8 = arith.constant 8 : index
    %c25 = arith.constant 25 : index
    %c16 = arith.constant 16 : index
    %c32 = arith.constant 32 : index
    %c50 = arith.constant 50 : index
    %c0 = arith.constant 0 : index
    %cst = arith.constant 0.000000e+00 : f32
    %0 = tensor.empty() : tensor<1x50x32x4xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<1x50x32x4xf32>) -> tensor<1x50x32x4xf32>
    %2 = scf.for %arg3 = %c0 to %c50 step %c25 iter_args(%arg4 = %1) -> (tensor<1x50x32x4xf32>) {
      %3 = scf.for %arg5 = %c0 to %c32 step %c8 iter_args(%arg6 = %arg4) -> (tensor<1x50x32x4xf32>) {
        %4 = scf.for %arg7 = %c0 to %c16 step %c4 iter_args(%arg8 = %arg6) -> (tensor<1x50x32x4xf32>) {
          %extracted_slice = tensor.extract_slice %arg0[0, %arg3, %arg7] [1, 25, 4] [1, 1, 1] : tensor<1x50x16xf32> to tensor<1x25x4xf32>
          %extracted_slice_0 = tensor.extract_slice %arg1[0, %arg7, %arg5] [1, 4, 8] [1, 1, 1] : tensor<1x16x32xf32> to tensor<1x4x8xf32>
          %extracted_slice_1 = tensor.extract_slice %arg8[0, 0, 0, 0] [1, 25, 8, 4] [1, 1, 1, 1] : tensor<1x50x32x4xf32> to tensor<1x25x8x4xf32>
          %5 = linalg.generic {indexing_maps = [#map, #map1, #map2], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%extracted_slice, %extracted_slice_0 : tensor<1x25x4xf32>, tensor<1x4x8xf32>) outs(%extracted_slice_1 : tensor<1x25x8x4xf32>) {
          ^bb0(%in: f32, %in_2: f32, %out: f32):
            %6 = arith.mulf %in, %in_2 : f32
            %7 = arith.addf %out, %6 : f32
            linalg.yield %7 : f32
          } -> tensor<1x25x8x4xf32>
          %inserted_slice = tensor.insert_slice %5 into %arg8[0, 0, 0, 0] [1, 25, 8, 4] [1, 1, 1, 1] : tensor<1x25x8x4xf32> into tensor<1x50x32x4xf32>
          scf.yield %inserted_slice : tensor<1x50x32x4xf32>
        }
        scf.yield %4 : tensor<1x50x32x4xf32>
      }
      scf.yield %3 : tensor<1x50x32x4xf32>
    }
    %reduced = linalg.reduce ins(%2 : tensor<1x50x32x4xf32>) outs(%arg2 : tensor<1x50x32xf32>) dimensions = [3]
      (%in: f32, %init: f32) {
        %3 = arith.addf %in, %init : f32
        linalg.yield %3 : f32
      }
    return %reduced : tensor<1x50x32xf32>
  }
}
```
It's correct! But the per-tile computation is now a linalg.generic, which loses the matmul semantics: later passes can no longer tell that each small-tensor kernel is still a (batch) matmul.
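Ideally the tiled IR would keep a linalg.batch_matmul on each slice. A hand-written sketch of what I mean (not the output of any existing pass; tiling only the k dimension for brevity, and relying on linalg.batch_matmul accumulating into its outs operand):

```mlir
func.func @main_sketch(%arg0: tensor<1x50x16xf32>, %arg1: tensor<1x16x32xf32>,
                       %arg2: tensor<1x50x32xf32>) -> tensor<1x50x32xf32> {
  %c0 = arith.constant 0 : index
  %c4 = arith.constant 4 : index
  %c16 = arith.constant 16 : index
  // Walk the reduction dimension k in tiles of 4.
  %res = scf.for %k = %c0 to %c16 step %c4 iter_args(%acc = %arg2) -> (tensor<1x50x32xf32>) {
    %lhs = tensor.extract_slice %arg0[0, 0, %k] [1, 50, 4] [1, 1, 1] : tensor<1x50x16xf32> to tensor<1x50x4xf32>
    %rhs = tensor.extract_slice %arg1[0, %k, 0] [1, 4, 32] [1, 1, 1] : tensor<1x16x32xf32> to tensor<1x4x32xf32>
    // batch_matmul adds into %acc, so the partial products over the k-tiles
    // are summed across loop iterations, matching C += A_k * B_k.
    %partial = linalg.batch_matmul ins(%lhs, %rhs : tensor<1x50x4xf32>, tensor<1x4x32xf32>)
                                   outs(%acc : tensor<1x50x32xf32>) -> tensor<1x50x32xf32>
    scf.yield %partial : tensor<1x50x32xf32>
  }
  return %res : tensor<1x50x32xf32>
}
```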
Could you provide suggestions or point me towards resources that might help achieve this? Any insights or examples would be greatly appreciated.
Thank you in advance for your time and assistance.