Hi,
Because the error I encountered involves mlir-opt,
I am posting the problem here.
I apologize if I have misunderstood the forum rules.
I am trying to modify the IR after the LLVMCPUTileAndVectorize pass
(in the IREE project).
Specifically, I want to insert some operations into the IR so I can trace where the load operations happen.
Here is my IR.
// -----// IR Dump After LLVMCPUTileAndVectorize //----- //
func @conv2d_1452x1122_same_dispatch_0() {
%cst = arith.constant 0.000000e+00 : f32
%c20 = arith.constant 20 : index
%c2 = arith.constant 2 : index
%c0 = arith.constant 0 : index
%c8 = arith.constant 8 : index
%c1 = arith.constant 1 : index
%0 = hal.interface.binding.subspan @io::@s0b0[%c0] {alignment = 32 : index} : !flow.dispatch.tensor<readonly:20x2xf32>
%1 = hal.interface.binding.subspan @io::@s0b1[%c0] {alignment = 32 : index} : !flow.dispatch.tensor<readonly:2x2xf32>
%2 = hal.interface.binding.subspan @io::@s0b2[%c0] {alignment = 32 : index} : !flow.dispatch.tensor<writeonly:20x2xf32>
%workgroup_id_x = hal.interface.workgroup.id[0] : index
%workgroup_count_x = hal.interface.workgroup.count[0] : index
%workgroup_id_y = hal.interface.workgroup.id[1] : index
%workgroup_count_y = hal.interface.workgroup.count[1] : index
%3 = affine.apply affine_map<()[s0] -> (s0 * 8)>()[%workgroup_id_y]
%4 = affine.apply affine_map<()[s0] -> (s0 * 8)>()[%workgroup_count_y]
scf.for %arg0 = %3 to %c20 step %4 {
%5 = affine.apply affine_map<()[s0] -> (s0 * 8)>()[%workgroup_id_x]
%6 = affine.apply affine_map<()[s0] -> (s0 * 8)>()[%workgroup_count_x]
scf.for %arg1 = %5 to %c2 step %6 {
%7 = affine.min affine_map<(d0) -> (8, -d0 + 20)>(%arg0)
%8 = flow.dispatch.tensor.load %0, offsets = [%arg0, 0], sizes = [%7, 2], strides = [1, 1] : !flow.dispatch.tensor<readonly:20x2xf32> -> tensor<?x2xf32>
%9 = affine.min affine_map<(d0) -> (8, -d0 + 2)>(%arg1)
%10 = flow.dispatch.tensor.load %1, offsets = [0, %arg1], sizes = [2, %9], strides = [1, 1] : !flow.dispatch.tensor<readonly:2x2xf32> -> tensor<2x?xf32>
// -----// load information here
%11 = affine.min affine_map<(d0) -> (-d0 + 20, 8)>(%arg0)
%12 = affine.min affine_map<(d0) -> (-d0 + 2, 8)>(%arg1)
%13 = linalg.init_tensor [%11, %12] : tensor<?x?xf32>
%14 = linalg.fill(%cst, %13) : f32, tensor<?x?xf32> -> tensor<?x?xf32>
%15 = tensor.dim %8, %c0 : tensor<?x2xf32>
%16 = tensor.dim %10, %c1 : tensor<2x?xf32>
%17 = scf.for %arg2 = %c0 to %15 step %c8 iter_args(%arg3 = %14) -> (tensor<?x?xf32>) {
%18 = scf.for %arg4 = %c0 to %16 step %c8 iter_args(%arg5 = %arg3) -> (tensor<?x?xf32>) {
%19 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg2, %15)
%20 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg4, %16)
%21 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg2, %15)
%22 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg4, %16)
%23 = tensor.extract_slice %arg5[%arg2, %arg4] [%21, %22] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%24 = scf.for %arg6 = %c0 to %19 step %c8 iter_args(%arg7 = %23) -> (tensor<?x?xf32>) {
%26 = scf.for %arg8 = %c0 to %20 step %c8 iter_args(%arg9 = %arg7) -> (tensor<?x?xf32>) {
%27 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg6, %19)
%28 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%arg6, %arg2)
%29 = tensor.extract_slice %8[%28, 0] [%27, 2] [1, 1] : tensor<?x2xf32> to tensor<?x2xf32>
%30 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg8, %20)
%31 = affine.apply affine_map<(d0, d1) -> (d0 + d1)>(%arg8, %arg4)
%32 = tensor.extract_slice %10[0, %31] [2, %30] [1, 1] : tensor<2x?xf32> to tensor<2x?xf32>
%33 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg6, %19)
%34 = affine.min affine_map<(d0, d1) -> (8, -d0 + d1)>(%arg8, %20)
%35 = tensor.extract_slice %arg9[%arg6, %arg8] [%33, %34] [1, 1] : tensor<?x?xf32> to tensor<?x?xf32>
%36 = linalg.matmul {__internal_linalg_transform__ = "vectorize", lowering.config = #iree_codegen.lowering.config<tile_sizes = [[], [8, 8, 8], [8, 8, 8]], native_vector_size = [8, 8, 8]>} ins(%29, %32 : tensor<?x2xf32>, tensor<2x?xf32>) outs(%35 : tensor<?x?xf32>) -> tensor<?x?xf32>
%37 = tensor.insert_slice %36 into %arg9[%arg6, %arg8] [%33, %34] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
scf.yield %37 : tensor<?x?xf32>
}
scf.yield %26 : tensor<?x?xf32>
}
%25 = tensor.insert_slice %24 into %arg5[%arg2, %arg4] [%21, %22] [1, 1] : tensor<?x?xf32> into tensor<?x?xf32>
scf.yield %25 : tensor<?x?xf32>
}
scf.yield %18 : tensor<?x?xf32>
}
flow.dispatch.tensor.store %17, %2, offsets = [%arg0, %arg1], sizes = [%7, %9], strides = [1, 1] : tensor<?x?xf32> -> !flow.dispatch.tensor<writeonly:20x2xf32>
}
}
return
}
Then I use command mlir-opt -convert-scf-to-std conv2D_LLVMCPUTileAndVectorize.mlir -o conv2D_LLVMCPUTileAndVectorize_std.mlir
and I get the following error:
conv2D_LLVMCPUTileAndVectorize.mlir:3:10: error: custom op 'arith.constant' is unknown
%cst = arith.constant 0.000000e+00 : f32
^
I found some similar problems on the forum, so I checked the version of mlir-opt
that I am using:
LLVM version 14.0.0git
It is a stable version from git.
How could I solve this problem?
Thanks!