While prototyping a dialect, I am using a function call as a proxy for a custom operation.
For reference I’m using LLVM version 14.0.6.
To be able to inline the function call inside an affine.for, I thought about wrapping the call in scf.execute_region:
// Type aliases: 2-D and 3-D f64 memrefs whose sizes, offset, and strides are
// all dynamic.
!type2d = type memref<?x?xf64, offset:?, strides:[?, ?]>
!type3d = type memref<?x?x?xf64, offset:?, strides:[?, ?, ?]>
// Private helper used as a stand-in for the custom operation being prototyped.
// It forwards its operands to linalg.matmul: %a and %b are the inputs and %c
// is the output buffer. It returns no values.
func private @matmul(%a: !type2d,
%b: !type2d,
%c: !type2d) {
linalg.matmul ins(%a, %b : !type2d, !type2d) outs(%c : !type2d) -> ()
return
}
// Calls @matmul once per index along dimension 0 of %a. For each index %e a
// subview of %a and a subview of %c are taken at offset %e and passed,
// together with the shared operand %b, to @matmul.
func @batched_matmul(%a: !type3d,
%b: !type2d,
%c: !type3d) {
// Index constants used to select which dimension memref.dim queries.
%d0 = arith.constant 0 : index
%d1 = arith.constant 1 : index
%d2 = arith.constant 2 : index
// Dynamic sizes: %E, %M, %K are dimensions 0, 1, 2 of %a; %N is dimension 1
// of %b.
%E = memref.dim %a, %d0 : !type3d
%M = memref.dim %a, %d1 : !type3d
%K = memref.dim %a, %d2 : !type3d
%N = memref.dim %b, %d1 : !type2d
affine.for %e = 0 to %E {
// Subviews of size 1 x %M x %K (from %a) and 1 x %M x %N (from %c) at
// offset %e along dimension 0, with unit strides; the result type is 2-D.
%ea = memref.subview %a[%e, 0, 0][1, %M, %K][1, 1, 1]: !type3d to !type2d
%ec = memref.subview %c[%e, 0, 0][1, %M, %N][1, 1, 1]: !type3d to !type2d
// The call is wrapped in scf.execute_region with the intent of allowing it
// to be inlined while nested inside the affine.for.
scf.execute_region -> () {
call @matmul(%ea, %b, %ec) : (!type2d, !type2d, !type2d) -> ()
scf.yield
}
}
return
}
With mlir-opt -inline, the function call is not inlined; instead, the scf.execute_region gets removed:
#map0 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
#map1 = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
module {
func private @matmul(%arg0: memref<?x?xf64, #map0>, %arg1: memref<?x?xf64, #map0>, %arg2: memref<?x?xf64, #map0>) {
linalg.matmul ins(%arg0, %arg1 : memref<?x?xf64, #map0>, memref<?x?xf64, #map0>) outs(%arg2 : memref<?x?xf64, #map0>)
return
}
func @batched_matmul(%arg0: memref<?x?x?xf64, #map1>, %arg1: memref<?x?xf64, #map0>, %arg2: memref<?x?x?xf64, #map1>) {
%c2 = arith.constant 2 : index
%c1 = arith.constant 1 : index
%c0 = arith.constant 0 : index
%0 = memref.dim %arg0, %c0 : memref<?x?x?xf64, #map1>
%1 = memref.dim %arg0, %c1 : memref<?x?x?xf64, #map1>
%2 = memref.dim %arg0, %c2 : memref<?x?x?xf64, #map1>
%3 = memref.dim %arg1, %c1 : memref<?x?xf64, #map0>
affine.for %arg3 = 0 to %0 {
%4 = memref.subview %arg0[%arg3, 0, 0] [1, %1, %2] [1, 1, 1] : memref<?x?x?xf64, #map1> to memref<?x?xf64, #map0>
%5 = memref.subview %arg2[%arg3, 0, 0] [1, %1, %3] [1, 1, 1] : memref<?x?x?xf64, #map1> to memref<?x?xf64, #map0>
call @matmul(%4, %arg1, %5) : (memref<?x?xf64, #map0>, memref<?x?xf64, #map0>, memref<?x?xf64, #map0>) -> ()
}
return
}
}
If I instead first lower the affine.for to an scf.for (with mlir-opt -lower-affine -inline), the function call gets inlined as I would have expected:
#map0 = affine_map<(d0, d1, d2)[s0, s1, s2, s3] -> (d0 * s1 + s0 + d1 * s2 + d2 * s3)>
#map1 = affine_map<(d0, d1)[s0, s1, s2] -> (d0 * s1 + s0 + d1 * s2)>
module {
func @batched_matmul(%arg0: memref<?x?x?xf64, #map0>, %arg1: memref<?x?xf64, #map1>, %arg2: memref<?x?x?xf64, #map0>) {
%c2 = arith.constant 2 : index
%c0 = arith.constant 0 : index
%c1 = arith.constant 1 : index
%0 = memref.dim %arg0, %c0 : memref<?x?x?xf64, #map0>
%1 = memref.dim %arg0, %c1 : memref<?x?x?xf64, #map0>
%2 = memref.dim %arg0, %c2 : memref<?x?x?xf64, #map0>
%3 = memref.dim %arg1, %c1 : memref<?x?xf64, #map1>
scf.for %arg3 = %c0 to %0 step %c1 {
%4 = memref.subview %arg0[%arg3, 0, 0] [1, %1, %2] [1, 1, 1] : memref<?x?x?xf64, #map0> to memref<?x?xf64, #map1>
%5 = memref.subview %arg2[%arg3, 0, 0] [1, %1, %3] [1, 1, 1] : memref<?x?x?xf64, #map0> to memref<?x?xf64, #map1>
linalg.matmul ins(%4, %arg1 : memref<?x?xf64, #map1>, memref<?x?xf64, #map1>) outs(%5 : memref<?x?xf64, #map1>)
}
return
}
}
Is this expected behavior?