Conversion from Tosa to Linalg

Hi everyone,
I’m trying to convert TOSA IR, generated by Torch-MLIR for a simple model with a single conv2d layer, to Linalg. I’m trying to use the built-in conversion from MLIR, but I’m getting an error:

./my_mlir_parser /home/vimal/personal/MLIR-Read/samples/extTosa.mlir
loc("/home/vimal/personal/MLIR-Read/samples/extTosa.mlir":1:1): error: 'builtin.module' op trying to schedule a pass on an unsupported operation
Error: Tosa to Linalg lowering pass failed

The following is my main code,

#include "mlir/IR/Dialect.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"

#include "mlir/IR/DialectRegistry.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"

#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Support/FileUtilities.h"

#include "mlir/Pass/PassManager.h"
#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Transforms/Passes.h"

#include "llvm/Support/raw_ostream.h"

using namespace mlir;
using namespace llvm; 

int main(int argc, char ** argv) {
  MLIRContext ctx;
  ctx.loadDialect<func::FuncDialect, tosa::TosaDialect>();
  auto src = parseSourceFile<ModuleOp>(argv[1], &ctx);

  PassManager pm(&ctx);
  pm.addPass(tosa::createTosaToLinalg());

  if (failed(pm.run(*src))) {
    llvm::errs() << "Error: Tosa to Linalg lowering pass failed\n";
    return 1;
  }
  
  src->dump();
  return 0;
}

and finally the mlir file I generated from torch-mlir,

module attributes {torch.debug_module_name = "SimpleConvNet"} {
  func.func @forward(%arg0: tensor<1x3x225x225xf32>) -> tensor<1x16x225x225xf32> {
    %0 = "tosa.const"() <{value = dense<[-0.182676569, -0.177921489, -0.0139302155, -0.183302373, 0.12598449, 0.187495232, -0.0370999128, 0.184032455, 0.0485874861, -0.0185722671, 0.147716835, 0.175081506, -0.0600383803, 0.0901354774, 0.0397139303, -0.00391211128]> : tensor<16xf32>}> : () -> tensor<16xf32>
    %1 = "tosa.const"() <{value = dense<"0xtensor<16x3x3x3xf32>}> : () -> tensor<16x3x3x3xf32>
    %2 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32>
    %3 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
    %4 = tosa.transpose %arg0, %2 : (tensor<1x3x225x225xf32>, tensor<4xi32>) -> tensor<1x225x225x3xf32>
    %5 = tosa.transpose %1, %2 : (tensor<16x3x3x3xf32>, tensor<4xi32>) -> tensor<16x3x3x3xf32>
    %6 = tosa.conv2d %4, %5, %0 {dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>} : (tensor<1x225x225x3xf32>, tensor<16x3x3x3xf32>, tensor<16xf32>) -> tensor<1x225x225x16xf32>
    %7 = tosa.transpose %6, %3 : (tensor<1x225x225x16xf32>, tensor<4xi32>) -> tensor<1x16x225x225xf32>
    return %7 : tensor<1x16x225x225xf32>
  }
}

It would be very helpful for my understanding of MLIR if anyone could help me with this.

Thanks
Vimal William

Hey Vimal,

I believe the TosaToLinalg pass is a FunctionOpInterface pass, meaning it needs to run on operations that implement the function interface. I think the pass manager you’ve created here is going to run at the builtin.module level, rather than at the level of the func.func nested within it. Could you try calling pm.addNestedPass<func::FuncOp>(tosa::createTosaToLinalg()) and see if you get the same error?

More generally not everything from TOSA will legalize to LinAlg, you might want to try calling mlir::tosa::addTosaToLinalgPasses(pm, ...) instead since I think that adds a bunch of other passes handling things like broadcasting and canonicalization that should complete the lowering.

I’ve not tested this locally so could be that the issue is elsewhere but worth a first try :crossed_fingers:

Cheers,
Jack.

1 Like

Thanks for your question! A few things to unpack here - your example contains more than just LinAlg content. As @FranklandJack mentioned, some passes are anchored on FunctionOpInterface, but that can be overcome.

The larger problem is that a sequence of passes need to be called, since TOSA is decomposed to LinAlg, Arith, Tensor and SCF. This can be seen in the pass pipeline construction in Torch-MLIR’s Tosa To Linalg backend: torch-mlir/projects/pt1/python/torch_mlir_e2e_test/tosa_backends/linalg_on_tensors.py at main · llvm/torch-mlir · GitHub

This can be replicated with the core mlir-opt using:

$ ./mlir-opt -pass-pipeline="builtin.module(func.func(tosa-to-arith,tosa-to-scf,tosa-to-linalg-named,tosa-to-linalg,tosa-to-tensor,tosa-to-arith))" tosa-to-linalg.mlir
#map = affine_map<(d0, d1, d2, d3) -> (d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module attributes {torch.debug_module_name = "SimpleConvNet"} {
  func.func @forward(%arg0: tensor<1x3x225x225xf32>) -> tensor<1x16x225x225xf32> {
    %cst = arith.constant dense<[-0.182676569, -0.177921489, -0.0139302155, -0.183302373, 0.12598449, 0.187495232, -0.0370999128, 0.184032455, 0.0485874861, -0.0185722671, 0.147716835, 0.175081506, -0.0600383803, 0.0901354774, 0.0397139303, -0.00391211128]> : tensor<16xf32>
    %cst_0 = arith.constant dense<"0xtensor<16x3x3x3xf32>
    %cst_1 = arith.constant dense<[0, 2, 3, 1]> : tensor<4xi32>
    %cst_2 = arith.constant dense<[0, 3, 1, 2]> : tensor<4xi32>
    %0 = tensor.empty() : tensor<1x225x225x3xf32>
    %transposed = linalg.transpose ins(%arg0 : tensor<1x3x225x225xf32>) outs(%0 : tensor<1x225x225x3xf32>) permutation = [0, 2, 3, 1]
    %1 = tensor.empty() : tensor<16x3x3x3xf32>
    %transposed_3 = linalg.transpose ins(%cst_0 : tensor<16x3x3x3xf32>) outs(%1 : tensor<16x3x3x3xf32>) permutation = [0, 2, 3, 1]
    %cst_4 = arith.constant 0.000000e+00 : f32
    %padded = tensor.pad %transposed low[0, 1, 1, 0] high[0, 1, 1, 0] {
    ^bb0(%arg1: index, %arg2: index, %arg3: index, %arg4: index):
      tensor.yield %cst_4 : f32
    } : tensor<1x225x225x3xf32> to tensor<1x227x227x3xf32>
    %2 = tensor.empty() : tensor<1x225x225x16xf32>
    %3 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst : tensor<16xf32>) outs(%2 : tensor<1x225x225x16xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x225x225x16xf32>
    %4 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%padded, %transposed_3 : tensor<1x227x227x3xf32>, tensor<16x3x3x3xf32>) outs(%3 : tensor<1x225x225x16xf32>) -> tensor<1x225x225x16xf32>
    %5 = tensor.empty() : tensor<1x16x225x225xf32>
    %transposed_5 = linalg.transpose ins(%4 : tensor<1x225x225x16xf32>) outs(%5 : tensor<1x16x225x225xf32>) permutation = [0, 3, 1, 2]
    return %transposed_5 : tensor<1x16x225x225xf32>
  }
}

@FranklandJack and @sjarus, thanks for your help. Let me try this, and I’ll come back if I run into any errors. Thanks

I updated the code based on your suggestions and it ran without any error during pm.run. I wanted to see the lowered IR, but I have no clue how to enable that. Finally, I tried enableIRPrinting, but that did not seem to work either:

#include "mlir/IR/Dialect.h"
#include "mlir/Dialect/Tosa/IR/TosaOps.h"
#include "mlir/Dialect/Func/IR/FuncOps.h"

#include "mlir/IR/DialectRegistry.h"
#include "mlir/Tools/mlir-opt/MlirOptMain.h"

#include "mlir/IR/AsmState.h"
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "mlir/Parser/Parser.h"
#include "mlir/Support/FileUtilities.h"
#include "mlir/Transforms/Passes.h"

#include "mlir/Pass/PassManager.h"
#include "mlir/Conversion/TosaToLinalg/TosaToLinalg.h"
#include "mlir/Transforms/Passes.h"

#include "llvm/Support/raw_ostream.h"

using namespace mlir;
using namespace llvm;

/// Runs the upstream TOSA-to-Linalg pass pipeline on `module`, in place.
///
/// IR printing is enabled on the pass manager. If the pipeline fails, a
/// message is written to llvm::errs(); no status is returned to the caller.
void lowerTosaToLinalg(ModuleOp module) {
  PassManager pm(module.getContext());
  // NOTE(review): IR printing is expected to need multithreading disabled on
  // the context (the caller in this file does so) - confirm for your version.
  pm.enableIRPrinting(); // Enable IR printing

  TosaToLinalgOptions options; // Default options
  TosaToLinalgNamedOptions namedOptions; // Default named options
  tosa::TosaValidationOptions validationOptions; // Default validation options

  // Add the pipeline directly to the module-level pass manager. Going through
  // pm.nest<ModuleOp>() would anchor it on modules nested *inside* `module`;
  // with no such nested modules, no pass runs and the IR comes out unchanged.
  tosa::addTosaToLinalgPasses(pm, options, namedOptions, validationOptions);

  // Run the pass pipeline.
  if (failed(pm.run(module)))
    llvm::errs() << "Failed to lower TOSA to Linalg\n";
}

int main(int argc, char ** argv) {
  MLIRContext ctx;
  ctx.disableMultithreading(true);
  ctx.loadDialect<func::FuncDialect, tosa::TosaDialect>();
  auto src = parseSourceFile<ModuleOp>(argv[1], &ctx);

  lowerTosaToLinalg(src.get());
//   src->print(llvm::outs());



//   src->dump();
  return 0;
}

If there is any way to turn it on, kindly let me know please

This seems to be a general pass manager construct issue, not related to TosaToLinalg anymore. You can look at the toy example: llvm-project/mlir/examples/toy/Ch7 at main · llvm/llvm-project · GitHub

For this particular situation, the following after conversions ought to emit IR:

module->dump(); 
===-------------------------------------------------------------------------===
                         ... Pass statistics report ...
===-------------------------------------------------------------------------===
'builtin.module' Pipeline
  'func.func' Pipeline
    TosaOptionalDecompositions
    Canonicalizer
    TosaInferShapes
    TosaMakeBroadcastable
    TosaToLinalgNamed
    Canonicalizer
    TosaLayerwiseConstantFoldPass
    TosaMakeBroadcastable
  TosaValidation
  'func.func' Pipeline
    TosaToLinalg

module attributes {torch.debug_module_name = "SimpleConvNet"} {
  func.func @forward(%arg0: tensor<1x3x225x225xf32>) -> tensor<1x16x225x225xf32> {
    %0 = "tosa.const"() <{value = dense<[-0.182676569, -0.177921489, -0.0139302155, -0.183302373, 0.12598449, 0.187495232, -0.0370999128, 0.184032455, 0.0485874861, -0.0185722671, 0.147716835, 0.175081506, -0.0600383803, 0.0901354774, 0.0397139303, -0.00391211128]> : tensor<16xf32>}> : () -> tensor<16xf32>
    %1 = "tosa.const"() <{value = dense<"0xtensor<16x3x3x3xf32>}> : () -> tensor<16x3x3x3xf32>
    %2 = "tosa.const"() <{value = dense<[0, 2, 3, 1]> : tensor<4xi32>}> : () -> tensor<4xi32>
    %3 = "tosa.const"() <{value = dense<[0, 3, 1, 2]> : tensor<4xi32>}> : () -> tensor<4xi32>
    %4 = tosa.transpose %arg0, %2 : (tensor<1x3x225x225xf32>, tensor<4xi32>) -> tensor<1x225x225x3xf32>
    %5 = tosa.transpose %1, %2 : (tensor<16x3x3x3xf32>, tensor<4xi32>) -> tensor<16x3x3x3xf32>
    %6 = tosa.conv2d %4, %5, %0 {dilation = array<i64: 1, 1>, pad = array<i64: 1, 1, 1, 1>, stride = array<i64: 1, 1>} : (tensor<1x225x225x3xf32>, tensor<16x3x3x3xf32>, tensor<16xf32>) -> tensor<1x225x225x16xf32>
    %7 = tosa.transpose %6, %3 : (tensor<1x225x225x16xf32>, tensor<4xi32>) -> tensor<1x16x225x225xf32>
    return %7 : tensor<1x16x225x225xf32>
  }
}

It’s dumping the same IR as the input?

Hey Vimal,

It’s tricky to know what is going on here without building the example but as @sjarus said you should be able to explicitly dump your module after the pipeline has run.

In your original example you had the line src->dump() commented out, you might want to try uncommenting that line and rebuilding the example.

Cheers,
Jack.

Thanks @sjarus and @FranklandJack for the guidance,

#map = affine_map<(d0, d1, d2, d3) -> (d3)>
#map1 = affine_map<(d0, d1, d2, d3) -> (d0, d1, d2, d3)>
module attributes {torch.debug_module_name = "SimpleConvNet"} {
  func.func @forward(%arg0: tensor<1x3x225x225xf32>) -> tensor<1x16x225x225xf32> {
    %cst = arith.constant 0.000000e+00 : f32
    %cst_0 = arith.constant dense<[-0.182676569, -0.177921489, -0.0139302155, -0.183302373, 0.12598449, 0.187495232, -0.0370999128, 0.184032455, 0.0485874861, -0.0185722671, 0.147716835, 0.175081506, -0.0600383803, 0.0901354774, 0.0397139303, -0.00391211128]> : tensor<16xf32>
    %cst_1 = arith.constant dense<"0xtensor<16x3x3x3xf32>
    %0 = bufferization.alloc_tensor() : tensor<1x225x225x3xf32>
    %transposed = linalg.transpose ins(%arg0 : tensor<1x3x225x225xf32>) outs(%0 : tensor<1x225x225x3xf32>) permutation = [0, 2, 3, 1] 
    %1 = bufferization.alloc_tensor() : tensor<16x3x3x3xf32>
    %transposed_2 = linalg.transpose ins(%cst_1 : tensor<16x3x3x3xf32>) outs(%1 : tensor<16x3x3x3xf32>) permutation = [0, 2, 3, 1] 
    %2 = bufferization.alloc_tensor() : tensor<1x227x227x3xf32>
    %3 = linalg.fill ins(%cst : f32) outs(%2 : tensor<1x227x227x3xf32>) -> tensor<1x227x227x3xf32>
    %inserted_slice = tensor.insert_slice %transposed into %3[0, 1, 1, 0] [1, 225, 225, 3] [1, 1, 1, 1] : tensor<1x225x225x3xf32> into tensor<1x227x227x3xf32>
    %4 = bufferization.alloc_tensor() : tensor<1x225x225x16xf32>
    %5 = linalg.generic {indexing_maps = [#map, #map1], iterator_types = ["parallel", "parallel", "parallel", "parallel"]} ins(%cst_0 : tensor<16xf32>) outs(%4 : tensor<1x225x225x16xf32>) {
    ^bb0(%in: f32, %out: f32):
      linalg.yield %in : f32
    } -> tensor<1x225x225x16xf32>
    %6 = linalg.conv_2d_nhwc_fhwc {dilations = dense<1> : tensor<2xi64>, strides = dense<1> : tensor<2xi64>} ins(%inserted_slice, %transposed_2 : tensor<1x227x227x3xf32>, tensor<16x3x3x3xf32>) outs(%5 : tensor<1x225x225x16xf32>) -> tensor<1x225x225x16xf32>
    %7 = bufferization.alloc_tensor() : tensor<1x16x225x225xf32>
    %transposed_3 = linalg.transpose ins(%6 : tensor<1x225x225x16xf32>) outs(%7 : tensor<1x16x225x225xf32>) permutation = [0, 3, 1, 2] 
    return %transposed_3 : tensor<1x16x225x225xf32>
  }
}
After some code modifications and some updates to the pipeline, I can now get the pass to complete.