MLIR Execution Engine Invoke with Tensor Types

Hi,

I have some linalg MLIR code that I want to lower to the LLVM dialect and then run using the MLIR ExecutionEngine. I want to invoke a function that took a tensor-typed parameter before lowering. Are there any examples (code or documentation) of invoking a function with tensor types from C++?

Thanks for any help

Check llvm-project/mlir/unittests/ExecutionEngine/Invoke.cpp at main in the llvm/llvm-project repository on GitHub.

Wow, thanks so much! However, this file does not include any examples of MLIR functions that output complex types, which is what I am having trouble with now.

I am looking to run something like:

func.func @my_func(...) -> tensor<2x3xf32> ... {
   ...
   return %result : tensor<2x3xf32>
}

The examples show how to retrieve output for primitive types like int and float, but not for complex types.
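For example, the AddInteger test in that file retrieves a scalar result roughly like this (a condensed sketch of the pattern; ExecutionEngine::result wraps the location the return value is written to):

int32_t result = 0;
// @foo computes %arg0 + %arg0 and is marked llvm.emit_c_interface.
llvm::Error error = jit->invoke("foo", 42, ExecutionEngine::result(result));
ASSERT_TRUE(!error);
EXPECT_EQ(result, 84);

I don't see how to do the same for a function whose result is a tensor.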

Can you also give an example with an op and the pass pipeline for lowering it?

Here is an example of the mlir:

func.func @add_tensors(%lhs : tensor<2x4xf32>, %rhs : tensor<2x4xf32>) -> tensor<2x4xf32> attributes { llvm.emit_c_interface } {
    %0 = arith.constant dense<3.000000e+00> : tensor<2x4xf32>
    %result = linalg.add ins(%lhs, %rhs : tensor<2x4xf32>, tensor<2x4xf32>)
                         outs(%0 : tensor<2x4xf32>) -> tensor<2x4xf32>
    return %result : tensor<2x4xf32>
}

Here is an example of the pass pipeline:

mlir-opt -one-shot-bufferize="bufferize-function-boundaries" -buffer-deallocation-pipeline -test-lower-to-llvm

TestLowerToLLVM is one of the passes defined in the LLVM repo for testing purposes. Note that one-shot-bufferize with bufferize-function-boundaries rewrites the tensor arguments and result of @add_tensors into memrefs, which is why the C++ test below passes memref descriptors rather than tensors.

You can try this:

#include "mlir/Dialect/Bufferization/Transforms/OneShotAnalysis.h"
#include "mlir/InitAllPasses.h"
#include "mlir/Dialect/Bufferization/Pipelines/Passes.h"
#include <iostream>
#include <string>

...

static void buildTestLowerToLLVM(OpPassManager &pm) {
  // TODO: it is feasible to scope lowering at arbitrary level and introduce
  // unrealized casts, but there needs to be the final module-wise cleanup in
  // the end. Keep module-level for now.

  // Blanket-convert any remaining high-level vector ops to loops if any remain.
  pm.addNestedPass<func::FuncOp>(createConvertVectorToSCFPass());
  // Blanket-convert any remaining linalg ops to loops if any remain.
  pm.addNestedPass<func::FuncOp>(createConvertLinalgToLoopsPass());
  // Blanket-convert any remaining affine ops if any remain.
  pm.addPass(createLowerAffinePass());
  // Convert SCF to CF (always needed).
  pm.addPass(createConvertSCFToCFPass());
  // Sprinkle some cleanups.
  pm.addPass(createCanonicalizerPass());
  pm.addPass(createCSEPass());
  // Convert vector to LLVM (always needed).
  pm.addPass(createConvertVectorToLLVMPass());
  // Convert Math to LLVM (always needed).
  pm.addNestedPass<func::FuncOp>(createConvertMathToLLVMPass());
  // Expand complicated MemRef operations before lowering them.
  pm.addPass(memref::createExpandStridedMetadataPass());
  // The expansion may create affine expressions. Get rid of them.
  pm.addPass(createLowerAffinePass());
  // Convert MemRef to LLVM (always needed).
  pm.addPass(createFinalizeMemRefToLLVMConversionPass());
  // Convert Func to LLVM (always needed).
  pm.addPass(createConvertFuncToLLVMPass());
  // Convert Index to LLVM (always needed).
  pm.addPass(createConvertIndexToLLVMPass());
  // Convert remaining unrealized_casts (always needed).
  pm.addPass(createReconcileUnrealizedCastsPass());
}


static LogicalResult tensorLowerToLLVMDialect(ModuleOp module) {
  PassManager pm(module->getName());

  // Bufferize across function boundaries so the tensor arguments and result
  // become memrefs.
  bufferization::OneShotBufferizationOptions bufferizationOptions;
  bufferizationOptions.bufferizeFunctionBoundaries = true;
  pm.addPass(bufferization::createOneShotBufferizePass(bufferizationOptions));

  // Insert deallocations for the buffers that bufferization introduced.
  bufferization::BufferDeallocationPipelineOptions deallocationOptions;
  bufferization::buildBufferDeallocationPipeline(pm, deallocationOptions);

  buildTestLowerToLLVM(pm);
  return pm.run(module);
}
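If it helps, the bufferization prefix of that mlir-opt line can also be parsed from its textual form rather than built pass by pass. A sketch, assuming the bufferization passes and pipelines were registered beforehand (registerAllPasses() from the already-included InitAllPasses.h covers them; test-lower-to-llvm itself is only registered by the test library, hence the hand-built buildTestLowerToLLVM above):

// Hypothetical alternative; parsePassPipeline comes from mlir/Pass/PassRegistry.h.
static LogicalResult tensorLowerToLLVMDialectFromString(ModuleOp module) {
  PassManager pm(module->getName());
  if (failed(parsePassPipeline(
          "one-shot-bufferize{bufferize-function-boundaries=1},"
          "buffer-deallocation-pipeline",
          pm)))
    return failure();
  buildTestLowerToLLVM(pm);
  return pm.run(module);
}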

TEST(MLIRExecutionEngine, SKIP_WITHOUT_JIT(AddTensor)) {
  std::string moduleStr = R"mlir(
  func.func @add_tensors(%lhs : tensor<2x4xf32>, %rhs : tensor<2x4xf32>) -> tensor<2x4xf32> attributes { llvm.emit_c_interface } {
    %0 = arith.constant dense<3.000000e+00> : tensor<2x4xf32>
    %result = linalg.add ins(%lhs, %rhs : tensor<2x4xf32>, tensor<2x4xf32>)
                         outs(%0 : tensor<2x4xf32>) -> tensor<2x4xf32>
    return %result : tensor<2x4xf32>
  }
  )mlir";

  mlir::bufferization::registerBufferizationPasses();
  //  mlir::test::registerTestLowerToLLVM();
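  // The dialect registrations let the parser build the module below; the two
  // translation registrations are what allow ExecutionEngine to translate the
  // final LLVM-dialect module to LLVM IR for JIT compilation.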
  DialectRegistry registry;
  registerAllDialects(registry);
  registerBuiltinDialectTranslation(registry);
  registerLLVMDialectTranslation(registry);
  MLIRContext context(registry);
  OwningOpRef<ModuleOp> module =
      parseSourceString<ModuleOp>(moduleStr, &context);
  ASSERT_TRUE(!!module);
  ASSERT_TRUE(succeeded(tensorLowerToLLVMDialect(*module)));

  (*module)->print(llvm::outs());

  auto jitOrError = ExecutionEngine::create(*module);
  ASSERT_TRUE(!!jitOrError);
  std::unique_ptr<ExecutionEngine> jit = std::move(jitOrError.get());
  // The result of the function must be passed as an output argument.
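  // (Background: with llvm.emit_c_interface, the engine goes through the
  // generated _mlir_ciface_add_tensors wrapper, and a returned memref travels
  // as a pointer to its descriptor in the first argument position, which is
  // why &*result is passed before the inputs below.)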
  // Both inputs are 2x4 buffers filled with 1.0; the result buffer only needs
  // the right shape since the function overwrites its contents.
  OwningMemRef<float, 2> bufA{
      {2, 4}, {}, [](float &elt, ArrayRef<int64_t>) { elt = 1.0f; }};
  OwningMemRef<float, 2> bufB{
      {2, 4}, {}, [](float &elt, ArrayRef<int64_t>) { elt = 1.0f; }};
  OwningMemRef<float, 2> result{{2, 4}};

  llvm::Error error = jit->invoke("add_tensors", &*result, &*bufA, &*bufB);
  ASSERT_TRUE(!error);

  for (int i = 0; i < 2; i++) {
    for (int j = 0; j < 4; j++) {
      std::cout << "result[" << i << "," << j << "]=" << result[{i, j}]
                << std::endl;
    }
  }
}
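If everything worked, every element should print as 2: the dense<3.0> constant only serves as the destination operand of linalg.add, which overwrites it with %lhs + %rhs = 1.0 + 1.0.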