How to get numpy array output from ExecutionEngine invoke through python binding (where function returns a tensor output)?

We were working on tosa dialect and tried numeric testing for tosa.add op using python bindings. We took the reference from execution_engine.py .

We were adding two tensors using tosa.add op. When we tried to get the output from execution_engine.invoke, we are getting same initial output value. There is no change in output value.
I have attached the code below for reference.

import sys
import numpy as np
from mlir.execution_engine import ExecutionEngine, ctypes 
from mlir.ir import Context, Module
from mlir.passmanager import PassManager
from mlir.runtime import get_ranked_memref_descriptor


def log(*args):
    print(*args, file=sys.stderr)
    sys.stderr.flush()

def lower_tosa_to_llvm(module):
    pipeline="""
    builtin.module(
        func.func(tosa-to-linalg,tosa-to-linalg-named,linalg-bufferize),
        func-bufferize,convert-vector-to-scf,
        func.func(convert-linalg-to-loops,lower-affine),
        convert-scf-to-cf,canonicalize,cse,
        convert-linalg-to-llvm,convert-vector-to-llvm,convert-math-to-llvm,expand-strided-metadata,
        func.func(lower-affine),
        finalize-memref-to-llvm,
        convert-func-to-llvm,
        convert-index-to-llvm,reconcile-unrealized-casts
    )
    """
    pm = PassManager.parse(pipeline)
    pm.run(module)
    return module

def execute():
    with Context():
        input="""
        func.func @test_add(%arg0: tensor<2xf32>, %arg1: tensor<2xf32>) -> tensor<2xf32> attributes { llvm.emit_c_interface } {
            %0 = "tosa.add"(%arg0, %arg1) : (tensor<2xf32>, tensor<2xf32>) -> tensor<2xf32>
            return %0 : tensor<2xf32>
        }
        """
        tosa_module = Module.parse(input)
        arg1 = np.array([1.0,2.0]).astype(np.float32)
        arg2 = np.array([2.0,3.0]).astype(np.float32)
        res = np.array([0.0,0.0]).astype(np.float32)

        arg1_memref_ptr = ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(arg1)))
        arg2_memref_ptr = ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(arg2)))
        res_memref_ptr = ctypes.pointer(ctypes.pointer(get_ranked_memref_descriptor(res)))

        execution_engine=ExecutionEngine(
            lower_tosa_to_llvm(tosa_module),
            opt_level=3
        )
        
        execution_engine.invoke(
            "test_add", arg1_memref_ptr, arg2_memref_ptr, res_memref_ptr
        )
        # expected output 
        # CHECK: [1.0, 2.0] + [2.0, 3.0] = [3.0, 5.0]
        log("{0} + {1} = {2}".format(arg1, arg2, res))
        # actual output ( getting same initial value)
        # [1.0, 2.0] + [2.0, 3.0] = [0.0, 0.0]  
        
execute()

Could someone kindly help me with the above issue .

Arul Samuel

1 Like

The code you generate allocates a buffer and returns it (as opposed to writing to a buffer that’s passed in - there may be a way to lower it this way but I’m not sure what the right passes would be). To call it you’ll need to do something like:

# Set up output parameter
res_memref_ptr = ctypes.pointer(ctypes.pointer(
  make_nd_memref_descriptor(1, np.ctypeslib.as_ctypes_type(np.float32))()
))
# 'res_memref_ptr' is first argument here
execution_engine.invoke("test_add", res_memref_ptr, arg1_memref_ptr, arg2_memref_ptr)
res = ranked_memref_to_numpy(res_memref_ptr[0])

There’s an example of that in the sparse integration tests

Thanks a lot. It worked. @troggo