Sparse_tensor.convert for conversion from sparse to dense fails under different input conditions

Hello, everyone.
I used sparse_tensor.convert to try to convert a sparse tensor into a dense one. In my example, I am operating on a sparse tensor produced by linalg.add, with the following code:

    %5 = linalg.add ins(%arg1_sparse, %3 : tensor<512x512xf32, #COO>, tensor<512x512xf32, #COO>) outs(%sparse4 : tensor<512x512xf32, #COO>) -> tensor<512x512xf32, #COO>
    %dense5 = sparse_tensor.convert %5 : tensor<512x512xf32, #COO> to tensor<512x512xf32>

And the code is lowered to:

    %13 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%7, %3 : tensor<512x512xf32, #sparse>, tensor<512x512xf32, #sparse>) outs(%12 : tensor<512x512xf32, #sparse>) attrs =  {sorted = true} {
    ^bb0(%in: f32, %in_2: f32, %out: f32):
      %26 = arith.mulf %in_2, %cst : f32
      %27 = arith.addf %in, %26 : f32
      linalg.yield %27 : f32
    } -> tensor<512x512xf32, #sparse>

    //begin
    %14 = bufferization.alloc_tensor() : tensor<512x512xf32>
    %15 = linalg.fill ins(%cst_0 : f32) outs(%14 : tensor<512x512xf32>) -> tensor<512x512xf32>
    %16 = sparse_tensor.values %13 : tensor<512x512xf32, #sparse> to memref<?xf32>
    %17 = sparse_tensor.positions %13 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex>
    %18 = sparse_tensor.coordinates %13 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
    %19 = sparse_tensor.coordinates %13 {level = 1 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
    %20 = memref.load %17[%c0] : memref<?xindex>
    %21 = memref.load %17[%c1] : memref<?xindex>
    %22 = scf.while (%arg2 = %20) : (index) -> index {
      %26 = arith.cmpi ult, %arg2, %21 : index
      %27 = scf.if %26 -> (i1) {
        %28 = memref.load %18[%20] : memref<?xindex, strided<[?], offset: ?>>
        %29 = memref.load %18[%arg2] : memref<?xindex, strided<[?], offset: ?>>
        %30 = arith.cmpi eq, %28, %29 : index
        scf.yield %30 : i1
      } else {
        scf.yield %false : i1
      }
      scf.condition(%27) %arg2 : index
    } do {
    ^bb0(%arg2: index):
      %26 = arith.addi %arg2, %c1 : index
      scf.yield %26 : index
    }
    %23:3 = scf.while (%arg2 = %20, %arg3 = %22, %arg4 = %15) : (index, index, tensor<512x512xf32>) -> (index, index, tensor<512x512xf32>) {
      %26 = arith.cmpi ult, %arg2, %21 : index
      scf.condition(%26) %arg2, %arg3, %arg4 : index, index, tensor<512x512xf32>
    } do {
    ^bb0(%arg2: index, %arg3: index, %arg4: tensor<512x512xf32>):
      %26 = memref.load %18[%arg2] : memref<?xindex, strided<[?], offset: ?>>
      %27 = scf.for %arg5 = %arg2 to %arg3 step %c1 iter_args(%arg6 = %arg4) -> (tensor<512x512xf32>) {
        %29 = memref.load %19[%arg5] : memref<?xindex, strided<[?], offset: ?>>
        %30 = memref.load %16[%arg5] : memref<?xf32>
        %inserted = tensor.insert %30 into %arg6[%26, %29] : tensor<512x512xf32>
        scf.yield %inserted : tensor<512x512xf32>
      } {"Emitted from" = "sparse_tensor.foreach"}
      %28:2 = scf.if %true -> (index, index) {
        %29 = scf.while (%arg5 = %arg3) : (index) -> index {
          %30 = arith.cmpi ult, %arg5, %21 : index
          %31 = scf.if %30 -> (i1) {
            %32 = memref.load %18[%arg3] : memref<?xindex, strided<[?], offset: ?>>
            %33 = memref.load %18[%arg5] : memref<?xindex, strided<[?], offset: ?>>
            %34 = arith.cmpi eq, %32, %33 : index
            scf.yield %34 : i1
          } else {
            scf.yield %false : i1
          }
          scf.condition(%31) %arg5 : index
        } do {
        ^bb0(%arg5: index):
          %30 = arith.addi %arg5, %c1 : index
          scf.yield %30 : index
        }
        scf.yield %arg3, %29 : index, index
      } else {
        scf.yield %arg2, %arg3 : index, index
      }
      scf.yield %28#0, %28#1, %27 : index, index, tensor<512x512xf32>
    } attributes {"Emitted from" = "sparse_tensor.foreach"}

An error occurs at the sparse_tensor.values operation when running the --sparse-tensor-conversion pass:

 error: failed to legalize unresolved materialization from ('tensor<512x512xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>') to '!llvm.ptr' that remained live after conversion
    %16 = sparse_tensor.values %13 : tensor<512x512xf32, #sparse> to memref<?xf32>
          ^
sparsetensorcon-test.mlir:82:11: note: see current operation: %106 = "builtin.unrealized_conversion_cast"(%105) : (tensor<512x512xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) -> !llvm.ptr
sparsetensorcon-test.mlir:85:11: note: see existing live user here: %119 = "func.call"(%106, %118) <{callee = @sparseCoordinates0}> : (!llvm.ptr, index) -> memref<?xindex>
    %19 = sparse_tensor.coordinates %13 {level = 1 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>

The odd thing is that I used the same sparse_tensor.convert operation for the matmul example, the only difference being that the input in matmul is not a sparse tensor generated by linalg.generic, as follows:

  func.func @matmulCOO(%A: tensor<512x512xf32, #SortedCOO>,
                       %B: tensor<512x512xf32>,
                       %C: tensor<512x512xf32>) -> tensor<512x512xf32> {

    %Adense = sparse_tensor.convert %A : tensor<512x512xf32, #SortedCOO> to tensor<512x512xf32>
    %Bsparse = sparse_tensor.convert %B : tensor<512x512xf32> to tensor<512x512xf32, #SortedCOO>            
    %D = linalg.matmul
      ins(%Bsparse, %Adense: tensor<512x512xf32, #SortedCOO>, tensor<512x512xf32>)
      outs(%C: tensor<512x512xf32>) -> tensor<512x512xf32>
    return %D: tensor<512x512xf32>
  }

And part of the code is lowered to:

    %true = arith.constant true
    %false = arith.constant false
    %c1 = arith.constant 1 : index
    %cst = arith.constant 0.000000e+00 : f32
    %c0 = arith.constant 0 : index
    %c512 = arith.constant 512 : index
    %0 = bufferization.alloc_tensor() : tensor<512x512xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<512x512xf32>) -> tensor<512x512xf32>
    %2 = sparse_tensor.values %arg0 : tensor<512x512xf32, #sparse> to memref<?xf32>
    %3 = sparse_tensor.positions %arg0 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex>
    %4 = sparse_tensor.coordinates %arg0 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
    %5 = sparse_tensor.coordinates %arg0 {level = 1 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
    %6 = memref.load %3[%c0] : memref<?xindex>
    %7 = memref.load %3[%c1] : memref<?xindex>
    %8 = scf.while (%arg3 = %6) : (index) -> index {
      %43 = arith.cmpi ult, %arg3, %7 : index
      %44 = scf.if %43 -> (i1) {
        %45 = memref.load %4[%6] : memref<?xindex, strided<[?], offset: ?>>
        %46 = memref.load %4[%arg3] : memref<?xindex, strided<[?], offset: ?>>
        %47 = arith.cmpi eq, %45, %46 : index
        scf.yield %47 : i1
      } else {
        scf.yield %false : i1
      }
      scf.condition(%44) %arg3 : index
    } do {
    ^bb0(%arg3: index):
      %43 = arith.addi %arg3, %c1 : index
      scf.yield %43 : index
    }
    %9:3 = scf.while (%arg3 = %6, %arg4 = %8, %arg5 = %1) : (index, index, tensor<512x512xf32>) -> (index, index, tensor<512x512xf32>) {
      %43 = arith.cmpi ult, %arg3, %7 : index
      scf.condition(%43) %arg3, %arg4, %arg5 : index, index, tensor<512x512xf32>
    } do {
    ^bb0(%arg3: index, %arg4: index, %arg5: tensor<512x512xf32>):
      %43 = memref.load %4[%arg3] : memref<?xindex, strided<[?], offset: ?>>
      %44 = scf.for %arg6 = %arg3 to %arg4 step %c1 iter_args(%arg7 = %arg5) -> (tensor<512x512xf32>) {
        %46 = memref.load %5[%arg6] : memref<?xindex, strided<[?], offset: ?>>
        %47 = memref.load %2[%arg6] : memref<?xf32>
        %inserted = tensor.insert %47 into %arg7[%43, %46] : tensor<512x512xf32>
        scf.yield %inserted : tensor<512x512xf32>
      } {"Emitted from" = "sparse_tensor.foreach"}
      %45:2 = scf.if %true -> (index, index) {
        %46 = scf.while (%arg6 = %arg4) : (index) -> index {
          %47 = arith.cmpi ult, %arg6, %7 : index
          %48 = scf.if %47 -> (i1) {
            %49 = memref.load %4[%arg4] : memref<?xindex, strided<[?], offset: ?>>
            %50 = memref.load %4[%arg6] : memref<?xindex, strided<[?], offset: ?>>
            %51 = arith.cmpi eq, %49, %50 : index
            scf.yield %51 : i1
          } else {
            scf.yield %false : i1
          }
          scf.condition(%48) %arg6 : index
        } do {
        ^bb0(%arg6: index):
          %47 = arith.addi %arg6, %c1 : index
          scf.yield %47 : index
        }
        scf.yield %arg4, %46 : index, index
      } else {
        scf.yield %arg3, %arg4 : index, index
      }
      scf.yield %45#0, %45#1, %44 : index, index, tensor<512x512xf32>
    } attributes {"Emitted from" = "sparse_tensor.foreach"}

When running the --sparse-tensor-conversion pass, it is converted to:

  func.func @matmulCOO(%arg0: !llvm.ptr, %arg1: tensor<512x512xf32>, %arg2: tensor<512x512xf32>) -> tensor<512x512xf32> {
    %true = arith.constant true
    %false = arith.constant false
    %c1 = arith.constant 1 : index
    %cst = arith.constant 0.000000e+00 : f32
    %c0 = arith.constant 0 : index
    %c512 = arith.constant 512 : index
    %0 = bufferization.alloc_tensor() : tensor<512x512xf32>
    %1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<512x512xf32>) -> tensor<512x512xf32>
    %2 = call @sparseValuesF32(%arg0) : (!llvm.ptr) -> memref<?xf32>
    %c0_0 = arith.constant 0 : index
    %3 = call @sparsePositions0(%arg0, %c0_0) : (!llvm.ptr, index) -> memref<?xindex>
    %c0_1 = arith.constant 0 : index
    %4 = call @sparseCoordinates0(%arg0, %c0_1) : (!llvm.ptr, index) -> memref<?xindex>
    %cast = memref.cast %4 : memref<?xindex> to memref<?xindex, strided<[?], offset: ?>>
    %c1_2 = arith.constant 1 : index
    %5 = call @sparseCoordinates0(%arg0, %c1_2) : (!llvm.ptr, index) -> memref<?xindex>

Clearly, when running my example, tensor<512x512xf32, #sparse> cannot be converted to !llvm.ptr, while the same conversion works fine in the matmul example. Is it the case that this conversion is not supported for sparse tensors produced by linalg.generic?