Hello, everyone.
I used sparse_tensor.convert to convert a sparse tensor to a dense tensor. In my example, I am trying to convert a sparse tensor generated by linalg.add, with the following code:
%5 = linalg.add ins(%arg1_sparse, %3 : tensor<512x512xf32, #COO>, tensor<512x512xf32, #COO>) outs(%sparse4 : tensor<512x512xf32, #COO>) -> tensor<512x512xf32, #COO>
%dense5 = sparse_tensor.convert %5 : tensor<512x512xf32, #COO> to tensor<512x512xf32>
And the code is lowered to:
%13 = linalg.generic {indexing_maps = [#map, #map, #map], iterator_types = ["parallel", "parallel"]} ins(%7, %3 : tensor<512x512xf32, #sparse>, tensor<512x512xf32, #sparse>) outs(%12 : tensor<512x512xf32, #sparse>) attrs = {sorted = true} {
^bb0(%in: f32, %in_2: f32, %out: f32):
%26 = arith.mulf %in_2, %cst : f32
%27 = arith.addf %in, %26 : f32
linalg.yield %27 : f32
} -> tensor<512x512xf32, #sparse>
//begin
%14 = bufferization.alloc_tensor() : tensor<512x512xf32>
%15 = linalg.fill ins(%cst_0 : f32) outs(%14 : tensor<512x512xf32>) -> tensor<512x512xf32>
%16 = sparse_tensor.values %13 : tensor<512x512xf32, #sparse> to memref<?xf32>
%17 = sparse_tensor.positions %13 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex>
%18 = sparse_tensor.coordinates %13 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
%19 = sparse_tensor.coordinates %13 {level = 1 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
%20 = memref.load %17[%c0] : memref<?xindex>
%21 = memref.load %17[%c1] : memref<?xindex>
%22 = scf.while (%arg2 = %20) : (index) -> index {
%26 = arith.cmpi ult, %arg2, %21 : index
%27 = scf.if %26 -> (i1) {
%28 = memref.load %18[%20] : memref<?xindex, strided<[?], offset: ?>>
%29 = memref.load %18[%arg2] : memref<?xindex, strided<[?], offset: ?>>
%30 = arith.cmpi eq, %28, %29 : index
scf.yield %30 : i1
} else {
scf.yield %false : i1
}
scf.condition(%27) %arg2 : index
} do {
^bb0(%arg2: index):
%26 = arith.addi %arg2, %c1 : index
scf.yield %26 : index
}
%23:3 = scf.while (%arg2 = %20, %arg3 = %22, %arg4 = %15) : (index, index, tensor<512x512xf32>) -> (index, index, tensor<512x512xf32>) {
%26 = arith.cmpi ult, %arg2, %21 : index
scf.condition(%26) %arg2, %arg3, %arg4 : index, index, tensor<512x512xf32>
} do {
^bb0(%arg2: index, %arg3: index, %arg4: tensor<512x512xf32>):
%26 = memref.load %18[%arg2] : memref<?xindex, strided<[?], offset: ?>>
%27 = scf.for %arg5 = %arg2 to %arg3 step %c1 iter_args(%arg6 = %arg4) -> (tensor<512x512xf32>) {
%29 = memref.load %19[%arg5] : memref<?xindex, strided<[?], offset: ?>>
%30 = memref.load %16[%arg5] : memref<?xf32>
%inserted = tensor.insert %30 into %arg6[%26, %29] : tensor<512x512xf32>
scf.yield %inserted : tensor<512x512xf32>
} {"Emitted from" = "sparse_tensor.foreach"}
%28:2 = scf.if %true -> (index, index) {
%29 = scf.while (%arg5 = %arg3) : (index) -> index {
%30 = arith.cmpi ult, %arg5, %21 : index
%31 = scf.if %30 -> (i1) {
%32 = memref.load %18[%arg3] : memref<?xindex, strided<[?], offset: ?>>
%33 = memref.load %18[%arg5] : memref<?xindex, strided<[?], offset: ?>>
%34 = arith.cmpi eq, %32, %33 : index
scf.yield %34 : i1
} else {
scf.yield %false : i1
}
scf.condition(%31) %arg5 : index
} do {
^bb0(%arg5: index):
%30 = arith.addi %arg5, %c1 : index
scf.yield %30 : index
}
scf.yield %arg3, %29 : index, index
} else {
scf.yield %arg2, %arg3 : index, index
}
scf.yield %28#0, %28#1, %27 : index, index, tensor<512x512xf32>
} attributes {"Emitted from" = "sparse_tensor.foreach"}
An error occurs at the sparse_tensor.values op when executing the --sparse-tensor-conversion pass:
error: failed to legalize unresolved materialization from ('tensor<512x512xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>') to '!llvm.ptr' that remained live after conversion
%16 = sparse_tensor.values %13 : tensor<512x512xf32, #sparse> to memref<?xf32>
^
sparsetensorcon-test.mlir:82:11: note: see current operation: %106 = "builtin.unrealized_conversion_cast"(%105) : (tensor<512x512xf32, #sparse_tensor.encoding<{ map = (d0, d1) -> (d0 : compressed(nonunique), d1 : singleton) }>>) -> !llvm.ptr
sparsetensorcon-test.mlir:85:11: note: see existing live user here: %119 = "func.call"(%106, %118) <{callee = @sparseCoordinates0}> : (!llvm.ptr, index) -> memref<?xindex>
%19 = sparse_tensor.coordinates %13 {level = 1 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
The odd thing is that I used the same sparse_tensor.convert operation in a matmul example and it worked; the only difference is that in the matmul example the input is not a sparse tensor generated by linalg.generic, as follows:
func.func @matmulCOO(%A: tensor<512x512xf32, #SortedCOO>,
%B: tensor<512x512xf32>,
%C: tensor<512x512xf32>) -> tensor<512x512xf32> {
%Adense = sparse_tensor.convert %A : tensor<512x512xf32, #SortedCOO> to tensor<512x512xf32>
%Bsparse = sparse_tensor.convert %B : tensor<512x512xf32> to tensor<512x512xf32, #SortedCOO>
%D = linalg.matmul
ins(%Bsparse, %Adense: tensor<512x512xf32, #SortedCOO>, tensor<512x512xf32>)
outs(%C: tensor<512x512xf32>) -> tensor<512x512xf32>
return %D: tensor<512x512xf32>
}
And part of that code is lowered to:
%true = arith.constant true
%false = arith.constant false
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c512 = arith.constant 512 : index
%0 = bufferization.alloc_tensor() : tensor<512x512xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<512x512xf32>) -> tensor<512x512xf32>
%2 = sparse_tensor.values %arg0 : tensor<512x512xf32, #sparse> to memref<?xf32>
%3 = sparse_tensor.positions %arg0 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex>
%4 = sparse_tensor.coordinates %arg0 {level = 0 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
%5 = sparse_tensor.coordinates %arg0 {level = 1 : index} : tensor<512x512xf32, #sparse> to memref<?xindex, strided<[?], offset: ?>>
%6 = memref.load %3[%c0] : memref<?xindex>
%7 = memref.load %3[%c1] : memref<?xindex>
%8 = scf.while (%arg3 = %6) : (index) -> index {
%43 = arith.cmpi ult, %arg3, %7 : index
%44 = scf.if %43 -> (i1) {
%45 = memref.load %4[%6] : memref<?xindex, strided<[?], offset: ?>>
%46 = memref.load %4[%arg3] : memref<?xindex, strided<[?], offset: ?>>
%47 = arith.cmpi eq, %45, %46 : index
scf.yield %47 : i1
} else {
scf.yield %false : i1
}
scf.condition(%44) %arg3 : index
} do {
^bb0(%arg3: index):
%43 = arith.addi %arg3, %c1 : index
scf.yield %43 : index
}
%9:3 = scf.while (%arg3 = %6, %arg4 = %8, %arg5 = %1) : (index, index, tensor<512x512xf32>) -> (index, index, tensor<512x512xf32>) {
%43 = arith.cmpi ult, %arg3, %7 : index
scf.condition(%43) %arg3, %arg4, %arg5 : index, index, tensor<512x512xf32>
} do {
^bb0(%arg3: index, %arg4: index, %arg5: tensor<512x512xf32>):
%43 = memref.load %4[%arg3] : memref<?xindex, strided<[?], offset: ?>>
%44 = scf.for %arg6 = %arg3 to %arg4 step %c1 iter_args(%arg7 = %arg5) -> (tensor<512x512xf32>) {
%46 = memref.load %5[%arg6] : memref<?xindex, strided<[?], offset: ?>>
%47 = memref.load %2[%arg6] : memref<?xf32>
%inserted = tensor.insert %47 into %arg7[%43, %46] : tensor<512x512xf32>
scf.yield %inserted : tensor<512x512xf32>
} {"Emitted from" = "sparse_tensor.foreach"}
%45:2 = scf.if %true -> (index, index) {
%46 = scf.while (%arg6 = %arg4) : (index) -> index {
%47 = arith.cmpi ult, %arg6, %7 : index
%48 = scf.if %47 -> (i1) {
%49 = memref.load %4[%arg4] : memref<?xindex, strided<[?], offset: ?>>
%50 = memref.load %4[%arg6] : memref<?xindex, strided<[?], offset: ?>>
%51 = arith.cmpi eq, %49, %50 : index
scf.yield %51 : i1
} else {
scf.yield %false : i1
}
scf.condition(%48) %arg6 : index
} do {
^bb0(%arg6: index):
%47 = arith.addi %arg6, %c1 : index
scf.yield %47 : index
}
scf.yield %arg4, %46 : index, index
} else {
scf.yield %arg3, %arg4 : index, index
}
scf.yield %45#0, %45#1, %44 : index, index, tensor<512x512xf32>
} attributes {"Emitted from" = "sparse_tensor.foreach"}
When executing the --sparse-tensor-conversion pass, it is converted to:
func.func @matmulCOO(%arg0: !llvm.ptr, %arg1: tensor<512x512xf32>, %arg2: tensor<512x512xf32>) -> tensor<512x512xf32> {
%true = arith.constant true
%false = arith.constant false
%c1 = arith.constant 1 : index
%cst = arith.constant 0.000000e+00 : f32
%c0 = arith.constant 0 : index
%c512 = arith.constant 512 : index
%0 = bufferization.alloc_tensor() : tensor<512x512xf32>
%1 = linalg.fill ins(%cst : f32) outs(%0 : tensor<512x512xf32>) -> tensor<512x512xf32>
%2 = call @sparseValuesF32(%arg0) : (!llvm.ptr) -> memref<?xf32>
%c0_0 = arith.constant 0 : index
%3 = call @sparsePositions0(%arg0, %c0_0) : (!llvm.ptr, index) -> memref<?xindex>
%c0_1 = arith.constant 0 : index
%4 = call @sparseCoordinates0(%arg0, %c0_1) : (!llvm.ptr, index) -> memref<?xindex>
%cast = memref.cast %4 : memref<?xindex> to memref<?xindex, strided<[?], offset: ?>>
%c1_2 = arith.constant 1 : index
%5 = call @sparseCoordinates0(%arg0, %c1_2) : (!llvm.ptr, index) -> memref<?xindex>
Clearly, in my example the tensor<512x512xf32, #sparse> cannot be converted to !llvm.ptr, while the matmul example works fine. Is it the case that this conversion cannot be done for sparse tensors generated by linalg.generic?