Hi all,
It seems that the buffer-deallocation pass cannot handle the structured while op (scf.while) correctly.
Here is an example input:
// Input IR, before the buffer-deallocation pass is run.
// It contains memref.alloc ops but no memref.dealloc ops.
module {
func @dynamic_shape_while2(%arg0: memref<8x8xf32>) -> memref<8x8xf32> {
// Loop carrying a single memref<8x8xf32> value, initialised with %arg0.
%0 = scf.while (%arg1 = %arg0) : (memref<8x8xf32>) -> memref<8x8xf32> {
// First region: derives an i1 loop condition from %arg1, writing every
// intermediate result into a freshly allocated buffer (%1 .. %5).
%1 = memref.alloc() : memref<i1>
"lmhlo.constant"(%1) {value = dense<true> : tensor<i1>} : (memref<i1>) -> ()
%2 = memref.alloc() : memref<8x8xf32>
"lmhlo.multiply"(%arg1, %arg1, %2) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
%3 = memref.alloc() : memref<8x8xf32>
"lmhlo.subtract"(%arg1, %2, %3) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
%4 = memref.alloc() : memref<8x8xi1>
"lmhlo.compare"(%arg1, %3, %4) {comparison_direction = "LT"} : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xi1>) -> ()
%5 = memref.alloc() : memref<i1>
"lmhlo.reduce"(%4, %1, %5) ( {
^bb0(%arg2: memref<i1>, %arg3: memref<i1>, %arg4: memref<i1>): // no predecessors
"lmhlo.and"(%arg2, %arg3, %arg4) : (memref<i1>, memref<i1>, memref<i1>) -> ()
"lmhlo.terminator"() : () -> ()
}) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (memref<8x8xi1>, memref<i1>, memref<i1>) -> ()
// The scalar condition is read out of %5; scf.condition passes %arg1 along unchanged.
%6 = memref.load %5[] : memref<i1>
scf.condition(%6) %arg1 : memref<8x8xf32>
} do {
^bb0(%arg1: memref<8x8xf32>): // no predecessors
// Second region: fills two new allocations and yields the second one (%2)
// as the next loop-carried value.
%1 = memref.alloc() : memref<8x8xf32>
"lmhlo.multiply"(%arg1, %arg1, %1) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
%2 = memref.alloc() : memref<8x8xf32>
"lmhlo.concatenate"(%1, %1, %2) {dimension = 0 : i64} : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
scf.yield %2 : memref<8x8xf32>
}
return %0 : memref<8x8xf32>
}
}
After applying the -buffer-deallocation pass:
// IR produced by the buffer-deallocation pass for the same function.
module {
func @dynamic_shape_while2(%arg0: memref<8x8xf32>) -> memref<8x8xf32> {
// The pass clones %arg0 and uses the clone (%0) as the loop's initial value.
// No memref.dealloc of %0 appears anywhere in this listing.
%0 = memref.clone %arg0 : memref<8x8xf32> to memref<8x8xf32>
%1 = scf.while (%arg1 = %0) : (memref<8x8xf32>) -> memref<8x8xf32> {
// First region: each temporary allocated here (%2, %3, %4, %5, %6) now has
// a matching memref.dealloc placed after its last use.
%2 = memref.alloc() : memref<i1>
"lmhlo.constant"(%2) {value = dense<true> : tensor<i1>} : (memref<i1>) -> ()
%3 = memref.alloc() : memref<8x8xf32>
"lmhlo.multiply"(%arg1, %arg1, %3) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
%4 = memref.alloc() : memref<8x8xf32>
"lmhlo.subtract"(%arg1, %3, %4) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
memref.dealloc %3 : memref<8x8xf32>
%5 = memref.alloc() : memref<8x8xi1>
"lmhlo.compare"(%arg1, %4, %5) {comparison_direction = "LT"} : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xi1>) -> ()
memref.dealloc %4 : memref<8x8xf32>
%6 = memref.alloc() : memref<i1>
"lmhlo.reduce"(%5, %2, %6) ( {
^bb0(%arg2: memref<i1>, %arg3: memref<i1>, %arg4: memref<i1>): // no predecessors
"lmhlo.and"(%arg2, %arg3, %arg4) : (memref<i1>, memref<i1>, memref<i1>) -> ()
"lmhlo.terminator"() : () -> ()
}) {dimensions = dense<[0, 1]> : tensor<2xi64>} : (memref<8x8xi1>, memref<i1>, memref<i1>) -> ()
memref.dealloc %5 : memref<8x8xi1>
memref.dealloc %2 : memref<i1>
%7 = memref.load %6[] : memref<i1>
memref.dealloc %6 : memref<i1>
// %arg1 is passed along unchanged; this region contains no memref.dealloc for it.
scf.condition(%7) %arg1 : memref<8x8xf32>
} do {
^bb0(%arg1: memref<8x8xf32>): // no predecessors
// Second region: %2 and %3 are deallocated here, and a clone of %3 (%4) is
// yielded in place of %3.
// NOTE(review): no memref.dealloc of the incoming %arg1 appears in this region
// either, although a new buffer (%4) is yielded each time the region runs.
// This looks like the leak being reported; confirm against the pass's
// ownership rules for scf.while.
%2 = memref.alloc() : memref<8x8xf32>
"lmhlo.multiply"(%arg1, %arg1, %2) : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
%3 = memref.alloc() : memref<8x8xf32>
"lmhlo.concatenate"(%2, %2, %3) {dimension = 0 : i64} : (memref<8x8xf32>, memref<8x8xf32>, memref<8x8xf32>) -> ()
memref.dealloc %2 : memref<8x8xf32>
%4 = memref.clone %3 : memref<8x8xf32> to memref<8x8xf32>
memref.dealloc %3 : memref<8x8xf32>
scf.yield %4 : memref<8x8xf32>
}
return %1 : memref<8x8xf32>
}
}
The loop variable %arg1 is never freed. Is this the intended behavior, or is it a bug?
Thanks!