Hello!
I am currently experimenting with the Affine dialect of MLIR for the Flang project. I am working with a small example to get array copy elimination working.
// Rank-3 memref of i8 with all three extents dynamic.
!arr3d_ref = type memref<?x?x?xi8>

// Element-wise computation of
//   c = ((a + b) * c) + (b / 2)
// The result is stored back into %c, which is also the returned value.
//
// All loop bounds are taken from the extents of %a; %b and %c are indexed
// with the same induction variables, so they are assumed to be at least as
// large as %a in every dimension (nothing here checks that).
//
// Two temporaries, %t1 and %t2, hold the intermediate results between the
// three loop nests.
// NOTE(review): %t1 and %t2 are never deallocated in this function. If the
// temporaries are not eliminated by a later pass, matching `dealloc` ops are
// needed before the return.
func @arr_compute(%a : !arr3d_ref, %b : !arr3d_ref, %c : !arr3d_ref) -> !arr3d_ref {
  // Extents of %a, used as the upper bounds of every loop nest below.
  %i = dim %a, 0 : !arr3d_ref
  %j = dim %a, 1 : !arr3d_ref
  %k = dim %a, 2 : !arr3d_ref

  // Loop-invariant divisor for the b / 2 term; defined once at function scope
  // instead of being re-materialized on every innermost iteration.
  %v_2 = constant 2 : i8

  // Temporaries with the same extents as %a.
  %t1 = alloc(%i,%j,%k) : !arr3d_ref
  %t2 = alloc(%i,%j,%k) : !arr3d_ref

  // t1 = a + b
  // The innermost loop walks dimension 0, the outermost walks dimension 2.
  affine.for %t = 0 to %k {
    affine.for %s = 0 to %j {
      affine.for %r = 0 to %i {
        %a_v = affine.load %a[%r, %s, %t] : !arr3d_ref
        %b_v = affine.load %b[%r, %s, %t] : !arr3d_ref
        %t1_v = addi %a_v, %b_v : i8
        affine.store %t1_v, %t1[%r, %s, %t] : !arr3d_ref
      }
    }
  }

  // t2 = t1 * c
  affine.for %t = 0 to %k {
    affine.for %s = 0 to %j {
      affine.for %r = 0 to %i {
        %c_v = affine.load %c[%r, %s, %t] : !arr3d_ref
        %t1_v = affine.load %t1[%r, %s, %t] : !arr3d_ref
        %t2_v = muli %c_v, %t1_v : i8
        affine.store %t2_v, %t2[%r, %s, %t] : !arr3d_ref
      }
    }
  }

  // c = t2 + b / 2   (signed integer division)
  affine.for %t = 0 to %k {
    affine.for %s = 0 to %j {
      affine.for %r = 0 to %i {
        %t2_v = affine.load %t2[%r, %s, %t] : !arr3d_ref
        %b_v = affine.load %b[%r, %s, %t] : !arr3d_ref
        %b_vv = divi_signed %b_v, %v_2 : i8
        %c_v = addi %t2_v, %b_vv : i8
        affine.store %c_v, %c[%r, %s, %t] : !arr3d_ref
      }
    }
  }

  return %c : !arr3d_ref
}
I was wondering if there is a way to get the temporary arrays t1 and t2 optimized out of this program. I tried the --affine-loop-fusion pass, but it doesn’t seem to do anything.
Is there another pass that performs this kind of optimization, or is there something I am missing here?
Thank you,
Rajan