I’m having trouble getting an affine loop nest (one that looks slightly different than usual) lowered to OpenMP because of some unrealized conversion casts. I didn’t see any integration tests or compiler pipelines within the repository that include an OpenMP lowering phase (though I could definitely be looking in the wrong places). The code I’m trying to lower is this:
#map = affine_map<(d0) -> (d0)>
module {
func.func @kernel(%arg0: memref<?xf64>, %arg1: memref<f64>, %arg2: memref<?xf64>, %arg3: memref<2xindex>, %arg4: memref<2xindex>, %arg5: memref<2xindex>) attributes {llvm.emit_c_interface} {
%c0 = arith.constant 0 : index
%0 = affine.load %arg5[%c0] : memref<2xindex>
%c1 = arith.constant 1 : index
%1 = affine.load %arg5[%c1] : memref<2xindex>
%2 = affine.load %arg1[] : memref<f64>
affine.parallel (%arg6) = (%c0) to (%0) {
affine.for %arg7 = #map(%c0) to #map(%1) {
%3 = affine.load %arg2[%arg6] : memref<?xf64>
%4 = arith.divf %3, %2 : f64
affine.store %4, %arg2[%arg6] : memref<?xf64>
}
}
return
}
}
And my conversion code is:
pm.addNestedPass<mlir::func::FuncOp>(mlir::createLowerAffinePass());
pm.addNestedPass<mlir::func::FuncOp>(mlir::arith::createArithExpandOpsPass());
pm.addPass(mlir::createConvertSCFToOpenMPPass());
pm.addPass(mlir::createConvertOpenMPToLLVMPass());
pm.addPass(mlir::createConvertSCFToCFPass());
pm.addPass(mlir::createConvertControlFlowToLLVMPass());
pm.addPass(mlir::memref::createExpandStridedMetadataPass());
pm.addPass(mlir::createFinalizeMemRefToLLVMConversionPass());
pm.addPass(mlir::createConvertMathToLLVMPass());
pm.addPass(mlir::createConvertMathToLibmPass());
pm.addPass(mlir::createConvertFuncToLLVMPass());
pm.addPass(mlir::createConvertIndexToLLVMPass());
pm.addPass(mlir::createReconcileUnrealizedCastsPass());
This pipeline has worked for loop nests that are fully parallel, but this one is a parallel loop with an inner sequential loop. I get the following error:
loc("binary_op"): error: failed to legalize operation 'builtin.unrealized_conversion_cast' that was explicitly marked illegal
module attributes {llvm.data_layout = ""} {
llvm.func @kernel(%arg0: !llvm.ptr, %arg1: !llvm.ptr, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: !llvm.ptr, %arg6: !llvm.ptr, %arg7: i64, %arg8: !llvm.ptr, %arg9: !llvm.ptr, %arg10: i64, %arg11: i64, %arg12: i64, %arg13: !llvm.ptr, %arg14: !llvm.ptr, %arg15: i64, %arg16: i64, %arg17: i64, %arg18: !llvm.ptr, %arg19: !llvm.ptr, %arg20: i64, %arg21: i64, %arg22: i64, %arg23: !llvm.ptr, %arg24: !llvm.ptr, %arg25: i64, %arg26: i64, %arg27: i64) attributes {llvm.emit_c_interface} {
%0 = llvm.mlir.constant(0 : index) : i64
%1 = llvm.mlir.constant(1 : index) : i64
%2 = builtin.unrealized_conversion_cast %1 : i64 to index
%3 = builtin.unrealized_conversion_cast %0 : i64 to index
%4 = llvm.load %arg24 : !llvm.ptr -> i64
%5 = llvm.getelementptr %arg24[1] : (!llvm.ptr) -> !llvm.ptr, i64
%6 = llvm.load %5 : !llvm.ptr -> i64
%7 = builtin.unrealized_conversion_cast %6 : i64 to index
%8 = llvm.load %arg6 : !llvm.ptr -> f64
omp.parallel {
omp.wsloop for (%arg28) : i64 = (%0) to (%4) step (%1) {
%9 = llvm.intr.stacksave : !llvm.ptr
llvm.br ^bb1
^bb1: // pred: ^bb0
cf.br ^bb2(%3 : index)
^bb2(%10: index): // 2 preds: ^bb1, ^bb3
%11 = builtin.unrealized_conversion_cast %10 : index to i64
%12 = llvm.icmp "slt" %11, %6 : i64
llvm.cond_br %12, ^bb3, ^bb4
^bb3: // pred: ^bb2
%13 = llvm.getelementptr %arg9[%arg28] : (!llvm.ptr, i64) -> !llvm.ptr, f64
%14 = llvm.load %13 : !llvm.ptr -> f64
%15 = llvm.fdiv %14, %8 : f64
%16 = llvm.getelementptr %arg9[%arg28] : (!llvm.ptr, i64) -> !llvm.ptr, f64
llvm.store %15, %16 : f64, !llvm.ptr
%17 = llvm.add %11, %1 : i64
%18 = builtin.unrealized_conversion_cast %17 : i64 to index
cf.br ^bb2(%18 : index)
^bb4: // pred: ^bb2
llvm.intr.stackrestore %9 : !llvm.ptr
llvm.br ^bb5
^bb5: // pred: ^bb4
omp.yield
}
omp.terminator
}
llvm.return
}
...
Judging by the pass manager’s output, the conversion casts are introduced by the OpenMPToLLVMPass and the FuncToLLVMPass, but they are not removed by the IndexToLLVMPass. What am I doing wrong?