Hi, I am trying to apply affine parallelization and then lower to OpenMP. Some of the loops get parallelized, while others remain normal `for` loops.
In this case I still get unrealized conversion casts, and I'm not sure why.
Here is the sample code:
// Reproducer module: @main fills a set of dynamically sized memrefs through
// external helper functions (their definitions are not visible here), derives
// two f64 buffers with affine loops, and prints one line per index via printf.
module {
// External helpers, declared without bodies. Judging by the names they read
// columns of a CSV file; that is an assumption, the implementations are not shown.
func.func private @freeCsv() attributes {llvm.emit_c_interface}
func.func private @readCsvColumnString(!llvm.ptr<i8>, memref<?xi8>, index, index) attributes {llvm.emit_c_interface}
func.func private @readCsvColumnFloat(!llvm.ptr<i8>, memref<?xf64>, index)
func.func private @readCsvColumnInt(!llvm.ptr<i8>, memref<?xi32>, index)
func.func private @getFileSize(!llvm.ptr<i8>) -> index
// String constants; each global's symbol name is the string it holds.
llvm.mlir.global internal constant @"/home/ajayakar/compiler-project/data/lineitem.csv"("/home/ajayakar/compiler-project/data/lineitem.csv") {addr_space = 0 : i32}
func.func private @compareI8s(!llvm.ptr<i8>, i32, memref<?xi8>) -> i1 attributes {llvm.emit_c_interface}
llvm.mlir.global internal constant @"1998-09-02 00:00:00"("1998-09-02 00:00:00") {addr_space = 0 : i32}
func.func private @memrefToString(memref<?xi8>) -> !llvm.ptr<i8>
// NOTE(review): the two printf strings below carry no trailing \00 (they are
// addressed as array<41 x i8> and array<128 x i8> in @main), while printf
// expects NUL-terminated strings — confirm whether a terminator is needed.
llvm.mlir.global internal constant @"%d,\09%f,\09%f,\09%f,\09%s,\09%s,\09%s,\09%d,\09%f,\09%f,\09\0A"("%d,\09%f,\09%f,\09%f,\09%s,\09%s,\09%s,\09%d,\09%f,\09%f,\09\0A") {addr_space = 0 : i32}
llvm.mlir.global internal constant @"l_quantity,\09l_extendedprice,\09l_discount,\09l_tax,\09l_returnflag,\09l_linestatus,\09l_shipdate,\09l_orderkey,\09avg_price,\09sum_disc_price,\09\0A"("l_quantity,\09l_extendedprice,\09l_discount,\09l_tax,\09l_returnflag,\09l_linestatus,\09l_shipdate,\09l_orderkey,\09avg_price,\09sum_disc_price,\09\0A") {addr_space = 0 : i32}
llvm.func @printf(!llvm.ptr<i8>, ...) -> i32
// Entry point: takes no arguments and returns nothing.
func.func @main() {
// Constants. %c1..%c16 are passed as the third argument of the readCsvColumn*
// calls (presumably a column number — confirm against the helpers), %c100 is
// the size of each per-row i8 buffer, %c2_i32 is the second argument of
// @compareI8s, and %cst (1.0) is the minuend of the subtraction loop.
%c2_i32 = arith.constant 2 : i32
%c16 = arith.constant 16 : index
%c15 = arith.constant 15 : index
%c14 = arith.constant 14 : index
%c13 = arith.constant 13 : index
%c12 = arith.constant 12 : index
%c11 = arith.constant 11 : index
%c10 = arith.constant 10 : index
%c9 = arith.constant 9 : index
%c100 = arith.constant 100 : index
%c1 = arith.constant 1 : index
%c2 = arith.constant 2 : index
%c3 = arith.constant 3 : index
%c4 = arith.constant 4 : index
%c5 = arith.constant 5 : index
%c6 = arith.constant 6 : index
%c7 = arith.constant 7 : index
%c8 = arith.constant 8 : index
%cst = arith.constant 1.000000e+00 : f64
%c0 = arith.constant 0 : index
// %1 is an i8 pointer to the first byte of the file-path global.
%0 = llvm.mlir.addressof @"/home/ajayakar/compiler-project/data/lineitem.csv" : !llvm.ptr<array<49 x i8>>
%1 = llvm.getelementptr %0[0, 0] : (!llvm.ptr<array<49 x i8>>) -> !llvm.ptr<i8>
// %2 is the index returned by @getFileSize. It is the dynamic size of every
// memref.alloc(%2) below and the upper bound of every affine.for.
%2 = call @getFileSize(%1) : (!llvm.ptr<i8>) -> index
// Five i32 buffers handed to @readCsvColumnInt with third argument 1..5.
%alloc = memref.alloc(%2) : memref<?xi32>
call @readCsvColumnInt(%1, %alloc, %c1) : (!llvm.ptr<i8>, memref<?xi32>, index) -> ()
%alloc_0 = memref.alloc(%2) : memref<?xi32>
call @readCsvColumnInt(%1, %alloc_0, %c2) : (!llvm.ptr<i8>, memref<?xi32>, index) -> ()
%alloc_1 = memref.alloc(%2) : memref<?xi32>
call @readCsvColumnInt(%1, %alloc_1, %c3) : (!llvm.ptr<i8>, memref<?xi32>, index) -> ()
%alloc_2 = memref.alloc(%2) : memref<?xi32>
call @readCsvColumnInt(%1, %alloc_2, %c4) : (!llvm.ptr<i8>, memref<?xi32>, index) -> ()
%alloc_3 = memref.alloc(%2) : memref<?xi32>
call @readCsvColumnInt(%1, %alloc_3, %c5) : (!llvm.ptr<i8>, memref<?xi32>, index) -> ()
// Three f64 buffers handed to @readCsvColumnFloat with third argument 6..8.
%alloc_4 = memref.alloc(%2) : memref<?xf64>
call @readCsvColumnFloat(%1, %alloc_4, %c6) : (!llvm.ptr<i8>, memref<?xf64>, index) -> ()
%alloc_5 = memref.alloc(%2) : memref<?xf64>
call @readCsvColumnFloat(%1, %alloc_5, %c7) : (!llvm.ptr<i8>, memref<?xf64>, index) -> ()
%alloc_6 = memref.alloc(%2) : memref<?xf64>
call @readCsvColumnFloat(%1, %alloc_6, %c8) : (!llvm.ptr<i8>, memref<?xf64>, index) -> ()
// Eight loops of identical shape, one per third-argument value 9..16. Each
// iteration allocates a 100-element i8 buffer, passes it to
// @readCsvColumnString together with the induction variable, and stores the
// buffer into a memref whose elements are themselves memref<?xi8>.
// NOTE(review): these bodies use func.call and memref.store rather than affine
// ops; presumably they are among the loops that stay sequential after
// --affine-parallelize — confirm against the pass output.
%alloc_7 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c9, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_7[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_8 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c10, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_8[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_9 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c11, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_9[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_10 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c12, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_10[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_11 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c13, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_11[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_12 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c14, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_12[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_13 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c15, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_13[%arg0] : memref<?xmemref<?xi8>>
}
%alloc_14 = memref.alloc(%2) : memref<?xmemref<?xi8>>
affine.for %arg0 = 0 to %2 {
%alloc_49 = memref.alloc(%c100) : memref<?xi8>
func.call @readCsvColumnString(%1, %alloc_49, %c16, %arg0) : (!llvm.ptr<i8>, memref<?xi8>, index, index) -> ()
memref.store %alloc_49, %alloc_14[%arg0] : memref<?xmemref<?xi8>>
}
// All reads are done; the remaining code only works on the memrefs.
call @freeCsv() : () -> ()
// First round of copies (%alloc_15 .. %alloc_22) of the buffers filled above.
%alloc_15 = memref.alloc(%2) : memref<?xi32>
memref.copy %alloc_3, %alloc_15 : memref<?xi32> to memref<?xi32>
%alloc_16 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_4, %alloc_16 : memref<?xf64> to memref<?xf64>
%alloc_17 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_5, %alloc_17 : memref<?xf64> to memref<?xf64>
%alloc_18 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_6, %alloc_18 : memref<?xf64> to memref<?xf64>
%alloc_19 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_7, %alloc_19 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_20 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_8, %alloc_20 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_21 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_9, %alloc_21 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_22 = memref.alloc(%2) : memref<?xi32>
memref.copy %alloc, %alloc_22 : memref<?xi32> to memref<?xi32>
// Per-index i1 result of @compareI8s(date-string pointer, 2, %alloc_9[i]).
// %alloc_23 is written here and not read anywhere else in this function.
%alloc_23 = memref.alloc(%2) : memref<?xi1>
%3 = llvm.mlir.addressof @"1998-09-02 00:00:00" : !llvm.ptr<array<19 x i8>>
%4 = llvm.getelementptr %3[0, 0] : (!llvm.ptr<array<19 x i8>>) -> !llvm.ptr<i8>
affine.for %arg0 = 0 to %2 {
%10 = affine.load %alloc_9[%arg0] : memref<?xmemref<?xi8>>
%11 = func.call @compareI8s(%4, %c2_i32, %10) : (!llvm.ptr<i8>, i32, memref<?xi8>) -> i1
affine.store %11, %alloc_23[%arg0] : memref<?xi1>
}
// Second round of copies (%alloc_24 .. %alloc_34), taken from the original
// buffers and from earlier copies.
%alloc_24 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_4, %alloc_24 : memref<?xf64> to memref<?xf64>
%alloc_25 = memref.alloc(%2) : memref<?xi32>
memref.copy %alloc_3, %alloc_25 : memref<?xi32> to memref<?xi32>
%alloc_26 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_4, %alloc_26 : memref<?xf64> to memref<?xf64>
%alloc_27 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_5, %alloc_27 : memref<?xf64> to memref<?xf64>
%alloc_28 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_6, %alloc_28 : memref<?xf64> to memref<?xf64>
%alloc_29 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_7, %alloc_29 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_30 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_8, %alloc_30 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_31 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_9, %alloc_31 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_32 = memref.alloc(%2) : memref<?xi32>
memref.copy %alloc, %alloc_32 : memref<?xi32> to memref<?xi32>
%alloc_33 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_24, %alloc_33 : memref<?xf64> to memref<?xf64>
%alloc_34 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_27, %alloc_34 : memref<?xf64> to memref<?xf64>
// %alloc_35 is a one-element buffer holding 1.0.
%alloc_35 = memref.alloc(%c1) : memref<?xf64>
memref.store %cst, %alloc_35[%c0] : memref<?xf64>
// %alloc_36[i] = 1.0 - %alloc_34[i], where %alloc_34 is a copy of %alloc_27,
// itself a copy of %alloc_5. The 1.0 is re-loaded from %alloc_35 each iteration.
%alloc_36 = memref.alloc(%2) : memref<?xf64>
affine.for %arg0 = 0 to %2 {
%10 = memref.load %alloc_35[%c0] : memref<?xf64>
%11 = affine.load %alloc_34[%arg0] : memref<?xf64>
%12 = arith.subf %10, %11 : f64
affine.store %12, %alloc_36[%arg0] : memref<?xf64>
}
// %alloc_38[i] = %alloc_37[i] * %alloc_36[i], where %alloc_37 is a copy of
// %alloc_26, itself a copy of %alloc_4.
%alloc_37 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_26, %alloc_37 : memref<?xf64> to memref<?xf64>
%alloc_38 = memref.alloc(%2) : memref<?xf64>
affine.for %arg0 = 0 to %2 {
%10 = affine.load %alloc_37[%arg0] : memref<?xf64>
%11 = affine.load %alloc_36[%arg0] : memref<?xf64>
%12 = arith.mulf %10, %11 : f64
affine.store %12, %alloc_38[%arg0] : memref<?xf64>
}
// Third round of copies (%alloc_39 .. %alloc_48): these are the ten buffers
// read by the printing loop at the end.
%alloc_39 = memref.alloc(%2) : memref<?xi32>
memref.copy %alloc_25, %alloc_39 : memref<?xi32> to memref<?xi32>
%alloc_40 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_26, %alloc_40 : memref<?xf64> to memref<?xf64>
%alloc_41 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_27, %alloc_41 : memref<?xf64> to memref<?xf64>
%alloc_42 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_28, %alloc_42 : memref<?xf64> to memref<?xf64>
%alloc_43 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_29, %alloc_43 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_44 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_30, %alloc_44 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_45 = memref.alloc(%2) : memref<?xmemref<?xi8>>
memref.copy %alloc_31, %alloc_45 : memref<?xmemref<?xi8>> to memref<?xmemref<?xi8>>
%alloc_46 = memref.alloc(%2) : memref<?xi32>
memref.copy %alloc_32, %alloc_46 : memref<?xi32> to memref<?xi32>
%alloc_47 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_33, %alloc_47 : memref<?xf64> to memref<?xf64>
%alloc_48 = memref.alloc(%2) : memref<?xf64>
memref.copy %alloc_38, %alloc_48 : memref<?xf64> to memref<?xf64>
// %6 points at the header-line string and %8 at the per-row format string.
%5 = llvm.mlir.addressof @"l_quantity,\09l_extendedprice,\09l_discount,\09l_tax,\09l_returnflag,\09l_linestatus,\09l_shipdate,\09l_orderkey,\09avg_price,\09sum_disc_price,\09\0A" : !llvm.ptr<array<128 x i8>>
%6 = llvm.getelementptr %5[0, 0] : (!llvm.ptr<array<128 x i8>>) -> !llvm.ptr<i8>
%7 = llvm.mlir.addressof @"%d,\09%f,\09%f,\09%f,\09%s,\09%s,\09%s,\09%d,\09%f,\09%f,\09\0A" : !llvm.ptr<array<41 x i8>>
%8 = llvm.getelementptr %7[0, 0] : (!llvm.ptr<array<41 x i8>>) -> !llvm.ptr<i8>
// Print the header once, then one formatted line per index.
%9 = llvm.call @printf(%6) : (!llvm.ptr<i8>) -> i32
// Each iteration loads one element from each of the ten buffers, converts the
// three memref<?xi8> values to i8 pointers with @memrefToString, and calls
// printf. The loads here are memref.load, not affine.load.
affine.for %arg0 = 0 to %2 {
%10 = memref.load %alloc_39[%arg0] : memref<?xi32>
%11 = memref.load %alloc_40[%arg0] : memref<?xf64>
%12 = memref.load %alloc_41[%arg0] : memref<?xf64>
%13 = memref.load %alloc_42[%arg0] : memref<?xf64>
%14 = memref.load %alloc_43[%arg0] : memref<?xmemref<?xi8>>
%15 = func.call @memrefToString(%14) : (memref<?xi8>) -> !llvm.ptr<i8>
%16 = memref.load %alloc_44[%arg0] : memref<?xmemref<?xi8>>
%17 = func.call @memrefToString(%16) : (memref<?xi8>) -> !llvm.ptr<i8>
%18 = memref.load %alloc_45[%arg0] : memref<?xmemref<?xi8>>
%19 = func.call @memrefToString(%18) : (memref<?xi8>) -> !llvm.ptr<i8>
%20 = memref.load %alloc_46[%arg0] : memref<?xi32>
%21 = memref.load %alloc_47[%arg0] : memref<?xf64>
%22 = memref.load %alloc_48[%arg0] : memref<?xf64>
%23 = llvm.call @printf(%8, %10, %11, %12, %13, %15, %17, %19, %20, %21, %22) : (!llvm.ptr<i8>, i32, f64, f64, f64, !llvm.ptr<i8>, !llvm.ptr<i8>, !llvm.ptr<i8>, i32, f64, f64) -> i32
}
return
}
}
I tried the following pass sequence:
mlir-opt read_lower.mlir --affine-parallelize --lower-affine --convert-scf-to-openmp --convert-func-to-llvm --finalize-memref-to-llvm --convert-scf-to-cf --convert-openmp-to-llvm --convert-index-to-llvm --reconcile-unrealized-casts
It gives this error:
../../../../mlir/examples/pandas/test/read_lower.mlir:57:5: error: failed to legalize operation 'builtin.unrealized_conversion_cast' that was explicitly marked illegal
affine.for %arg0 = 0 to %2 {
^
../../../../mlir/examples/pandas/test/read_lower.mlir:57:5: note: see current operation: %196 = "builtin.unrealized_conversion_cast"(%195) : (i64) -> index
Is there anything I'm missing while lowering?