I converted affine & std dialect to llvm dialect, then translated to llvm ir. And then use llc and clang to build the object file. But I got segmentation error when I ran the executable on ARM.
Affine & std Dialect:
module {
func @laplace(%arg0: memref<72x18x16xf64>, %arg1: memref<72x18x16xf64>) {
%0 = memref.alloc() : memref<72x18x16xf64>
affine.for %arg2 = 1 to 71 {
affine.for %arg3 = 1 to 17 {
affine.for %arg4 = 0 to 16 {
%cst = constant -4.000000e+00 : f64
affine.store %cst, %arg1[%arg2, %arg3, %arg4] : memref<72x18x16xf64>
}
}
}
return
}
}
then I added these passes:
LLVMTypeConverter typeConverter(&getContext());
RewritePatternSet patterns(&getContext());
populateAffineToStdConversionPatterns(patterns);
populateLoopToStdConversionPatterns(patterns);
populateMemRefToLLVMConversionPatterns(typeConverter, patterns);
populateStdToLLVMConversionPatterns(typeConverter, patterns);
and got the llvm dialect:
module {
llvm.func @malloc(i64) -> !llvm.ptr<i8>
llvm.func @laplace(%arg0: !llvm.ptr<f64>, %arg1: !llvm.ptr<f64>, %arg2: i64, %arg3: i64, %arg4: i64, %arg5: i64, %arg6: i64, %arg7: i64, %arg8: i64, %arg9: !llvm.ptr<f64>, %arg10: !llvm.ptr<f64>, %arg11: i64, %arg12: i64, %arg13: i64, %arg14: i64, %arg15: i64, %arg16: i64, %arg17: i64) {
%0 = llvm.mlir.undef : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%1 = llvm.insertvalue %arg0, %0[0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%2 = llvm.insertvalue %arg1, %1[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%3 = llvm.insertvalue %arg2, %2[2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%4 = llvm.insertvalue %arg3, %3[3, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%5 = llvm.insertvalue %arg6, %4[4, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%6 = llvm.insertvalue %arg4, %5[3, 1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%7 = llvm.insertvalue %arg7, %6[4, 1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%8 = llvm.insertvalue %arg5, %7[3, 2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%9 = llvm.insertvalue %arg8, %8[4, 2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%10 = llvm.mlir.undef : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%11 = llvm.insertvalue %arg9, %10[0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%12 = llvm.insertvalue %arg10, %11[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%13 = llvm.insertvalue %arg11, %12[2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%14 = llvm.insertvalue %arg12, %13[3, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%15 = llvm.insertvalue %arg15, %14[4, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%16 = llvm.insertvalue %arg13, %15[3, 1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%17 = llvm.insertvalue %arg16, %16[4, 1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%18 = llvm.insertvalue %arg14, %17[3, 2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%19 = llvm.insertvalue %arg17, %18[4, 2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%20 = llvm.mlir.constant(72 : index) : i64
%21 = llvm.mlir.constant(18 : index) : i64
%22 = llvm.mlir.constant(16 : index) : i64
%23 = llvm.mlir.constant(1 : index) : i64
%24 = llvm.mlir.constant(288 : index) : i64
%25 = llvm.mlir.constant(20736 : index) : i64
%26 = llvm.mlir.null : !llvm.ptr<f64>
%27 = llvm.getelementptr %26[%25] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
%28 = llvm.ptrtoint %27 : !llvm.ptr<f64> to i64
%29 = llvm.call @malloc(%28) : (i64) -> !llvm.ptr<i8>
%30 = llvm.bitcast %29 : !llvm.ptr<i8> to !llvm.ptr<f64>
%31 = llvm.mlir.undef : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%32 = llvm.insertvalue %30, %31[0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%33 = llvm.insertvalue %30, %32[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%34 = llvm.mlir.constant(0 : index) : i64
%35 = llvm.insertvalue %34, %33[2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%36 = llvm.insertvalue %20, %35[3, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%37 = llvm.insertvalue %21, %36[3, 1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%38 = llvm.insertvalue %22, %37[3, 2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%39 = llvm.insertvalue %24, %38[4, 0] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%40 = llvm.insertvalue %22, %39[4, 1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%41 = llvm.insertvalue %23, %40[4, 2] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%42 = llvm.mlir.constant(1 : index) : i64
%43 = llvm.mlir.constant(71 : index) : i64
%44 = llvm.mlir.constant(1 : index) : i64
llvm.br ^bb1(%42 : i64)
^bb1(%45: i64): // 2 preds: ^bb0, ^bb8
%46 = llvm.icmp "slt" %45, %43 : i64
llvm.cond_br %46, ^bb2, ^bb9
^bb2: // pred: ^bb1
%47 = llvm.mlir.constant(1 : index) : i64
%48 = llvm.mlir.constant(17 : index) : i64
%49 = llvm.mlir.constant(1 : index) : i64
llvm.br ^bb3(%47 : i64)
^bb3(%50: i64): // 2 preds: ^bb2, ^bb7
%51 = llvm.icmp "slt" %50, %48 : i64
llvm.cond_br %51, ^bb4, ^bb8
^bb4: // pred: ^bb3
%52 = llvm.mlir.constant(0 : index) : i64
%53 = llvm.mlir.constant(16 : index) : i64
%54 = llvm.mlir.constant(1 : index) : i64
llvm.br ^bb5(%52 : i64)
^bb5(%55: i64): // 2 preds: ^bb4, ^bb6
%56 = llvm.icmp "slt" %55, %53 : i64
llvm.cond_br %56, ^bb6, ^bb7
^bb6: // pred: ^bb5
%57 = llvm.mlir.constant(-4.000000e+00 : f64) : f64
%58 = llvm.extractvalue %19[1] : !llvm.struct<(ptr<f64>, ptr<f64>, i64, array<3 x i64>, array<3 x i64>)>
%59 = llvm.mlir.constant(288 : index) : i64
%60 = llvm.mul %45, %59 : i64
%61 = llvm.mlir.constant(16 : index) : i64
%62 = llvm.mul %50, %61 : i64
%63 = llvm.add %60, %62 : i64
%64 = llvm.add %63, %55 : i64
%65 = llvm.getelementptr %58[%64] : (!llvm.ptr<f64>, i64) -> !llvm.ptr<f64>
llvm.store %57, %65 : !llvm.ptr<f64>
%66 = llvm.add %55, %54 : i64
llvm.br ^bb5(%66 : i64)
^bb7: // pred: ^bb5
%67 = llvm.add %50, %49 : i64
llvm.br ^bb3(%67 : i64)
^bb8: // pred: ^bb3
%68 = llvm.add %45, %44 : i64
llvm.br ^bb1(%68 : i64)
^bb9: // pred: ^bb1
llvm.return
}
}
then I ran these commands:
mlir-translate --mlir-to-llvmir laplace.mlir > laplace.bc
llc laplace.bc -o laplace.s
clang -c laplace.s -o laplace.o
and build a main.c file:
#include <stdio.h>
void laplace(double input[72][18][16], double output[72][18][16]);
int main(){
double input[72][18][16];
double output[72][18][16];
for(int i=0;i<72;i++){
for(int j=0;j<18;j++){
for(int k=0;k<16;k++){
input[i][j][k] = 1;
}
}
}
laplace(input, output);
return 0;
}
linked them:
gcc -c main.c -o main.o
gcc laplace.o main.o -o laplace
./laplace
and got this fault:
Segmentation fault (core dumped)
Any help to run this correctly is appreciated.