Segmentation fault on memref.store

I have the following C file:

#include <stdio.h>
#include <stdint.h>

extern void matmul(int64_t M, int64_t N, int64_t K, float *A, float *B, float *C);

// Prints a rows x cols matrix stored as a flat, row-major float array.
// Every element is written with "%f " (so each row ends with a trailing
// space) and every row is terminated by a newline. Nothing is printed when
// rows is 0; a bare newline per row is printed when cols is 0.
void print_matrix(float *matrix, int64_t rows, int64_t cols) {
    // The indices use the same 64-bit type as the bounds: an `int` counter
    // would overflow before reaching a bound larger than INT_MAX.
    for (int64_t row = 0; row < rows; ++row) {
        for (int64_t col = 0; col < cols; ++col) {
            printf("%f ", matrix[row * cols + col]);
        }
        printf("\n");
    }
}

// Driver for the reproducer: fills two four-element inputs, calls the
// externally defined matmul with raw float pointers, then prints C as an
// M x N matrix.
int main() {
    // Problem dimensions; all three are 2 in this example.
    int64_t M = 2;
    int64_t N = 2;
    int64_t K = 2;

    // Flat four-element operands; the output buffer starts zeroed.
    float A[4] = {1.0f, 2.0f, 3.0f, 4.0f};
    float B[4] = {5.0f, 6.0f, 7.0f, 8.0f};
    float C[4] = {0.0f, 0.0f, 0.0f, 0.0f};

    fputs("Before matmul call\n", stdout);
    matmul(M, N, K, &A[0], &B[0], &C[0]);
    fputs("Result matrix:\n", stdout);

    print_matrix(&C[0], M, N);
    return 0;
}

And the following MLIR file -

module {
  // Despite its name, this function only multiplies the first element of %A
  // by the first element of %B and stores the product into the first element
  // of %C. %M, %N and %K are accepted but never used.
  // NOTE(review): the reply later in this thread says memrefs have to be
  // passed as descriptors, so a memref<?xf32> argument presumably does not
  // correspond to a bare float* on the C side - confirm against the caller.
  func.func @matmul(%M: i64, %N: i64, %K: i64, %A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>) {
    // Index 0, reused for every load/store below.
    %c0 = arith.constant 0 : index
    
    // Load A[0]
    %a = memref.load %A[%c0] : memref<?xf32>
    
    // Load B[0]
    %b = memref.load %B[%c0] : memref<?xf32>
    
    // Multiply A[0] and B[0]
    %mul = arith.mulf %a, %b : f32
    
    // Store the result in C[0]
    memref.store %mul, %C[%c0] : memref<?xf32>
    
    return
  }
}

I lower the MLIR file to an object file by:

mlir-opt --convert-scf-to-cf --convert-func-to-llvm --convert-arith-to-llvm --finalize-memref-to-llvm --reconcile-unrealized-casts foo_mm.mlir -o foo_mm_opt.mlir
mlir-translate --mlir-to-llvmir foo_mm_opt.mlir -o foo_mm.ll
llc -filetype=obj foo_mm.ll -o foo_mm.o

I compile the C file to an object file with:
clang -c -g mlir_main2.c -o mlir_main2.o

And then link them with:
clang -g foo_mm.o mlir_main2.o -o program

But when I execute it, I get a segmentation fault.

If I comment out memref.store %mul, %C[%c0] : memref<?xf32> in the MLIR file, then there is no segmentation fault.

Is there some memory layout issue happening here?

Looking at the thread "How to compile and link with other c/c++ programs", I was able to update my program.

The new C++ (no longer C, because the example in that post uses C++) code is:

#include <iostream>

// Caller-side description of a rank-N memref of element type T, used as the
// argument type of the _mlir_ciface_matmul declaration below.
// NOTE(review): the member order presumably has to match the descriptor
// layout MLIR generates - confirm against the MLIR docs before changing it.
template <typename T, size_t N> struct MemRefDescriptor {
  T *allocated;        // main() points this at the start of the backing array
  T *aligned;          // main() points this at the same array as `allocated`
  intptr_t offset;     // main() sets this to 0
  intptr_t sizes[N];   // one entry per dimension; main() stores the element count
  intptr_t strides[N]; // one entry per dimension; main() stores 1
};

// Declaration of the entry point defined in the object file produced from the
// MLIR module; extern "C" keeps the symbol name unmangled for linking.
// This version takes both descriptors by value and declares a void* return.
// NOTE(review): the reply later in this thread passes the descriptors by
// pointer and returns void, so this by-value form presumably does not match
// the generated wrapper even though the two-argument case happens to run.
extern "C" {
    void *_mlir_ciface_matmul(MemRefDescriptor<float, 1> A, MemRefDescriptor<float, 1> C);
}

// Prints a rows x cols matrix stored as a flat, row-major float array.
// Every element is written with "%f " (so each row ends with a trailing
// space) and every row is terminated by a newline. Nothing is printed when
// rows is 0; a bare newline per row is printed when cols is 0.
void print_matrix(float *matrix, int64_t rows, int64_t cols) {
    // The indices use the same 64-bit type as the bounds: an `int` counter
    // would overflow before reaching a bound larger than INT_MAX.
    for (int64_t row = 0; row < rows; ++row) {
        for (int64_t col = 0; col < cols; ++col) {
            printf("%f ", matrix[row * cols + col]);
        }
        printf("\n");
    }
}

int main() {
    int64_t M = 2, N = 2, K = 2;
    float A[] = {1.0, 2.0, 3.0, 4.0};
    float C[] = {0, 0, 0, 0};
    MemRefDescriptor<float, 1> memrefA = {
        A,    // allocated
        A,    // aligned
        0,    // offset
        {M * N}, // sizes[N]
        {1},  // strides[N]
    };
    MemRefDescriptor<float, 1> memrefC = {
        C,    // allocated
        C,    // aligned
        0,    // offset
        {M * K}, // sizes[N]
        {1},  // strides[N]
    };
    std::cout << "Before matmul call\n" << std::endl;
    _mlir_ciface_matmul(memrefA, memrefC);
    std::cout << "Result matrix:\n";
    print_matrix(C, M, K);
    return 0;
}

And the updated MLIR file:

module {
  // Reduced test case: ignores %A and only writes the constant 1.0 into the
  // first element of %C.
  // NOTE(review): the C++ caller links against _mlir_ciface_matmul, so the
  // C-interface wrapper is presumably requested somewhere in the lowering
  // pipeline (it is not requested in this module) - confirm.
  func.func @matmul(%A: memref<?xf32>, %C: memref<?xf32>){
    // Index 0, used for the single store below.
    %c0 = arith.constant 0 : index
    // Value written to C[0].
    %bla = arith.constant 1.0 : f32
    // Store the result in C[0]
    memref.store %bla, %C[%c0] : memref<?xf32>
    return
  }
}

This works.

But when I also pass %B to @matmul, by updating my C++ file to be:

#include <iostream>

// Caller-side description of a rank-N memref of element type T, used as the
// argument type of the _mlir_ciface_matmul declaration below.
// NOTE(review): the member order presumably has to match the descriptor
// layout MLIR generates - confirm against the MLIR docs before changing it.
template <typename T, size_t N> struct MemRefDescriptor {
  T *allocated;        // main() points this at the start of the backing array
  T *aligned;          // main() points this at the same array as `allocated`
  intptr_t offset;     // main() sets this to 0
  intptr_t sizes[N];   // one entry per dimension; main() stores the element count
  intptr_t strides[N]; // one entry per dimension; main() stores 1
};

// Declaration of the entry point defined in the object file produced from the
// MLIR module; extern "C" keeps the symbol name unmangled for linking.
// This version takes all three descriptors by value and declares a void*
// return.
// NOTE(review): the reply later in this thread passes the descriptors by
// pointer and returns void, so this by-value form presumably does not match
// the generated wrapper - this is the variant reported to segfault.
extern "C" {
    void *_mlir_ciface_matmul(MemRefDescriptor<float, 1> A, MemRefDescriptor<float, 1> B, MemRefDescriptor<float, 1> C);
}

// Prints a rows x cols matrix stored as a flat, row-major float array.
// Every element is written with "%f " (so each row ends with a trailing
// space) and every row is terminated by a newline. Nothing is printed when
// rows is 0; a bare newline per row is printed when cols is 0.
void print_matrix(float *matrix, int64_t rows, int64_t cols) {
    // The indices use the same 64-bit type as the bounds: an `int` counter
    // would overflow before reaching a bound larger than INT_MAX.
    for (int64_t row = 0; row < rows; ++row) {
        for (int64_t col = 0; col < cols; ++col) {
            printf("%f ", matrix[row * cols + col]);
        }
        printf("\n");
    }
}

int main() {
    int64_t M = 2, N = 2, K = 2;
    float A[] = {1.0, 2.0, 3.0, 4.0};
    float B[] = {5.0, 6.0, 7.0, 8.0};
    float C[] = {0, 0, 0, 0};
    MemRefDescriptor<float, 1> memrefA = {
        A,    // allocated
        A,    // aligned
        0,    // offset
        {M * N}, // sizes[N]
        {1},  // strides[N]
    };
    MemRefDescriptor<float, 1> memrefB = {
        B,    // allocated
        B,    // aligned
        0,    // offset
        {N * K}, // sizes[N]
        {1},  // strides[N]
    };
    MemRefDescriptor<float, 1> memrefC = {
        C,    // allocated
        C,    // aligned
        0,    // offset
        {M * K}, // sizes[N]
        {1},  // strides[N]
    };
    std::cout << "Before matmul call\n" << std::endl;
    _mlir_ciface_matmul(memrefA, memrefB, memrefC);
    std::cout << "Result matrix:\n";
    print_matrix(C, M, K);
    return 0;
}

and my MLIR file to be:

module {
  // Reduced test case: ignores %A and %B and only writes the constant 1.0
  // into the first element of %C. The only change from the previous version
  // is the extra %B argument.
  // NOTE(review): the C++ caller links against _mlir_ciface_matmul, so the
  // C-interface wrapper is presumably requested somewhere in the lowering
  // pipeline (it is not requested in this module) - confirm.
  func.func @matmul(%A: memref<?xf32>, %B: memref<?xf32>, %C: memref<?xf32>){
    // Index 0, used for the single store below.
    %c0 = arith.constant 0 : index
    // Value written to C[0].
    %bla = arith.constant 1.0 : f32
    // Store the constant in C[0].
    memref.store %bla, %C[%c0] : memref<?xf32>
    return
  }
}

I now get a segmentation fault just because of passing another argument. Is there some calling convention error happening here?

The following worked for me:

You need to pass the memref descriptors as pointers, and the C wrapper returns nothing (change the return type from void* to void).

#include <iostream>

// Caller-side description of a rank-N memref of element type T; pointers to
// instances of this struct are what _mlir_ciface_matmul is declared to take.
// NOTE(review): the member order presumably has to match the descriptor
// layout MLIR generates - confirm against the MLIR docs before changing it.
template <typename T, size_t N> struct MemRefDescriptor {
  T *allocated;        // main() points this at the start of the backing array
  T *aligned;          // main() points this at the same array as `allocated`
  intptr_t offset;     // main() sets this to 0
  intptr_t sizes[N];   // one entry per dimension; main() stores the element count
  intptr_t strides[N]; // one entry per dimension; main() stores 1
};

// Declaration of the entry point defined in the object file produced from the
// MLIR module; extern "C" keeps the symbol name unmangled for linking.
// Each memref descriptor is passed by pointer and the function returns
// nothing - the two changes this reply makes relative to the question.
extern "C" {
    void _mlir_ciface_matmul(MemRefDescriptor<float, 1> *A, MemRefDescriptor<float, 1> *B, MemRefDescriptor<float, 1> *C);
}

// Prints a rows x cols matrix stored as a flat, row-major float array.
// Every element is written with "%f " (so each row ends with a trailing
// space) and every row is terminated by a newline. Nothing is printed when
// rows is 0; a bare newline per row is printed when cols is 0.
void print_matrix(float *matrix, int64_t rows, int64_t cols) {
    // The indices use the same 64-bit type as the bounds: an `int` counter
    // would overflow before reaching a bound larger than INT_MAX.
    for (int64_t row = 0; row < rows; ++row) {
        for (int64_t col = 0; col < cols; ++col) {
            printf("%f ", matrix[row * cols + col]);
        }
        printf("\n");
    }
}

// Driver: wraps the A, B and C arrays in rank-1 memref descriptors, passes
// pointers to those descriptors to _mlir_ciface_matmul, then prints C as an
// M x K matrix.
int main() {
    const int64_t M = 2;
    const int64_t N = 2;
    const int64_t K = 2;

    float A[] = {1.0, 2.0, 3.0, 4.0};
    float B[] = {5.0, 6.0, 7.0, 8.0};
    float C[] = {0, 0, 0, 0};

    // The descriptors are plain stack objects: the callee only receives their
    // addresses, so heap-allocating them with `new` (and never freeing them)
    // is unnecessary. The explicit cast documents the int64_t -> intptr_t
    // conversion of the element counts.
    MemRefDescriptor<float, 1> memrefA = {
        A,                              // allocated
        A,                              // aligned
        0,                              // offset
        {static_cast<intptr_t>(M * N)}, // sizes[N]
        {1},                            // strides[N]
    };
    MemRefDescriptor<float, 1> memrefB = {
        B,                              // allocated
        B,                              // aligned
        0,                              // offset
        {static_cast<intptr_t>(N * K)}, // sizes[N]
        {1},                            // strides[N]
    };
    MemRefDescriptor<float, 1> memrefC = {
        C,                              // allocated
        C,                              // aligned
        0,                              // offset
        {static_cast<intptr_t>(M * K)}, // sizes[N]
        {1},                            // strides[N]
    };

    std::cout << "Before matmul call\n" << std::endl;
    // Pass each descriptor by pointer, matching the extern "C" declaration.
    _mlir_ciface_matmul(&memrefA, &memrefB, &memrefC);
    std::cout << "Result matrix:\n";
    print_matrix(C, M, K);
    return 0;
}
1 Like