Memref.store storing a memref.load

I have the following MLIR file:

  // Despite its name, this function performs no matrix multiplication: it
  // reads element 0 of %A and writes a single value to element 0 of %C.
  func.func @matmul(%A: memref<?xf32>, %C: memref<?xf32>){
    // Index 0, shared by the load and the store(s) below.
    %c0 = arith.constant 0 : index
    // Constant 1.0 written to %C by the active store.
    %bla = arith.constant 1.0 : f32
    // Read %A[0]; the result is only consumed by the commented-out store.
    %a = memref.load %A[%c0] : memref<?xf32>
    // Variant 1 (active): store the constant into %C[0].
    memref.store %bla, %C[%c0] : memref<?xf32>
    // Variant 2 (disabled): store the value loaded from %A[0] into %C[0].
    // memref.store %a, %C[%c0] : memref<?xf32>
    return
  }

and the corresponding C++ file that calls into it:

#include <iostream>

/// Host-side mirror of the descriptor used to hand a ranked memref to the
/// `_mlir_ciface_*` entry point.
///
/// @tparam T    element type of the buffer.
/// @tparam Rank number of dimensions; fixes the length of `sizes`/`strides`.
///
/// The member order follows MLIR's documented memref descriptor layout and
/// must not be rearranged.
template <class T, size_t Rank> struct MemRefDescriptor {
  T *allocated;           // pointer to the underlying allocation
  T *aligned;             // pointer that element accesses are computed from
  intptr_t offset;        // position of the first element, relative to `aligned`
  intptr_t sizes[Rank];   // extent of each dimension
  intptr_t strides[Rank]; // stride of each dimension
};

// Declaration of the C-interface wrapper for the MLIR function @matmul.
// NOTE(review): the memref descriptors are declared as by-value parameters
// and the result as `void *`, although @matmul returns nothing. MLIR's
// C-interface wrappers are documented as taking *pointers* to the
// descriptors -- verify this signature against the generated symbol.
extern "C" {
    void *_mlir_ciface_matmul(MemRefDescriptor<float, 1> A, MemRefDescriptor<float, 1> C);
}

/// Prints a row-major `rows` x `cols` matrix to stdout.
///
/// Every element is written with "%f " (so each row ends in a trailing
/// space) and every row is terminated by a newline.
///
/// @param matrix first element of a buffer holding at least rows * cols floats.
/// @param rows   number of rows; nothing is printed when <= 0.
/// @param cols   number of columns; only newlines are printed when <= 0.
void print_matrix(float *matrix, int64_t rows, int64_t cols) {
    // The counters use the same 64-bit type as the bounds: an `int` counter
    // would overflow before reaching a bound larger than INT_MAX.
    for (int64_t row = 0; row < rows; ++row) {
        for (int64_t col = 0; col < cols; ++col) {
            printf("%f ", matrix[row * cols + col]);
        }
        printf("\n");
    }
}

int main() {
    // A is treated as an M x N matrix and C as M x K; both are handed to the
    // MLIR function as flat 1-D buffers.
    const int64_t M = 2, N = 2, K = 2;

    float A[] = {1.0, 2.0, 3.0, 4.0};
    float C[] = {0, 0, 0, 0};

    // Initialiser order: allocated, aligned, offset, sizes, strides.
    MemRefDescriptor<float, 1> memrefA = {A, A, 0, {M * N}, {1}};
    MemRefDescriptor<float, 1> memrefC = {C, C, 0, {M * K}, {1}};

    // std::endl flushes, so this message is emitted before the call below.
    std::cout << "Before matmul call\n" << std::endl;

    // NOTE(review): both descriptors are passed by value here -- confirm
    // that this is what the generated wrapper actually expects.
    _mlir_ciface_matmul(memrefA, memrefC);

    // C is printed as an M x K matrix so any element the call wrote shows up.
    std::cout << "Result matrix:\n";
    print_matrix(C, M, K);

    return 0;
}

With memref.store %bla, %C[%c0] : memref<?xf32>, when I print out C, I can see that the element in the 0th index has been updated. But with memref.store %a, %C[%c0] : memref<?xf32>, the value at C[0] is still 0. Am I using the memref dialect incorrectly?

These are the passes I am using to lower the MLIR file to LLVM: --llvm-request-c-wrappers --convert-func-to-llvm --convert-arith-to-llvm --finalize-memref-to-llvm --reconcile-unrealized-casts

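This worked well, with the same changes as in "Segmentation fault on memref.store - #2 by reikdas": the memref descriptors are passed to _mlir_ciface_matmul by pointer, and the wrapper is declared as returning void.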

#include <iostream>

/// Host-side mirror of the descriptor used to hand a ranked memref to the
/// `_mlir_ciface_*` entry point.
///
/// @tparam T    element type of the buffer.
/// @tparam Rank number of dimensions; fixes the length of `sizes`/`strides`.
///
/// The member order follows MLIR's documented memref descriptor layout and
/// must not be rearranged.
template <class T, size_t Rank> struct MemRefDescriptor {
  T *allocated;           // pointer to the underlying allocation
  T *aligned;             // pointer that element accesses are computed from
  intptr_t offset;        // position of the first element, relative to `aligned`
  intptr_t sizes[Rank];   // extent of each dimension
  intptr_t strides[Rank]; // stride of each dimension
};

// C-interface entry point for the MLIR function @matmul, as produced when
// lowering with --llvm-request-c-wrappers. Each memref argument is passed as
// a pointer to its descriptor; the function has no result.
extern "C" void _mlir_ciface_matmul(MemRefDescriptor<float, 1> *A,
                                    MemRefDescriptor<float, 1> *C);

/// Prints a row-major `rows` x `cols` matrix to stdout.
///
/// Every element is written with "%f " (so each row ends in a trailing
/// space) and every row is terminated by a newline.
///
/// @param matrix first element of a buffer holding at least rows * cols floats.
/// @param rows   number of rows; nothing is printed when <= 0.
/// @param cols   number of columns; only newlines are printed when <= 0.
void print_matrix(float *matrix, int64_t rows, int64_t cols) {
  // The counters use the same 64-bit type as the bounds: an `int` counter
  // would overflow before reaching a bound larger than INT_MAX.
  for (int64_t row = 0; row < rows; ++row) {
    for (int64_t col = 0; col < cols; ++col) {
      printf("%f ", matrix[row * cols + col]);
    }
    printf("\n");
  }
}

int main() {
  // A is treated as an M x N matrix and C as M x K; both are handed to the
  // MLIR function as flat 1-D buffers.
  const int64_t M = 2, N = 2, K = 2;
  float A[] = {67686.0, 2.0, 3.0, 4.0};
  float C[] = {0, 0, 0, 0};

  // The descriptors are plain stack objects whose addresses are passed to
  // the wrapper. Allocating them with `new` (and never freeing them) leaked
  // both objects for no benefit.
  MemRefDescriptor<float, 1> memrefA = {
      A,       // allocated
      A,       // aligned
      0,       // offset
      {M * N}, // sizes[N]
      {1},     // strides[N]
  };
  MemRefDescriptor<float, 1> memrefC = {
      C,       // allocated
      C,       // aligned
      0,       // offset
      {M * K}, // sizes[N]
      {1},     // strides[N]
  };

  // std::endl flushes, so this message is emitted before the call below.
  std::cout << "Before matmul call\n" << std::endl;

  _mlir_ciface_matmul(&memrefA, &memrefC);

  // C is printed as an M x K matrix so any element the call wrote shows up.
  std::cout << "Result matrix:\n";
  print_matrix(C, M, K);

  return 0;
}