Hi Richard,
As far as file names are concerned, I didn’t find any information on how clang-tidy decides which language a file is written in. I ran an experiment: I renamed a simple Hello World program from hello.cpp to hello.frog and then ran:
clang-tidy hello.frog --
which resulted in “Error while processing hello.frog.” But then I found that when I invoke
clang-tidy hello.frog -- -xc++
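everything works fine. So clang-tidy apparently deduces the language from the file extension unless it is given explicitly, and it seems that clang-tidy doesn’t support CUDA files. Below is the listing of a simple CUDA program: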
// simple_add.cu
#include <algorithm>
#include <cmath>
#include <iostream>
#include <memory>
// Element-wise in-place addition: x[i] = y[i] + x[i] for every i in [0, n).
//
// x: input/output array, read and overwritten by the kernel.
// y: input array, read only.
// n: number of elements to process in both arrays.
//
// Written as a grid-stride loop over a 1D launch, so the kernel can be run
// with fewer threads than there are elements.
__global__
void add(float *x, const float *y, int n) {
  // Distance between two consecutive elements handled by the same thread:
  // the total number of threads in the grid.
  const int step = blockDim.x * gridDim.x;
  // Start at this thread's global index within the grid.
  int pos = blockIdx.x * blockDim.x + threadIdx.x;
  while (pos < n) {
    x[pos] = y[pos] + x[pos];
    pos += step;
  }
}
// Reports a failed CUDA runtime call on stderr.
//
// status: return code of the CUDA runtime call.
// what:   short description of the call, used in the diagnostic message.
// Returns true when status is cudaSuccess and false otherwise.
static bool cudaOk(cudaError_t status, const char *what) {
  if (status == cudaSuccess) {
    return true;
  }
  std::cerr << "CUDA error in " << what << ": " << cudaGetErrorString(status)
            << std::endl;
  return false;
}

// Fills two host arrays with constants, adds them on the device with the
// `add` kernel and prints the largest deviation of the result from the
// expected sum.
//
// Returns 0 on success and 1 if any CUDA runtime call or the kernel launch
// reports an error.
int main(int argc, char** argv) {
  // Command-line arguments are not used by this sample.
  (void)argc;
  (void)argv;

  const int N = 1 << 20;
  const size_t bytes = N * sizeof(float);
  const float val_x = 1.0f;
  const float val_y = 2.0f;

  // host memory initialization
  std::unique_ptr<float[]> x{new float[N]};
  std::unique_ptr<float[]> y{new float[N]};
  std::fill(x.get(), x.get() + N, val_x);
  std::fill(y.get(), y.get() + N, val_y);

  // device memory initialization; the pointers start as null so that the
  // unconditional cudaFree calls below are no-ops if an allocation fails
  float *d_x = nullptr;
  float *d_y = nullptr;
  bool ok =
      cudaOk(cudaMalloc(reinterpret_cast<void **>(&d_x), bytes),
             "cudaMalloc(d_x)") &&
      cudaOk(cudaMalloc(reinterpret_cast<void **>(&d_y), bytes),
             "cudaMalloc(d_y)");

  // copy to device (skipped by short-circuit evaluation after a failure)
  ok = ok &&
       cudaOk(cudaMemcpy(d_x, x.get(), bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(x -> d_x)") &&
       cudaOk(cudaMemcpy(d_y, y.get(), bytes, cudaMemcpyHostToDevice),
              "cudaMemcpy(y -> d_y)");

  if (ok) {
    // invoke the kernel
    const int blockSize = 256;
    const int numBlocks = (N + blockSize - 1) / blockSize;  // ceil-div
    add<<<numBlocks, blockSize>>>(d_x, d_y, N);

    // A kernel launch returns nothing itself: launch errors are read with
    // cudaGetLastError(), and the blocking copy back from the device reports
    // errors raised while the kernel was executing.
    ok = cudaOk(cudaGetLastError(), "add kernel launch") &&
         cudaOk(cudaMemcpy(x.get(), d_x, bytes, cudaMemcpyDeviceToHost),
                "cudaMemcpy(d_x -> x)");
  }

  if (ok) {
    // check output validity over ALL N elements
    const float mean = val_x + val_y;
    // `mean` must be captured: a const float local is not usable in constant
    // expressions, so a capture-less lambda may not read it.
    const float *worst = std::max_element(
        x.get(), x.get() + N, [mean](const float &a, const float &b) {
          return std::fabs(a - mean) < std::fabs(b - mean);
        });
    const float max_error = std::fabs(*worst - mean);
    std::cout << "Max error: " << max_error << std::endl;
  }

  // free device memory; both calls are made regardless of earlier failures
  const bool freed_x = cudaOk(cudaFree(d_x), "cudaFree(d_x)");
  const bool freed_y = cudaOk(cudaFree(d_y), "cudaFree(d_y)");

  return (ok && freed_x && freed_y) ? 0 : 1;
}
It can be compiled by nvcc:
nvcc -o simple_add -std=c++11 simple_add.cu
or by clang:
clang++ -o simple_add simple_add.cu -std=c++11 -L/usr/local/cuda/lib64 -lcudart_static -ldl -lrt -pthread
but clang-tidy invoked as
clang-tidy simple_add.cu -- nvcc -o simple_add -std=c++11 simple_add.cu
or
clang-tidy simple_add.cu -- clang++ -o simple_add -std=c++11 -L/usr/local/cuda/lib64 -lcudart_static -ldl -lrt -pthread simple_add.cu
reports “Error while processing simple_add.cu.”
As can be seen from the above, clang can compile CUDA but clang-tidy doesn’t handle it. Is there any chance that CUDA will be supported in clang-tidy in the near future?
Versions of the software used:
Ubuntu 16.04
clang 6.0.1 with libstdc++ + cuda 8.0
gcc 5.4.0 with cuda 9.2
Jakub