I want some private temporary memory on GPU to store some results.
Actually I write a piece of CUDA C++ code to dynamically allocate memory inside kernel using the new
keyword, and it works fine: (cuda 11.8, NVIDIA GeForce RTX 2060 SUPER
)
#include <stdio.h>
__global__ void myKernel(int n) {
int* test = new int[n];
test[threadIdx.x] = 1024;
printf("test[0]=%d from block %d, thread %d\n", test[0], blockIdx.x, threadIdx.x);
printf("test[1]=%d from block %d, thread %d\n", test[1], blockIdx.x, threadIdx.x);
delete[] test;
}
int main() {
myKernel<<<1, 2>>>(100);
cudaDeviceSynchronize();
return 0;
}
and it outputs:
test[0]=1024 from block 0, thread 0
test[0]=0 from block 0, thread 1
test[1]=0 from block 0, thread 0
test[1]=1024 from block 0, thread 1
which means it does dynamically allocate private memory on GPU.
I tried memref.alloca
op inside kernel, but it got crashed:
LLVM ERROR: Cannot select: t8: i64,ch = dynamic_stackalloc t0, Constant:i64<1280>, Constant:i64<0>
t4: i64 = Constant<1280>
t1: i64 = Constant<0>
In function: main_kernel
PLEASE submit a bug report to https://github.com/llvm/llvm-project/issues/ and include the crash backtrace.
Stack dump:
0. Program arguments: wafer-opt -pass-pipeline=builtin.module(gpu.module(strip-debuginfo,convert-gpu-to-nvvm,reconcile-unrealized-casts,gpu-to-cubin))
1. Running pass 'Function Pass Manager' on module 'LLVMDialectModule'.
2. Running pass 'NVPTX DAG->DAG Pattern Instruction Selection' on function '@main_kernel'
#0 0x000056187c5e6ab6 llvm::sys::PrintStackTrace(llvm::raw_ostream&, int) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x2bcbab6)
#1 0x000056187c5e4404 SignalHandler(int) Signals.cpp:0:0
#2 0x00007f6f52242520 (/lib/x86_64-linux-gnu/libc.so.6+0x42520)
#3 0x00007f6f52296a7c __pthread_kill_implementation ./nptl/pthread_kill.c:44:76
#4 0x00007f6f52296a7c __pthread_kill_internal ./nptl/pthread_kill.c:78:10
#5 0x00007f6f52296a7c pthread_kill ./nptl/pthread_kill.c:89:10
#6 0x00007f6f52242476 gsignal ./signal/../sysdeps/posix/raise.c:27:6
#7 0x00007f6f522287f3 abort ./stdlib/abort.c:81:7
#8 0x0000561879bf0d2e llvm::json::OStream::value(llvm::json::Value const&) (.cold) JSON.cpp:0:0
#9 0x000056187b420b1d llvm::SelectionDAGISel::CannotYetSelect(llvm::SDNode*) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1a05b1d)
#10 0x000056187b4230ea llvm::SelectionDAGISel::SelectCodeCommon(llvm::SDNode*, unsigned char const*, unsigned int) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1a080ea)
#11 0x000056187b41dd2a llvm::SelectionDAGISel::DoInstructionSelection() (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1a02d2a)
#12 0x000056187b42b609 llvm::SelectionDAGISel::CodeGenAndEmitDAG() (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1a10609)
#13 0x000056187b42f414 llvm::SelectionDAGISel::SelectAllBasicBlocks(llvm::Function const&) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1a14414)
#14 0x000056187b4312c2 llvm::SelectionDAGISel::runOnMachineFunction(llvm::MachineFunction&) (.part.0) SelectionDAGISel.cpp:0:0
#15 0x000056187b6488c5 llvm::MachineFunctionPass::runOnFunction(llvm::Function&) (.part.0) MachineFunctionPass.cpp:0:0
#16 0x000056187c1f4263 llvm::FPPassManager::runOnFunction(llvm::Function&) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x27d9263)
#17 0x000056187c1f4499 llvm::FPPassManager::runOnModule(llvm::Module&) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x27d9499)
#18 0x000056187c1f4a05 llvm::legacy::PassManagerImpl::run(llvm::Module&) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x27d9a05)
#19 0x0000561879fd666b mlir::gpu::SerializeToBlobPass::translateToISA[abi:cxx11](llvm::Module&, llvm::TargetMachine&) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x5bb66b)
#20 0x0000561879fd68cc mlir::gpu::SerializeToBlobPass::runOnOperation() (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x5bb8cc)
#21 0x000056187c345881 mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x292a881)
#22 0x000056187c345f59 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x292af59)
#23 0x000056187c344681 mlir::detail::OpToOpPassAdaptor::runOnOperationAsyncImpl(bool) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x2929681)
#24 0x000056187c34572e mlir::detail::OpToOpPassAdaptor::run(mlir::Pass*, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x292a72e)
#25 0x000056187c345f59 mlir::detail::OpToOpPassAdaptor::runPipeline(mlir::OpPassManager&, mlir::Operation*, mlir::AnalysisManager, bool, unsigned int, mlir::PassInstrumentor*, mlir::PassInstrumentation::PipelineParentInfo const*) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x292af59)
#26 0x000056187c346bb9 mlir::PassManager::run(mlir::Operation*) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x292bbb9)
#27 0x000056187b091efb performActions(llvm::raw_ostream&, bool, bool, std::shared_ptr<llvm::SourceMgr> const&, mlir::MLIRContext*, llvm::function_ref<mlir::LogicalResult (mlir::PassManager&)>, bool, bool) (.constprop.0) MlirOptMain.cpp:0:0
#28 0x000056187b09250f processBuffer(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, bool, bool, bool, bool, bool, bool, llvm::function_ref<mlir::LogicalResult (mlir::PassManager&)>, mlir::DialectRegistry&, llvm::ThreadPool*) MlirOptMain.cpp:0:0
#29 0x000056187b0927ab mlir::LogicalResult llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>::callback_fn<mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<mlir::LogicalResult (mlir::PassManager&)>, mlir::DialectRegistry&, bool, bool, bool, bool, bool, bool, bool)::'lambda'(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>(long, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&) MlirOptMain.cpp:0:0
#30 0x000056187c510e8e mlir::splitAndProcessBuffer(std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::function_ref<mlir::LogicalResult (std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, llvm::raw_ostream&)>, llvm::raw_ostream&, bool, bool) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x2af5e8e)
#31 0x000056187b090ba0 mlir::MlirOptMain(llvm::raw_ostream&, std::unique_ptr<llvm::MemoryBuffer, std::default_delete<llvm::MemoryBuffer>>, mlir::PassPipelineCLParser const&, mlir::DialectRegistry&, bool, bool, bool, bool, bool, bool, bool, bool) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1675ba0)
#32 0x000056187b092c0d mlir::MlirOptMain(int, char**, llvm::StringRef, mlir::DialectRegistry&, bool) (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1677c0d)
#33 0x0000561879bf0f2c main (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x1d5f2c)
#34 0x00007f6f52229d90 __libc_start_call_main ./csu/../sysdeps/nptl/libc_start_call_main.h:58:16
#35 0x00007f6f52229e40 call_init ./csu/../csu/libc-start.c:128:20
#36 0x00007f6f52229e40 __libc_start_main ./csu/../csu/libc-start.c:379:5
#37 0x0000561879c44485 _start (/wafer/tzy/wafer-compiler/build/bin/wafer-opt+0x229485)
Error: entry point not found