[NVPTX] CUDA inline PTX asm definitions scoping "{" "}" is broken


It looks like “{” and “}” are lost when using Clang together with the NVPTX backend, which may result in a clash between function-scope and asm-scope definitions. Here is an example:

cat test.cu
__attribute__((device)) __attribute__((nv_linkonce_odr)) inline int __any(int a) {
int result;
asm volatile ("{ \n\t"
".reg .pred \t%%p1; \n\t"
".reg .pred \t%%p2; \n\t"
"setp.ne.u32 \t%%p1, %1, 0; \n\t"
"vote.any.pred \t%%p2, %%p1; \n\t"
"selp.s32 \t%0, 1, 0, %%p2; \n\t"
"}" : "=r"(result) : "r"(a));
return result;
}

clang -cc1 -emit-llvm -fcuda-is-device -triple ptx64-unknown-unknown test.cu -o test.ll
cat test.ll
; ModuleID = 'test.cu'
target datalayout = "e-p:64:64-i64:64:64-f64:64:64-n1:8:16:32:64"
target triple = "ptx64-unknown-unknown"

define ptx_device i32 @_Z5__anyi(i32 %a) nounwind inlinehint {
%a.addr = alloca i32, align 4
%result = alloca i32, align 4
store i32 %a, i32* %a.addr, align 4
%0 = load i32* %a.addr, align 4
%1 = call i32 asm sideeffect "$( \0A\09.reg .pred \09%p1; \0A\09.reg .pred \09%p2; \0A\09setp.ne.u32 \09%p1, $1, 0; \0A\09vote.any.pred \09%p2, %p1; \0A\09selp.s32 \09$0, 1, 0, %p2; \0A\09$)", "=r,r"(i32 %0) nounwind, !srcloc !0
store i32 %1, i32* %result, align 4
%2 = load i32* %result, align 4
ret i32 %2
}

!0 = metadata !{i32 127, i32 132, i32 166, i32 200, i32 242, i32 285, i32 327}

llc -march=nvptx64 test.ll -o test.ptx
cat test.ptx
// Generated by LLVM NVPTX Back-End

.version 3.0
.target sm_10, texmode_independent
.address_size 64

// .globl _Z5__anyi
.visible .global .align 4 .b8 __local_depot0[8];

.func (.reg .b32 func_retval0) _Z5__anyi(
.reg .b32 _Z5__anyi_param_0
) // @_Z5__anyi
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<396>;
.reg .s16 %rc<396>;
.reg .s16 %rs<396>;
.reg .s32 %r<396>;
.reg .s64 %rl<396>;
.reg .f32 %f<396>;
.reg .f64 %fl<396>;

// BB#0: // %entry
mov.u64 %SP, __local_depot0;
mov.b32 %r0, _Z5__anyi_param_0;
st.global.u32 [%SP+0], %r0;
// inline asm

.reg .pred %p1;
.reg .pred %p2;
setp.ne.u32 %p1, %r0, 0;
vote.any.pred %p2, %p1;
selp.s32 %r0, 1, 0, %p2;

// inline asm
st.global.u32 [%SP+4], %r0;
mov.b32 func_retval0, %r0;

ptxas test.ptx -o test.cubin
ptxas test.ptx, line 33; error : Duplicate definition of variable '%p1'
ptxas test.ptx, line 34; error : Duplicate definition of variable '%p2'
ptxas test.ptx, line 36; error : Instruction 'vote' requires .target sm_12 or higher
ptxas fatal : Ptx assembly aborted due to errors

  • D.

You might be better served by filing this as a bug (http://llvm.org/bugs/). Please include a test case and the steps to reproduce (i.e., what you’ve provided below).


Yes, sure, good idea, because it might also be Clang-related.


2012/7/11 Chad Rosier <mcrosier@apple.com>

Let me propose a fix:

--- a/llvm/tools/clang/lib/Basic/Targets.cpp (revision 157736)
+++ b/llvm/tools/clang/lib/Basic/Targets.cpp (working copy)
@@ -966,6 +966,10 @@
AddrSpaceMap = &NVPTXAddrSpaceMap;
// Define available target features
// These must be defined in sorted order!