[PATCH 1/1] ptx: Fix builtin names after clang r274770

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Ping: this is needed to fix the nvptx build.

> Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
> ---
> I'm pretty sure the barrier implementation is busted, as it's a no-op
> without CLK_LOCAL_MEM_FENCE.
>
> ptx-nvidiacl/lib/synchronization/barrier.cl | 2 +-
> ptx-nvidiacl/lib/workitem/get_group_id.cl | 6 +++---
> ptx-nvidiacl/lib/workitem/get_local_id.cl | 6 +++---
> ptx-nvidiacl/lib/workitem/get_local_size.cl | 6 +++---
> ptx-nvidiacl/lib/workitem/get_num_groups.cl | 6 +++---
> 5 files changed, 13 insertions(+), 13 deletions(-)
>
> diff --git a/ptx-nvidiacl/lib/synchronization/barrier.cl b/ptx-nvidiacl/lib/synchronization/barrier.cl
> index fb36c26..88e1493 100644
> --- a/ptx-nvidiacl/lib/synchronization/barrier.cl
> +++ b/ptx-nvidiacl/lib/synchronization/barrier.cl
> @@ -2,7 +2,7 @@
>
> _CLC_DEF void barrier(cl_mem_fence_flags flags) {
> if (flags & CLK_LOCAL_MEM_FENCE) {
> - __builtin_ptx_bar_sync(0);
> + __syncthreads();
> }
> }
>
> diff --git a/ptx-nvidiacl/lib/workitem/get_group_id.cl b/ptx-nvidiacl/lib/workitem/get_group_id.cl
> index 2b35b4e..dbc4784 100644
> --- a/ptx-nvidiacl/lib/workitem/get_group_id.cl
> +++ b/ptx-nvidiacl/lib/workitem/get_group_id.cl
> @@ -2,9 +2,9 @@
>
> _CLC_DEF size_t get_group_id(uint dim) {
> switch (dim) {
> - case 0: return __builtin_ptx_read_ctaid_x();
> - case 1: return __builtin_ptx_read_ctaid_y();
> - case 2: return __builtin_ptx_read_ctaid_z();
> + case 0: return __nvvm_read_ptx_sreg_ctaid_x();
> + case 1: return __nvvm_read_ptx_sreg_ctaid_y();
> + case 2: return __nvvm_read_ptx_sreg_ctaid_z();
> default: return 0;
> }
> }
> diff --git a/ptx-nvidiacl/lib/workitem/get_local_id.cl b/ptx-nvidiacl/lib/workitem/get_local_id.cl
> index f0cfdc0..f31581a 100644
> --- a/ptx-nvidiacl/lib/workitem/get_local_id.cl
> +++ b/ptx-nvidiacl/lib/workitem/get_local_id.cl
> @@ -2,9 +2,9 @@
>
> _CLC_DEF size_t get_local_id(uint dim) {
> switch (dim) {
> - case 0: return __builtin_ptx_read_tid_x();
> - case 1: return __builtin_ptx_read_tid_y();
> - case 2: return __builtin_ptx_read_tid_z();
> + case 0: return __nvvm_read_ptx_sreg_tid_x();
> + case 1: return __nvvm_read_ptx_sreg_tid_y();
> + case 2: return __nvvm_read_ptx_sreg_tid_z();
> default: return 0;
> }
> }
> diff --git a/ptx-nvidiacl/lib/workitem/get_local_size.cl b/ptx-nvidiacl/lib/workitem/get_local_size.cl
> index c3f5425..d00b0d6 100644
> --- a/ptx-nvidiacl/lib/workitem/get_local_size.cl
> +++ b/ptx-nvidiacl/lib/workitem/get_local_size.cl
> @@ -2,9 +2,9 @@
>
> _CLC_DEF size_t get_local_size(uint dim) {
> switch (dim) {
> - case 0: return __builtin_ptx_read_ntid_x();
> - case 1: return __builtin_ptx_read_ntid_y();
> - case 2: return __builtin_ptx_read_ntid_z();
> + case 0: return __nvvm_read_ptx_sreg_ntid_x();
> + case 1: return __nvvm_read_ptx_sreg_ntid_y();
> + case 2: return __nvvm_read_ptx_sreg_ntid_z();
> default: return 0;
> }
> }
> diff --git a/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/ptx-nvidiacl/lib/workitem/get_num_groups.cl
> index 90bdc2e..d7abf3f 100644
> --- a/ptx-nvidiacl/lib/workitem/get_num_groups.cl
> +++ b/ptx-nvidiacl/lib/workitem/get_num_groups.cl
> @@ -2,9 +2,9 @@
>
> _CLC_DEF size_t get_num_groups(uint dim) {
> switch (dim) {
> - case 0: return __builtin_ptx_read_nctaid_x();
> - case 1: return __builtin_ptx_read_nctaid_y();
> - case 2: return __builtin_ptx_read_nctaid_z();
> + case 0: return __nvvm_read_ptx_sreg_nctaid_x();
> + case 1: return __nvvm_read_ptx_sreg_nctaid_y();
> + case 2: return __nvvm_read_ptx_sreg_nctaid_z();
> default: return 0;
> }
> }

Ping: this is needed to fix the nvptx build.

I don't know PTX well enough to provide a proper review, but it looks sane
enough to me, and I can confirm that it fixes the build for me (and that it
was broken before).

How about an Acked-by: Aaron Watry <awatry@gmail.com>?

--Aaron

> > Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
> > ---
> > I'm pretty sure the barrier implementation is busted, as it's a no-op
> > without CLK_LOCAL_MEM_FENCE.
> >
> > ptx-nvidiacl/lib/synchronization/barrier.cl | 2 +-
> > ptx-nvidiacl/lib/workitem/get_group_id.cl | 6 +++---
> > ptx-nvidiacl/lib/workitem/get_local_id.cl | 6 +++---
> > ptx-nvidiacl/lib/workitem/get_local_size.cl | 6 +++---
> > ptx-nvidiacl/lib/workitem/get_num_groups.cl | 6 +++---
> > 5 files changed, 13 insertions(+), 13 deletions(-)
> >
> > diff --git a/ptx-nvidiacl/lib/synchronization/barrier.cl b/ptx-nvidiacl/lib/synchronization/barrier.cl
> > index fb36c26..88e1493 100644
> > --- a/ptx-nvidiacl/lib/synchronization/barrier.cl
> > +++ b/ptx-nvidiacl/lib/synchronization/barrier.cl
> > @@ -2,7 +2,7 @@
> >
> > _CLC_DEF void barrier(cl_mem_fence_flags flags) {
> > if (flags & CLK_LOCAL_MEM_FENCE) {
> > - __builtin_ptx_bar_sync(0);
> > + __syncthreads();
> > }
> > }
> >
> > diff --git a/ptx-nvidiacl/lib/workitem/get_group_id.cl b/ptx-nvidiacl/lib/workitem/get_group_id.cl
> > index 2b35b4e..dbc4784 100644
> > --- a/ptx-nvidiacl/lib/workitem/get_group_id.cl
> > +++ b/ptx-nvidiacl/lib/workitem/get_group_id.cl
> > @@ -2,9 +2,9 @@
> >
> > _CLC_DEF size_t get_group_id(uint dim) {
> > switch (dim) {
> > - case 0: return __builtin_ptx_read_ctaid_x();
> > - case 1: return __builtin_ptx_read_ctaid_y();
> > - case 2: return __builtin_ptx_read_ctaid_z();
> > + case 0: return __nvvm_read_ptx_sreg_ctaid_x();
> > + case 1: return __nvvm_read_ptx_sreg_ctaid_y();
> > + case 2: return __nvvm_read_ptx_sreg_ctaid_z();
> > default: return 0;
> > }
> > }
> > diff --git a/ptx-nvidiacl/lib/workitem/get_local_id.cl b/ptx-nvidiacl/lib/workitem/get_local_id.cl
> > index f0cfdc0..f31581a 100644
> > --- a/ptx-nvidiacl/lib/workitem/get_local_id.cl
> > +++ b/ptx-nvidiacl/lib/workitem/get_local_id.cl
> > @@ -2,9 +2,9 @@
> >
> > _CLC_DEF size_t get_local_id(uint dim) {
> > switch (dim) {
> > - case 0: return __builtin_ptx_read_tid_x();
> > - case 1: return __builtin_ptx_read_tid_y();
> > - case 2: return __builtin_ptx_read_tid_z();
> > + case 0: return __nvvm_read_ptx_sreg_tid_x();
> > + case 1: return __nvvm_read_ptx_sreg_tid_y();
> > + case 2: return __nvvm_read_ptx_sreg_tid_z();
> > default: return 0;
> > }
> > }
> > diff --git a/ptx-nvidiacl/lib/workitem/get_local_size.cl b/ptx-nvidiacl/lib/workitem/get_local_size.cl
> > index c3f5425..d00b0d6 100644
> > --- a/ptx-nvidiacl/lib/workitem/get_local_size.cl
> > +++ b/ptx-nvidiacl/lib/workitem/get_local_size.cl
> > @@ -2,9 +2,9 @@
> >
> > _CLC_DEF size_t get_local_size(uint dim) {
> > switch (dim) {
> > - case 0: return __builtin_ptx_read_ntid_x();
> > - case 1: return __builtin_ptx_read_ntid_y();
> > - case 2: return __builtin_ptx_read_ntid_z();
> > + case 0: return __nvvm_read_ptx_sreg_ntid_x();
> > + case 1: return __nvvm_read_ptx_sreg_ntid_y();
> > + case 2: return __nvvm_read_ptx_sreg_ntid_z();
> > default: return 0;
> > }
> > }
> > diff --git a/ptx-nvidiacl/lib/workitem/get_num_groups.cl b/ptx-nvidiacl/lib/workitem/get_num_groups.cl
> > index 90bdc2e..d7abf3f 100644
> > --- a/ptx-nvidiacl/lib/workitem/get_num_groups.cl
> > +++ b/ptx-nvidiacl/lib/workitem/get_num_groups.cl
> > @@ -2,9 +2,9 @@
> >
> > _CLC_DEF size_t get_num_groups(uint dim) {
> > switch (dim) {
> > - case 0: return __builtin_ptx_read_nctaid_x();
> > - case 1: return __builtin_ptx_read_nctaid_y();
> > - case 2: return __builtin_ptx_read_nctaid_z();
> > + case 0: return __nvvm_read_ptx_sreg_nctaid_x();
> > + case 1: return __nvvm_read_ptx_sreg_nctaid_y();
> > + case 2: return __nvvm_read_ptx_sreg_nctaid_z();
> > default: return 0;
> > }
> > }
>
> Ping: this is needed to fix the nvptx build.
>

I don't know PTX well enough to provide a proper review, but it looks sane
enough to me, and I can confirm that it fixes the build for me (and that it
was broken before).

How about an Acked-by: Aaron Watry <awatry@gmail.com>?

I'm fine with this too. Please merge.

-Tom