[PATCH 1/2] amdgcn,waitcnt: Add datalayout info

This file is only compiled for GCN targets, which all share the same data layout.

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

We don't have memory fences for r600 so just call group barrier directly

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

ping. these are minor cleanups

Jan

We don’t have memory fences for r600 so just call group barrier directly

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

amdgpu/lib/SOURCES | 1 -
amdgpu/lib/synchronization/barrier.cl | 10 ----------
r600/lib/synchronization/barrier_impl.ll | 31 +++++--------------------------
3 files changed, 5 insertions(+), 37 deletions(-)
delete mode 100644 amdgpu/lib/synchronization/barrier.cl

diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES
index 9379ad7..f817538 100644
--- a/amdgpu/lib/SOURCES
+++ b/amdgpu/lib/SOURCES
@@ -1,7 +1,6 @@
atomic/atomic.cl
math/nextafter.cl
math/sqrt.cl
-synchronization/barrier.cl
image/get_image_width.cl
image/get_image_height.cl
image/get_image_depth.cl
diff --git a/amdgpu/lib/synchronization/barrier.cl b/amdgpu/lib/synchronization/barrier.cl
deleted file mode 100644
index 6f2900b..0000000
--- a/amdgpu/lib/synchronization/barrier.cl
+++ /dev/null
@@ -1,10 +0,0 @@
-
-#include <clc/clc.h>
-
-_CLC_DEF int __clc_clk_local_mem_fence() {
- return CLK_LOCAL_MEM_FENCE;
-}
-
-_CLC_DEF int __clc_clk_global_mem_fence() {
- return CLK_GLOBAL_MEM_FENCE;
-}
diff --git a/r600/lib/synchronization/barrier_impl.ll b/r600/lib/synchronization/barrier_impl.ll
index 9b8fefb..777001a 100644
--- a/r600/lib/synchronization/barrier_impl.ll
+++ b/r600/lib/synchronization/barrier_impl.ll
@@ -1,32 +1,11 @@
-declare i32 @__clc_clk_local_mem_fence() #1
-declare i32 @__clc_clk_global_mem_fence() #1
declare void @llvm.r600.group.barrier() #0

-define void @barrier(i32 %flags) #2 {
-barrier_local_test:
- %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
- %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
- %1 = icmp ne i32 %0, 0
- br i1 %1, label %barrier_local, label %barrier_global_test
-
-barrier_local:
- call void @llvm.r600.group.barrier()
- br label %barrier_global_test
-
-barrier_global_test:
- %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
- %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
- %3 = icmp ne i32 %2, 0
- br i1 %3, label %barrier_global, label %done
-
-barrier_global:
- call void @llvm.r600.group.barrier()
- br label %done
-
-done:
+define void @barrier(i32 %flags) #1 {
+entry:
+ ; We should call mem_fence here, but that is not implemented for r600 yet
+ tail call void @llvm.r600.group.barrier()
ret void
}

attributes #0 = { nounwind convergent }
-attributes #1 = { nounwind alwaysinline }
-attributes #2 = { nounwind convergent alwaysinline }
+attributes #1 = { nounwind convergent alwaysinline }

ping. these are minor cleanups

Sorry, this looks fine to me, and I think it fixes a bug where no barrier would get triggered if flags was 0.

–Aaron

> > We don't have memory fences for r600 so just call group barrier directly
> >
> > Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
> > ---
> > amdgpu/lib/SOURCES | 1 -
> > amdgpu/lib/synchronization/barrier.cl | 10 ----------
> > r600/lib/synchronization/barrier_impl.ll | 31
>
> +++++--------------------------
> > 3 files changed, 5 insertions(+), 37 deletions(-)
> > delete mode 100644 amdgpu/lib/synchronization/barrier.cl
> >
> > diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES
> > index 9379ad7..f817538 100644
> > --- a/amdgpu/lib/SOURCES
> > +++ b/amdgpu/lib/SOURCES
> > @@ -1,7 +1,6 @@
> > atomic/atomic.cl
> > math/nextafter.cl
> > math/sqrt.cl
> > -synchronization/barrier.cl
> > image/get_image_width.cl
> > image/get_image_height.cl
> > image/get_image_depth.cl
> > diff --git a/amdgpu/lib/synchronization/barrier.cl
>
> b/amdgpu/lib/synchronization/barrier.cl
> > deleted file mode 100644
> > index 6f2900b..0000000
> > --- a/amdgpu/lib/synchronization/barrier.cl
> > +++ /dev/null
> > @@ -1,10 +0,0 @@
> > -
> > -#include <clc/clc.h>
> > -
> > -_CLC_DEF int __clc_clk_local_mem_fence() {
> > - return CLK_LOCAL_MEM_FENCE;
> > -}
> > -
> > -_CLC_DEF int __clc_clk_global_mem_fence() {
> > - return CLK_GLOBAL_MEM_FENCE;
> > -}
> > diff --git a/r600/lib/synchronization/barrier_impl.ll
>
> b/r600/lib/synchronization/barrier_impl.ll
> > index 9b8fefb..777001a 100644
> > --- a/r600/lib/synchronization/barrier_impl.ll
> > +++ b/r600/lib/synchronization/barrier_impl.ll
> > @@ -1,32 +1,11 @@
> > -declare i32 @__clc_clk_local_mem_fence() #1
> > -declare i32 @__clc_clk_global_mem_fence() #1
> > declare void @llvm.r600.group.barrier() #0
> >
> > -define void @barrier(i32 %flags) #2 {
> > -barrier_local_test:
> > - %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
> > - %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
> > - %1 = icmp ne i32 %0, 0
> > - br i1 %1, label %barrier_local, label %barrier_global_test
> > -
> > -barrier_local:
> > - call void @llvm.r600.group.barrier()
> > - br label %barrier_global_test
> > -
> > -barrier_global_test:
> > - %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
> > - %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
> > - %3 = icmp ne i32 %2, 0
> > - br i1 %3, label %barrier_global, label %done
> > -
> > -barrier_global:
> > - call void @llvm.r600.group.barrier()
> > - br label %done
> > -
> > -done:
> > +define void @barrier(i32 %flags) #1 {
> > +entry:
> > + ; We should call mem_fence here, but that is not implemented for r600
>
> yet
> > + tail call void @llvm.r600.group.barrier()
> > ret void
> > }
> >
> > attributes #0 = { nounwind convergent }
> > -attributes #1 = { nounwind alwaysinline }
> > -attributes #2 = { nounwind convergent alwaysinline }
> > +attributes #1 = { nounwind convergent alwaysinline }
>
> ping. these are minor cleanups
>

Sorry, this looks fine to me, and I think it fixes a bug where no barrier
would get triggered if flags was 0.

thanks. I've mentioned it in the commit message.
Can I consider it RB? does it cover 1/2 as well?

regards,
Jan

We don’t have memory fences for r600 so just call group barrier directly

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

amdgpu/lib/SOURCES | 1 -
amdgpu/lib/synchronization/barrier.cl | 10 ----------
r600/lib/synchronization/barrier_impl.ll | 31 +++++--------------------------
3 files changed, 5 insertions(+), 37 deletions(-)
delete mode 100644 amdgpu/lib/synchronization/barrier.cl

diff --git a/amdgpu/lib/SOURCES b/amdgpu/lib/SOURCES
index 9379ad7..f817538 100644
--- a/amdgpu/lib/SOURCES
+++ b/amdgpu/lib/SOURCES
@@ -1,7 +1,6 @@
atomic/atomic.cl
math/nextafter.cl
math/sqrt.cl
-synchronization/barrier.cl
image/get_image_width.cl
image/get_image_height.cl
image/get_image_depth.cl
diff --git a/amdgpu/lib/synchronization/barrier.cl b/amdgpu/lib/synchronization/barrier.cl
deleted file mode 100644
index 6f2900b..0000000
--- a/amdgpu/lib/synchronization/barrier.cl
+++ /dev/null
@@ -1,10 +0,0 @@
-
-#include <clc/clc.h>
-
-_CLC_DEF int __clc_clk_local_mem_fence() {
- return CLK_LOCAL_MEM_FENCE;
-}
-
-_CLC_DEF int __clc_clk_global_mem_fence() {
- return CLK_GLOBAL_MEM_FENCE;
-}
diff --git a/r600/lib/synchronization/barrier_impl.ll b/r600/lib/synchronization/barrier_impl.ll
index 9b8fefb..777001a 100644
--- a/r600/lib/synchronization/barrier_impl.ll
+++ b/r600/lib/synchronization/barrier_impl.ll
@@ -1,32 +1,11 @@
-declare i32 @__clc_clk_local_mem_fence() #1
-declare i32 @__clc_clk_global_mem_fence() #1
declare void @llvm.r600.group.barrier() #0

-define void @barrier(i32 %flags) #2 {
-barrier_local_test:
- %CLK_LOCAL_MEM_FENCE = call i32 @__clc_clk_local_mem_fence()
- %0 = and i32 %flags, %CLK_LOCAL_MEM_FENCE
- %1 = icmp ne i32 %0, 0
- br i1 %1, label %barrier_local, label %barrier_global_test
-
-barrier_local:
- call void @llvm.r600.group.barrier()
- br label %barrier_global_test
-
-barrier_global_test:
- %CLK_GLOBAL_MEM_FENCE = call i32 @__clc_clk_global_mem_fence()
- %2 = and i32 %flags, %CLK_GLOBAL_MEM_FENCE
- %3 = icmp ne i32 %2, 0
- br i1 %3, label %barrier_global, label %done
-
-barrier_global:
- call void @llvm.r600.group.barrier()
- br label %done
-
-done:
+define void @barrier(i32 %flags) #1 {
+entry:
+ ; We should call mem_fence here, but that is not implemented for r600 yet
+ tail call void @llvm.r600.group.barrier()
ret void
}

attributes #0 = { nounwind convergent }
-attributes #1 = { nounwind alwaysinline }
-attributes #2 = { nounwind convergent alwaysinline }
+attributes #1 = { nounwind convergent alwaysinline }

ping. these are minor cleanups

Sorry, this looks fine to me, and I think it fixes a bug where no barrier
would get triggered if flags was 0.

thanks. I’ve mentioned it in the commit message.
Can I consider it RB? does it cover 1/2 as well?

Yeah, 1 looks ok to me as well. Reviewed-by for both.

–Aaron