[PATCH 1/1] Implement generic mad_sat

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed overflow is undefined, so you can't test for it this way. You need to check if it would overflow before performing the overflowing computation

Whoops, nevermind. I thought m was also int

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
---
generic/include/clc/clc.h | 1 +
generic/include/clc/integer/mad_sat.h | 3 ++
generic/include/clc/integer/mad_sat.inc | 1 +
generic/lib/SOURCES | 1 +
generic/lib/clcmacro.h | 22 ++++++++++
generic/lib/integer/mad_sat.cl | 71 +++++++++++++++++++++++++++++++++
6 files changed, 99 insertions(+)
create mode 100644 generic/include/clc/integer/mad_sat.h
create mode 100644 generic/include/clc/integer/mad_sat.inc
create mode 100644 generic/lib/integer/mad_sat.cl

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index 9815c56..aca9b53 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -82,6 +82,7 @@
#include <clc/integer/hadd.h>
#include <clc/integer/mad24.h>
#include <clc/integer/mad_hi.h>
+#include <clc/integer/mad_sat.h>
#include <clc/integer/mul24.h>
#include <clc/integer/mul_hi.h>
#include <clc/integer/rhadd.h>
diff --git a/generic/include/clc/integer/mad_sat.h b/generic/include/clc/integer/mad_sat.h
new file mode 100644
index 0000000..3e92372
--- /dev/null
+++ b/generic/include/clc/integer/mad_sat.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/integer/mad_sat.inc>
+#include <clc/integer/gentype.inc>
+#undef __CLC_BODY
diff --git a/generic/include/clc/integer/mad_sat.inc b/generic/include/clc/integer/mad_sat.inc
new file mode 100644
index 0000000..5da2bdf
--- /dev/null
+++ b/generic/include/clc/integer/mad_sat.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index bfdec7b..7d3fa6b 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -19,6 +19,7 @@ integer/clz_if.ll
integer/clz_impl.ll
integer/hadd.cl
integer/mad24.cl
+integer/mad_sat.cl
integer/mul24.cl
integer/mul_hi.cl
integer/rhadd.cl
diff --git a/generic/lib/clcmacro.h b/generic/lib/clcmacro.h
index 730073a..ef102ea 100644
--- a/generic/lib/clcmacro.h
+++ b/generic/lib/clcmacro.h
@@ -41,6 +41,28 @@
     return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
   }

+#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
+ DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
+ return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
+ } \
+\
+ DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
+ return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
+ FUNCTION(x.z, y.z, z.z)); \
+ } \
+\
+ DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
+ return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
+ return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+ } \
+\
+ DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
+ return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
+ }
+
#define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
_CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
   return BUILTIN(x, y); \
diff --git a/generic/lib/integer/mad_sat.cl b/generic/lib/integer/mad_sat.cl
new file mode 100644
index 0000000..d04bf78
--- /dev/null
+++ b/generic/lib/integer/mad_sat.cl
@@ -0,0 +1,71 @@
+#include <clc/clc.h>
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
+ return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);

Trailing whitespace on this and the next 3 clamp() lines.

+}
+
+_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
+ return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
+ return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
+ return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX);
+}
+
+_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
+ int mhi = mul_hi(x, y);
+ uint mlo = x * y;
+ long m = upsample(mhi, mlo);
+ m += z;
+ if (m > INT_MAX)
+ return INT_MAX;
+ if (m < INT_MIN)
+ return INT_MIN;
+ return m;
+}
+
+_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
+ if (mul_hi(x, y) != 0)
+ return UINT_MAX;
+ return add_sat(x * y, z);
+}
+
+_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
+ long hi = mul_hi(x, y);
+ ulong ulo = x * y;
+ long slo = x * y;
+ /* Big overflow of more than 2 bits, add can't fix this */
+ if (((x < 0) == (y < 0)) && hi != 0)
+ return LONG_MAX;
+ /* Low overflow in mul and z not neg enough to correct it */
+ if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX))
+ return LONG_MAX;
+ /* Big overflow of more than 2 bits, add can't fix this */
+ if (((x < 0) != (y < 0)) && hi != -1)
+ return LONG_MIN;
+ /* Low overflow in mul and z not pos enough to correct it */
+ if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))

I get the following warnings during build:
./generic/lib/integer/mad_sat.cl:52:36: warning: overflow in
expression; result is -9223372036854775808 with type 'long'
[-Winteger-overflow]
  if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))
                                   ^
./generic/lib/integer/mad_sat.cl:52:36: warning: overflow in
expression; result is -9223372036854775808 with type 'long'
[-Winteger-overflow]
  if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))
                                   ^
./generic/lib/integer/mad_sat.cl:52:36: warning: overflow in
expression; result is -9223372036854775808 with type 'long'
[-Winteger-overflow]
  if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))

I'm assuming what you want to do is cast LONG_MAX to ulong before the
addition and then change 1U to 1UL

I've managed to test char/uchar/short/ushort/int/uint successfully on
r600 (pitcairn, SI) using the existing piglit tests.

The long test just segfaults and the ulong test dies out with
instruction selection errors. I haven't determined yet if these are
bugs in clover, the R600 back-end, or the code here. I've seen issues
with long calculations/instructions in the past on radeonsi, so it's
possible that the rest of your code is fine, but I haven't had time to
really dig into it yet, and I'm not sure I will in the next few days
due to real life events... but maybe I'll have an over-abundance of
free time for the next week or so.

--Aaron

> Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
> ---
> generic/include/clc/clc.h | 1 +
> generic/include/clc/integer/mad_sat.h | 3 ++
> generic/include/clc/integer/mad_sat.inc | 1 +
> generic/lib/SOURCES | 1 +
> generic/lib/clcmacro.h | 22 ++++++++++
> generic/lib/integer/mad_sat.cl | 71 +++++++++++++++++++++++++++++++++
> 6 files changed, 99 insertions(+)
> create mode 100644 generic/include/clc/integer/mad_sat.h
> create mode 100644 generic/include/clc/integer/mad_sat.inc
> create mode 100644 generic/lib/integer/mad_sat.cl
>
> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
> index 9815c56..aca9b53 100644
> --- a/generic/include/clc/clc.h
> +++ b/generic/include/clc/clc.h
> @@ -82,6 +82,7 @@
> #include <clc/integer/hadd.h>
> #include <clc/integer/mad24.h>
> #include <clc/integer/mad_hi.h>
> +#include <clc/integer/mad_sat.h>
> #include <clc/integer/mul24.h>
> #include <clc/integer/mul_hi.h>
> #include <clc/integer/rhadd.h>
> diff --git a/generic/include/clc/integer/mad_sat.h b/generic/include/clc/integer/mad_sat.h
> new file mode 100644
> index 0000000..3e92372
> --- /dev/null
> +++ b/generic/include/clc/integer/mad_sat.h
> @@ -0,0 +1,3 @@
> +#define __CLC_BODY <clc/integer/mad_sat.inc>
> +#include <clc/integer/gentype.inc>
> +#undef __CLC_BODY
> diff --git a/generic/include/clc/integer/mad_sat.inc b/generic/include/clc/integer/mad_sat.inc
> new file mode 100644
> index 0000000..5da2bdf
> --- /dev/null
> +++ b/generic/include/clc/integer/mad_sat.inc
> @@ -0,0 +1 @@
> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE mad_sat(__CLC_GENTYPE x, __CLC_GENTYPE y, __CLC_GENTYPE z);
> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
> index bfdec7b..7d3fa6b 100644
> --- a/generic/lib/SOURCES
> +++ b/generic/lib/SOURCES
> @@ -19,6 +19,7 @@ integer/clz_if.ll
> integer/clz_impl.ll
> integer/hadd.cl
> integer/mad24.cl
> +integer/mad_sat.cl
> integer/mul24.cl
> integer/mul_hi.cl
> integer/rhadd.cl
> diff --git a/generic/lib/clcmacro.h b/generic/lib/clcmacro.h
> index 730073a..ef102ea 100644
> --- a/generic/lib/clcmacro.h
> +++ b/generic/lib/clcmacro.h
> @@ -41,6 +41,28 @@
> return (RET_TYPE##16)(FUNCTION(x.lo, y.lo), FUNCTION(x.hi, y.hi)); \
> }
>
> +#define _CLC_TERNARY_VECTORIZE(DECLSPEC, RET_TYPE, FUNCTION, ARG1_TYPE, ARG2_TYPE, ARG3_TYPE) \
> + DECLSPEC RET_TYPE##2 FUNCTION(ARG1_TYPE##2 x, ARG2_TYPE##2 y, ARG3_TYPE##2 z) { \
> + return (RET_TYPE##2)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y)); \
> + } \
> +\
> + DECLSPEC RET_TYPE##3 FUNCTION(ARG1_TYPE##3 x, ARG2_TYPE##3 y, ARG3_TYPE##3 z) { \
> + return (RET_TYPE##3)(FUNCTION(x.x, y.x, z.x), FUNCTION(x.y, y.y, z.y), \
> + FUNCTION(x.z, y.z, z.z)); \
> + } \
> +\
> + DECLSPEC RET_TYPE##4 FUNCTION(ARG1_TYPE##4 x, ARG2_TYPE##4 y, ARG3_TYPE##4 z) { \
> + return (RET_TYPE##4)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> + } \
> +\
> + DECLSPEC RET_TYPE##8 FUNCTION(ARG1_TYPE##8 x, ARG2_TYPE##8 y, ARG3_TYPE##8 z) { \
> + return (RET_TYPE##8)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> + } \
> +\
> + DECLSPEC RET_TYPE##16 FUNCTION(ARG1_TYPE##16 x, ARG2_TYPE##16 y, ARG3_TYPE##16 z) { \
> + return (RET_TYPE##16)(FUNCTION(x.lo, y.lo, z.lo), FUNCTION(x.hi, y.hi, z.hi)); \
> + }
> +
> #define _CLC_DEFINE_BINARY_BUILTIN(RET_TYPE, FUNCTION, BUILTIN, ARG1_TYPE, ARG2_TYPE) \
> _CLC_DEF _CLC_OVERLOAD RET_TYPE FUNCTION(ARG1_TYPE x, ARG2_TYPE y) { \
> return BUILTIN(x, y); \
> diff --git a/generic/lib/integer/mad_sat.cl b/generic/lib/integer/mad_sat.cl
> new file mode 100644
> index 0000000..d04bf78
> --- /dev/null
> +++ b/generic/lib/integer/mad_sat.cl
> @@ -0,0 +1,71 @@
> +#include <clc/clc.h>
> +#include "../clcmacro.h"
> +
> +_CLC_OVERLOAD _CLC_DEF char mad_sat(char x, char y, char z) {
> + return clamp((short)mad24((short)x, (short)y, (short)z), (short)CHAR_MIN, (short) CHAR_MAX);

Trailing whitespace on this and the next 3 clamp() lines.

fixed in v2.
Wonder why neither git nor vim screamed about it.

> +}
> +
> +_CLC_OVERLOAD _CLC_DEF uchar mad_sat(uchar x, uchar y, uchar z) {
> + return clamp((ushort)mad24((ushort)x, (ushort)y, (ushort)z), (ushort)0, (ushort) UCHAR_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF short mad_sat(short x, short y, short z) {
> + return clamp((int)mad24((int)x, (int)y, (int)z), (int)SHRT_MIN, (int) SHRT_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF ushort mad_sat(ushort x, ushort y, ushort z) {
> + return clamp((uint)mad24((uint)x, (uint)y, (uint)z), (uint)0, (uint) USHRT_MAX);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF int mad_sat(int x, int y, int z) {
> + int mhi = mul_hi(x, y);
> + uint mlo = x * y;
> + long m = upsample(mhi, mlo);
> + m += z;
> + if (m > INT_MAX)
> + return INT_MAX;
> + if (m < INT_MIN)
> + return INT_MIN;
> + return m;
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF uint mad_sat(uint x, uint y, uint z) {
> + if (mul_hi(x, y) != 0)
> + return UINT_MAX;
> + return add_sat(x * y, z);
> +}
> +
> +_CLC_OVERLOAD _CLC_DEF long mad_sat(long x, long y, long z) {
> + long hi = mul_hi(x, y);
> + ulong ulo = x * y;
> + long slo = x * y;
> + /* Big overflow of more than 2 bits, add can't fix this */
> + if (((x < 0) == (y < 0)) && hi != 0)
> + return LONG_MAX;
> + /* Low overflow in mul and z not neg enough to correct it */
> + if (hi == 0 && ulo >= LONG_MAX && (z > 0 || (ulo + z) > LONG_MAX))
> + return LONG_MAX;
> + /* Big overflow of more than 2 bits, add can't fix this */
> + if (((x < 0) != (y < 0)) && hi != -1)
> + return LONG_MIN;
> + /* Low overflow in mul and z not pos enough to correct it */
> + if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))

I get the following warnings during build:
./generic/lib/integer/mad_sat.cl:52:36: warning: overflow in
expression; result is -9223372036854775808 with type 'long'
[-Winteger-overflow]
  if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))
                                   ^
./generic/lib/integer/mad_sat.cl:52:36: warning: overflow in
expression; result is -9223372036854775808 with type 'long'
[-Winteger-overflow]
  if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))
                                   ^
./generic/lib/integer/mad_sat.cl:52:36: warning: overflow in
expression; result is -9223372036854775808 with type 'long'
[-Winteger-overflow]
  if (hi == -1 && ulo <= (LONG_MAX + 1U) && (z < 0 || z < (LONG_MAX - ulo)))

I'm assuming what you want to do is cast LONG_MAX to ulong before the
addition and then change 1U to 1UL

yeah, that was the idea, fixed in v2 thanks.

I've managed to test char/uchar/short/ushort/int/uint successfully on
r600 (pitcairn, SI) using the existing piglit tests.

The long test just segfaults and the ulong test dies out with
instruction selection errors. I haven't determined yet if these are
bugs in clover, the R600 back-end, or the code here. I've seen issues
with long calculations/instructions in the past on radeonsi, so it's
possible that the rest of your code is fine, but I haven't had time to
really dig into it yet, and I'm not sure I will in the next few days
due to real life events... but maybe I'll have an over-abundance of
free time for the next week or so.

I ran into the same problem, and posted a patch for llvm [0]
(I have attached as well). I plan to investigate Matt's suggestion and
send a real patch soon (tm).
I forgot to mention it in this patch, sorry about that.

Jan

[0]
http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20140804/229312.html

0001-utils-Fix-segfault-in-flattencfg.patch (1.13 KB)