[PATCH] libclc/math: Add cospi

Ported from the libclc/amd-builtins branch

Signed-off-by: Aaron Watry <awatry@gmail.com>

Hi Aaron,

Comments inline.

Ported from the libclc/amd-builtins branch

Signed-off-by: Aaron Watry <awatry@gmail.com>
---
generic/include/clc/clc.h | 1 +
generic/include/clc/math/cospi.h | 3 ++
generic/include/clc/math/cospi.inc | 1 +
generic/lib/SOURCES | 1 +
generic/lib/math/cospi.cl | 96 ++++++++++++++++++++++++++++++++++++++
generic/lib/math/sincospiF_piby4.h | 57 ++++++++++++++++++++++
6 files changed, 159 insertions(+)
create mode 100644 generic/include/clc/math/cospi.h
create mode 100644 generic/include/clc/math/cospi.inc
create mode 100644 generic/lib/math/cospi.cl
create mode 100644 generic/lib/math/sincospiF_piby4.h

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index bd92fdb..bff6ddd 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -38,6 +38,7 @@
#include <clc/math/atan2.h>
#include <clc/math/copysign.h>
#include <clc/math/cos.h>
+#include <clc/math/cospi.h>
#include <clc/math/ceil.h>
#include <clc/math/exp.h>
#include <clc/math/exp10.h>
diff --git a/generic/include/clc/math/cospi.h b/generic/include/clc/math/cospi.h
new file mode 100644
index 0000000..427733b
--- /dev/null
+++ b/generic/include/clc/math/cospi.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/cospi.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/generic/include/clc/math/cospi.inc b/generic/include/clc/math/cospi.inc
new file mode 100644
index 0000000..1e786cf
--- /dev/null
+++ b/generic/include/clc/math/cospi.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cospi(__CLC_GENTYPE a);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index b76fec9..2b9426e 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -57,6 +57,7 @@ math/atan.cl
math/atan2.cl
math/copysign.cl
math/cos.cl
+math/cospi.cl
math/exp.cl
math/exp10.cl
math/fmax.cl
diff --git a/generic/lib/math/cospi.cl b/generic/lib/math/cospi.cl
new file mode 100644
index 0000000..666deb7
--- /dev/null
+++ b/generic/lib/math/cospi.cl
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincos_helpers.h"
+#include "sincospiF_piby4.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float cospi(float x)
+{
+ int ix = as_int(x) & 0x7fffffff;
+ float ax = as_float(ix);
+ int iax = (int)ax;
+ float r = ax - iax;
+ int xodd = iax & 0x1 ? 0x80000000 : 0;
+
+ // Initialize with return for +-Inf and NaN
+ int ir = 0x7fc00000;
+
+ // 2^24 <= |x| < Inf, the result is always even integer
+ ir = ix < 0x7f800000 ? 0x3f800000 : ir;
+
+ // 2^23 <= |x| < 2^24, the result is always integer
+ ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
+
+ // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
+
+ // r < 1.0
+ float a = 1.0f - r;
+ int e = 1;
+ int s = xodd ^ 0x80000000;
+
+ // r <= 0.75
+ int c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 1 : e;
+
+ float2 t = sincosf_piby4(a * M_PI_F);
+ int jr = s ^ as_int(e ? t.hi : t.lo);
+
+ ir = ix < 0x4b000000 ? jr : ir;
+
+ return as_float(ir);
+}
+
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define __CLC_FUNCTION __clc_cos_intrinsic
+#define __CLC_INTRINSIC “llvm.cos"

Does llvm.cos give sufficient precision for double, or is the intention to leave double as it is for now?

+#include <clc/math/unary_intrin.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+
+_CLC_OVERLOAD _CLC_DEF double cospi(double x) {
+ return __clc_cos_intrinsic(x*M_PI);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
+
+#endif
diff --git a/generic/lib/math/sincospiF_piby4.h b/generic/lib/math/sincospiF_piby4.h
new file mode 100644
index 0000000..3274d90
--- /dev/null
+++ b/generic/lib/math/sincospiF_piby4.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
+inline float2
+sincosf_piby4(float x)

Would it be possible to start prefixing the internal functions like these with some standard prefix (__libclc_ or something like that). Just the be sure they do not clash with the user’s namespace?

Also by C99’s definition of inline, you would also need to provide a non-inlined variant to make sure linking always succeeds; did you also meant to add __attribute__((always_inline)) ?

Jeroen

Hi Aaron,

Comments inline.

Ported from the libclc/amd-builtins branch

Signed-off-by: Aaron Watry <awatry@gmail.com>
---
generic/include/clc/clc.h | 1 +
generic/include/clc/math/cospi.h | 3 ++
generic/include/clc/math/cospi.inc | 1 +
generic/lib/SOURCES | 1 +
generic/lib/math/cospi.cl | 96 ++++++++++++++++++++++++++++++++++++++
generic/lib/math/sincospiF_piby4.h | 57 ++++++++++++++++++++++
6 files changed, 159 insertions(+)
create mode 100644 generic/include/clc/math/cospi.h
create mode 100644 generic/include/clc/math/cospi.inc
create mode 100644 generic/lib/math/cospi.cl
create mode 100644 generic/lib/math/sincospiF_piby4.h

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index bd92fdb..bff6ddd 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -38,6 +38,7 @@
#include <clc/math/atan2.h>
#include <clc/math/copysign.h>
#include <clc/math/cos.h>
+#include <clc/math/cospi.h>
#include <clc/math/ceil.h>
#include <clc/math/exp.h>
#include <clc/math/exp10.h>
diff --git a/generic/include/clc/math/cospi.h b/generic/include/clc/math/cospi.h
new file mode 100644
index 0000000..427733b
--- /dev/null
+++ b/generic/include/clc/math/cospi.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/cospi.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/generic/include/clc/math/cospi.inc b/generic/include/clc/math/cospi.inc
new file mode 100644
index 0000000..1e786cf
--- /dev/null
+++ b/generic/include/clc/math/cospi.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cospi(__CLC_GENTYPE a);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index b76fec9..2b9426e 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -57,6 +57,7 @@ math/atan.cl
math/atan2.cl
math/copysign.cl
math/cos.cl
+math/cospi.cl
math/exp.cl
math/exp10.cl
math/fmax.cl
diff --git a/generic/lib/math/cospi.cl b/generic/lib/math/cospi.cl
new file mode 100644
index 0000000..666deb7
--- /dev/null
+++ b/generic/lib/math/cospi.cl
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincos_helpers.h"
+#include "sincospiF_piby4.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float cospi(float x)
+{
+ int ix = as_int(x) & 0x7fffffff;
+ float ax = as_float(ix);
+ int iax = (int)ax;
+ float r = ax - iax;
+ int xodd = iax & 0x1 ? 0x80000000 : 0;
+
+ // Initialize with return for +-Inf and NaN
+ int ir = 0x7fc00000;
+
+ // 2^24 <= |x| < Inf, the result is always even integer
+ ir = ix < 0x7f800000 ? 0x3f800000 : ir;
+
+ // 2^23 <= |x| < 2^24, the result is always integer
+ ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
+
+ // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
+
+ // r < 1.0
+ float a = 1.0f - r;
+ int e = 1;
+ int s = xodd ^ 0x80000000;
+
+ // r <= 0.75
+ int c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 1 : e;
+
+ float2 t = sincosf_piby4(a * M_PI_F);
+ int jr = s ^ as_int(e ? t.hi : t.lo);
+
+ ir = ix < 0x4b000000 ? jr : ir;
+
+ return as_float(ir);
+}
+
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define __CLC_FUNCTION __clc_cos_intrinsic
+#define __CLC_INTRINSIC “llvm.cos"

Does llvm.cos give sufficient precision for double, or is the intention to leave double as it is for now?

I honestly don't know if llvm.cos gives sufficient precision for
doubles here. I didn't see any precision notes in the SI ISA guide,
so until we either have some unit tests for doubles or maybe if Tom
sees this, I can't really answer that. There is a cospi(double)
implementation in the amd builtins that were released recently, but I
haven't attempted to port that yet. I was assuming that because
cos(double) used __clc_cos_intrinsic, it was probably safe to use it
for cospi(double) as well... although it's also possible that
cos(double) isn't precise enough either.

One of these days, we need to extend the generated piglit tests to
test double for inputs/outputs for builtins (that should give us
decent test coverage of doubles without too much effort, at least for
devices that support doubles). I have been focusing on other things
first (basically getting all of the float builtins implemented so that
cppamp-driver-ng, clBLAS, clFFT, and their unit/precision tests start
working).

+#include <clc/math/unary_intrin.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+
+_CLC_OVERLOAD _CLC_DEF double cospi(double x) {
+ return __clc_cos_intrinsic(x*M_PI);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
+
+#endif
diff --git a/generic/lib/math/sincospiF_piby4.h b/generic/lib/math/sincospiF_piby4.h
new file mode 100644
index 0000000..3274d90
--- /dev/null
+++ b/generic/lib/math/sincospiF_piby4.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
+inline float2
+sincosf_piby4(float x)

Would it be possible to start prefixing the internal functions like these with some standard prefix (__libclc_ or something like that). Just the be sure they do not clash with the user’s namespace?

Sounds reasonable.

Also by C99’s definition of inline, you would also need to provide a non-inlined variant to make sure linking always succeeds; did you also meant to add __attribute__((always_inline)) ?

Yeah, I did mean to add the attribute. Instead of coding that
directly, I'd like to use the _CLC_INLINE macro from <clc/clcfunc.h>.
Sound good?

--Aaron

Hi Aaron,

Comments inline.

Ported from the libclc/amd-builtins branch

Signed-off-by: Aaron Watry <awatry@gmail.com>
---
generic/include/clc/clc.h | 1 +
generic/include/clc/math/cospi.h | 3 ++
generic/include/clc/math/cospi.inc | 1 +
generic/lib/SOURCES | 1 +
generic/lib/math/cospi.cl | 96 ++++++++++++++++++++++++++++++++++++++
generic/lib/math/sincospiF_piby4.h | 57 ++++++++++++++++++++++
6 files changed, 159 insertions(+)
create mode 100644 generic/include/clc/math/cospi.h
create mode 100644 generic/include/clc/math/cospi.inc
create mode 100644 generic/lib/math/cospi.cl
create mode 100644 generic/lib/math/sincospiF_piby4.h

diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
index bd92fdb..bff6ddd 100644
--- a/generic/include/clc/clc.h
+++ b/generic/include/clc/clc.h
@@ -38,6 +38,7 @@
#include <clc/math/atan2.h>
#include <clc/math/copysign.h>
#include <clc/math/cos.h>
+#include <clc/math/cospi.h>
#include <clc/math/ceil.h>
#include <clc/math/exp.h>
#include <clc/math/exp10.h>
diff --git a/generic/include/clc/math/cospi.h b/generic/include/clc/math/cospi.h
new file mode 100644
index 0000000..427733b
--- /dev/null
+++ b/generic/include/clc/math/cospi.h
@@ -0,0 +1,3 @@
+#define __CLC_BODY <clc/math/cospi.inc>
+#include <clc/math/gentype.inc>
+#undef __CLC_BODY
diff --git a/generic/include/clc/math/cospi.inc b/generic/include/clc/math/cospi.inc
new file mode 100644
index 0000000..1e786cf
--- /dev/null
+++ b/generic/include/clc/math/cospi.inc
@@ -0,0 +1 @@
+_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cospi(__CLC_GENTYPE a);
diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
index b76fec9..2b9426e 100644
--- a/generic/lib/SOURCES
+++ b/generic/lib/SOURCES
@@ -57,6 +57,7 @@ math/atan.cl
math/atan2.cl
math/copysign.cl
math/cos.cl
+math/cospi.cl
math/exp.cl
math/exp10.cl
math/fmax.cl
diff --git a/generic/lib/math/cospi.cl b/generic/lib/math/cospi.cl
new file mode 100644
index 0000000..666deb7
--- /dev/null
+++ b/generic/lib/math/cospi.cl
@@ -0,0 +1,96 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include <clc/clc.h>
+
+#include "math.h"
+#include "sincos_helpers.h"
+#include "sincospiF_piby4.h"
+#include "../clcmacro.h"
+
+_CLC_OVERLOAD _CLC_DEF float cospi(float x)
+{
+ int ix = as_int(x) & 0x7fffffff;
+ float ax = as_float(ix);
+ int iax = (int)ax;
+ float r = ax - iax;
+ int xodd = iax & 0x1 ? 0x80000000 : 0;
+
+ // Initialize with return for +-Inf and NaN
+ int ir = 0x7fc00000;
+
+ // 2^24 <= |x| < Inf, the result is always even integer
+ ir = ix < 0x7f800000 ? 0x3f800000 : ir;
+
+ // 2^23 <= |x| < 2^24, the result is always integer
+ ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
+
+ // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
+
+ // r < 1.0
+ float a = 1.0f - r;
+ int e = 1;
+ int s = xodd ^ 0x80000000;
+
+ // r <= 0.75
+ int c = r <= 0.75f;
+ a = c ? r - 0.5f : a;
+ e = c ? 0 : e;
+
+ // r < 0.5
+ c = r < 0.5f;
+ a = c ? 0.5f - r : a;
+ s = c ? xodd : s;
+
+ // r <= 0.25
+ c = r <= 0.25f;
+ a = c ? r : a;
+ e = c ? 1 : e;
+
+ float2 t = sincosf_piby4(a * M_PI_F);
+ int jr = s ^ as_int(e ? t.hi : t.lo);
+
+ ir = ix < 0x4b000000 ? jr : ir;
+
+ return as_float(ir);
+}
+
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
+
+#ifdef cl_khr_fp64
+
+#pragma OPENCL EXTENSION cl_khr_fp64 : enable
+
+#define __CLC_FUNCTION __clc_cos_intrinsic
+#define __CLC_INTRINSIC “llvm.cos"

Does llvm.cos give sufficient precision for double, or is the intention to leave double as it is for now?

I honestly don't know if llvm.cos gives sufficient precision for
doubles here. I didn't see any precision notes in the SI ISA guide,
so until we either have some unit tests for doubles or maybe if Tom
sees this, I can't really answer that. There is a cospi(double)
implementation in the amd builtins that were released recently, but I
haven't attempted to port that yet. I was assuming that because
cos(double) used __clc_cos_intrinsic, it was probably safe to use it
for cospi(double) as well... although it's also possible that
cos(double) isn't precise enough either.

AFAIK, Tom didn’t try to fix-up the double implementation of cos, but
only the float one. So, whatever applies there is likely to apply here
as well.

One of these days, we need to extend the generated piglit tests to
test double for inputs/outputs for builtins (that should give us
decent test coverage of doubles without too much effort, at least for
devices that support doubles). I have been focusing on other things
first (basically getting all of the float builtins implemented so that
cppamp-driver-ng, clBLAS, clFFT, and their unit/precision tests start
working).

+#include <clc/math/unary_intrin.inc>
+#undef __CLC_FUNCTION
+#undef __CLC_INTRINSIC
+
+_CLC_OVERLOAD _CLC_DEF double cospi(double x) {
+ return __clc_cos_intrinsic(x*M_PI);
+}
+
+_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, double, cospi, double);
+
+#endif
diff --git a/generic/lib/math/sincospiF_piby4.h b/generic/lib/math/sincospiF_piby4.h
new file mode 100644
index 0000000..3274d90
--- /dev/null
+++ b/generic/lib/math/sincospiF_piby4.h
@@ -0,0 +1,57 @@
+/*
+ * Copyright (c) 2014 Advanced Micro Devices, Inc.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+// Evaluate single precisions in and cos of value in interval [-pi/4, pi/4]
+inline float2
+sincosf_piby4(float x)

Would it be possible to start prefixing the internal functions like these with some standard prefix (__libclc_ or something like that). Just the be sure they do not clash with the user’s namespace?

Sounds reasonable.

Also by C99’s definition of inline, you would also need to provide a non-inlined variant to make sure linking always succeeds; did you also meant to add __attribute__((always_inline)) ?

Yeah, I did mean to add the attribute. Instead of coding that
directly, I'd like to use the _CLC_INLINE macro from <clc/clcfunc.h>.
Sound good?

Sounds good to me.

Jeroen

There aren't any there, but I do know it is not sufficient

>
>>
>> Hi Aaron,
>>
>> Comments inline.
>>
>>>
>>> Ported from the libclc/amd-builtins branch
>>>
>>> Signed-off-by: Aaron Watry <awatry@gmail.com>
>>> ---
>>> generic/include/clc/clc.h | 1 +
>>> generic/include/clc/math/cospi.h | 3 ++
>>> generic/include/clc/math/cospi.inc | 1 +
>>> generic/lib/SOURCES | 1 +
>>> generic/lib/math/cospi.cl | 96 ++++++++++++++++++++++++++++++++++++++
>>> generic/lib/math/sincospiF_piby4.h | 57 ++++++++++++++++++++++
>>> 6 files changed, 159 insertions(+)
>>> create mode 100644 generic/include/clc/math/cospi.h
>>> create mode 100644 generic/include/clc/math/cospi.inc
>>> create mode 100644 generic/lib/math/cospi.cl
>>> create mode 100644 generic/lib/math/sincospiF_piby4.h
>>>
>>> diff --git a/generic/include/clc/clc.h b/generic/include/clc/clc.h
>>> index bd92fdb..bff6ddd 100644
>>> --- a/generic/include/clc/clc.h
>>> +++ b/generic/include/clc/clc.h
>>> @@ -38,6 +38,7 @@
>>> #include <clc/math/atan2.h>
>>> #include <clc/math/copysign.h>
>>> #include <clc/math/cos.h>
>>> +#include <clc/math/cospi.h>
>>> #include <clc/math/ceil.h>
>>> #include <clc/math/exp.h>
>>> #include <clc/math/exp10.h>
>>> diff --git a/generic/include/clc/math/cospi.h b/generic/include/clc/math/cospi.h
>>> new file mode 100644
>>> index 0000000..427733b
>>> --- /dev/null
>>> +++ b/generic/include/clc/math/cospi.h
>>> @@ -0,0 +1,3 @@
>>> +#define __CLC_BODY <clc/math/cospi.inc>
>>> +#include <clc/math/gentype.inc>
>>> +#undef __CLC_BODY
>>> diff --git a/generic/include/clc/math/cospi.inc b/generic/include/clc/math/cospi.inc
>>> new file mode 100644
>>> index 0000000..1e786cf
>>> --- /dev/null
>>> +++ b/generic/include/clc/math/cospi.inc
>>> @@ -0,0 +1 @@
>>> +_CLC_OVERLOAD _CLC_DECL __CLC_GENTYPE cospi(__CLC_GENTYPE a);
>>> diff --git a/generic/lib/SOURCES b/generic/lib/SOURCES
>>> index b76fec9..2b9426e 100644
>>> --- a/generic/lib/SOURCES
>>> +++ b/generic/lib/SOURCES
>>> @@ -57,6 +57,7 @@ math/atan.cl
>>> math/atan2.cl
>>> math/copysign.cl
>>> math/cos.cl
>>> +math/cospi.cl
>>> math/exp.cl
>>> math/exp10.cl
>>> math/fmax.cl
>>> diff --git a/generic/lib/math/cospi.cl b/generic/lib/math/cospi.cl
>>> new file mode 100644
>>> index 0000000..666deb7
>>> --- /dev/null
>>> +++ b/generic/lib/math/cospi.cl
>>> @@ -0,0 +1,96 @@
>>> +/*
>>> + * Copyright (c) 2014 Advanced Micro Devices, Inc.
>>> + *
>>> + * Permission is hereby granted, free of charge, to any person obtaining a copy
>>> + * of this software and associated documentation files (the "Software"), to deal
>>> + * in the Software without restriction, including without limitation the rights
>>> + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
>>> + * copies of the Software, and to permit persons to whom the Software is
>>> + * furnished to do so, subject to the following conditions:
>>> + *
>>> + * The above copyright notice and this permission notice shall be included in
>>> + * all copies or substantial portions of the Software.
>>> + *
>>> + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
>>> + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
>>> + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
>>> + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
>>> + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
>>> + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
>>> + * THE SOFTWARE.
>>> + */
>>> +
>>> +#include <clc/clc.h>
>>> +
>>> +#include "math.h"
>>> +#include "sincos_helpers.h"
>>> +#include "sincospiF_piby4.h"
>>> +#include "../clcmacro.h"
>>> +
>>> +_CLC_OVERLOAD _CLC_DEF float cospi(float x)
>>> +{
>>> + int ix = as_int(x) & 0x7fffffff;
>>> + float ax = as_float(ix);
>>> + int iax = (int)ax;
>>> + float r = ax - iax;
>>> + int xodd = iax & 0x1 ? 0x80000000 : 0;
>>> +
>>> + // Initialize with return for +-Inf and NaN
>>> + int ir = 0x7fc00000;
>>> +
>>> + // 2^24 <= |x| < Inf, the result is always even integer
>>> + ir = ix < 0x7f800000 ? 0x3f800000 : ir;
>>> +
>>> + // 2^23 <= |x| < 2^24, the result is always integer
>>> + ir = ix < 0x4b800000 ? xodd | 0x3f800000 : ir;
>>> +
>>> + // 0x1.0p-7 <= |x| < 2^23, result depends on which 0.25 interval
>>> +
>>> + // r < 1.0
>>> + float a = 1.0f - r;
>>> + int e = 1;
>>> + int s = xodd ^ 0x80000000;
>>> +
>>> + // r <= 0.75
>>> + int c = r <= 0.75f;
>>> + a = c ? r - 0.5f : a;
>>> + e = c ? 0 : e;
>>> +
>>> + // r < 0.5
>>> + c = r < 0.5f;
>>> + a = c ? 0.5f - r : a;
>>> + s = c ? xodd : s;
>>> +
>>> + // r <= 0.25
>>> + c = r <= 0.25f;
>>> + a = c ? r : a;
>>> + e = c ? 1 : e;
>>> +
>>> + float2 t = sincosf_piby4(a * M_PI_F);
>>> + int jr = s ^ as_int(e ? t.hi : t.lo);
>>> +
>>> + ir = ix < 0x4b000000 ? jr : ir;
>>> +
>>> + return as_float(ir);
>>> +}
>>> +
>>> +
>>> +_CLC_UNARY_VECTORIZE(_CLC_OVERLOAD _CLC_DEF, float, cospi, float);
>>> +
>>> +#ifdef cl_khr_fp64
>>> +
>>> +#pragma OPENCL EXTENSION cl_khr_fp64 : enable
>>> +
>>> +#define __CLC_FUNCTION __clc_cos_intrinsic
>>> +#define __CLC_INTRINSIC “llvm.cos"
>>
>> Does llvm.cos give sufficient precision for double, or is the intention to leave double as it is for now?
>>
>
> I honestly don't know if llvm.cos gives sufficient precision for
> doubles here. I didn't see any precision notes in the SI ISA guide,
> so until we either have some unit tests for doubles or maybe if Tom
> sees this, I can't really answer that. There is a cospi(double)
> implementation in the amd builtins that were released recently, but I
> haven't attempted to port that yet. I was assuming that because
> cos(double) used __clc_cos_intrinsic, it was probably safe to use it
> for cospi(double) as well... although it's also possible that
> cos(double) isn't precise enough either.

AFAIK, Tom didn’t try to fix-up the double implementation of cos, but
only the float one. So, whatever applies there is likely to apply here
as well.

The only reason I didn't fix the double implementation was that I didn't
have a good way to test it. I think it's fine to leave the double
version as is for cospi too.

-Tom