[PATCH 00/14] Implement native_* functions using llvm

This allows for better performance of programs that use native_ calls.
It also allows llvm to replace known patterns with native instructions,
such as: 1 / sqrt -> rsq

Most of these give results within 4096 ULP so they can be used to
implement half_ functions.

Jan

Jan Vesely (14):
  native_log2: Switch to generic native intrinsic inc file
  native_log: Switch to generic native intrinsic inc file
  native_log10: Switch to generic native intrinsic inc file
  amdgpu: Add workaround for unimplemented llvm.exp intrinsic
  native_exp: Switch implementation to llvm intrinsic
  native_exp2: Switch implementation to llvm intrinsic
  native_cos: Switch implementation to llvm intrinsic
  native_sin: Switch implementation to llvm intrinsic
  native_sqrt: Switch implementation to llvm intrinsic
  native_exp10: Switch implementation to use llvm intrinsic
  math: Use precomputed constant for log2(10.0)
  native_tan: Switch implementation to use native_sin/native_cos
  native_rsqrt: Switch implementation to 1 / native_sqrt
  native_recip: provide function implementation instead of macro

amdgpu/lib/SOURCES | 1 +
amdgpu/lib/math/native_exp.cl | 5 +++++
amdgpu/lib/math/native_exp.inc | 3 +++
generic/include/clc/float/definitions.h | 8 +++++++
generic/include/clc/math/native_cos.h | 10 ++++++++-
generic/include/clc/math/native_exp.h | 10 ++++++++-
generic/include/clc/math/native_exp10.h | 10 ++++++++-
generic/include/clc/math/native_exp2.h | 10 ++++++++-
generic/include/clc/math/native_recip.h | 10 ++++++++-
generic/include/clc/math/native_rsqrt.h | 10 ++++++++-
generic/include/clc/math/native_sin.h | 10 ++++++++-
generic/include/clc/math/native_sqrt.h | 10 ++++++++-
generic/include/clc/math/native_tan.h | 17 +++++++--------
generic/include/utils.h | 9 ++++++++
generic/lib/SOURCES | 9 ++++++++
generic/lib/math/exp10.inc | 4 ++--
generic/lib/math/native_cos.cl | 7 ++++++
generic/lib/math/native_exp.cl | 7 ++++++
generic/lib/math/native_exp10.cl | 5 +++++
generic/lib/math/native_exp10.inc | 3 +++
generic/lib/math/native_exp2.cl | 7 ++++++
generic/lib/math/native_log.cl | 7 ++----
generic/lib/math/native_log10.cl | 7 ++----
generic/lib/math/native_log10.inc | 3 ---
generic/lib/math/native_log2.cl | 8 ++-----
generic/lib/math/native_log2.inc | 25 ----------------------
generic/lib/math/native_recip.cl | 5 +++++
generic/lib/math/native_recip.inc | 3 +++
generic/lib/math/native_rsqrt.cl | 5 +++++
generic/lib/math/native_rsqrt.inc | 3 +++
generic/lib/math/native_sin.cl | 7 ++++++
generic/lib/math/native_sqrt.cl | 7 ++++++
generic/lib/math/native_tan.cl | 5 +++++
generic/lib/math/native_tan.inc | 3 +++
.../{native_log.inc => native_unary_intrinsic.inc} | 21 ++++++++++++++++--
35 files changed, 209 insertions(+), 65 deletions(-)
create mode 100644 amdgpu/lib/math/native_exp.cl
create mode 100644 amdgpu/lib/math/native_exp.inc
create mode 100644 generic/include/utils.h
create mode 100644 generic/lib/math/native_cos.cl
create mode 100644 generic/lib/math/native_exp.cl
create mode 100644 generic/lib/math/native_exp10.cl
create mode 100644 generic/lib/math/native_exp10.inc
create mode 100644 generic/lib/math/native_exp2.cl
delete mode 100644 generic/lib/math/native_log10.inc
delete mode 100644 generic/lib/math/native_log2.inc
create mode 100644 generic/lib/math/native_recip.cl
create mode 100644 generic/lib/math/native_recip.inc
create mode 100644 generic/lib/math/native_rsqrt.cl
create mode 100644 generic/lib/math/native_rsqrt.inc
create mode 100644 generic/lib/math/native_sin.cl
create mode 100644 generic/lib/math/native_sqrt.cl
create mode 100644 generic/lib/math/native_tan.cl
create mode 100644 generic/lib/math/native_tan.inc
rename generic/lib/math/{native_log.inc => native_unary_intrinsic.inc} (65%)

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

exp10 CTS fails with or without this change

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>

Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
---
generic/include/utils.h | 9 +++++++++
generic/lib/math/native_log2.cl | 8 ++------
.../{native_log2.inc => native_unary_intrinsic.inc} | 21 +++++++++++++++++++--
3 files changed, 30 insertions(+), 8 deletions(-)
create mode 100644 generic/include/utils.h
rename generic/lib/math/{native_log2.inc => native_unary_intrinsic.inc} (65%)

diff --git a/generic/include/utils.h b/generic/include/utils.h

Does placing the file in generic/include mean that it will be copied upon installation? If so, why is that necessary?

new file mode 100644
index 0000000..23f4d34
--- /dev/null
+++ b/generic/include/utils.h
@@ -0,0 +1,9 @@
+
+#ifndef __CLC_UTILS_H_
+#define __CLC_UTILS_H_
+
+#define __CLC_CONCAT(x, y) x ## y
+#define __CLC_STR(x) #x
+#define __CLC_XSTR(x) __CLC_STR(x)
+
+#endif
diff --git a/generic/lib/math/native_log2.cl b/generic/lib/math/native_log2.cl
index 35ed18b..b610423 100644
--- a/generic/lib/math/native_log2.cl
+++ b/generic/lib/math/native_log2.cl
@@ -22,11 +22,7 @@

#include <clc/clc.h>

-#define __CLC_FUNCTION __clc_native_log2
-#define __CLC_INTRINSIC "llvm.log2"
-#undef cl_khr_fp64
-#include <clc/math/unary_intrin.inc>
-
-#define __CLC_BODY <native_log2.inc>
+#define __CLC_NATIVE_INTRINSIC log2
+#define __CLC_BODY <native_unary_intrinsic.inc>
#define __FLOAT_ONLY
#include <clc/math/gentype.inc>
diff --git a/generic/lib/math/native_log2.inc b/generic/lib/math/native_unary_intrinsic.inc
similarity index 65%
rename from generic/lib/math/native_log2.inc
rename to generic/lib/math/native_unary_intrinsic.inc
index 0f6a509..048da8a 100644
--- a/generic/lib/math/native_log2.inc
+++ b/generic/lib/math/native_unary_intrinsic.inc
@@ -20,6 +20,23 @@
* THE SOFTWARE.
*/

-_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log2(__CLC_GENTYPE val) {
- return __clc_native_log2(val);
+#include <utils.h>
+
+#ifdef __CLC_SCALAR
+#define __CLC_FUNCTION __CLC_CONCAT(__clc_native, __CLC_NATIVE_INTRINSIC)
+#define __CLC_INTRINSIC "llvm." __CLC_XSTR(__CLC_NATIVE_INTRINSIC)
+
+#undef cl_khr_fp64
+#include <clc/math/unary_intrin.inc>
+
+#endif
+
+// For some reason this requires double redirection

This comment is not very helpful, and is for the same reason you need __CLC_XSTR. I would just get rid of the comment.

Jeroen

>
> Signed-off-by: Jan Vesely <jan.vesely@rutgers.edu>
> ---
> generic/include/utils.h | 9 +++++++++
> generic/lib/math/native_log2.cl | 8 ++------
> .../{native_log2.inc => native_unary_intrinsic.inc} | 21 +++++++++++++++++++--
> 3 files changed, 30 insertions(+), 8 deletions(-)
> create mode 100644 generic/include/utils.h
> rename generic/lib/math/{native_log2.inc => native_unary_intrinsic.inc} (65%)
>
> diff --git a/generic/include/utils.h b/generic/include/utils.h

Does placing the file in generic/include mean that it will be copied
upon installation? If so, why is that necessary?

no, only the include files in generic/include/clc are installed.
I've a cleanup to remove all but function declaration includes from
include/clc on my todo list, but it will take some time.

> new file mode 100644
> index 0000000..23f4d34
> --- /dev/null
> +++ b/generic/include/utils.h
> @@ -0,0 +1,9 @@
> +
> +#ifndef __CLC_UTILS_H_
> +#define __CLC_UTILS_H_
> +
> +#define __CLC_CONCAT(x, y) x ## y
> +#define __CLC_STR(x) #x
> +#define __CLC_XSTR(x) __CLC_STR(x)
> +
> +#endif
> diff --git a/generic/lib/math/native_log2.cl b/generic/lib/math/native_log2.cl
> index 35ed18b..b610423 100644
> --- a/generic/lib/math/native_log2.cl
> +++ b/generic/lib/math/native_log2.cl
> @@ -22,11 +22,7 @@
>
> #include <clc/clc.h>
>
> -#define __CLC_FUNCTION __clc_native_log2
> -#define __CLC_INTRINSIC "llvm.log2"
> -#undef cl_khr_fp64
> -#include <clc/math/unary_intrin.inc>
> -
> -#define __CLC_BODY <native_log2.inc>
> +#define __CLC_NATIVE_INTRINSIC log2
> +#define __CLC_BODY <native_unary_intrinsic.inc>
> #define __FLOAT_ONLY
> #include <clc/math/gentype.inc>
> diff --git a/generic/lib/math/native_log2.inc b/generic/lib/math/native_unary_intrinsic.inc
> similarity index 65%
> rename from generic/lib/math/native_log2.inc
> rename to generic/lib/math/native_unary_intrinsic.inc
> index 0f6a509..048da8a 100644
> --- a/generic/lib/math/native_log2.inc
> +++ b/generic/lib/math/native_unary_intrinsic.inc
> @@ -20,6 +20,23 @@
> * THE SOFTWARE.
> */
>
> -_CLC_OVERLOAD _CLC_DEF __CLC_GENTYPE native_log2(__CLC_GENTYPE val) {
> - return __clc_native_log2(val);
> +#include <utils.h>
> +
> +#ifdef __CLC_SCALAR
> +#define __CLC_FUNCTION __CLC_CONCAT(__clc_native, __CLC_NATIVE_INTRINSIC)
> +#define __CLC_INTRINSIC "llvm." __CLC_XSTR(__CLC_NATIVE_INTRINSIC)
> +
> +#undef cl_khr_fp64
> +#include <clc/math/unary_intrin.inc>
> +
> +#endif
> +
> +// For some reason this requires double redirection

This comment is not very helpful, and is for the same reason you need
__CLC_XSTR. I would just get rid of the comment.

right, removed locally.

thanks,
Jan