PPC64 support

Hi all,

Attached is a patch that adds PPC64 support. Please let me know if you have any comments.

Thanks

– Carlo

diff --git a/runtime/Makefile.bgq b/runtime/Makefile.bgq
new file mode 100644
index 0000000..d9aa914
--- /dev/null
+++ b/runtime/Makefile.bgq
@@ -0,0 +1,76 @@
+date := $(shell date '+%Y%m%d')
+
+FEATURE_FLAGS = -DOMP_40_ENABLED=1 -DOMP_30_ENABLED=1 -DOMP_VERSION=201107 -DKMP_VERSION_MAJOR=5 \
+ -DCACHE_LINE=64 -DKMP_ADJUST_BLOCKTIME=1 -DBUILD_I8 -DKMP_USE_ADAPTIVE_LOCKS=0 \
+ -DKMP_DEBUG_ADAPTIVE_LOCKS=0 -DINTEL_NO_ITTNOTIFY_API -DKMP_VERSION_MINOR=0 -DKMP_VERSION_BUILD=0 \
+ -DKMP_LIBRARY_FILE=\"libiomp5.so\" -D_KMP_BUILD_TIME="\"$(date)\"" -DKMP_GOMP_COMPAT
+
+CPPFLAGS = ${FEATURE_FLAGS} -D__float128='long double'
+
+CC = powerpc64-bgq-linux-gcc
+CXX = powerpc64-bgq-linux-g++
+
+all: build/libiomp5.a build/libiomp5.so
+
+build/.dir:
+ mkdir -p build
+ touch build/.dir
+
+build/omp.h:
+ perl tools/expand-vars.pl --strict $$(echo $(FEATURE_FLAGS) | sed 's/-D/-D /g') \
+ -D KMP_BUILD_DATE=$(date) -D Revision=Revision -D Date=Date \
+ src/include/40/omp.h.var build/omp.h
+
+build/%.o: src/%.cpp build/kmp_i18n_id.inc build/kmp_i18n_default.inc build/omp.h
+ ${CXX} -c ${CPPFLAGS} -g -O3 -Isrc -Ibuild -o $@ $<
+
+build/%.o: src/%.c build/kmp_i18n_id.inc build/kmp_i18n_default.inc build/omp.h
+ ${CC} -x c++ -c ${CPPFLAGS} -g -O3 -Isrc -Ibuild -o $@ $<
+
+OBJS = build/kmp_alloc.o \
+ build/kmp_atomic.o \
+ build/kmp_cancel.o \
+ build/kmp_csupport.o \
+ build/kmp_dispatch.o \
+ build/kmp_debug.o \
+ build/kmp_environment.o \
+ build/kmp_error.o \
+ build/kmp_ftn_cdecl.o \
+ build/kmp_ftn_extra.o \
+ build/kmp_ftn_stdcall.o \
+ build/kmp_global.o \
+ build/kmp_i18n.o \
+ build/kmp_io.o \
+ build/kmp_itt.o \
+ build/kmp_runtime.o \
+ build/kmp_settings.o \
+ build/kmp_sched.o \
+ build/kmp_str.o \
+ build/kmp_tasking.o \
+ build/kmp_taskq.o \
+ build/kmp_taskdeps.o \
+ build/kmp_threadprivate.o \
+ build/kmp_utility.o \
+ build/kmp_version.o \
+ build/kmp_lock.o \
+ build/z_Linux_util.o
+
+BGSYS_FLOOR=$(shell readlink /bgsys/drivers/ppcfloor)
+build/libiomp5.so: $(OBJS)
+ ${CXX} -Wl,--build-id -Wl,-rpath -Wl,${BGSYS_FLOOR}/gnu-linux/powerpc64-bgq-linux/lib -shared -o $@.1.0 $^ -Wl,-soname,$(shell basename $@.1) -lpthread
+ (cd $(shell dirname $@) && ln -sf $(shell basename $@.1.0) $(shell basename $@.1))
+ (cd $(shell dirname $@) && ln -sf $(shell basename $@.1) $(shell basename $@))
+# Note: We should not need the --build-id when we switch to clang (it should add it for us).
+
+build/libiomp5.a: $(OBJS)
+ ar cr $@ $^
+
+build/kmp_i18n_id.inc: src/i18n/en_US.txt tools/message-converter.pl build/.dir
+ perl tools/message-converter.pl --prefix=kmp_i18n --enum=$@ $<
+
+build/kmp_i18n_default.inc: src/i18n/en_US.txt tools/message-converter.pl build/.dir
+ perl tools/message-converter.pl --prefix=kmp_i18n --default=$@ $<
+
+clean:
+ rm -rf build
+
diff --git a/runtime/src/CMakeLists.txt b/runtime/src/CMakeLists.txt
index dd29966..4814cb4 100644
--- a/runtime/src/CMakeLists.txt
+++ b/runtime/src/CMakeLists.txt
@@ -12,6 +12,9 @@ include_directories(
${CMAKE_CURRENT_BINARY_DIR}
)
`+# detect architecture` `+EXECUTE_PROCESS( COMMAND uname -m COMMAND tr -d '\n' OUTPUT_VARIABLE ARCH )` `+` `if(WIN32)` `set(OS_GEN "win")` `elseif(APPLE)` `@@ -22,12 +25,14 @@ else()` `message(FATAL_ERROR "Unsupported OS")` `endif()`
-if("${ARCH}" STREQUAL "")
+if(NOT "${ARCH}" STREQUAL "ppc64")
set(ARCH "32e")
+ set(ARCH_STR "Intel(R) 64")
+else()
+# arch already set above
+ set(ARCH_STR "PPC64")
endif()
`-set(ARCH_STR "Intel(R) 64")` `-` `set(FEATURE_FLAGS "-D USE_ITT_BUILD")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D NDEBUG")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_ARCH_STR=\"\\\"${ARCH_STR}\\\"\"")` `@@ -50,8 +55,13 @@ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_LOAD_BALANCE")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D USE_CBLKDATA")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D GUIDEDLL_EXPORTS")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_GOMP_COMPAT")` `-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")` `-set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")` `+` `+#adaptive locks use x86 assembly - disable for ppc64` `+if(NOT "${ARCH}" STREQUAL "ppc64")` `+ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_USE_ADAPTIVE_LOCKS=1")` `+ set(FEATURE_FLAGS "${FEATURE_FLAGS} -D KMP_DEBUG_ADAPTIVE_LOCKS=0")` `+endif()` `+` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_50_ENABLED=0")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_41_ENABLED=0")` `set(FEATURE_FLAGS "${FEATURE_FLAGS} -D OMP_40_ENABLED=1")` `@@ -122,16 +132,35 @@ add_custom_command(` `OUTPUT omp.h` `COMMAND perl ${CMAKE_CURRENT_SOURCE_DIR}/../tools/expand-vars.pl --strict -D Revision=\"\\$$Revision\" -D Date=\"\\$$Date\" -D KMP_TYPE=\"Performance\" -D KMP_ARCH=\"\\\"${ARCH_STR}\\\"\" -D KMP_VERSION_MAJOR=${VERSION} -D KMP_VERSION_MINOR=0 -D KMP_VERSION_BUILD=00000000 -D KMP_BUILD_DATE=\"${BUILD_TIME} UTC\" -D KMP_TARGET_COMPILER=12 -D KMP_DIAG=0 -D KMP_DEBUG_INFO=0 -D OMP_VERSION=${OMP_VERSION} ${CMAKE_CURRENT_SOURCE_DIR}/include/${OMP_VERSION_NUM}/omp.h.var omp.h` `)` `-add_custom_command(` `- OUTPUT z_Linux_asm.o` `- COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}` `-)` `+` `+if(NOT "${ARCH}" STREQUAL "ppc64")` `+ add_custom_command(` `+ OUTPUT z_Linux_asm.o` `+ COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D 
KMP_ARCH_X86_64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}` `+ )` `+else()` `+ add_custom_command(` `+ OUTPUT z_Linux_asm.o` `+ COMMAND ${CMAKE_CXX_COMPILER} -c -o z_Linux_asm.o -D KMP_ASM_INTRINS -D KMP_GOMP_COMPAT -D KMP_ARCH_PPC64 -x assembler-with-cpp ${CMAKE_CURRENT_SOURCE_DIR}/${ASM_SOURCES}` `+ )` `+` `+endif()` `+`
add_custom_target(gen_kmp_i18n DEPENDS kmp_i18n_id.inc kmp_i18n_default.inc omp.h z_Linux_asm.o)
`if(NOT APPLE)` `- set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt")` `+ set(CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports_so.txt -ldl")` `endif()`
add_library(iomp5 SHARED ${SOURCES} z_Linux_asm.o)
+
+# This is a workaround to a known ppc64 issue about libpthread. For more
+# information see
+# http://ryanarn.blogspot.com/2011/07/curious-case-of-pthreadatfork-on.html
+if("${ARCH}" STREQUAL "ppc64")
+ find_library(PTHREAD NAMES pthread)
+ target_link_libraries(iomp5 ${PTHREAD})
+endif()
+
add_dependencies(iomp5 gen_kmp_i18n)
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index a8c600b..f5dd10f 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -459,9 +459,9 @@ typedef int PACKED_REDUCTION_METHOD_T;
/*
* Only Linux* OS and Windows* OS support thread affinity.
*/
-#if KMP_OS_LINUX || KMP_OS_WINDOWS
+#if (KMP_OS_LINUX || KMP_OS_WINDOWS) && !KMP_OS_CNK && !KMP_ARCH_PPC64
# define KMP_AFFINITY_SUPPORTED 1
-#elif KMP_OS_DARWIN || KMP_OS_FREEBSD
+#elif KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_CNK || KMP_ARCH_PPC64
// affinity not supported
# define KMP_AFFINITY_SUPPORTED 0
#else
@@ -476,7 +476,7 @@ extern size_t __kmp_affin_mask_size;
`# if KMP_OS_LINUX` `//` `-// On Linux* OS, the mask isactually a vector of length __kmp_affin_mask_size` `+// On Linux* OS, the mask is actually a vector of length __kmp_affin_mask_size` `// (in bytes). It should be allocated on a word boundary.` `//` `// WARNING!!! We have made the base type of the affinity mask unsigned char,` `@@ -946,6 +946,9 @@ extern unsigned int __kmp_place_core_offset;` `#if KMP_OS_WINDOWS` `# define KMP_INIT_WAIT 64U /* initial number of spin-tests */` `# define KMP_NEXT_WAIT 32U /* susequent number of spin-tests */` `+#elif KMP_OS_CNK` `+# define KMP_INIT_WAIT 16U /* initial number of spin-tests */` `+# define KMP_NEXT_WAIT 8U /* susequent number of spin-tests */` `#elif KMP_OS_LINUX` `# define KMP_INIT_WAIT 1024U /* initial number of spin-tests */` `# define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */` `@@ -971,6 +974,11 @@ extern void __kmp_x86_cpuid( int mode, int mode2, struct kmp_cpuid *p );` `extern void __kmp_x86_pause( void );` `# endif` `# define KMP_CPU_PAUSE() __kmp_x86_pause()` `+#elif KMP_ARCH_PPC64` `+# define KMP_PPC64_PRI_LOW() __asm__ volatile ("or 1, 1, 1")` `+# define KMP_PPC64_PRI_MED() __asm__ volatile ("or 2, 2, 2")` `+# define KMP_PPC64_PRI_LOC_MB() __asm__ volatile ("" : : : "memory")` `+# define KMP_CPU_PAUSE() do { KMP_PPC64_PRI_LOW(); KMP_PPC64_PRI_MED(); KMP_PPC64_PRI_LOC_MB(); } while (0)` `#else` `# define KMP_CPU_PAUSE() /* nothing to do */` `#endif` `diff --git a/runtime/src/kmp_csupport.c b/runtime/src/kmp_csupport.c` `index 18b6c35..0b7d3ed 100644` `--- a/runtime/src/kmp_csupport.c` `+++ b/runtime/src/kmp_csupport.c` `@@ -837,6 +837,19 @@ __kmpc_flush(ident_t *loc, ...)` `#endif // KMP_MIC` `#elif KMP_ARCH_ARM` `// Nothing yet` `+ #elif KMP_ARCH_PPC64` `+ // Nothing needed here (we have a real MB above).` `+ #if KMP_OS_CNK` `+ // The flushing thread needs to yield here; this prevents a` `+ // busy-waiting thread from saturating the pipeline. 
flush is` `+ // often used in loops like this:` `+ // while (!flag) {` `+ // #pragma omp flush(flag)` `+ // }` `+ // and adding the yield here is good for at least a 10x speedup` `+ // when running >2 threads per core (on the NAS LU benchmark).` `+ __kmp_yield(TRUE);` `+ #endif` `#else` `#error Unknown or unsupported architecture` `#endif` `diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h` `index f241751..d78d846 100644` `--- a/runtime/src/kmp_ftn_os.h` `+++ b/runtime/src/kmp_ftn_os.h` `@@ -478,7 +478,7 @@` `//#define KMP_API_NAME_GOMP_TARGET_UPDATE GOMP_target_update` `#define KMP_API_NAME_GOMP_TEAMS GOMP_teams`
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK && !KMP_ARCH_PPC64
#define xstr(x) str(x)
#define str(x) #x
`diff --git a/runtime/src/kmp_global.c b/runtime/src/kmp_global.c` `index aa1f8e3..d3c3195 100644` `--- a/runtime/src/kmp_global.c` `+++ b/runtime/src/kmp_global.c` `@@ -321,7 +321,11 @@ int __kmp_env_consistency_check = FALSE; /* KMP_CONSISTENCY_CHECK speci` `kmp_uint32 __kmp_yield_init = KMP_INIT_WAIT;` `kmp_uint32 __kmp_yield_next = KMP_NEXT_WAIT;` `kmp_uint32 __kmp_yielding_on = 1;` `+#if KMP_OS_CNK` `+kmp_uint32 __kmp_yield_cycle = 0;` `+#else` `kmp_uint32 __kmp_yield_cycle = 1; /* Yield-cycle is on by default */` `+#endif` `kmp_int32 __kmp_yield_on_count = 10; /* By default, yielding is on for 10 monitor periods. */` `kmp_int32 __kmp_yield_off_count = 1; /* By default, yielding is off for 1 monitor periods. */` `/* ----------------------------------------------------- */` `diff --git a/runtime/src/kmp_gsupport.c b/runtime/src/kmp_gsupport.c` `index 9d8e553..aa52024 100644` `--- a/runtime/src/kmp_gsupport.c` `+++ b/runtime/src/kmp_gsupport.c` `@@ -15,7 +15,7 @@` `//===----------------------------------------------------------------------===//`
`-#if defined(__x86_64)` `+#if defined(__x86_64) || defined (__powerpc64__)` `# define KMP_I8` `#endif` `#include "kmp.h"` `diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h` `index 8009d18..c5ce838 100644` `--- a/runtime/src/kmp_lock.h` `+++ b/runtime/src/kmp_lock.h` `@@ -518,7 +518,7 @@ __kmp_destroy_bootstrap_lock( kmp_bootstrap_lock_t *lck )` `// Internal RTL locks are also implemented as ticket locks, for now.` `//` `// FIXME - We should go through and figure out which lock kind works best for` `-// each internal lock, and use the type deeclaration and function calls for` `+// each internal lock, and use the type declaration and function calls for` `// that explicit lock kind (and get rid of this section).` `//`
diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h
index bb5e72f..db1981e 100644
--- a/runtime/src/kmp_os.h
+++ b/runtime/src/kmp_os.h
@@ -66,10 +66,12 @@
#define KMP_OS_FREEBSD 0
#define KMP_OS_DARWIN 0
#define KMP_OS_WINDOWS 0
+#define KMP_OS_CNK 0
#define KMP_OS_UNIX 0 /* disjunction of KMP_OS_LINUX, KMP_OS_DARWIN etc. */
#define KMP_ARCH_X86 0
#define KMP_ARCH_X86_64 0
+#define KMP_ARCH_PPC64 0
#ifdef _WIN32
# undef KMP_OS_WINDOWS
@@ -91,6 +93,11 @@
# define KMP_OS_FREEBSD 1
#endif
+#if ( defined __bgq__ )
+# undef KMP_OS_CNK
+# define KMP_OS_CNK 1
+#endif
+
#if (1 != KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS)
# error Unknown OS
#endif
@@ -117,6 +124,9 @@
# elif defined __i386
# undef KMP_ARCH_X86
# define KMP_ARCH_X86 1
+# elif defined __powerpc64__
+# undef KMP_ARCH_PPC64
+# define KMP_ARCH_PPC64 1
# endif
#endif
@@ -156,7 +166,7 @@
# define KMP_ARCH_ARM 1
#endif
-#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM)
+#if (1 != KMP_ARCH_X86 + KMP_ARCH_X86_64 + KMP_ARCH_ARM + KMP_ARCH_PPC64)
# error Unknown or unsupported architecture
#endif
@@ -232,7 +242,7 @@
`#if KMP_ARCH_X86 || KMP_ARCH_ARM` `# define KMP_SIZE_T_SPEC KMP_UINT32_SPEC` `-#elif KMP_ARCH_X86_64` `+#elif KMP_ARCH_X86_64 || KMP_ARCH_PPC64` `# define KMP_SIZE_T_SPEC KMP_UINT64_SPEC` `#else` `# error "Can't determine size_t printf format specifier."` `@@ -657,6 +667,10 @@ extern kmp_real64 __kmp_test_then_add_real64 ( volatile kmp_real64 *p, kmp_real6` `# endif` `#endif /* KMP_OS_WINDOWS */`
+#if KMP_ARCH_PPC64
+# define KMP_MB() __sync_synchronize()
+#endif
+
#ifndef KMP_MB
# define KMP_MB() /* nothing to do */
#endif
@@ -763,7 +777,7 @@ typedef void (*microtask_t)( int *gtid, int *npr, ... );
#endif /* KMP_I8 */
`/* Workaround for Intel(R) 64 code gen bug when taking address of static array (Intel(R) 64 Tracker #138) */` `-#if KMP_ARCH_X86_64 && KMP_OS_LINUX` `+#if (KMP_ARCH_X86_64 || KMP_ARCH_PPC64) && KMP_OS_LINUX` `# define STATIC_EFI2_WORKAROUND` `#else` `# define STATIC_EFI2_WORKAROUND static` `diff --git a/runtime/src/kmp_runtime.c b/runtime/src/kmp_runtime.c` `index fea41d0..d243700 100644` `--- a/runtime/src/kmp_runtime.c` `+++ b/runtime/src/kmp_runtime.c` `@@ -8450,7 +8450,7 @@ __kmp_determine_reduction_method( ident_t *loc, kmp_int32 global_tid,` `int atomic_available = FAST_REDUCTION_ATOMIC_METHOD_GENERATED;` `int tree_available = FAST_REDUCTION_TREE_METHOD_GENERATED;`
- #if KMP_ARCH_X86_64
+ #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64
`#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN` `#if KMP_MIC` `diff --git a/runtime/src/kmp_settings.c b/runtime/src/kmp_settings.c` `index 54745cb..b85678e 100644` `--- a/runtime/src/kmp_settings.c` `+++ b/runtime/src/kmp_settings.c` `@@ -536,6 +536,7 @@ __kmp_stg_parse_file(`
static char * par_range_to_print = NULL;
`+#ifdef KMP_DEBUG` `static void` `__kmp_stg_parse_par_range(` `char const * name,` `@@ -614,7 +615,7 @@ __kmp_stg_parse_par_range(` `break;` `}` `} // __kmp_stg_parse_par_range` `-` `+#endif`
int
__kmp_initial_threads_capacity( int req_nproc )
diff --git a/runtime/src/kmp_version.c b/runtime/src/kmp_version.c
index f64d052..5dc82d1 100644
--- a/runtime/src/kmp_version.c
+++ b/runtime/src/kmp_version.c
@@ -20,7 +20,7 @@
#include "kmp_version.h"
// Replace with snapshot date YYYYMMDD for promotion build.
-#define KMP_VERSION_BUILD 00000000
+//#define KMP_VERSION_BUILD 00000000
// Helper macros to convert value of macro to string literal.
#define _stringer( x ) #x
diff --git a/runtime/src/makefile.mk b/runtime/src/makefile.mk
index 9be8799..84d6ac2 100644
--- a/runtime/src/makefile.mk
+++ b/runtime/src/makefile.mk
@@ -310,6 +310,9 @@ endif
ifeq "$(CPLUSPLUS)" "on"
ifeq "$(os)" "win"
c-flags += -TP
+ else ifeq "$(arch)" "ppc64"
+ # c++0x on ppc64 linux removes definition of preproc. macros, needed in .hs
+ c-flags += -x c++ -std=gnu++0x
else
ifneq "$(filter gcc clang,$(c))" ""
c-flags += -x c++ -std=c++0x
@@ -370,7 +373,7 @@ ifeq "$(os)" "lin"
ld-flags-extra += -lirc_pic
endif
endif
- ifeq "$(filter 32 32e 64,$(arch))" ""
+ ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
ld-flags-extra += $(shell pkg-config --libs libffi)
endif
else
@@ -581,9 +584,12 @@ ifneq "$(os)" "win"
ifeq "$(arch)" "arm"
z_Linux_asm$(obj) : \
cpp-flags += -D KMP_ARCH_ARM
- else
+ else ifeq "$(arch)" "ppc64"
+ z_Linux_asm$(obj) : \
+ cpp-flags += -D KMP_ARCH_PPC64
+ else
z_Linux_asm$(obj) : \
- cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
+ cpp-flags += -D KMP_ARCH_X86$(if $(filter 32e,$(arch)),_64)
endif
endif
`@@ -729,7 +735,9 @@ endif` `else # 5` `lib_c_items += kmp_gsupport` `endif` `+# ifneq "$(arch)" "ppc64"` `lib_asm_items += z_Linux_asm` `+# endif` `endif` `endif`
@@ -1391,9 +1399,13 @@ ifneq "$(filter %-dyna win-%,$(os)-$(LINK_TYPE))" ""
td_exp += libc.so.6
td_exp += ld-linux-armhf.so.3
endif
+ ifeq "$(arch)" "ppc64"
+ td_exp += libc.so.6
+ td_exp += ld64.so.1
+ endif
td_exp += libdl.so.2
td_exp += libgcc_s.so.1
- ifeq "$(filter 32 32e 64,$(arch))" ""
+ ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
td_exp += libffi.so.6
td_exp += libffi.so.5
endif
diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_config.h b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
index 40c8614..9e7b36b 100644
--- a/runtime/src/thirdparty/ittnotify/ittnotify_config.h
+++ b/runtime/src/thirdparty/ittnotify/ittnotify_config.h
@@ -132,6 +132,11 @@
# define ITT_ARCH_ARM 4
#endif /* ITT_ARCH_ARM */
`+#ifndef ITT_ARCH_PPC64` `+# define ITT_ARCH_PPC64 5` `+#endif /* ITT_ARCH_PPC64 */` `+` `+` `#ifndef ITT_ARCH` `# if defined _M_IX86 || defined __i386__` `# define ITT_ARCH ITT_ARCH_IA32` `@@ -141,6 +146,8 @@` `# define ITT_ARCH ITT_ARCH_IA64` `# elif defined _M_ARM || __arm__` `# define ITT_ARCH ITT_ARCH_ARM` `+# elif defined __powerpc64__` `+# define ITT_ARCH ITT_ARCH_PPC64` `# endif` `#endif`
@@ -274,7 +281,7 @@ ITT_INLINE long __TBB_machine_fetchadd4(volatile void* ptr, long addend)
: "memory");
return result;
}
-#elif ITT_ARCH==ITT_ARCH_ARM
+#elif ITT_ARCH==ITT_ARCH_ARM || ITT_ARCH==ITT_ARCH_PPC64
#define __TBB_machine_fetchadd4(addr, val) __sync_fetch_and_add(addr, val)
#endif /* ITT_ARCH==ITT_ARCH_IA64 */
#ifndef ITT_SIMPLE_INIT
diff --git a/runtime/src/z_Linux_asm.s b/runtime/src/z_Linux_asm.s
index 1f1ba1b..64c8052 100644
--- a/runtime/src/z_Linux_asm.s
+++ b/runtime/src/z_Linux_asm.s
@@ -138,7 +138,7 @@ __kmp_unnamed_critical_addr:
#endif /* KMP_GOMP_COMPAT */

-#if KMP_ARCH_X86
+#if KMP_ARCH_X86 && !KMP_ARCH_PPC64
`// -----------------------------------------------------------------------` `// microtasking routines specifically written for IA-32 architecture` `@@ -1585,6 +1585,16 @@ __kmp_unnamed_critical_addr:` `.size __kmp_unnamed_critical_addr,4` `#endif /* KMP_ARCH_ARM */`
+#if KMP_ARCH_PPC64
+ .data
+ .comm .gomp_critical_user_,32,8
+ .data
+ .align 8
+ .global __kmp_unnamed_critical_addr
+__kmp_unnamed_critical_addr:
+ .8byte .gomp_critical_user_
+ .size __kmp_unnamed_critical_addr,8
+#endif /* KMP_ARCH_PPC64 */
`#if defined(__linux__)` `.section .note.GNU-stack,"",@progbits` `diff --git a/runtime/src/z_Linux_util.c b/runtime/src/z_Linux_util.c` `index 7633f99..348f5d8 100644` `--- a/runtime/src/z_Linux_util.c` `+++ b/runtime/src/z_Linux_util.c` `@@ -32,7 +32,7 @@` `#include <sys/resource.h>` `#include <sys/syscall.h>`
-#if KMP_OS_LINUX
+#if KMP_OS_LINUX && !KMP_OS_CNK
# include <sys/sysinfo.h>
# if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
// We should really include <futex.h>, but that causes compatibility problems on different
@@ -61,7 +61,7 @@
#include <fcntl.h>
`// For non-x86 architecture` `-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)` `+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)` `# include <stdbool.h>` `# include <ffi.h>` `#endif` `@@ -110,7 +110,7 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )` `/* ------------------------------------------------------------------------ */` `/* ------------------------------------------------------------------------ */`
-#if KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED
+#if ( KMP_OS_LINUX && KMP_AFFINITY_SUPPORTED)
`/*` `* Affinity support` `@@ -147,6 +147,19 @@ __kmp_print_cond( char *buffer, kmp_cond_align_t *cond )` `# error Wrong code for getaffinity system call.` `# endif /* __NR_sched_getaffinity */`
+# elif KMP_ARCH_PPC64
+# ifndef __NR_sched_setaffinity
+# define __NR_sched_setaffinity 222
+# elif __NR_sched_setaffinity != 222
+# error Wrong code for setaffinity system call.
+# endif /* __NR_sched_setaffinity */
+# ifndef __NR_sched_getaffinity
+# define __NR_sched_getaffinity 223
+# elif __NR_sched_getaffinity != 223
+# error Wrong code for getaffinity system call.
+# endif /* __NR_sched_getaffinity */
+
+
# else
# error Unknown or unsupported architecture
`@@ -445,7 +458,7 @@ __kmp_change_thread_affinity_mask( int gtid, kmp_affin_mask_t *new_mask,` `/* ------------------------------------------------------------------------ */` `/* ------------------------------------------------------------------------ */`
-#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#if KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
`int` `__kmp_futex_determine_capable()` `@@ -462,7 +475,7 @@ __kmp_futex_determine_capable()` `return retval;` `}`
-#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM)
+#endif // KMP_OS_LINUX && (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM) && !KMP_OS_CNK
`/* ------------------------------------------------------------------------ */` `/* ------------------------------------------------------------------------ */` `@@ -481,7 +494,7 @@ __kmp_test_then_or32( volatile kmp_int32 *p, kmp_int32 d )` `old_value = TCR_4( *p );` `new_value = old_value | d;`
- while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_4( *p );
@@ -498,7 +511,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )
old_value = TCR_4( *p );
new_value = old_value & d;
`- while ( ! __kmp_compare_and_store32 ( p, old_value, new_value ) )` `+ while ( ! KMP_COMPARE_AND_STORE_REL32 ( p, old_value, new_value ) )` `{` `KMP_CPU_PAUSE();` `old_value = TCR_4( *p );` `@@ -507,7 +520,7 @@ __kmp_test_then_and32( volatile kmp_int32 *p, kmp_int32 d )` `return old_value;` `}`
-# if KMP_ARCH_X86
+# if KMP_ARCH_X86 || KMP_ARCH_PPC64
kmp_int64
__kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
{
@@ -516,7 +529,7 @@ __kmp_test_then_add64( volatile kmp_int64 *p, kmp_int64 d )
old_value = TCR_8( *p );
new_value = old_value + d;
`- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )` `+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )` `{` `KMP_CPU_PAUSE();` `old_value = TCR_8( *p );` `@@ -533,7 +546,7 @@ __kmp_test_then_or64( volatile kmp_int64 *p, kmp_int64 d )`
old_value = TCR_8( *p );
new_value = old_value | d;
- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )
+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )
{
KMP_CPU_PAUSE();
old_value = TCR_8( *p );
@@ -549,7 +562,7 @@ __kmp_test_then_and64( volatile kmp_int64 *p, kmp_int64 d )
`old_value = TCR_8( *p );` `new_value = old_value & d;` `- while ( ! __kmp_compare_and_store64 ( p, old_value, new_value ) )` `+ while ( ! KMP_COMPARE_AND_STORE_REL64 ( p, old_value, new_value ) )` `{` `KMP_CPU_PAUSE();` `old_value = TCR_8( *p );` `@@ -2527,7 +2540,7 @@ __kmp_get_load_balance( int max )` `#endif // USE_LOAD_BALANCE`
`-#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)` `+#if KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)`
int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
void *p_argv[] )
@@ -2561,7 +2574,89 @@ int __kmp_invoke_microtask( microtask_t pkfn, int gtid, int tid, int argc,
return 1;
}
`-#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64)` `+#endif // KMP_COMPILER_GCC && !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_PPC64)` `+` `+#if KMP_ARCH_PPC64` `+` `+// we really only need the case with 1 argument, because CLANG always build` `+// a struct of pointers to shared variables referenced in the outlined function` `+int` `+__kmp_invoke_microtask( microtask_t pkfn,` `+ int gtid, int tid,` `+ int argc, void *p_argv[] ) {` `+ switch (argc) {` `+ default:` `+ fprintf(stderr, "Too many args to microtask: %d!\n", argc);` `+ fflush(stderr);` `+ exit(-1);` `+ case 0:` `+ (*pkfn)(&gtid, &tid);` `+ break;` `+ case 1:` `+ (*pkfn)(&gtid, &tid, p_argv[0]);` `+ break;` `+ case 2:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);` `+ break;` `+ case 3:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);` `+ break;` `+ case 4:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);` `+ break;` `+ case 5:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);` `+ break;` `+ case 6:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5]);` `+ break;` `+ case 7:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6]);` `+ break;` `+ case 8:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7]);` `+ break;` `+ case 9:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8]);` `+ break;` `+ case 10:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);` `+ break;` `+ case 11:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);` `+ break;` `+ case 12:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], 
p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],` `+ p_argv[11]);` `+ break;` `+ case 13:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],` `+ p_argv[11], p_argv[12]);` `+ break;` `+ case 14:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],` `+ p_argv[11], p_argv[12], p_argv[13]);` `+ break;` `+ case 15:` `+ (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],` `+ p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],` `+ p_argv[11], p_argv[12], p_argv[13], p_argv[14]);` `+ break;` `+ }` `+` `+ return 1;` `+}` `+` `+#endif`
// end of file //
`diff --git a/runtime/tools/lib/Platform.pm b/runtime/tools/lib/Platform.pm` `index 5b399f7..763f3cb 100644` `--- a/runtime/tools/lib/Platform.pm` `+++ b/runtime/tools/lib/Platform.pm` `@@ -50,6 +50,8 @@ sub canon_arch($) {` `$arch = "32e";` `} elsif ( $arch =~ m{\Aarm(?:v7\D*)?\z} ) {` `$arch = "arm";` `+ } elsif ( $arch =~ m{\Appc64} ) {` `+ $arch = "ppc64";` `} else {` `$arch = undef;` `}; # if` `@@ -62,6 +64,7 @@ sub canon_arch($) {` `"32" => "IA-32 architecture",` `"32e" => "Intel(R) 64",` `"arm" => "ARM",` `+ "ppc64" => "PPC64",` `);`
sub legal_arch($) {
@@ -159,6 +162,8 @@ sub target_options() {
$_host_arch = "32e";
} elsif ( $hardware_platform eq "arm" ) {
$_host_arch = "arm";
+ } elsif ( $hardware_platform eq "ppc64" ) {
+ $_host_arch = "ppc64";
} else {
die "Unsupported host hardware platform: \"$hardware_platform\"; stopped";
}; # if
diff --git a/runtime/tools/lib/Uname.pm b/runtime/tools/lib/Uname.pm
index e212501..52518b4 100644
--- a/runtime/tools/lib/Uname.pm
+++ b/runtime/tools/lib/Uname.pm
@@ -147,6 +147,8 @@ if ( 0 ) {
$values{ hardware_platform } = "x86_64";
} elsif ( $values{ machine } =~ m{\Aarmv7\D*\z} ) {
$values{ hardware_platform } = "arm";
+ } elsif ( $values{ machine } =~ m{\Appc64\z} ) {
+ $values{ hardware_platform } = "ppc64";
} else {
die "Unsupported machine (\"$values{ machine }\") returned by POSIX::uname(); stopped";
}; # if
diff --git a/runtime/tools/src/common-defs.mk b/runtime/tools/src/common-defs.mk
index ebd1922..7eb64b0 100644
--- a/runtime/tools/src/common-defs.mk
+++ b/runtime/tools/src/common-defs.mk
@@ -45,7 +45,7 @@ endif
# Description:
# The function return printable name of specified architecture, IA-32 architecture or Intel(R) 64.
#
-legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(error Bad architecture specified: $(1))))))
+legal_arch = $(if $(filter 32,$(1)),IA-32,$(if $(filter 32e,$(1)),Intel(R) 64,$(if $(filter l1,$(1)),L1OM,$(if $(filter arm,$(1)),ARM,$(if $(filter ppc64,$(1)),PPC64,$(error Bad architecture specified: $(1)))))))
`# Synopsis:` `# var_name = $(call check_variable,var,list)` `@@ -128,9 +128,9 @@ endif` `# --------------------------------------------------------------------------------------------------`
os := $(call check_variable,os,lin lrb mac win)
-arch := $(call check_variable,arch,32 32e 64 arm)
+arch := $(call check_variable,arch,32 32e 64 arm ppc64)
platform := $(os)_$(arch)
-platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64)
+platform := $(call check_variable,platform,lin_32 lin_32e lin_64 lin_arm lrb_32e mac_32 mac_32e win_32 win_32e win_64 lin_ppc64)
# oa-opts means "os and arch options". They are passed to almost all perl scripts.
oa-opts := --os=$(os) --arch=$(arch)
``
diff --git a/runtime/tools/src/common-tools.mk b/runtime/tools/src/common-tools.mk
index a9c9fbc..8c86791 100644
--- a/runtime/tools/src/common-tools.mk
+++ b/runtime/tools/src/common-tools.mk
@@ -34,7 +34,7 @@
# "No rule to build .\kmp_i18n.inc". Using "./" solves the problem.
cpp-flags += -I ./
# For non-x86 architecture
-ifeq "$(filter 32 32e 64,$(arch))" ""
+ifeq "$(filter 32 32e 64 ppc64,$(arch))" ""
cpp-flags += $(shell pkg-config --cflags libffi)
endif
# Add all VPATH directories to path for searching include files.