aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPirama Arumuga Nainar <pirama@google.com>2019-01-14 11:00:28 -0800
committerPirama Arumuga Nainar <pirama@google.com>2019-01-14 11:00:28 -0800
commit4a3ebbfc8cacc0368a4a61943b5ea2bbfdc3e5f5 (patch)
tree3978b4513cf680d08677e434dadde447d70d9208
parent985edb5b045dcb0dd4beb807d882a0220324b520 (diff)
parent7a5b7589b92524c0d141ec70ab7435d9897972ed (diff)
downloadopenmp_llvm-4a3ebbfc8cacc0368a4a61943b5ea2bbfdc3e5f5.tar.gz
Merge 7a5b758 for LLVM update to 349610
Change-Id: I1ba0bbe7b606a2539855c4eb67804d1001bd8b0b
-rw-r--r--cmake/HandleOpenMPOptions.cmake5
-rw-r--r--cmake/config-ix.cmake3
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/cancel.cu8
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/critical.cu10
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/data_sharing.cu69
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/debug.h26
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/interface.h167
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/libcall.cu25
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/loop.cu338
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu10
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h2
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/parallel.cu50
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/reduction.cu43
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/supporti.h62
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/sync.cu51
-rw-r--r--libomptarget/deviceRTLs/nvptx/src/task.cu48
-rw-r--r--libomptarget/src/omptarget.cpp11
-rw-r--r--runtime/cmake/LibompHandleFlags.cmake6
-rw-r--r--runtime/cmake/LibompMicroTests.cmake3
-rw-r--r--runtime/cmake/config-ix.cmake17
-rw-r--r--runtime/src/dllexports9
-rw-r--r--runtime/src/i18n/en_US.txt1
-rw-r--r--runtime/src/include/50/omp.h.var11
-rw-r--r--runtime/src/include/50/omp_lib.f.var37
-rw-r--r--runtime/src/include/50/omp_lib.f90.var21
-rw-r--r--runtime/src/include/50/omp_lib.h.var29
-rw-r--r--runtime/src/include/50/ompt.h.var99
-rw-r--r--runtime/src/kmp.h59
-rw-r--r--runtime/src/kmp_affinity.cpp140
-rw-r--r--runtime/src/kmp_affinity.h10
-rw-r--r--runtime/src/kmp_atomic.h6
-rw-r--r--runtime/src/kmp_barrier.cpp28
-rw-r--r--runtime/src/kmp_config.h.cmake2
-rw-r--r--runtime/src/kmp_csupport.cpp267
-rw-r--r--runtime/src/kmp_dispatch.cpp12
-rw-r--r--runtime/src/kmp_ftn_entry.h202
-rw-r--r--runtime/src/kmp_ftn_os.h16
-rw-r--r--runtime/src/kmp_global.cpp5
-rw-r--r--runtime/src/kmp_gsupport.cpp90
-rw-r--r--runtime/src/kmp_io.cpp64
-rw-r--r--runtime/src/kmp_io.h3
-rw-r--r--runtime/src/kmp_lock.cpp14
-rw-r--r--runtime/src/kmp_lock.h17
-rw-r--r--runtime/src/kmp_os.h16
-rw-r--r--runtime/src/kmp_platform.h21
-rw-r--r--runtime/src/kmp_runtime.cpp496
-rw-r--r--runtime/src/kmp_safe_c_api.h17
-rw-r--r--runtime/src/kmp_settings.cpp52
-rw-r--r--runtime/src/kmp_str.cpp29
-rw-r--r--runtime/src/kmp_str.h7
-rw-r--r--runtime/src/kmp_stub.cpp15
-rw-r--r--runtime/src/kmp_taskdeps.cpp12
-rw-r--r--runtime/src/kmp_tasking.cpp73
-rw-r--r--runtime/src/kmp_utility.cpp4
-rw-r--r--runtime/src/kmp_wait_release.h40
-rw-r--r--runtime/src/kmp_wrapper_getpid.h5
-rw-r--r--runtime/src/kmp_wrapper_malloc.h4
-rw-r--r--runtime/src/libomp.rc.var2
-rw-r--r--runtime/src/ompt-general.cpp34
-rw-r--r--runtime/src/ompt-internal.h6
-rw-r--r--runtime/src/ompt-specific.cpp12
-rw-r--r--runtime/src/ompt-specific.h4
-rw-r--r--runtime/src/thirdparty/ittnotify/ittnotify_static.c19
-rw-r--r--runtime/src/z_Linux_util.cpp75
-rw-r--r--runtime/src/z_Windows_NT_util.cpp1
-rw-r--r--runtime/test/affinity/format/affinity_display.1.c92
-rw-r--r--runtime/test/affinity/format/affinity_values.c135
-rw-r--r--runtime/test/affinity/format/api.c56
-rw-r--r--runtime/test/affinity/format/api2.c84
-rw-r--r--runtime/test/affinity/format/check.py73
-rw-r--r--runtime/test/affinity/format/fields_modifiers.c117
-rw-r--r--runtime/test/affinity/format/fields_values.c152
-rw-r--r--runtime/test/affinity/format/increase.c36
-rw-r--r--runtime/test/affinity/format/lit.local.cfg2
-rw-r--r--runtime/test/affinity/format/nested.c23
-rw-r--r--runtime/test/affinity/format/nested2.c29
-rw-r--r--runtime/test/affinity/format/nested_mixed.c46
-rw-r--r--runtime/test/affinity/format/nested_serial.c35
-rw-r--r--runtime/test/affinity/format/proc_bind.c31
-rw-r--r--runtime/test/affinity/format/simple.c27
-rw-r--r--runtime/test/affinity/format/simple_env.c16
-rw-r--r--runtime/test/api/omp_alloc.c3
-rw-r--r--runtime/test/api/omp_get_wtick.c2
-rw-r--r--runtime/test/api/omp_in_parallel.c5
-rw-r--r--runtime/test/flush/omp_flush.c5
-rw-r--r--runtime/test/lit.cfg15
-rw-r--r--runtime/test/lit.site.cfg.in2
-rwxr-xr-xruntime/test/ompt/callback.h32
-rw-r--r--runtime/test/ompt/misc/api_calls_from_other_thread.cpp4
-rw-r--r--runtime/test/ompt/misc/api_calls_misc.c4
-rw-r--r--runtime/test/ompt/misc/api_calls_places.c2
-rw-r--r--runtime/test/ompt/misc/control_tool.c3
-rw-r--r--runtime/test/ompt/misc/control_tool_no_ompt_support.c3
-rw-r--r--runtime/test/ompt/misc/interoperability.cpp4
-rw-r--r--runtime/test/ompt/parallel/nested.c12
-rw-r--r--runtime/test/ompt/parallel/nested_thread_num.c12
-rw-r--r--runtime/test/ompt/parallel/nested_threadnum.c62
-rw-r--r--runtime/test/ompt/synchronization/taskwait.c1
-rw-r--r--runtime/test/ompt/tasks/explicit_task.c18
-rw-r--r--runtime/test/ompt/tasks/serialized.c12
-rw-r--r--runtime/test/ompt/tasks/task_in_joinbarrier.c10
-rw-r--r--runtime/test/ompt/tasks/untied_task.c16
-rw-r--r--runtime/test/parallel/omp_nested.c2
-rw-r--r--runtime/test/tasking/bug_nested_proxy_task.c1
-rw-r--r--runtime/test/tasking/bug_proxy_task_dep_waiting.c1
-rw-r--r--runtime/test/tasking/kmp_task_reduction_nest.cpp1
-rw-r--r--runtime/test/tasking/kmp_taskloop.c1
-rw-r--r--runtime/test/tasking/omp_task.c3
-rw-r--r--runtime/test/tasking/omp_task_priority.c1
-rw-r--r--runtime/test/tasking/omp_taskloop_grainsize.c1
-rw-r--r--runtime/test/tasking/omp_taskloop_num_tasks.c1
-rw-r--r--runtime/test/tasking/omp_taskyield.c3
-rw-r--r--runtime/test/worksharing/for/kmp_doacross_check.c1
-rw-r--r--runtime/test/worksharing/for/kmp_sch_simd_guided.c1
-rw-r--r--runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c4
-rw-r--r--runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c4
-rw-r--r--runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c4
-rw-r--r--runtime/test/worksharing/for/omp_doacross.c3
118 files changed, 3234 insertions, 1019 deletions
diff --git a/cmake/HandleOpenMPOptions.cmake b/cmake/HandleOpenMPOptions.cmake
index 5e5215d..97b616e 100644
--- a/cmake/HandleOpenMPOptions.cmake
+++ b/cmake/HandleOpenMPOptions.cmake
@@ -13,4 +13,7 @@ if (${OPENMP_ENABLE_WERROR})
append_if(OPENMP_HAVE_WERROR_FLAG "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS)
endif()
-append_if(OPENMP_HAVE_STD_CPP11_FLAG "-std=c++11" CMAKE_CXX_FLAGS) \ No newline at end of file
+append_if(OPENMP_HAVE_STD_GNUPP11_FLAG "-std=gnu++11" CMAKE_CXX_FLAGS)
+if (NOT OPENMP_HAVE_STD_GNUPP11_FLAG)
+ append_if(OPENMP_HAVE_STD_CPP11_FLAG "-std=c++11" CMAKE_CXX_FLAGS)
+endif()
diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake
index 912cbd0..13eace9 100644
--- a/cmake/config-ix.cmake
+++ b/cmake/config-ix.cmake
@@ -3,4 +3,5 @@ include(CheckCXXCompilerFlag)
check_c_compiler_flag(-Werror OPENMP_HAVE_WERROR_FLAG)
-check_cxx_compiler_flag(-std=c++11 OPENMP_HAVE_STD_CPP11_FLAG) \ No newline at end of file
+check_cxx_compiler_flag(-std=gnu++11 OPENMP_HAVE_STD_GNUPP11_FLAG)
+check_cxx_compiler_flag(-std=c++11 OPENMP_HAVE_STD_CPP11_FLAG)
diff --git a/libomptarget/deviceRTLs/nvptx/src/cancel.cu b/libomptarget/deviceRTLs/nvptx/src/cancel.cu
index 77033db..9f92e2d 100644
--- a/libomptarget/deviceRTLs/nvptx/src/cancel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/cancel.cu
@@ -13,16 +13,16 @@
#include "omptarget-nvptx.h"
-EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal) {
- PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", cancelVal);
+ PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", (int)cancelVal);
// disabled
return FALSE;
}
-EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal) {
- PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", cancelVal);
+ PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", (int)cancelVal);
// disabled
return FALSE;
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/critical.cu b/libomptarget/deviceRTLs/nvptx/src/critical.cu
index fef8101..9bf2a30 100644
--- a/libomptarget/deviceRTLs/nvptx/src/critical.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/critical.cu
@@ -16,17 +16,15 @@
#include "omptarget-nvptx.h"
EXTERN
-void __kmpc_critical(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *lck) {
PRINT0(LD_IO, "call to kmpc_critical()\n");
- omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
- omp_set_lock(teamDescr.CriticalLock());
+ omp_set_lock((omp_lock_t *)lck);
}
EXTERN
-void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *lck) {
PRINT0(LD_IO, "call to kmpc_end_critical()\n");
- omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
- omp_unset_lock(teamDescr.CriticalLock());
+ omp_unset_lock((omp_lock_t *)lck);
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
index bfb8208..f69daa1 100644
--- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu
@@ -84,7 +84,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
"Entering __kmpc_initialize_data_sharing_environment\n");
unsigned WID = getWarpId();
- DSPRINT(DSFLAG_INIT, "Warp ID: %d\n", WID);
+ DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID);
omptarget_nvptx_TeamDescr *teamDescr =
&omptarget_nvptx_threadPrivateContext->TeamContext();
@@ -95,15 +95,16 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS,
// We don't need to initialize the frame and active threads.
- DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", InitialDataSize);
- DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (long long)RootS);
+ DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", (unsigned)InitialDataSize);
+ DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (unsigned long long)RootS);
DSPRINT(DSFLAG_INIT, "Root slot data-end at: %016llx \n",
- (long long)RootS->DataEnd);
- DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n", (long long)RootS->Next);
+ (unsigned long long)RootS->DataEnd);
+ DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n",
+ (unsigned long long)RootS->Next);
DSPRINT(DSFLAG_INIT, "Shared slot ptr at: %016llx \n",
- (long long)DataSharingState.SlotPtr[WID]);
+ (unsigned long long)DataSharingState.SlotPtr[WID]);
DSPRINT(DSFLAG_INIT, "Shared stack ptr at: %016llx \n",
- (long long)DataSharingState.StackPtr[WID]);
+ (unsigned long long)DataSharingState.StackPtr[WID]);
DSPRINT0(DSFLAG_INIT, "Exiting __kmpc_initialize_data_sharing_environment\n");
}
@@ -121,8 +122,9 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
if (!IsOMPRuntimeInitialized)
return (void *)&DataSharingState;
- DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize);
- DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize);
+ DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize);
+ DSPRINT(DSFLAG, "Default Data Size %016llx\n",
+ (unsigned long long)SharingDefaultDataSize);
unsigned WID = getWarpId();
unsigned CurActiveThreads = getActiveThreadsMask();
@@ -139,11 +141,11 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
*SavedSharedFrame = FrameP;
*SavedActiveThreads = ActiveT;
- DSPRINT(DSFLAG, "Warp ID: %d\n", WID);
- DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (long long)SlotP);
- DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (long long)StackP);
+ DSPRINT(DSFLAG, "Warp ID: %u\n", WID);
+ DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (unsigned long long)SlotP);
+ DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (unsigned long long)StackP);
DSPRINT(DSFLAG, "Saved frame ptr at: %016llx \n", (long long)FrameP);
- DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT);
+ DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
// Only the warp active master needs to grow the stack.
if (IsWarpMasterActiveThread()) {
@@ -161,12 +163,16 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
const uintptr_t RequiredEndAddress =
CurrentStartAddress + (uintptr_t)SharingDataSize;
- DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize);
- DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize);
- DSPRINT(DSFLAG, "Current Start Address %016llx\n", CurrentStartAddress);
- DSPRINT(DSFLAG, "Current End Address %016llx\n", CurrentEndAddress);
- DSPRINT(DSFLAG, "Required End Address %016llx\n", RequiredEndAddress);
- DSPRINT(DSFLAG, "Active Threads %08x\n", ActiveT);
+ DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize);
+ DSPRINT(DSFLAG, "Default Data Size %016llx\n",
+ (unsigned long long)SharingDefaultDataSize);
+ DSPRINT(DSFLAG, "Current Start Address %016llx\n",
+ (unsigned long long)CurrentStartAddress);
+ DSPRINT(DSFLAG, "Current End Address %016llx\n",
+ (unsigned long long)CurrentEndAddress);
+ DSPRINT(DSFLAG, "Required End Address %016llx\n",
+ (unsigned long long)RequiredEndAddress);
+ DSPRINT(DSFLAG, "Active Threads %08x\n", (unsigned)ActiveT);
// If we require a new slot, allocate it and initialize it (or attempt to
// reuse one). Also, set the shared stack and slot pointers to the new
@@ -184,11 +190,11 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
(uintptr_t)(&ExistingSlot->Data[0]);
if (ExistingSlotSize >= NewSize) {
DSPRINT(DSFLAG, "Reusing stack slot %016llx\n",
- (long long)ExistingSlot);
+ (unsigned long long)ExistingSlot);
NewSlot = ExistingSlot;
} else {
DSPRINT(DSFLAG, "Cleaning up -failed reuse - %016llx\n",
- (long long)SlotP->Next);
+ (unsigned long long)SlotP->Next);
free(ExistingSlot);
}
}
@@ -197,7 +203,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
NewSlot = (__kmpc_data_sharing_slot *)malloc(
sizeof(__kmpc_data_sharing_slot) + NewSize);
DSPRINT(DSFLAG, "New slot allocated %016llx (data size=%016llx)\n",
- (long long)NewSlot, NewSize);
+ (unsigned long long)NewSlot, NewSize);
}
NewSlot->Next = 0;
@@ -213,7 +219,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin(
// not eliminate them because that may be used to return data.
if (SlotP->Next) {
DSPRINT(DSFLAG, "Cleaning up - old not required - %016llx\n",
- (long long)SlotP->Next);
+ (unsigned long long)SlotP->Next);
free(SlotP->Next);
SlotP->Next = 0;
}
@@ -275,8 +281,8 @@ EXTERN void __kmpc_data_sharing_environment_end(
// have other threads that will return after the current ones.
ActiveT &= ~CurActive;
- DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n", CurActive,
- ActiveT);
+ DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n",
+ (unsigned)CurActive, (unsigned)ActiveT);
if (!ActiveT) {
// No other active threads? Great, lets restore the stack.
@@ -290,10 +296,13 @@ EXTERN void __kmpc_data_sharing_environment_end(
FrameP = *SavedSharedFrame;
ActiveT = *SavedActiveThreads;
- DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n", (long long)SlotP);
- DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n", (long long)StackP);
- DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n", (long long)FrameP);
- DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT);
+ DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n",
+ (unsigned long long)SlotP);
+ DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n",
+ (unsigned long long)StackP);
+ DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n",
+ (unsigned long long)FrameP);
+ DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT);
}
}
@@ -319,7 +328,7 @@ __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID,
unsigned SourceWID = SourceThreadID / WARPSIZE;
- DSPRINT(DSFLAG, "Source warp: %d\n", SourceWID);
+ DSPRINT(DSFLAG, "Source warp: %u\n", SourceWID);
void * volatile P = DataSharingState.FramePtr[SourceWID];
DSPRINT0(DSFLAG, "Exiting __kmpc_get_data_sharing_environment_frame\n");
diff --git a/libomptarget/deviceRTLs/nvptx/src/debug.h b/libomptarget/deviceRTLs/nvptx/src/debug.h
index 9f59d66..8577c8f 100644
--- a/libomptarget/deviceRTLs/nvptx/src/debug.h
+++ b/libomptarget/deviceRTLs/nvptx/src/debug.h
@@ -127,6 +127,14 @@
#if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING
#include <stdio.h>
+#include "option.h"
+
+template <typename... Arguments>
+static NOINLINE void log(const char *fmt, Arguments... parameters) {
+ printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE),
+ (int)(threadIdx.x & 0x1F), parameters...);
+}
+
#endif
#if OMPTARGET_NVPTX_TEST
#include <assert.h>
@@ -164,16 +172,14 @@
#define PRINT0(_flag, _str) \
{ \
if (omptarget_device_environment.debug_level && DON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x, \
- threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \
+ log("<b %2d, t %4d, w %2d, l %2d>: " _str); \
} \
}
#define PRINT(_flag, _str, _args...) \
{ \
if (omptarget_device_environment.debug_level && DON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x, \
- threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \
+ log("<b %2d, t %4d, w %2d, l %2d>: " _str, _args); \
} \
}
#else
@@ -217,16 +223,14 @@
#define ASSERT0(_flag, _cond, _str) \
{ \
if (TON(_flag) && !(_cond)) { \
- printf("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n", blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \
+ log("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n"); \
assert(_cond); \
} \
}
#define ASSERT(_flag, _cond, _str, _args...) \
{ \
if (TON(_flag) && !(_cond)) { \
- printf("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \
+ log("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", _args); \
assert(_cond); \
} \
}
@@ -253,15 +257,13 @@
#define WARNING0(_flag, _str) \
{ \
if (WON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \
+ log("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str); \
} \
}
#define WARNING(_flag, _str, _args...) \
{ \
if (WON(_flag)) { \
- printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x, \
- threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \
+ log("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, _args); \
} \
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/interface.h b/libomptarget/deviceRTLs/nvptx/src/interface.h
index 7a37c04..2c2beae 100644
--- a/libomptarget/deviceRTLs/nvptx/src/interface.h
+++ b/libomptarget/deviceRTLs/nvptx/src/interface.h
@@ -160,8 +160,36 @@ typedef enum kmp_sched_t {
} kmp_sched_t;
+/*!
+ * Enum for accessing the reserved_2 field of the ident_t struct below.
+ */
+enum {
+ /*! Bit set to 1 when in SPMD mode. */
+ KMP_IDENT_SPMD_MODE = 0x01,
+ /*! Bit set to 1 when a simplified runtime is used. */
+ KMP_IDENT_SIMPLE_RT_MODE = 0x02,
+};
+
+/*!
+ * The ident structure that describes a source location.
+ * The struct is identical to the one in the kmp.h file.
+ * We maintain the same data structure for compatibility.
+ */
+typedef int kmp_int32;
+typedef struct ident {
+ kmp_int32 reserved_1; /**< might be used in Fortran; see above */
+ kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC
+ identifies this union member */
+ kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */
+ kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */
+ char const *psource; /**< String describing the source location.
+ The string is composed of semi-colon separated fields
+ which describe the source file, the function and a pair
+ of line numbers that delimit the construct. */
+} ident_t;
+
// parallel defs
-typedef void kmp_Indent;
+typedef ident_t kmp_Ident;
typedef void (*kmp_ParFctPtr)(int32_t *global_tid, int32_t *bound_tid, ...);
typedef void (*kmp_ReductFctPtr)(void *lhsData, void *rhsData);
typedef void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num);
@@ -223,28 +251,28 @@ typedef int32_t kmp_CriticalName[8];
////////////////////////////////////////////////////////////////////////////////
// query
-EXTERN int32_t __kmpc_global_num_threads(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_bound_thread_num(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_bound_num_threads(kmp_Indent *loc); // missing
-EXTERN int32_t __kmpc_in_parallel(kmp_Indent *loc); // missing
+EXTERN int32_t __kmpc_global_num_threads(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_bound_thread_num(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_bound_num_threads(kmp_Ident *loc); // missing
+EXTERN int32_t __kmpc_in_parallel(kmp_Ident *loc); // missing
// parallel
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc);
-EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc);
+EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t global_tid,
int32_t num_threads);
// simd
-EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t global_tid,
int32_t simd_limit);
// aee ... not supported
-// EXTERN void __kmpc_fork_call(kmp_Indent *loc, int32_t argc, kmp_ParFctPtr
+// EXTERN void __kmpc_fork_call(kmp_Ident *loc, int32_t argc, kmp_ParFctPtr
// microtask, ...);
-EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
+EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
uint32_t global_tid);
-EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid);
+EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid);
// proc bind
-EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t global_tid,
int proc_bind);
EXTERN int omp_get_num_places(void);
EXTERN int omp_get_place_num_procs(int place_num);
@@ -254,52 +282,52 @@ EXTERN int omp_get_partition_num_places(void);
EXTERN void omp_get_partition_place_nums(int *place_nums);
// for static (no chunk or chunk)
-EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
-EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
-EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
-EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter1,
uint64_t *plower, uint64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
EXTERN
-void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
EXTERN
-void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk);
EXTERN
-void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
EXTERN
-void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t *plastiter1,
uint64_t *plower, uint64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk);
EXTERN
-void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc,
+void __kmpc_for_static_init_4_simple_generic(kmp_Ident *loc,
int32_t global_tid, int32_t sched,
int32_t *plastiter,
int32_t *plower, int32_t *pupper,
@@ -307,11 +335,11 @@ void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc,
int32_t chunk);
EXTERN
void __kmpc_for_static_init_4u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
int32_t chunk);
EXTERN
-void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc,
+void __kmpc_for_static_init_8_simple_generic(kmp_Ident *loc,
int32_t global_tid, int32_t sched,
int32_t *plastiter,
int64_t *plower, int64_t *pupper,
@@ -319,48 +347,48 @@ void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc,
int64_t chunk);
EXTERN
void __kmpc_for_static_init_8u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
+ kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1,
uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
int64_t chunk);
-EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid);
// for dynamic
-EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int32_t lower, int32_t upper,
int32_t incr, int32_t chunk);
-EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, uint32_t lower,
uint32_t upper, int32_t incr,
int32_t chunk);
-EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t global_tid,
int32_t sched, int64_t lower, int64_t upper,
int64_t incr, int64_t chunk);
-EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t global_tid,
int32_t sched, uint64_t lower,
uint64_t upper, int64_t incr,
int64_t chunk);
-EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, int32_t *plower,
int32_t *pupper, int32_t *pstride);
-EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, uint32_t *plower,
uint32_t *pupper, int32_t *pstride);
-EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, int64_t *plower,
int64_t *pupper, int64_t *pstride);
-EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t global_tid,
int32_t *plastiter, uint64_t *plower,
uint64_t *pupper, int64_t *pstride);
-EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t global_tid);
// Support for reducing conditional lastprivate variables
-EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc,
+EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc,
int32_t global_tid,
int32_t varNum, void *array);
@@ -391,67 +419,73 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple_generic(
int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data,
kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct,
kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct);
+EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
+ int32_t global_tid,
+ kmp_CriticalName *crit);
+EXTERN void __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc,
+ int32_t global_tid,
+ kmp_CriticalName *crit);
EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size);
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size);
// sync barrier
-EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid);
-EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid);
-EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid);
-EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc, int32_t global_tid);
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid);
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid);
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid);
// single
-EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid);
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid);
// sync
-EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t global_tid);
-EXTERN void __kmpc_critical(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t global_tid);
+EXTERN void __kmpc_critical(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit);
-EXTERN void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid,
kmp_CriticalName *crit);
-EXTERN void __kmpc_flush(kmp_Indent *loc);
+EXTERN void __kmpc_flush(kmp_Ident *loc);
// vote
EXTERN int32_t __kmpc_warp_active_thread_mask();
// tasks
-EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Indent *loc,
+EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Ident *loc,
uint32_t global_tid, int32_t flag,
size_t sizeOfTaskInclPrivate,
size_t sizeOfSharedTable,
kmp_TaskFctPtr sub);
-EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr,
int32_t depNum, void *depList,
int32_t noAliasDepNum,
void *noAliasDepList);
-EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newLegacyTaskDescr);
-EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList);
-EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid);
-EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid);
+EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
int end_part);
-EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid);
-EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid);
+EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr, int if_val,
uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
int32_t sched, uint64_t grainsize, void *task_dup);
// cancel
-EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal);
-EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid,
+EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid,
int32_t cancelVal);
// non standard
@@ -460,7 +494,8 @@ EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime);
EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized);
EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
int16_t RequiresDataSharing);
-EXTERN void __kmpc_spmd_kernel_deinit();
+EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit();
+EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime);
EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
int16_t IsOMPRuntimeInitialized);
EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
index ea9225d..91b270c 100644
--- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu
@@ -222,9 +222,11 @@ EXTERN int omp_get_ancestor_thread_num(int level) {
" chunk %" PRIu64 "; tid %d, tnum %d, nthreads %d\n",
"ancestor", steps,
(currTaskDescr->IsParallelConstruct() ? "par" : "task"),
- currTaskDescr->InParallelRegion(), sched,
- currTaskDescr->RuntimeChunkSize(), currTaskDescr->ThreadId(),
- currTaskDescr->ThreadsInTeam(), currTaskDescr->NThreads());
+ (int)currTaskDescr->InParallelRegion(), (int)sched,
+ currTaskDescr->RuntimeChunkSize(),
+ (int)currTaskDescr->ThreadId(),
+ (int)currTaskDescr->ThreadsInTeam(),
+ (int)currTaskDescr->NThreads());
}
if (currTaskDescr->IsParallelConstruct()) {
@@ -404,23 +406,21 @@ EXTERN int omp_get_max_task_priority(void) {
#define SET 1
EXTERN void omp_init_lock(omp_lock_t *lock) {
- *lock = UNSET;
+ omp_unset_lock(lock);
PRINT0(LD_IO, "call omp_init_lock()\n");
}
EXTERN void omp_destroy_lock(omp_lock_t *lock) {
+ omp_unset_lock(lock);
PRINT0(LD_IO, "call omp_destroy_lock()\n");
}
EXTERN void omp_set_lock(omp_lock_t *lock) {
// int atomicCAS(int* address, int compare, int val);
// (old == compare ? val : old)
- int compare = UNSET;
- int val = SET;
// TODO: not sure spinning is a good idea here..
- while (atomicCAS(lock, compare, val) != UNSET) {
-
+ while (atomicCAS(lock, UNSET, SET) != UNSET) {
clock_t start = clock();
clock_t now;
for (;;) {
@@ -436,9 +436,7 @@ EXTERN void omp_set_lock(omp_lock_t *lock) {
}
EXTERN void omp_unset_lock(omp_lock_t *lock) {
- int compare = SET;
- int val = UNSET;
- int old = atomicCAS(lock, compare, val);
+ (void)atomicExch(lock, UNSET);
PRINT0(LD_IO, "call omp_unset_lock()\n");
}
@@ -446,10 +444,7 @@ EXTERN void omp_unset_lock(omp_lock_t *lock) {
EXTERN int omp_test_lock(omp_lock_t *lock) {
// int atomicCAS(int* address, int compare, int val);
// (old == compare ? val : old)
- int compare = UNSET;
- int val = SET;
-
- int ret = atomicCAS(lock, compare, val);
+ int ret = atomicAdd(lock, 0);
PRINT(LD_IO, "call omp_test_lock() return %d\n", ret);
diff --git a/libomptarget/deviceRTLs/nvptx/src/loop.cu b/libomptarget/deviceRTLs/nvptx/src/loop.cu
index bd84f0f..c100be5 100644
--- a/libomptarget/deviceRTLs/nvptx/src/loop.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/loop.cu
@@ -93,9 +93,10 @@ public:
////////////////////////////////////////////////////////////////////////////////
// Support for Static Init
- INLINE static void for_static_init(int32_t schedtype, int32_t *plastiter,
- T *plower, T *pupper, ST *pstride,
- ST chunk, bool IsSPMDExecutionMode,
+ INLINE static void for_static_init(int32_t gtid, int32_t schedtype,
+ int32_t *plastiter, T *plower, T *pupper,
+ ST *pstride, ST chunk,
+ bool IsSPMDExecutionMode,
bool IsRuntimeUninitialized) {
// When IsRuntimeUninitialized is true, we assume that the caller is
// in an L0 parallel region and that all worker threads participate.
@@ -112,108 +113,73 @@ public:
PRINT(LD_LOOP,
"OMP Thread %d: schedule type %d, chunk size = %lld, mytid "
"%d, num tids %d\n",
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
- schedtype, P64(chunk),
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
- GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized));
- ASSERT0(
- LT_FUSSY,
- (GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized)) <
- (GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized)),
- "current thread is not needed here; error");
+ (int)gtid, (int)schedtype, (long long)chunk, (int)gtid,
+ (int)numberOfActiveOMPThreads);
+ ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads,
+ "current thread is not needed here; error");
// copy
int lastiter = 0;
T lb = *plower;
T ub = *pupper;
ST stride = *pstride;
- T entityId, numberOfEntities;
// init
switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) {
case kmp_sched_static_chunk: {
if (chunk > 0) {
- entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
- numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized);
- ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
+ numberOfActiveOMPThreads);
break;
}
} // note: if chunk <=0, use nochunk
case kmp_sched_static_balanced_chunk: {
if (chunk > 0) {
- entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
- numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized);
-
// round up to make sure the chunk is enough to cover all iterations
T tripCount = ub - lb + 1; // +1 because ub is inclusive
- T span = (tripCount + numberOfEntities - 1) / numberOfEntities;
+ T span = (tripCount + numberOfActiveOMPThreads - 1) /
+ numberOfActiveOMPThreads;
// perform chunk adjustment
chunk = (span + chunk - 1) & ~(chunk - 1);
ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
T oldUb = ub;
- ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
+ numberOfActiveOMPThreads);
if (ub > oldUb)
ub = oldUb;
break;
}
} // note: if chunk <=0, use nochunk
case kmp_sched_static_nochunk: {
- entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
- numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized);
- ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid,
+ numberOfActiveOMPThreads);
break;
}
case kmp_sched_distr_static_chunk: {
if (chunk > 0) {
- entityId = GetOmpTeamId();
- numberOfEntities = GetNumberOfOmpTeams();
- ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk, GetOmpTeamId(),
+ GetNumberOfOmpTeams());
break;
} // note: if chunk <=0, use nochunk
}
case kmp_sched_distr_static_nochunk: {
- entityId = GetOmpTeamId();
- numberOfEntities = GetNumberOfOmpTeams();
-
- ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ ForStaticNoChunk(lastiter, lb, ub, stride, chunk, GetOmpTeamId(),
+ GetNumberOfOmpTeams());
break;
}
case kmp_sched_distr_static_chunk_sched_static_chunkone: {
- entityId =
- GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized) *
- GetOmpTeamId() +
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
- numberOfEntities = GetNumberOfOmpTeams() *
- GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized);
- ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk,
+ numberOfActiveOMPThreads * GetOmpTeamId() + gtid,
+ GetNumberOfOmpTeams() * numberOfActiveOMPThreads);
break;
}
default: {
- ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", schedtype);
+ ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", (int)schedtype);
PRINT(LD_LOOP, "unknown schedtype %d, revert back to static chunk\n",
- schedtype);
- entityId =
- GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized);
- numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode,
- IsRuntimeUninitialized);
- ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId,
- numberOfEntities);
+ (int)schedtype);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid,
+ numberOfActiveOMPThreads);
+ break;
}
}
// copy back
@@ -221,13 +187,12 @@ public:
*plower = lb;
*pupper = ub;
*pstride = stride;
- PRINT(
- LD_LOOP,
- "Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last "
- "%d\n",
- GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, IsRuntimeUninitialized),
- GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride),
- lastiter);
+ PRINT(LD_LOOP,
+ "Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last "
+ "%d\n",
+ (int)numberOfActiveOMPThreads, (int)GetNumberOfWorkersInTeam(),
+ (long long)(*plower), (long long)(*pupper), (long long)(*pstride),
+ (int)lastiter);
}
////////////////////////////////////////////////////////////////////////////////
@@ -238,20 +203,17 @@ public:
schedule <= kmp_sched_ordered_last;
}
- INLINE static void dispatch_init(kmp_Indent *loc, int32_t threadId,
+ INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId,
kmp_sched_t schedule, T lb, T ub, ST st,
ST chunk) {
- ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
"Expected non-SPMD mode + initialized runtime.");
int tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid);
T tnum = currTaskDescr->ThreadsInTeam();
T tripCount = ub - lb + 1; // +1 because ub is inclusive
- ASSERT0(
- LT_FUSSY,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) <
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
- "current thread is not needed here; error");
+ ASSERT0(LT_FUSSY, threadId < tnum,
+ "current thread is not needed here; error");
/* Currently just ignore the monotonic and non-monotonic modifiers
* (the compiler isn't producing them * yet anyway).
@@ -269,7 +231,7 @@ public:
__kmpc_barrier(loc, threadId);
PRINT(LD_LOOP,
"go sequential as tnum=%ld, trip count %lld, ordered sched=%d\n",
- (long)tnum, P64(tripCount), schedule);
+ (long)tnum, (long long)tripCount, (int)schedule);
schedule = kmp_sched_static_chunk;
chunk = tripCount; // one thread gets the whole loop
} else if (schedule == kmp_sched_runtime) {
@@ -295,18 +257,20 @@ public:
break;
}
}
- PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", schedule,
- P64(chunk));
+ PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", (int)schedule,
+ (long long)chunk);
} else if (schedule == kmp_sched_auto) {
schedule = kmp_sched_static_chunk;
chunk = 1;
- PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", schedule,
- P64(chunk));
+ PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", (int)schedule,
+ (long long)chunk);
} else {
- PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", schedule, P64(chunk));
+ PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", (int)schedule,
+ (long long)chunk);
ASSERT(LT_FUSSY,
schedule == kmp_sched_dynamic || schedule == kmp_sched_guided,
- "unknown schedule %d & chunk %lld\n", schedule, P64(chunk));
+ "unknown schedule %d & chunk %lld\n", (int)schedule,
+ (long long)chunk);
}
// init schedules
@@ -319,9 +283,7 @@ public:
// compute static chunk
ST stride;
int lastiter = 0;
- ForStaticChunk(
- lastiter, lb, ub, stride, chunk,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
// save computed params
omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -329,10 +291,12 @@ public:
PRINT(LD_LOOP,
"dispatch init (static chunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ (int)tnum,
omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
- omptarget_nvptx_threadPrivateContext->Stride(tid));
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+ tid));
} else if (schedule == kmp_sched_static_balanced_chunk) {
ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value");
// save sched state
@@ -348,9 +312,7 @@ public:
chunk = (span + chunk - 1) & ~(chunk - 1);
T oldUb = ub;
- ForStaticChunk(
- lastiter, lb, ub, stride, chunk,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+ ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb.");
if (ub > oldUb)
ub = oldUb;
@@ -361,10 +323,12 @@ public:
PRINT(LD_LOOP,
"dispatch init (static chunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ (int)tnum,
omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
- omptarget_nvptx_threadPrivateContext->Stride(tid));
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+ tid));
} else if (schedule == kmp_sched_static_nochunk) {
ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value");
// save sched state
@@ -374,9 +338,7 @@ public:
// compute static chunk
ST stride;
int lastiter = 0;
- ForStaticNoChunk(
- lastiter, lb, ub, stride, chunk,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum);
+ ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum);
// save computed params
omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk;
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb;
@@ -384,10 +346,12 @@ public:
PRINT(LD_LOOP,
"dispatch init (static nochunk) : num threads = %d, ub = %" PRId64
", next lower bound = %llu, stride = %llu\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ (int)tnum,
omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
- omptarget_nvptx_threadPrivateContext->Stride(tid));
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(tid),
+ (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride(
+ tid));
} else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) {
__kmpc_barrier(loc, threadId);
@@ -405,8 +369,9 @@ public:
PRINT(LD_LOOP,
"dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64
", chunk %" PRIu64 "\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
- omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
+ (int)tnum,
+ (unsigned long long)
+ omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId),
omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId),
omptarget_nvptx_threadPrivateContext->Chunk(teamId));
}
@@ -430,41 +395,40 @@ public:
// c. lb and ub >= loopUpperBound: empty chunk --> FINISHED
// a.
if (lb <= loopUpperBound && ub < loopUpperBound) {
- PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n", P64(lb),
- P64(ub), P64(loopUpperBound));
+ PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n",
+ (long long)lb, (long long)ub, (long long)loopUpperBound);
return NOT_FINISHED;
}
// b.
if (lb <= loopUpperBound) {
PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; clip to loop ub\n",
- P64(lb), P64(ub), P64(loopUpperBound));
+ (long long)lb, (long long)ub, (long long)loopUpperBound);
ub = loopUpperBound;
return LAST_CHUNK;
}
// c. if we are here, we are in case 'c'
lb = loopUpperBound + 2;
ub = loopUpperBound + 1;
- PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", P64(lb),
- P64(ub), P64(loopUpperBound));
+ PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", (long long)lb,
+ (long long)ub, (long long)loopUpperBound);
return FINISHED;
}
// On Pascal, with inlining of the runtime into the user application,
// this code deadlocks. This is probably because different threads
// in a warp cannot make independent progress.
- NOINLINE static int dispatch_next(int32_t *plast, T *plower, T *pupper,
- ST *pstride) {
+ NOINLINE static int dispatch_next(int32_t gtid, int32_t *plast, T *plower,
+ T *pupper, ST *pstride) {
ASSERT0(LT_FUSSY, isRuntimeInitialized(),
"Expected non-SPMD mode + initialized runtime.");
// ID of a thread in its own warp
// automatically selects thread or warp ID based on selected implementation
int tid = GetLogicalThreadIdInBlock();
- ASSERT0(
- LT_FUSSY,
- GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) <
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
- "current thread is not needed here; error");
+ ASSERT0(LT_FUSSY,
+ gtid < GetNumberOfOmpThreads(tid, isSPMDMode(),
+ isRuntimeUninitialized()),
+ "current thread is not needed here; error");
// retrieve schedule
kmp_sched_t schedule =
omptarget_nvptx_threadPrivateContext->ScheduleType(tid);
@@ -477,7 +441,7 @@ public:
// finished?
if (myLb > ub) {
PRINT(LD_LOOP, "static loop finished with myLb %lld, ub %lld\n",
- P64(myLb), P64(ub));
+ (long long)myLb, (long long)ub);
return DISPATCH_FINISHED;
}
// not finished, save current bounds
@@ -493,7 +457,7 @@ public:
ST stride = omptarget_nvptx_threadPrivateContext->Stride(tid);
omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = myLb + stride;
PRINT(LD_LOOP, "static loop continues with myLb %lld, myUb %lld\n",
- P64(*plower), P64(*pupper));
+ (long long)*plower, (long long)*pupper);
return DISPATCH_NOTFINISHED;
}
ASSERT0(LT_FUSSY,
@@ -515,12 +479,13 @@ public:
*pupper = myUb;
*pstride = 1;
- PRINT(LD_LOOP,
- "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
- "last %d\n",
- GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
- GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride),
- *plast);
+ PRINT(
+ LD_LOOP,
+ "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, "
+ "last %d\n",
+ (int)GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()),
+ (int)GetNumberOfWorkersInTeam(), (long long)*plower, (long long)*pupper,
+ (long long)*pstride, (int)*plast);
return DISPATCH_NOTFINISHED;
}
@@ -538,7 +503,7 @@ public:
////////////////////////////////////////////////////////////////////////////////
// init
-EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t tid,
int32_t schedule, int32_t lb, int32_t ub,
int32_t st, int32_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_4\n");
@@ -546,7 +511,7 @@ EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid,
loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
}
-EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t tid,
int32_t schedule, uint32_t lb, uint32_t ub,
int32_t st, int32_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_4u\n");
@@ -554,7 +519,7 @@ EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid,
loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
}
-EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t tid,
int32_t schedule, int64_t lb, int64_t ub,
int64_t st, int64_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_8\n");
@@ -562,7 +527,7 @@ EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid,
loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk);
}
-EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t tid,
int32_t schedule, uint64_t lb, uint64_t ub,
int64_t st, int64_t chunk) {
PRINT0(LD_IO, "call kmpc_dispatch_init_8u\n");
@@ -571,53 +536,53 @@ EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid,
}
// next
-EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t tid, int32_t *p_last,
+EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t tid, int32_t *p_last,
int32_t *p_lb, int32_t *p_ub, int32_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_4\n");
return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next(
- p_last, p_lb, p_ub, p_st);
+ tid, p_last, p_lb, p_ub, p_st);
}
-EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t tid,
+EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t tid,
int32_t *p_last, uint32_t *p_lb,
uint32_t *p_ub, int32_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_4u\n");
return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next(
- p_last, p_lb, p_ub, p_st);
+ tid, p_last, p_lb, p_ub, p_st);
}
-EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t tid, int32_t *p_last,
+EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t tid, int32_t *p_last,
int64_t *p_lb, int64_t *p_ub, int64_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_8\n");
return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next(
- p_last, p_lb, p_ub, p_st);
+ tid, p_last, p_lb, p_ub, p_st);
}
-EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t tid,
+EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t tid,
int32_t *p_last, uint64_t *p_lb,
uint64_t *p_ub, int64_t *p_st) {
PRINT0(LD_IO, "call kmpc_dispatch_next_8u\n");
return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next(
- p_last, p_lb, p_ub, p_st);
+ tid, p_last, p_lb, p_ub, p_st);
}
// fini
-EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini();
}
-EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini();
}
-EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini();
}
-EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_dispatch_fini_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini();
}
@@ -626,151 +591,143 @@ EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) {
// KMP interface implementation (static loops)
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
-EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
-EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
-EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid,
+EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
uint64_t *plower, uint64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(),
- isRuntimeUninitialized());
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ checkSPMDMode(loc), checkRuntimeUninitialized(loc));
}
EXTERN
-void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int32_t *plower, int32_t *pupper,
int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_spmd\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/true,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
}
EXTERN
-void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype,
int32_t *plastiter, uint32_t *plower,
uint32_t *pupper, int32_t *pstride,
int32_t incr, int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_spmd\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/true,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
}
EXTERN
-void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype, int32_t *plastiter,
int64_t *plower, int64_t *pupper,
int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_spmd\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/true,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
}
EXTERN
-void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid,
+void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid,
int32_t schedtype,
int32_t *plastiter, uint64_t *plower,
uint64_t *pupper, int64_t *pstride,
int64_t incr, int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_spmd\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/true,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true);
}
EXTERN
void __kmpc_for_static_init_4_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_generic\n");
omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/false,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
}
EXTERN
void __kmpc_for_static_init_4u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr,
int32_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_generic\n");
omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/false,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
}
EXTERN
void __kmpc_for_static_init_8_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_generic\n");
omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/false,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
}
EXTERN
void __kmpc_for_static_init_8u_simple_generic(
- kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
+ kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter,
uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr,
int64_t chunk) {
PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_generic\n");
omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init(
- schedtype, plastiter, plower, pupper, pstride, chunk,
- /*IsSPMDExecutionMode=*/false,
- /*IsRuntimeUninitialized=*/true);
+ global_tid, schedtype, plastiter, plower, pupper, pstride, chunk,
+ /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true);
}
-EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_for_static_fini\n");
}
@@ -792,21 +749,20 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) {
}
}; // namespace
-EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc, int32_t gtid,
+EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid,
int32_t varNum, void *array) {
PRINT0(LD_IO, "call to __kmpc_reduce_conditional_lastprivate(...)\n");
- ASSERT0(LT_FUSSY, isRuntimeInitialized(),
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
"Expected non-SPMD mode + initialized runtime.");
omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor();
- int tid = GetOmpThreadId(GetLogicalThreadIdInBlock(), isSPMDMode(),
- isRuntimeUninitialized());
- uint32_t NumThreads = GetNumberOfOmpThreads(
- GetLogicalThreadIdInBlock(), isSPMDMode(), isRuntimeUninitialized());
+ int tid = GetLogicalThreadIdInBlock();
+ uint32_t NumThreads = GetNumberOfOmpThreads(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc));
uint64_t *Buffer = teamDescr.getLastprivateIterBuffer();
for (unsigned i = 0; i < varNum; i++) {
// Reset buffer.
- if (tid == 0)
+ if (gtid == 0)
*Buffer = 0; // Reset to minimum loop iteration value.
// Barrier.
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
index 5d95eb1..b0b1290 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu
@@ -150,7 +150,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",
- newTaskDescr->ThreadId(), newTaskDescr->ThreadsInTeam());
+ (int)newTaskDescr->ThreadId(), (int)newTaskDescr->ThreadsInTeam());
if (RequiresDataSharing && threadId % WARPSIZE == 0) {
// Warp master innitializes data sharing environment.
@@ -162,12 +162,16 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime,
}
}
-EXTERN void __kmpc_spmd_kernel_deinit() {
+EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit() {
+ __kmpc_spmd_kernel_deinit_v2(isRuntimeInitialized());
+}
+
+EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) {
// We're not going to pop the task descr stack of each thread since
// there are no more parallel regions in SPMD mode.
__syncthreads();
int threadId = GetThreadIdInBlock();
- if (isRuntimeUninitialized()) {
+ if (!RequiresOMPRuntime) {
if (threadId == 0) {
// Enqueue omp state object for use by another team.
int slot = usedSlotIdx;
diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
index 2a6de28..b63feae 100644
--- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
+++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h
@@ -251,7 +251,6 @@ public:
INLINE omptarget_nvptx_WorkDescr &WorkDescr() {
return workDescrForActiveParallel;
}
- INLINE omp_lock_t *CriticalLock() { return &criticalLock; }
INLINE uint64_t *getLastprivateIterBuffer() { return &lastprivateIterBuffer; }
// init
@@ -303,7 +302,6 @@ private:
levelZeroTaskDescr; // icv for team master initial thread
omptarget_nvptx_WorkDescr
workDescrForActiveParallel; // one, ONLY for the active par
- omp_lock_t criticalLock;
uint64_t lastprivateIterBuffer;
__align__(16)
diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
index 13e64e4..fbcbeab 100644
--- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu
@@ -76,7 +76,7 @@ EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask,
else
*NumLanes = ConvergentSize;
ASSERT(LT_FUSSY, *NumLanes > 0, "bad thread request of %d threads",
- *NumLanes);
+ (int)*NumLanes);
// Set to true for lanes participating in the simd region.
bool isActive = false;
@@ -152,7 +152,7 @@ EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask,
else
NumThreads = ConvergentSize;
ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
- NumThreads);
+ (int)NumThreads);
// Set to true for workers participating in the parallel region.
bool isActive = false;
@@ -260,7 +260,7 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn,
}
ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads",
- NumThreads);
+ (int)NumThreads);
ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(),
"only team master can create parallel");
@@ -307,7 +307,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn,
PRINT(LD_PAR,
"thread will execute parallel region with id %d in a team of "
"%d threads\n",
- newTaskDescr->ThreadId(), newTaskDescr->NThreads());
+ (int)newTaskDescr->ThreadId(), (int)newTaskDescr->NThreads());
isActive = true;
}
@@ -332,11 +332,11 @@ EXTERN void __kmpc_kernel_end_parallel() {
// support for parallel that goes sequential
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_serialized_parallel\n");
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+ if (checkRuntimeUninitialized(loc)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
omptarget_nvptx_simpleThreadPrivateContext->IncParLevel();
return;
@@ -370,12 +370,12 @@ EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) {
newTaskDescr);
}
-EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
+EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc,
uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_end_serialized_parallel\n");
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+ if (checkRuntimeUninitialized(loc)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
omptarget_nvptx_simpleThreadPrivateContext->DecParLevel();
return;
@@ -393,11 +393,11 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc,
currTaskDescr->RestoreLoopData();
}
-EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_parallel_level\n");
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+ if (checkRuntimeUninitialized(loc)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc),
"Expected SPMD mode with uninitialized runtime.");
return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel();
}
@@ -417,27 +417,29 @@ EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) {
// cached by the compiler and used when calling the runtime. On nvptx
// it's cheap to recalculate this value so we never use the result
// of this call.
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc) {
- return GetLogicalThreadIdInBlock();
+EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) {
+ int tid = GetLogicalThreadIdInBlock();
+ return GetOmpThreadId(tid, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc));
}
////////////////////////////////////////////////////////////////////////////////
// push params
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid,
int32_t num_threads) {
PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads);
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) =
num_threads;
}
-EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t tid,
int32_t simd_limit) {
- PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", simd_limit);
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", (int)simd_limit);
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized.");
tid = GetLogicalThreadIdInBlock();
omptarget_nvptx_threadPrivateContext->SimdLimitForNextSimd(tid) = simd_limit;
}
@@ -445,14 +447,14 @@ EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid,
// Do nothing. The host guarantees we started the requested number of
// teams and we only need inspection of gridDim.
-EXTERN void __kmpc_push_num_teams(kmp_Indent *loc, int32_t tid,
+EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid,
int32_t num_teams, int32_t thread_limit) {
- PRINT(LD_IO, "call kmpc_push_num_teams %d\n", num_teams);
+ PRINT(LD_IO, "call kmpc_push_num_teams %d\n", (int)num_teams);
ASSERT0(LT_FUSSY, FALSE,
"should never have anything with new teams on device");
}
-EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t tid,
+EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t tid,
int proc_bind) {
- PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", proc_bind);
+ PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", (int)proc_bind);
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/libomptarget/deviceRTLs/nvptx/src/reduction.cu
index 21a419c..c0d22df 100644
--- a/libomptarget/deviceRTLs/nvptx/src/reduction.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/reduction.cu
@@ -31,7 +31,7 @@ int32_t __gpu_block_reduce() {
}
EXTERN
-int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
+int32_t __kmpc_reduce_gpu(kmp_Ident *loc, int32_t global_tid, int32_t num_vars,
size_t reduce_size, void *reduce_data,
void *reduce_array_size, kmp_ReductFctPtr *reductFct,
kmp_CriticalName *lck) {
@@ -40,7 +40,8 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
int numthread;
if (currTaskDescr->IsParallelConstruct()) {
numthread =
- GetNumberOfOmpThreads(threadId, isSPMDMode(), isRuntimeUninitialized());
+ GetNumberOfOmpThreads(threadId, checkSPMDMode(loc),
+ checkRuntimeUninitialized(loc));
} else {
numthread = GetNumberOfOmpTeams();
}
@@ -55,12 +56,12 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars,
}
EXTERN
-int32_t __kmpc_reduce_combined(kmp_Indent *loc) {
+int32_t __kmpc_reduce_combined(kmp_Ident *loc) {
return threadIdx.x == 0 ? 2 : 0;
}
EXTERN
-int32_t __kmpc_reduce_simd(kmp_Indent *loc) {
+int32_t __kmpc_reduce_simd(kmp_Ident *loc) {
return (threadIdx.x % 32 == 0) ? 1 : 0;
}
@@ -75,12 +76,12 @@ EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) {
}
EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) {
- int lo, hi;
- asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
- hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
- lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
- asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
- return val;
+ int lo, hi;
+ asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val));
+ hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size);
+ lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size);
+ asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi));
+ return val;
}
static INLINE void gpu_regular_warp_reduce(void *reduce_data,
@@ -231,8 +232,7 @@ int32_t nvptx_parallel_reduce_nowait(int32_t global_tid, int32_t num_vars,
// Get the OMP thread Id. This is different from BlockThreadId in the case of
// an L2 parallel region.
- return GetOmpThreadId(BlockThreadId, isSPMDExecutionMode,
- isRuntimeUninitialized) == 0;
+ return global_tid == 0;
#endif // __CUDA_ARCH__ >= 700
}
@@ -429,3 +429,22 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_simple_generic(
/*isSPMDExecutionMode=*/false,
/*isRuntimeUninitialized=*/true);
}
+
+EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc,
+ int32_t global_tid,
+ kmp_CriticalName *crit) {
+ if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0)
+ return 0;
+ // The master thread of the team actually does the reduction.
+ while (atomicCAS((uint32_t *)crit, 0, 1))
+ ;
+ return 1;
+}
+
+EXTERN void
+__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid,
+ kmp_CriticalName *crit) {
+ __threadfence_system();
+ (void)atomicExch((uint32_t *)crit, 0);
+}
+
diff --git a/libomptarget/deviceRTLs/nvptx/src/supporti.h b/libomptarget/deviceRTLs/nvptx/src/supporti.h
index c93657e..e2ea2d1 100644
--- a/libomptarget/deviceRTLs/nvptx/src/supporti.h
+++ b/libomptarget/deviceRTLs/nvptx/src/supporti.h
@@ -33,6 +33,59 @@ INLINE bool isRuntimeInitialized() {
}
////////////////////////////////////////////////////////////////////////////////
+// Execution Modes based on location parameter fields
+////////////////////////////////////////////////////////////////////////////////
+
+INLINE bool checkSPMDMode(kmp_Ident *loc) {
+ if (!loc)
+ return isSPMDMode();
+
+ // If SPMD is true then we are not in the UNDEFINED state so
+ // we can return immediately.
+ if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+ return true;
+
+ // Not being in SPMD mode while the full runtime is required is a valid
+ // combination of flags, so we can return immediately.
+ if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+ return false;
+
+ // We are in an undefined state.
+ return isSPMDMode();
+}
+
+INLINE bool checkGenericMode(kmp_Ident *loc) {
+ return !checkSPMDMode(loc);
+}
+
+INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) {
+ if (!loc)
+ return isRuntimeUninitialized();
+
+ // If runtime is required then we know we can't be
+ // in the undefined mode. We can return immediately.
+ if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE))
+ return false;
+
+ // If runtime is required then we need to check if we are in
+ // SPMD mode or not. If not in SPMD mode then we end
+ // up in the UNDEFINED state that marks the orphaned
+ // functions.
+ if (loc->reserved_2 & KMP_IDENT_SPMD_MODE)
+ return true;
+
+ // Check if we are in an UNDEFINED state. Undefined is denoted by
+ // non-SPMD + noRuntimeRequired which is a combination that
+ // cannot actually happen. The undefined state is used to mark orphaned
+ // functions.
+ return isRuntimeUninitialized();
+}
+
+INLINE bool checkRuntimeInitialized(kmp_Ident *loc) {
+ return !checkRuntimeUninitialized(loc);
+}
+
+////////////////////////////////////////////////////////////////////////////////
// support: get info from machine
////////////////////////////////////////////////////////////////////////////////
@@ -78,8 +131,6 @@ INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); }
// id is GetMasterThreadID()) calls this routine, we return 0 because
// it is a shadow for the first worker.
INLINE int GetLogicalThreadIdInBlock() {
- // return GetThreadIdInBlock() % GetMasterThreadID();
-
// Implemented using control flow (predication) instead of with a modulo
// operation.
int tid = GetThreadIdInBlock();
@@ -180,19 +231,20 @@ INLINE unsigned long PadBytes(unsigned long size,
{
// compute the necessary padding to satisfy alignment constraint
ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0,
- "alignment %ld is not a power of 2\n", alignment);
+ "alignment %lu is not a power of 2\n", alignment);
return (~(unsigned long)size + 1) & (alignment - 1);
}
INLINE void *SafeMalloc(size_t size, const char *msg) // check if success
{
void *ptr = malloc(size);
- PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg, P64(ptr));
+ PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg,
+ (unsigned long long)ptr);
return ptr;
}
INLINE void *SafeFree(void *ptr, const char *msg) {
- PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", P64(ptr), msg);
+ PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg);
free(ptr);
return NULL;
}
diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu
index 0a99405..7cdb7ff 100644
--- a/libomptarget/deviceRTLs/nvptx/src/sync.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/sync.cu
@@ -17,11 +17,11 @@
// KMP Ordered calls
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_ordered\n");
}
-EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) {
+EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) {
PRINT0(LD_IO, "call kmpc_end_ordered\n");
}
@@ -33,16 +33,16 @@ EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) {
// FIXME: what if not all threads (warps) participate to the barrier?
// We may need to implement it differently
-EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) {
PRINT0(LD_IO, "call kmpc_cancel_barrier\n");
__kmpc_barrier(loc_ref, tid);
PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n");
return 0;
}
-EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
- if (isRuntimeUninitialized()) {
- ASSERT0(LT_FUSSY, isSPMDMode(),
+EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) {
+ if (checkRuntimeUninitialized(loc_ref)) {
+ ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref),
"Expected SPMD mode with uninitialized runtime.");
__kmpc_barrier_simple_spmd(loc_ref, tid);
} else {
@@ -50,9 +50,9 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
omptarget_nvptx_TaskDescr *currTaskDescr =
omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid);
int numberOfActiveOMPThreads = GetNumberOfOmpThreads(
- tid, isSPMDMode(), /*isRuntimeUninitialized=*/false);
+ tid, checkSPMDMode(loc_ref), /*isRuntimeUninitialized=*/false);
if (numberOfActiveOMPThreads > 1) {
- if (isSPMDMode()) {
+ if (checkSPMDMode(loc_ref)) {
__kmpc_barrier_simple_spmd(loc_ref, tid);
} else {
// The #threads parameter must be rounded up to the WARPSIZE.
@@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
PRINT(LD_SYNC,
"call kmpc_barrier with %d omp threads, sync parameter %d\n",
- numberOfActiveOMPThreads, threads);
+ (int)numberOfActiveOMPThreads, (int)threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
}
@@ -72,7 +72,7 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) {
// Emit a simple barrier call in SPMD mode. Assumes the caller is in an L0
// parallel region and that all worker threads participate.
-EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) {
PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n");
__syncthreads();
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n");
@@ -80,7 +80,7 @@ EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) {
// Emit a simple barrier call in Generic mode. Assumes the caller is in an L0
// parallel region and that all worker threads participate.
-EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) {
+EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) {
int numberOfActiveOMPThreads = GetNumberOfThreadsInBlock() - WARPSIZE;
// The #threads parameter must be rounded up to the WARPSIZE.
int threads =
@@ -89,7 +89,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) {
PRINT(LD_SYNC,
"call kmpc_barrier_simple_generic with %d omp threads, sync parameter "
"%d\n",
- numberOfActiveOMPThreads, threads);
+ (int)numberOfActiveOMPThreads, (int)threads);
// Barrier #1 is for synchronization among active threads.
named_sync(L1_BARRIER, threads);
PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n");
@@ -99,37 +99,30 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) {
// KMP MASTER
////////////////////////////////////////////////////////////////////////////////
-INLINE int32_t IsMaster() {
- // only the team master updates the state
- int tid = GetLogicalThreadIdInBlock();
- int ompThreadId = GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized());
- return IsTeamMaster(ompThreadId);
-}
-
-EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid) {
+EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_master\n");
- return IsMaster();
+ return IsTeamMaster(global_tid);
}
-EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_end_master\n");
- ASSERT0(LT_FUSSY, IsMaster(), "expected only master here");
+ ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
}
////////////////////////////////////////////////////////////////////////////////
// KMP SINGLE
////////////////////////////////////////////////////////////////////////////////
-EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid) {
+EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_single\n");
// decide to implement single with master; master get the single
- return IsMaster();
+ return IsTeamMaster(global_tid);
}
-EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) {
+EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) {
PRINT0(LD_IO, "call kmpc_end_single\n");
// decide to implement single with master: master get the single
- ASSERT0(LT_FUSSY, IsMaster(), "expected only master here");
+ ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here");
// sync barrier is explicitely called... so that is not a problem
}
@@ -137,9 +130,9 @@ EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) {
// Flush
////////////////////////////////////////////////////////////////////////////////
-EXTERN void __kmpc_flush(kmp_Indent *loc) {
+EXTERN void __kmpc_flush(kmp_Ident *loc) {
PRINT0(LD_IO, "call kmpc_flush\n");
- __threadfence_block();
+ __threadfence_system();
}
////////////////////////////////////////////////////////////////////////////////
diff --git a/libomptarget/deviceRTLs/nvptx/src/task.cu b/libomptarget/deviceRTLs/nvptx/src/task.cu
index f0431ab..2f47d4b 100644
--- a/libomptarget/deviceRTLs/nvptx/src/task.cu
+++ b/libomptarget/deviceRTLs/nvptx/src/task.cu
@@ -31,7 +31,7 @@
#include "omptarget-nvptx.h"
EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
- kmp_Indent *loc, // unused
+ kmp_Ident *loc, // unused
uint32_t global_tid, // unused
int32_t flag, // unused (because in our impl, all are immediately exec
size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable,
@@ -39,14 +39,15 @@ EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
PRINT(LD_IO,
"call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, "
"fct 0x%llx)\n",
- P64(sizeOfTaskInclPrivate), P64(sizeOfSharedTable), P64(taskSub));
+ (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable,
+ (unsigned long long)taskSub);
// want task+priv to be a multiple of 8 bytes
size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *));
sizeOfTaskInclPrivate += padForTaskInclPriv;
size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable;
ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0,
"need task descr of size %d to be a multiple of %d\n",
- sizeof(omptarget_nvptx_TaskDescr), sizeof(void *));
+ (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *));
size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize;
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc(
@@ -63,25 +64,27 @@ EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(
newKmpTaskDescr->sub = taskSub;
newKmpTaskDescr->destructors = NULL;
PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n",
- P64(newKmpTaskDescr), P64(newExplicitTaskDescr));
+ (unsigned long long)newKmpTaskDescr,
+ (unsigned long long)newExplicitTaskDescr);
return newKmpTaskDescr;
}
-EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0,
0);
}
-EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr,
int32_t depNum, void *depList,
int32_t noAliasDepNum,
void *noAliasDepList) {
PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n",
P64(newKmpTaskDescr));
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
+ "Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -101,10 +104,11 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
// 3. call sub
PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n",
- P64(newKmpTaskDescr->sub), P64(newKmpTaskDescr));
+ (unsigned long long)newKmpTaskDescr->sub,
+ (unsigned long long)newKmpTaskDescr);
newKmpTaskDescr->sub(0, newKmpTaskDescr);
PRINT(LD_TASK, "return from call task sub 0x%llx()\n",
- P64(newKmpTaskDescr->sub));
+ (unsigned long long)newKmpTaskDescr->sub);
// 4. pop context
omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid,
@@ -114,11 +118,12 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid,
return 0;
}
-EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n",
- P64(newKmpTaskDescr));
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ (unsigned long long)newKmpTaskDescr);
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
+ "Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -139,11 +144,12 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid,
// 4 & 5 ... done in complete
}
-EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr) {
PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n",
- P64(newKmpTaskDescr));
- ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized.");
+ (unsigned long long)newKmpTaskDescr);
+ ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc),
+ "Runtime must be initialized.");
// 1. get explict task descr from kmp task descr
omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr =
(omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES(
@@ -164,37 +170,37 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid,
SafeFree(newExplicitTaskDescr, "explicit task descriptor");
}
-EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid,
int32_t depNum, void *depList,
int32_t noAliasDepNum, void *noAliasDepList) {
PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n");
// nothing to do as all our tasks are executed as final
}
-EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n");
// nothing to do as all our tasks are executed as final
}
-EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n");
// nothing to do as all our tasks are executed as final
}
-EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid,
+EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid,
int end_part) {
PRINT0(LD_IO, "call to __kmpc_taskyield()\n");
// do nothing: tasks are executed immediately, no yielding allowed
return 0;
}
-EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid) {
+EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) {
PRINT0(LD_IO, "call to __kmpc_taskwait()\n");
// nothing to do as all our tasks are executed as final
return 0;
}
-EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid,
+EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid,
kmp_TaskDescr *newKmpTaskDescr, int if_val,
uint64_t *lb, uint64_t *ub, int64_t st, int nogroup,
int32_t sched, uint64_t grainsize, void *task_dup) {
diff --git a/libomptarget/src/omptarget.cpp b/libomptarget/src/omptarget.cpp
index a1ffd04..a23d82b 100644
--- a/libomptarget/src/omptarget.cpp
+++ b/libomptarget/src/omptarget.cpp
@@ -638,19 +638,20 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num,
assert(tgtIdx != -1 && "Base address must be translated already.");
// The parent lambda must be processed already and it must be the last
// in tgt_args and tgt_offsets arrays.
- void *HstPtrBegin = args[i];
- void *HstPtrBase = args_base[i];
+ void *HstPtrVal = args[i];
+ void *HstPtrBegin = args_base[i];
+ void *HstPtrBase = args[idx];
bool IsLast; // unused.
void *TgtPtrBase =
(void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]);
DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase));
uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase;
void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta);
- void *Pointer_TgtPtrBegin = Device.getTgtPtrBegin(
- *(void **)HstPtrBegin, arg_sizes[i], IsLast, false);
+ void *Pointer_TgtPtrBegin =
+ Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false);
if (!Pointer_TgtPtrBegin) {
DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n",
- DPxPTR(*(void **)HstPtrBegin));
+ DPxPTR(HstPtrVal));
continue;
}
DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n",
diff --git a/runtime/cmake/LibompHandleFlags.cmake b/runtime/cmake/LibompHandleFlags.cmake
index efe2099..0b829a5 100644
--- a/runtime/cmake/LibompHandleFlags.cmake
+++ b/runtime/cmake/LibompHandleFlags.cmake
@@ -50,6 +50,7 @@ function(libomp_get_c_and_cxxflags_common flags)
libomp_append(flags_local /GS LIBOMP_HAVE_GS_FLAG)
libomp_append(flags_local /EHsc LIBOMP_HAVE_EHSC_FLAG)
libomp_append(flags_local /Oy- LIBOMP_HAVE_OY__FLAG)
+ libomp_append(flags_local -mrtm LIBOMP_HAVE_MRTM_FLAG)
# Intel(R) C Compiler flags
libomp_append(flags_local /Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG)
libomp_append(flags_local -Qoption,cpp,--extended_float_types LIBOMP_HAVE_EXTENDED_FLOAT_TYPES_FLAG)
@@ -158,6 +159,11 @@ function(libomp_get_libflags libflags)
if(${IA32})
libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY)
endif()
+ IF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly")
+ libomp_append(libflags_local "-Wl,--no-as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG)
+ libomp_append(libflags_local "-lm")
+ libomp_append(libflags_local "-Wl,--as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG)
+ ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly")
IF(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD")
libomp_append(libflags_local -lm)
ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD")
diff --git a/runtime/cmake/LibompMicroTests.cmake b/runtime/cmake/LibompMicroTests.cmake
index 0918fdd..bdecf7f 100644
--- a/runtime/cmake/LibompMicroTests.cmake
+++ b/runtime/cmake/LibompMicroTests.cmake
@@ -176,6 +176,9 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD")
elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD")
set(libomp_expected_library_deps libc.so.12 libpthread.so.1 libm.so.0)
libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
+elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly")
+ set(libomp_expected_library_deps libc.so.8 libpthread.so.0 libm.so.4)
+ libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC)
elseif(APPLE)
set(libomp_expected_library_deps /usr/lib/libSystem.B.dylib)
elseif(WIN32)
diff --git a/runtime/cmake/config-ix.cmake b/runtime/cmake/config-ix.cmake
index 5415e57..019c83c 100644
--- a/runtime/cmake/config-ix.cmake
+++ b/runtime/cmake/config-ix.cmake
@@ -73,13 +73,16 @@ check_c_compiler_flag(-ftls-model=initial-exec LIBOMP_HAVE_FTLS_MODEL_FLAG)
libomp_check_architecture_flag(-mmic LIBOMP_HAVE_MMIC_FLAG)
libomp_check_architecture_flag(-m32 LIBOMP_HAVE_M32_FLAG)
if(WIN32)
- # Check Windows MSVC style flags.
- check_c_compiler_flag(/TP LIBOMP_HAVE_TP_FLAG)
- check_cxx_compiler_flag(/EHsc LIBOMP_HAVE_EHSC_FLAG)
- check_cxx_compiler_flag(/GS LIBOMP_HAVE_GS_FLAG)
- check_cxx_compiler_flag(/Oy- LIBOMP_HAVE_Oy__FLAG)
- check_cxx_compiler_flag(/arch:SSE2 LIBOMP_HAVE_ARCH_SSE2_FLAG)
- check_cxx_compiler_flag(/Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG)
+ if(MSVC)
+ # Check Windows MSVC style flags.
+ check_c_compiler_flag(/TP LIBOMP_HAVE_TP_FLAG)
+ check_cxx_compiler_flag(/EHsc LIBOMP_HAVE_EHSC_FLAG)
+ check_cxx_compiler_flag(/GS LIBOMP_HAVE_GS_FLAG)
+ check_cxx_compiler_flag(/Oy- LIBOMP_HAVE_Oy__FLAG)
+ check_cxx_compiler_flag(/arch:SSE2 LIBOMP_HAVE_ARCH_SSE2_FLAG)
+ check_cxx_compiler_flag(/Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG)
+ endif()
+ check_c_compiler_flag(-mrtm LIBOMP_HAVE_MRTM_FLAG)
# It is difficult to create a dummy masm assembly file
# and then check the MASM assembler to see if these flags exist and work,
# so we assume they do for Windows.
diff --git a/runtime/src/dllexports b/runtime/src/dllexports
index 1108930..963ac61 100644
--- a/runtime/src/dllexports
+++ b/runtime/src/dllexports
@@ -405,6 +405,7 @@ kmpc_set_disp_num_buffers 267
__kmpc_task_reduction_get_th_data 269
# USED FOR 4.5 __kmpc_critical_with_hint 270
__kmpc_get_target_offload 271
+ __kmpc_omp_reg_task_with_affinity 272
%endif
%endif
@@ -546,6 +547,14 @@ kmp_set_disp_num_buffers 890
omp_get_default_allocator 893
omp_alloc 894
omp_free 895
+ omp_set_affinity_format 748
+ omp_get_affinity_format 749
+ omp_display_affinity 750
+ omp_capture_affinity 751
+ ompc_set_affinity_format 752
+ ompc_get_affinity_format 753
+ ompc_display_affinity 754
+ ompc_capture_affinity 755
OMP_NULL_ALLOCATOR DATA
omp_default_mem_alloc DATA
diff --git a/runtime/src/i18n/en_US.txt b/runtime/src/i18n/en_US.txt
index 067cb94..3e5283e 100644
--- a/runtime/src/i18n/en_US.txt
+++ b/runtime/src/i18n/en_US.txt
@@ -425,6 +425,7 @@ AffHWSubsetManyNodes "KMP_HW_SUBSET ignored: too many NUMA Nodes request
AffHWSubsetManyTiles "KMP_HW_SUBSET ignored: too many L2 Caches requested."
AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested."
HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s."
+AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}"
# --------------------------------------------------------------------------------------------------
diff --git a/runtime/src/include/50/omp.h.var b/runtime/src/include/50/omp.h.var
index 7a626bd..81b6c85 100644
--- a/runtime/src/include/50/omp.h.var
+++ b/runtime/src/include/50/omp.h.var
@@ -25,6 +25,11 @@
extern "C" {
# endif
+# define omp_set_affinity_format ompc_set_affinity_format
+# define omp_get_affinity_format ompc_get_affinity_format
+# define omp_display_affinity ompc_display_affinity
+# define omp_capture_affinity ompc_capture_affinity
+
# if defined(_WIN32)
# define __KAI_KMPC_CONVENTION __cdecl
# ifndef __KMP_IMP
@@ -235,6 +240,12 @@
extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator);
#endif
+ /* OpenMP 5.0 Affinity Format */
+ extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *);
+ extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t);
+ extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *);
+ extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *);
+
# undef __KAI_KMPC_CONVENTION
# undef __KMP_IMP
diff --git a/runtime/src/include/50/omp_lib.f.var b/runtime/src/include/50/omp_lib.f.var
index 8a02b62..d5a8057 100644
--- a/runtime/src/include/50/omp_lib.f.var
+++ b/runtime/src/include/50/omp_lib.f.var
@@ -375,6 +375,27 @@
integer (kind=omp_allocator_kind) omp_get_default_allocator
end function omp_get_default_allocator
+ subroutine omp_set_affinity_format(format)
+ character (len=*) format
+ end subroutine omp_set_affinity_format
+
+ function omp_get_affinity_format(buffer)
+ use omp_lib_kinds
+ character (len=*) buffer
+ integer (kind=kmp_size_t_kind) omp_get_affinity_format
+ end function omp_get_affinity_format
+
+ subroutine omp_display_affinity(format)
+ character (len=*) format
+ end subroutine omp_display_affinity
+
+ function omp_capture_affinity(buffer, format)
+ use omp_lib_kinds
+ character (len=*) format
+ character (len=*) buffer
+ integer (kind=kmp_size_t_kind) omp_capture_affinity
+ end function omp_capture_affinity
+
! ***
! *** kmp_* entry points
! ***
@@ -594,6 +615,10 @@
!dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
!dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority
!dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool
+!dec$ attributes alias:'OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format
+!dec$ attributes alias:'OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format
+!dec$ attributes alias:'OMP_DISPLAY_AFFINITY' :: omp_display_affinity
+!dec$ attributes alias:'OMP_CAPTURE_AFFINITY' :: omp_capture_affinity
!dec$ attributes alias:'omp_init_lock' :: omp_init_lock
!dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint
@@ -675,6 +700,10 @@
!dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device
!dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority
!dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool
+!dec$ attributes alias:'_OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format
+!dec$ attributes alias:'_OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format
+!dec$ attributes alias:'_OMP_DISPLAY_AFFINITY' :: omp_display_affinity
+!dec$ attributes alias:'_OMP_CAPTURE_AFFINITY' :: omp_capture_affinity
!dec$ attributes alias:'_omp_init_lock' :: omp_init_lock
!dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint
@@ -758,6 +787,10 @@
!dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation
!dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device
!dec$ attributes alias:'omp_get_max_task_priority_'::omp_get_max_task_priority
+!dec$ attributes alias:'omp_set_affinity_format_' :: omp_set_affinity_format
+!dec$ attributes alias:'omp_get_affinity_format_' :: omp_get_affinity_format
+!dec$ attributes alias:'omp_display_affinity_' :: omp_display_affinity
+!dec$ attributes alias:'omp_capture_affinity_' :: omp_capture_affinity
!dec$ attributes alias:'omp_init_lock_'::omp_init_lock
!dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint
@@ -852,6 +885,10 @@
!dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock
!dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock
!dec$ attributes alias:'_omp_control_tool_'::omp_control_tool
+!dec$ attributes alias:'_omp_set_affinity_format_' :: omp_set_affinity_format
+!dec$ attributes alias:'_omp_get_affinity_format_' :: omp_get_affinity_format
+!dec$ attributes alias:'_omp_display_affinity_' :: omp_display_affinity
+!dec$ attributes alias:'_omp_capture_affinity_' :: omp_capture_affinity
!dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize
!dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s
diff --git a/runtime/src/include/50/omp_lib.f90.var b/runtime/src/include/50/omp_lib.f90.var
index 624774d..afc6d67 100644
--- a/runtime/src/include/50/omp_lib.f90.var
+++ b/runtime/src/include/50/omp_lib.f90.var
@@ -391,6 +391,27 @@
integer (kind=omp_allocator_kind) omp_get_default_allocator
end function omp_get_default_allocator
+ subroutine omp_set_affinity_format(format)
+ character (len=*) :: format
+ end subroutine omp_set_affinity_format
+
+ function omp_get_affinity_format(buffer)
+ use omp_lib_kinds
+ character (len=*) :: buffer
+ integer (kind=kmp_size_t_kind) :: omp_get_affinity_format
+ end function omp_get_affinity_format
+
+ subroutine omp_display_affinity(format)
+ character (len=*) :: format
+ end subroutine omp_display_affinity
+
+ function omp_capture_affinity(buffer, format)
+ use omp_lib_kinds
+ character (len=*) :: format
+ character (len=*) :: buffer
+ integer (kind=kmp_size_t_kind) :: omp_capture_affinity
+ end function omp_capture_affinity
+
! ***
! *** kmp_* entry points
! ***
diff --git a/runtime/src/include/50/omp_lib.h.var b/runtime/src/include/50/omp_lib.h.var
index 0e4c2c6..11dbc0a 100644
--- a/runtime/src/include/50/omp_lib.h.var
+++ b/runtime/src/include/50/omp_lib.h.var
@@ -424,6 +424,27 @@
integer (kind=omp_allocator_kind) omp_get_default_allocator
end function omp_get_default_allocator
+ subroutine omp_set_affinity_format(format)
+ character (len=*) :: format
+ end subroutine omp_set_affinity_format
+
+ function omp_get_affinity_format(buffer)
+ import
+ character (len=*) :: buffer
+ integer (kind=kmp_size_t_kind) :: omp_get_affinity_format
+ end function omp_get_affinity_format
+
+ subroutine omp_display_affinity(format)
+ character (len=*) :: format
+ end subroutine omp_display_affinity
+
+ function omp_capture_affinity(buffer, format)
+ import
+ character (len=*) :: format
+ character (len=*) :: buffer
+ integer (kind=kmp_size_t_kind) :: omp_capture_affinity
+ end function omp_capture_affinity
+
! ***
! *** kmp_* entry points
! ***
@@ -637,6 +658,10 @@
!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock
!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock
!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_task_priority
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_affinity_format
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_affinity_format
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_display_affinity
+!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_capture_affinity
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s
!DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime
@@ -710,6 +735,10 @@
!$omp declare target(omp_unset_nest_lock )
!$omp declare target(omp_test_nest_lock )
!$omp declare target(omp_get_max_task_priority )
+!$omp declare target(omp_set_affinity_format )
+!$omp declare target(omp_get_affinity_format )
+!$omp declare target(omp_display_affinity )
+!$omp declare target(omp_capture_affinity )
!$omp declare target(kmp_set_stacksize )
!$omp declare target(kmp_set_stacksize_s )
!$omp declare target(kmp_set_blocktime )
diff --git a/runtime/src/include/50/ompt.h.var b/runtime/src/include/50/ompt.h.var
index 24fc90b..478c6cc 100644
--- a/runtime/src/include/50/ompt.h.var
+++ b/runtime/src/include/50/ompt.h.var
@@ -53,50 +53,50 @@
macro(ompt_get_target_info) \
macro(ompt_get_num_devices)
-#define FOREACH_OMP_STATE(macro) \
+#define FOREACH_OMPT_STATE(macro) \
\
/* first available state */ \
- macro (omp_state_undefined, 0x102) /* undefined thread state */ \
+ macro (ompt_state_undefined, 0x102) /* undefined thread state */ \
\
/* work states (0..15) */ \
- macro (omp_state_work_serial, 0x000) /* working outside parallel */ \
- macro (omp_state_work_parallel, 0x001) /* working within parallel */ \
- macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \
+ macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \
+ macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \
+ macro (ompt_state_work_reduction, 0x002) /* performing a reduction */ \
\
/* barrier wait states (16..31) */ \
- macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \
- macro (omp_state_wait_barrier_implicit_parallel, 0x011) \
+ macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \
+ macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \
/* implicit barrier at the end of parallel region */\
- macro (omp_state_wait_barrier_implicit_workshare, 0x012) \
+ macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \
/* implicit barrier at the end of worksharing */ \
- macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
- macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
+ macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \
+ macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \
\
/* task wait states (32..63) */ \
- macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
- macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \
+ macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \
+ macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \
\
/* mutex wait states (64..127) */ \
- macro (omp_state_wait_mutex, 0x040) \
- macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \
- macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \
- macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \
- macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \
+ macro (ompt_state_wait_mutex, 0x040) \
+ macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \
+ macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \
+ macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \
+ macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \
\
/* target wait states (128..255) */ \
- macro (omp_state_wait_target, 0x080) /* waiting for target region */ \
- macro (omp_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \
- macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \
+ macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \
+ macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \
+ macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \
\
/* misc (256..511) */ \
- macro (omp_state_idle, 0x100) /* waiting for work */ \
- macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \
+ macro (ompt_state_idle, 0x100) /* waiting for work */ \
+ macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \
\
/* implementation-specific states (512..) */
#define FOREACH_KMP_MUTEX_IMPL(macro) \
- macro (ompt_mutex_impl_unknown, 0) /* unknown implementation */ \
+ macro (ompt_mutex_impl_none, 0) /* unknown implementation */ \
macro (kmp_mutex_impl_spin, 1) /* based on spin */ \
macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \
macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */
@@ -178,20 +178,11 @@ typedef union ompt_data_t {
static const ompt_data_t ompt_data_none = {0};
-typedef uint64_t omp_wait_id_t;
-static const omp_wait_id_t omp_wait_id_none = 0;
+typedef uint64_t ompt_wait_id_t;
+static const ompt_wait_id_t omp_wait_id_none = 0;
typedef void ompt_device_t;
-/*---------------------
- * omp_frame_t
- *---------------------*/
-
-typedef struct omp_frame_t {
- void *exit_frame; /* next frame is user code */
- void *enter_frame; /* previous frame is user code */
-} omp_frame_t;
-
/*---------------------
* dependences types
@@ -220,10 +211,18 @@ typedef struct ompt_task_dependence_t {
*---------------------*/
typedef enum {
-#define omp_state_macro(state, code) state = code,
- FOREACH_OMP_STATE(omp_state_macro)
-#undef omp_state_macro
-} omp_state_t;
+#define ompt_state_macro(state, code) state = code,
+ FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
+} ompt_state_t;
+
+typedef enum ompt_frame_flag_t {
+ ompt_frame_runtime = 0x00,
+ ompt_frame_application = 0x01,
+ ompt_frame_cfa = 0x10,
+ ompt_frame_framepointer = 0x20,
+ ompt_frame_stackaddress = 0x30
+} ompt_frame_flag_t;
/*---------------------
@@ -278,6 +277,12 @@ typedef enum ompt_thread_t {
ompt_thread_unknown = 4
} ompt_thread_t;
+typedef struct ompt_frame_t {
+ ompt_data_t exit_frame;
+ ompt_data_t enter_frame;
+ int exit_frame_flags;
+ int enter_frame_flags;
+} ompt_frame_t;
typedef enum ompt_parallel_flag_t {
ompt_parallel_invoker_program = 0x00000001, /* program invokes master task */
ompt_parallel_invoker_runtime = 0x00000002, /* runtime invokes master task */
@@ -295,7 +300,7 @@ typedef void (*ompt_callback_thread_end_t) (
);
typedef void (*ompt_wait_callback_t) (
- omp_wait_id_t wait_id /* wait data */
+ ompt_wait_id_t wait_id /* wait data */
);
/* parallel and workshares */
@@ -316,7 +321,7 @@ typedef void (*ompt_callback_implicit_task_t) (
typedef void (*ompt_callback_parallel_begin_t) (
ompt_data_t *encountering_task_data, /* data of encountering task */
- const omp_frame_t *encountering_task_frame, /* frame data of encountering task */
+ const ompt_frame_t *encountering_task_frame, /* frame data of encountering task */
ompt_data_t *parallel_data, /* data of parallel region */
unsigned int requested_team_size, /* requested number of threads in team */
int flag, /* flag for additional information */
@@ -358,7 +363,7 @@ typedef void (*ompt_callback_task_schedule_t) (
typedef void (*ompt_callback_task_create_t) (
ompt_data_t *encountering_task_data, /* data of parent task */
- const omp_frame_t *encountering_task_frame, /* frame data for parent task */
+ const ompt_frame_t *encountering_task_frame, /* frame data for parent task */
ompt_data_t *new_task_data, /* data of created task */
int flag, /* type of created task */
int has_dependences, /* created task has dependences */
@@ -479,19 +484,19 @@ typedef void (*ompt_callback_mutex_acquire_t) (
ompt_mutex_t kind, /* mutex kind */
unsigned int hint, /* mutex hint */
unsigned int impl, /* mutex implementation */
- omp_wait_id_t wait_id, /* id of object being awaited */
+ ompt_wait_id_t wait_id, /* id of object being awaited */
const void *codeptr_ra /* return address of runtime call */
);
typedef void (*ompt_callback_mutex_t) (
ompt_mutex_t kind, /* mutex kind */
- omp_wait_id_t wait_id, /* id of object being awaited */
+ ompt_wait_id_t wait_id, /* id of object being awaited */
const void *codeptr_ra /* return address of runtime call */
);
typedef void (*ompt_callback_nest_lock_t) (
ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */
- omp_wait_id_t wait_id, /* id of object being awaited */
+ ompt_wait_id_t wait_id, /* id of object being awaited */
const void *codeptr_ra /* return address of runtime call */
);
@@ -592,8 +597,8 @@ extern "C" {
***************************************************************************/
/* state */
-OMPT_API_FUNCTION(omp_state_t, ompt_get_state, (
- omp_wait_id_t *wait_id
+OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, (
+ ompt_wait_id_t *wait_id
));
/* thread */
@@ -611,7 +616,7 @@ OMPT_API_FUNCTION(int, ompt_get_task_info, (
int ancestor_level,
int *type,
ompt_data_t **task_data,
- omp_frame_t **task_frame,
+ ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num
));
diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h
index ef9e0a9..23bbeb3 100644
--- a/runtime/src/kmp.h
+++ b/runtime/src/kmp.h
@@ -129,6 +129,11 @@ class kmp_stats_list;
#include "ompt-internal.h"
#endif
+#if OMP_50_ENABLED
+// Affinity format function
+#include "kmp_str.h"
+#endif
+
// 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64.
// 3 - fast allocation using sync, non-sync free lists of any size, non-self
// free lists of limited size.
@@ -544,11 +549,15 @@ typedef int PACKED_REDUCTION_METHOD_T;
#if KMP_OS_WINDOWS
#define USE_CBLKDATA
+#if KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable : 271 310)
+#endif
#include <windows.h>
+#if KMP_MSVC_COMPAT
#pragma warning(pop)
#endif
+#endif
#if KMP_OS_UNIX
#include <dlfcn.h>
@@ -560,7 +569,7 @@ typedef int PACKED_REDUCTION_METHOD_T;
// GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later).
#if KMP_OS_WINDOWS
-#if _MSC_VER < 1600
+#if _MSC_VER < 1600 && KMP_MSVC_COMPAT
typedef struct GROUP_AFFINITY {
KAFFINITY Mask;
WORD Group;
@@ -793,6 +802,12 @@ extern kmp_nested_proc_bind_t __kmp_nested_proc_bind;
#endif /* OMP_40_ENABLED */
+#if OMP_50_ENABLED
+extern int __kmp_display_affinity;
+extern char *__kmp_affinity_format;
+static const size_t KMP_AFFINITY_FORMAT_SIZE = 512;
+#endif // OMP_50_ENABLED
+
#if KMP_AFFINITY_SUPPORTED
#define KMP_PLACE_ALL (-1)
#define KMP_PLACE_UNDEFINED (-2)
@@ -1042,6 +1057,10 @@ extern kmp_uint64 __kmp_now_nsec();
/* TODO: tune for KMP_OS_DARWIN */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#elif KMP_OS_DRAGONFLY
+/* TODO: tune for KMP_OS_DRAGONFLY */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#elif KMP_OS_FREEBSD
/* TODO: tune for KMP_OS_FREEBSD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
@@ -1054,6 +1073,10 @@ extern kmp_uint64 __kmp_now_nsec();
/* TODO: tune for KMP_OS_HURD */
#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */
+#elif KMP_OS_OPENBSD
+/* TODO: tune for KMP_OS_OPENBSD */
+#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */
+#define KMP_NEXT_WAIT 512U /* subsequent number of spin-tests */
#endif
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
@@ -2222,6 +2245,18 @@ typedef struct kmp_dephash {
#endif
} kmp_dephash_t;
+#if OMP_50_ENABLED
+typedef struct kmp_task_affinity_info {
+ kmp_intptr_t base_addr;
+ size_t len;
+ struct {
+ bool flag1 : 1;
+ bool flag2 : 1;
+ kmp_int32 reserved : 30;
+ } flags;
+} kmp_task_affinity_info_t;
+#endif
+
#endif
#ifdef BUILD_TIED_TASK_STACK
@@ -2483,6 +2518,10 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info {
int th_last_place; /* last place in partition */
#endif
#endif
+#if OMP_50_ENABLED
+ int th_prev_level; /* previous level for affinity format */
+ int th_prev_num_threads; /* previous num_threads for affinity format */
+#endif
#if USE_ITT_BUILD
kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */
kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */
@@ -2676,6 +2715,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team {
int t_first_place; // first & last place in parent thread's partition.
int t_last_place; // Restore these values to master after par region.
#endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED
+#if OMP_50_ENABLED
+ int t_display_affinity;
+#endif
int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via
// omp_set_num_threads() call
#if OMP_50_ENABLED
@@ -3359,6 +3401,8 @@ extern void __kmp_runtime_destroy(void);
#if KMP_AFFINITY_SUPPORTED
extern char *__kmp_affinity_print_mask(char *buf, int buf_len,
kmp_affin_mask_t *mask);
+extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
+ kmp_affin_mask_t *mask);
extern void __kmp_affinity_initialize(void);
extern void __kmp_affinity_uninitialize(void);
extern void __kmp_affinity_set_init_mask(
@@ -3378,6 +3422,14 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size);
extern int kmp_set_thread_affinity_mask_initial(void);
#endif
#endif /* KMP_AFFINITY_SUPPORTED */
+#if OMP_50_ENABLED
+// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the
+// format string is for affinity, so platforms that do not support
+// affinity can still use the other fields, e.g., %n for num_threads
+extern size_t __kmp_aux_capture_affinity(int gtid, const char *format,
+ kmp_str_buf_t *buffer);
+extern void __kmp_aux_display_affinity(int gtid, const char *format);
+#endif
extern void __kmp_cleanup_hierarchy();
extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar);
@@ -3530,6 +3582,8 @@ KMP_EXPORT int __kmpc_invoke_task_func(int gtid);
#if OMP_40_ENABLED
extern int __kmp_invoke_teams_master(int gtid);
extern void __kmp_teams_master(int gtid);
+extern int __kmp_aux_get_team_num();
+extern int __kmp_aux_get_num_teams();
#endif
extern void __kmp_save_internal_controls(kmp_info_t *thread);
extern void __kmp_user_set_library(enum library_type arg);
@@ -3783,6 +3837,9 @@ KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task,
#if OMP_50_ENABLED
KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data);
KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d);
+KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity(
+ ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins,
+ kmp_task_affinity_info_t *affin_list);
#endif
#endif
diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp
index a9a21cf..775862e 100644
--- a/runtime/src/kmp_affinity.cpp
+++ b/runtime/src/kmp_affinity.cpp
@@ -83,55 +83,135 @@ void KMPAffinity::destroy_api() {
}
}
+#define KMP_ADVANCE_SCAN(scan) \
+ while (*scan != '\0') { \
+ scan++; \
+ }
+
// Print the affinity mask to the character array in a pretty format.
+// The format is a comma separated list of non-negative integers or integer
+// ranges: e.g., 1,2,3-5,7,9-15
+// The format can also be the string "{<empty>}" if no bits are set in mask
char *__kmp_affinity_print_mask(char *buf, int buf_len,
kmp_affin_mask_t *mask) {
+ int start = 0, finish = 0, previous = 0;
+ bool first_range;
+ KMP_ASSERT(buf);
KMP_ASSERT(buf_len >= 40);
+ KMP_ASSERT(mask);
char *scan = buf;
char *end = buf + buf_len - 1;
- // Find first element / check for empty set.
- int i;
- i = mask->begin();
- if (i == mask->end()) {
+ // Check for empty set.
+ if (mask->begin() == mask->end()) {
KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
- while (*scan != '\0')
- scan++;
+ KMP_ADVANCE_SCAN(scan);
KMP_ASSERT(scan <= end);
return buf;
}
- KMP_SNPRINTF(scan, end - scan + 1, "{%d", i);
- while (*scan != '\0')
- scan++;
- i++;
- for (; i != mask->end(); i = mask->next(i)) {
- if (!KMP_CPU_ISSET(i, mask)) {
- continue;
+ first_range = true;
+ start = mask->begin();
+ while (1) {
+ // Find next range
+ // [start, previous] is inclusive range of contiguous bits in mask
+ for (finish = mask->next(start), previous = start;
+ finish == previous + 1 && finish != mask->end();
+ finish = mask->next(finish)) {
+ previous = finish;
}
- // Check for buffer overflow. A string of the form ",<n>" will have at most
- // 10 characters, plus we want to leave room to print ",...}" if the set is
- // too large to print for a total of 15 characters. We already left room for
- // '\0' in setting end.
- if (end - scan < 15) {
- break;
+ // The first range does not need a comma printed before it, but the rest
+ // of the ranges do need a comma beforehand
+ if (!first_range) {
+ KMP_SNPRINTF(scan, end - scan + 1, "%s", ",");
+ KMP_ADVANCE_SCAN(scan);
+ } else {
+ first_range = false;
}
- KMP_SNPRINTF(scan, end - scan + 1, ",%-d", i);
- while (*scan != '\0')
- scan++;
- }
- if (i != mask->end()) {
- KMP_SNPRINTF(scan, end - scan + 1, ",...");
- while (*scan != '\0')
- scan++;
+ // Range with three or more contiguous bits in the affinity mask
+ if (previous - start > 1) {
+ KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start),
+ static_cast<int>(previous));
+ } else {
+ // Range with one or two contiguous bits in the affinity mask
+ KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start));
+ KMP_ADVANCE_SCAN(scan);
+ if (previous - start > 0) {
+ KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous));
+ }
+ }
+ KMP_ADVANCE_SCAN(scan);
+ // Start over with new start point
+ start = finish;
+ if (start == mask->end())
+ break;
+ // Check for overflow
+ if (end - scan < 2)
+ break;
}
- KMP_SNPRINTF(scan, end - scan + 1, "}");
- while (*scan != '\0')
- scan++;
+
+ // Check for overflow
KMP_ASSERT(scan <= end);
return buf;
}
+#undef KMP_ADVANCE_SCAN
+
+// Print the affinity mask to the string buffer object in a pretty format
+// The format is a comma separated list of non-negative integers or integer
+// ranges: e.g., 1,2,3-5,7,9-15
+// The format can also be the string "{<empty>}" if no bits are set in mask
+kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf,
+ kmp_affin_mask_t *mask) {
+ int start = 0, finish = 0, previous = 0;
+ bool first_range;
+ KMP_ASSERT(buf);
+ KMP_ASSERT(mask);
+
+ __kmp_str_buf_clear(buf);
+
+ // Check for empty set.
+ if (mask->begin() == mask->end()) {
+ __kmp_str_buf_print(buf, "%s", "{<empty>}");
+ return buf;
+ }
+
+ first_range = true;
+ start = mask->begin();
+ while (1) {
+ // Find next range
+ // [start, previous] is inclusive range of contiguous bits in mask
+ for (finish = mask->next(start), previous = start;
+ finish == previous + 1 && finish != mask->end();
+ finish = mask->next(finish)) {
+ previous = finish;
+ }
+
+ // The first range does not need a comma printed before it, but the rest
+ // of the ranges do need a comma beforehand
+ if (!first_range) {
+ __kmp_str_buf_print(buf, "%s", ",");
+ } else {
+ first_range = false;
+ }
+ // Range with three or more contiguous bits in the affinity mask
+ if (previous - start > 1) {
+ __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start),
+ static_cast<int>(previous));
+ } else {
+ // Range with one or two contiguous bits in the affinity mask
+ __kmp_str_buf_print(buf, "%d", static_cast<int>(start));
+ if (previous - start > 0) {
+ __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous));
+ }
+ }
+ // Start over with new start point
+ start = finish;
+ if (start == mask->end())
+ break;
+ }
+ return buf;
+}
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
KMP_CPU_ZERO(mask);
diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h
index cb1a7e3..e62508a 100644
--- a/runtime/src/kmp_affinity.h
+++ b/runtime/src/kmp_affinity.h
@@ -376,26 +376,26 @@ class KMPNativeAffinity : public KMPAffinity {
mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
}
void zero() override {
- for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = 0;
}
void copy(const KMPAffinity::Mask *src) override {
const Mask *convert = static_cast<const Mask *>(src);
- for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = convert->mask[i];
}
void bitwise_and(const KMPAffinity::Mask *rhs) override {
const Mask *convert = static_cast<const Mask *>(rhs);
- for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] &= convert->mask[i];
}
void bitwise_or(const KMPAffinity::Mask *rhs) override {
const Mask *convert = static_cast<const Mask *>(rhs);
- for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] |= convert->mask[i];
}
void bitwise_not() override {
- for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
+ for (int i = 0; i < __kmp_num_proc_groups; ++i)
mask[i] = ~(mask[i]);
}
int begin() const override {
diff --git a/runtime/src/kmp_atomic.h b/runtime/src/kmp_atomic.h
index 3b75a6b..288916c 100644
--- a/runtime/src/kmp_atomic.h
+++ b/runtime/src/kmp_atomic.h
@@ -364,7 +364,7 @@ static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck,
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
- ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (omp_wait_id_t)lck,
+ ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (ompt_wait_id_t)lck,
OMPT_GET_RETURN_ADDRESS(0));
}
#endif
@@ -374,7 +374,7 @@ static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck,
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_atomic, (omp_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
+ ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
@@ -390,7 +390,7 @@ static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck,
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_atomic, (omp_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
+ ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0));
}
#endif
}
diff --git a/runtime/src/kmp_barrier.cpp b/runtime/src/kmp_barrier.cpp
index 2b78b54..79b6bf3 100644
--- a/runtime/src/kmp_barrier.cpp
+++ b/runtime/src/kmp_barrier.cpp
@@ -1253,7 +1253,7 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
// It is OK to report the barrier state after the barrier begin callback.
// According to the OMPT specification, a compliant implementation may
// even delay reporting this state until the barrier begins to wait.
- this_thr->th.ompt_thread_info.state = omp_state_wait_barrier;
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier;
}
#endif
@@ -1502,7 +1502,7 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split,
my_task_data, return_address);
}
#endif
- this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
}
#endif
ANNOTATE_BARRIER_END(&team->t.t_bar);
@@ -1624,7 +1624,7 @@ void __kmp_join_barrier(int gtid) {
if (!KMP_MASTER_TID(ds_tid))
this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr);
#endif
- this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit;
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit;
}
#endif
@@ -1698,6 +1698,11 @@ void __kmp_join_barrier(int gtid) {
if (__kmp_tasking_mode != tskm_immediate_exec) {
__kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj));
}
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity) {
+ KMP_CHECK_UPDATE(team->t.t_display_affinity, 0);
+ }
+#endif
#if KMP_STATS_ENABLED
// Have master thread flag the workers to indicate they are now waiting for
// next parallel region, Also wake them up so they switch their timers to
@@ -1882,12 +1887,12 @@ void __kmp_fork_barrier(int gtid, int tid) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+ this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = (team)
? OMPT_CUR_TASK_DATA(this_thr)
: &(this_thr->th.ompt_thread_info.task_data);
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
if (KMP_MASTER_TID(ds_tid) &&
@@ -1985,6 +1990,19 @@ void __kmp_fork_barrier(int gtid, int tid) {
}
#endif
#if OMP_50_ENABLED
+ // Perform the display affinity functionality
+ if (__kmp_display_affinity) {
+ if (team->t.t_display_affinity
+#if KMP_AFFINITY_SUPPORTED
+ || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed)
+#endif
+ ) {
+ // NULL means use the affinity-format-var ICV
+ __kmp_aux_display_affinity(gtid, NULL);
+ this_thr->th.th_prev_num_threads = team->t.t_nproc;
+ this_thr->th.th_prev_level = team->t.t_level;
+ }
+ }
if (!KMP_MASTER_TID(tid))
KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator);
#endif
diff --git a/runtime/src/kmp_config.h.cmake b/runtime/src/kmp_config.h.cmake
index 6b778ea..c9ebbc0 100644
--- a/runtime/src/kmp_config.h.cmake
+++ b/runtime/src/kmp_config.h.cmake
@@ -74,6 +74,8 @@
#if LIBOMP_TSAN_SUPPORT
#define TSAN_SUPPORT
#endif
+#cmakedefine01 MSVC
+#define KMP_MSVC_COMPAT MSVC
// Configured cache line based on architecture
#if KMP_ARCH_PPC64
diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp
index ac76794..4c62720 100644
--- a/runtime/src/kmp_csupport.cpp
+++ b/runtime/src/kmp_csupport.cpp
@@ -293,7 +293,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
va_start(ap, microtask);
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
kmp_info_t *master_th = __kmp_threads[gtid];
kmp_team_t *parent_team = master_th->th.th_team;
@@ -305,7 +305,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
ompt_frame = &(
parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
}
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -395,7 +395,7 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
int tid = __kmp_tid_from_gtid(gtid);
if (ompt_enabled.enabled) {
parent_team->t.t_implicit_task_taskdata[tid]
- .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
OMPT_STORE_RETURN_ADDRESS(gtid);
#endif
@@ -506,8 +506,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state != omp_state_overhead) {
- OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL;
+ this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
+ OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
@@ -524,7 +524,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid));
}
__ompt_lw_taskteam_unlink(this_thr);
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
@@ -606,8 +606,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled)
this_thr->th.ompt_thread_info.state =
- ((this_thr->th.th_team_serialized) ? omp_state_work_serial
- : omp_state_work_parallel);
+ ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
+ : ompt_state_work_parallel);
#endif
}
@@ -705,11 +705,11 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
}
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -724,7 +724,7 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
@@ -843,22 +843,22 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
kmp_team_t *team;
- omp_wait_id_t lck;
+ ompt_wait_id_t lck;
void *codeptr_ra;
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
team = __kmp_team_from_gtid(gtid);
- lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value;
+ lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value;
/* OMPT state update */
th->th.ompt_thread_info.wait_id = lck;
- th->th.ompt_thread_info.state = omp_state_wait_ordered;
+ th->th.ompt_thread_info.state = ompt_state_wait_ordered;
/* OMPT event callback */
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin,
- (omp_wait_id_t)lck, codeptr_ra);
+ (ompt_wait_id_t)lck, codeptr_ra);
}
}
#endif
@@ -871,13 +871,13 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
/* OMPT state update */
- th->th.ompt_thread_info.state = omp_state_work_parallel;
+ th->th.ompt_thread_info.state = ompt_state_work_parallel;
th->th.ompt_thread_info.wait_id = 0;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra);
+ ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra);
}
}
#endif
@@ -917,7 +917,7 @@ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
ompt_mutex_ordered,
- (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
+ (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
@@ -1144,7 +1144,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
#else
KMP_COUNT_BLOCK(OMP_CRITICAL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
- omp_state_t prev_state = omp_state_undefined;
+ ompt_state_t prev_state = ompt_state_undefined;
ompt_thread_info_t ti;
#endif
kmp_user_lock_p lck;
@@ -1188,15 +1188,15 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
/* OMPT state update */
prev_state = ti.state;
- ti.wait_id = (omp_wait_id_t)lck;
- ti.state = omp_state_wait_critical;
+ ti.wait_id = (ompt_wait_id_t)lck;
+ ti.state = ompt_state_wait_critical;
/* OMPT event callback */
codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (omp_wait_id_t)crit, codeptr_ra);
+ (ompt_wait_id_t)crit, codeptr_ra);
}
}
#endif
@@ -1216,7 +1216,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra);
+ ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra);
}
}
#endif
@@ -1292,7 +1292,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
return kmp_mutex_impl_speculative;
#endif
default:
- return ompt_mutex_impl_unknown;
+ return ompt_mutex_impl_none;
}
ilock = KMP_LOOKUP_I_LOCK(user_lock);
}
@@ -1316,7 +1316,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
case locktag_nested_drdpa:
return kmp_mutex_impl_queuing;
default:
- return ompt_mutex_impl_unknown;
+ return ompt_mutex_impl_none;
}
}
#else
@@ -1339,7 +1339,7 @@ static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
return kmp_mutex_impl_speculative;
#endif
default:
- return ompt_mutex_impl_unknown;
+ return ompt_mutex_impl_none;
}
}
#endif // KMP_USE_DYNAMIC_LOCK
@@ -1363,7 +1363,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
KMP_COUNT_BLOCK(OMP_CRITICAL);
kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
- omp_state_t prev_state = omp_state_undefined;
+ ompt_state_t prev_state = ompt_state_undefined;
ompt_thread_info_t ti;
// This is the case, if called from __kmpc_critical:
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
@@ -1402,14 +1402,14 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
/* OMPT state update */
prev_state = ti.state;
- ti.wait_id = (omp_wait_id_t)lck;
- ti.state = omp_state_wait_critical;
+ ti.wait_id = (ompt_wait_id_t)lck;
+ ti.state = ompt_state_wait_critical;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, (unsigned int)hint,
- __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr);
+ __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr);
}
}
#endif
@@ -1440,14 +1440,14 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
ti = __kmp_threads[global_tid]->th.ompt_thread_info;
/* OMPT state update */
prev_state = ti.state;
- ti.wait_id = (omp_wait_id_t)lck;
- ti.state = omp_state_wait_critical;
+ ti.wait_id = (ompt_wait_id_t)lck;
+ ti.state = ompt_state_wait_critical;
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_critical, (unsigned int)hint,
- __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr);
+ __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr);
}
}
#endif
@@ -1467,7 +1467,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
/* OMPT event callback */
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_critical, (omp_wait_id_t)crit, codeptr);
+ ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr);
}
}
#endif
@@ -1565,7 +1565,7 @@ void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
OMPT_STORE_RETURN_ADDRESS(global_tid);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
+ ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0));
}
#endif
@@ -1594,11 +1594,11 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
__kmp_check_barrier(global_tid, ct_barrier, loc);
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -1608,7 +1608,7 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -1656,11 +1656,11 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
}
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -1670,7 +1670,7 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -1867,6 +1867,59 @@ int ompc_get_team_size(int level) {
return __kmp_get_team_size(__kmp_entry_gtid(), level);
}
+#if OMP_50_ENABLED
+/* OpenMP 5.0 Affinity Format API */
+
+void ompc_set_affinity_format(char const *format) {
+ if (!__kmp_init_serial) {
+ __kmp_serial_initialize();
+ }
+ __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
+ format, KMP_STRLEN(format) + 1);
+}
+
+size_t ompc_get_affinity_format(char *buffer, size_t size) {
+ size_t format_size;
+ if (!__kmp_init_serial) {
+ __kmp_serial_initialize();
+ }
+ format_size = KMP_STRLEN(__kmp_affinity_format);
+ if (buffer && size) {
+ __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
+ format_size + 1);
+ }
+ return format_size;
+}
+
+void ompc_display_affinity(char const *format) {
+ int gtid;
+ if (!TCR_4(__kmp_init_middle)) {
+ __kmp_middle_initialize();
+ }
+ gtid = __kmp_get_gtid();
+ __kmp_aux_display_affinity(gtid, format);
+}
+
+size_t ompc_capture_affinity(char *buffer, size_t buf_size,
+ char const *format) {
+ int gtid;
+ size_t num_required;
+ kmp_str_buf_t capture_buf;
+ if (!TCR_4(__kmp_init_middle)) {
+ __kmp_middle_initialize();
+ }
+ gtid = __kmp_get_gtid();
+ __kmp_str_buf_init(&capture_buf);
+ num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
+ if (buffer && buf_size) {
+ __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
+ capture_buf.used + 1);
+ }
+ __kmp_str_buf_free(&capture_buf);
+ return num_required;
+}
+#endif /* OMP_50_ENABLED */
+
void kmpc_set_stacksize(int arg) {
// __kmp_aux_set_stacksize initializes the library if needed
__kmp_aux_set_stacksize(arg);
@@ -2006,11 +2059,11 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
*data_ptr = cpy_data;
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -2038,7 +2091,7 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
@@ -2136,7 +2189,7 @@ void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, (omp_lock_hint_t)hint,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -2160,7 +2213,7 @@ void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -2186,7 +2239,7 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -2229,7 +2282,7 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (omp_wait_id_t)user_lock, codeptr);
+ (ompt_wait_id_t)user_lock, codeptr);
}
#endif
@@ -2258,7 +2311,7 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -2304,7 +2357,7 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_lock_init) {
ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (omp_wait_id_t)user_lock, codeptr);
+ (ompt_wait_id_t)user_lock, codeptr);
}
#endif
@@ -2340,7 +2393,7 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
lck = (kmp_user_lock_p)user_lock;
}
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
@@ -2368,7 +2421,7 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
@@ -2408,7 +2461,7 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
@@ -2440,7 +2493,7 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_lock_destroy) {
ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
- ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
@@ -2485,7 +2538,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -2507,7 +2560,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
@@ -2540,7 +2593,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (omp_wait_id_t)lck, codeptr);
+ (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -2553,7 +2606,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -2575,7 +2628,7 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
}
@@ -2593,13 +2646,13 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
+ ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
}
}
}
@@ -2637,7 +2690,7 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
+ __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
}
}
#endif
@@ -2654,13 +2707,13 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (omp_wait_id_t)lck, codeptr);
+ ompt_scope_begin, (ompt_wait_id_t)lck, codeptr);
}
}
}
@@ -2696,7 +2749,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
@@ -2725,7 +2778,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -2757,7 +2810,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
codeptr = OMPT_GET_RETURN_ADDRESS(0);
if (ompt_enabled.ompt_callback_mutex_released) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -2785,12 +2838,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_released) {
// release_lock_last
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
// release_lock_prev
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_end, (omp_wait_id_t)user_lock, codeptr);
+ ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr);
}
}
#endif
@@ -2834,12 +2887,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_released) {
// release_lock_last
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
// release_lock_previous
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
}
}
#endif
@@ -2876,12 +2929,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_released) {
// release_lock_last
ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
- ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
}
} else if (ompt_enabled.ompt_callback_nest_lock) {
// release_lock_previous
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr);
}
}
#endif
@@ -2907,7 +2960,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -2930,7 +2983,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr);
}
#endif
return FTN_TRUE;
@@ -2971,7 +3024,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
- (omp_wait_id_t)lck, codeptr);
+ (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -2986,7 +3039,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -3012,7 +3065,7 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock,
+ __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock,
codeptr);
}
#endif
@@ -3030,13 +3083,13 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr);
+ ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr);
}
}
}
@@ -3077,7 +3130,7 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
ompt_enabled.ompt_callback_mutex_acquire) {
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
ompt_mutex_nest_lock, omp_lock_hint_none,
- __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr);
+ __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr);
}
#endif
@@ -3095,13 +3148,13 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
if (ompt_enabled.ompt_callback_mutex_acquired) {
// lock_first
ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
- ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr);
}
} else {
if (ompt_enabled.ompt_callback_nest_lock) {
// lock_next
ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
- ompt_mutex_scope_begin, (omp_wait_id_t)lck, codeptr);
+ ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr);
}
}
}
@@ -3392,11 +3445,11 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
// JP: as long as there is a barrier in the implementation, OMPT should and
// will provide the barrier events
// so we set-up the necessary frame/return addresses.
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -3409,7 +3462,7 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -3573,11 +3626,11 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -3591,7 +3644,7 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -3659,11 +3712,11 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -3673,7 +3726,7 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -3683,11 +3736,11 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
// TODO: implicit barrier: should be exposed
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -3697,18 +3750,18 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
} else if (packed_reduction_method == atomic_reduce_block) {
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- if (ompt_frame->enter_frame == NULL)
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (ompt_frame->enter_frame.ptr == NULL)
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(global_tid);
}
#endif
@@ -3719,7 +3772,7 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
__kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
diff --git a/runtime/src/kmp_dispatch.cpp b/runtime/src/kmp_dispatch.cpp
index b4192df..1090e9d 100644
--- a/runtime/src/kmp_dispatch.cpp
+++ b/runtime/src/kmp_dispatch.cpp
@@ -24,7 +24,7 @@
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
#include <float.h>
#endif
#include "kmp_lock.h"
@@ -478,7 +478,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
/* commonly used term: (2 nproc - 1)/(2 nproc) */
DBL x;
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
/* Linux* OS already has 64-bit computation by default for long double,
and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On
Windows* OS on IA-32 architecture, we need to set precision to 64-bit
@@ -573,7 +573,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
pr->u.p.count = tc - __kmp_dispatch_guided_remaining(
tc, GUIDED_ANALYTICAL_WORKAROUND, cross) -
cross * chunk;
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
// restore FPCW
_control87(oldFpcw, _MCW_PC);
#endif
@@ -1625,7 +1625,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
case kmp_sch_guided_analytical_chunked: {
T chunkspec = pr->u.p.parm1;
UT chunkIdx;
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
/* for storing original FPCW value for Windows* OS on
IA-32 architecture 8-byte version */
unsigned int oldFpcw;
@@ -1662,7 +1662,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
Windows* OS.
This check works around the possible effect that init != 0 for chunkIdx == 0.
*/
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
/* If we haven't already done so, save original
FPCW and set precision to 64-bit, as Windows* OS
on IA-32 architecture defaults to 53-bit */
@@ -1690,7 +1690,7 @@ int __kmp_dispatch_next_algorithm(int gtid,
} // if
} // if
} // while (1)
-#if KMP_OS_WINDOWS && KMP_ARCH_X86
+#if KMP_USE_X87CONTROL
/* restore FPCW if necessary
AC: check fpcwSet flag first because oldFpcw can be uninitialized here
*/
diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h
index 6910c37..abf1892 100644
--- a/runtime/src/kmp_ftn_entry.h
+++ b/runtime/src/kmp_ftn_entry.h
@@ -21,6 +21,12 @@
#include "kmp_i18n.h"
+#if OMP_50_ENABLED
+// For affinity format functions
+#include "kmp_io.h"
+#include "kmp_str.h"
+#endif
+
#if OMPT_SUPPORT
#include "ompt-specific.h"
#endif
@@ -355,9 +361,9 @@ int FTN_STDCALL FTN_CONTROL_TOOL(int command, int modifier, void *arg) {
}
kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()];
ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
- parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
int ret = __kmp_control_tool(command, modifier, arg);
- parent_task_info->frame.enter_frame = 0;
+ parent_task_info->frame.enter_frame.ptr = 0;
return ret;
#endif
}
@@ -389,6 +395,137 @@ void FTN_STDCALL FTN_FREE(void *ptr, const omp_allocator_t *allocator) {
__kmpc_free(__kmp_entry_gtid(), ptr, allocator);
#endif
}
+
+/* OpenMP 5.0 affinity format support */
+
+#ifndef KMP_STUB
+static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size,
+ char const *csrc, size_t csrc_size) {
+ size_t capped_src_size = csrc_size;
+ if (csrc_size >= buf_size) {
+ capped_src_size = buf_size - 1;
+ }
+ KMP_STRNCPY_S(buffer, buf_size, csrc, capped_src_size);
+ if (csrc_size >= buf_size) {
+ KMP_DEBUG_ASSERT(buffer[buf_size - 1] == '\0');
+ buffer[buf_size - 1] = csrc[buf_size - 1];
+ } else {
+ for (size_t i = csrc_size; i < buf_size; ++i)
+ buffer[i] = ' ';
+ }
+}
+
+// Convert a Fortran string to a C string by adding null byte
+class ConvertedString {
+ char *buf;
+ kmp_info_t *th;
+
+public:
+ ConvertedString(char const *fortran_str, size_t size) {
+ th = __kmp_get_thread();
+ buf = (char *)__kmp_thread_malloc(th, size + 1);
+ KMP_STRNCPY_S(buf, size + 1, fortran_str, size);
+ buf[size] = '\0';
+ }
+ ~ConvertedString() { __kmp_thread_free(th, buf); }
+ const char *get() const { return buf; }
+};
+#endif // KMP_STUB
+
+/*
+ * Set the value of the affinity-format-var ICV on the current device to the
+ * format specified in the argument.
+*/
+void FTN_STDCALL FTN_SET_AFFINITY_FORMAT(char const *format, size_t size) {
+#ifdef KMP_STUB
+ return;
+#else
+ if (!__kmp_init_serial) {
+ __kmp_serial_initialize();
+ }
+ ConvertedString cformat(format, size);
+ // Since the __kmp_affinity_format variable is a C string, do not
+ // use the fortran strncpy function
+ __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
+ cformat.get(), KMP_STRLEN(cformat.get()));
+#endif
+}
+
+/*
+ * Returns the number of characters required to hold the entire affinity format
+ * specification (not including null byte character) and writes the value of the
+ * affinity-format-var ICV on the current device to buffer. If the return value
+ * is larger than size, the affinity format specification is truncated.
+*/
+size_t FTN_STDCALL FTN_GET_AFFINITY_FORMAT(char *buffer, size_t size) {
+#ifdef KMP_STUB
+ return 0;
+#else
+ size_t format_size;
+ if (!__kmp_init_serial) {
+ __kmp_serial_initialize();
+ }
+ format_size = KMP_STRLEN(__kmp_affinity_format);
+ if (buffer && size) {
+ __kmp_fortran_strncpy_truncate(buffer, size, __kmp_affinity_format,
+ format_size);
+ }
+ return format_size;
+#endif
+}
+
+/*
+ * Prints the thread affinity information of the current thread in the format
+ * specified by the format argument. If the format is NULL or a zero-length
+ * string, the value of the affinity-format-var ICV is used.
+*/
+void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) {
+#ifdef KMP_STUB
+ return;
+#else
+ int gtid;
+ if (!TCR_4(__kmp_init_middle)) {
+ __kmp_middle_initialize();
+ }
+ gtid = __kmp_get_gtid();
+ ConvertedString cformat(format, size);
+ __kmp_aux_display_affinity(gtid, cformat.get());
+#endif
+}
+
+/*
+ * Returns the number of characters required to hold the entire affinity format
+ * specification (not including null byte) and prints the thread affinity
+ * information of the current thread into the character string buffer with the
+ * size of size in the format specified by the format argument. If the format is
+ * NULL or a zero-length string, the value of the affinity-format-var ICV is
+ * used. The buffer must be allocated prior to calling the routine. If the
+ * return value is larger than size, the affinity format specification is
+ * truncated.
+*/
+size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format,
+ size_t buf_size, size_t for_size) {
+#if defined(KMP_STUB)
+ return 0;
+#else
+ int gtid;
+ size_t num_required;
+ kmp_str_buf_t capture_buf;
+ if (!TCR_4(__kmp_init_middle)) {
+ __kmp_middle_initialize();
+ }
+ gtid = __kmp_get_gtid();
+ __kmp_str_buf_init(&capture_buf);
+ ConvertedString cformat(format, for_size);
+ num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf);
+ if (buffer && buf_size) {
+ __kmp_fortran_strncpy_truncate(buffer, buf_size, capture_buf.str,
+ capture_buf.used);
+ }
+ __kmp_str_buf_free(&capture_buf);
+ return num_required;
+#endif
+}
#endif /* OMP_50_ENABLED */
int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) {
@@ -397,7 +534,8 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) {
#else
int gtid;
-#if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_HURD
gtid = __kmp_entry_gtid();
#elif KMP_OS_WINDOWS
if (!__kmp_init_parallel ||
@@ -777,34 +915,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_TEAMS)(void) {
#ifdef KMP_STUB
return 1;
#else
- kmp_info_t *thr = __kmp_entry_thread();
- if (thr->th.th_teams_microtask) {
- kmp_team_t *team = thr->th.th_team;
- int tlevel = thr->th.th_teams_level;
- int ii = team->t.t_level; // the level of the teams construct
- int dd = team->t.t_serialized;
- int level = tlevel + 1;
- KMP_DEBUG_ASSERT(ii >= tlevel);
- while (ii > level) {
- for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
- }
- if (team->t.t_serialized && (!dd)) {
- team = team->t.t_parent;
- continue;
- }
- if (ii > level) {
- team = team->t.t_parent;
- ii--;
- }
- }
- if (dd > 1) {
- return 1; // teams region is serialized ( 1 team of 1 thread ).
- } else {
- return team->t.t_parent->t.t_nproc;
- }
- } else {
- return 1;
- }
+ return __kmp_aux_get_num_teams();
#endif
}
@@ -812,34 +923,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_TEAM_NUM)(void) {
#ifdef KMP_STUB
return 0;
#else
- kmp_info_t *thr = __kmp_entry_thread();
- if (thr->th.th_teams_microtask) {
- kmp_team_t *team = thr->th.th_team;
- int tlevel = thr->th.th_teams_level; // the level of the teams construct
- int ii = team->t.t_level;
- int dd = team->t.t_serialized;
- int level = tlevel + 1;
- KMP_DEBUG_ASSERT(ii >= tlevel);
- while (ii > level) {
- for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) {
- }
- if (team->t.t_serialized && (!dd)) {
- team = team->t.t_parent;
- continue;
- }
- if (ii > level) {
- team = team->t.t_parent;
- ii--;
- }
- }
- if (dd > 1) {
- return 0; // teams region is serialized ( 1 team of 1 thread ).
- } else {
- return team->t.t_master_tid;
- }
- } else {
- return 0;
- }
+ return __kmp_aux_get_team_num();
#endif
}
diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h
index 5d0aaa2..47188fc 100644
--- a/runtime/src/kmp_ftn_os.h
+++ b/runtime/src/kmp_ftn_os.h
@@ -139,6 +139,10 @@
#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator
#define FTN_ALLOC omp_alloc
#define FTN_FREE omp_free
+#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format
+#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format
+#define FTN_DISPLAY_AFFINITY omp_display_affinity
+#define FTN_CAPTURE_AFFINITY omp_capture_affinity
#endif
#endif /* KMP_FTN_PLAIN */
@@ -265,6 +269,10 @@
#define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_
#define FTN_ALLOC omp_alloc_
#define FTN_FREE omp_free_
+#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_
+#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_
+#define FTN_DISPLAY_AFFINITY omp_display_affinity_
+#define FTN_CAPTURE_AFFINITY omp_capture_affinity_
#endif
#endif /* KMP_FTN_APPEND */
@@ -391,6 +399,10 @@
#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR
#define FTN_ALLOC OMP_ALLOC
#define FTN_FREE OMP_FREE
+#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT
+#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT
+#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY
+#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY
#endif
#endif /* KMP_FTN_UPPER */
@@ -517,6 +529,10 @@
#define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_
#define FTN_ALLOC OMP_ALLOC_
#define FTN_FREE OMP_FREE_
+#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_
+#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_
+#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_
+#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY_
#endif
#endif /* KMP_FTN_UAPPEND */
diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp
index 34465de..ef8a116 100644
--- a/runtime/src/kmp_global.cpp
+++ b/runtime/src/kmp_global.cpp
@@ -282,6 +282,11 @@ kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0};
int __kmp_affinity_num_places = 0;
#endif
+#if OMP_50_ENABLED
+int __kmp_display_affinity = FALSE;
+char *__kmp_affinity_format = NULL;
+#endif // OMP_50_ENABLED
+
kmp_hws_item_t __kmp_hws_socket = {0, 0};
kmp_hws_item_t __kmp_hws_node = {0, 0};
kmp_hws_item_t __kmp_hws_tile = {0, 0};
diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp
index e218018..646d75d 100644
--- a/runtime/src/kmp_gsupport.cpp
+++ b/runtime/src/kmp_gsupport.cpp
@@ -32,17 +32,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) {
MKLOC(loc, "GOMP_barrier");
KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid));
#if OMPT_SUPPORT && OMPT_OPTIONAL
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmpc_barrier(&loc, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
@@ -178,10 +178,10 @@ void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
// and for all other threads to reach this point.
#if OMPT_SUPPORT && OMPT_OPTIONAL
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -198,7 +198,7 @@ void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) {
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
return retval;
@@ -214,10 +214,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
// propagated to all threads before trying to reuse the t_copypriv_data field.
__kmp_team_from_gtid(gtid)->t.t_copypriv_data = data;
#if OMPT_SUPPORT && OMPT_OPTIONAL
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -230,7 +230,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) {
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
}
@@ -284,8 +284,8 @@ static
void *data) {
#if OMPT_SUPPORT
kmp_info_t *thr;
- omp_frame_t *ompt_frame;
- omp_state_t enclosing_state;
+ ompt_frame_t *ompt_frame;
+ ompt_state_t enclosing_state;
if (ompt_enabled.enabled) {
// get pointer to thread data structure
@@ -293,11 +293,11 @@ static
// save enclosing task state; set current state for task
enclosing_state = thr->th.ompt_thread_info.state;
- thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
// set task frame
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+ ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
@@ -306,7 +306,7 @@ static
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
// clear task frame
- ompt_frame->exit_frame = NULL;
+ ompt_frame->exit_frame = ompt_data_none;
// restore enclosing state
thr->th.ompt_thread_info.state = enclosing_state;
@@ -331,18 +331,18 @@ static
#if OMPT_SUPPORT
kmp_info_t *thr;
- omp_frame_t *ompt_frame;
- omp_state_t enclosing_state;
+ ompt_frame_t *ompt_frame;
+ ompt_state_t enclosing_state;
if (ompt_enabled.enabled) {
thr = __kmp_threads[*gtid];
// save enclosing task state; set current state for task
enclosing_state = thr->th.ompt_thread_info.state;
- thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
// set task frame
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+ ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
@@ -352,7 +352,7 @@ static
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
// clear task frame
- ompt_frame->exit_frame = NULL;
+ ompt_frame->exit_frame = ompt_data_none;
// reset enclosing state
thr->th.ompt_thread_info.state = enclosing_state;
@@ -403,7 +403,7 @@ static
&(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid));
task_info->thread_num = __kmp_tid_from_gtid(gtid);
}
- thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ thr->th.ompt_thread_info.state = ompt_state_work_parallel;
}
#endif
}
@@ -422,11 +422,11 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT
- omp_frame_t *parent_frame, *frame;
+ ompt_frame_t *parent_frame, *frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
- parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -448,7 +448,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *),
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL);
- frame->exit_frame = OMPT_GET_FRAME_ADDRESS(1);
+ frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
}
@@ -471,7 +471,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) {
// Implicit task is finished here, in the barrier we might schedule
// deferred tasks,
// these don't see the implicit task on the stack
- OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = NULL;
+ OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none;
}
#endif
@@ -764,17 +764,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) {
KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid))
#if OMPT_SUPPORT && OMPT_OPTIONAL
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -1075,16 +1075,16 @@ LOOP_DOACROSS_RUNTIME_START_ULL(
#if OMPT_SUPPORT && OMPT_OPTIONAL
#define OMPT_LOOP_PRE() \
- omp_frame_t *parent_frame; \
+ ompt_frame_t *parent_frame; \
if (ompt_enabled.enabled) { \
__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \
- parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); \
+ parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \
OMPT_STORE_RETURN_ADDRESS(gtid); \
}
#define OMPT_LOOP_POST() \
if (ompt_enabled.enabled) { \
- parent_frame->enter_frame = NULL; \
+ parent_frame->enter_frame = ompt_data_none; \
}
#else
@@ -1164,7 +1164,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
current_task = __kmp_threads[gtid]->th.th_current_task;
- current_task->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
@@ -1198,8 +1198,8 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
taskdata = KMP_TASK_TO_TASKDATA(task);
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
- thread->th.ompt_thread_info.state = omp_state_work_parallel;
- taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+ thread->th.ompt_thread_info.state = ompt_state_work_parallel;
+ taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -1211,13 +1211,13 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data,
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
thread->th.ompt_thread_info = oldInfo;
- taskdata->ompt_task_info.frame.exit_frame = NULL;
+ taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
}
#endif
}
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- current_task->ompt_task_info.frame.enter_frame = NULL;
+ current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif
@@ -1302,11 +1302,11 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
int gtid = __kmp_entry_gtid();
#if OMPT_SUPPORT
- omp_frame_t *parent_frame;
+ ompt_frame_t *parent_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL);
- parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -1328,7 +1328,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)(
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- parent_frame->enter_frame = NULL;
+ parent_frame->enter_frame = ompt_data_none;
}
#endif
@@ -1342,17 +1342,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) {
KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid))
#if OMPT_SUPPORT
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
if (ompt_enabled.enabled) {
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
__kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
}
#endif
@@ -1383,7 +1383,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
ompt_task_info_t *parent_task_info, *task_info;
if (ompt_enabled.enabled) {
parent_task_info = __ompt_get_task_info_object(0);
- parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
OMPT_STORE_RETURN_ADDRESS(gtid);
}
#endif
@@ -1403,7 +1403,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
task_info = __ompt_get_task_info_object(0);
- task_info->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+ task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
task(data);
@@ -1415,8 +1415,8 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *),
KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)();
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- task_info->frame.exit_frame = NULL;
- parent_task_info->frame.enter_frame = NULL;
+ task_info->frame.exit_frame = ompt_data_none;
+ parent_task_info->frame.enter_frame = ompt_data_none;
}
#endif
}
diff --git a/runtime/src/kmp_io.cpp b/runtime/src/kmp_io.cpp
index 4f58ea0..24c6e72 100644
--- a/runtime/src/kmp_io.cpp
+++ b/runtime/src/kmp_io.cpp
@@ -27,11 +27,15 @@
#include "kmp_str.h"
#if KMP_OS_WINDOWS
+#if KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable : 271 310)
+#endif
#include <windows.h>
+#if KMP_MSVC_COMPAT
#pragma warning(pop)
#endif
+#endif
/* ------------------------------------------------------------------------ */
@@ -42,10 +46,7 @@ kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER(
#if KMP_OS_WINDOWS
-#ifdef KMP_DEBUG
-/* __kmp_stdout is used only for dev build */
static HANDLE __kmp_stdout = NULL;
-#endif
static HANDLE __kmp_stderr = NULL;
static int __kmp_console_exists = FALSE;
static kmp_str_buf_t __kmp_console_buf;
@@ -72,10 +73,7 @@ void __kmp_close_console(void) {
/* wait until user presses return before closing window */
/* TODO only close if a window was opened */
if (__kmp_console_exists) {
-#ifdef KMP_DEBUG
- /* standard out is used only in dev build */
__kmp_stdout = NULL;
-#endif
__kmp_stderr = NULL;
__kmp_str_buf_free(&__kmp_console_buf);
__kmp_console_exists = FALSE;
@@ -88,21 +86,17 @@ static void __kmp_redirect_output(void) {
__kmp_acquire_bootstrap_lock(&__kmp_console_lock);
if (!__kmp_console_exists) {
-#ifdef KMP_DEBUG
- /* standard out is used only in dev build */
HANDLE ho;
-#endif
HANDLE he;
__kmp_str_buf_init(&__kmp_console_buf);
AllocConsole();
-// We do not check the result of AllocConsole because
-// 1. the call is harmless
-// 2. it is not clear how to communicate failue
-// 3. we will detect failure later when we get handle(s)
+ // We do not check the result of AllocConsole because
+ // 1. the call is harmless
+  // 2. it is not clear how to communicate failure
+ // 3. we will detect failure later when we get handle(s)
-#ifdef KMP_DEBUG
ho = GetStdHandle(STD_OUTPUT_HANDLE);
if (ho == INVALID_HANDLE_VALUE || ho == NULL) {
@@ -114,7 +108,6 @@ static void __kmp_redirect_output(void) {
__kmp_stdout = ho; // temporary code, need new global for ho
}
-#endif
he = GetStdHandle(STD_ERROR_HANDLE);
if (he == INVALID_HANDLE_VALUE || he == NULL) {
@@ -133,22 +126,22 @@ static void __kmp_redirect_output(void) {
#else
#define __kmp_stderr (stderr)
+#define __kmp_stdout (stdout)
#endif /* KMP_OS_WINDOWS */
-void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) {
+void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap) {
#if KMP_OS_WINDOWS
if (!__kmp_console_exists) {
__kmp_redirect_output();
}
- if (!__kmp_stderr && __kmp_io == kmp_err) {
+ if (!__kmp_stderr && out_stream == kmp_err) {
return;
}
-#ifdef KMP_DEBUG
- if (!__kmp_stdout && __kmp_io == kmp_out) {
+ if (!__kmp_stdout && out_stream == kmp_out) {
return;
}
-#endif
#endif /* KMP_OS_WINDOWS */
+ auto stream = ((out_stream == kmp_out) ? __kmp_stdout : __kmp_stderr);
if (__kmp_debug_buf && __kmp_debug_buffer != NULL) {
@@ -170,14 +163,14 @@ void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) {
"overflow; increase "
"KMP_DEBUG_BUF_CHARS to %d\n",
chars + 1);
- WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used,
- &count, NULL);
+ WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count,
+ NULL);
__kmp_str_buf_clear(&__kmp_console_buf);
#else
- fprintf(__kmp_stderr, "OMP warning: Debugging buffer overflow; "
- "increase KMP_DEBUG_BUF_CHARS to %d\n",
+ fprintf(stream, "OMP warning: Debugging buffer overflow; "
+ "increase KMP_DEBUG_BUF_CHARS to %d\n",
chars + 1);
- fflush(__kmp_stderr);
+ fflush(stream);
#endif
__kmp_debug_buf_warn_chars = chars + 1;
}
@@ -192,15 +185,15 @@ void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) {
__kmp_str_buf_print(&__kmp_console_buf, "pid=%d: ", (kmp_int32)getpid());
#endif
__kmp_str_buf_vprint(&__kmp_console_buf, format, ap);
- WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used,
- &count, NULL);
+ WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count,
+ NULL);
__kmp_str_buf_clear(&__kmp_console_buf);
#else
#ifdef KMP_DEBUG_PIDS
- fprintf(__kmp_stderr, "pid=%d: ", (kmp_int32)getpid());
+ fprintf(stream, "pid=%d: ", (kmp_int32)getpid());
#endif
- vfprintf(__kmp_stderr, format, ap);
- fflush(__kmp_stderr);
+ vfprintf(stream, format, ap);
+ fflush(stream);
#endif
}
}
@@ -224,3 +217,14 @@ void __kmp_printf_no_lock(char const *format, ...) {
va_end(ap);
}
+
+void __kmp_fprintf(enum kmp_io stream, char const *format, ...) {
+ va_list ap;
+ va_start(ap, format);
+
+ __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock);
+ __kmp_vprintf(stream, format, ap);
+ __kmp_release_bootstrap_lock(&__kmp_stdio_lock);
+
+ va_end(ap);
+}
diff --git a/runtime/src/kmp_io.h b/runtime/src/kmp_io.h
index 84ac67b..dac7a42 100644
--- a/runtime/src/kmp_io.h
+++ b/runtime/src/kmp_io.h
@@ -26,9 +26,10 @@ extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */
extern kmp_bootstrap_lock_t
__kmp_console_lock; /* Control console initialization */
-extern void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap);
+extern void __kmp_vprintf(enum kmp_io stream, char const *format, va_list ap);
extern void __kmp_printf(char const *format, ...);
extern void __kmp_printf_no_lock(char const *format, ...);
+extern void __kmp_fprintf(enum kmp_io stream, char const *format, ...);
extern void __kmp_close_console(void);
#ifdef __cplusplus
diff --git a/runtime/src/kmp_lock.cpp b/runtime/src/kmp_lock.cpp
index 16834c6..5c2eeed 100644
--- a/runtime/src/kmp_lock.cpp
+++ b/runtime/src/kmp_lock.cpp
@@ -1108,7 +1108,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
kmp_int32 need_mf = 1;
#if OMPT_SUPPORT
- omp_state_t prev_state = omp_state_undefined;
+ ompt_state_t prev_state = ompt_state_undefined;
#endif
KA_TRACE(1000,
@@ -1216,7 +1216,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
#endif
#if OMPT_SUPPORT
- if (ompt_enabled.enabled && prev_state != omp_state_undefined) {
+ if (ompt_enabled.enabled && prev_state != ompt_state_undefined) {
/* change the state before clearing wait_id */
this_thr->th.ompt_thread_info.state = prev_state;
this_thr->th.ompt_thread_info.wait_id = 0;
@@ -1231,11 +1231,11 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck,
}
#if OMPT_SUPPORT
- if (ompt_enabled.enabled && prev_state == omp_state_undefined) {
+ if (ompt_enabled.enabled && prev_state == ompt_state_undefined) {
/* this thread will spin; set wait_id before entering wait state */
prev_state = this_thr->th.ompt_thread_info.state;
this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck;
- this_thr->th.ompt_thread_info.state = omp_state_wait_lock;
+ this_thr->th.ompt_thread_info.state = ompt_state_wait_lock;
}
#endif
@@ -1716,7 +1716,9 @@ static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck,
/* RTM Adaptive locks */
-#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300
+#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) || \
+ (KMP_COMPILER_MSVC && _MSC_VER >= 1700) || \
+ (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT)
#include <immintrin.h>
#define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT)
@@ -3357,7 +3359,7 @@ static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) {
#endif
static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) {
- return lck == lck->lk.initialized;
+ return lck == lck->lk.self;
}
static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) {
diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h
index 220236d..6a88d7b 100644
--- a/runtime/src/kmp_lock.h
+++ b/runtime/src/kmp_lock.h
@@ -649,7 +649,7 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
} \
} \
if (lck->tas.lk.poll != 0 || \
- !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
+ !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
@@ -659,8 +659,8 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck,
} else { \
KMP_YIELD_SPIN(spins); \
} \
- while (lck->tas.lk.poll != 0 || \
- !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
+ while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \
+ &lck->tas.lk.poll, 0, gtid + 1)) { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
@@ -702,7 +702,7 @@ static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck,
}
}
return ((lck->tas.lk.poll == 0) &&
- __kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
+ __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
} else {
KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL);
return (*__kmp_test_user_lock_with_checks_)(lck, gtid);
@@ -767,7 +767,7 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
*depth = KMP_LOCK_ACQUIRED_NEXT; \
} else { \
if ((lck->tas.lk.poll != 0) || \
- !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
+ !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
kmp_uint32 spins; \
KMP_FSYNC_PREPARE(lck); \
KMP_INIT_YIELD(spins); \
@@ -777,8 +777,9 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck,
} else { \
KMP_YIELD_SPIN(spins); \
} \
- while ((lck->tas.lk.poll != 0) || \
- !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
+ while ( \
+ (lck->tas.lk.poll != 0) || \
+ !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \
if (TCR_4(__kmp_nth) > \
(__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
KMP_YIELD(TRUE); \
@@ -826,7 +827,7 @@ static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck,
return ++lck->tas.lk.depth_locked; /* same owner, depth increased */
}
retval = ((lck->tas.lk.poll == 0) &&
- __kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
+ __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1));
if (retval) {
KMP_MB();
lck->tas.lk.depth_locked = 1;
diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h
index 93743ad..3c2426b 100644
--- a/runtime/src/kmp_os.h
+++ b/runtime/src/kmp_os.h
@@ -86,9 +86,12 @@
128-bit extended precision type yet */
typedef long double _Quad;
#elif KMP_COMPILER_GCC
+/* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad */
+#if !KMP_OS_NETBSD
typedef __float128 _Quad;
#undef KMP_HAVE_QUAD
#define KMP_HAVE_QUAD 1
+#endif
#elif KMP_COMPILER_MSVC
typedef long double _Quad;
#endif
@@ -100,7 +103,9 @@ typedef long double _Quad;
#endif
#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
+#define KMP_USE_X87CONTROL 0
#if KMP_OS_WINDOWS
+#define KMP_END_OF_LINE "\r\n"
typedef char kmp_int8;
typedef unsigned char kmp_uint8;
typedef short kmp_int16;
@@ -122,6 +127,10 @@ typedef struct kmp_struct64 kmp_int64;
typedef struct kmp_struct64 kmp_uint64;
/* Not sure what to use for KMP_[U]INT64_SPEC here */
#endif
+#if KMP_ARCH_X86 && KMP_MSVC_COMPAT
+#undef KMP_USE_X87CONTROL
+#define KMP_USE_X87CONTROL 1
+#endif
#if KMP_ARCH_X86_64
#define KMP_INTPTR 1
typedef __int64 kmp_intptr_t;
@@ -132,6 +141,7 @@ typedef unsigned __int64 kmp_uintptr_t;
#endif /* KMP_OS_WINDOWS */
#if KMP_OS_UNIX
+#define KMP_END_OF_LINE "\n"
typedef char kmp_int8;
typedef unsigned char kmp_uint8;
typedef short kmp_int16;
@@ -246,7 +256,7 @@ template <> struct traits_t<unsigned long long> {
#define KMP_EXPORT extern /* export declaration in guide libraries */
-#if __GNUC__ >= 4
+#if __GNUC__ >= 4 && !defined(__MINGW32__)
#define __forceinline __inline
#endif
@@ -296,7 +306,7 @@ extern "C" {
#define KMP_NORETURN __attribute__((noreturn))
#endif
-#if KMP_OS_WINDOWS
+#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
#define KMP_ALIGN(bytes) __declspec(align(bytes))
#define KMP_THREAD_LOCAL __declspec(thread)
#define KMP_ALIAS /* Nothing */
@@ -356,10 +366,12 @@ enum kmp_mem_fence_type {
#if KMP_ASM_INTRINS && KMP_OS_WINDOWS
+#if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG
#pragma intrinsic(InterlockedExchangeAdd)
#pragma intrinsic(InterlockedCompareExchange)
#pragma intrinsic(InterlockedExchange)
#pragma intrinsic(InterlockedExchange64)
+#endif
// Using InterlockedIncrement / InterlockedDecrement causes a library loading
// ordering problem, so we use InterlockedExchangeAdd instead.
diff --git a/runtime/src/kmp_platform.h b/runtime/src/kmp_platform.h
index 7610484..bb23de0 100644
--- a/runtime/src/kmp_platform.h
+++ b/runtime/src/kmp_platform.h
@@ -17,8 +17,10 @@
/* ---------------------- Operating system recognition ------------------- */
#define KMP_OS_LINUX 0
+#define KMP_OS_DRAGONFLY 0
#define KMP_OS_FREEBSD 0
#define KMP_OS_NETBSD 0
+#define KMP_OS_OPENBSD 0
#define KMP_OS_DARWIN 0
#define KMP_OS_WINDOWS 0
#define KMP_OS_CNK 0
@@ -45,6 +47,11 @@
#else
#endif
+#if (defined __DragonFly__)
+#undef KMP_OS_DRAGONFLY
+#define KMP_OS_DRAGONFLY 1
+#endif
+
#if (defined __FreeBSD__)
#undef KMP_OS_FREEBSD
#define KMP_OS_FREEBSD 1
@@ -55,6 +62,11 @@
#define KMP_OS_NETBSD 1
#endif
+#if (defined __OpenBSD__)
+#undef KMP_OS_OPENBSD
+#define KMP_OS_OPENBSD 1
+#endif
+
#if (defined __bgq__)
#undef KMP_OS_CNK
#define KMP_OS_CNK 1
@@ -66,12 +78,13 @@
#endif
#if (1 != \
- KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + \
- KMP_OS_WINDOWS + KMP_OS_HURD)
+ KMP_OS_LINUX + KMP_OS_DRAGONFLY + KMP_OS_FREEBSD + KMP_OS_NETBSD + \
+ KMP_OS_OPENBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS + KMP_OS_HURD)
#error Unknown OS
#endif
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_OPENBSD || KMP_OS_DARWIN || KMP_OS_HURD
#undef KMP_OS_UNIX
#define KMP_OS_UNIX 1
#endif
@@ -88,7 +101,7 @@
#define KMP_ARCH_MIPS64 0
#if KMP_OS_WINDOWS
-#if defined _M_AMD64
+#if defined(_M_AMD64) || defined(__x86_64)
#undef KMP_ARCH_X86_64
#define KMP_ARCH_X86_64 1
#else
diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp
index b861c06..3dd9ab6 100644
--- a/runtime/src/kmp_runtime.cpp
+++ b/runtime/src/kmp_runtime.cpp
@@ -1092,6 +1092,19 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team,
#endif
}
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && team->t.t_display_affinity != 1) {
+ for (i = 0; i < team->t.t_nproc; i++) {
+ kmp_info_t *thr = team->t.t_threads[i];
+ if (thr->th.th_prev_num_threads != team->t.t_nproc ||
+ thr->th.th_prev_level != team->t.t_level) {
+ team->t.t_display_affinity = 1;
+ break;
+ }
+ }
+ }
+#endif
+
KMP_MB();
}
@@ -1213,12 +1226,12 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
ompt_data_t *implicit_task_data;
void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state != omp_state_overhead) {
+ this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
ompt_task_info_t *parent_task_info;
parent_task_info = OMPT_CUR_TASK_INFO(this_thr);
- parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
if (ompt_enabled.ompt_callback_parallel_begin) {
int team_size = 1;
@@ -1382,13 +1395,27 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq);
#endif
+#if OMP_50_ENABLED
+ // Perform the display affinity functionality for
+ // serialized parallel regions
+ if (__kmp_display_affinity) {
+ if (this_thr->th.th_prev_level != serial_team->t.t_level ||
+ this_thr->th.th_prev_num_threads != 1) {
+ // NULL means use the affinity-format-var ICV
+ __kmp_aux_display_affinity(global_tid, NULL);
+ this_thr->th.th_prev_level = serial_team->t.t_level;
+ this_thr->th.th_prev_num_threads = 1;
+ }
+ }
+#endif
+
if (__kmp_env_consistency_check)
__kmp_push_parallel(global_tid, NULL);
#if OMPT_SUPPORT
serial_team->t.ompt_team_info.master_return_address = codeptr;
if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state != omp_state_overhead) {
- OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
+ this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
+ OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
ompt_lw_taskteam_t lw_taskteam;
__ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid,
@@ -1408,8 +1435,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
}
/* OMPT state */
- this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
- OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1);
+ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
+ OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
}
@@ -1478,7 +1505,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
ompt_data_t ompt_parallel_data = ompt_data_none;
ompt_data_t *parent_task_data;
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
ompt_data_t *implicit_task_data;
void *return_address = NULL;
@@ -1518,7 +1545,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
parent_task_data, ompt_frame, &ompt_parallel_data, team_size,
OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = omp_state_overhead;
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
@@ -1558,7 +1585,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
if (ompt_enabled.enabled) {
__ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid,
&ompt_parallel_data, return_address);
- exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame);
+ exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr);
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
// don't use lw_taskteam after linking. content was swaped
@@ -1574,7 +1601,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
/* OMPT state */
- master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1594,7 +1621,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
*exit_runtime_p = NULL;
if (ompt_enabled.enabled) {
- OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL;
+ OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none;
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_end, NULL, implicit_task_data, 1,
@@ -1607,7 +1634,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th),
OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = omp_state_overhead;
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
return TRUE;
@@ -1776,7 +1803,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
// don't use lw_taskteam after linking. content was swaped
task_info = OMPT_CUR_TASK_INFO(master_th);
- exit_runtime_p = &(task_info->frame.exit_frame);
+ exit_runtime_p = &(task_info->frame.exit_frame.ptr);
if (ompt_enabled.ompt_callback_implicit_task) {
ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th),
@@ -1786,7 +1813,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
/* OMPT state */
- master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1819,7 +1846,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
OMPT_CUR_TEAM_DATA(master_th), parent_task_data,
OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = omp_state_overhead;
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
} else if (microtask == (microtask_t)__kmp_teams_master) {
@@ -1874,7 +1901,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
__ompt_lw_taskteam_link(&lw_taskteam, master_th, 0);
// don't use lw_taskteam after linking. content was swaped
task_info = OMPT_CUR_TASK_INFO(master_th);
- exit_runtime_p = &(task_info->frame.exit_frame);
+ exit_runtime_p = &(task_info->frame.exit_frame.ptr);
/* OMPT implicit task begin */
implicit_task_data = OMPT_CUR_TASK_DATA(master_th);
@@ -1887,7 +1914,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
}
/* OMPT state */
- master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
} else {
exit_runtime_p = &dummy;
}
@@ -1920,7 +1947,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
&ompt_parallel_data, parent_task_data,
OMPT_INVOKER(call_context), return_address);
}
- master_th->th.ompt_thread_info.state = omp_state_overhead;
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
#if OMP_40_ENABLED
@@ -1932,7 +1959,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
__ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data,
return_address);
- lwt.ompt_task_info.frame.exit_frame = NULL;
+ lwt.ompt_task_info.frame.exit_frame = ompt_data_none;
__ompt_lw_taskteam_link(&lwt, master_th, 1);
// don't use lw_taskteam after linking. content was swaped
#endif
@@ -1948,7 +1975,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid));
KMP_MB();
return FALSE;
- }
+ } // if (nthreads == 1)
// GEH: only modify the executing flag in the case when not serialized
// serialized case is handled in kmpc_serialized_parallel
@@ -2132,6 +2159,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
master_th->th.th_task_state_top++;
#if KMP_NESTED_HOT_TEAMS
if (master_th->th.th_hot_teams &&
+ active_level < __kmp_hot_teams_max_level &&
team == master_th->th.th_hot_teams[active_level].hot_team) {
// Restore master's nested state if nested hot team
master_th->th.th_task_state =
@@ -2195,7 +2223,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
&master_th->th.th_current_task->td_icvs, loc);
#if OMPT_SUPPORT
- master_th->th.ompt_thread_info.state = omp_state_work_parallel;
+ master_th->th.ompt_thread_info.state = ompt_state_work_parallel;
#endif
__kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
@@ -2276,7 +2304,7 @@ int __kmp_fork_call(ident_t *loc, int gtid,
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- master_th->th.ompt_thread_info.state = omp_state_overhead;
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
@@ -2288,8 +2316,8 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread,
kmp_team_t *team) {
// restore state outside the region
thread->th.ompt_thread_info.state =
- ((team->t.t_serialized) ? omp_state_work_serial
- : omp_state_work_parallel);
+ ((team->t.t_serialized) ? ompt_state_work_serial
+ : ompt_state_work_parallel);
}
static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
@@ -2302,7 +2330,7 @@ static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread,
codeptr);
}
- task_info->frame.enter_frame = NULL;
+ task_info->frame.enter_frame = ompt_data_none;
__kmp_join_restore_state(thread, team);
}
#endif
@@ -2337,7 +2365,7 @@ void __kmp_join_call(ident_t *loc, int gtid
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- master_th->th.ompt_thread_info.state = omp_state_overhead;
+ master_th->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
@@ -2516,7 +2544,7 @@ void __kmp_join_call(ident_t *loc, int gtid
OMPT_CUR_TASK_INFO(master_th)->thread_num);
}
- task_info->frame.exit_frame = NULL;
+ task_info->frame.exit_frame = ompt_data_none;
task_info->task_data = ompt_data_none;
}
#endif
@@ -2649,6 +2677,8 @@ void __kmp_set_num_threads(int new_nth, int gtid) {
KMP_COUNT_VALUE(OMP_set_numthreads, new_nth);
thread = __kmp_threads[gtid];
+ if (thread->th.th_current_task->td_icvs.nproc == new_nth)
+ return; // nothing to do
__kmp_save_internal_controls(thread);
@@ -3816,6 +3846,8 @@ int __kmp_register_root(int initial_thread) {
#endif /* KMP_AFFINITY_SUPPORTED */
#if OMP_50_ENABLED
root_thread->th.th_def_allocator = __kmp_def_allocator;
+ root_thread->th.th_prev_level = 0;
+ root_thread->th.th_prev_num_threads = 1;
#endif
__kmp_root_counter++;
@@ -3825,7 +3857,7 @@ int __kmp_register_root(int initial_thread) {
kmp_info_t *root_thread = ompt_get_thread();
- ompt_set_thread_state(root_thread, omp_state_overhead);
+ ompt_set_thread_state(root_thread, ompt_state_overhead);
if (ompt_enabled.ompt_callback_thread_begin) {
ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
@@ -3839,7 +3871,7 @@ int __kmp_register_root(int initial_thread) {
// initial task has nothing to return to
}
- ompt_set_thread_state(root_thread, omp_state_work_serial);
+ ompt_set_thread_state(root_thread, ompt_state_work_serial);
}
#endif
@@ -3978,7 +4010,7 @@ void __kmp_unregister_root_current_thread(int gtid) {
if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) {
#if OMPT_SUPPORT
// the runtime is shutting down so we won't report any events
- thread->th.ompt_thread_info.state = omp_state_undefined;
+ thread->th.ompt_thread_info.state = ompt_state_undefined;
#endif
__kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL));
}
@@ -4357,6 +4389,8 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team,
#endif
#if OMP_50_ENABLED
new_thr->th.th_def_allocator = __kmp_def_allocator;
+ new_thr->th.th_prev_level = 0;
+ new_thr->th.th_prev_num_threads = 1;
#endif
TCW_4(new_thr->th.th_in_pool, FALSE);
@@ -4545,6 +4579,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = masters_place;
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && masters_place != th->th.th_current_place &&
+ team->t.t_display_affinity != 1) {
+ team->t.t_display_affinity = 1;
+ }
+#endif
KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
@@ -4578,6 +4618,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = place;
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && place != th->th.th_current_place &&
+ team->t.t_display_affinity != 1) {
+ team->t.t_display_affinity = 1;
+ }
+#endif
KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d "
"partition = [%d,%d]\n",
@@ -4599,6 +4645,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first_place;
th->th.th_last_place = last_place;
th->th.th_new_place = place;
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && place != th->th.th_current_place &&
+ team->t.t_display_affinity != 1) {
+ team->t.t_display_affinity = 1;
+ }
+#endif
s_count++;
if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -4667,6 +4719,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = place;
th->th.th_new_place = place;
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && place != th->th.th_current_place &&
+ team->t.t_display_affinity != 1) {
+ team->t.t_display_affinity = 1;
+ }
+#endif
s_count = 1;
while (s_count < S) {
if (place == last_place) {
@@ -4758,7 +4816,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = first;
th->th.th_new_place = place;
th->th.th_last_place = last;
-
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && place != th->th.th_current_place &&
+ team->t.t_display_affinity != 1) {
+ team->t.t_display_affinity = 1;
+ }
+#endif
KA_TRACE(100,
("__kmp_partition_places: spread: T#%d(%d:%d) place %d "
"partition = [%d,%d], spacing = %.4f\n",
@@ -4787,6 +4850,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) {
th->th.th_first_place = place;
th->th.th_last_place = place;
th->th.th_new_place = place;
+#if OMP_50_ENABLED
+ if (__kmp_display_affinity && place != th->th.th_current_place &&
+ team->t.t_display_affinity != 1) {
+ team->t.t_display_affinity = 1;
+ }
+#endif
s_count++;
if ((s_count == S) && rem && (gap_ct == gap)) {
@@ -5601,7 +5670,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
thread_data = &(this_thr->th.ompt_thread_info.thread_data);
*thread_data = ompt_data_none;
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
this_thr->th.ompt_thread_info.wait_id = 0;
this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0);
if (ompt_enabled.ompt_callback_thread_begin) {
@@ -5613,7 +5682,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- this_thr->th.ompt_thread_info.state = omp_state_idle;
+ this_thr->th.ompt_thread_info.state = ompt_state_idle;
}
#endif
/* This is the place where threads wait for work */
@@ -5629,7 +5698,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
@@ -5649,7 +5718,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- this_thr->th.ompt_thread_info.state = omp_state_work_parallel;
+ this_thr->th.ompt_thread_info.state = ompt_state_work_parallel;
}
#endif
@@ -5664,9 +5733,9 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) {
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
/* no frame set while outside task */
- __ompt_get_task_info_object(0)->frame.exit_frame = NULL;
+ __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none;
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
#endif
/* join barrier after parallel region */
@@ -6960,7 +7029,7 @@ int __kmp_invoke_task_func(int gtid) {
if (ompt_enabled.enabled) {
exit_runtime_p = &(
- team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame);
+ team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr);
} else {
exit_runtime_p = &dummy;
}
@@ -7200,10 +7269,10 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) {
__kmp_join_barrier(gtid); /* wait for everyone */
#if OMPT_SUPPORT
if (ompt_enabled.enabled &&
- this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) {
+ this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr);
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
if (KMP_MASTER_TID(ds_tid) &&
@@ -7407,6 +7476,12 @@ void __kmp_cleanup(void) {
__kmp_nested_proc_bind.bind_types = NULL;
__kmp_nested_proc_bind.size = 0;
__kmp_nested_proc_bind.used = 0;
+#if OMP_50_ENABLED
+ if (__kmp_affinity_format) {
+ KMP_INTERNAL_FREE(__kmp_affinity_format);
+ __kmp_affinity_format = NULL;
+ }
+#endif
__kmp_i18n_catclose();
@@ -7563,6 +7638,339 @@ void __kmp_aux_set_library(enum library_type arg) {
}
}
+/* Getting team information common for all team API */
+// Returns NULL if not in teams construct
+static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) {
+ kmp_info_t *thr = __kmp_entry_thread();
+ teams_serialized = 0;
+ if (thr->th.th_teams_microtask) {
+ kmp_team_t *team = thr->th.th_team;
+ int tlevel = thr->th.th_teams_level; // the level of the teams construct
+ int ii = team->t.t_level;
+ teams_serialized = team->t.t_serialized;
+ int level = tlevel + 1;
+ KMP_DEBUG_ASSERT(ii >= tlevel);
+ while (ii > level) {
+ for (teams_serialized = team->t.t_serialized;
+ (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) {
+ }
+ if (team->t.t_serialized && (!teams_serialized)) {
+ team = team->t.t_parent;
+ continue;
+ }
+ if (ii > level) {
+ team = team->t.t_parent;
+ ii--;
+ }
+ }
+ return team;
+ }
+ return NULL;
+}
+
+int __kmp_aux_get_team_num() {
+ int serialized;
+ kmp_team_t *team = __kmp_aux_get_team_info(serialized);
+ if (team) {
+ if (serialized > 1) {
+ return 0; // teams region is serialized ( 1 team of 1 thread ).
+ } else {
+ return team->t.t_master_tid;
+ }
+ }
+ return 0;
+}
+
+int __kmp_aux_get_num_teams() {
+ int serialized;
+ kmp_team_t *team = __kmp_aux_get_team_info(serialized);
+ if (team) {
+ if (serialized > 1) {
+ return 1;
+ } else {
+ return team->t.t_parent->t.t_nproc;
+ }
+ }
+ return 1;
+}
+
+/* ------------------------------------------------------------------------ */
+
+#if OMP_50_ENABLED
+/*
+ * Affinity Format Parser
+ *
+ * Field is in form of: %[[[0].]size]type
+ * % and type are required (%% means print a literal '%')
+ * type is either single char or long name surrounded by {},
+ * e.g., N or {num_threads}
+ * 0 => leading zeros
+ * . => right justified when size is specified
+ * by default output is left justified
+ * size is the *minimum* field length
+ * All other characters are printed as is
+ *
+ * Available field types:
+ * L {thread_level} - omp_get_level()
+ * n {thread_num} - omp_get_thread_num()
+ * h {host} - name of host machine
+ * P {process_id} - process id (integer)
+ * T {thread_identifier} - native thread identifier (integer)
+ * N {num_threads} - omp_get_num_threads()
+ * A {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1)
+ * a {thread_affinity} - comma separated list of integers or integer ranges
+ * (values of affinity mask)
+ *
+ * Implementation-specific field types can be added
+ * If a type is unknown, print "undefined"
+*/
+
+// Structure holding the short name, long name, and corresponding data type
+// for snprintf. A table of these will represent the entire valid keyword
+// field types.
+typedef struct kmp_affinity_format_field_t {
+ char short_name; // from spec e.g., L -> thread level
+ const char *long_name; // from spec thread_level -> thread level
+ char field_format; // data type for snprintf (typically 'd' or 's'
+ // for integer or string)
+} kmp_affinity_format_field_t;
+
+static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = {
+#if KMP_AFFINITY_SUPPORTED
+ {'A', "thread_affinity", 's'},
+#endif
+ {'t', "team_num", 'd'},
+ {'T', "num_teams", 'd'},
+ {'L', "nesting_level", 'd'},
+ {'n', "thread_num", 'd'},
+ {'N', "num_threads", 'd'},
+ {'a', "ancestor_tnum", 'd'},
+ {'H', "host", 's'},
+ {'P', "process_id", 'd'},
+ {'i', "native_thread_id", 'd'}};
+
+// Return the number of characters it takes to hold field
+static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th,
+ const char **ptr,
+ kmp_str_buf_t *field_buffer) {
+ int rc, format_index, field_value;
+ const char *width_left, *width_right;
+ bool pad_zeros, right_justify, parse_long_name, found_valid_name;
+ static const int FORMAT_SIZE = 20;
+ char format[FORMAT_SIZE] = {0};
+ char absolute_short_name = 0;
+
+ KMP_DEBUG_ASSERT(gtid >= 0);
+ KMP_DEBUG_ASSERT(th);
+ KMP_DEBUG_ASSERT(**ptr == '%');
+ KMP_DEBUG_ASSERT(field_buffer);
+
+ __kmp_str_buf_clear(field_buffer);
+
+ // Skip the initial %
+ (*ptr)++;
+
+ // Check for %% first
+ if (**ptr == '%') {
+ __kmp_str_buf_cat(field_buffer, "%", 1);
+ (*ptr)++; // skip over the second %
+ return 1;
+ }
+
+ // Parse field modifiers if they are present
+ pad_zeros = false;
+ if (**ptr == '0') {
+ pad_zeros = true;
+ (*ptr)++; // skip over 0
+ }
+ right_justify = false;
+ if (**ptr == '.') {
+ right_justify = true;
+ (*ptr)++; // skip over .
+ }
+ // Parse width of field: [width_left, width_right)
+ width_left = width_right = NULL;
+ if (**ptr >= '0' && **ptr <= '9') {
+ width_left = *ptr;
+ SKIP_DIGITS(*ptr);
+ width_right = *ptr;
+ }
+
+ // Create the format for KMP_SNPRINTF based on flags parsed above
+ format_index = 0;
+ format[format_index++] = '%';
+ if (!right_justify)
+ format[format_index++] = '-';
+ if (pad_zeros)
+ format[format_index++] = '0';
+ if (width_left && width_right) {
+ int i = 0;
+ // Only allow 8 digit number widths.
+ // This also prevents overflowing format variable
+ while (i < 8 && width_left < width_right) {
+ format[format_index++] = *width_left;
+ width_left++;
+ i++;
+ }
+ }
+
+ // Parse a name (long or short)
+ // Canonicalize the name into absolute_short_name
+ found_valid_name = false;
+ parse_long_name = (**ptr == '{');
+ if (parse_long_name)
+ (*ptr)++; // skip initial left brace
+ for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) /
+ sizeof(__kmp_affinity_format_table[0]);
+ ++i) {
+ char short_name = __kmp_affinity_format_table[i].short_name;
+ const char *long_name = __kmp_affinity_format_table[i].long_name;
+ char field_format = __kmp_affinity_format_table[i].field_format;
+ if (parse_long_name) {
+ int length = KMP_STRLEN(long_name);
+ if (strncmp(*ptr, long_name, length) == 0) {
+ found_valid_name = true;
+ (*ptr) += length; // skip the long name
+ }
+ } else if (**ptr == short_name) {
+ found_valid_name = true;
+ (*ptr)++; // skip the short name
+ }
+ if (found_valid_name) {
+ format[format_index++] = field_format;
+ format[format_index++] = '\0';
+ absolute_short_name = short_name;
+ break;
+ }
+ }
+ if (parse_long_name) {
+ if (**ptr != '}') {
+ absolute_short_name = 0;
+ } else {
+ (*ptr)++; // skip over the right brace
+ }
+ }
+
+ // Attempt to fill the buffer with the requested
+ // value using snprintf within __kmp_str_buf_print()
+ switch (absolute_short_name) {
+ case 't':
+ rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_team_num());
+ break;
+ case 'T':
+ rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams());
+ break;
+ case 'L':
+ rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level);
+ break;
+ case 'n':
+ rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid));
+ break;
+ case 'H': {
+ static const int BUFFER_SIZE = 256;
+ char buf[BUFFER_SIZE];
+ __kmp_expand_host_name(buf, BUFFER_SIZE);
+ rc = __kmp_str_buf_print(field_buffer, format, buf);
+ } break;
+ case 'P':
+ rc = __kmp_str_buf_print(field_buffer, format, getpid());
+ break;
+ case 'i':
+ rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid());
+ break;
+ case 'N':
+ rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc);
+ break;
+ case 'a':
+ field_value =
+ __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1);
+ rc = __kmp_str_buf_print(field_buffer, format, field_value);
+ break;
+#if KMP_AFFINITY_SUPPORTED
+ case 'A': {
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask);
+ rc = __kmp_str_buf_print(field_buffer, format, buf.str);
+ __kmp_str_buf_free(&buf);
+ } break;
+#endif
+ default:
+ // According to spec, If an implementation does not have info for field
+ // type, then "undefined" is printed
+ rc = __kmp_str_buf_print(field_buffer, "%s", "undefined");
+ // Skip the field
+ if (parse_long_name) {
+ SKIP_TOKEN(*ptr);
+ if (**ptr == '}')
+ (*ptr)++;
+ } else {
+ (*ptr)++;
+ }
+ }
+
+ KMP_ASSERT(format_index <= FORMAT_SIZE);
+ return rc;
+}
+
+/*
+ * Return number of characters needed to hold the affinity string
+ * (not including null byte character)
+ * The resultant string is printed to buffer, which the caller can then
+ * handle afterwards
+*/
+size_t __kmp_aux_capture_affinity(int gtid, const char *format,
+ kmp_str_buf_t *buffer) {
+ const char *parse_ptr;
+ size_t retval;
+ const kmp_info_t *th;
+ kmp_str_buf_t field;
+
+ KMP_DEBUG_ASSERT(buffer);
+ KMP_DEBUG_ASSERT(gtid >= 0);
+
+ __kmp_str_buf_init(&field);
+ __kmp_str_buf_clear(buffer);
+
+ th = __kmp_threads[gtid];
+ retval = 0;
+
+ // If format is NULL or zero-length string, then we use
+ // affinity-format-var ICV
+ parse_ptr = format;
+ if (parse_ptr == NULL || *parse_ptr == '\0') {
+ parse_ptr = __kmp_affinity_format;
+ }
+ KMP_DEBUG_ASSERT(parse_ptr);
+
+ while (*parse_ptr != '\0') {
+ // Parse a field
+ if (*parse_ptr == '%') {
+ // Put field in the buffer
+ int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field);
+ __kmp_str_buf_catbuf(buffer, &field);
+ retval += rc;
+ } else {
+ // Put literal character in buffer
+ __kmp_str_buf_cat(buffer, parse_ptr, 1);
+ retval++;
+ parse_ptr++;
+ }
+ }
+ __kmp_str_buf_free(&field);
+ return retval;
+}
+
+// Displays the affinity string to stdout
+void __kmp_aux_display_affinity(int gtid, const char *format) {
+ kmp_str_buf_t buf;
+ __kmp_str_buf_init(&buf);
+ __kmp_aux_capture_affinity(gtid, format, &buf);
+ __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str);
+ __kmp_str_buf_free(&buf);
+}
+#endif // OMP_50_ENABLED
+
/* ------------------------------------------------------------------------ */
void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) {
@@ -7667,8 +8075,8 @@ __kmp_determine_reduction_method(
#if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \
- KMP_OS_DARWIN || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
int teamsize_cutoff = 4;
@@ -7691,8 +8099,8 @@ __kmp_determine_reduction_method(
}
#else
#error "Unknown or unsupported OS"
-#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS ||
-// KMP_OS_DARWIN
+#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
+ // KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD
#elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS
diff --git a/runtime/src/kmp_safe_c_api.h b/runtime/src/kmp_safe_c_api.h
index 9d0da0b..d894fe3 100644
--- a/runtime/src/kmp_safe_c_api.h
+++ b/runtime/src/kmp_safe_c_api.h
@@ -11,11 +11,14 @@
#ifndef KMP_SAFE_C_API_H
#define KMP_SAFE_C_API_H
+#include "kmp_platform.h"
+#include <string.h>
+
// Replacement for banned C API
// Not every unsafe call listed here is handled now, but keeping everything
// in one place should be handy for future maintenance.
-#if KMP_OS_WINDOWS
+#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT
#define RSIZE_MAX_STR (4UL << 10) // 4KB
@@ -57,4 +60,16 @@
#endif // KMP_OS_WINDOWS
+// Offer truncated version of strncpy
+static inline void __kmp_strncpy_truncate(char *buffer, size_t buf_size,
+ char const *src, size_t src_size) {
+ if (src_size >= buf_size) {
+ src_size = buf_size - 1;
+ KMP_STRNCPY_S(buffer, buf_size, src, src_size);
+ buffer[buf_size - 1] = '\0';
+ } else {
+ KMP_STRNCPY_S(buffer, buf_size, src, src_size);
+ }
+}
+
#endif // KMP_SAFE_C_API_H
diff --git a/runtime/src/kmp_settings.cpp b/runtime/src/kmp_settings.cpp
index d855de8..6d049e4 100644
--- a/runtime/src/kmp_settings.cpp
+++ b/runtime/src/kmp_settings.cpp
@@ -410,7 +410,7 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
int *out_range, char *out_routine,
char *out_file, int *out_lb,
int *out_ub) {
- size_t len = KMP_STRLEN(value + 1);
+ size_t len = KMP_STRLEN(value) + 1;
par_range_to_print = (char *)KMP_INTERNAL_MALLOC(len + 1);
KMP_STRNCPY_S(par_range_to_print, len + 1, value, len + 1);
__kmp_par_range = +1;
@@ -418,7 +418,7 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value,
__kmp_par_range_ub = INT_MAX;
for (;;) {
unsigned int len;
- if ((value == NULL) || (*value == '\0')) {
+ if (*value == '\0') {
break;
}
if (!__kmp_strcasecmp_with_sentinel("routine", value, '=')) {
@@ -3252,7 +3252,29 @@ static void __kmp_stg_print_proc_bind(kmp_str_buf_t *buffer, char const *name,
#endif /* OMP_40_ENABLED */
#if OMP_50_ENABLED
-
+static void __kmp_stg_parse_display_affinity(char const *name,
+ char const *value, void *data) {
+ __kmp_stg_parse_bool(name, value, &__kmp_display_affinity);
+}
+static void __kmp_stg_print_display_affinity(kmp_str_buf_t *buffer,
+ char const *name, void *data) {
+ __kmp_stg_print_bool(buffer, name, __kmp_display_affinity);
+}
+static void __kmp_stg_parse_affinity_format(char const *name, char const *value,
+ void *data) {
+ size_t length = KMP_STRLEN(value);
+ __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, value,
+ length);
+}
+static void __kmp_stg_print_affinity_format(kmp_str_buf_t *buffer,
+ char const *name, void *data) {
+ if (__kmp_env_format) {
+ KMP_STR_BUF_PRINT_NAME_EX(name);
+ } else {
+ __kmp_str_buf_print(buffer, " %s='", name);
+ }
+ __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format);
+}
// OMP_ALLOCATOR sets default allocator
static void __kmp_stg_parse_allocator(char const *name, char const *value,
void *data) {
@@ -4879,7 +4901,12 @@ static kmp_setting_t __kmp_stg_table[] = {
#endif
#endif // KMP_AFFINITY_SUPPORTED
-
+#if OMP_50_ENABLED
+ {"OMP_DISPLAY_AFFINITY", __kmp_stg_parse_display_affinity,
+ __kmp_stg_print_display_affinity, NULL, 0, 0},
+ {"OMP_AFFINITY_FORMAT", __kmp_stg_parse_affinity_format,
+ __kmp_stg_print_affinity_format, NULL, 0, 0},
+#endif
{"KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork,
__kmp_stg_print_init_at_fork, NULL, 0, 0},
{"KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL,
@@ -5409,6 +5436,21 @@ void __kmp_env_initialize(char const *string) {
}
#endif /* OMP_40_ENABLED */
+#if OMP_50_ENABLED
+ // Set up the affinity format ICV
+ // Grab the default affinity format string from the message catalog
+ kmp_msg_t m =
+ __kmp_msg_format(kmp_i18n_msg_AffFormatDefault, "%P", "%i", "%n", "%A");
+ KMP_DEBUG_ASSERT(KMP_STRLEN(m.str) < KMP_AFFINITY_FORMAT_SIZE);
+
+ if (__kmp_affinity_format == NULL) {
+ __kmp_affinity_format =
+ (char *)KMP_INTERNAL_MALLOC(sizeof(char) * KMP_AFFINITY_FORMAT_SIZE);
+ }
+ KMP_STRCPY_S(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, m.str);
+ __kmp_str_free(&m.str);
+#endif
+
// Now process all of the settings.
for (i = 0; i < block.count; ++i) {
__kmp_stg_parse(block.vars[i].name, block.vars[i].value);
@@ -5513,7 +5555,7 @@ void __kmp_env_initialize(char const *string) {
// then determine if it is equal to that single group.
if (within_one_group) {
DWORD num_bits_in_group = __kmp_GetActiveProcessorCount(group);
- int num_bits_in_mask = 0;
+ DWORD num_bits_in_mask = 0;
for (int bit = init_mask->begin(); bit != init_mask->end();
bit = init_mask->next(bit))
num_bits_in_mask++;
diff --git a/runtime/src/kmp_str.cpp b/runtime/src/kmp_str.cpp
index 04c4056..5338edf 100644
--- a/runtime/src/kmp_str.cpp
+++ b/runtime/src/kmp_str.cpp
@@ -143,13 +143,28 @@ void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) {
KMP_STR_BUF_INVARIANT(buffer);
} // __kmp_str_buf_cat
-void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
- va_list args) {
+void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) {
+ KMP_DEBUG_ASSERT(dest);
+ KMP_DEBUG_ASSERT(src);
+ KMP_STR_BUF_INVARIANT(dest);
+ KMP_STR_BUF_INVARIANT(src);
+ if (!src->str || !src->used)
+ return;
+ __kmp_str_buf_reserve(dest, dest->used + src->used + 1);
+ KMP_MEMCPY(dest->str + dest->used, src->str, src->used);
+ dest->str[dest->used + src->used] = 0;
+ dest->used += src->used;
+ KMP_STR_BUF_INVARIANT(dest);
+} // __kmp_str_buf_catbuf
+
+// Return the number of characters written
+int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+ va_list args) {
+ int rc;
KMP_STR_BUF_INVARIANT(buffer);
for (;;) {
int const free = buffer->size - buffer->used;
- int rc;
int size;
// Try to format string.
@@ -198,13 +213,17 @@ void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
KMP_DEBUG_ASSERT(buffer->size > 0);
KMP_STR_BUF_INVARIANT(buffer);
+ return rc;
} // __kmp_str_buf_vprint
-void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
+// Return the number of characters written
+int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) {
+ int rc;
va_list args;
va_start(args, format);
- __kmp_str_buf_vprint(buffer, format, args);
+ rc = __kmp_str_buf_vprint(buffer, format, args);
va_end(args);
+ return rc;
} // __kmp_str_buf_print
/* The function prints specified size to buffer. Size is expressed using biggest
diff --git a/runtime/src/kmp_str.h b/runtime/src/kmp_str.h
index 02a2032..c30255d 100644
--- a/runtime/src/kmp_str.h
+++ b/runtime/src/kmp_str.h
@@ -51,9 +51,10 @@ void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size);
void __kmp_str_buf_detach(kmp_str_buf_t *buffer);
void __kmp_str_buf_free(kmp_str_buf_t *buffer);
void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len);
-void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
- va_list args);
-void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
+void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src);
+int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format,
+ va_list args);
+int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...);
void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size);
/* File name parser.
diff --git a/runtime/src/kmp_stub.cpp b/runtime/src/kmp_stub.cpp
index e26e084..c1f3bf3 100644
--- a/runtime/src/kmp_stub.cpp
+++ b/runtime/src/kmp_stub.cpp
@@ -35,6 +35,10 @@
#define omp_set_num_threads ompc_set_num_threads
#define omp_set_dynamic ompc_set_dynamic
#define omp_set_nested ompc_set_nested
+#define omp_set_affinity_format ompc_set_affinity_format
+#define omp_get_affinity_format ompc_get_affinity_format
+#define omp_display_affinity ompc_display_affinity
+#define omp_capture_affinity ompc_capture_affinity
#define kmp_set_stacksize kmpc_set_stacksize
#define kmp_set_stacksize_s kmpc_set_stacksize_s
#define kmp_set_blocktime kmpc_set_blocktime
@@ -350,6 +354,17 @@ const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5;
const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6;
const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7;
const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8;
+/* OpenMP 5.0 Affinity Format */
+void omp_set_affinity_format(char const *format) { i; }
+size_t omp_get_affinity_format(char *buffer, size_t size) {
+ i;
+ return 0;
+}
+void omp_display_affinity(char const *format) { i; }
+size_t omp_capture_affinity(char *buffer, size_t buf_size, char const *format) {
+ i;
+ return 0;
+}
#endif /* OMP_50_ENABLED */
// end of file //
diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp
index b48c5b6..6c810dd 100644
--- a/runtime/src/kmp_taskdeps.cpp
+++ b/runtime/src/kmp_taskdeps.cpp
@@ -466,9 +466,9 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
OMPT_STORE_RETURN_ADDRESS(gtid);
- if (!current_task->ompt_task_info.frame.enter_frame)
- current_task->ompt_task_info.frame.enter_frame =
- OMPT_GET_FRAME_ADDRESS(1);
+ if (!current_task->ompt_task_info.frame.enter_frame.ptr)
+ current_task->ompt_task_info.frame.enter_frame.ptr =
+ OMPT_GET_FRAME_ADDRESS(0);
if (ompt_enabled.ompt_callback_task_create) {
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
@@ -479,7 +479,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
- new_taskdata->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(0);
+ new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#if OMPT_OPTIONAL
@@ -566,7 +566,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- current_task->ompt_task_info.frame.enter_frame = NULL;
+ current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif
return TASK_CURRENT_NOT_QUEUED;
@@ -586,7 +586,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid,
kmp_int32 ret = __kmp_omp_task(gtid, new_task, true);
#if OMPT_SUPPORT
if (ompt_enabled.enabled) {
- current_task->ompt_task_info.frame.enter_frame = NULL;
+ current_task->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif
return ret;
diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp
index 2d74686..9c61a12 100644
--- a/runtime/src/kmp_tasking.cpp
+++ b/runtime/src/kmp_tasking.cpp
@@ -547,8 +547,10 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task,
static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) {
// The calls to __ompt_task_init already have the ompt_enabled condition.
task->ompt_task_info.task_data.value = 0;
- task->ompt_task_info.frame.exit_frame = NULL;
- task->ompt_task_info.frame.enter_frame = NULL;
+ task->ompt_task_info.frame.exit_frame = ompt_data_none;
+ task->ompt_task_info.frame.enter_frame = ompt_data_none;
+ task->ompt_task_info.frame.exit_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
+ task->ompt_task_info.frame.enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
#if OMP_40_ENABLED
task->ompt_task_info.ndeps = 0;
task->ompt_task_info.deps = NULL;
@@ -627,9 +629,11 @@ static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid,
#if OMPT_SUPPORT
if (ompt) {
- if (current_task->ompt_task_info.frame.enter_frame == NULL) {
- current_task->ompt_task_info.frame.enter_frame =
- taskdata->ompt_task_info.frame.exit_frame = frame_address;
+ if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) {
+ current_task->ompt_task_info.frame.enter_frame.ptr =
+ taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address;
+ current_task->ompt_task_info.frame.enter_frame_flags =
+ taskdata->ompt_task_info.frame.exit_frame_flags = ompt_frame_application | ompt_frame_framepointer;
}
if (ompt_enabled.ompt_callback_task_create) {
ompt_task_info_t *parent_info = &(current_task->ompt_task_info);
@@ -811,8 +815,10 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task,
kmp_taskdata_t *resumed_task) {
kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task);
kmp_info_t *thread = __kmp_threads[gtid];
+#if OMP_45_ENABLED
kmp_task_team_t *task_team =
thread->th.th_task_team; // might be NULL for serial teams...
+#endif // OMP_45_ENABLED
kmp_int32 children = 0;
KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming "
@@ -964,9 +970,10 @@ static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref,
#if OMPT_SUPPORT
if (ompt) {
- omp_frame_t *ompt_frame;
+ ompt_frame_t *ompt_frame;
__ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
- ompt_frame->enter_frame = NULL;
+ ompt_frame->enter_frame = ompt_data_none;
+ ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer;
}
#endif
@@ -1392,6 +1399,28 @@ kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid,
return retval;
}
+#if OMP_50_ENABLED
+/*!
+@ingroup TASKING
+@param loc_ref location of the original task directive
+@param gtid Global Thread ID of encountering thread
+@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new
+task''
+@param naffins Number of affinity items
+@param affin_list List of affinity items
+@return Returns non-zero if registering affinity information was not successful.
+ Returns 0 if registration was successful
+This entry registers the affinity information attached to a task with the task
+thunk structure kmp_taskdata_t.
+*/
+kmp_int32
+__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid,
+ kmp_task_t *new_task, kmp_int32 naffins,
+ kmp_task_affinity_info_t *affin_list) {
+ return 0;
+}
+#endif
+
// __kmp_invoke_task: invoke the specified task
//
// gtid: global thread ID of caller
@@ -1438,9 +1467,9 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
oldInfo = thread->th.ompt_thread_info;
thread->th.ompt_thread_info.wait_id = 0;
thread->th.ompt_thread_info.state = (thread->th.th_team_serialized)
- ? omp_state_work_serial
- : omp_state_work_parallel;
- taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0);
+ ? ompt_state_work_serial
+ : ompt_state_work_parallel;
+ taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
#endif
@@ -1566,7 +1595,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task,
if (UNLIKELY(ompt_enabled.enabled)) {
thread->th.ompt_thread_info = oldInfo;
if (taskdata->td_flags.tiedness == TASK_TIED) {
- taskdata->ompt_task_info.frame.exit_frame = NULL;
+ taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
}
__kmp_task_finish<true>(gtid, task, current_task);
} else
@@ -1634,7 +1663,7 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid,
ANNOTATE_HAPPENS_BEFORE(new_task);
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled)) {
- parent->ompt_task_info.frame.enter_frame = NULL;
+ parent->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif
return TASK_CURRENT_NOT_QUEUED;
@@ -1703,8 +1732,8 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
if (!new_taskdata->td_flags.started) {
OMPT_STORE_RETURN_ADDRESS(gtid);
parent = new_taskdata->td_parent;
- if (!parent->ompt_task_info.frame.enter_frame) {
- parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (!parent->ompt_task_info.frame.enter_frame.ptr) {
+ parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
}
if (ompt_enabled.ompt_callback_task_create) {
ompt_data_t task_data = ompt_data_none;
@@ -1721,7 +1750,7 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
__ompt_task_finish(new_task,
new_taskdata->ompt_task_info.scheduling_parent,
ompt_task_switch);
- new_taskdata->ompt_task_info.frame.exit_frame = NULL;
+ new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none;
}
}
#endif
@@ -1733,7 +1762,7 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid,
gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
- parent->ompt_task_info.frame.enter_frame = NULL;
+ parent->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif
return res;
@@ -1767,8 +1796,8 @@ kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
kmp_taskdata_t *parent = NULL;
if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) {
parent = new_taskdata->td_parent;
- if (!parent->ompt_task_info.frame.enter_frame)
- parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1);
+ if (!parent->ompt_task_info.frame.enter_frame.ptr)
+ parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
if (ompt_enabled.ompt_callback_task_create) {
ompt_data_t task_data = ompt_data_none;
ompt_callbacks.ompt_callback(ompt_callback_task_create)(
@@ -1788,7 +1817,7 @@ kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid,
gtid, loc_ref, new_taskdata));
#if OMPT_SUPPORT
if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) {
- parent->ompt_task_info.frame.enter_frame = NULL;
+ parent->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif
return res;
@@ -1817,7 +1846,7 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
my_task_data = &(taskdata->ompt_task_info.task_data);
my_parallel_data = OMPT_CUR_TEAM_DATA(thread);
- taskdata->ompt_task_info.frame.enter_frame = frame_address;
+ taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address;
if (ompt_enabled.ompt_callback_sync_region) {
ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
@@ -1886,7 +1915,7 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid,
ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data,
my_task_data, return_address);
}
- taskdata->ompt_task_info.frame.enter_frame = NULL;
+ taskdata->ompt_task_info.frame.enter_frame = ompt_data_none;
}
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
@@ -1916,7 +1945,7 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) {
#if OMPT_SUPPORT && OMPT_OPTIONAL
if (UNLIKELY(ompt_enabled.enabled)) {
OMPT_STORE_RETURN_ADDRESS(gtid);
- return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(1),
+ return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0),
OMPT_LOAD_RETURN_ADDRESS(gtid));
}
#endif
diff --git a/runtime/src/kmp_utility.cpp b/runtime/src/kmp_utility.cpp
index 06090e6..dc4c714 100644
--- a/runtime/src/kmp_utility.cpp
+++ b/runtime/src/kmp_utility.cpp
@@ -375,7 +375,11 @@ void __kmp_expand_file_name(char *result, size_t rlen, char *pattern) {
case 'I':
case 'i': {
pid_t id = getpid();
+#if KMP_ARCH_X86_64 && defined(__MINGW32__)
+ snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*lld", width, id);
+#else
snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id);
+#endif
if (snp_result >= 0 && snp_result <= end - pos) {
while (*pos)
++pos;
diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h
index ec489d1..e2984a8 100644
--- a/runtime/src/kmp_wait_release.h
+++ b/runtime/src/kmp_wait_release.h
@@ -120,12 +120,12 @@ public:
#if OMPT_SUPPORT
static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
- omp_state_t omp_state,
+ ompt_state_t ompt_state,
ompt_data_t *tId,
ompt_data_t *pId) {
int ds_tid = this_thr->th.th_info.ds.ds_tid;
- if (omp_state == omp_state_wait_barrier_implicit) {
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ if (ompt_state == ompt_state_wait_barrier_implicit) {
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
#if OMPT_OPTIONAL
void *codeptr = NULL;
if (ompt_enabled.ompt_callback_sync_region_wait) {
@@ -143,9 +143,9 @@ static inline void __ompt_implicit_task_end(kmp_info_t *this_thr,
ompt_scope_end, NULL, tId, 0, ds_tid);
}
// return to idle state
- this_thr->th.ompt_thread_info.state = omp_state_idle;
+ this_thr->th.ompt_thread_info.state = ompt_state_idle;
} else {
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
}
}
@@ -199,27 +199,27 @@ THIS function is called from
function.
Events are triggered in the calling code (__kmp_barrier):
- state := omp_state_overhead
+ state := ompt_state_overhead
barrier-begin
barrier-wait-begin
- state := omp_state_wait_barrier
+ state := ompt_state_wait_barrier
call join-barrier-implementation (finally arrive here)
{}
call fork-barrier-implementation (finally arrive here)
{}
- state := omp_state_overhead
+ state := ompt_state_overhead
barrier-wait-end
barrier-end
- state := omp_state_work_parallel
+ state := ompt_state_work_parallel
__kmp_fork_barrier (after thread creation, before executing implicit task)
call fork-barrier-implementation (finally arrive here)
- {} // worker arrive here with state = omp_state_idle
+ {} // worker arrive here with state = ompt_state_idle
__kmp_join_barrier (implicit barrier at end of parallel region)
- state := omp_state_barrier_implicit
+ state := ompt_state_barrier_implicit
barrier-begin
barrier-wait-begin
call join-barrier-implementation (finally arrive here
@@ -234,19 +234,19 @@ final_spin=FALSE)
barrier-end
implicit-task-end
idle-begin
- state := omp_state_idle
+ state := ompt_state_idle
- Before leaving, if state = omp_state_idle
+ Before leaving, if state = ompt_state_idle
idle-end
- state := omp_state_overhead
+ state := ompt_state_overhead
*/
#if OMPT_SUPPORT
- omp_state_t ompt_entry_state;
+ ompt_state_t ompt_entry_state;
ompt_data_t *pId = NULL;
ompt_data_t *tId;
if (ompt_enabled.enabled) {
ompt_entry_state = this_thr->th.ompt_thread_info.state;
- if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit ||
+ if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
ompt_lw_taskteam_t *team =
this_thr->th.th_team->t.ompt_serialized_team_info;
@@ -432,16 +432,16 @@ final_spin=FALSE)
}
#if OMPT_SUPPORT
- omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
- if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) {
+ ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
+ if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
#if OMPT_OPTIONAL
if (final_spin) {
__ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId);
ompt_exit_state = this_thr->th.ompt_thread_info.state;
}
#endif
- if (ompt_exit_state == omp_state_idle) {
- this_thr->th.ompt_thread_info.state = omp_state_overhead;
+ if (ompt_exit_state == ompt_state_idle) {
+ this_thr->th.ompt_thread_info.state = ompt_state_overhead;
}
}
#endif
diff --git a/runtime/src/kmp_wrapper_getpid.h b/runtime/src/kmp_wrapper_getpid.h
index 5b4081a..47e2728 100644
--- a/runtime/src/kmp_wrapper_getpid.h
+++ b/runtime/src/kmp_wrapper_getpid.h
@@ -24,6 +24,9 @@
#if KMP_OS_DARWIN
// OS X
#define __kmp_gettid() syscall(SYS_thread_selfid)
+#elif KMP_OS_NETBSD
+#include <lwp.h>
+#define __kmp_gettid() _lwp_self()
#elif defined(SYS_gettid)
// Hopefully other Unix systems define SYS_gettid syscall for getting os thread
// id
@@ -39,7 +42,9 @@
// "process.h".
#include <process.h>
// Let us simulate Unix.
+#if KMP_MSVC_COMPAT
typedef int pid_t;
+#endif
#define getpid _getpid
#define __kmp_gettid() GetCurrentThreadId()
diff --git a/runtime/src/kmp_wrapper_malloc.h b/runtime/src/kmp_wrapper_malloc.h
index cf6f2be..c8d2c70 100644
--- a/runtime/src/kmp_wrapper_malloc.h
+++ b/runtime/src/kmp_wrapper_malloc.h
@@ -93,8 +93,10 @@
// Include alloca() declaration.
#if KMP_OS_WINDOWS
#include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h".
+#if KMP_MSVC_COMPAT
#define alloca _alloca // Allow to use alloca() with no underscore.
-#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
+#endif
+#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD
// Declared in "stdlib.h".
#elif KMP_OS_UNIX
#include <alloca.h> // Linux* OS and OS X*: alloc() declared in "alloca".
diff --git a/runtime/src/libomp.rc.var b/runtime/src/libomp.rc.var
index cf6a9c9..32449e2 100644
--- a/runtime/src/libomp.rc.var
+++ b/runtime/src/libomp.rc.var
@@ -11,7 +11,7 @@
////===----------------------------------------------------------------------===//
//
-#include "winres.h"
+#include "winresrc.h"
#include "kmp_config.h"
LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) resources
diff --git a/runtime/src/ompt-general.cpp b/runtime/src/ompt-general.cpp
index 8da5610..705b452 100644
--- a/runtime/src/ompt-general.cpp
+++ b/runtime/src/ompt-general.cpp
@@ -52,8 +52,8 @@
typedef struct {
const char *state_name;
- omp_state_t state_id;
-} omp_state_info_t;
+ ompt_state_t state_id;
+} ompt_state_info_t;
typedef struct {
const char *name;
@@ -73,10 +73,10 @@ enum tool_setting_e {
ompt_callbacks_active_t ompt_enabled;
-omp_state_info_t omp_state_info[] = {
-#define omp_state_macro(state, code) {#state, state},
- FOREACH_OMP_STATE(omp_state_macro)
-#undef omp_state_macro
+ompt_state_info_t ompt_state_info[] = {
+#define ompt_state_macro(state, code) {#state, state},
+ FOREACH_OMPT_STATE(ompt_state_macro)
+#undef ompt_state_macro
};
kmp_mutex_impl_info_t kmp_mutex_impl_info[] = {
@@ -353,7 +353,7 @@ void ompt_post_init() {
kmp_info_t *root_thread = ompt_get_thread();
- ompt_set_thread_state(root_thread, omp_state_overhead);
+ ompt_set_thread_state(root_thread, ompt_state_overhead);
if (ompt_enabled.ompt_callback_thread_begin) {
ompt_callbacks.ompt_callback(ompt_callback_thread_begin)(
@@ -366,7 +366,7 @@ void ompt_post_init() {
NULL, NULL, task_data, ompt_task_initial, 0, NULL);
}
- ompt_set_thread_state(root_thread, omp_state_work_serial);
+ ompt_set_thread_state(root_thread, ompt_state_work_serial);
}
}
@@ -388,13 +388,13 @@ void ompt_fini() {
OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state,
const char **next_state_name) {
- const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t);
+ const static int len = sizeof(ompt_state_info) / sizeof(ompt_state_info_t);
int i = 0;
for (i = 0; i < len - 1; i++) {
- if (omp_state_info[i].state_id == current_state) {
- *next_state = omp_state_info[i + 1].state_id;
- *next_state_name = omp_state_info[i + 1].state_name;
+ if (ompt_state_info[i].state_id == current_state) {
+ *next_state = ompt_state_info[i + 1].state_id;
+ *next_state_name = ompt_state_info[i + 1].state_name;
return 1;
}
}
@@ -482,11 +482,11 @@ OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level,
team_size);
}
-OMPT_API_ROUTINE omp_state_t ompt_get_state(omp_wait_id_t *wait_id) {
- omp_state_t thread_state = __ompt_get_state_internal(wait_id);
+OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *wait_id) {
+ ompt_state_t thread_state = __ompt_get_state_internal(wait_id);
- if (thread_state == omp_state_undefined) {
- thread_state = omp_state_work_serial;
+ if (thread_state == ompt_state_undefined) {
+ thread_state = ompt_state_work_serial;
}
return thread_state;
@@ -502,7 +502,7 @@ OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) {
OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type,
ompt_data_t **task_data,
- omp_frame_t **task_frame,
+ ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num) {
return __ompt_get_task_info_internal(ancestor_level, type, task_data,
diff --git a/runtime/src/ompt-internal.h b/runtime/src/ompt-internal.h
index 6b92eaa..c6823fc 100644
--- a/runtime/src/ompt-internal.h
+++ b/runtime/src/ompt-internal.h
@@ -54,7 +54,7 @@ typedef struct ompt_callbacks_active_s {
(info->td_flags.merged_if0 ? ompt_task_mergeable : 0x0)
typedef struct {
- omp_frame_t frame;
+ ompt_frame_t frame;
ompt_data_t task_data;
struct kmp_taskdata *scheduling_parent;
int thread_num;
@@ -81,8 +81,8 @@ typedef struct {
ompt_data_t task_data; /* stored here from implicit barrier-begin until
implicit-task-end */
void *return_address; /* stored here on entry of runtime */
- omp_state_t state;
- omp_wait_id_t wait_id;
+ ompt_state_t state;
+ ompt_wait_id_t wait_id;
int ompt_task_yielded;
void *idle_frame;
} ompt_thread_info_t;
diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp
index 23d09aa..cc4f1de 100644
--- a/runtime/src/ompt-specific.cpp
+++ b/runtime/src/ompt-specific.cpp
@@ -211,10 +211,10 @@ ompt_data_t *__ompt_get_thread_data_internal() {
void __ompt_thread_assign_wait_id(void *variable) {
kmp_info_t *ti = ompt_get_thread();
- ti->th.ompt_thread_info.wait_id = (omp_wait_id_t)variable;
+ ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable;
}
-omp_state_t __ompt_get_state_internal(omp_wait_id_t *omp_wait_id) {
+ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) {
kmp_info_t *ti = ompt_get_thread();
if (ti) {
@@ -222,7 +222,7 @@ omp_state_t __ompt_get_state_internal(omp_wait_id_t *omp_wait_id) {
*omp_wait_id = ti->th.ompt_thread_info.wait_id;
return ti->th.ompt_thread_info.state;
}
- return omp_state_undefined;
+ return ompt_state_undefined;
}
//----------------------------------------------------------
@@ -259,8 +259,8 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid,
lwt->ompt_team_info.parallel_data = *ompt_pid;
lwt->ompt_team_info.master_return_address = codeptr;
lwt->ompt_task_info.task_data.value = 0;
- lwt->ompt_task_info.frame.enter_frame = NULL;
- lwt->ompt_task_info.frame.exit_frame = NULL;
+ lwt->ompt_task_info.frame.enter_frame = ompt_data_none;
+ lwt->ompt_task_info.frame.exit_frame = ompt_data_none;
lwt->ompt_task_info.scheduling_parent = NULL;
lwt->ompt_task_info.deps = NULL;
lwt->ompt_task_info.ndeps = 0;
@@ -328,7 +328,7 @@ void __ompt_lw_taskteam_unlink(kmp_info_t *thr) {
int __ompt_get_task_info_internal(int ancestor_level, int *type,
ompt_data_t **task_data,
- omp_frame_t **task_frame,
+ ompt_frame_t **task_frame,
ompt_data_t **parallel_data,
int *thread_num) {
if (__kmp_get_gtid() < 0)
diff --git a/runtime/src/ompt-specific.h b/runtime/src/ompt-specific.h
index 8cf7450..317580f 100644
--- a/runtime/src/ompt-specific.h
+++ b/runtime/src/ompt-specific.h
@@ -41,7 +41,7 @@ int __ompt_get_parallel_info_internal(int ancestor_level,
int __ompt_get_task_info_internal(int ancestor_level, int *type,
ompt_data_t **task_data,
- omp_frame_t **task_frame,
+ ompt_frame_t **task_frame,
ompt_data_t **parallel_data, int *thread_num);
ompt_data_t *__ompt_get_thread_data_internal();
@@ -93,7 +93,7 @@ inline kmp_info_t *ompt_get_thread() {
return ompt_get_thread_gtid(gtid);
}
-inline void ompt_set_thread_state(kmp_info_t *thread, omp_state_t state) {
+inline void ompt_set_thread_state(kmp_info_t *thread, ompt_state_t state) {
thread->th.ompt_thread_info.state = state;
}
diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/runtime/src/thirdparty/ittnotify/ittnotify_static.c
index 75ef966..63e1b0c 100644
--- a/runtime/src/thirdparty/ittnotify/ittnotify_static.c
+++ b/runtime/src/thirdparty/ittnotify/ittnotify_static.c
@@ -12,7 +12,11 @@
#include "ittnotify_config.h"
#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if defined(__MINGW32__)
+#include <limits.h>
+#else
#define PATH_MAX 512
+#endif
#else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */
#include <limits.h>
#include <dlfcn.h>
@@ -28,7 +32,9 @@
#include "ittnotify.h"
#include "legacy/ittnotify.h"
+#if KMP_MSVC_COMPAT
#include "disable_warnings.h"
+#endif
static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n";
@@ -194,7 +200,7 @@ static __itt_group_alias group_alias[] = {
#pragma pack(pop)
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -217,7 +223,7 @@ static __itt_api_info api_list[] = {
{NULL, NULL, NULL, NULL, __itt_group_none}
};
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
#pragma warning(pop)
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -252,7 +258,7 @@ typedef void (__itt_api_fini_t)(__itt_global*);
ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args);
#endif /* ITT_NOTIFY_EXT_REPORT */
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -278,7 +284,7 @@ static void __itt_report_error(unsigned code_arg, ...)
va_end(args);
}
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
#pragma warning(pop)
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
@@ -1013,7 +1019,7 @@ static void __itt_nullify_all_pointers(void)
*_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func;
}
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
#pragma warning(push)
#pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */
#pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */
@@ -1191,7 +1197,6 @@ ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t
return prev;
}
-#if ITT_PLATFORM==ITT_PLATFORM_WIN
+#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT
#pragma warning(pop)
#endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */
-
diff --git a/runtime/src/z_Linux_util.cpp b/runtime/src/z_Linux_util.cpp
index a8d9324..ab9c353 100644
--- a/runtime/src/z_Linux_util.cpp
+++ b/runtime/src/z_Linux_util.cpp
@@ -22,7 +22,7 @@
#include "kmp_wait_release.h"
#include "kmp_wrapper_getpid.h"
-#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD
+#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
#include <alloca.h>
#endif
#include <math.h> // HUGE_VAL.
@@ -50,8 +50,11 @@
#elif KMP_OS_DARWIN
#include <mach/mach.h>
#include <sys/sysctl.h>
-#elif KMP_OS_FREEBSD
+#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
#include <pthread_np.h>
+#elif KMP_OS_NETBSD
+#include <sys/types.h>
+#include <sys/sysctl.h>
#endif
#include <ctype.h>
@@ -444,7 +447,8 @@ void __kmp_terminate_thread(int gtid) {
determined exactly, FALSE if incremental refinement is necessary. */
static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
int stack_data;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_HURD
pthread_attr_t attr;
int status;
size_t size = 0;
@@ -458,7 +462,7 @@ static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
/* Fetch the real thread attributes */
status = pthread_attr_init(&attr);
KMP_CHECK_SYSFAIL("pthread_attr_init", status);
-#if KMP_OS_FREEBSD || KMP_OS_NETBSD
+#if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
status = pthread_attr_get_np(pthread_self(), &attr);
KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
#else
@@ -482,7 +486,8 @@ static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
return TRUE;
}
-#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */
+#endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
+ KMP_OS_HURD */
/* Use incremental refinement starting from initial conservative estimate */
TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
@@ -496,7 +501,8 @@ static void *__kmp_launch_worker(void *thr) {
sigset_t new_set, old_set;
#endif /* KMP_BLOCK_SIGNALS */
void *exit_val;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_OPENBSD || KMP_OS_HURD
void *volatile padding = 0;
#endif
int gtid;
@@ -544,7 +550,8 @@ static void *__kmp_launch_worker(void *thr) {
KMP_CHECK_SYSFAIL("pthread_sigmask", status);
#endif /* KMP_BLOCK_SIGNALS */
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_OPENBSD
if (__kmp_stkoffset > 0 && gtid > 0) {
padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
}
@@ -1771,7 +1778,8 @@ static int __kmp_get_xproc(void) {
int r = 0;
-#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD
+#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
+ KMP_OS_OPENBSD || KMP_OS_HURD
r = sysconf(_SC_NPROCESSORS_ONLN);
@@ -1934,20 +1942,27 @@ void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
kmp_uint64 __kmp_now_nsec() {
struct timeval t;
gettimeofday(&t, NULL);
- return KMP_NSEC_PER_SEC * t.tv_sec + 1000 * t.tv_usec;
+ kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
+ (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
+ return nsec;
}
#if KMP_ARCH_X86 || KMP_ARCH_X86_64
/* Measure clock ticks per millisecond */
void __kmp_initialize_system_tick() {
+ kmp_uint64 now, nsec2, diff;
kmp_uint64 delay = 100000; // 50~100 usec on most machines.
kmp_uint64 nsec = __kmp_now_nsec();
kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
- kmp_uint64 now;
while ((now = __kmp_hardware_timestamp()) < goal)
;
- __kmp_ticks_per_msec =
- (kmp_uint64)(1e6 * (delay + (now - goal)) / (__kmp_now_nsec() - nsec));
+ nsec2 = __kmp_now_nsec();
+ diff = nsec2 - nsec;
+ if (diff > 0) {
+ kmp_uint64 tpms = (kmp_uint64)(1e6 * (delay + (now - goal)) / diff);
+ if (tpms > 0)
+ __kmp_ticks_per_msec = tpms;
+ }
}
#endif
@@ -2017,9 +2032,39 @@ int __kmp_is_address_mapped(void *addr) {
found = 1;
}
-#elif KMP_OS_FREEBSD || KMP_OS_NETBSD
+#elif KMP_OS_NETBSD
+
+ int mib[5];
+ mib[0] = CTL_VM;
+ mib[1] = VM_PROC;
+ mib[2] = VM_PROC_MAP;
+ mib[3] = getpid();
+ mib[4] = sizeof(struct kinfo_vmentry);
+
+ size_t size;
+ rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
+ KMP_ASSERT(!rc);
+ KMP_ASSERT(size);
+
+ size = size * 4 / 3;
+ struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
+ KMP_ASSERT(kiv);
+
+ rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
+ KMP_ASSERT(!rc);
+ KMP_ASSERT(size);
+
+  for (size_t i = 0; i < size / sizeof(struct kinfo_vmentry); i++) {
+    if (kiv[i].kve_start <= (uint64_t)addr &&
+        (uint64_t)addr < kiv[i].kve_end) {
+ found = 1;
+ break;
+ }
+ }
+ KMP_INTERNAL_FREE(kiv);
+#elif KMP_OS_DRAGONFLY || KMP_OS_OPENBSD
- // FIXME(FreeBSD, NetBSD): Implement this
+ // FIXME(DragonFly, OpenBSD): Implement this
found = 1;
#else
@@ -2034,7 +2079,7 @@ int __kmp_is_address_mapped(void *addr) {
#ifdef USE_LOAD_BALANCE
-#if KMP_OS_DARWIN
+#if KMP_OS_DARWIN || KMP_OS_NETBSD
// The function returns the rounded value of the system load average
// during given time interval which depends on the value of
diff --git a/runtime/src/z_Windows_NT_util.cpp b/runtime/src/z_Windows_NT_util.cpp
index e8ed660..f3d667f 100644
--- a/runtime/src/z_Windows_NT_util.cpp
+++ b/runtime/src/z_Windows_NT_util.cpp
@@ -887,6 +887,7 @@ kmp_uint64 __kmp_now_nsec() {
return 1e9 * __kmp_win32_tick * now.QuadPart;
}
+extern "C"
void *__stdcall __kmp_launch_worker(void *arg) {
volatile void *stack_data;
void *exit_val;
diff --git a/runtime/test/affinity/format/affinity_display.1.c b/runtime/test/affinity/format/affinity_display.1.c
new file mode 100644
index 0000000..b900c3c
--- /dev/null
+++ b/runtime/test/affinity/format/affinity_display.1.c
@@ -0,0 +1,92 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=TRUE OMP_NUM_THREADS=4 OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' %libomp-run | python %S/check.py -c 'CHECK' %s
+
+// Affinity Display examples
+#include <stdio.h>
+#include <stdlib.h> // NULL is also provided by <stddef.h>
+#include <stddef.h>
+#include <omp.h>
+#include <string.h>
+
+// ENVIRONMENT
+// OMP_DISPLAY_AFFINITY=TRUE
+// OMP_NUM_THREADS=4
+// OMP_PLACES='{0,1},{2,3},{4,5},{6,7}'
+
+// CHECK: num_threads=1 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0-7])|(0,1)|(undefined)\}
+// CHECK: num_threads=4 Thread id [0-3] reporting in
+// CHECK: num_threads=4 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0-7])|([0246],[1357])|(undefined)\}
+// CHECK: num_threads=1 Default Affinity Format is:
+// CHECK: num_threads=1 Affinity Format set to: host=%20H tid=%0.4n binds_to=%A
+// CHECK: num_threads=4 tid=[0-3] affinity:host=[a-zA-Z0-9_.-]+[ ]+tid=000[0-4][ ]+binds_to=(([0-7])|([0246],[1357])|(undefined))
+
+#define FORMAT_STORE 80
+#define BUFFER_STORE 80
+
+int main(int argc, char** argv) {
+ int i, n, tid, max_req_store = 0;
+ size_t nchars;
+ char default_format[FORMAT_STORE];
+ char my_format[] = "host=%20H tid=%0.4n binds_to=%A";
+ char **buffer;
+
+ // CODE SEGMENT 1 AFFINITY DISPLAY
+ omp_display_affinity(NULL);
+
+ // OMP_DISPLAY_AFFINITY=TRUE,
+ // Affinity reported for 1 parallel region
+ #pragma omp parallel
+ {
+ printf("Thread id %d reporting in.\n", omp_get_thread_num());
+ }
+
+ // Get and Display Default Affinity Format
+ nchars = omp_get_affinity_format(default_format, (size_t)FORMAT_STORE);
+ printf("Default Affinity Format is: %s\n", default_format);
+
+ if (nchars > FORMAT_STORE) {
+ printf("Caution: Reported Format is truncated. Increase\n");
+ printf(" FORMAT_STORE by %d.\n", (int)nchars - FORMAT_STORE);
+ }
+
+ // Set Affinity Format
+ omp_set_affinity_format(my_format);
+ printf("Affinity Format set to: %s\n", my_format);
+
+ // CODE SEGMENT 3 CAPTURE AFFINITY
+ // Set up buffer for affinity of n threads
+ n = omp_get_max_threads();
+ buffer = (char **)malloc(sizeof(char *) * n);
+ for (i = 0; i < n; i++) {
+ buffer[i] = (char *)malloc(sizeof(char) * BUFFER_STORE);
+ }
+
+ // Capture Affinity using Affinity Format set above.
+ // Use critical reduction to check size of buffer areas
+ #pragma omp parallel private(tid, nchars)
+ {
+ tid = omp_get_thread_num();
+ nchars = omp_capture_affinity(buffer[tid], (size_t)BUFFER_STORE, NULL);
+ #pragma omp critical
+ {
+ if (nchars > max_req_store)
+ max_req_store = nchars;
+ }
+ }
+
+ for (i = 0; i < n; i++) {
+ printf("tid=%d affinity:%s:\n", i, buffer[i]);
+ }
+ // for 4 threads with OMP_PLACES='{0,1},{2,3},{4,5},{6,7}'
+ // host=%20H tid=%0.4n binds_to=%A
+ // host=<hostname> tid=0000 binds_to=0,1
+ // host=<hostname> tid=0001 binds_to=2,3
+ // host=<hostname> tid=0002 binds_to=4,5
+ // host=<hostname> tid=0003 binds_to=6,7
+
+ if (max_req_store > BUFFER_STORE) {
+ printf("Caution: Affinity string truncated. Increase\n");
+ printf(" BUFFER_STORE by %d\n", max_req_store - BUFFER_STORE);
+ }
+ return 0;
+}
diff --git a/runtime/test/affinity/format/affinity_values.c b/runtime/test/affinity/format/affinity_values.c
new file mode 100644
index 0000000..37ab210
--- /dev/null
+++ b/runtime/test/affinity/format/affinity_values.c
@@ -0,0 +1,135 @@
+// RUN: %libomp-compile
+// RUN: env OMP_PROC_BIND=close OMP_PLACES=threads %libomp-run
+// RUN: env OMP_PROC_BIND=close OMP_PLACES=cores %libomp-run
+// RUN: env OMP_PROC_BIND=close OMP_PLACES=sockets %libomp-run
+// RUN: env KMP_AFFINITY=compact %libomp-run
+// RUN: env KMP_AFFINITY=scatter %libomp-run
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition) \
+ if (!(condition)) { \
+ fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \
+ __LINE__); \
+ exit(1); \
+ }
+
+#define DEBUG 0
+
+#if DEBUG
+#include <stdarg.h>
+#endif
+
+#define BUFFER_SIZE 1024
+
+char buf[BUFFER_SIZE];
+#pragma omp threadprivate(buf)
+
+static int debug_printf(const char* format, ...) {
+ int retval = 0;
+#if DEBUG
+ va_list args;
+ va_start(args, format);
+ retval = vprintf(format, args);
+ va_end(args);
+#endif
+ return retval;
+}
+
+static void display_affinity_environment() {
+#if DEBUG
+ printf("Affinity Environment:\n");
+ printf(" OMP_PROC_BIND=%s\n", getenv("OMP_PROC_BIND"));
+ printf(" OMP_PLACES=%s\n", getenv("OMP_PLACES"));
+ printf(" KMP_AFFINITY=%s\n", getenv("KMP_AFFINITY"));
+#endif
+}
+
+// Reads in a list of integers into ids array (not going past ids_size)
+// e.g., if affinity = "0-4,6,8-10,14,16,17-20,23"
+// then ids = [0,1,2,3,4,6,8,9,10,14,16,17,18,19,20,23]
+void list_to_ids(const char* affinity, int* ids, int ids_size) {
+ int id, b, e, ids_index;
+ char *aff, *begin, *end, *absolute_end;
+ aff = strdup(affinity);
+ absolute_end = aff + strlen(aff);
+ ids_index = 0;
+ begin = end = aff;
+ while (end < absolute_end) {
+ end = begin;
+ while (*end != '\0' && *end != ',')
+ end++;
+ *end = '\0';
+ if (strchr(begin, '-') != NULL) {
+ // Range
+ sscanf(begin, "%d-%d", &b, &e);
+ } else {
+ // Single Number
+ sscanf(begin, "%d", &b);
+ e = b;
+ }
+ for (id = b; id <= e; ++id) {
+ ids[ids_index++] = id;
+ if (ids_index >= ids_size) {
+ free(aff);
+ return;
+ }
+ }
+ begin = end + 1;
+ }
+ free(aff);
+}
+
+void check_thread_affinity() {
+ int i;
+ const char *formats[2] = {"%{thread_affinity}", "%A"};
+ for (i = 0; i < sizeof(formats) / sizeof(formats[0]); ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel
+ {
+ int j, k;
+ int place = omp_get_place_num();
+ int num_procs = omp_get_place_num_procs(place);
+ int *ids = (int *)malloc(sizeof(int) * num_procs);
+ int *ids2 = (int *)malloc(sizeof(int) * num_procs);
+ char buf[256];
+ size_t n = omp_capture_affinity(buf, 256, NULL);
+ check(n <= 256);
+ omp_get_place_proc_ids(place, ids);
+ list_to_ids(buf, ids2, num_procs);
+
+ #pragma omp for schedule(static) ordered
+ for (k = 0; k < omp_get_num_threads(); ++k) {
+ #pragma omp ordered
+ {
+ debug_printf("Thread %d: captured affinity = %s\n",
+ omp_get_thread_num(), buf);
+ for (j = 0; j < num_procs; ++j) {
+ debug_printf("Thread %d: ids[%d] = %d ids2[%d] = %d\n",
+ omp_get_thread_num(), j, ids[j], j, ids2[j]);
+ check(ids[j] == ids2[j]);
+ }
+ }
+ }
+
+ free(ids);
+ free(ids2);
+ }
+ }
+}
+
+int main(int argc, char** argv) {
+ omp_set_nested(1);
+ display_affinity_environment();
+ check_thread_affinity();
+ return 0;
+}
diff --git a/runtime/test/affinity/format/api.c b/runtime/test/affinity/format/api.c
new file mode 100644
index 0000000..df6be66
--- /dev/null
+++ b/runtime/test/affinity/format/api.c
@@ -0,0 +1,56 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition) \
+ if (!(condition)) { \
+ fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \
+ __LINE__); \
+ exit(1); \
+ }
+
+#define BUFFER_SIZE 1024
+
+int main(int argc, char** argv) {
+ char buf[BUFFER_SIZE];
+ size_t needed;
+
+ omp_set_affinity_format("0123456789");
+
+ needed = omp_get_affinity_format(buf, BUFFER_SIZE);
+ check(streqls(buf, "0123456789"));
+  check(needed == 10);
+
+ // Check that it is truncated properly
+ omp_get_affinity_format(buf, 5);
+ check(streqls(buf, "0123"));
+
+ #pragma omp parallel
+ {
+ char my_buf[512];
+ size_t needed = omp_capture_affinity(my_buf, 512, NULL);
+ check(streqls(my_buf, "0123456789"));
+ check(needed == 10);
+ // Check that it is truncated properly
+ omp_capture_affinity(my_buf, 5, NULL);
+ check(streqls(my_buf, "0123"));
+ }
+
+ #pragma omp parallel num_threads(4)
+ {
+ omp_display_affinity(NULL);
+ }
+
+ return 0;
+}
+
+// CHECK: num_threads=4 0123456789
diff --git a/runtime/test/affinity/format/api2.c b/runtime/test/affinity/format/api2.c
new file mode 100644
index 0000000..7b2d700
--- /dev/null
+++ b/runtime/test/affinity/format/api2.c
@@ -0,0 +1,84 @@
+// RUN: %libomp-compile-and-run
+// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition) \
+ if (!(condition)) { \
+ fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \
+ __LINE__); \
+ exit(1); \
+ }
+
+#if defined(_WIN32)
+#define snprintf _snprintf
+#endif
+
+#define BUFFER_SIZE 1024
+
+int main(int argc, char** argv) {
+ char buf[BUFFER_SIZE];
+ size_t needed, length;
+ const char* format = "tl:%L tn:%n nt:%N an:%a";
+ const char* second_format = "nesting_level:%{nesting_level} thread_num:%{thread_num} num_threads:%{num_threads} ancestor_tnum:%{ancestor_tnum}";
+
+ length = strlen(format);
+ omp_set_affinity_format(format);
+
+ needed = omp_get_affinity_format(buf, BUFFER_SIZE);
+ check(streqls(buf, format));
+  check(needed == length);
+
+ // Check that it is truncated properly
+ omp_get_affinity_format(buf, 5);
+ check(streqls(buf, "tl:%"));
+
+ #pragma omp parallel
+ {
+ char my_buf[512];
+ char supposed[512];
+ int tl, tn, nt, an;
+ size_t needed, needed2;
+ tl = omp_get_level();
+ tn = omp_get_thread_num();
+ nt = omp_get_num_threads();
+ an = omp_get_ancestor_thread_num(omp_get_level()-1);
+ needed = omp_capture_affinity(my_buf, 512, NULL);
+ needed2 = (size_t)snprintf(supposed, 512, "tl:%d tn:%d nt:%d an:%d", tl, tn, nt, an);
+ check(streqls(my_buf, supposed));
+ check(needed == needed2);
+ // Check that it is truncated properly
+ supposed[4] = '\0';
+ omp_capture_affinity(my_buf, 5, NULL);
+ check(streqls(my_buf, supposed));
+
+ needed = omp_capture_affinity(my_buf, 512, second_format);
+ needed2 = (size_t)snprintf(supposed, 512, "nesting_level:%d thread_num:%d num_threads:%d ancestor_tnum:%d", tl, tn, nt, an);
+ check(streqls(my_buf, supposed));
+ check(needed == needed2);
+
+ // Check that it is truncated properly
+ supposed[25] = '\0';
+ omp_capture_affinity(my_buf, 26, second_format);
+ check(streqls(my_buf, supposed));
+ }
+
+ #pragma omp parallel num_threads(4)
+ {
+ omp_display_affinity(NULL);
+ omp_display_affinity(second_format);
+ }
+
+ return 0;
+}
+
+// CHECK: num_threads=4 tl:[0-9]+ tn:[0-9]+ nt:[0-9]+ an:[0-9]+
+// CHECK: num_threads=4 nesting_level:[0-9]+ thread_num:[0-9]+ num_threads:[0-9]+ ancestor_tnum:[0-9]+
diff --git a/runtime/test/affinity/format/check.py b/runtime/test/affinity/format/check.py
new file mode 100644
index 0000000..0adddbd
--- /dev/null
+++ b/runtime/test/affinity/format/check.py
@@ -0,0 +1,73 @@
+import os
+import sys
+import argparse
+import re
+
+class Checks(object):
+ class CheckError(Exception):
+ pass
+
+ def __init__(self, filename, prefix):
+ self.checks = []
+ self.lines = []
+ self.check_no_output = False
+ self.filename = filename
+ self.prefix = prefix
+ def readStdin(self):
+ self.lines = [l.rstrip('\r\n') for l in sys.stdin.readlines()]
+ def readChecks(self):
+ with open(self.filename) as f:
+ for line in f:
+ match = re.search('{}: NO_OUTPUT'.format(self.prefix), line)
+ if match is not None:
+ self.check_no_output = True
+ return
+ match = re.search('{}: num_threads=([0-9]+) (.*)$'.format(self.prefix), line)
+ if match is not None:
+ num_threads = int(match.group(1))
+ for i in range(num_threads):
+ self.checks.append(match.group(2))
+ continue
+ def check(self):
+ # If no checks at all, then nothing to do
+ if len(self.checks) == 0 and not self.check_no_output:
+ print('Nothing to check for')
+ return
+ # Check if we are expecting no output
+ if self.check_no_output:
+ if len(self.lines) == 0:
+ return
+ else:
+ raise Checks.CheckError('{}: Output was found when expecting none.'.format(self.prefix))
+ # Run through each check line and see if it exists in the output
+ # If it does, then delete the line from output and look for the
+ # next check line.
+ # If you don't find the line then raise Checks.CheckError
+ # If there are extra lines of output then raise Checks.CheckError
+ for c in self.checks:
+ found = False
+ index = -1
+ for idx, line in enumerate(self.lines):
+ if re.search(c, line) is not None:
+ found = True
+ index = idx
+ break
+ if not found:
+ raise Checks.CheckError('{}: Did not find: {}'.format(self.prefix, c))
+ else:
+ del self.lines[index]
+ if len(self.lines) != 0:
+ raise Checks.CheckError('{}: Extra output: {}'.format(self.prefix, self.lines))
+
+# Setup argument parsing
+parser = argparse.ArgumentParser(description='''This script checks output of
+ a program against "CHECK" lines in filename''')
+parser.add_argument('filename', default=None, help='filename to check against')
+parser.add_argument('-c', '--check-prefix', dest='prefix',
+ default='CHECK', help='check prefix token default: %(default)s')
+command_args = parser.parse_args()
+# Do the checking
+checks = Checks(command_args.filename, command_args.prefix)
+checks.readStdin()
+checks.readChecks()
+checks.check()
diff --git a/runtime/test/affinity/format/fields_modifiers.c b/runtime/test/affinity/format/fields_modifiers.c
new file mode 100644
index 0000000..c180271
--- /dev/null
+++ b/runtime/test/affinity/format/fields_modifiers.c
@@ -0,0 +1,117 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition) \
+ if (!(condition)) { \
+ fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \
+ __LINE__); \
+ exit(1); \
+ }
+
+#define BUFFER_SIZE 1024
+
+char buf[BUFFER_SIZE];
+#pragma omp threadprivate(buf)
+
+char* get_string(size_t check_needed) {
+ size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+ //printf("buf = %s\n", buf);
+ check(needed < BUFFER_SIZE);
+ if (check_needed != 0) {
+ check(needed == check_needed);
+ }
+ return buf;
+}
+
+void check_thread_num_padded_rjustified() {
+ int i;
+ const char* formats[2] = {"%0.8{thread_num}", "%0.8n"};
+ for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ int j;
+ int tid = omp_get_thread_num();
+ char ctid = '0' + (char)tid;
+ char* s = get_string(8);
+ for (j = 0; j < 7; ++j) {
+ check(s[j] == '0');
+ }
+ check(s[j] == ctid);
+ }
+ }
+}
+
+void check_thread_num_rjustified() {
+ int i;
+ const char* formats[2] = {"%.12{thread_num}", "%.12n"};
+ for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ int j;
+ int tid = omp_get_thread_num();
+ char ctid = '0' + (char)tid;
+ char* s = get_string(12);
+ for (j = 0; j < 11; ++j) {
+ check(s[j] == ' ');
+ }
+ check(s[j] == ctid);
+ }
+ }
+}
+
+void check_thread_num_ljustified() {
+ int i;
+ const char* formats[2] = {"%5{thread_num}", "%5n"};
+ for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ int j;
+ int tid = omp_get_thread_num();
+ char ctid = '0' + (char)tid;
+ char* s = get_string(5);
+ check(s[0] == ctid);
+ for (j = 1; j < 5; ++j) {
+ check(s[j] == ' ');
+ }
+ }
+ }
+}
+
+void check_thread_num_padded_ljustified() {
+ int i;
+ const char* formats[2] = {"%018{thread_num}", "%018n"};
+ for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ int j;
+ int tid = omp_get_thread_num();
+ char ctid = '0' + (char)tid;
+ char* s = get_string(18);
+ check(s[0] == ctid);
+ for (j = 1; j < 18; ++j) {
+ check(s[j] == ' ');
+ }
+ }
+ }
+}
+
+int main(int argc, char** argv) {
+ check_thread_num_ljustified();
+ check_thread_num_rjustified();
+ check_thread_num_padded_ljustified();
+ check_thread_num_padded_rjustified();
+ return 0;
+}
diff --git a/runtime/test/affinity/format/fields_values.c b/runtime/test/affinity/format/fields_values.c
new file mode 100644
index 0000000..e56ce27
--- /dev/null
+++ b/runtime/test/affinity/format/fields_values.c
@@ -0,0 +1,152 @@
+// RUN: %libomp-compile-and-run
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <omp.h>
+
+#define XSTR(x) #x
+#define STR(x) XSTR(x)
+
+#define streqls(s1, s2) (!strcmp(s1, s2))
+
+#define check(condition) \
+ if (!(condition)) { \
+ fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \
+ __LINE__); \
+ exit(1); \
+ }
+
+#if defined(_WIN32)
+#include <windows.h>
+#define getpid _getpid
+typedef int pid_t;
+#define gettid GetCurrentThreadId
+#define my_gethostname(buf, sz) GetComputerNameA(buf, &(sz))
+#else
+#include <unistd.h>
+#include <sys/types.h>
+#define my_gethostname(buf, sz) gethostname(buf, sz)
+#endif
+
+#define BUFFER_SIZE 256
+
+int get_integer() {
+ int n, retval;
+ char buf[BUFFER_SIZE];
+ size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+ check(needed < BUFFER_SIZE);
+ n = sscanf(buf, "%d", &retval);
+ check(n == 1);
+ return retval;
+}
+
+char* get_string() {
+ int n, retval;
+ char buf[BUFFER_SIZE];
+ size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL);
+ check(needed < BUFFER_SIZE);
+ return strdup(buf);
+}
+
+void check_integer(const char* formats[2], int(*func)()) {
+ int i;
+ for (i = 0; i < 2; ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ check(get_integer() == func());
+ #pragma omp parallel num_threads(3)
+ {
+ check(get_integer() == func());
+ }
+ check(get_integer() == func());
+ }
+ }
+}
+
+void check_nesting_level() {
+ // Check %{nesting_level} and %L
+ const char* formats[2] = {"%{nesting_level}", "%L"};
+ check_integer(formats, omp_get_level);
+}
+
+void check_thread_num() {
+ // Check %{thread_num} and %n
+ const char* formats[2] = {"%{thread_num}", "%n"};
+ check_integer(formats, omp_get_thread_num);
+}
+
+void check_num_threads() {
+ // Check %{num_threads} and %N
+ const char* formats[2] = {"%{num_threads}", "%N"};
+ check_integer(formats, omp_get_num_threads);
+}
+
+int ancestor_helper() {
+ return omp_get_ancestor_thread_num(omp_get_level() - 1);
+}
+void check_ancestor_tnum() {
+ // Check %{ancestor_tnum} and %a
+ const char* formats[2] = {"%{ancestor_tnum}", "%a"};
+ check_integer(formats, ancestor_helper);
+}
+
+int my_get_pid() { return (int)getpid(); }
+void check_process_id() {
+ // Check %{process_id} and %P
+ const char* formats[2] = {"%{process_id}", "%P"};
+ check_integer(formats, my_get_pid);
+}
+
+/*
+int my_get_tid() { return (int)gettid(); }
+void check_native_thread_id() {
+ // Check %{native_thread_id} and %i
+ const char* formats[2] = {"%{native_thread_id}", "%i"};
+ check_integer(formats, my_get_tid);
+}
+*/
+
+void check_host() {
+ int i;
+ int buffer_size = 256;
+ const char* formats[2] = {"%{host}", "%H"};
+ char hostname[256];
+ my_gethostname(hostname, buffer_size);
+ for (i = 0; i < 2; ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ char* host = get_string();
+ check(streqls(host, hostname));
+ free(host);
+ }
+ }
+}
+
+void check_undefined() {
+ int i;
+ const char* formats[2] = {"%{foobar}", "%X"};
+ for (i = 0; i < 2; ++i) {
+ omp_set_affinity_format(formats[i]);
+ #pragma omp parallel num_threads(8)
+ {
+ char* undef = get_string();
+ check(streqls(undef, "undefined"));
+ free(undef);
+ }
+ }
+}
+
+int main(int argc, char** argv) {
+ omp_set_nested(1);
+ check_nesting_level();
+ check_num_threads();
+ check_ancestor_tnum();
+ check_process_id();
+ //check_native_thread_id();
+ check_host();
+ check_undefined();
+ return 0;
+}
diff --git a/runtime/test/affinity/format/increase.c b/runtime/test/affinity/format/increase.c
new file mode 100644
index 0000000..46d8edb
--- /dev/null
+++ b/runtime/test/affinity/format/increase.c
@@ -0,0 +1,36 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
+ // should print all for first parallel
+ omp_set_num_threads(4);
+ #pragma omp parallel
+ { }
+ // should print all because of new threads
+ omp_set_num_threads(8);
+ #pragma omp parallel
+ { }
+ // should not print anything here
+ omp_set_num_threads(6);
+ #pragma omp parallel
+ { }
+ // should print all because of new thread
+ omp_set_num_threads(9);
+ #pragma omp parallel
+ { }
+ // should not print anything here
+ omp_set_num_threads(2);
+ #pragma omp parallel
+ { }
+ return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8
+// CHECK: num_threads=6 TESTER: tl:1 tn:[0-5] nt:6
+// CHECK: num_threads=9 TESTER: tl:1 tn:[0-8] nt:9
+// CHECK: num_threads=2 TESTER: tl:1 tn:[01] nt:2
diff --git a/runtime/test/affinity/format/lit.local.cfg b/runtime/test/affinity/format/lit.local.cfg
new file mode 100644
index 0000000..80583af
--- /dev/null
+++ b/runtime/test/affinity/format/lit.local.cfg
@@ -0,0 +1,2 @@
+if 'openmp-5.0' not in config.available_features:
+ config.unsupported = True
diff --git a/runtime/test/affinity/format/nested.c b/runtime/test/affinity/format/nested.c
new file mode 100644
index 0000000..502c1da
--- /dev/null
+++ b/runtime/test/affinity/format/nested.c
@@ -0,0 +1,23 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close %libomp-run | python %S/check.py -c 'CHECK' %s
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
+ omp_set_nested(1);
+ #pragma omp parallel num_threads(4)
+ {
+ #pragma omp parallel num_threads(3)
+ { }
+ }
+ return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 at:0 tn:[0-3] nt:4
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3
diff --git a/runtime/test/affinity/format/nested2.c b/runtime/test/affinity/format/nested2.c
new file mode 100644
index 0000000..3dd4956
--- /dev/null
+++ b/runtime/test/affinity/format/nested2.c
@@ -0,0 +1,29 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+// Currently, KMP_HOT_TEAMS_MAX_LEVEL has to be equal to the
+// nest depth for intuitive behavior
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
+ omp_set_nested(1);
+ #pragma omp parallel num_threads(4)
+ {
+ #pragma omp parallel num_threads(3)
+ { }
+ #pragma omp parallel num_threads(3)
+ { }
+ }
+ #pragma omp parallel num_threads(4)
+ { }
+ return 0;
+}
+
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3
+// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
diff --git a/runtime/test/affinity/format/nested_mixed.c b/runtime/test/affinity/format/nested_mixed.c
new file mode 100644
index 0000000..a39b4fd
--- /dev/null
+++ b/runtime/test/affinity/format/nested_mixed.c
@@ -0,0 +1,46 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
+ omp_set_nested(1);
+ #pragma omp parallel num_threads(1)
+ {
+ #pragma omp parallel num_threads(2)
+ { }
+ #pragma omp parallel num_threads(2)
+ {
+ #pragma omp parallel num_threads(1)
+ {
+ #pragma omp parallel num_threads(2)
+ { }
+ }
+ }
+ #pragma omp parallel num_threads(1)
+ { }
+ }
+ #pragma omp parallel num_threads(2)
+ { }
+ #pragma omp parallel num_threads(1)
+ { }
+ return 0;
+}
+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
+
+// CHECK: num_threads=2 TESTER: tl:2 at:[0-9] tn:[01] nt:2
+
+// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1
+
+// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2
+// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2
+
+// CHECK: num_threads=1 TESTER: tl:2 at:[0-9] tn:0 nt:1
+
+// CHECK: num_threads=2 TESTER: tl:1 at:[0-9] tn:[01] nt:2
+
+// CHECK: num_threads=1 TESTER: tl:1 at:[0-9] tn:0 nt:1
diff --git a/runtime/test/affinity/format/nested_serial.c b/runtime/test/affinity/format/nested_serial.c
new file mode 100644
index 0000000..87ff2bd
--- /dev/null
+++ b/runtime/test/affinity/format/nested_serial.c
@@ -0,0 +1,35 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N");
+ omp_set_nested(1);
+ #pragma omp parallel num_threads(1)
+ {
+ #pragma omp parallel num_threads(1)
+ { }
+ #pragma omp parallel num_threads(1)
+ { }
+ #pragma omp parallel num_threads(1)
+ {
+ #pragma omp parallel num_threads(1)
+ { }
+ }
+ #pragma omp parallel num_threads(1)
+ { }
+ }
+ #pragma omp parallel num_threads(1)
+ { }
+ #pragma omp parallel num_threads(1)
+ { }
+ return 0;
+}
+
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:3 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1
+// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1
diff --git a/runtime/test/affinity/format/proc_bind.c b/runtime/test/affinity/format/proc_bind.c
new file mode 100644
index 0000000..e88e1aa
--- /dev/null
+++ b/runtime/test/affinity/format/proc_bind.c
@@ -0,0 +1,31 @@
+// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES='{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0}' %libomp-run | python %S/check.py -c 'CHECK' %s
+// REQUIRES: affinity
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N aff:{%A}");
+ omp_set_num_threads(8);
+ // Initial parallel
+ #pragma omp parallel proc_bind(spread)
+ { }
+ #pragma omp parallel proc_bind(spread)
+ { }
+ // Affinity changes here
+ #pragma omp parallel proc_bind(close)
+ { }
+ #pragma omp parallel proc_bind(close)
+ { }
+ // Affinity changes here
+ #pragma omp parallel proc_bind(master)
+ { }
+ #pragma omp parallel proc_bind(master)
+ { }
+ return 0;
+}
+
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
+// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff:
diff --git a/runtime/test/affinity/format/simple.c b/runtime/test/affinity/format/simple.c
new file mode 100644
index 0000000..954aa74
--- /dev/null
+++ b/runtime/test/affinity/format/simple.c
@@ -0,0 +1,27 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=false %libomp-run | python %S/check.py -c 'NOTHING' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=1 %libomp-run | python %S/check.py -c 'CHECK' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=2 %libomp-run | python %S/check.py -c 'CHECK-2' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=3 %libomp-run | python %S/check.py -c 'CHECK-3' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=4 %libomp-run | python %S/check.py -c 'CHECK-4' %s
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N");
+ #pragma omp parallel
+ { }
+ #pragma omp parallel
+ { }
+ return 0;
+}
+
+// NOTHING: NO_OUTPUT
+// CHECK: num_threads=1 TESTER: tl:1 tn:0 nt:1
+// CHECK-2: num_threads=2 TESTER: tl:1 tn:[01] nt:2
+// CHECK-3: num_threads=3 TESTER: tl:1 tn:[0-2] nt:3
+// CHECK-4: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4
+// CHECK-8: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8
diff --git a/runtime/test/affinity/format/simple_env.c b/runtime/test/affinity/format/simple_env.c
new file mode 100644
index 0000000..7aab1cf
--- /dev/null
+++ b/runtime/test/affinity/format/simple_env.c
@@ -0,0 +1,16 @@
+// RUN: %libomp-compile
+// RUN: env OMP_DISPLAY_AFFINITY=true OMP_AFFINITY_FORMAT='TESTER-ENV: tl:%L tn:%n nt:%N' OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <omp.h>
+
+int main(int argc, char** argv) {
+ #pragma omp parallel
+ { }
+ #pragma omp parallel
+ { }
+ return 0;
+}
+
+// CHECK-8: num_threads=8 TESTER-ENV: tl:1 tn:[0-7] nt:8
diff --git a/runtime/test/api/omp_alloc.c b/runtime/test/api/omp_alloc.c
index afad4a5..2002adb 100644
--- a/runtime/test/api/omp_alloc.c
+++ b/runtime/test/api/omp_alloc.c
@@ -1,4 +1,7 @@
// RUN: %libomp-compile-and-run
+
+// REQUIRES: openmp-5.0
+
#include <stdio.h>
#include <stdint.h>
#include <omp.h>
diff --git a/runtime/test/api/omp_get_wtick.c b/runtime/test/api/omp_get_wtick.c
index 8b35226..11a320f 100644
--- a/runtime/test/api/omp_get_wtick.c
+++ b/runtime/test/api/omp_get_wtick.c
@@ -7,7 +7,7 @@ int test_omp_get_wtick()
double tick;
tick = -1.;
tick = omp_get_wtick ();
- return ((tick > 0.0) && (tick < 0.01));
+ return ((tick > 0.0) && (tick <= 0.01));
}
int main()
diff --git a/runtime/test/api/omp_in_parallel.c b/runtime/test/api/omp_in_parallel.c
index d09313e..5e9e635 100644
--- a/runtime/test/api/omp_in_parallel.c
+++ b/runtime/test/api/omp_in_parallel.c
@@ -30,6 +30,11 @@ int main()
int i;
int num_failed=0;
+ // the test requires more than 1 thread to pass
+ omp_set_dynamic(0); // disable dynamic adjustment of threads
+ if (omp_get_max_threads() == 1)
+ omp_set_num_threads(2); // set 2 threads if no HW resources available
+
for(i = 0; i < REPETITIONS; i++) {
if(!test_omp_in_parallel()) {
num_failed++;
diff --git a/runtime/test/flush/omp_flush.c b/runtime/test/flush/omp_flush.c
index 3fd3cdf..95a406d 100644
--- a/runtime/test/flush/omp_flush.c
+++ b/runtime/test/flush/omp_flush.c
@@ -36,6 +36,11 @@ int main()
int i;
int num_failed=0;
+ // the test requires more than 1 thread to pass
+ omp_set_dynamic(0); // disable dynamic adjustment of threads
+ if (omp_get_max_threads() == 1)
+ omp_set_num_threads(2); // set 2 threads if no HW resources available
+
for (i = 0; i < REPETITIONS; i++) {
if(!test_omp_flush()) {
num_failed++;
diff --git a/runtime/test/lit.cfg b/runtime/test/lit.cfg
index 9f0c059..066929e 100644
--- a/runtime/test/lit.cfg
+++ b/runtime/test/lit.cfg
@@ -55,7 +55,6 @@ if config.has_libatomic:
libs += " -latomic"
# Allow REQUIRES / UNSUPPORTED / XFAIL to work
-config.target_triple = [ ]
for feature in config.test_compiler_features:
config.available_features.add(feature)
@@ -91,9 +90,21 @@ if config.has_ompt:
# for callback.h
config.test_flags += " -I " + config.test_source_root + "/ompt"
+if config.libomp_omp_version >= 50:
+ config.available_features.add("openmp-5.0")
+
+if config.libomp_omp_version >= 45:
+ config.available_features.add("openmp-4.5")
+
+if config.libomp_omp_version >= 40:
+ config.available_features.add("openmp-4.0")
+
if 'Linux' in config.operating_system:
config.available_features.add("linux")
+if config.operating_system in ['Linux', 'Windows']:
+ config.available_features.add('affinity')
+
# to run with icc INTEL_LICENSE_FILE must be set
if 'INTEL_LICENSE_FILE' in os.environ:
config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE']
@@ -116,7 +127,7 @@ config.substitutions.append(("%flags", config.test_flags))
if config.has_ompt:
config.substitutions.append(("FileCheck", config.test_filecheck))
- config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable"))
+ config.substitutions.append(("%sort-threads", "sort -n -s"))
if config.operating_system == 'Windows':
# No such environment variable on Windows.
config.substitutions.append(("%preload-tool", "true ||"))
diff --git a/runtime/test/lit.site.cfg.in b/runtime/test/lit.site.cfg.in
index c2825ee..fe4a372 100644
--- a/runtime/test/lit.site.cfg.in
+++ b/runtime/test/lit.site.cfg.in
@@ -1,11 +1,13 @@
@AUTO_GEN_COMMENT@
+config.target_triple = "@TARGET_TRIPLE@"
config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@"
config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@"
config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@
config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@"
config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@"
config.test_extra_flags = "@OPENMP_TEST_FLAGS@"
+config.libomp_omp_version = @LIBOMP_OMP_VERSION@
config.libomp_obj_root = "@CMAKE_CURRENT_BINARY_DIR@"
config.library_dir = "@LIBOMP_LIBRARY_DIR@"
config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src"
diff --git a/runtime/test/ompt/callback.h b/runtime/test/ompt/callback.h
index df83043..0304cff 100755
--- a/runtime/test/ompt/callback.h
+++ b/runtime/test/ompt/callback.h
@@ -79,7 +79,7 @@ static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls;
static void print_ids(int level)
{
int task_type, thread_num;
- omp_frame_t *frame;
+ ompt_frame_t *frame;
ompt_data_t *task_parallel_data;
ompt_data_t *task_data;
int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame,
@@ -92,8 +92,8 @@ static void print_ids(int level)
"task_type=%s=%d, thread_num=%d\n",
ompt_get_thread_data()->value, level,
exists_task ? task_parallel_data->value : 0,
- exists_task ? task_data->value : 0, frame->exit_frame,
- frame->enter_frame, buffer, task_type, thread_num);
+ exists_task ? task_data->value : 0, frame->exit_frame.ptr,
+ frame->enter_frame.ptr, buffer, task_type, thread_num);
}
#define get_frame_address(level) __builtin_frame_address(level)
@@ -197,7 +197,7 @@ on_ompt_callback_mutex_acquire(
ompt_mutex_t kind,
unsigned int hint,
unsigned int impl,
- omp_wait_id_t wait_id,
+ ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
switch(kind)
@@ -225,7 +225,7 @@ on_ompt_callback_mutex_acquire(
static void
on_ompt_callback_mutex_acquired(
ompt_mutex_t kind,
- omp_wait_id_t wait_id,
+ ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
switch(kind)
@@ -253,7 +253,7 @@ on_ompt_callback_mutex_acquired(
static void
on_ompt_callback_mutex_released(
ompt_mutex_t kind,
- omp_wait_id_t wait_id,
+ ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
switch(kind)
@@ -281,7 +281,7 @@ on_ompt_callback_mutex_released(
static void
on_ompt_callback_nest_lock(
ompt_scope_endpoint_t endpoint,
- omp_wait_id_t wait_id,
+ ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
switch(endpoint)
@@ -460,7 +460,7 @@ on_ompt_callback_lock_init(
ompt_mutex_t kind,
unsigned int hint,
unsigned int impl,
- omp_wait_id_t wait_id,
+ ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
switch(kind)
@@ -479,7 +479,7 @@ on_ompt_callback_lock_init(
static void
on_ompt_callback_lock_destroy(
ompt_mutex_t kind,
- omp_wait_id_t wait_id,
+ ompt_wait_id_t wait_id,
const void *codeptr_ra)
{
switch(kind)
@@ -583,7 +583,7 @@ on_ompt_callback_master(
static void on_ompt_callback_parallel_begin(
ompt_data_t *encountering_task_data,
- const omp_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
+ const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data,
uint32_t requested_team_size, int flag, const void *codeptr_ra) {
if(parallel_data->ptr)
printf("0: parallel_data initially not null\n");
@@ -593,8 +593,8 @@ static void on_ompt_callback_parallel_begin(
"parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32
", codeptr_ra=%p, invoker=%d\n",
ompt_get_thread_data()->value, encountering_task_data->value,
- encountering_task_frame->exit_frame,
- encountering_task_frame->enter_frame, parallel_data->value,
+ encountering_task_frame->exit_frame.ptr,
+ encountering_task_frame->enter_frame.ptr, parallel_data->value,
requested_team_size, codeptr_ra, flag);
}
@@ -610,7 +610,7 @@ static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data,
static void
on_ompt_callback_task_create(
ompt_data_t *encountering_task_data,
- const omp_frame_t *encountering_task_frame,
+ const ompt_frame_t *encountering_task_frame,
ompt_data_t* new_task_data,
int type,
int has_dependences,
@@ -634,7 +634,7 @@ on_ompt_callback_task_create(
parallel_data->value = ompt_get_unique_id();
}
- printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
+ printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL, encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no");
}
static void
@@ -692,9 +692,9 @@ on_ompt_callback_control_tool(
void *arg,
const void *codeptr_ra)
{
- omp_frame_t* omptTaskFrame;
+ ompt_frame_t* omptTaskFrame;
ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL);
- printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame, omptTaskFrame->enter_frame);
+ printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr);
return 0; //success
}
diff --git a/runtime/test/ompt/misc/api_calls_from_other_thread.cpp b/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
index 470d7cd..e2ef1fc 100644
--- a/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
+++ b/runtime/test/ompt/misc/api_calls_from_other_thread.cpp
@@ -31,12 +31,12 @@ void f() {
printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL));
- int state = omp_state_undefined;
+ int state = ompt_state_undefined;
const char *state_name;
printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue,
ompt_enumerate_states(state, &state, &state_name));
- int impl = ompt_mutex_impl_unknown;
+ int impl = ompt_mutex_impl_none;
const char *impl_name;
printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue,
ompt_enumerate_mutex_impls(impl, &impl, &impl_name));
diff --git a/runtime/test/ompt/misc/api_calls_misc.c b/runtime/test/ompt/misc/api_calls_misc.c
index d567b1b..884421e 100644
--- a/runtime/test/ompt/misc/api_calls_misc.c
+++ b/runtime/test/ompt/misc/api_calls_misc.c
@@ -19,7 +19,7 @@ int main() {
ompt_get_state(NULL));
// ompt_enumerate_states()
- int state = omp_state_undefined;
+ int state = ompt_state_undefined;
const char *state_name;
int steps = 0;
while (ompt_enumerate_states(state, &state, &state_name) && steps < 1000) {
@@ -35,7 +35,7 @@ int main() {
}
// ompt_enumerate_mutex_impls()
- int impl = ompt_mutex_impl_unknown;
+ int impl = ompt_mutex_impl_none;
const char *impl_name;
steps = 0;
while (ompt_enumerate_mutex_impls(impl, &impl, &impl_name) &&
diff --git a/runtime/test/ompt/misc/api_calls_places.c b/runtime/test/ompt/misc/api_calls_places.c
index ad338a7..3385c9c 100644
--- a/runtime/test/ompt/misc/api_calls_places.c
+++ b/runtime/test/ompt/misc/api_calls_places.c
@@ -42,7 +42,7 @@ int main() {
int omp_nums[omp_nums_size];
omp_get_partition_place_nums(omp_nums);
print_list("omp_get_partition_place_nums", omp_nums_size, omp_nums);
- int ompt_nums_size = ompt_get_partition_place_nums(0, NULL);
+ int ompt_nums_size = ompt_get_partition_place_nums(0, omp_nums);
int ompt_nums[ompt_nums_size];
ompt_get_partition_place_nums(ompt_nums_size, ompt_nums);
print_list("ompt_get_partition_place_nums", ompt_nums_size, ompt_nums);
diff --git a/runtime/test/ompt/misc/control_tool.c b/runtime/test/ompt/misc/control_tool.c
index 2c59666..7995614 100644
--- a/runtime/test/ompt/misc/control_tool.c
+++ b/runtime/test/ompt/misc/control_tool.c
@@ -1,6 +1,7 @@
// RUN: %libomp-compile-and-run | FileCheck %s
// REQUIRES: ompt
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+// XFAIL: powerpc64le, ppc64le
#define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN
#include "callback.h"
#include <omp.h>
@@ -22,7 +23,7 @@ int main()
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address({{.}})=[[EXIT_FRAME:0x[0-f]*]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]]
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter={{0x[0-f]*}}
// CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]]
return 0;
diff --git a/runtime/test/ompt/misc/control_tool_no_ompt_support.c b/runtime/test/ompt/misc/control_tool_no_ompt_support.c
index ee64da0..23daf8b 100644
--- a/runtime/test/ompt/misc/control_tool_no_ompt_support.c
+++ b/runtime/test/ompt/misc/control_tool_no_ompt_support.c
@@ -1,4 +1,7 @@
// RUN: %libomp-compile-and-run
+
+// REQUIRES: openmp-5.0
+
#include <omp.h>
int main()
diff --git a/runtime/test/ompt/misc/interoperability.cpp b/runtime/test/ompt/misc/interoperability.cpp
index 102e6de..b07814e 100644
--- a/runtime/test/ompt/misc/interoperability.cpp
+++ b/runtime/test/ompt/misc/interoperability.cpp
@@ -3,7 +3,11 @@
#include <iostream>
#include <thread>
+#if !defined(__NetBSD__)
#include <alloca.h>
+#else
+#include <cstdlib>
+#endif
#include "callback.h"
#include "omp.h"
diff --git a/runtime/test/ompt/parallel/nested.c b/runtime/test/ompt/parallel/nested.c
index 035529c..d91597b 100644
--- a/runtime/test/ompt/parallel/nested.c
+++ b/runtime/test/ompt/parallel/nested.c
@@ -80,25 +80,25 @@ int main()
// THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]]
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
- // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
- // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
+ // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]]
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
- // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
- // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
+ // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
// THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
// explicit barrier
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
- // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+ // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=0x{{[0-f]+}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]]
// THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]]
// THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]]
diff --git a/runtime/test/ompt/parallel/nested_thread_num.c b/runtime/test/ompt/parallel/nested_thread_num.c
index e952f80..f14f87a 100644
--- a/runtime/test/ompt/parallel/nested_thread_num.c
+++ b/runtime/test/ompt/parallel/nested_thread_num.c
@@ -80,7 +80,7 @@ int main() {
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
// THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]],
// THREADS-SAME: parent_task_frame.exit=[[NULL]],
-// THREADS-SAME: parent_task_frame.reenter=[[MAIN_REENTER]],
+// THREADS-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
// THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2,
// THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
// THREADS-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]]
@@ -101,14 +101,14 @@ int main() {
// THREADS: {{^}}[[MASTER_ID]]: task level 1:
// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]],
// THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]],
-// THREADS-SAME: reenter_frame=[[MAIN_REENTER]]
+// THREADS-SAME: reenter_frame=0x{{[0-f]+}}
// THREADS: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin:
// THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]],
// THREADS-SAME: parent_task_frame.exit=[[EXIT]],
-// THREADS-SAME: parent_task_frame.reenter=[[REENTER]],
+// THREADS-SAME: parent_task_frame.reenter=0x{{[0-f]+}},
// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]],
// THREADS-SAME: requested_team_size=2,
// THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}},
@@ -129,12 +129,12 @@ int main() {
// THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]],
// THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]],
-// THREADS-SAME: reenter_frame=[[REENTER]]
+// THREADS-SAME: reenter_frame=0x{{[0-f]+}}
// THREADS: {{^}}[[MASTER_ID]]: task level 2:
// THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]],
// THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]],
-// THREADS-SAME: reenter_frame=[[MAIN_REENTER]]
+// THREADS-SAME: reenter_frame=0x{{[0-f]+}}
// THREADS: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]]
@@ -149,7 +149,7 @@ int main() {
// THREADS: {{^}}[[MASTER_ID]]: task level 0:
// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
// THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]],
-// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]]
+// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=0x{{[0-f]+}}
// THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end:
// THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]],
diff --git a/runtime/test/ompt/parallel/nested_threadnum.c b/runtime/test/ompt/parallel/nested_threadnum.c
new file mode 100644
index 0000000..a248530
--- /dev/null
+++ b/runtime/test/ompt/parallel/nested_threadnum.c
@@ -0,0 +1,62 @@
+// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
+// REQUIRES: ompt
+#include <omp.h>
+#include "callback.h"
+
+int main() {
+ omp_set_nested(1);
+#pragma omp parallel num_threads(2)
+ {
+#pragma omp barrier
+#pragma omp parallel num_threads(2)
+ { print_frame(0); }
+ }
+
+ // CHECK: 0: NULL_POINTER=[[NULL:.*$]]
+
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM1:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin:
+ // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1]]
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM1:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+ // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end
+ // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM1]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+
+ // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: parallel_id=[[PARALLEL_ID]]
+ // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM2:[0-9]+]]
+ // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_parallel_begin:
+ // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2:[0-9]+]]
+ // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2]]
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM2:[0-9]+]]
+ // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_end
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM2]]
+ // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_parallel_end:
+ // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2]]
+ // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_end
+ // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM2]]
+
+ // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM3:[0-9]+]]
+ // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_implicit_task_end
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM3]]
+
+ // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_implicit_task_begin:
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM4:[0-9]+]]
+ // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_implicit_task_end
+ // CHECK-SAME: thread_num=[[INNER_THREAD_NUM4]]
+
+ return 0;
+}
diff --git a/runtime/test/ompt/synchronization/taskwait.c b/runtime/test/ompt/synchronization/taskwait.c
index c431024..cb30f3b 100644
--- a/runtime/test/ompt/synchronization/taskwait.c
+++ b/runtime/test/ompt/synchronization/taskwait.c
@@ -1,6 +1,7 @@
// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s
// REQUIRES: ompt
// UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7
+// XFAIL: powerpc64le, ppc64le
#include "callback.h"
#include <omp.h>
diff --git a/runtime/test/ompt/tasks/explicit_task.c b/runtime/test/ompt/tasks/explicit_task.c
index 01fb3f8..a986c48 100644
--- a/runtime/test/ompt/tasks/explicit_task.c
+++ b/runtime/test/ompt/tasks/explicit_task.c
@@ -52,22 +52,22 @@ int main()
// make sure initial data pointers are null
// CHECK-NOT: 0: new_task_data initially not null
- // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // CHECK--doesnotwork: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// <- ompt_event_task_create would be expected here
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
// CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// explicit barrier after master
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
- // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// implicit barrier parallel
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -78,16 +78,16 @@ int main()
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
// this is expected to come earlier and at MASTER:
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
diff --git a/runtime/test/ompt/tasks/serialized.c b/runtime/test/ompt/tasks/serialized.c
index 12a0281..b1ef45d 100644
--- a/runtime/test/ompt/tasks/serialized.c
+++ b/runtime/test/ompt/tasks/serialized.c
@@ -58,7 +58,7 @@ int main() {
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin
// CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]]
// CHECK-SAME: parent_task_frame.exit=[[NULL]]
- // CHECK-SAME: parent_task_frame.reenter=[[MAIN_REENTER]]
+ // CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}
// CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2
// CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}}
@@ -76,13 +76,13 @@ int main() {
// CHECK: {{^}}[[MASTER_ID]]: task level 1
// CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]]
// CHECK-SAME: task_id=[[PARENT_TASK_ID]],
- // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create
// CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]]
// CHECK-SAME: parent_task_frame.exit=[[EXIT]]
- // CHECK-SAME: parent_task_frame.reenter=[[REENTER]]
+ // CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}}
// CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]]
// CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}
@@ -96,12 +96,12 @@ int main() {
// CHECK: {{^}}[[MASTER_ID]]: task level 1
// CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
- // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: task level 2
// CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
// CHECK-SAME: task_id=[[PARENT_TASK_ID]]
- // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule
// CHECK-SAME: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
@@ -135,7 +135,7 @@ int main() {
// CHECK: {{^}}[[THREAD_ID]]: task level 1
// CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]]
// CHECK-SAME: task_id=[[PARENT_TASK_ID]]
- // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)={{0x[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin
diff --git a/runtime/test/ompt/tasks/task_in_joinbarrier.c b/runtime/test/ompt/tasks/task_in_joinbarrier.c
index 25b57a9..8228add 100644
--- a/runtime/test/ompt/tasks/task_in_joinbarrier.c
+++ b/runtime/test/ompt/tasks/task_in_joinbarrier.c
@@ -50,16 +50,16 @@ int main()
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// <- ompt_event_task_create would be expected here
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// implicit barrier parallel
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -70,7 +70,7 @@ int main()
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// implicit barrier parallel
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -79,7 +79,7 @@ int main()
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
// CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]]
diff --git a/runtime/test/ompt/tasks/untied_task.c b/runtime/test/ompt/tasks/untied_task.c
index e68fa26..4ee3f11 100644
--- a/runtime/test/ompt/tasks/untied_task.c
+++ b/runtime/test/ompt/tasks/untied_task.c
@@ -60,20 +60,20 @@ int main()
// CHECK-NOT: 0: new_task_data initially not null
// CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]]
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]]
// nested parallel masters
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// <- ompt_event_task_create would be expected here
- // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
+ // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]]
// CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
// explicit barrier after master
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
- // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
// implicit barrier parallel
// CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
@@ -84,16 +84,16 @@ int main()
// CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
// this is expected to come earlier and at MASTER:
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]]
// CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]]
- // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]]
+ // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}}
+ // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}}
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]]
// CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]]
diff --git a/runtime/test/parallel/omp_nested.c b/runtime/test/parallel/omp_nested.c
index 8b78088..d2d5b08 100644
--- a/runtime/test/parallel/omp_nested.c
+++ b/runtime/test/parallel/omp_nested.c
@@ -12,6 +12,8 @@ int test_omp_nested()
#ifdef _OPENMP
if (omp_get_max_threads() > 4)
omp_set_num_threads(4);
+ if (omp_get_max_threads() < 2)
+ omp_set_num_threads(2);
#endif
int counter = 0;
diff --git a/runtime/test/tasking/bug_nested_proxy_task.c b/runtime/test/tasking/bug_nested_proxy_task.c
index 6c00822..84e4dfd 100644
--- a/runtime/test/tasking/bug_nested_proxy_task.c
+++ b/runtime/test/tasking/bug_nested_proxy_task.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile -lpthread && %libomp-run
+// REQUIRES: openmp-4.5
// The runtime currently does not get dependency information from GCC.
// UNSUPPORTED: gcc
diff --git a/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/runtime/test/tasking/bug_proxy_task_dep_waiting.c
index e6dd895..fe8f18d 100644
--- a/runtime/test/tasking/bug_proxy_task_dep_waiting.c
+++ b/runtime/test/tasking/bug_proxy_task_dep_waiting.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile -lpthread && %libomp-run
+// REQUIRES: openmp-4.5
// The runtime currently does not get dependency information from GCC.
// UNSUPPORTED: gcc
diff --git a/runtime/test/tasking/kmp_task_reduction_nest.cpp b/runtime/test/tasking/kmp_task_reduction_nest.cpp
index 63dffe4..019a9fe 100644
--- a/runtime/test/tasking/kmp_task_reduction_nest.cpp
+++ b/runtime/test/tasking/kmp_task_reduction_nest.cpp
@@ -1,5 +1,6 @@
// RUN: %libomp-cxx-compile-and-run
// RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run
+// REQUIRES: openmp-5.0
// GCC-5 is needed for OpenMP 4.0 support (taskgroup)
// XFAIL: gcc-4
#include <cstdio>
diff --git a/runtime/test/tasking/kmp_taskloop.c b/runtime/test/tasking/kmp_taskloop.c
index 4b13793..359f7a4 100644
--- a/runtime/test/tasking/kmp_taskloop.c
+++ b/runtime/test/tasking/kmp_taskloop.c
@@ -1,5 +1,6 @@
// RUN: %libomp-compile-and-run
// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+// REQUIRES: openmp-4.5
#include <stdio.h>
#include <omp.h>
#include "omp_my_sleep.h"
diff --git a/runtime/test/tasking/omp_task.c b/runtime/test/tasking/omp_task.c
index c534abe..5703225 100644
--- a/runtime/test/tasking/omp_task.c
+++ b/runtime/test/tasking/omp_task.c
@@ -43,6 +43,9 @@ int main()
int i;
int num_failed=0;
+ if (omp_get_max_threads() < 2)
+ omp_set_num_threads(8);
+
for(i = 0; i < REPETITIONS; i++) {
if(!test_omp_task()) {
num_failed++;
diff --git a/runtime/test/tasking/omp_task_priority.c b/runtime/test/tasking/omp_task_priority.c
index 7b62360..6acb4a8 100644
--- a/runtime/test/tasking/omp_task_priority.c
+++ b/runtime/test/tasking/omp_task_priority.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 %libomp-run
+// REQUIRES: openmp-4.5
// Test OMP 4.5 task priorities
// Currently only API function and envirable parsing implemented.
// Test environment sets envirable: OMP_MAX_TASK_PRIORITY=42 as tested below.
diff --git a/runtime/test/tasking/omp_taskloop_grainsize.c b/runtime/test/tasking/omp_taskloop_grainsize.c
index 0833073..c5756a4 100644
--- a/runtime/test/tasking/omp_taskloop_grainsize.c
+++ b/runtime/test/tasking/omp_taskloop_grainsize.c
@@ -1,5 +1,6 @@
// RUN: %libomp-compile-and-run
// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+// REQUIRES: openmp-4.5
// These compilers don't support the taskloop construct
// UNSUPPORTED: gcc-4, gcc-5, icc-16
diff --git a/runtime/test/tasking/omp_taskloop_num_tasks.c b/runtime/test/tasking/omp_taskloop_num_tasks.c
index 7c3c704..75efea6 100644
--- a/runtime/test/tasking/omp_taskloop_num_tasks.c
+++ b/runtime/test/tasking/omp_taskloop_num_tasks.c
@@ -1,5 +1,6 @@
// RUN: %libomp-compile-and-run
// RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run
+// REQUIRES: openmp-4.5
// These compilers don't support the taskloop construct
// UNSUPPORTED: gcc-4, gcc-5, icc-16
diff --git a/runtime/test/tasking/omp_taskyield.c b/runtime/test/tasking/omp_taskyield.c
index 5bb6984..7f85413 100644
--- a/runtime/test/tasking/omp_taskyield.c
+++ b/runtime/test/tasking/omp_taskyield.c
@@ -49,6 +49,9 @@ int main()
int i;
int num_failed=0;
+ if (omp_get_max_threads() < 2)
+ omp_set_num_threads(8);
+
for(i = 0; i < REPETITIONS; i++) {
if(!test_omp_taskyield()) {
num_failed++;
diff --git a/runtime/test/worksharing/for/kmp_doacross_check.c b/runtime/test/worksharing/for/kmp_doacross_check.c
index 59b61e3..4eea328 100644
--- a/runtime/test/worksharing/for/kmp_doacross_check.c
+++ b/runtime/test/worksharing/for/kmp_doacross_check.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
// UNSUPPORTED: gcc
// This test is incompatible with gcc because of the explicit call to
// __kmpc_doacross_fini(). gcc relies on an implicit call to this function
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/runtime/test/worksharing/for/kmp_sch_simd_guided.c
index 5c6f94b..6cf5d2f 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_guided.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_guided.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
/*
Test for the 'schedule(simd:guided)' clause.
Compiler needs to generate a dynamic dispatching and pass the schedule
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
index bb538d1..8b5f34a 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
// The test checks schedule(simd:runtime)
// in combination with omp_set_schedule()
@@ -66,6 +67,7 @@ run_loop(
int ub; // Chunk upper bound.
int st; // Chunk stride.
int rc;
+ int nthreads = omp_get_num_threads();
int tid = omp_get_thread_num();
int gtid = __kmpc_global_thread_num(&loc);
int last;
@@ -134,7 +136,7 @@ run_loop(
printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
// Update maximum for the next chunk.
if (last) {
- if (!no_chunk && cur > ch)
+ if (!no_chunk && cur > ch && nthreads > 1)
printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
(int)cur, ch, tid, ++err);
} else {
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
index d137831..142e9b3 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c
@@ -6,6 +6,7 @@
// RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1
// RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2
// RUN: env OMP_SCHEDULE=auto %libomp-run
+// REQUIRES: openmp-4.5
// The test checks schedule(simd:runtime)
// in combination with OMP_SCHEDULE=guided[,chunk]
@@ -74,6 +75,7 @@ run_loop(
int ub; // Chunk upper bound.
int st; // Chunk stride.
int rc;
+ int nthreads = omp_get_num_threads();
int tid = omp_get_thread_num();
int gtid = __kmpc_global_thread_num(&loc);
int last;
@@ -144,7 +146,7 @@ run_loop(
if (!last && cur % ch)
printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n",
chunk, (int)cur, ch, tid, ++err);
- if (last && !no_chunk && cur > ch)
+ if (last && !no_chunk && cur > ch && nthreads > 1)
printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
(int)cur, ch, tid, ++err);
if (cur < max)
diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
index 4cb15d6..e2c878f 100644
--- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
+++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c
@@ -1,5 +1,6 @@
// RUN: %libomp-compile && %libomp-run
// RUN: %libomp-run 1 && %libomp-run 2
+// REQUIRES: openmp-4.5
// The test checks schedule(simd:runtime)
// in combination with OMP_SCHEDULE=static[,chunk]
@@ -67,6 +68,7 @@ run_loop(
int ub; // Chunk upper bound.
int st; // Chunk stride.
int rc;
+ int nthreads = omp_get_num_threads();
int tid = omp_get_thread_num();
int gtid = __kmpc_global_thread_num(&loc);
int last;
@@ -135,7 +137,7 @@ run_loop(
printf("Error with iter %d, %d, err %d\n", cur, max, ++err);
// Update maximum for the next chunk.
if (last) {
- if (!no_chunk && cur > ch)
+ if (!no_chunk && cur > ch && nthreads > 1)
printf("Error: too big last chunk %d (%d), tid %d, err %d\n",
(int)cur, ch, tid, ++err);
} else {
diff --git a/runtime/test/worksharing/for/omp_doacross.c b/runtime/test/worksharing/for/omp_doacross.c
index 4187112..32e8e82 100644
--- a/runtime/test/worksharing/for/omp_doacross.c
+++ b/runtime/test/worksharing/for/omp_doacross.c
@@ -1,4 +1,5 @@
// RUN: %libomp-compile-and-run
+// REQUIRES: openmp-4.5
// XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16
#include <stdio.h>
#include <stdlib.h>
@@ -51,6 +52,8 @@ int test_doacross() {
int main(int argc, char **argv) {
int i;
int num_failed = 0;
+ if (omp_get_max_threads() < 2)
+ omp_set_num_threads(4);
for (i = 0; i < REPETITIONS; i++) {
if (!test_doacross()) {
num_failed++;