diff options
author | Pirama Arumuga Nainar <pirama@google.com> | 2019-01-14 11:00:28 -0800 |
---|---|---|
committer | Pirama Arumuga Nainar <pirama@google.com> | 2019-01-14 11:00:28 -0800 |
commit | 4a3ebbfc8cacc0368a4a61943b5ea2bbfdc3e5f5 (patch) | |
tree | 3978b4513cf680d08677e434dadde447d70d9208 | |
parent | 985edb5b045dcb0dd4beb807d882a0220324b520 (diff) | |
parent | 7a5b7589b92524c0d141ec70ab7435d9897972ed (diff) | |
download | openmp_llvm-4a3ebbfc8cacc0368a4a61943b5ea2bbfdc3e5f5.tar.gz |
Merge 7a5b758 for LLVM update to 349610
Change-Id: I1ba0bbe7b606a2539855c4eb67804d1001bd8b0b
118 files changed, 3234 insertions, 1019 deletions
diff --git a/cmake/HandleOpenMPOptions.cmake b/cmake/HandleOpenMPOptions.cmake index 5e5215d..97b616e 100644 --- a/cmake/HandleOpenMPOptions.cmake +++ b/cmake/HandleOpenMPOptions.cmake @@ -13,4 +13,7 @@ if (${OPENMP_ENABLE_WERROR}) append_if(OPENMP_HAVE_WERROR_FLAG "-Werror" CMAKE_C_FLAGS CMAKE_CXX_FLAGS) endif() -append_if(OPENMP_HAVE_STD_CPP11_FLAG "-std=c++11" CMAKE_CXX_FLAGS)
\ No newline at end of file +append_if(OPENMP_HAVE_STD_GNUPP11_FLAG "-std=gnu++11" CMAKE_CXX_FLAGS) +if (NOT OPENMP_HAVE_STD_GNUPP11_FLAG) + append_if(OPENMP_HAVE_STD_CPP11_FLAG "-std=c++11" CMAKE_CXX_FLAGS) +endif() diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 912cbd0..13eace9 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -3,4 +3,5 @@ include(CheckCXXCompilerFlag) check_c_compiler_flag(-Werror OPENMP_HAVE_WERROR_FLAG) -check_cxx_compiler_flag(-std=c++11 OPENMP_HAVE_STD_CPP11_FLAG)
\ No newline at end of file +check_cxx_compiler_flag(-std=gnu++11 OPENMP_HAVE_STD_GNUPP11_FLAG) +check_cxx_compiler_flag(-std=c++11 OPENMP_HAVE_STD_CPP11_FLAG) diff --git a/libomptarget/deviceRTLs/nvptx/src/cancel.cu b/libomptarget/deviceRTLs/nvptx/src/cancel.cu index 77033db..9f92e2d 100644 --- a/libomptarget/deviceRTLs/nvptx/src/cancel.cu +++ b/libomptarget/deviceRTLs/nvptx/src/cancel.cu @@ -13,16 +13,16 @@ #include "omptarget-nvptx.h" -EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid, +EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid, int32_t cancelVal) { - PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", cancelVal); + PRINT(LD_IO, "call kmpc_cancellationpoint(cancel val %d)\n", (int)cancelVal); // disabled return FALSE; } -EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid, +EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid, int32_t cancelVal) { - PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", cancelVal); + PRINT(LD_IO, "call kmpc_cancel(cancel val %d)\n", (int)cancelVal); // disabled return FALSE; } diff --git a/libomptarget/deviceRTLs/nvptx/src/critical.cu b/libomptarget/deviceRTLs/nvptx/src/critical.cu index fef8101..9bf2a30 100644 --- a/libomptarget/deviceRTLs/nvptx/src/critical.cu +++ b/libomptarget/deviceRTLs/nvptx/src/critical.cu @@ -16,17 +16,15 @@ #include "omptarget-nvptx.h" EXTERN -void __kmpc_critical(kmp_Indent *loc, int32_t global_tid, +void __kmpc_critical(kmp_Ident *loc, int32_t global_tid, kmp_CriticalName *lck) { PRINT0(LD_IO, "call to kmpc_critical()\n"); - omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor(); - omp_set_lock(teamDescr.CriticalLock()); + omp_set_lock((omp_lock_t *)lck); } EXTERN -void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid, +void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid, kmp_CriticalName *lck) { PRINT0(LD_IO, "call to kmpc_end_critical()\n"); - omptarget_nvptx_TeamDescr &teamDescr = 
getMyTeamDescriptor(); - omp_unset_lock(teamDescr.CriticalLock()); + omp_unset_lock((omp_lock_t *)lck); } diff --git a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu index bfb8208..f69daa1 100644 --- a/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu +++ b/libomptarget/deviceRTLs/nvptx/src/data_sharing.cu @@ -84,7 +84,7 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS, "Entering __kmpc_initialize_data_sharing_environment\n"); unsigned WID = getWarpId(); - DSPRINT(DSFLAG_INIT, "Warp ID: %d\n", WID); + DSPRINT(DSFLAG_INIT, "Warp ID: %u\n", WID); omptarget_nvptx_TeamDescr *teamDescr = &omptarget_nvptx_threadPrivateContext->TeamContext(); @@ -95,15 +95,16 @@ __kmpc_initialize_data_sharing_environment(__kmpc_data_sharing_slot *rootS, // We don't need to initialize the frame and active threads. - DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", InitialDataSize); - DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (long long)RootS); + DSPRINT(DSFLAG_INIT, "Initial data size: %08x \n", (unsigned)InitialDataSize); + DSPRINT(DSFLAG_INIT, "Root slot at: %016llx \n", (unsigned long long)RootS); DSPRINT(DSFLAG_INIT, "Root slot data-end at: %016llx \n", - (long long)RootS->DataEnd); - DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n", (long long)RootS->Next); + (unsigned long long)RootS->DataEnd); + DSPRINT(DSFLAG_INIT, "Root slot next at: %016llx \n", + (unsigned long long)RootS->Next); DSPRINT(DSFLAG_INIT, "Shared slot ptr at: %016llx \n", - (long long)DataSharingState.SlotPtr[WID]); + (unsigned long long)DataSharingState.SlotPtr[WID]); DSPRINT(DSFLAG_INIT, "Shared stack ptr at: %016llx \n", - (long long)DataSharingState.StackPtr[WID]); + (unsigned long long)DataSharingState.StackPtr[WID]); DSPRINT0(DSFLAG_INIT, "Exiting __kmpc_initialize_data_sharing_environment\n"); } @@ -121,8 +122,9 @@ EXTERN void *__kmpc_data_sharing_environment_begin( if (!IsOMPRuntimeInitialized) return (void 
*)&DataSharingState; - DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize); - DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize); + DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize); + DSPRINT(DSFLAG, "Default Data Size %016llx\n", + (unsigned long long)SharingDefaultDataSize); unsigned WID = getWarpId(); unsigned CurActiveThreads = getActiveThreadsMask(); @@ -139,11 +141,11 @@ EXTERN void *__kmpc_data_sharing_environment_begin( *SavedSharedFrame = FrameP; *SavedActiveThreads = ActiveT; - DSPRINT(DSFLAG, "Warp ID: %d\n", WID); - DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (long long)SlotP); - DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (long long)StackP); + DSPRINT(DSFLAG, "Warp ID: %u\n", WID); + DSPRINT(DSFLAG, "Saved slot ptr at: %016llx \n", (unsigned long long)SlotP); + DSPRINT(DSFLAG, "Saved stack ptr at: %016llx \n", (unsigned long long)StackP); DSPRINT(DSFLAG, "Saved frame ptr at: %016llx \n", (long long)FrameP); - DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT); + DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT); // Only the warp active master needs to grow the stack. 
if (IsWarpMasterActiveThread()) { @@ -161,12 +163,16 @@ EXTERN void *__kmpc_data_sharing_environment_begin( const uintptr_t RequiredEndAddress = CurrentStartAddress + (uintptr_t)SharingDataSize; - DSPRINT(DSFLAG, "Data Size %016llx\n", SharingDataSize); - DSPRINT(DSFLAG, "Default Data Size %016llx\n", SharingDefaultDataSize); - DSPRINT(DSFLAG, "Current Start Address %016llx\n", CurrentStartAddress); - DSPRINT(DSFLAG, "Current End Address %016llx\n", CurrentEndAddress); - DSPRINT(DSFLAG, "Required End Address %016llx\n", RequiredEndAddress); - DSPRINT(DSFLAG, "Active Threads %08x\n", ActiveT); + DSPRINT(DSFLAG, "Data Size %016llx\n", (unsigned long long)SharingDataSize); + DSPRINT(DSFLAG, "Default Data Size %016llx\n", + (unsigned long long)SharingDefaultDataSize); + DSPRINT(DSFLAG, "Current Start Address %016llx\n", + (unsigned long long)CurrentStartAddress); + DSPRINT(DSFLAG, "Current End Address %016llx\n", + (unsigned long long)CurrentEndAddress); + DSPRINT(DSFLAG, "Required End Address %016llx\n", + (unsigned long long)RequiredEndAddress); + DSPRINT(DSFLAG, "Active Threads %08x\n", (unsigned)ActiveT); // If we require a new slot, allocate it and initialize it (or attempt to // reuse one). 
Also, set the shared stack and slot pointers to the new @@ -184,11 +190,11 @@ EXTERN void *__kmpc_data_sharing_environment_begin( (uintptr_t)(&ExistingSlot->Data[0]); if (ExistingSlotSize >= NewSize) { DSPRINT(DSFLAG, "Reusing stack slot %016llx\n", - (long long)ExistingSlot); + (unsigned long long)ExistingSlot); NewSlot = ExistingSlot; } else { DSPRINT(DSFLAG, "Cleaning up -failed reuse - %016llx\n", - (long long)SlotP->Next); + (unsigned long long)SlotP->Next); free(ExistingSlot); } } @@ -197,7 +203,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( NewSlot = (__kmpc_data_sharing_slot *)malloc( sizeof(__kmpc_data_sharing_slot) + NewSize); DSPRINT(DSFLAG, "New slot allocated %016llx (data size=%016llx)\n", - (long long)NewSlot, NewSize); + (unsigned long long)NewSlot, NewSize); } NewSlot->Next = 0; @@ -213,7 +219,7 @@ EXTERN void *__kmpc_data_sharing_environment_begin( // not eliminate them because that may be used to return data. if (SlotP->Next) { DSPRINT(DSFLAG, "Cleaning up - old not required - %016llx\n", - (long long)SlotP->Next); + (unsigned long long)SlotP->Next); free(SlotP->Next); SlotP->Next = 0; } @@ -275,8 +281,8 @@ EXTERN void __kmpc_data_sharing_environment_end( // have other threads that will return after the current ones. ActiveT &= ~CurActive; - DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n", CurActive, - ActiveT); + DSPRINT(DSFLAG, "Active threads: %08x; New mask: %08x\n", + (unsigned)CurActive, (unsigned)ActiveT); if (!ActiveT) { // No other active threads? Great, lets restore the stack. 
@@ -290,10 +296,13 @@ EXTERN void __kmpc_data_sharing_environment_end( FrameP = *SavedSharedFrame; ActiveT = *SavedActiveThreads; - DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n", (long long)SlotP); - DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n", (long long)StackP); - DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n", (long long)FrameP); - DSPRINT(DSFLAG, "Active threads: %08x \n", ActiveT); + DSPRINT(DSFLAG, "Restored slot ptr at: %016llx \n", + (unsigned long long)SlotP); + DSPRINT(DSFLAG, "Restored stack ptr at: %016llx \n", + (unsigned long long)StackP); + DSPRINT(DSFLAG, "Restored frame ptr at: %016llx \n", + (unsigned long long)FrameP); + DSPRINT(DSFLAG, "Active threads: %08x \n", (unsigned)ActiveT); } } @@ -319,7 +328,7 @@ __kmpc_get_data_sharing_environment_frame(int32_t SourceThreadID, unsigned SourceWID = SourceThreadID / WARPSIZE; - DSPRINT(DSFLAG, "Source warp: %d\n", SourceWID); + DSPRINT(DSFLAG, "Source warp: %u\n", SourceWID); void * volatile P = DataSharingState.FramePtr[SourceWID]; DSPRINT0(DSFLAG, "Exiting __kmpc_get_data_sharing_environment_frame\n"); diff --git a/libomptarget/deviceRTLs/nvptx/src/debug.h b/libomptarget/deviceRTLs/nvptx/src/debug.h index 9f59d66..8577c8f 100644 --- a/libomptarget/deviceRTLs/nvptx/src/debug.h +++ b/libomptarget/deviceRTLs/nvptx/src/debug.h @@ -127,6 +127,14 @@ #if OMPTARGET_NVPTX_DEBUG || OMPTARGET_NVPTX_TEST || OMPTARGET_NVPTX_WARNING #include <stdio.h> +#include "option.h" + +template <typename... Arguments> +static NOINLINE void log(const char *fmt, Arguments... 
parameters) { + printf(fmt, (int)blockIdx.x, (int)threadIdx.x, (int)(threadIdx.x / WARPSIZE), + (int)(threadIdx.x & 0x1F), parameters...); +} + #endif #if OMPTARGET_NVPTX_TEST #include <assert.h> @@ -164,16 +172,14 @@ #define PRINT0(_flag, _str) \ { \ if (omptarget_device_environment.debug_level && DON(_flag)) { \ - printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x, \ - threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \ + log("<b %2d, t %4d, w %2d, l %2d>: " _str); \ } \ } #define PRINT(_flag, _str, _args...) \ { \ if (omptarget_device_environment.debug_level && DON(_flag)) { \ - printf("<b %2d, t %4d, w %2d, l %2d>: " _str, blockIdx.x, threadIdx.x, \ - threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \ + log("<b %2d, t %4d, w %2d, l %2d>: " _str, _args); \ } \ } #else @@ -217,16 +223,14 @@ #define ASSERT0(_flag, _cond, _str) \ { \ if (TON(_flag) && !(_cond)) { \ - printf("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n", blockIdx.x, \ - threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \ + log("<b %3d, t %4d, w %2d, l %2d> ASSERT: " _str "\n"); \ assert(_cond); \ } \ } #define ASSERT(_flag, _cond, _str, _args...) \ { \ if (TON(_flag) && !(_cond)) { \ - printf("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", blockIdx.x, \ - threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \ + log("<b %3d, t %4d, w %2d, l %d2> ASSERT: " _str "\n", _args); \ assert(_cond); \ } \ } @@ -253,15 +257,13 @@ #define WARNING0(_flag, _str) \ { \ if (WON(_flag)) { \ - printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x, \ - threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F); \ + log("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str); \ } \ } #define WARNING(_flag, _str, _args...) 
\ { \ if (WON(_flag)) { \ - printf("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, blockIdx.x, \ - threadIdx.x, threadIdx.x / WARPSIZE, threadIdx.x & 0x1F, _args); \ + log("<b %2d, t %4d, w %2d, l %2d> WARNING: " _str, _args); \ } \ } diff --git a/libomptarget/deviceRTLs/nvptx/src/interface.h b/libomptarget/deviceRTLs/nvptx/src/interface.h index 7a37c04..2c2beae 100644 --- a/libomptarget/deviceRTLs/nvptx/src/interface.h +++ b/libomptarget/deviceRTLs/nvptx/src/interface.h @@ -160,8 +160,36 @@ typedef enum kmp_sched_t { } kmp_sched_t; +/*! + * Enum for accesseing the reserved_2 field of the ident_t struct below. + */ +enum { + /*! Bit set to 1 when in SPMD mode. */ + KMP_IDENT_SPMD_MODE = 0x01, + /*! Bit set to 1 when a simplified runtime is used. */ + KMP_IDENT_SIMPLE_RT_MODE = 0x02, +}; + +/*! + * The ident structure that describes a source location. + * The struct is identical to the one in the kmp.h file. + * We maintain the same data structure for compatibility. + */ +typedef int kmp_int32; +typedef struct ident { + kmp_int32 reserved_1; /**< might be used in Fortran; see above */ + kmp_int32 flags; /**< also f.flags; KMP_IDENT_xxx flags; KMP_IDENT_KMPC + identifies this union member */ + kmp_int32 reserved_2; /**< not really used in Fortran any more; see above */ + kmp_int32 reserved_3; /**< source[4] in Fortran, do not use for C++ */ + char const *psource; /**< String describing the source location. + The string is composed of semi-colon separated fields + which describe the source file, the function and a pair + of line numbers that delimit the construct. 
*/ +} ident_t; + // parallel defs -typedef void kmp_Indent; +typedef ident_t kmp_Ident; typedef void (*kmp_ParFctPtr)(int32_t *global_tid, int32_t *bound_tid, ...); typedef void (*kmp_ReductFctPtr)(void *lhsData, void *rhsData); typedef void (*kmp_InterWarpCopyFctPtr)(void *src, int32_t warp_num); @@ -223,28 +251,28 @@ typedef int32_t kmp_CriticalName[8]; //////////////////////////////////////////////////////////////////////////////// // query -EXTERN int32_t __kmpc_global_num_threads(kmp_Indent *loc); // missing -EXTERN int32_t __kmpc_bound_thread_num(kmp_Indent *loc); // missing -EXTERN int32_t __kmpc_bound_num_threads(kmp_Indent *loc); // missing -EXTERN int32_t __kmpc_in_parallel(kmp_Indent *loc); // missing +EXTERN int32_t __kmpc_global_num_threads(kmp_Ident *loc); // missing +EXTERN int32_t __kmpc_bound_thread_num(kmp_Ident *loc); // missing +EXTERN int32_t __kmpc_bound_num_threads(kmp_Ident *loc); // missing +EXTERN int32_t __kmpc_in_parallel(kmp_Ident *loc); // missing // parallel -EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc); -EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t global_tid, +EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc); +EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t global_tid, int32_t num_threads); // simd -EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t global_tid, int32_t simd_limit); // aee ... 
not supported -// EXTERN void __kmpc_fork_call(kmp_Indent *loc, int32_t argc, kmp_ParFctPtr +// EXTERN void __kmpc_fork_call(kmp_Ident *loc, int32_t argc, kmp_ParFctPtr // microtask, ...); -EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid); -EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc, +EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid); +EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc, uint32_t global_tid); -EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid); +EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid); // proc bind -EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t global_tid, int proc_bind); EXTERN int omp_get_num_places(void); EXTERN int omp_get_place_num_procs(int place_num); @@ -254,52 +282,52 @@ EXTERN int omp_get_partition_num_places(void); EXTERN void omp_get_partition_place_nums(int *place_nums); // for static (no chunk or chunk) -EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk); -EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk); -EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk); -EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_8u(kmp_Ident 
*loc, int32_t global_tid, int32_t sched, int32_t *plastiter1, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk); EXTERN -void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk); EXTERN -void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk); EXTERN -void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk); EXTERN -void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk); EXTERN -void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc, +void __kmpc_for_static_init_4_simple_generic(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, int32_t *plower, int32_t *pupper, @@ -307,11 +335,11 @@ void __kmpc_for_static_init_4_simple_generic(kmp_Indent *loc, int32_t chunk); EXTERN void __kmpc_for_static_init_4u_simple_generic( - kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, + kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk); EXTERN -void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc, +void 
__kmpc_for_static_init_8_simple_generic(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter, int64_t *plower, int64_t *pupper, @@ -319,48 +347,48 @@ void __kmpc_for_static_init_8_simple_generic(kmp_Indent *loc, int64_t chunk); EXTERN void __kmpc_for_static_init_8u_simple_generic( - kmp_Indent *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1, + kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t *plastiter1, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk); -EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid); +EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid); // for dynamic -EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t global_tid, int32_t sched, int32_t lower, int32_t upper, int32_t incr, int32_t chunk); -EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t global_tid, int32_t sched, uint32_t lower, uint32_t upper, int32_t incr, int32_t chunk); -EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t global_tid, int32_t sched, int64_t lower, int64_t upper, int64_t incr, int64_t chunk); -EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t global_tid, int32_t sched, uint64_t lower, uint64_t upper, int64_t incr, int64_t chunk); -EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t global_tid, +EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t global_tid, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride); -EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t global_tid, +EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t global_tid, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, 
int32_t *pstride); -EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t global_tid, +EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t global_tid, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride); -EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t global_tid, +EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t global_tid, int32_t *plastiter, uint64_t *plower, uint64_t *pupper, int64_t *pstride); -EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t global_tid); +EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t global_tid); // Support for reducing conditional lastprivate variables -EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc, +EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t global_tid, int32_t varNum, void *array); @@ -391,67 +419,73 @@ EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple_generic( int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data, kmp_ShuffleReductFctPtr shflFct, kmp_InterWarpCopyFctPtr cpyFct, kmp_CopyToScratchpadFctPtr sratchFct, kmp_LoadReduceFctPtr ldFct); +EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc, + int32_t global_tid, + kmp_CriticalName *crit); +EXTERN void __kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, + int32_t global_tid, + kmp_CriticalName *crit); EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size); EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size); // sync 
barrier -EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid); -EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid); -EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid); -EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc, int32_t global_tid); +EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid); +EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid); +EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid); +EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc, int32_t global_tid); // single -EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid); +EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid); // sync -EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t global_tid); -EXTERN void __kmpc_critical(kmp_Indent *loc, int32_t global_tid, +EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t global_tid); +EXTERN void __kmpc_critical(kmp_Ident *loc, int32_t global_tid, kmp_CriticalName *crit); -EXTERN void __kmpc_end_critical(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_end_critical(kmp_Ident *loc, int32_t global_tid, kmp_CriticalName *crit); -EXTERN void __kmpc_flush(kmp_Indent *loc); +EXTERN void __kmpc_flush(kmp_Ident *loc); // vote EXTERN int32_t __kmpc_warp_active_thread_mask(); // tasks -EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc(kmp_Indent *loc, +EXTERN kmp_TaskDescr 
*__kmpc_omp_task_alloc(kmp_Ident *loc, uint32_t global_tid, int32_t flag, size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable, kmp_TaskFctPtr sub); -EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid, +EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newLegacyTaskDescr); -EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid, +EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newLegacyTaskDescr, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newLegacyTaskDescr); -EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newLegacyTaskDescr); -EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList); -EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid); -EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid); -EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid); +EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid); +EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid, int end_part); -EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid); -EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid, +EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid); +EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr, int if_val, uint64_t *lb, uint64_t 
*ub, int64_t st, int nogroup, int32_t sched, uint64_t grainsize, void *task_dup); // cancel -EXTERN int32_t __kmpc_cancellationpoint(kmp_Indent *loc, int32_t global_tid, +EXTERN int32_t __kmpc_cancellationpoint(kmp_Ident *loc, int32_t global_tid, int32_t cancelVal); -EXTERN int32_t __kmpc_cancel(kmp_Indent *loc, int32_t global_tid, +EXTERN int32_t __kmpc_cancel(kmp_Ident *loc, int32_t global_tid, int32_t cancelVal); // non standard @@ -460,7 +494,8 @@ EXTERN void __kmpc_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime); EXTERN void __kmpc_kernel_deinit(int16_t IsOMPRuntimeInitialized); EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, int16_t RequiresDataSharing); -EXTERN void __kmpc_spmd_kernel_deinit(); +EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit(); +EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime); EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, int16_t IsOMPRuntimeInitialized); EXTERN bool __kmpc_kernel_parallel(void **WorkFn, diff --git a/libomptarget/deviceRTLs/nvptx/src/libcall.cu b/libomptarget/deviceRTLs/nvptx/src/libcall.cu index ea9225d..91b270c 100644 --- a/libomptarget/deviceRTLs/nvptx/src/libcall.cu +++ b/libomptarget/deviceRTLs/nvptx/src/libcall.cu @@ -222,9 +222,11 @@ EXTERN int omp_get_ancestor_thread_num(int level) { " chunk %" PRIu64 "; tid %d, tnum %d, nthreads %d\n", "ancestor", steps, (currTaskDescr->IsParallelConstruct() ? 
"par" : "task"), - currTaskDescr->InParallelRegion(), sched, - currTaskDescr->RuntimeChunkSize(), currTaskDescr->ThreadId(), - currTaskDescr->ThreadsInTeam(), currTaskDescr->NThreads()); + (int)currTaskDescr->InParallelRegion(), (int)sched, + currTaskDescr->RuntimeChunkSize(), + (int)currTaskDescr->ThreadId(), + (int)currTaskDescr->ThreadsInTeam(), + (int)currTaskDescr->NThreads()); } if (currTaskDescr->IsParallelConstruct()) { @@ -404,23 +406,21 @@ EXTERN int omp_get_max_task_priority(void) { #define SET 1 EXTERN void omp_init_lock(omp_lock_t *lock) { - *lock = UNSET; + omp_unset_lock(lock); PRINT0(LD_IO, "call omp_init_lock()\n"); } EXTERN void omp_destroy_lock(omp_lock_t *lock) { + omp_unset_lock(lock); PRINT0(LD_IO, "call omp_destroy_lock()\n"); } EXTERN void omp_set_lock(omp_lock_t *lock) { // int atomicCAS(int* address, int compare, int val); // (old == compare ? val : old) - int compare = UNSET; - int val = SET; // TODO: not sure spinning is a good idea here.. - while (atomicCAS(lock, compare, val) != UNSET) { - + while (atomicCAS(lock, UNSET, SET) != UNSET) { clock_t start = clock(); clock_t now; for (;;) { @@ -436,9 +436,7 @@ EXTERN void omp_set_lock(omp_lock_t *lock) { } EXTERN void omp_unset_lock(omp_lock_t *lock) { - int compare = SET; - int val = UNSET; - int old = atomicCAS(lock, compare, val); + (void)atomicExch(lock, UNSET); PRINT0(LD_IO, "call omp_unset_lock()\n"); } @@ -446,10 +444,7 @@ EXTERN void omp_unset_lock(omp_lock_t *lock) { EXTERN int omp_test_lock(omp_lock_t *lock) { // int atomicCAS(int* address, int compare, int val); // (old == compare ? 
val : old) - int compare = UNSET; - int val = SET; - - int ret = atomicCAS(lock, compare, val); + int ret = atomicAdd(lock, 0); PRINT(LD_IO, "call omp_test_lock() return %d\n", ret); diff --git a/libomptarget/deviceRTLs/nvptx/src/loop.cu b/libomptarget/deviceRTLs/nvptx/src/loop.cu index bd84f0f..c100be5 100644 --- a/libomptarget/deviceRTLs/nvptx/src/loop.cu +++ b/libomptarget/deviceRTLs/nvptx/src/loop.cu @@ -93,9 +93,10 @@ public: //////////////////////////////////////////////////////////////////////////////// // Support for Static Init - INLINE static void for_static_init(int32_t schedtype, int32_t *plastiter, - T *plower, T *pupper, ST *pstride, - ST chunk, bool IsSPMDExecutionMode, + INLINE static void for_static_init(int32_t gtid, int32_t schedtype, + int32_t *plastiter, T *plower, T *pupper, + ST *pstride, ST chunk, + bool IsSPMDExecutionMode, bool IsRuntimeUninitialized) { // When IsRuntimeUninitialized is true, we assume that the caller is // in an L0 parallel region and that all worker threads participate. 
@@ -112,108 +113,73 @@ public: PRINT(LD_LOOP, "OMP Thread %d: schedule type %d, chunk size = %lld, mytid " "%d, num tids %d\n", - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized), - schedtype, P64(chunk), - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized), - GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized)); - ASSERT0( - LT_FUSSY, - (GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized)) < - (GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized)), - "current thread is not needed here; error"); + (int)gtid, (int)schedtype, (long long)chunk, (int)gtid, + (int)numberOfActiveOMPThreads); + ASSERT0(LT_FUSSY, gtid < numberOfActiveOMPThreads, + "current thread is not needed here; error"); // copy int lastiter = 0; T lb = *plower; T ub = *pupper; ST stride = *pstride; - T entityId, numberOfEntities; // init switch (SCHEDULE_WITHOUT_MODIFIERS(schedtype)) { case kmp_sched_static_chunk: { if (chunk > 0) { - entityId = - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized); - numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized); - ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid, + numberOfActiveOMPThreads); break; } } // note: if chunk <=0, use nochunk case kmp_sched_static_balanced_chunk: { if (chunk > 0) { - entityId = - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized); - numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized); - // round up to make sure the chunk is enough to cover all iterations T tripCount = ub - lb + 1; // +1 because ub is inclusive - T span = (tripCount + numberOfEntities - 1) / numberOfEntities; + T span = (tripCount + numberOfActiveOMPThreads - 1) / + numberOfActiveOMPThreads; // perform chunk adjustment chunk = (span + chunk - 1) & ~(chunk - 1); ASSERT0(LT_FUSSY, ub >= lb, "ub 
must be >= lb."); T oldUb = ub; - ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid, + numberOfActiveOMPThreads); if (ub > oldUb) ub = oldUb; break; } } // note: if chunk <=0, use nochunk case kmp_sched_static_nochunk: { - entityId = - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized); - numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized); - ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + ForStaticNoChunk(lastiter, lb, ub, stride, chunk, gtid, + numberOfActiveOMPThreads); break; } case kmp_sched_distr_static_chunk: { if (chunk > 0) { - entityId = GetOmpTeamId(); - numberOfEntities = GetNumberOfOmpTeams(); - ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + ForStaticChunk(lastiter, lb, ub, stride, chunk, GetOmpTeamId(), + GetNumberOfOmpTeams()); break; } // note: if chunk <=0, use nochunk } case kmp_sched_distr_static_nochunk: { - entityId = GetOmpTeamId(); - numberOfEntities = GetNumberOfOmpTeams(); - - ForStaticNoChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + ForStaticNoChunk(lastiter, lb, ub, stride, chunk, GetOmpTeamId(), + GetNumberOfOmpTeams()); break; } case kmp_sched_distr_static_chunk_sched_static_chunkone: { - entityId = - GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized) * - GetOmpTeamId() + - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized); - numberOfEntities = GetNumberOfOmpTeams() * - GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized); - ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + ForStaticChunk(lastiter, lb, ub, stride, chunk, + numberOfActiveOMPThreads * GetOmpTeamId() + gtid, + GetNumberOfOmpTeams() * numberOfActiveOMPThreads); break; } default: { - ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", schedtype); + 
ASSERT(LT_FUSSY, FALSE, "unknown schedtype %d", (int)schedtype); PRINT(LD_LOOP, "unknown schedtype %d, revert back to static chunk\n", - schedtype); - entityId = - GetOmpThreadId(tid, IsSPMDExecutionMode, IsRuntimeUninitialized); - numberOfEntities = GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, - IsRuntimeUninitialized); - ForStaticChunk(lastiter, lb, ub, stride, chunk, entityId, - numberOfEntities); + (int)schedtype); + ForStaticChunk(lastiter, lb, ub, stride, chunk, gtid, + numberOfActiveOMPThreads); + break; } } // copy back @@ -221,13 +187,12 @@ public: *plower = lb; *pupper = ub; *pstride = stride; - PRINT( - LD_LOOP, - "Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last " - "%d\n", - GetNumberOfOmpThreads(tid, IsSPMDExecutionMode, IsRuntimeUninitialized), - GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride), - lastiter); + PRINT(LD_LOOP, + "Got sched: Active %d, total %d: lb %lld, ub %lld, stride %lld, last " + "%d\n", + (int)numberOfActiveOMPThreads, (int)GetNumberOfWorkersInTeam(), + (long long)(*plower), (long long)(*pupper), (long long)(*pstride), + (int)lastiter); } //////////////////////////////////////////////////////////////////////////////// @@ -238,20 +203,17 @@ public: schedule <= kmp_sched_ordered_last; } - INLINE static void dispatch_init(kmp_Indent *loc, int32_t threadId, + INLINE static void dispatch_init(kmp_Ident *loc, int32_t threadId, kmp_sched_t schedule, T lb, T ub, ST st, ST chunk) { - ASSERT0(LT_FUSSY, isRuntimeInitialized(), + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Expected non-SPMD mode + initialized runtime."); int tid = GetLogicalThreadIdInBlock(); omptarget_nvptx_TaskDescr *currTaskDescr = getMyTopTaskDescriptor(tid); T tnum = currTaskDescr->ThreadsInTeam(); T tripCount = ub - lb + 1; // +1 because ub is inclusive - ASSERT0( - LT_FUSSY, - GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) < - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), - "current 
thread is not needed here; error"); + ASSERT0(LT_FUSSY, threadId < tnum, + "current thread is not needed here; error"); /* Currently just ignore the monotonic and non-monotonic modifiers * (the compiler isn't producing them * yet anyway). @@ -269,7 +231,7 @@ public: __kmpc_barrier(loc, threadId); PRINT(LD_LOOP, "go sequential as tnum=%ld, trip count %lld, ordered sched=%d\n", - (long)tnum, P64(tripCount), schedule); + (long)tnum, (long long)tripCount, (int)schedule); schedule = kmp_sched_static_chunk; chunk = tripCount; // one thread gets the whole loop } else if (schedule == kmp_sched_runtime) { @@ -295,18 +257,20 @@ public: break; } } - PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", schedule, - P64(chunk)); + PRINT(LD_LOOP, "Runtime sched is %d with chunk %lld\n", (int)schedule, + (long long)chunk); } else if (schedule == kmp_sched_auto) { schedule = kmp_sched_static_chunk; chunk = 1; - PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", schedule, - P64(chunk)); + PRINT(LD_LOOP, "Auto sched is %d with chunk %lld\n", (int)schedule, + (long long)chunk); } else { - PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", schedule, P64(chunk)); + PRINT(LD_LOOP, "Dyn sched is %d with chunk %lld\n", (int)schedule, + (long long)chunk); ASSERT(LT_FUSSY, schedule == kmp_sched_dynamic || schedule == kmp_sched_guided, - "unknown schedule %d & chunk %lld\n", schedule, P64(chunk)); + "unknown schedule %d & chunk %lld\n", (int)schedule, + (long long)chunk); } // init schedules @@ -319,9 +283,7 @@ public: // compute static chunk ST stride; int lastiter = 0; - ForStaticChunk( - lastiter, lb, ub, stride, chunk, - GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum); + ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum); // save computed params omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk; omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb; @@ -329,10 +291,12 @@ public: PRINT(LD_LOOP, "dispatch init (static chunk) : num 
threads = %d, ub = %" PRId64 ", next lower bound = %llu, stride = %llu\n", - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), + (int)tnum, omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid), - omptarget_nvptx_threadPrivateContext->NextLowerBound(tid), - omptarget_nvptx_threadPrivateContext->Stride(tid)); + (unsigned long long) + omptarget_nvptx_threadPrivateContext->NextLowerBound(tid), + (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride( + tid)); } else if (schedule == kmp_sched_static_balanced_chunk) { ASSERT0(LT_FUSSY, chunk > 0, "bad chunk value"); // save sched state @@ -348,9 +312,7 @@ public: chunk = (span + chunk - 1) & ~(chunk - 1); T oldUb = ub; - ForStaticChunk( - lastiter, lb, ub, stride, chunk, - GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum); + ForStaticChunk(lastiter, lb, ub, stride, chunk, threadId, tnum); ASSERT0(LT_FUSSY, ub >= lb, "ub must be >= lb."); if (ub > oldUb) ub = oldUb; @@ -361,10 +323,12 @@ public: PRINT(LD_LOOP, "dispatch init (static chunk) : num threads = %d, ub = %" PRId64 ", next lower bound = %llu, stride = %llu\n", - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), + (int)tnum, omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid), - omptarget_nvptx_threadPrivateContext->NextLowerBound(tid), - omptarget_nvptx_threadPrivateContext->Stride(tid)); + (unsigned long long) + omptarget_nvptx_threadPrivateContext->NextLowerBound(tid), + (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride( + tid)); } else if (schedule == kmp_sched_static_nochunk) { ASSERT0(LT_FUSSY, chunk == 0, "bad chunk value"); // save sched state @@ -374,9 +338,7 @@ public: // compute static chunk ST stride; int lastiter = 0; - ForStaticNoChunk( - lastiter, lb, ub, stride, chunk, - GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()), tnum); + ForStaticNoChunk(lastiter, lb, ub, stride, chunk, threadId, tnum); // save computed params 
omptarget_nvptx_threadPrivateContext->Chunk(tid) = chunk; omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = lb; @@ -384,10 +346,12 @@ public: PRINT(LD_LOOP, "dispatch init (static nochunk) : num threads = %d, ub = %" PRId64 ", next lower bound = %llu, stride = %llu\n", - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), + (int)tnum, omptarget_nvptx_threadPrivateContext->LoopUpperBound(tid), - omptarget_nvptx_threadPrivateContext->NextLowerBound(tid), - omptarget_nvptx_threadPrivateContext->Stride(tid)); + (unsigned long long) + omptarget_nvptx_threadPrivateContext->NextLowerBound(tid), + (unsigned long long)omptarget_nvptx_threadPrivateContext->Stride( + tid)); } else if (schedule == kmp_sched_dynamic || schedule == kmp_sched_guided) { __kmpc_barrier(loc, threadId); @@ -405,8 +369,9 @@ public: PRINT(LD_LOOP, "dispatch init (dyn) : num threads = %d, lb = %llu, ub = %" PRId64 ", chunk %" PRIu64 "\n", - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), - omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId), + (int)tnum, + (unsigned long long) + omptarget_nvptx_threadPrivateContext->NextLowerBound(teamId), omptarget_nvptx_threadPrivateContext->LoopUpperBound(teamId), omptarget_nvptx_threadPrivateContext->Chunk(teamId)); } @@ -430,41 +395,40 @@ public: // c. lb and ub >= loopUpperBound: empty chunk --> FINISHED // a. if (lb <= loopUpperBound && ub < loopUpperBound) { - PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n", P64(lb), - P64(ub), P64(loopUpperBound)); + PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; not finished\n", + (long long)lb, (long long)ub, (long long)loopUpperBound); return NOT_FINISHED; } // b. if (lb <= loopUpperBound) { PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; clip to loop ub\n", - P64(lb), P64(ub), P64(loopUpperBound)); + (long long)lb, (long long)ub, (long long)loopUpperBound); ub = loopUpperBound; return LAST_CHUNK; } // c. 
if we are here, we are in case 'c' lb = loopUpperBound + 2; ub = loopUpperBound + 1; - PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", P64(lb), - P64(ub), P64(loopUpperBound)); + PRINT(LD_LOOPD, "lb %lld, ub %lld, loop ub %lld; finished\n", (long long)lb, + (long long)ub, (long long)loopUpperBound); return FINISHED; } // On Pascal, with inlining of the runtime into the user application, // this code deadlocks. This is probably because different threads // in a warp cannot make independent progress. - NOINLINE static int dispatch_next(int32_t *plast, T *plower, T *pupper, - ST *pstride) { + NOINLINE static int dispatch_next(int32_t gtid, int32_t *plast, T *plower, + T *pupper, ST *pstride) { ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Expected non-SPMD mode + initialized runtime."); // ID of a thread in its own warp // automatically selects thread or warp ID based on selected implementation int tid = GetLogicalThreadIdInBlock(); - ASSERT0( - LT_FUSSY, - GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()) < - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), - "current thread is not needed here; error"); + ASSERT0(LT_FUSSY, + gtid < GetNumberOfOmpThreads(tid, isSPMDMode(), + isRuntimeUninitialized()), + "current thread is not needed here; error"); // retrieve schedule kmp_sched_t schedule = omptarget_nvptx_threadPrivateContext->ScheduleType(tid); @@ -477,7 +441,7 @@ public: // finished? 
if (myLb > ub) { PRINT(LD_LOOP, "static loop finished with myLb %lld, ub %lld\n", - P64(myLb), P64(ub)); + (long long)myLb, (long long)ub); return DISPATCH_FINISHED; } // not finished, save current bounds @@ -493,7 +457,7 @@ public: ST stride = omptarget_nvptx_threadPrivateContext->Stride(tid); omptarget_nvptx_threadPrivateContext->NextLowerBound(tid) = myLb + stride; PRINT(LD_LOOP, "static loop continues with myLb %lld, myUb %lld\n", - P64(*plower), P64(*pupper)); + (long long)*plower, (long long)*pupper); return DISPATCH_NOTFINISHED; } ASSERT0(LT_FUSSY, @@ -515,12 +479,13 @@ public: *pupper = myUb; *pstride = 1; - PRINT(LD_LOOP, - "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, " - "last %d\n", - GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), - GetNumberOfWorkersInTeam(), P64(*plower), P64(*pupper), P64(*pstride), - *plast); + PRINT( + LD_LOOP, + "Got sched: active %d, total %d: lb %lld, ub %lld, stride = %lld, " + "last %d\n", + (int)GetNumberOfOmpThreads(tid, isSPMDMode(), isRuntimeUninitialized()), + (int)GetNumberOfWorkersInTeam(), (long long)*plower, (long long)*pupper, + (long long)*pstride, (int)*plast); return DISPATCH_NOTFINISHED; } @@ -538,7 +503,7 @@ public: //////////////////////////////////////////////////////////////////////////////// // init -EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_dispatch_init_4(kmp_Ident *loc, int32_t tid, int32_t schedule, int32_t lb, int32_t ub, int32_t st, int32_t chunk) { PRINT0(LD_IO, "call kmpc_dispatch_init_4\n"); @@ -546,7 +511,7 @@ EXTERN void __kmpc_dispatch_init_4(kmp_Indent *loc, int32_t tid, loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk); } -EXTERN void __kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_dispatch_init_4u(kmp_Ident *loc, int32_t tid, int32_t schedule, uint32_t lb, uint32_t ub, int32_t st, int32_t chunk) { PRINT0(LD_IO, "call kmpc_dispatch_init_4u\n"); @@ -554,7 +519,7 @@ EXTERN void 
__kmpc_dispatch_init_4u(kmp_Indent *loc, int32_t tid, loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk); } -EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_dispatch_init_8(kmp_Ident *loc, int32_t tid, int32_t schedule, int64_t lb, int64_t ub, int64_t st, int64_t chunk) { PRINT0(LD_IO, "call kmpc_dispatch_init_8\n"); @@ -562,7 +527,7 @@ EXTERN void __kmpc_dispatch_init_8(kmp_Indent *loc, int32_t tid, loc, tid, (kmp_sched_t)schedule, lb, ub, st, chunk); } -EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_dispatch_init_8u(kmp_Ident *loc, int32_t tid, int32_t schedule, uint64_t lb, uint64_t ub, int64_t st, int64_t chunk) { PRINT0(LD_IO, "call kmpc_dispatch_init_8u\n"); @@ -571,53 +536,53 @@ EXTERN void __kmpc_dispatch_init_8u(kmp_Indent *loc, int32_t tid, } // next -EXTERN int __kmpc_dispatch_next_4(kmp_Indent *loc, int32_t tid, int32_t *p_last, +EXTERN int __kmpc_dispatch_next_4(kmp_Ident *loc, int32_t tid, int32_t *p_last, int32_t *p_lb, int32_t *p_ub, int32_t *p_st) { PRINT0(LD_IO, "call kmpc_dispatch_next_4\n"); return omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_next( - p_last, p_lb, p_ub, p_st); + tid, p_last, p_lb, p_ub, p_st); } -EXTERN int __kmpc_dispatch_next_4u(kmp_Indent *loc, int32_t tid, +EXTERN int __kmpc_dispatch_next_4u(kmp_Ident *loc, int32_t tid, int32_t *p_last, uint32_t *p_lb, uint32_t *p_ub, int32_t *p_st) { PRINT0(LD_IO, "call kmpc_dispatch_next_4u\n"); return omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_next( - p_last, p_lb, p_ub, p_st); + tid, p_last, p_lb, p_ub, p_st); } -EXTERN int __kmpc_dispatch_next_8(kmp_Indent *loc, int32_t tid, int32_t *p_last, +EXTERN int __kmpc_dispatch_next_8(kmp_Ident *loc, int32_t tid, int32_t *p_last, int64_t *p_lb, int64_t *p_ub, int64_t *p_st) { PRINT0(LD_IO, "call kmpc_dispatch_next_8\n"); return omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_next( - p_last, p_lb, p_ub, p_st); + tid, p_last, p_lb, p_ub, 
p_st); } -EXTERN int __kmpc_dispatch_next_8u(kmp_Indent *loc, int32_t tid, +EXTERN int __kmpc_dispatch_next_8u(kmp_Ident *loc, int32_t tid, int32_t *p_last, uint64_t *p_lb, uint64_t *p_ub, int64_t *p_st) { PRINT0(LD_IO, "call kmpc_dispatch_next_8u\n"); return omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_next( - p_last, p_lb, p_ub, p_st); + tid, p_last, p_lb, p_ub, p_st); } // fini -EXTERN void __kmpc_dispatch_fini_4(kmp_Indent *loc, int32_t tid) { +EXTERN void __kmpc_dispatch_fini_4(kmp_Ident *loc, int32_t tid) { PRINT0(LD_IO, "call kmpc_dispatch_fini_4\n"); omptarget_nvptx_LoopSupport<int32_t, int32_t>::dispatch_fini(); } -EXTERN void __kmpc_dispatch_fini_4u(kmp_Indent *loc, int32_t tid) { +EXTERN void __kmpc_dispatch_fini_4u(kmp_Ident *loc, int32_t tid) { PRINT0(LD_IO, "call kmpc_dispatch_fini_4u\n"); omptarget_nvptx_LoopSupport<uint32_t, int32_t>::dispatch_fini(); } -EXTERN void __kmpc_dispatch_fini_8(kmp_Indent *loc, int32_t tid) { +EXTERN void __kmpc_dispatch_fini_8(kmp_Ident *loc, int32_t tid) { PRINT0(LD_IO, "call kmpc_dispatch_fini_8\n"); omptarget_nvptx_LoopSupport<int64_t, int64_t>::dispatch_fini(); } -EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) { +EXTERN void __kmpc_dispatch_fini_8u(kmp_Ident *loc, int32_t tid) { PRINT0(LD_IO, "call kmpc_dispatch_fini_8u\n"); omptarget_nvptx_LoopSupport<uint64_t, int64_t>::dispatch_fini(); } @@ -626,151 +591,143 @@ EXTERN void __kmpc_dispatch_fini_8u(kmp_Indent *loc, int32_t tid) { // KMP interface implementation (static loops) //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_for_static_init_4(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_4(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_4\n"); omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init( - 
schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(), - isRuntimeUninitialized()); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + checkSPMDMode(loc), checkRuntimeUninitialized(loc)); } -EXTERN void __kmpc_for_static_init_4u(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_4u(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_4u\n"); omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(), - isRuntimeUninitialized()); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + checkSPMDMode(loc), checkRuntimeUninitialized(loc)); } -EXTERN void __kmpc_for_static_init_8(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_8(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_8\n"); omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(), - isRuntimeUninitialized()); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + checkSPMDMode(loc), checkRuntimeUninitialized(loc)); } -EXTERN void __kmpc_for_static_init_8u(kmp_Indent *loc, int32_t global_tid, +EXTERN void __kmpc_for_static_init_8u(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_8u\n"); omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, isSPMDMode(), - isRuntimeUninitialized()); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + 
checkSPMDMode(loc), checkRuntimeUninitialized(loc)); } EXTERN -void __kmpc_for_static_init_4_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_4_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_spmd\n"); omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/true, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true); } EXTERN -void __kmpc_for_static_init_4u_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_4u_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_spmd\n"); omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/true, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true); } EXTERN -void __kmpc_for_static_init_8_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_8_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_spmd\n"); omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/true, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, 
pstride, chunk, + /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true); } EXTERN -void __kmpc_for_static_init_8u_simple_spmd(kmp_Indent *loc, int32_t global_tid, +void __kmpc_for_static_init_8u_simple_spmd(kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_spmd\n"); omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/true, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/true, /*IsRuntimeUninitialized=*/true); } EXTERN void __kmpc_for_static_init_4_simple_generic( - kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, + kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, int32_t *plower, int32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_4_simple_generic\n"); omptarget_nvptx_LoopSupport<int32_t, int32_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/false, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true); } EXTERN void __kmpc_for_static_init_4u_simple_generic( - kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, + kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, uint32_t *plower, uint32_t *pupper, int32_t *pstride, int32_t incr, int32_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_4u_simple_generic\n"); omptarget_nvptx_LoopSupport<uint32_t, int32_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/false, - /*IsRuntimeUninitialized=*/true); + 
global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true); } EXTERN void __kmpc_for_static_init_8_simple_generic( - kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, + kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, int64_t *plower, int64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_8_simple_generic\n"); omptarget_nvptx_LoopSupport<int64_t, int64_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/false, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true); } EXTERN void __kmpc_for_static_init_8u_simple_generic( - kmp_Indent *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, + kmp_Ident *loc, int32_t global_tid, int32_t schedtype, int32_t *plastiter, uint64_t *plower, uint64_t *pupper, int64_t *pstride, int64_t incr, int64_t chunk) { PRINT0(LD_IO, "call kmpc_for_static_init_8u_simple_generic\n"); omptarget_nvptx_LoopSupport<uint64_t, int64_t>::for_static_init( - schedtype, plastiter, plower, pupper, pstride, chunk, - /*IsSPMDExecutionMode=*/false, - /*IsRuntimeUninitialized=*/true); + global_tid, schedtype, plastiter, plower, pupper, pstride, chunk, + /*IsSPMDExecutionMode=*/false, /*IsRuntimeUninitialized=*/true); } -EXTERN void __kmpc_for_static_fini(kmp_Indent *loc, int32_t global_tid) { +EXTERN void __kmpc_for_static_fini(kmp_Ident *loc, int32_t global_tid) { PRINT0(LD_IO, "call kmpc_for_static_fini\n"); } @@ -792,21 +749,20 @@ INLINE void syncWorkersInGenericMode(uint32_t NumThreads) { } }; // namespace -EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Indent *loc, int32_t gtid, +EXTERN void __kmpc_reduce_conditional_lastprivate(kmp_Ident *loc, int32_t gtid, int32_t varNum, void *array) { 
PRINT0(LD_IO, "call to __kmpc_reduce_conditional_lastprivate(...)\n"); - ASSERT0(LT_FUSSY, isRuntimeInitialized(), + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Expected non-SPMD mode + initialized runtime."); omptarget_nvptx_TeamDescr &teamDescr = getMyTeamDescriptor(); - int tid = GetOmpThreadId(GetLogicalThreadIdInBlock(), isSPMDMode(), - isRuntimeUninitialized()); - uint32_t NumThreads = GetNumberOfOmpThreads( - GetLogicalThreadIdInBlock(), isSPMDMode(), isRuntimeUninitialized()); + int tid = GetLogicalThreadIdInBlock(); + uint32_t NumThreads = GetNumberOfOmpThreads(tid, checkSPMDMode(loc), + checkRuntimeUninitialized(loc)); uint64_t *Buffer = teamDescr.getLastprivateIterBuffer(); for (unsigned i = 0; i < varNum; i++) { // Reset buffer. - if (tid == 0) + if (gtid == 0) *Buffer = 0; // Reset to minimum loop iteration value. // Barrier. diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu index 5d95eb1..b0b1290 100644 --- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu +++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.cu @@ -150,7 +150,7 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, PRINT(LD_PAR, "thread will execute parallel region with id %d in a team of " "%d threads\n", - newTaskDescr->ThreadId(), newTaskDescr->ThreadsInTeam()); + (int)newTaskDescr->ThreadId(), (int)newTaskDescr->ThreadsInTeam()); if (RequiresDataSharing && threadId % WARPSIZE == 0) { // Warp master innitializes data sharing environment. 
@@ -162,12 +162,16 @@ EXTERN void __kmpc_spmd_kernel_init(int ThreadLimit, int16_t RequiresOMPRuntime, } } -EXTERN void __kmpc_spmd_kernel_deinit() { +EXTERN __attribute__((deprecated)) void __kmpc_spmd_kernel_deinit() { + __kmpc_spmd_kernel_deinit_v2(isRuntimeInitialized()); +} + +EXTERN void __kmpc_spmd_kernel_deinit_v2(int16_t RequiresOMPRuntime) { // We're not going to pop the task descr stack of each thread since // there are no more parallel regions in SPMD mode. __syncthreads(); int threadId = GetThreadIdInBlock(); - if (isRuntimeUninitialized()) { + if (!RequiresOMPRuntime) { if (threadId == 0) { // Enqueue omp state object for use by another team. int slot = usedSlotIdx; diff --git a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h index 2a6de28..b63feae 100644 --- a/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h +++ b/libomptarget/deviceRTLs/nvptx/src/omptarget-nvptx.h @@ -251,7 +251,6 @@ public: INLINE omptarget_nvptx_WorkDescr &WorkDescr() { return workDescrForActiveParallel; } - INLINE omp_lock_t *CriticalLock() { return &criticalLock; } INLINE uint64_t *getLastprivateIterBuffer() { return &lastprivateIterBuffer; } // init @@ -303,7 +302,6 @@ private: levelZeroTaskDescr; // icv for team master initial thread omptarget_nvptx_WorkDescr workDescrForActiveParallel; // one, ONLY for the active par - omp_lock_t criticalLock; uint64_t lastprivateIterBuffer; __align__(16) diff --git a/libomptarget/deviceRTLs/nvptx/src/parallel.cu b/libomptarget/deviceRTLs/nvptx/src/parallel.cu index 13e64e4..fbcbeab 100644 --- a/libomptarget/deviceRTLs/nvptx/src/parallel.cu +++ b/libomptarget/deviceRTLs/nvptx/src/parallel.cu @@ -76,7 +76,7 @@ EXTERN bool __kmpc_kernel_convergent_simd(void *buffer, uint32_t Mask, else *NumLanes = ConvergentSize; ASSERT(LT_FUSSY, *NumLanes > 0, "bad thread request of %d threads", - *NumLanes); + (int)*NumLanes); // Set to true for lanes participating in the simd region. 
bool isActive = false; @@ -152,7 +152,7 @@ EXTERN bool __kmpc_kernel_convergent_parallel(void *buffer, uint32_t Mask, else NumThreads = ConvergentSize; ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads", - NumThreads); + (int)NumThreads); // Set to true for workers participating in the parallel region. bool isActive = false; @@ -260,7 +260,7 @@ EXTERN void __kmpc_kernel_prepare_parallel(void *WorkFn, } ASSERT(LT_FUSSY, NumThreads > 0, "bad thread request of %d threads", - NumThreads); + (int)NumThreads); ASSERT0(LT_FUSSY, GetThreadIdInBlock() == GetMasterThreadID(), "only team master can create parallel"); @@ -307,7 +307,7 @@ EXTERN bool __kmpc_kernel_parallel(void **WorkFn, PRINT(LD_PAR, "thread will execute parallel region with id %d in a team of " "%d threads\n", - newTaskDescr->ThreadId(), newTaskDescr->NThreads()); + (int)newTaskDescr->ThreadId(), (int)newTaskDescr->NThreads()); isActive = true; } @@ -332,11 +332,11 @@ EXTERN void __kmpc_kernel_end_parallel() { // support for parallel that goes sequential //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) { +EXTERN void __kmpc_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_serialized_parallel\n"); - if (isRuntimeUninitialized()) { - ASSERT0(LT_FUSSY, isSPMDMode(), + if (checkRuntimeUninitialized(loc)) { + ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); omptarget_nvptx_simpleThreadPrivateContext->IncParLevel(); return; @@ -370,12 +370,12 @@ EXTERN void __kmpc_serialized_parallel(kmp_Indent *loc, uint32_t global_tid) { newTaskDescr); } -EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc, +EXTERN void __kmpc_end_serialized_parallel(kmp_Ident *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_end_serialized_parallel\n"); - if (isRuntimeUninitialized()) { - ASSERT0(LT_FUSSY, isSPMDMode(), + if 
(checkRuntimeUninitialized(loc)) { + ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); omptarget_nvptx_simpleThreadPrivateContext->DecParLevel(); return; @@ -393,11 +393,11 @@ EXTERN void __kmpc_end_serialized_parallel(kmp_Indent *loc, currTaskDescr->RestoreLoopData(); } -EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) { +EXTERN uint16_t __kmpc_parallel_level(kmp_Ident *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_parallel_level\n"); - if (isRuntimeUninitialized()) { - ASSERT0(LT_FUSSY, isSPMDMode(), + if (checkRuntimeUninitialized(loc)) { + ASSERT0(LT_FUSSY, checkSPMDMode(loc), "Expected SPMD mode with uninitialized runtime."); return omptarget_nvptx_simpleThreadPrivateContext->GetParallelLevel(); } @@ -417,27 +417,29 @@ EXTERN uint16_t __kmpc_parallel_level(kmp_Indent *loc, uint32_t global_tid) { // cached by the compiler and used when calling the runtime. On nvptx // it's cheap to recalculate this value so we never use the result // of this call. 
-EXTERN int32_t __kmpc_global_thread_num(kmp_Indent *loc) { - return GetLogicalThreadIdInBlock(); +EXTERN int32_t __kmpc_global_thread_num(kmp_Ident *loc) { + int tid = GetLogicalThreadIdInBlock(); + return GetOmpThreadId(tid, checkSPMDMode(loc), + checkRuntimeUninitialized(loc)); } //////////////////////////////////////////////////////////////////////////////// // push params //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_push_num_threads(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_push_num_threads(kmp_Ident *loc, int32_t tid, int32_t num_threads) { PRINT(LD_IO, "call kmpc_push_num_threads %d\n", num_threads); - ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized."); tid = GetLogicalThreadIdInBlock(); omptarget_nvptx_threadPrivateContext->NumThreadsForNextParallel(tid) = num_threads; } -EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_push_simd_limit(kmp_Ident *loc, int32_t tid, int32_t simd_limit) { - PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", simd_limit); - ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); + PRINT(LD_IO, "call kmpc_push_simd_limit %d\n", (int)simd_limit); + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), "Runtime must be initialized."); tid = GetLogicalThreadIdInBlock(); omptarget_nvptx_threadPrivateContext->SimdLimitForNextSimd(tid) = simd_limit; } @@ -445,14 +447,14 @@ EXTERN void __kmpc_push_simd_limit(kmp_Indent *loc, int32_t tid, // Do nothing. The host guarantees we started the requested number of // teams and we only need inspection of gridDim. 
-EXTERN void __kmpc_push_num_teams(kmp_Indent *loc, int32_t tid, +EXTERN void __kmpc_push_num_teams(kmp_Ident *loc, int32_t tid, int32_t num_teams, int32_t thread_limit) { - PRINT(LD_IO, "call kmpc_push_num_teams %d\n", num_teams); + PRINT(LD_IO, "call kmpc_push_num_teams %d\n", (int)num_teams); ASSERT0(LT_FUSSY, FALSE, "should never have anything with new teams on device"); } -EXTERN void __kmpc_push_proc_bind(kmp_Indent *loc, uint32_t tid, +EXTERN void __kmpc_push_proc_bind(kmp_Ident *loc, uint32_t tid, int proc_bind) { - PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", proc_bind); + PRINT(LD_IO, "call kmpc_push_proc_bind %d\n", (int)proc_bind); } diff --git a/libomptarget/deviceRTLs/nvptx/src/reduction.cu b/libomptarget/deviceRTLs/nvptx/src/reduction.cu index 21a419c..c0d22df 100644 --- a/libomptarget/deviceRTLs/nvptx/src/reduction.cu +++ b/libomptarget/deviceRTLs/nvptx/src/reduction.cu @@ -31,7 +31,7 @@ int32_t __gpu_block_reduce() { } EXTERN -int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars, +int32_t __kmpc_reduce_gpu(kmp_Ident *loc, int32_t global_tid, int32_t num_vars, size_t reduce_size, void *reduce_data, void *reduce_array_size, kmp_ReductFctPtr *reductFct, kmp_CriticalName *lck) { @@ -40,7 +40,8 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars, int numthread; if (currTaskDescr->IsParallelConstruct()) { numthread = - GetNumberOfOmpThreads(threadId, isSPMDMode(), isRuntimeUninitialized()); + GetNumberOfOmpThreads(threadId, checkSPMDMode(loc), + checkRuntimeUninitialized(loc)); } else { numthread = GetNumberOfOmpTeams(); } @@ -55,12 +56,12 @@ int32_t __kmpc_reduce_gpu(kmp_Indent *loc, int32_t global_tid, int32_t num_vars, } EXTERN -int32_t __kmpc_reduce_combined(kmp_Indent *loc) { +int32_t __kmpc_reduce_combined(kmp_Ident *loc) { return threadIdx.x == 0 ? 2 : 0; } EXTERN -int32_t __kmpc_reduce_simd(kmp_Indent *loc) { +int32_t __kmpc_reduce_simd(kmp_Ident *loc) { return (threadIdx.x % 32 == 0) ? 
1 : 0; } @@ -75,12 +76,12 @@ EXTERN int32_t __kmpc_shuffle_int32(int32_t val, int16_t delta, int16_t size) { } EXTERN int64_t __kmpc_shuffle_int64(int64_t val, int16_t delta, int16_t size) { - int lo, hi; - asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); - hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size); - lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size); - asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi)); - return val; + int lo, hi; + asm volatile("mov.b64 {%0,%1}, %2;" : "=r"(lo), "=r"(hi) : "l"(val)); + hi = __SHFL_DOWN_SYNC(0xFFFFFFFF, hi, delta, size); + lo = __SHFL_DOWN_SYNC(0xFFFFFFFF, lo, delta, size); + asm volatile("mov.b64 %0, {%1,%2};" : "=l"(val) : "r"(lo), "r"(hi)); + return val; } static INLINE void gpu_regular_warp_reduce(void *reduce_data, @@ -231,8 +232,7 @@ int32_t nvptx_parallel_reduce_nowait(int32_t global_tid, int32_t num_vars, // Get the OMP thread Id. This is different from BlockThreadId in the case of // an L2 parallel region. - return GetOmpThreadId(BlockThreadId, isSPMDExecutionMode, - isRuntimeUninitialized) == 0; + return global_tid == 0; #endif // __CUDA_ARCH__ >= 700 } @@ -429,3 +429,22 @@ int32_t __kmpc_nvptx_teams_reduce_nowait_simple_generic( /*isSPMDExecutionMode=*/false, /*isRuntimeUninitialized=*/true); } + +EXTERN int32_t __kmpc_nvptx_teams_reduce_nowait_simple(kmp_Ident *loc, + int32_t global_tid, + kmp_CriticalName *crit) { + if (checkSPMDMode(loc) && GetThreadIdInBlock() != 0) + return 0; + // The master thread of the team actually does the reduction. 
+ while (atomicCAS((uint32_t *)crit, 0, 1)) + ; + return 1; +} + +EXTERN void +__kmpc_nvptx_teams_end_reduce_nowait_simple(kmp_Ident *loc, int32_t global_tid, + kmp_CriticalName *crit) { + __threadfence_system(); + (void)atomicExch((uint32_t *)crit, 0); +} + diff --git a/libomptarget/deviceRTLs/nvptx/src/supporti.h b/libomptarget/deviceRTLs/nvptx/src/supporti.h index c93657e..e2ea2d1 100644 --- a/libomptarget/deviceRTLs/nvptx/src/supporti.h +++ b/libomptarget/deviceRTLs/nvptx/src/supporti.h @@ -33,6 +33,59 @@ INLINE bool isRuntimeInitialized() { } //////////////////////////////////////////////////////////////////////////////// +// Execution Modes based on location parameter fields +//////////////////////////////////////////////////////////////////////////////// + +INLINE bool checkSPMDMode(kmp_Ident *loc) { + if (!loc) + return isSPMDMode(); + + // If SPMD is true then we are not in the UNDEFINED state so + // we can return immediately. + if (loc->reserved_2 & KMP_IDENT_SPMD_MODE) + return true; + + // If not in SPMD mode and runtime required is a valid + // combination of flags so we can return immediately. + if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE)) + return false; + + // We are in underfined state. + return isSPMDMode(); +} + +INLINE bool checkGenericMode(kmp_Ident *loc) { + return !checkSPMDMode(loc); +} + +INLINE bool checkRuntimeUninitialized(kmp_Ident *loc) { + if (!loc) + return isRuntimeUninitialized(); + + // If runtime is required then we know we can't be + // in the undefined mode. We can return immediately. + if (!(loc->reserved_2 & KMP_IDENT_SIMPLE_RT_MODE)) + return false; + + // If runtime is required then we need to check is in + // SPMD mode or not. If not in SPMD mode then we end + // up in the UNDEFINED state that marks the orphaned + // functions. + if (loc->reserved_2 & KMP_IDENT_SPMD_MODE) + return true; + + // Check if we are in an UNDEFINED state. 
Undefined is denoted by + // non-SPMD + noRuntimeRequired which is a combination that + // cannot actually happen. Undefined states is used to mark orphaned + // functions. + return isRuntimeUninitialized(); +} + +INLINE bool checkRuntimeInitialized(kmp_Ident *loc) { + return !checkRuntimeUninitialized(loc); +} + +//////////////////////////////////////////////////////////////////////////////// // support: get info from machine //////////////////////////////////////////////////////////////////////////////// @@ -78,8 +131,6 @@ INLINE int GetNumberOfWorkersInTeam() { return GetMasterThreadID(); } // id is GetMasterThreadID()) calls this routine, we return 0 because // it is a shadow for the first worker. INLINE int GetLogicalThreadIdInBlock() { - // return GetThreadIdInBlock() % GetMasterThreadID(); - // Implemented using control flow (predication) instead of with a modulo // operation. int tid = GetThreadIdInBlock(); @@ -180,19 +231,20 @@ INLINE unsigned long PadBytes(unsigned long size, { // compute the necessary padding to satisfy alignment constraint ASSERT(LT_FUSSY, (alignment & (alignment - 1)) == 0, - "alignment %ld is not a power of 2\n", alignment); + "alignment %lu is not a power of 2\n", alignment); return (~(unsigned long)size + 1) & (alignment - 1); } INLINE void *SafeMalloc(size_t size, const char *msg) // check if success { void *ptr = malloc(size); - PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg, P64(ptr)); + PRINT(LD_MEM, "malloc data of size %zu for %s: 0x%llx\n", size, msg, + (unsigned long long)ptr); return ptr; } INLINE void *SafeFree(void *ptr, const char *msg) { - PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", P64(ptr), msg); + PRINT(LD_MEM, "free data ptr 0x%llx for %s\n", (unsigned long long)ptr, msg); free(ptr); return NULL; } diff --git a/libomptarget/deviceRTLs/nvptx/src/sync.cu b/libomptarget/deviceRTLs/nvptx/src/sync.cu index 0a99405..7cdb7ff 100644 --- a/libomptarget/deviceRTLs/nvptx/src/sync.cu +++ 
b/libomptarget/deviceRTLs/nvptx/src/sync.cu @@ -17,11 +17,11 @@ // KMP Ordered calls //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_ordered(kmp_Indent *loc, int32_t tid) { +EXTERN void __kmpc_ordered(kmp_Ident *loc, int32_t tid) { PRINT0(LD_IO, "call kmpc_ordered\n"); } -EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) { +EXTERN void __kmpc_end_ordered(kmp_Ident *loc, int32_t tid) { PRINT0(LD_IO, "call kmpc_end_ordered\n"); } @@ -33,16 +33,16 @@ EXTERN void __kmpc_end_ordered(kmp_Indent *loc, int32_t tid) { // FIXME: what if not all threads (warps) participate to the barrier? // We may need to implement it differently -EXTERN int32_t __kmpc_cancel_barrier(kmp_Indent *loc_ref, int32_t tid) { +EXTERN int32_t __kmpc_cancel_barrier(kmp_Ident *loc_ref, int32_t tid) { PRINT0(LD_IO, "call kmpc_cancel_barrier\n"); __kmpc_barrier(loc_ref, tid); PRINT0(LD_SYNC, "completed kmpc_cancel_barrier\n"); return 0; } -EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) { - if (isRuntimeUninitialized()) { - ASSERT0(LT_FUSSY, isSPMDMode(), +EXTERN void __kmpc_barrier(kmp_Ident *loc_ref, int32_t tid) { + if (checkRuntimeUninitialized(loc_ref)) { + ASSERT0(LT_FUSSY, checkSPMDMode(loc_ref), "Expected SPMD mode with uninitialized runtime."); __kmpc_barrier_simple_spmd(loc_ref, tid); } else { @@ -50,9 +50,9 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) { omptarget_nvptx_TaskDescr *currTaskDescr = omptarget_nvptx_threadPrivateContext->GetTopLevelTaskDescr(tid); int numberOfActiveOMPThreads = GetNumberOfOmpThreads( - tid, isSPMDMode(), /*isRuntimeUninitialized=*/false); + tid, checkSPMDMode(loc_ref), /*isRuntimeUninitialized=*/false); if (numberOfActiveOMPThreads > 1) { - if (isSPMDMode()) { + if (checkSPMDMode(loc_ref)) { __kmpc_barrier_simple_spmd(loc_ref, tid); } else { // The #threads parameter must be rounded up to the WARPSIZE. 
@@ -61,7 +61,7 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) { PRINT(LD_SYNC, "call kmpc_barrier with %d omp threads, sync parameter %d\n", - numberOfActiveOMPThreads, threads); + (int)numberOfActiveOMPThreads, (int)threads); // Barrier #1 is for synchronization among active threads. named_sync(L1_BARRIER, threads); } @@ -72,7 +72,7 @@ EXTERN void __kmpc_barrier(kmp_Indent *loc_ref, int32_t tid) { // Emit a simple barrier call in SPMD mode. Assumes the caller is in an L0 // parallel region and that all worker threads participate. -EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) { +EXTERN void __kmpc_barrier_simple_spmd(kmp_Ident *loc_ref, int32_t tid) { PRINT0(LD_SYNC, "call kmpc_barrier_simple_spmd\n"); __syncthreads(); PRINT0(LD_SYNC, "completed kmpc_barrier_simple_spmd\n"); @@ -80,7 +80,7 @@ EXTERN void __kmpc_barrier_simple_spmd(kmp_Indent *loc_ref, int32_t tid) { // Emit a simple barrier call in Generic mode. Assumes the caller is in an L0 // parallel region and that all worker threads participate. -EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) { +EXTERN void __kmpc_barrier_simple_generic(kmp_Ident *loc_ref, int32_t tid) { int numberOfActiveOMPThreads = GetNumberOfThreadsInBlock() - WARPSIZE; // The #threads parameter must be rounded up to the WARPSIZE. int threads = @@ -89,7 +89,7 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) { PRINT(LD_SYNC, "call kmpc_barrier_simple_generic with %d omp threads, sync parameter " "%d\n", - numberOfActiveOMPThreads, threads); + (int)numberOfActiveOMPThreads, (int)threads); // Barrier #1 is for synchronization among active threads. 
named_sync(L1_BARRIER, threads); PRINT0(LD_SYNC, "completed kmpc_barrier_simple_generic\n"); @@ -99,37 +99,30 @@ EXTERN void __kmpc_barrier_simple_generic(kmp_Indent *loc_ref, int32_t tid) { // KMP MASTER //////////////////////////////////////////////////////////////////////////////// -INLINE int32_t IsMaster() { - // only the team master updates the state - int tid = GetLogicalThreadIdInBlock(); - int ompThreadId = GetOmpThreadId(tid, isSPMDMode(), isRuntimeUninitialized()); - return IsTeamMaster(ompThreadId); -} - -EXTERN int32_t __kmpc_master(kmp_Indent *loc, int32_t global_tid) { +EXTERN int32_t __kmpc_master(kmp_Ident *loc, int32_t global_tid) { PRINT0(LD_IO, "call kmpc_master\n"); - return IsMaster(); + return IsTeamMaster(global_tid); } -EXTERN void __kmpc_end_master(kmp_Indent *loc, int32_t global_tid) { +EXTERN void __kmpc_end_master(kmp_Ident *loc, int32_t global_tid) { PRINT0(LD_IO, "call kmpc_end_master\n"); - ASSERT0(LT_FUSSY, IsMaster(), "expected only master here"); + ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here"); } //////////////////////////////////////////////////////////////////////////////// // KMP SINGLE //////////////////////////////////////////////////////////////////////////////// -EXTERN int32_t __kmpc_single(kmp_Indent *loc, int32_t global_tid) { +EXTERN int32_t __kmpc_single(kmp_Ident *loc, int32_t global_tid) { PRINT0(LD_IO, "call kmpc_single\n"); // decide to implement single with master; master get the single - return IsMaster(); + return IsTeamMaster(global_tid); } -EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) { +EXTERN void __kmpc_end_single(kmp_Ident *loc, int32_t global_tid) { PRINT0(LD_IO, "call kmpc_end_single\n"); // decide to implement single with master: master get the single - ASSERT0(LT_FUSSY, IsMaster(), "expected only master here"); + ASSERT0(LT_FUSSY, IsTeamMaster(global_tid), "expected only master here"); // sync barrier is explicitely called... 
so that is not a problem } @@ -137,9 +130,9 @@ EXTERN void __kmpc_end_single(kmp_Indent *loc, int32_t global_tid) { // Flush //////////////////////////////////////////////////////////////////////////////// -EXTERN void __kmpc_flush(kmp_Indent *loc) { +EXTERN void __kmpc_flush(kmp_Ident *loc) { PRINT0(LD_IO, "call kmpc_flush\n"); - __threadfence_block(); + __threadfence_system(); } //////////////////////////////////////////////////////////////////////////////// diff --git a/libomptarget/deviceRTLs/nvptx/src/task.cu b/libomptarget/deviceRTLs/nvptx/src/task.cu index f0431ab..2f47d4b 100644 --- a/libomptarget/deviceRTLs/nvptx/src/task.cu +++ b/libomptarget/deviceRTLs/nvptx/src/task.cu @@ -31,7 +31,7 @@ #include "omptarget-nvptx.h" EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc( - kmp_Indent *loc, // unused + kmp_Ident *loc, // unused uint32_t global_tid, // unused int32_t flag, // unused (because in our impl, all are immediately exec size_t sizeOfTaskInclPrivate, size_t sizeOfSharedTable, @@ -39,14 +39,15 @@ EXTERN kmp_TaskDescr *__kmpc_omp_task_alloc( PRINT(LD_IO, "call __kmpc_omp_task_alloc(size priv&struct %lld, shared %lld, " "fct 0x%llx)\n", - P64(sizeOfTaskInclPrivate), P64(sizeOfSharedTable), P64(taskSub)); + (long long)sizeOfTaskInclPrivate, (long long)sizeOfSharedTable, + (unsigned long long)taskSub); // want task+priv to be a multiple of 8 bytes size_t padForTaskInclPriv = PadBytes(sizeOfTaskInclPrivate, sizeof(void *)); sizeOfTaskInclPrivate += padForTaskInclPriv; size_t kmpSize = sizeOfTaskInclPrivate + sizeOfSharedTable; ASSERT(LT_FUSSY, sizeof(omptarget_nvptx_TaskDescr) % sizeof(void *) == 0, "need task descr of size %d to be a multiple of %d\n", - sizeof(omptarget_nvptx_TaskDescr), sizeof(void *)); + (int)sizeof(omptarget_nvptx_TaskDescr), (int)sizeof(void *)); size_t totSize = sizeof(omptarget_nvptx_TaskDescr) + kmpSize; omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SafeMalloc( @@ -63,25 +64,27 @@ EXTERN 
kmp_TaskDescr *__kmpc_omp_task_alloc( newKmpTaskDescr->sub = taskSub; newKmpTaskDescr->destructors = NULL; PRINT(LD_TASK, "return with task descr kmp: 0x%llx, omptarget-nvptx 0x%llx\n", - P64(newKmpTaskDescr), P64(newExplicitTaskDescr)); + (unsigned long long)newKmpTaskDescr, + (unsigned long long)newExplicitTaskDescr); return newKmpTaskDescr; } -EXTERN int32_t __kmpc_omp_task(kmp_Indent *loc, uint32_t global_tid, +EXTERN int32_t __kmpc_omp_task(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr) { return __kmpc_omp_task_with_deps(loc, global_tid, newKmpTaskDescr, 0, 0, 0, 0); } -EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid, +EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { PRINT(LD_IO, "call to __kmpc_omp_task_with_deps(task 0x%llx)\n", P64(newKmpTaskDescr)); - ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + "Runtime must be initialized."); // 1. get explict task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES( @@ -101,10 +104,11 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid, // 3. call sub PRINT(LD_TASK, "call task sub 0x%llx(task descr 0x%llx)\n", - P64(newKmpTaskDescr->sub), P64(newKmpTaskDescr)); + (unsigned long long)newKmpTaskDescr->sub, + (unsigned long long)newKmpTaskDescr); newKmpTaskDescr->sub(0, newKmpTaskDescr); PRINT(LD_TASK, "return from call task sub 0x%llx()\n", - P64(newKmpTaskDescr->sub)); + (unsigned long long)newKmpTaskDescr->sub); // 4. 
pop context omptarget_nvptx_threadPrivateContext->SetTopLevelTaskDescr(tid, @@ -114,11 +118,12 @@ EXTERN int32_t __kmpc_omp_task_with_deps(kmp_Indent *loc, uint32_t global_tid, return 0; } -EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_omp_task_begin_if0(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr) { PRINT(LD_IO, "call to __kmpc_omp_task_begin_if0(task 0x%llx)\n", - P64(newKmpTaskDescr)); - ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); + (unsigned long long)newKmpTaskDescr); + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + "Runtime must be initialized."); // 1. get explict task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES( @@ -139,11 +144,12 @@ EXTERN void __kmpc_omp_task_begin_if0(kmp_Indent *loc, uint32_t global_tid, // 4 & 5 ... done in complete } -EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_omp_task_complete_if0(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr) { PRINT(LD_IO, "call to __kmpc_omp_task_complete_if0(task 0x%llx)\n", - P64(newKmpTaskDescr)); - ASSERT0(LT_FUSSY, isRuntimeInitialized(), "Runtime must be initialized."); + (unsigned long long)newKmpTaskDescr); + ASSERT0(LT_FUSSY, checkRuntimeInitialized(loc), + "Runtime must be initialized."); // 1. 
get explict task descr from kmp task descr omptarget_nvptx_ExplicitTaskDescr *newExplicitTaskDescr = (omptarget_nvptx_ExplicitTaskDescr *)SUB_BYTES( @@ -164,37 +170,37 @@ EXTERN void __kmpc_omp_task_complete_if0(kmp_Indent *loc, uint32_t global_tid, SafeFree(newExplicitTaskDescr, "explicit task descriptor"); } -EXTERN void __kmpc_omp_wait_deps(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_omp_wait_deps(kmp_Ident *loc, uint32_t global_tid, int32_t depNum, void *depList, int32_t noAliasDepNum, void *noAliasDepList) { PRINT0(LD_IO, "call to __kmpc_omp_wait_deps(..)\n"); // nothing to do as all our tasks are executed as final } -EXTERN void __kmpc_taskgroup(kmp_Indent *loc, uint32_t global_tid) { +EXTERN void __kmpc_taskgroup(kmp_Ident *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_taskgroup(..)\n"); // nothing to do as all our tasks are executed as final } -EXTERN void __kmpc_end_taskgroup(kmp_Indent *loc, uint32_t global_tid) { +EXTERN void __kmpc_end_taskgroup(kmp_Ident *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_end_taskgroup(..)\n"); // nothing to do as all our tasks are executed as final } -EXTERN int32_t __kmpc_omp_taskyield(kmp_Indent *loc, uint32_t global_tid, +EXTERN int32_t __kmpc_omp_taskyield(kmp_Ident *loc, uint32_t global_tid, int end_part) { PRINT0(LD_IO, "call to __kmpc_taskyield()\n"); // do nothing: tasks are executed immediately, no yielding allowed return 0; } -EXTERN int32_t __kmpc_omp_taskwait(kmp_Indent *loc, uint32_t global_tid) { +EXTERN int32_t __kmpc_omp_taskwait(kmp_Ident *loc, uint32_t global_tid) { PRINT0(LD_IO, "call to __kmpc_taskwait()\n"); // nothing to do as all our tasks are executed as final return 0; } -EXTERN void __kmpc_taskloop(kmp_Indent *loc, uint32_t global_tid, +EXTERN void __kmpc_taskloop(kmp_Ident *loc, uint32_t global_tid, kmp_TaskDescr *newKmpTaskDescr, int if_val, uint64_t *lb, uint64_t *ub, int64_t st, int nogroup, int32_t sched, uint64_t grainsize, void *task_dup) { diff --git 
a/libomptarget/src/omptarget.cpp b/libomptarget/src/omptarget.cpp index a1ffd04..a23d82b 100644 --- a/libomptarget/src/omptarget.cpp +++ b/libomptarget/src/omptarget.cpp @@ -638,19 +638,20 @@ int target(int64_t device_id, void *host_ptr, int32_t arg_num, assert(tgtIdx != -1 && "Base address must be translated already."); // The parent lambda must be processed already and it must be the last // in tgt_args and tgt_offsets arrays. - void *HstPtrBegin = args[i]; - void *HstPtrBase = args_base[i]; + void *HstPtrVal = args[i]; + void *HstPtrBegin = args_base[i]; + void *HstPtrBase = args[idx]; bool IsLast; // unused. void *TgtPtrBase = (void *)((intptr_t)tgt_args[tgtIdx] + tgt_offsets[tgtIdx]); DP("Parent lambda base " DPxMOD "\n", DPxPTR(TgtPtrBase)); uint64_t Delta = (uint64_t)HstPtrBegin - (uint64_t)HstPtrBase; void *TgtPtrBegin = (void *)((uintptr_t)TgtPtrBase + Delta); - void *Pointer_TgtPtrBegin = Device.getTgtPtrBegin( - *(void **)HstPtrBegin, arg_sizes[i], IsLast, false); + void *Pointer_TgtPtrBegin = + Device.getTgtPtrBegin(HstPtrVal, arg_sizes[i], IsLast, false); if (!Pointer_TgtPtrBegin) { DP("No lambda captured variable mapped (" DPxMOD ") - ignored\n", - DPxPTR(*(void **)HstPtrBegin)); + DPxPTR(HstPtrVal)); continue; } DP("Update lambda reference (" DPxMOD ") -> [" DPxMOD "]\n", diff --git a/runtime/cmake/LibompHandleFlags.cmake b/runtime/cmake/LibompHandleFlags.cmake index efe2099..0b829a5 100644 --- a/runtime/cmake/LibompHandleFlags.cmake +++ b/runtime/cmake/LibompHandleFlags.cmake @@ -50,6 +50,7 @@ function(libomp_get_c_and_cxxflags_common flags) libomp_append(flags_local /GS LIBOMP_HAVE_GS_FLAG) libomp_append(flags_local /EHsc LIBOMP_HAVE_EHSC_FLAG) libomp_append(flags_local /Oy- LIBOMP_HAVE_OY__FLAG) + libomp_append(flags_local -mrtm LIBOMP_HAVE_MRTM_FLAG) # Intel(R) C Compiler flags libomp_append(flags_local /Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG) libomp_append(flags_local -Qoption,cpp,--extended_float_types LIBOMP_HAVE_EXTENDED_FLOAT_TYPES_FLAG) @@ 
-158,6 +159,11 @@ function(libomp_get_libflags libflags) if(${IA32}) libomp_append(libflags_local -lirc_pic LIBOMP_HAVE_IRC_PIC_LIBRARY) endif() + IF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly") + libomp_append(libflags_local "-Wl,--no-as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG) + libomp_append(libflags_local "-lm") + libomp_append(libflags_local "-Wl,--as-needed" LIBOMP_HAVE_AS_NEEDED_FLAG) + ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "DragonFly") IF(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD") libomp_append(libflags_local -lm) ENDIF(${CMAKE_SYSTEM_NAME} MATCHES "NetBSD") diff --git a/runtime/cmake/LibompMicroTests.cmake b/runtime/cmake/LibompMicroTests.cmake index 0918fdd..bdecf7f 100644 --- a/runtime/cmake/LibompMicroTests.cmake +++ b/runtime/cmake/LibompMicroTests.cmake @@ -176,6 +176,9 @@ if(CMAKE_SYSTEM_NAME MATCHES "FreeBSD") elseif(CMAKE_SYSTEM_NAME MATCHES "NetBSD") set(libomp_expected_library_deps libc.so.12 libpthread.so.1 libm.so.0) libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) +elseif(CMAKE_SYSTEM_NAME MATCHES "DragonFly") + set(libomp_expected_library_deps libc.so.8 libpthread.so.0 libm.so.4) + libomp_append(libomp_expected_library_deps libhwloc.so.5 LIBOMP_USE_HWLOC) elseif(APPLE) set(libomp_expected_library_deps /usr/lib/libSystem.B.dylib) elseif(WIN32) diff --git a/runtime/cmake/config-ix.cmake b/runtime/cmake/config-ix.cmake index 5415e57..019c83c 100644 --- a/runtime/cmake/config-ix.cmake +++ b/runtime/cmake/config-ix.cmake @@ -73,13 +73,16 @@ check_c_compiler_flag(-ftls-model=initial-exec LIBOMP_HAVE_FTLS_MODEL_FLAG) libomp_check_architecture_flag(-mmic LIBOMP_HAVE_MMIC_FLAG) libomp_check_architecture_flag(-m32 LIBOMP_HAVE_M32_FLAG) if(WIN32) - # Check Windows MSVC style flags. 
- check_c_compiler_flag(/TP LIBOMP_HAVE_TP_FLAG) - check_cxx_compiler_flag(/EHsc LIBOMP_HAVE_EHSC_FLAG) - check_cxx_compiler_flag(/GS LIBOMP_HAVE_GS_FLAG) - check_cxx_compiler_flag(/Oy- LIBOMP_HAVE_Oy__FLAG) - check_cxx_compiler_flag(/arch:SSE2 LIBOMP_HAVE_ARCH_SSE2_FLAG) - check_cxx_compiler_flag(/Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG) + if(MSVC) + # Check Windows MSVC style flags. + check_c_compiler_flag(/TP LIBOMP_HAVE_TP_FLAG) + check_cxx_compiler_flag(/EHsc LIBOMP_HAVE_EHSC_FLAG) + check_cxx_compiler_flag(/GS LIBOMP_HAVE_GS_FLAG) + check_cxx_compiler_flag(/Oy- LIBOMP_HAVE_Oy__FLAG) + check_cxx_compiler_flag(/arch:SSE2 LIBOMP_HAVE_ARCH_SSE2_FLAG) + check_cxx_compiler_flag(/Qsafeseh LIBOMP_HAVE_QSAFESEH_FLAG) + endif() + check_c_compiler_flag(-mrtm LIBOMP_HAVE_MRTM_FLAG) # It is difficult to create a dummy masm assembly file # and then check the MASM assembler to see if these flags exist and work, # so we assume they do for Windows. diff --git a/runtime/src/dllexports b/runtime/src/dllexports index 1108930..963ac61 100644 --- a/runtime/src/dllexports +++ b/runtime/src/dllexports @@ -405,6 +405,7 @@ kmpc_set_disp_num_buffers 267 __kmpc_task_reduction_get_th_data 269 # USED FOR 4.5 __kmpc_critical_with_hint 270 __kmpc_get_target_offload 271 + __kmpc_omp_reg_task_with_affinity 272 %endif %endif @@ -546,6 +547,14 @@ kmp_set_disp_num_buffers 890 omp_get_default_allocator 893 omp_alloc 894 omp_free 895 + omp_set_affinity_format 748 + omp_get_affinity_format 749 + omp_display_affinity 750 + omp_capture_affinity 751 + ompc_set_affinity_format 752 + ompc_get_affinity_format 753 + ompc_display_affinity 754 + ompc_capture_affinity 755 OMP_NULL_ALLOCATOR DATA omp_default_mem_alloc DATA diff --git a/runtime/src/i18n/en_US.txt b/runtime/src/i18n/en_US.txt index 067cb94..3e5283e 100644 --- a/runtime/src/i18n/en_US.txt +++ b/runtime/src/i18n/en_US.txt @@ -425,6 +425,7 @@ AffHWSubsetManyNodes "KMP_HW_SUBSET ignored: too many NUMA Nodes request AffHWSubsetManyTiles "KMP_HW_SUBSET 
ignored: too many L2 Caches requested." AffHWSubsetManyProcs "KMP_HW_SUBSET ignored: too many Procs requested." HierSchedInvalid "Hierarchy ignored: unsupported level: %1$s." +AffFormatDefault "OMP: pid %1$s tid %2$s thread %3$s bound to OS proc set {%4$s}" # -------------------------------------------------------------------------------------------------- diff --git a/runtime/src/include/50/omp.h.var b/runtime/src/include/50/omp.h.var index 7a626bd..81b6c85 100644 --- a/runtime/src/include/50/omp.h.var +++ b/runtime/src/include/50/omp.h.var @@ -25,6 +25,11 @@ extern "C" { # endif +# define omp_set_affinity_format ompc_set_affinity_format +# define omp_get_affinity_format ompc_get_affinity_format +# define omp_display_affinity ompc_display_affinity +# define omp_capture_affinity ompc_capture_affinity + # if defined(_WIN32) # define __KAI_KMPC_CONVENTION __cdecl # ifndef __KMP_IMP @@ -235,6 +240,12 @@ extern void __KAI_KMPC_CONVENTION omp_free(void *ptr, const omp_allocator_t *allocator); #endif + /* OpenMP 5.0 Affinity Format */ + extern void __KAI_KMPC_CONVENTION omp_set_affinity_format(char const *); + extern size_t __KAI_KMPC_CONVENTION omp_get_affinity_format(char *, size_t); + extern void __KAI_KMPC_CONVENTION omp_display_affinity(char const *); + extern size_t __KAI_KMPC_CONVENTION omp_capture_affinity(char *, size_t, char const *); + # undef __KAI_KMPC_CONVENTION # undef __KMP_IMP diff --git a/runtime/src/include/50/omp_lib.f.var b/runtime/src/include/50/omp_lib.f.var index 8a02b62..d5a8057 100644 --- a/runtime/src/include/50/omp_lib.f.var +++ b/runtime/src/include/50/omp_lib.f.var @@ -375,6 +375,27 @@ integer (kind=omp_allocator_kind) omp_get_default_allocator end function omp_get_default_allocator + subroutine omp_set_affinity_format(format) + character (len=*) format + end subroutine omp_set_affinity_format + + function omp_get_affinity_format(buffer) + use omp_lib_kinds + character (len=*) buffer + integer (kind=kmp_size_t_kind) omp_get_affinity_format + 
end function omp_get_affinity_format + + subroutine omp_display_affinity(format) + character (len=*) format + end subroutine omp_display_affinity + + function omp_capture_affinity(buffer, format) + use omp_lib_kinds + character (len=*) format + character (len=*) buffer + integer (kind=kmp_size_t_kind) omp_capture_affinity + end function omp_capture_affinity + ! *** ! *** kmp_* entry points ! *** @@ -594,6 +615,10 @@ !dec$ attributes alias:'OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'OMP_GET_MAX_TASK_PRIORITY' :: omp_get_max_task_priority !dec$ attributes alias:'OMP_CONTROL_TOOL' :: omp_control_tool +!dec$ attributes alias:'OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format +!dec$ attributes alias:'OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format +!dec$ attributes alias:'OMP_DISPLAY_AFFINITY' :: omp_display_affinity +!dec$ attributes alias:'OMP_CAPTURE_AFFINITY' :: omp_capture_affinity !dec$ attributes alias:'omp_init_lock' :: omp_init_lock !dec$ attributes alias:'omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -675,6 +700,10 @@ !dec$ attributes alias:'_OMP_IS_INITIAL_DEVICE' :: omp_is_initial_device !dec$ attributes alias:'_OMP_GET_MAX_TASK_PRIORTY' :: omp_get_max_task_priority !dec$ attributes alias:'_OMP_CONTROL_TOOL' :: omp_control_tool +!dec$ attributes alias:'_OMP_SET_AFFINITY_FORMAT' :: omp_set_affinity_format +!dec$ attributes alias:'_OMP_GET_AFFINITY_FORMAT' :: omp_get_affinity_format +!dec$ attributes alias:'_OMP_DISPLAY_AFFINITY' :: omp_display_affinity +!dec$ attributes alias:'_OMP_CAPTURE_AFFINITY' :: omp_capture_affinity !dec$ attributes alias:'_omp_init_lock' :: omp_init_lock !dec$ attributes alias:'_omp_init_lock_with_hint' :: omp_init_lock_with_hint @@ -758,6 +787,10 @@ !dec$ attributes alias:'omp_get_cancellation_'::omp_get_cancellation !dec$ attributes alias:'omp_is_initial_device_'::omp_is_initial_device !dec$ attributes alias:'omp_get_max_task_priority_'::omp_get_max_task_priority +!dec$ attributes 
alias:'omp_set_affinity_format_' :: omp_set_affinity_format +!dec$ attributes alias:'omp_get_affinity_format_' :: omp_get_affinity_format +!dec$ attributes alias:'omp_display_affinity_' :: omp_display_affinity +!dec$ attributes alias:'omp_capture_affinity_' :: omp_capture_affinity !dec$ attributes alias:'omp_init_lock_'::omp_init_lock !dec$ attributes alias:'omp_init_lock_with_hint_'::omp_init_lock_with_hint @@ -852,6 +885,10 @@ !dec$ attributes alias:'_omp_unset_nest_lock_'::omp_unset_nest_lock !dec$ attributes alias:'_omp_test_nest_lock_'::omp_test_nest_lock !dec$ attributes alias:'_omp_control_tool_'::omp_control_tool +!dec$ attributes alias:'_omp_set_affinity_format_' :: omp_set_affinity_format +!dec$ attributes alias:'_omp_get_affinity_format_' :: omp_get_affinity_format +!dec$ attributes alias:'_omp_display_affinity_' :: omp_display_affinity +!dec$ attributes alias:'_omp_capture_affinity_' :: omp_capture_affinity !dec$ attributes alias:'_kmp_set_stacksize_'::kmp_set_stacksize !dec$ attributes alias:'_kmp_set_stacksize_s_'::kmp_set_stacksize_s diff --git a/runtime/src/include/50/omp_lib.f90.var b/runtime/src/include/50/omp_lib.f90.var index 624774d..afc6d67 100644 --- a/runtime/src/include/50/omp_lib.f90.var +++ b/runtime/src/include/50/omp_lib.f90.var @@ -391,6 +391,27 @@ integer (kind=omp_allocator_kind) omp_get_default_allocator end function omp_get_default_allocator + subroutine omp_set_affinity_format(format) + character (len=*) :: format + end subroutine omp_set_affinity_format + + function omp_get_affinity_format(buffer) + use omp_lib_kinds + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_get_affinity_format + end function omp_get_affinity_format + + subroutine omp_display_affinity(format) + character (len=*) :: format + end subroutine omp_display_affinity + + function omp_capture_affinity(buffer, format) + use omp_lib_kinds + character (len=*) :: format + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: 
omp_capture_affinity + end function omp_capture_affinity + ! *** ! *** kmp_* entry points ! *** diff --git a/runtime/src/include/50/omp_lib.h.var b/runtime/src/include/50/omp_lib.h.var index 0e4c2c6..11dbc0a 100644 --- a/runtime/src/include/50/omp_lib.h.var +++ b/runtime/src/include/50/omp_lib.h.var @@ -424,6 +424,27 @@ integer (kind=omp_allocator_kind) omp_get_default_allocator end function omp_get_default_allocator + subroutine omp_set_affinity_format(format) + character (len=*) :: format + end subroutine omp_set_affinity_format + + function omp_get_affinity_format(buffer) + import + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_get_affinity_format + end function omp_get_affinity_format + + subroutine omp_display_affinity(format) + character (len=*) :: format + end subroutine omp_display_affinity + + function omp_capture_affinity(buffer, format) + import + character (len=*) :: format + character (len=*) :: buffer + integer (kind=kmp_size_t_kind) :: omp_capture_affinity + end function omp_capture_affinity + ! *** ! *** kmp_* entry points ! 
*** @@ -637,6 +658,10 @@ !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_unset_nest_lock !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_test_nest_lock !DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_max_task_priority +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_set_affinity_format +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_get_affinity_format +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_display_affinity +!DIR$ ATTRIBUTES OFFLOAD:MIC :: omp_capture_affinity !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_stacksize_s !DIR$ ATTRIBUTES OFFLOAD:MIC :: kmp_set_blocktime @@ -710,6 +735,10 @@ !$omp declare target(omp_unset_nest_lock ) !$omp declare target(omp_test_nest_lock ) !$omp declare target(omp_get_max_task_priority ) +!$omp declare target(omp_set_affinity_format ) +!$omp declare target(omp_get_affinity_format ) +!$omp declare target(omp_display_affinity ) +!$omp declare target(omp_capture_affinity ) !$omp declare target(kmp_set_stacksize ) !$omp declare target(kmp_set_stacksize_s ) !$omp declare target(kmp_set_blocktime ) diff --git a/runtime/src/include/50/ompt.h.var b/runtime/src/include/50/ompt.h.var index 24fc90b..478c6cc 100644 --- a/runtime/src/include/50/ompt.h.var +++ b/runtime/src/include/50/ompt.h.var @@ -53,50 +53,50 @@ macro(ompt_get_target_info) \ macro(ompt_get_num_devices) -#define FOREACH_OMP_STATE(macro) \ +#define FOREACH_OMPT_STATE(macro) \ \ /* first available state */ \ - macro (omp_state_undefined, 0x102) /* undefined thread state */ \ + macro (ompt_state_undefined, 0x102) /* undefined thread state */ \ \ /* work states (0..15) */ \ - macro (omp_state_work_serial, 0x000) /* working outside parallel */ \ - macro (omp_state_work_parallel, 0x001) /* working within parallel */ \ - macro (omp_state_work_reduction, 0x002) /* performing a reduction */ \ + macro (ompt_state_work_serial, 0x000) /* working outside parallel */ \ + macro (ompt_state_work_parallel, 0x001) /* working within parallel */ \ + macro (ompt_state_work_reduction, 0x002) /* performing a 
reduction */ \ \ /* barrier wait states (16..31) */ \ - macro (omp_state_wait_barrier, 0x010) /* waiting at a barrier */ \ - macro (omp_state_wait_barrier_implicit_parallel, 0x011) \ + macro (ompt_state_wait_barrier, 0x010) /* waiting at a barrier */ \ + macro (ompt_state_wait_barrier_implicit_parallel, 0x011) \ /* implicit barrier at the end of parallel region */\ - macro (omp_state_wait_barrier_implicit_workshare, 0x012) \ + macro (ompt_state_wait_barrier_implicit_workshare, 0x012) \ /* implicit barrier at the end of worksharing */ \ - macro (omp_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ - macro (omp_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ + macro (ompt_state_wait_barrier_implicit, 0x013) /* implicit barrier */ \ + macro (ompt_state_wait_barrier_explicit, 0x014) /* explicit barrier */ \ \ /* task wait states (32..63) */ \ - macro (omp_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ - macro (omp_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ + macro (ompt_state_wait_taskwait, 0x020) /* waiting at a taskwait */ \ + macro (ompt_state_wait_taskgroup, 0x021) /* waiting at a taskgroup */ \ \ /* mutex wait states (64..127) */ \ - macro (omp_state_wait_mutex, 0x040) \ - macro (omp_state_wait_lock, 0x041) /* waiting for lock */ \ - macro (omp_state_wait_critical, 0x042) /* waiting for critical */ \ - macro (omp_state_wait_atomic, 0x043) /* waiting for atomic */ \ - macro (omp_state_wait_ordered, 0x044) /* waiting for ordered */ \ + macro (ompt_state_wait_mutex, 0x040) \ + macro (ompt_state_wait_lock, 0x041) /* waiting for lock */ \ + macro (ompt_state_wait_critical, 0x042) /* waiting for critical */ \ + macro (ompt_state_wait_atomic, 0x043) /* waiting for atomic */ \ + macro (ompt_state_wait_ordered, 0x044) /* waiting for ordered */ \ \ /* target wait states (128..255) */ \ - macro (omp_state_wait_target, 0x080) /* waiting for target region */ \ - macro (omp_state_wait_target_map, 0x081) /* waiting for target 
data mapping operation */ \ - macro (omp_state_wait_target_update, 0x082) /* waiting for target update operation */ \ + macro (ompt_state_wait_target, 0x080) /* waiting for target region */ \ + macro (ompt_state_wait_target_map, 0x081) /* waiting for target data mapping operation */ \ + macro (ompt_state_wait_target_update, 0x082) /* waiting for target update operation */ \ \ /* misc (256..511) */ \ - macro (omp_state_idle, 0x100) /* waiting for work */ \ - macro (omp_state_overhead, 0x101) /* overhead excluding wait states */ \ + macro (ompt_state_idle, 0x100) /* waiting for work */ \ + macro (ompt_state_overhead, 0x101) /* overhead excluding wait states */ \ \ /* implementation-specific states (512..) */ #define FOREACH_KMP_MUTEX_IMPL(macro) \ - macro (ompt_mutex_impl_unknown, 0) /* unknown implementation */ \ + macro (ompt_mutex_impl_none, 0) /* unknown implementation */ \ macro (kmp_mutex_impl_spin, 1) /* based on spin */ \ macro (kmp_mutex_impl_queuing, 2) /* based on some fair policy */ \ macro (kmp_mutex_impl_speculative, 3) /* based on HW-supported speculation */ @@ -178,20 +178,11 @@ typedef union ompt_data_t { static const ompt_data_t ompt_data_none = {0}; -typedef uint64_t omp_wait_id_t; -static const omp_wait_id_t omp_wait_id_none = 0; +typedef uint64_t ompt_wait_id_t; +static const ompt_wait_id_t omp_wait_id_none = 0; typedef void ompt_device_t; -/*--------------------- - * omp_frame_t - *---------------------*/ - -typedef struct omp_frame_t { - void *exit_frame; /* next frame is user code */ - void *enter_frame; /* previous frame is user code */ -} omp_frame_t; - /*--------------------- * dependences types @@ -220,10 +211,18 @@ typedef struct ompt_task_dependence_t { *---------------------*/ typedef enum { -#define omp_state_macro(state, code) state = code, - FOREACH_OMP_STATE(omp_state_macro) -#undef omp_state_macro -} omp_state_t; +#define ompt_state_macro(state, code) state = code, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro +} 
ompt_state_t; + +typedef enum ompt_frame_flag_t { + ompt_frame_runtime = 0x00, + ompt_frame_application = 0x01, + ompt_frame_cfa = 0x10, + ompt_frame_framepointer = 0x20, + ompt_frame_stackaddress = 0x30 +} ompt_frame_flag_t; /*--------------------- @@ -278,6 +277,12 @@ typedef enum ompt_thread_t { ompt_thread_unknown = 4 } ompt_thread_t; +typedef struct ompt_frame_t { + ompt_data_t exit_frame; + ompt_data_t enter_frame; + int exit_frame_flags; + int enter_frame_flags; +} ompt_frame_t; typedef enum ompt_parallel_flag_t { ompt_parallel_invoker_program = 0x00000001, /* program invokes master task */ ompt_parallel_invoker_runtime = 0x00000002, /* runtime invokes master task */ @@ -295,7 +300,7 @@ typedef void (*ompt_callback_thread_end_t) ( ); typedef void (*ompt_wait_callback_t) ( - omp_wait_id_t wait_id /* wait data */ + ompt_wait_id_t wait_id /* wait data */ ); /* parallel and workshares */ @@ -316,7 +321,7 @@ typedef void (*ompt_callback_implicit_task_t) ( typedef void (*ompt_callback_parallel_begin_t) ( ompt_data_t *encountering_task_data, /* data of encountering task */ - const omp_frame_t *encountering_task_frame, /* frame data of encountering task */ + const ompt_frame_t *encountering_task_frame, /* frame data of encountering task */ ompt_data_t *parallel_data, /* data of parallel region */ unsigned int requested_team_size, /* requested number of threads in team */ int flag, /* flag for additional information */ @@ -358,7 +363,7 @@ typedef void (*ompt_callback_task_schedule_t) ( typedef void (*ompt_callback_task_create_t) ( ompt_data_t *encountering_task_data, /* data of parent task */ - const omp_frame_t *encountering_task_frame, /* frame data for parent task */ + const ompt_frame_t *encountering_task_frame, /* frame data for parent task */ ompt_data_t *new_task_data, /* data of created task */ int flag, /* type of created task */ int has_dependences, /* created task has dependences */ @@ -479,19 +484,19 @@ typedef void (*ompt_callback_mutex_acquire_t) ( 
ompt_mutex_t kind, /* mutex kind */ unsigned int hint, /* mutex hint */ unsigned int impl, /* mutex implementation */ - omp_wait_id_t wait_id, /* id of object being awaited */ + ompt_wait_id_t wait_id, /* id of object being awaited */ const void *codeptr_ra /* return address of runtime call */ ); typedef void (*ompt_callback_mutex_t) ( ompt_mutex_t kind, /* mutex kind */ - omp_wait_id_t wait_id, /* id of object being awaited */ + ompt_wait_id_t wait_id, /* id of object being awaited */ const void *codeptr_ra /* return address of runtime call */ ); typedef void (*ompt_callback_nest_lock_t) ( ompt_scope_endpoint_t endpoint, /* endpoint of nested lock */ - omp_wait_id_t wait_id, /* id of object being awaited */ + ompt_wait_id_t wait_id, /* id of object being awaited */ const void *codeptr_ra /* return address of runtime call */ ); @@ -592,8 +597,8 @@ extern "C" { ***************************************************************************/ /* state */ -OMPT_API_FUNCTION(omp_state_t, ompt_get_state, ( - omp_wait_id_t *wait_id +OMPT_API_FUNCTION(ompt_state_t, ompt_get_state, ( + ompt_wait_id_t *wait_id )); /* thread */ @@ -611,7 +616,7 @@ OMPT_API_FUNCTION(int, ompt_get_task_info, ( int ancestor_level, int *type, ompt_data_t **task_data, - omp_frame_t **task_frame, + ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num )); diff --git a/runtime/src/kmp.h b/runtime/src/kmp.h index ef9e0a9..23bbeb3 100644 --- a/runtime/src/kmp.h +++ b/runtime/src/kmp.h @@ -129,6 +129,11 @@ class kmp_stats_list; #include "ompt-internal.h" #endif +#if OMP_50_ENABLED +// Affinity format function +#include "kmp_str.h" +#endif + // 0 - no fast memory allocation, alignment: 8-byte on x86, 16-byte on x64. // 3 - fast allocation using sync, non-sync free lists of any size, non-self // free lists of limited size. 
@@ -544,11 +549,15 @@ typedef int PACKED_REDUCTION_METHOD_T; #if KMP_OS_WINDOWS #define USE_CBLKDATA +#if KMP_MSVC_COMPAT #pragma warning(push) #pragma warning(disable : 271 310) +#endif #include <windows.h> +#if KMP_MSVC_COMPAT #pragma warning(pop) #endif +#endif #if KMP_OS_UNIX #include <dlfcn.h> @@ -560,7 +569,7 @@ typedef int PACKED_REDUCTION_METHOD_T; // GROUP_AFFINITY is already defined for _MSC_VER>=1600 (VS2010 and later). #if KMP_OS_WINDOWS -#if _MSC_VER < 1600 +#if _MSC_VER < 1600 && KMP_MSVC_COMPAT typedef struct GROUP_AFFINITY { KAFFINITY Mask; WORD Group; @@ -793,6 +802,12 @@ extern kmp_nested_proc_bind_t __kmp_nested_proc_bind; #endif /* OMP_40_ENABLED */ +#if OMP_50_ENABLED +extern int __kmp_display_affinity; +extern char *__kmp_affinity_format; +static const size_t KMP_AFFINITY_FORMAT_SIZE = 512; +#endif // OMP_50_ENABLED + #if KMP_AFFINITY_SUPPORTED #define KMP_PLACE_ALL (-1) #define KMP_PLACE_UNDEFINED (-2) @@ -1042,6 +1057,10 @@ extern kmp_uint64 __kmp_now_nsec(); /* TODO: tune for KMP_OS_DARWIN */ #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#elif KMP_OS_DRAGONFLY +/* TODO: tune for KMP_OS_DRAGONFLY */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #elif KMP_OS_FREEBSD /* TODO: tune for KMP_OS_FREEBSD */ #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ @@ -1054,6 +1073,10 @@ extern kmp_uint64 __kmp_now_nsec(); /* TODO: tune for KMP_OS_HURD */ #define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ #define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ +#elif KMP_OS_OPENBSD +/* TODO: tune for KMP_OS_OPENBSD */ +#define KMP_INIT_WAIT 1024U /* initial number of spin-tests */ +#define KMP_NEXT_WAIT 512U /* susequent number of spin-tests */ #endif #if KMP_ARCH_X86 || KMP_ARCH_X86_64 @@ -2222,6 +2245,18 @@ typedef struct kmp_dephash { #endif } kmp_dephash_t; 
+#if OMP_50_ENABLED +typedef struct kmp_task_affinity_info { + kmp_intptr_t base_addr; + size_t len; + struct { + bool flag1 : 1; + bool flag2 : 1; + kmp_int32 reserved : 30; + } flags; +} kmp_task_affinity_info_t; +#endif + #endif #ifdef BUILD_TIED_TASK_STACK @@ -2483,6 +2518,10 @@ typedef struct KMP_ALIGN_CACHE kmp_base_info { int th_last_place; /* last place in partition */ #endif #endif +#if OMP_50_ENABLED + int th_prev_level; /* previous level for affinity format */ + int th_prev_num_threads; /* previous num_threads for affinity format */ +#endif #if USE_ITT_BUILD kmp_uint64 th_bar_arrive_time; /* arrival to barrier timestamp */ kmp_uint64 th_bar_min_time; /* minimum arrival time at the barrier */ @@ -2676,6 +2715,9 @@ typedef struct KMP_ALIGN_CACHE kmp_base_team { int t_first_place; // first & last place in parent thread's partition. int t_last_place; // Restore these values to master after par region. #endif // OMP_40_ENABLED && KMP_AFFINITY_SUPPORTED +#if OMP_50_ENABLED + int t_display_affinity; +#endif int t_size_changed; // team size was changed?: 0: no, 1: yes, -1: changed via // omp_set_num_threads() call #if OMP_50_ENABLED @@ -3359,6 +3401,8 @@ extern void __kmp_runtime_destroy(void); #if KMP_AFFINITY_SUPPORTED extern char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask); +extern kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, + kmp_affin_mask_t *mask); extern void __kmp_affinity_initialize(void); extern void __kmp_affinity_uninitialize(void); extern void __kmp_affinity_set_init_mask( @@ -3378,6 +3422,14 @@ extern void __kmp_balanced_affinity(kmp_info_t *th, int team_size); extern int kmp_set_thread_affinity_mask_initial(void); #endif #endif /* KMP_AFFINITY_SUPPORTED */ +#if OMP_50_ENABLED +// No need for KMP_AFFINITY_SUPPORTED guard as only one field in the +// format string is for affinity, so platforms that do not support +// affinity can still use the other fields, e.g., %n for num_threads +extern size_t 
__kmp_aux_capture_affinity(int gtid, const char *format, + kmp_str_buf_t *buffer); +extern void __kmp_aux_display_affinity(int gtid, const char *format); +#endif extern void __kmp_cleanup_hierarchy(); extern void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar); @@ -3530,6 +3582,8 @@ KMP_EXPORT int __kmpc_invoke_task_func(int gtid); #if OMP_40_ENABLED extern int __kmp_invoke_teams_master(int gtid); extern void __kmp_teams_master(int gtid); +extern int __kmp_aux_get_team_num(); +extern int __kmp_aux_get_num_teams(); #endif extern void __kmp_save_internal_controls(kmp_info_t *thread); extern void __kmp_user_set_library(enum library_type arg); @@ -3783,6 +3837,9 @@ KMP_EXPORT void __kmpc_taskloop(ident_t *loc, kmp_int32 gtid, kmp_task_t *task, #if OMP_50_ENABLED KMP_EXPORT void *__kmpc_task_reduction_init(int gtid, int num_data, void *data); KMP_EXPORT void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void *d); +KMP_EXPORT kmp_int32 __kmpc_omp_reg_task_with_affinity( + ident_t *loc_ref, kmp_int32 gtid, kmp_task_t *new_task, kmp_int32 naffins, + kmp_task_affinity_info_t *affin_list); #endif #endif diff --git a/runtime/src/kmp_affinity.cpp b/runtime/src/kmp_affinity.cpp index a9a21cf..775862e 100644 --- a/runtime/src/kmp_affinity.cpp +++ b/runtime/src/kmp_affinity.cpp @@ -83,55 +83,135 @@ void KMPAffinity::destroy_api() { } } +#define KMP_ADVANCE_SCAN(scan) \ + while (*scan != '\0') { \ + scan++; \ + } + // Print the affinity mask to the character array in a pretty format. 
+// The format is a comma separated list of non-negative integers or integer +// ranges: e.g., 1,2,3-5,7,9-15 +// The format can also be the string "{<empty>}" if no bits are set in mask char *__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask) { + int start = 0, finish = 0, previous = 0; + bool first_range; + KMP_ASSERT(buf); KMP_ASSERT(buf_len >= 40); + KMP_ASSERT(mask); char *scan = buf; char *end = buf + buf_len - 1; - // Find first element / check for empty set. - int i; - i = mask->begin(); - if (i == mask->end()) { + // Check for empty set. + if (mask->begin() == mask->end()) { KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}"); - while (*scan != '\0') - scan++; + KMP_ADVANCE_SCAN(scan); KMP_ASSERT(scan <= end); return buf; } - KMP_SNPRINTF(scan, end - scan + 1, "{%d", i); - while (*scan != '\0') - scan++; - i++; - for (; i != mask->end(); i = mask->next(i)) { - if (!KMP_CPU_ISSET(i, mask)) { - continue; + first_range = true; + start = mask->begin(); + while (1) { + // Find next range + // [start, previous] is inclusive range of contiguous bits in mask + for (finish = mask->next(start), previous = start; + finish == previous + 1 && finish != mask->end(); + finish = mask->next(finish)) { + previous = finish; } - // Check for buffer overflow. A string of the form ",<n>" will have at most - // 10 characters, plus we want to leave room to print ",...}" if the set is - // too large to print for a total of 15 characters. We already left room for - // '\0' in setting end. 
- if (end - scan < 15) { - break; + // The first range does not need a comma printed before it, but the rest + // of the ranges do need a comma beforehand + if (!first_range) { + KMP_SNPRINTF(scan, end - scan + 1, "%s", ","); + KMP_ADVANCE_SCAN(scan); + } else { + first_range = false; } - KMP_SNPRINTF(scan, end - scan + 1, ",%-d", i); - while (*scan != '\0') - scan++; - } - if (i != mask->end()) { - KMP_SNPRINTF(scan, end - scan + 1, ",..."); - while (*scan != '\0') - scan++; + // Range with three or more contiguous bits in the affinity mask + if (previous - start > 1) { + KMP_SNPRINTF(scan, end - scan + 1, "%d-%d", static_cast<int>(start), + static_cast<int>(previous)); + } else { + // Range with one or two contiguous bits in the affinity mask + KMP_SNPRINTF(scan, end - scan + 1, "%d", static_cast<int>(start)); + KMP_ADVANCE_SCAN(scan); + if (previous - start > 0) { + KMP_SNPRINTF(scan, end - scan + 1, ",%d", static_cast<int>(previous)); + } + } + KMP_ADVANCE_SCAN(scan); + // Start over with new start point + start = finish; + if (start == mask->end()) + break; + // Check for overflow + if (end - scan < 2) + break; } - KMP_SNPRINTF(scan, end - scan + 1, "}"); - while (*scan != '\0') - scan++; + + // Check for overflow KMP_ASSERT(scan <= end); return buf; } +#undef KMP_ADVANCE_SCAN + +// Print the affinity mask to the string buffer object in a pretty format +// The format is a comma separated list of non-negative integers or integer +// ranges: e.g., 1,2,3-5,7,9-15 +// The format can also be the string "{<empty>}" if no bits are set in mask +kmp_str_buf_t *__kmp_affinity_str_buf_mask(kmp_str_buf_t *buf, + kmp_affin_mask_t *mask) { + int start = 0, finish = 0, previous = 0; + bool first_range; + KMP_ASSERT(buf); + KMP_ASSERT(mask); + + __kmp_str_buf_clear(buf); + + // Check for empty set. 
+ if (mask->begin() == mask->end()) { + __kmp_str_buf_print(buf, "%s", "{<empty>}"); + return buf; + } + + first_range = true; + start = mask->begin(); + while (1) { + // Find next range + // [start, previous] is inclusive range of contiguous bits in mask + for (finish = mask->next(start), previous = start; + finish == previous + 1 && finish != mask->end(); + finish = mask->next(finish)) { + previous = finish; + } + + // The first range does not need a comma printed before it, but the rest + // of the ranges do need a comma beforehand + if (!first_range) { + __kmp_str_buf_print(buf, "%s", ","); + } else { + first_range = false; + } + // Range with three or more contiguous bits in the affinity mask + if (previous - start > 1) { + __kmp_str_buf_print(buf, "%d-%d", static_cast<int>(start), + static_cast<int>(previous)); + } else { + // Range with one or two contiguous bits in the affinity mask + __kmp_str_buf_print(buf, "%d", static_cast<int>(start)); + if (previous - start > 0) { + __kmp_str_buf_print(buf, ",%d", static_cast<int>(previous)); + } + } + // Start over with new start point + start = finish; + if (start == mask->end()) + break; + } + return buf; +} void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) { KMP_CPU_ZERO(mask); diff --git a/runtime/src/kmp_affinity.h b/runtime/src/kmp_affinity.h index cb1a7e3..e62508a 100644 --- a/runtime/src/kmp_affinity.h +++ b/runtime/src/kmp_affinity.h @@ -376,26 +376,26 @@ class KMPNativeAffinity : public KMPAffinity { mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T)); } void zero() override { - for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] = 0; } void copy(const KMPAffinity::Mask *src) override { const Mask *convert = static_cast<const Mask *>(src); - for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] = convert->mask[i]; } void bitwise_and(const KMPAffinity::Mask *rhs) 
override { const Mask *convert = static_cast<const Mask *>(rhs); - for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] &= convert->mask[i]; } void bitwise_or(const KMPAffinity::Mask *rhs) override { const Mask *convert = static_cast<const Mask *>(rhs); - for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] |= convert->mask[i]; } void bitwise_not() override { - for (size_t i = 0; i < __kmp_num_proc_groups; ++i) + for (int i = 0; i < __kmp_num_proc_groups; ++i) mask[i] = ~(mask[i]); } int begin() const override { diff --git a/runtime/src/kmp_atomic.h b/runtime/src/kmp_atomic.h index 3b75a6b..288916c 100644 --- a/runtime/src/kmp_atomic.h +++ b/runtime/src/kmp_atomic.h @@ -364,7 +364,7 @@ static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck, #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( - ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (omp_wait_id_t)lck, + ompt_mutex_atomic, 0, kmp_mutex_impl_queuing, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif @@ -374,7 +374,7 @@ static inline void __kmp_acquire_atomic_lock(kmp_atomic_lock_t *lck, #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_atomic, (omp_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); + ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif } @@ -390,7 +390,7 @@ static inline void __kmp_release_atomic_lock(kmp_atomic_lock_t *lck, #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_atomic, (omp_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); + ompt_mutex_atomic, (ompt_wait_id_t)lck, OMPT_GET_RETURN_ADDRESS(0)); } #endif } diff --git 
a/runtime/src/kmp_barrier.cpp b/runtime/src/kmp_barrier.cpp index 2b78b54..79b6bf3 100644 --- a/runtime/src/kmp_barrier.cpp +++ b/runtime/src/kmp_barrier.cpp @@ -1253,7 +1253,7 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, // It is OK to report the barrier state after the barrier begin callback. // According to the OMPT specification, a compliant implementation may // even delay reporting this state until the barrier begins to wait. - this_thr->th.ompt_thread_info.state = omp_state_wait_barrier; + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier; } #endif @@ -1502,7 +1502,7 @@ int __kmp_barrier(enum barrier_type bt, int gtid, int is_split, my_task_data, return_address); } #endif - this_thr->th.ompt_thread_info.state = omp_state_work_parallel; + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; } #endif ANNOTATE_BARRIER_END(&team->t.t_bar); @@ -1624,7 +1624,7 @@ void __kmp_join_barrier(int gtid) { if (!KMP_MASTER_TID(ds_tid)) this_thr->th.ompt_thread_info.task_data = *OMPT_CUR_TASK_DATA(this_thr); #endif - this_thr->th.ompt_thread_info.state = omp_state_wait_barrier_implicit; + this_thr->th.ompt_thread_info.state = ompt_state_wait_barrier_implicit; } #endif @@ -1698,6 +1698,11 @@ void __kmp_join_barrier(int gtid) { if (__kmp_tasking_mode != tskm_immediate_exec) { __kmp_task_team_wait(this_thr, team USE_ITT_BUILD_ARG(itt_sync_obj)); } +#if OMP_50_ENABLED + if (__kmp_display_affinity) { + KMP_CHECK_UPDATE(team->t.t_display_affinity, 0); + } +#endif #if KMP_STATS_ENABLED // Have master thread flag the workers to indicate they are now waiting for // next parallel region, Also wake them up so they switch their timers to @@ -1882,12 +1887,12 @@ void __kmp_fork_barrier(int gtid, int tid) { #if OMPT_SUPPORT if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { int ds_tid = 
this_thr->th.th_info.ds.ds_tid; ompt_data_t *task_data = (team) ? OMPT_CUR_TASK_DATA(this_thr) : &(this_thr->th.ompt_thread_info.task_data); - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; #if OMPT_OPTIONAL void *codeptr = NULL; if (KMP_MASTER_TID(ds_tid) && @@ -1985,6 +1990,19 @@ void __kmp_fork_barrier(int gtid, int tid) { } #endif #if OMP_50_ENABLED + // Perform the display affinity functionality + if (__kmp_display_affinity) { + if (team->t.t_display_affinity +#if KMP_AFFINITY_SUPPORTED + || (__kmp_affinity_type == affinity_balanced && team->t.t_size_changed) +#endif + ) { + // NULL means use the affinity-format-var ICV + __kmp_aux_display_affinity(gtid, NULL); + this_thr->th.th_prev_num_threads = team->t.t_nproc; + this_thr->th.th_prev_level = team->t.t_level; + } + } if (!KMP_MASTER_TID(tid)) KMP_CHECK_UPDATE(this_thr->th.th_def_allocator, team->t.t_def_allocator); #endif diff --git a/runtime/src/kmp_config.h.cmake b/runtime/src/kmp_config.h.cmake index 6b778ea..c9ebbc0 100644 --- a/runtime/src/kmp_config.h.cmake +++ b/runtime/src/kmp_config.h.cmake @@ -74,6 +74,8 @@ #if LIBOMP_TSAN_SUPPORT #define TSAN_SUPPORT #endif +#cmakedefine01 MSVC +#define KMP_MSVC_COMPAT MSVC // Configured cache line based on architecture #if KMP_ARCH_PPC64 diff --git a/runtime/src/kmp_csupport.cpp b/runtime/src/kmp_csupport.cpp index ac76794..4c62720 100644 --- a/runtime/src/kmp_csupport.cpp +++ b/runtime/src/kmp_csupport.cpp @@ -293,7 +293,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) { va_start(ap, microtask); #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { kmp_info_t *master_th = __kmp_threads[gtid]; kmp_team_t *parent_team = master_th->th.th_team; @@ -305,7 +305,7 @@ void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) 
{ ompt_frame = &( parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame); } - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -395,7 +395,7 @@ void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, int tid = __kmp_tid_from_gtid(gtid); if (ompt_enabled.enabled) { parent_team->t.t_implicit_task_taskdata[tid] - .ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } OMPT_STORE_RETURN_ADDRESS(gtid); #endif @@ -506,8 +506,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { #if OMPT_SUPPORT if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state != omp_state_overhead) { - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = NULL; + this_thr->th.ompt_thread_info.state != ompt_state_overhead) { + OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1, @@ -524,7 +524,7 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { ompt_parallel_invoker_program, OMPT_LOAD_RETURN_ADDRESS(global_tid)); } __ompt_lw_taskteam_unlink(this_thr); - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; } #endif @@ -606,8 +606,8 @@ void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { #if OMPT_SUPPORT if (ompt_enabled.enabled) this_thr->th.ompt_thread_info.state = - ((this_thr->th.th_team_serialized) ? omp_state_work_serial - : omp_state_work_parallel); + ((this_thr->th.th_team_serialized) ? 
ompt_state_work_serial + : ompt_state_work_parallel); #endif } @@ -705,11 +705,11 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { } #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -724,7 +724,7 @@ void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) { __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif } @@ -843,22 +843,22 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { #if OMPT_SUPPORT && OMPT_OPTIONAL kmp_team_t *team; - omp_wait_id_t lck; + ompt_wait_id_t lck; void *codeptr_ra; if (ompt_enabled.enabled) { OMPT_STORE_RETURN_ADDRESS(gtid); team = __kmp_team_from_gtid(gtid); - lck = (omp_wait_id_t)&team->t.t_ordered.dt.t_value; + lck = (ompt_wait_id_t)&team->t.t_ordered.dt.t_value; /* OMPT state update */ th->th.ompt_thread_info.wait_id = lck; - th->th.ompt_thread_info.state = omp_state_wait_ordered; + th->th.ompt_thread_info.state = ompt_state_wait_ordered; /* OMPT event callback */ codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, - (omp_wait_id_t)lck, codeptr_ra); + (ompt_wait_id_t)lck, codeptr_ra); } } #endif @@ -871,13 +871,13 @@ void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { /* OMPT state update */ - th->th.ompt_thread_info.state = omp_state_work_parallel; + th->th.ompt_thread_info.state 
= ompt_state_work_parallel; th->th.ompt_thread_info.wait_id = 0; /* OMPT event callback */ if (ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_ordered, (omp_wait_id_t)lck, codeptr_ra); + ompt_mutex_ordered, (ompt_wait_id_t)lck, codeptr_ra); } } #endif @@ -917,7 +917,7 @@ void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) { if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( ompt_mutex_ordered, - (omp_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value, + (ompt_wait_id_t)&__kmp_team_from_gtid(gtid)->t.t_ordered.dt.t_value, OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif @@ -1144,7 +1144,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, #else KMP_COUNT_BLOCK(OMP_CRITICAL); #if OMPT_SUPPORT && OMPT_OPTIONAL - omp_state_t prev_state = omp_state_undefined; + ompt_state_t prev_state = ompt_state_undefined; ompt_thread_info_t ti; #endif kmp_user_lock_p lck; @@ -1188,15 +1188,15 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, ti = __kmp_threads[global_tid]->th.ompt_thread_info; /* OMPT state update */ prev_state = ti.state; - ti.wait_id = (omp_wait_id_t)lck; - ti.state = omp_state_wait_critical; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = ompt_state_wait_critical; /* OMPT event callback */ codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid); if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (omp_wait_id_t)crit, codeptr_ra); + (ompt_wait_id_t)crit, codeptr_ra); } } #endif @@ -1216,7 +1216,7 @@ void __kmpc_critical(ident_t *loc, kmp_int32 global_tid, /* OMPT event callback */ if (ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_critical, (omp_wait_id_t)crit, codeptr_ra); + ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr_ra); 
} } #endif @@ -1292,7 +1292,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { return kmp_mutex_impl_speculative; #endif default: - return ompt_mutex_impl_unknown; + return ompt_mutex_impl_none; } ilock = KMP_LOOKUP_I_LOCK(user_lock); } @@ -1316,7 +1316,7 @@ __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) { case locktag_nested_drdpa: return kmp_mutex_impl_queuing; default: - return ompt_mutex_impl_unknown; + return ompt_mutex_impl_none; } } #else @@ -1339,7 +1339,7 @@ static kmp_mutex_impl_t __ompt_get_mutex_impl_type() { return kmp_mutex_impl_speculative; #endif default: - return ompt_mutex_impl_unknown; + return ompt_mutex_impl_none; } } #endif // KMP_USE_DYNAMIC_LOCK @@ -1363,7 +1363,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, KMP_COUNT_BLOCK(OMP_CRITICAL); kmp_user_lock_p lck; #if OMPT_SUPPORT && OMPT_OPTIONAL - omp_state_t prev_state = omp_state_undefined; + ompt_state_t prev_state = ompt_state_undefined; ompt_thread_info_t ti; // This is the case, if called from __kmpc_critical: void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); @@ -1402,14 +1402,14 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, ti = __kmp_threads[global_tid]->th.ompt_thread_info; /* OMPT state update */ prev_state = ti.state; - ti.wait_id = (omp_wait_id_t)lck; - ti.state = omp_state_wait_critical; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = ompt_state_wait_critical; /* OMPT event callback */ if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_critical, (unsigned int)hint, - __ompt_get_mutex_impl_type(crit), (omp_wait_id_t)crit, codeptr); + __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)crit, codeptr); } } #endif @@ -1440,14 +1440,14 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, ti = __kmp_threads[global_tid]->th.ompt_thread_info; /* OMPT state update */ prev_state = ti.state; - 
ti.wait_id = (omp_wait_id_t)lck; - ti.state = omp_state_wait_critical; + ti.wait_id = (ompt_wait_id_t)lck; + ti.state = ompt_state_wait_critical; /* OMPT event callback */ if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_critical, (unsigned int)hint, - __ompt_get_mutex_impl_type(0, ilk), (omp_wait_id_t)crit, codeptr); + __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)crit, codeptr); } } #endif @@ -1467,7 +1467,7 @@ void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid, /* OMPT event callback */ if (ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_critical, (omp_wait_id_t)crit, codeptr); + ompt_mutex_critical, (ompt_wait_id_t)crit, codeptr); } } #endif @@ -1565,7 +1565,7 @@ void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid, OMPT_STORE_RETURN_ADDRESS(global_tid); if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_critical, (omp_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)); + ompt_mutex_critical, (ompt_wait_id_t)crit, OMPT_LOAD_RETURN_ADDRESS(0)); } #endif @@ -1594,11 +1594,11 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { __kmp_check_barrier(global_tid, ct_barrier, loc); #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -1608,7 +1608,7 @@ kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) { status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - 
ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -1656,11 +1656,11 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { } #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -1670,7 +1670,7 @@ kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) { __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -1867,6 +1867,59 @@ int ompc_get_team_size(int level) { return __kmp_get_team_size(__kmp_entry_gtid(), level); } +#if OMP_50_ENABLED +/* OpenMP 5.0 Affinity Format API */ + +void ompc_set_affinity_format(char const *format) { + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, + format, KMP_STRLEN(format) + 1); +} + +size_t ompc_get_affinity_format(char *buffer, size_t size) { + size_t format_size; + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + format_size = KMP_STRLEN(__kmp_affinity_format); + if (buffer && size) { + __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format, + format_size + 1); + } + return format_size; +} + +void ompc_display_affinity(char const *format) { + int gtid; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + __kmp_aux_display_affinity(gtid, format); +} + +size_t ompc_capture_affinity(char *buffer, size_t buf_size, + char const *format) { + int gtid; + size_t num_required; + kmp_str_buf_t 
capture_buf; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + __kmp_str_buf_init(&capture_buf); + num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf); + if (buffer && buf_size) { + __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str, + capture_buf.used + 1); + } + __kmp_str_buf_free(&capture_buf); + return num_required; +} +#endif /* OMP_50_ENABLED */ + void kmpc_set_stacksize(int arg) { // __kmp_aux_set_stacksize initializes the library if needed __kmp_aux_set_stacksize(arg); @@ -2006,11 +2059,11 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, *data_ptr = cpy_data; #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -2038,7 +2091,7 @@ void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size, __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif } @@ -2136,7 +2189,7 @@ void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock, if (ompt_enabled.ompt_callback_lock_init) { ompt_callbacks.ompt_callback(ompt_callback_lock_init)( ompt_mutex_lock, (omp_lock_hint_t)hint, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2160,7 +2213,7 @@ void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid, if (ompt_enabled.ompt_callback_lock_init) { ompt_callbacks.ompt_callback(ompt_callback_lock_init)( ompt_mutex_nest_lock, 
(omp_lock_hint_t)hint, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2186,7 +2239,7 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_lock_init) { ompt_callbacks.ompt_callback(ompt_callback_lock_init)( ompt_mutex_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2229,7 +2282,7 @@ void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_lock_init) { ompt_callbacks.ompt_callback(ompt_callback_lock_init)( ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (omp_wait_id_t)user_lock, codeptr); + (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2258,7 +2311,7 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_lock_init) { ompt_callbacks.ompt_callback(ompt_callback_lock_init)( ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2304,7 +2357,7 @@ void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_lock_init) { ompt_callbacks.ompt_callback(ompt_callback_lock_init)( ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (omp_wait_id_t)user_lock, codeptr); + (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2340,7 +2393,7 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { lck = (kmp_user_lock_p)user_lock; } ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif KMP_D_LOCK_FUNC(user_lock, 
destroy)((kmp_dyna_lock_t *)user_lock); @@ -2368,7 +2421,7 @@ void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { codeptr = OMPT_GET_RETURN_ADDRESS(0); if (ompt_enabled.ompt_callback_lock_destroy) { ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2408,7 +2461,7 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { codeptr = OMPT_GET_RETURN_ADDRESS(0); if (ompt_enabled.ompt_callback_lock_destroy) { ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock); @@ -2440,7 +2493,7 @@ void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { codeptr = OMPT_GET_RETURN_ADDRESS(0); if (ompt_enabled.ompt_callback_lock_destroy) { ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)( - ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2485,7 +2538,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2507,7 +2560,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2540,7 +2593,7 @@ void 
__kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (omp_wait_id_t)lck, codeptr); + (ompt_wait_id_t)lck, codeptr); } #endif @@ -2553,7 +2606,7 @@ void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2575,7 +2628,7 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } } @@ -2593,13 +2646,13 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquired) { // lock_first ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); } } else { if (ompt_enabled.ompt_callback_nest_lock) { // lock_next ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr); + ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); } } } @@ -2637,7 +2690,7 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr); + __ompt_get_mutex_impl_type(), 
(ompt_wait_id_t)lck, codeptr); } } #endif @@ -2654,13 +2707,13 @@ void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquired) { // lock_first ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); } } else { if (ompt_enabled.ompt_callback_nest_lock) { // lock_next ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_begin, (omp_wait_id_t)lck, codeptr); + ompt_scope_begin, (ompt_wait_id_t)lck, codeptr); } } } @@ -2696,7 +2749,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { codeptr = OMPT_GET_RETURN_ADDRESS(0); if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2725,7 +2778,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { codeptr = OMPT_GET_RETURN_ADDRESS(0); if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2757,7 +2810,7 @@ void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { codeptr = OMPT_GET_RETURN_ADDRESS(0); if (ompt_enabled.ompt_callback_mutex_released) { ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -2785,12 +2838,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_released) { // release_lock_last ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_nest_lock, 
(ompt_wait_id_t)user_lock, codeptr); } } else if (ompt_enabled.ompt_callback_nest_lock) { // release_lock_prev ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_end, (omp_wait_id_t)user_lock, codeptr); + ompt_scope_end, (ompt_wait_id_t)user_lock, codeptr); } } #endif @@ -2834,12 +2887,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_released) { // release_lock_last ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); } } else if (ompt_enabled.ompt_callback_nest_lock) { // release_lock_previous ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr); + ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); } } #endif @@ -2876,12 +2929,12 @@ void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_released) { // release_lock_last ompt_callbacks.ompt_callback(ompt_callback_mutex_released)( - ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); } } else if (ompt_enabled.ompt_callback_nest_lock) { // release_lock_previous ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_mutex_scope_end, (omp_wait_id_t)lck, codeptr); + ompt_mutex_scope_end, (ompt_wait_id_t)lck, codeptr); } } #endif @@ -2907,7 +2960,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -2930,7 +2983,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if OMPT_SUPPORT && OMPT_OPTIONAL if 
(ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)user_lock, codeptr); } #endif return FTN_TRUE; @@ -2971,7 +3024,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(), - (omp_wait_id_t)lck, codeptr); + (ompt_wait_id_t)lck, codeptr); } #endif @@ -2986,7 +3039,7 @@ int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (rc && ompt_enabled.ompt_callback_mutex_acquired) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_lock, (ompt_wait_id_t)lck, codeptr); } #endif @@ -3012,7 +3065,7 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(user_lock), (omp_wait_id_t)user_lock, + __ompt_get_mutex_impl_type(user_lock), (ompt_wait_id_t)user_lock, codeptr); } #endif @@ -3030,13 +3083,13 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquired) { // lock_first ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (omp_wait_id_t)user_lock, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)user_lock, codeptr); } } else { if (ompt_enabled.ompt_callback_nest_lock) { // lock_next ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_scope_begin, (omp_wait_id_t)user_lock, codeptr); + ompt_scope_begin, (ompt_wait_id_t)user_lock, codeptr); } } } @@ -3077,7 +3130,7 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, 
void **user_lock) { ompt_enabled.ompt_callback_mutex_acquire) { ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)( ompt_mutex_nest_lock, omp_lock_hint_none, - __ompt_get_mutex_impl_type(), (omp_wait_id_t)lck, codeptr); + __ompt_get_mutex_impl_type(), (ompt_wait_id_t)lck, codeptr); } #endif @@ -3095,13 +3148,13 @@ int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) { if (ompt_enabled.ompt_callback_mutex_acquired) { // lock_first ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)( - ompt_mutex_nest_lock, (omp_wait_id_t)lck, codeptr); + ompt_mutex_nest_lock, (ompt_wait_id_t)lck, codeptr); } } else { if (ompt_enabled.ompt_callback_nest_lock) { // lock_next ompt_callbacks.ompt_callback(ompt_callback_nest_lock)( - ompt_mutex_scope_begin, (omp_wait_id_t)lck, codeptr); + ompt_mutex_scope_begin, (ompt_wait_id_t)lck, codeptr); } } } @@ -3392,11 +3445,11 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, // JP: as long as there is a barrier in the implementation, OMPT should and // will provide the barrier events // so we set-up the necessary frame/return addresses. - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -3409,7 +3462,7 @@ __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, retval = (retval != 0) ? 
(0) : (1); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -3573,11 +3626,11 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, // this barrier should be visible to a customer and to the threading profile // tool (it's a terminating barrier on constructs if NOWAIT not specified) #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -3591,7 +3644,7 @@ kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, retval = (retval != 0) ? (0) : (1); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -3659,11 +3712,11 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, // TODO: implicit barrier: should be exposed #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -3673,7 +3726,7 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -3683,11 +3736,11 @@ void 
__kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, // TODO: implicit barrier: should be exposed #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -3697,18 +3750,18 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif } else if (packed_reduction_method == atomic_reduce_block) { #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - if (ompt_frame->enter_frame == NULL) - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (ompt_frame->enter_frame.ptr == NULL) + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(global_tid); } #endif @@ -3719,7 +3772,7 @@ void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid, __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif diff --git a/runtime/src/kmp_dispatch.cpp b/runtime/src/kmp_dispatch.cpp index b4192df..1090e9d 100644 --- a/runtime/src/kmp_dispatch.cpp +++ b/runtime/src/kmp_dispatch.cpp @@ -24,7 +24,7 @@ #include "kmp_itt.h" #include "kmp_stats.h" #include "kmp_str.h" -#if KMP_OS_WINDOWS && KMP_ARCH_X86 +#if KMP_USE_X87CONTROL #include <float.h> #endif #include "kmp_lock.h" @@ -478,7 +478,7 @@ void 
__kmp_dispatch_init_algorithm(ident_t *loc, int gtid, /* commonly used term: (2 nproc - 1)/(2 nproc) */ DBL x; -#if KMP_OS_WINDOWS && KMP_ARCH_X86 +#if KMP_USE_X87CONTROL /* Linux* OS already has 64-bit computation by default for long double, and on Windows* OS on Intel(R) 64, /Qlong_double doesn't work. On Windows* OS on IA-32 architecture, we need to set precision to 64-bit @@ -573,7 +573,7 @@ void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid, pr->u.p.count = tc - __kmp_dispatch_guided_remaining( tc, GUIDED_ANALYTICAL_WORKAROUND, cross) - cross * chunk; -#if KMP_OS_WINDOWS && KMP_ARCH_X86 +#if KMP_USE_X87CONTROL // restore FPCW _control87(oldFpcw, _MCW_PC); #endif @@ -1625,7 +1625,7 @@ int __kmp_dispatch_next_algorithm(int gtid, case kmp_sch_guided_analytical_chunked: { T chunkspec = pr->u.p.parm1; UT chunkIdx; -#if KMP_OS_WINDOWS && KMP_ARCH_X86 +#if KMP_USE_X87CONTROL /* for storing original FPCW value for Windows* OS on IA-32 architecture 8-byte version */ unsigned int oldFpcw; @@ -1662,7 +1662,7 @@ int __kmp_dispatch_next_algorithm(int gtid, Windows* OS. This check works around the possible effect that init != 0 for chunkIdx == 0. 
*/ -#if KMP_OS_WINDOWS && KMP_ARCH_X86 +#if KMP_USE_X87CONTROL /* If we haven't already done so, save original FPCW and set precision to 64-bit, as Windows* OS on IA-32 architecture defaults to 53-bit */ @@ -1690,7 +1690,7 @@ int __kmp_dispatch_next_algorithm(int gtid, } // if } // if } // while (1) -#if KMP_OS_WINDOWS && KMP_ARCH_X86 +#if KMP_USE_X87CONTROL /* restore FPCW if necessary AC: check fpcwSet flag first because oldFpcw can be uninitialized here */ diff --git a/runtime/src/kmp_ftn_entry.h b/runtime/src/kmp_ftn_entry.h index 6910c37..abf1892 100644 --- a/runtime/src/kmp_ftn_entry.h +++ b/runtime/src/kmp_ftn_entry.h @@ -21,6 +21,12 @@ #include "kmp_i18n.h" +#if OMP_50_ENABLED +// For affinity format functions +#include "kmp_io.h" +#include "kmp_str.h" +#endif + #if OMPT_SUPPORT #include "ompt-specific.h" #endif @@ -355,9 +361,9 @@ int FTN_STDCALL FTN_CONTROL_TOOL(int command, int modifier, void *arg) { } kmp_info_t *this_thr = __kmp_threads[__kmp_entry_gtid()]; ompt_task_info_t *parent_task_info = OMPT_CUR_TASK_INFO(this_thr); - parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); int ret = __kmp_control_tool(command, modifier, arg); - parent_task_info->frame.enter_frame = 0; + parent_task_info->frame.enter_frame.ptr = 0; return ret; #endif } @@ -389,6 +395,137 @@ void FTN_STDCALL FTN_FREE(void *ptr, const omp_allocator_t *allocator) { __kmpc_free(__kmp_entry_gtid(), ptr, allocator); #endif } + +/* OpenMP 5.0 affinity format support */ + +#ifndef KMP_STUB +static void __kmp_fortran_strncpy_truncate(char *buffer, size_t buf_size, + char const *csrc, size_t csrc_size) { + size_t capped_src_size = csrc_size; + if (csrc_size >= buf_size) { + capped_src_size = buf_size - 1; + } + KMP_STRNCPY_S(buffer, buf_size, csrc, capped_src_size); + if (csrc_size >= buf_size) { + KMP_DEBUG_ASSERT(buffer[buf_size - 1] == '\0'); + buffer[buf_size - 1] = csrc[buf_size - 1]; + } else { + for 
(size_t i = csrc_size; i < buf_size; ++i) + buffer[i] = ' '; + } +} + +// Convert a Fortran string to a C string by adding null byte +class ConvertedString { + char *buf; + kmp_info_t *th; + +public: + ConvertedString(char const *fortran_str, size_t size) { + th = __kmp_get_thread(); + buf = (char *)__kmp_thread_malloc(th, size + 1); + KMP_STRNCPY_S(buf, size + 1, fortran_str, size); + buf[size] = '\0'; + } + ~ConvertedString() { __kmp_thread_free(th, buf); } + const char *get() const { return buf; } +}; +#endif // KMP_STUB + +/* + * Set the value of the affinity-format-var ICV on the current device to the + * format specified in the argument. +*/ +void FTN_STDCALL FTN_SET_AFFINITY_FORMAT(char const *format, size_t size) { +#ifdef KMP_STUB + return; +#else + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + ConvertedString cformat(format, size); + // Since the __kmp_affinity_format variable is a C string, do not + // use the fortran strncpy function + __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, + cformat.get(), KMP_STRLEN(cformat.get())); +#endif +} + +/* + * Returns the number of characters required to hold the entire affinity format + * specification (not including null byte character) and writes the value of the + * affinity-format-var ICV on the current device to buffer. If the return value + * is larger than size, the affinity format specification is truncated. +*/ +size_t FTN_STDCALL FTN_GET_AFFINITY_FORMAT(char *buffer, size_t size) { +#ifdef KMP_STUB + return 0; +#else + size_t format_size; + if (!__kmp_init_serial) { + __kmp_serial_initialize(); + } + format_size = KMP_STRLEN(__kmp_affinity_format); + if (buffer && size) { + __kmp_fortran_strncpy_truncate(buffer, size, __kmp_affinity_format, + format_size); + } + return format_size; +#endif +} + +/* + * Prints the thread affinity information of the current thread in the format + * specified by the format argument. 
If the format is NULL or a zero-length + * string, the value of the affinity-format-var ICV is used. +*/ +void FTN_STDCALL FTN_DISPLAY_AFFINITY(char const *format, size_t size) { +#ifdef KMP_STUB + return; +#else + int gtid; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + ConvertedString cformat(format, size); + __kmp_aux_display_affinity(gtid, cformat.get()); +#endif +} + +/* + * Returns the number of characters required to hold the entire affinity format + * specification (not including null byte) and prints the thread affinity + * information of the current thread into the character string buffer with the + * size of size in the format specified by the format argument. If the format is + * NULL or a zero-length string, the value of the affinity-format-var ICV is + * used. The buffer must be allocated prior to calling the routine. If the + * return value is larger than size, the affinity format specification is + * truncated. +*/ +size_t FTN_STDCALL FTN_CAPTURE_AFFINITY(char *buffer, char const *format, + size_t buf_size, size_t for_size) { +#if defined(KMP_STUB) + return 0; +#else + int gtid; + size_t num_required; + kmp_str_buf_t capture_buf; + if (!TCR_4(__kmp_init_middle)) { + __kmp_middle_initialize(); + } + gtid = __kmp_get_gtid(); + __kmp_str_buf_init(&capture_buf); + ConvertedString cformat(format, for_size); + num_required = __kmp_aux_capture_affinity(gtid, cformat.get(), &capture_buf); + if (buffer && buf_size) { + __kmp_fortran_strncpy_truncate(buffer, buf_size, capture_buf.str, + capture_buf.used); + } + __kmp_str_buf_free(&capture_buf); + return num_required; +#endif +} #endif /* OMP_50_ENABLED */ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { @@ -397,7 +534,8 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_THREAD_NUM)(void) { #else int gtid; -#if KMP_OS_DARWIN || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD +#if KMP_OS_DARWIN || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + 
KMP_OS_HURD gtid = __kmp_entry_gtid(); #elif KMP_OS_WINDOWS if (!__kmp_init_parallel || @@ -777,34 +915,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_NUM_TEAMS)(void) { #ifdef KMP_STUB return 1; #else - kmp_info_t *thr = __kmp_entry_thread(); - if (thr->th.th_teams_microtask) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; - int ii = team->t.t_level; // the level of the teams construct - int dd = team->t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT(ii >= tlevel); - while (ii > level) { - for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { - } - if (team->t.t_serialized && (!dd)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - ii--; - } - } - if (dd > 1) { - return 1; // teams region is serialized ( 1 team of 1 thread ). - } else { - return team->t.t_parent->t.t_nproc; - } - } else { - return 1; - } + return __kmp_aux_get_num_teams(); #endif } @@ -812,34 +923,7 @@ int FTN_STDCALL KMP_EXPAND_NAME(FTN_GET_TEAM_NUM)(void) { #ifdef KMP_STUB return 0; #else - kmp_info_t *thr = __kmp_entry_thread(); - if (thr->th.th_teams_microtask) { - kmp_team_t *team = thr->th.th_team; - int tlevel = thr->th.th_teams_level; // the level of the teams construct - int ii = team->t.t_level; - int dd = team->t.t_serialized; - int level = tlevel + 1; - KMP_DEBUG_ASSERT(ii >= tlevel); - while (ii > level) { - for (dd = team->t.t_serialized; (dd > 0) && (ii > level); dd--, ii--) { - } - if (team->t.t_serialized && (!dd)) { - team = team->t.t_parent; - continue; - } - if (ii > level) { - team = team->t.t_parent; - ii--; - } - } - if (dd > 1) { - return 0; // teams region is serialized ( 1 team of 1 thread ). 
- } else { - return team->t.t_master_tid; - } - } else { - return 0; - } + return __kmp_aux_get_team_num(); #endif } diff --git a/runtime/src/kmp_ftn_os.h b/runtime/src/kmp_ftn_os.h index 5d0aaa2..47188fc 100644 --- a/runtime/src/kmp_ftn_os.h +++ b/runtime/src/kmp_ftn_os.h @@ -139,6 +139,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator #define FTN_ALLOC omp_alloc #define FTN_FREE omp_free +#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format +#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format +#define FTN_DISPLAY_AFFINITY omp_display_affinity +#define FTN_CAPTURE_AFFINITY omp_capture_affinity #endif #endif /* KMP_FTN_PLAIN */ @@ -265,6 +269,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR omp_get_default_allocator_ #define FTN_ALLOC omp_alloc_ #define FTN_FREE omp_free_ +#define FTN_SET_AFFINITY_FORMAT omp_set_affinity_format_ +#define FTN_GET_AFFINITY_FORMAT omp_get_affinity_format_ +#define FTN_DISPLAY_AFFINITY omp_display_affinity_ +#define FTN_CAPTURE_AFFINITY omp_capture_affinity_ #endif #endif /* KMP_FTN_APPEND */ @@ -391,6 +399,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR #define FTN_ALLOC OMP_ALLOC #define FTN_FREE OMP_FREE +#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT +#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT +#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY +#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY #endif #endif /* KMP_FTN_UPPER */ @@ -517,6 +529,10 @@ #define FTN_GET_DEFAULT_ALLOCATOR OMP_GET_DEFAULT_ALLOCATOR_ #define FTN_ALLOC OMP_ALLOC_ #define FTN_FREE OMP_FREE_ +#define FTN_SET_AFFINITY_FORMAT OMP_SET_AFFINITY_FORMAT_ +#define FTN_GET_AFFINITY_FORMAT OMP_GET_AFFINITY_FORMAT_ +#define FTN_DISPLAY_AFFINITY OMP_DISPLAY_AFFINITY_ +#define FTN_CAPTURE_AFFINITY OMP_CAPTURE_AFFINITY_ #endif #endif /* KMP_FTN_UAPPEND */ diff --git a/runtime/src/kmp_global.cpp b/runtime/src/kmp_global.cpp index 34465de..ef8a116 100644 --- a/runtime/src/kmp_global.cpp +++ b/runtime/src/kmp_global.cpp @@ 
-282,6 +282,11 @@ kmp_nested_proc_bind_t __kmp_nested_proc_bind = {NULL, 0, 0}; int __kmp_affinity_num_places = 0; #endif +#if OMP_50_ENABLED +int __kmp_display_affinity = FALSE; +char *__kmp_affinity_format = NULL; +#endif // OMP_50_ENABLED + kmp_hws_item_t __kmp_hws_socket = {0, 0}; kmp_hws_item_t __kmp_hws_node = {0, 0}; kmp_hws_item_t __kmp_hws_tile = {0, 0}; diff --git a/runtime/src/kmp_gsupport.cpp b/runtime/src/kmp_gsupport.cpp index e218018..646d75d 100644 --- a/runtime/src/kmp_gsupport.cpp +++ b/runtime/src/kmp_gsupport.cpp @@ -32,17 +32,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_BARRIER)(void) { MKLOC(loc, "GOMP_barrier"); KA_TRACE(20, ("GOMP_barrier: T#%d\n", gtid)); #if OMPT_SUPPORT && OMPT_OPTIONAL - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif __kmpc_barrier(&loc, gtid); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif } @@ -178,10 +178,10 @@ void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { // and for all other threads to reach this point. 
#if OMPT_SUPPORT && OMPT_OPTIONAL - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -198,7 +198,7 @@ void *KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_START)(void) { __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif return retval; @@ -214,10 +214,10 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { // propagated to all threads before trying to reuse the t_copypriv_data field. __kmp_team_from_gtid(gtid)->t.t_copypriv_data = data; #if OMPT_SUPPORT && OMPT_OPTIONAL - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -230,7 +230,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SINGLE_COPY_END)(void *data) { __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif } @@ -284,8 +284,8 @@ static void *data) { #if OMPT_SUPPORT kmp_info_t *thr; - omp_frame_t *ompt_frame; - omp_state_t enclosing_state; + ompt_frame_t *ompt_frame; + ompt_state_t enclosing_state; if (ompt_enabled.enabled) { // get pointer to thread data structure @@ -293,11 +293,11 @@ static // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = omp_state_work_parallel; + 
thr->th.ompt_thread_info.state = ompt_state_work_parallel; // set task frame __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->exit_frame = OMPT_GET_FRAME_ADDRESS(0); + ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -306,7 +306,7 @@ static #if OMPT_SUPPORT if (ompt_enabled.enabled) { // clear task frame - ompt_frame->exit_frame = NULL; + ompt_frame->exit_frame = ompt_data_none; // restore enclosing state thr->th.ompt_thread_info.state = enclosing_state; @@ -331,18 +331,18 @@ static #if OMPT_SUPPORT kmp_info_t *thr; - omp_frame_t *ompt_frame; - omp_state_t enclosing_state; + ompt_frame_t *ompt_frame; + ompt_state_t enclosing_state; if (ompt_enabled.enabled) { thr = __kmp_threads[*gtid]; // save enclosing task state; set current state for task enclosing_state = thr->th.ompt_thread_info.state; - thr->th.ompt_thread_info.state = omp_state_work_parallel; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; // set task frame __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->exit_frame = OMPT_GET_FRAME_ADDRESS(0); + ompt_frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -352,7 +352,7 @@ static #if OMPT_SUPPORT if (ompt_enabled.enabled) { // clear task frame - ompt_frame->exit_frame = NULL; + ompt_frame->exit_frame = ompt_data_none; // reset enclosing state thr->th.ompt_thread_info.state = enclosing_state; @@ -403,7 +403,7 @@ static &(task_info->task_data), ompt_team_size, __kmp_tid_from_gtid(gtid)); task_info->thread_num = __kmp_tid_from_gtid(gtid); } - thr->th.ompt_thread_info.state = omp_state_work_parallel; + thr->th.ompt_thread_info.state = ompt_state_work_parallel; } #endif } @@ -422,11 +422,11 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), int gtid = __kmp_entry_gtid(); #if OMPT_SUPPORT - omp_frame_t *parent_frame, *frame; + ompt_frame_t *parent_frame, *frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, 
NULL, NULL, &parent_frame, NULL, NULL); - parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -448,7 +448,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_START)(void (*task)(void *), #if OMPT_SUPPORT if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &frame, NULL, NULL); - frame->exit_frame = OMPT_GET_FRAME_ADDRESS(1); + frame->exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif } @@ -471,7 +471,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(void) { // Implicit task is finished here, in the barrier we might schedule // deferred tasks, // these don't see the implicit task on the stack - OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = NULL; + OMPT_CUR_TASK_INFO(thr)->frame.exit_frame = ompt_data_none; } #endif @@ -764,17 +764,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_LOOP_END)(void) { KA_TRACE(20, ("GOMP_loop_end: T#%d\n", gtid)) #if OMPT_SUPPORT && OMPT_OPTIONAL - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT && OMPT_OPTIONAL if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -1075,16 +1075,16 @@ LOOP_DOACROSS_RUNTIME_START_ULL( #if OMPT_SUPPORT && OMPT_OPTIONAL #define OMPT_LOOP_PRE() \ - omp_frame_t *parent_frame; \ + ompt_frame_t *parent_frame; \ if (ompt_enabled.enabled) { \ __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); \ - parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); \ + parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); \ OMPT_STORE_RETURN_ADDRESS(gtid); \ } #define 
OMPT_LOOP_POST() \ if (ompt_enabled.enabled) { \ - parent_frame->enter_frame = NULL; \ + parent_frame->enter_frame = ompt_data_none; \ } #else @@ -1164,7 +1164,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, if (ompt_enabled.enabled) { OMPT_STORE_RETURN_ADDRESS(gtid); current_task = __kmp_threads[gtid]->th.th_current_task; - current_task->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + current_task->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -1198,8 +1198,8 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, taskdata = KMP_TASK_TO_TASKDATA(task); oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; - thread->th.ompt_thread_info.state = omp_state_work_parallel; - taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0); + thread->th.ompt_thread_info.state = ompt_state_work_parallel; + taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -1211,13 +1211,13 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_TASK)(void (*func)(void *), void *data, #if OMPT_SUPPORT if (ompt_enabled.enabled) { thread->th.ompt_thread_info = oldInfo; - taskdata->ompt_task_info.frame.exit_frame = NULL; + taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; } #endif } #if OMPT_SUPPORT if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = NULL; + current_task->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif @@ -1302,11 +1302,11 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)( int gtid = __kmp_entry_gtid(); #if OMPT_SUPPORT - omp_frame_t *parent_frame; + ompt_frame_t *parent_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &parent_frame, NULL, NULL); - parent_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + parent_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); 
OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -1328,7 +1328,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_SECTIONS_START)( #if OMPT_SUPPORT if (ompt_enabled.enabled) { - parent_frame->enter_frame = NULL; + parent_frame->enter_frame = ompt_data_none; } #endif @@ -1342,17 +1342,17 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_SECTIONS_END)(void) { KA_TRACE(20, ("GOMP_sections_end: T#%d\n", gtid)) #if OMPT_SUPPORT - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; if (ompt_enabled.enabled) { __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = OMPT_GET_FRAME_ADDRESS(1); + ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL); #if OMPT_SUPPORT if (ompt_enabled.enabled) { - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; } #endif @@ -1383,7 +1383,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), ompt_task_info_t *parent_task_info, *task_info; if (ompt_enabled.enabled) { parent_task_info = __ompt_get_task_info_object(0); - parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); OMPT_STORE_RETURN_ADDRESS(gtid); } #endif @@ -1403,7 +1403,7 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), #if OMPT_SUPPORT if (ompt_enabled.enabled) { task_info = __ompt_get_task_info_object(0); - task_info->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0); + task_info->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif task(data); @@ -1415,8 +1415,8 @@ void KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL)(void (*task)(void *), KMP_EXPAND_NAME(KMP_API_NAME_GOMP_PARALLEL_END)(); #if OMPT_SUPPORT if (ompt_enabled.enabled) { - task_info->frame.exit_frame = NULL; - parent_task_info->frame.enter_frame = NULL; + task_info->frame.exit_frame = ompt_data_none; + 
parent_task_info->frame.enter_frame = ompt_data_none; } #endif } diff --git a/runtime/src/kmp_io.cpp b/runtime/src/kmp_io.cpp index 4f58ea0..24c6e72 100644 --- a/runtime/src/kmp_io.cpp +++ b/runtime/src/kmp_io.cpp @@ -27,11 +27,15 @@ #include "kmp_str.h" #if KMP_OS_WINDOWS +#if KMP_MSVC_COMPAT #pragma warning(push) #pragma warning(disable : 271 310) +#endif #include <windows.h> +#if KMP_MSVC_COMPAT #pragma warning(pop) #endif +#endif /* ------------------------------------------------------------------------ */ @@ -42,10 +46,7 @@ kmp_bootstrap_lock_t __kmp_console_lock = KMP_BOOTSTRAP_LOCK_INITIALIZER( #if KMP_OS_WINDOWS -#ifdef KMP_DEBUG -/* __kmp_stdout is used only for dev build */ static HANDLE __kmp_stdout = NULL; -#endif static HANDLE __kmp_stderr = NULL; static int __kmp_console_exists = FALSE; static kmp_str_buf_t __kmp_console_buf; @@ -72,10 +73,7 @@ void __kmp_close_console(void) { /* wait until user presses return before closing window */ /* TODO only close if a window was opened */ if (__kmp_console_exists) { -#ifdef KMP_DEBUG - /* standard out is used only in dev build */ __kmp_stdout = NULL; -#endif __kmp_stderr = NULL; __kmp_str_buf_free(&__kmp_console_buf); __kmp_console_exists = FALSE; @@ -88,21 +86,17 @@ static void __kmp_redirect_output(void) { __kmp_acquire_bootstrap_lock(&__kmp_console_lock); if (!__kmp_console_exists) { -#ifdef KMP_DEBUG - /* standard out is used only in dev build */ HANDLE ho; -#endif HANDLE he; __kmp_str_buf_init(&__kmp_console_buf); AllocConsole(); -// We do not check the result of AllocConsole because -// 1. the call is harmless -// 2. it is not clear how to communicate failue -// 3. we will detect failure later when we get handle(s) + // We do not check the result of AllocConsole because + // 1. the call is harmless + // 2. it is not clear how to communicate failue + // 3. 
we will detect failure later when we get handle(s) -#ifdef KMP_DEBUG ho = GetStdHandle(STD_OUTPUT_HANDLE); if (ho == INVALID_HANDLE_VALUE || ho == NULL) { @@ -114,7 +108,6 @@ static void __kmp_redirect_output(void) { __kmp_stdout = ho; // temporary code, need new global for ho } -#endif he = GetStdHandle(STD_ERROR_HANDLE); if (he == INVALID_HANDLE_VALUE || he == NULL) { @@ -133,22 +126,22 @@ static void __kmp_redirect_output(void) { #else #define __kmp_stderr (stderr) +#define __kmp_stdout (stdout) #endif /* KMP_OS_WINDOWS */ -void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) { +void __kmp_vprintf(enum kmp_io out_stream, char const *format, va_list ap) { #if KMP_OS_WINDOWS if (!__kmp_console_exists) { __kmp_redirect_output(); } - if (!__kmp_stderr && __kmp_io == kmp_err) { + if (!__kmp_stderr && out_stream == kmp_err) { return; } -#ifdef KMP_DEBUG - if (!__kmp_stdout && __kmp_io == kmp_out) { + if (!__kmp_stdout && out_stream == kmp_out) { return; } -#endif #endif /* KMP_OS_WINDOWS */ + auto stream = ((out_stream == kmp_out) ? 
__kmp_stdout : __kmp_stderr); if (__kmp_debug_buf && __kmp_debug_buffer != NULL) { @@ -170,14 +163,14 @@ void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) { "overflow; increase " "KMP_DEBUG_BUF_CHARS to %d\n", chars + 1); - WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, - &count, NULL); + WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count, + NULL); __kmp_str_buf_clear(&__kmp_console_buf); #else - fprintf(__kmp_stderr, "OMP warning: Debugging buffer overflow; " - "increase KMP_DEBUG_BUF_CHARS to %d\n", + fprintf(stream, "OMP warning: Debugging buffer overflow; " + "increase KMP_DEBUG_BUF_CHARS to %d\n", chars + 1); - fflush(__kmp_stderr); + fflush(stream); #endif __kmp_debug_buf_warn_chars = chars + 1; } @@ -192,15 +185,15 @@ void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap) { __kmp_str_buf_print(&__kmp_console_buf, "pid=%d: ", (kmp_int32)getpid()); #endif __kmp_str_buf_vprint(&__kmp_console_buf, format, ap); - WriteFile(__kmp_stderr, __kmp_console_buf.str, __kmp_console_buf.used, - &count, NULL); + WriteFile(stream, __kmp_console_buf.str, __kmp_console_buf.used, &count, + NULL); __kmp_str_buf_clear(&__kmp_console_buf); #else #ifdef KMP_DEBUG_PIDS - fprintf(__kmp_stderr, "pid=%d: ", (kmp_int32)getpid()); + fprintf(stream, "pid=%d: ", (kmp_int32)getpid()); #endif - vfprintf(__kmp_stderr, format, ap); - fflush(__kmp_stderr); + vfprintf(stream, format, ap); + fflush(stream); #endif } } @@ -224,3 +217,14 @@ void __kmp_printf_no_lock(char const *format, ...) { va_end(ap); } + +void __kmp_fprintf(enum kmp_io stream, char const *format, ...) 
{ + va_list ap; + va_start(ap, format); + + __kmp_acquire_bootstrap_lock(&__kmp_stdio_lock); + __kmp_vprintf(stream, format, ap); + __kmp_release_bootstrap_lock(&__kmp_stdio_lock); + + va_end(ap); +} diff --git a/runtime/src/kmp_io.h b/runtime/src/kmp_io.h index 84ac67b..dac7a42 100644 --- a/runtime/src/kmp_io.h +++ b/runtime/src/kmp_io.h @@ -26,9 +26,10 @@ extern kmp_bootstrap_lock_t __kmp_stdio_lock; /* Control stdio functions */ extern kmp_bootstrap_lock_t __kmp_console_lock; /* Control console initialization */ -extern void __kmp_vprintf(enum kmp_io __kmp_io, char const *format, va_list ap); +extern void __kmp_vprintf(enum kmp_io stream, char const *format, va_list ap); extern void __kmp_printf(char const *format, ...); extern void __kmp_printf_no_lock(char const *format, ...); +extern void __kmp_fprintf(enum kmp_io stream, char const *format, ...); extern void __kmp_close_console(void); #ifdef __cplusplus diff --git a/runtime/src/kmp_lock.cpp b/runtime/src/kmp_lock.cpp index 16834c6..5c2eeed 100644 --- a/runtime/src/kmp_lock.cpp +++ b/runtime/src/kmp_lock.cpp @@ -1108,7 +1108,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, kmp_int32 need_mf = 1; #if OMPT_SUPPORT - omp_state_t prev_state = omp_state_undefined; + ompt_state_t prev_state = ompt_state_undefined; #endif KA_TRACE(1000, @@ -1216,7 +1216,7 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, #endif #if OMPT_SUPPORT - if (ompt_enabled.enabled && prev_state != omp_state_undefined) { + if (ompt_enabled.enabled && prev_state != ompt_state_undefined) { /* change the state before clearing wait_id */ this_thr->th.ompt_thread_info.state = prev_state; this_thr->th.ompt_thread_info.wait_id = 0; @@ -1231,11 +1231,11 @@ __kmp_acquire_queuing_lock_timed_template(kmp_queuing_lock_t *lck, } #if OMPT_SUPPORT - if (ompt_enabled.enabled && prev_state == omp_state_undefined) { + if (ompt_enabled.enabled && prev_state == ompt_state_undefined) { /* this thread will spin; set 
wait_id before entering wait state */ prev_state = this_thr->th.ompt_thread_info.state; this_thr->th.ompt_thread_info.wait_id = (uint64_t)lck; - this_thr->th.ompt_thread_info.state = omp_state_wait_lock; + this_thr->th.ompt_thread_info.state = ompt_state_wait_lock; } #endif @@ -1716,7 +1716,9 @@ static void __kmp_set_queuing_lock_flags(kmp_queuing_lock_t *lck, /* RTM Adaptive locks */ -#if KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300 +#if (KMP_COMPILER_ICC && __INTEL_COMPILER >= 1300) || \ + (KMP_COMPILER_MSVC && _MSC_VER >= 1700) || \ + (KMP_COMPILER_CLANG && KMP_MSVC_COMPAT) #include <immintrin.h> #define SOFT_ABORT_MASK (_XABORT_RETRY | _XABORT_CONFLICT | _XABORT_EXPLICIT) @@ -3357,7 +3359,7 @@ static void __kmp_init_nested_futex_lock_with_checks(kmp_futex_lock_t *lck) { #endif static int __kmp_is_ticket_lock_initialized(kmp_ticket_lock_t *lck) { - return lck == lck->lk.initialized; + return lck == lck->lk.self; } static void __kmp_init_ticket_lock_with_checks(kmp_ticket_lock_t *lck) { diff --git a/runtime/src/kmp_lock.h b/runtime/src/kmp_lock.h index 220236d..6a88d7b 100644 --- a/runtime/src/kmp_lock.h +++ b/runtime/src/kmp_lock.h @@ -649,7 +649,7 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, } \ } \ if (lck->tas.lk.poll != 0 || \ - !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ kmp_uint32 spins; \ KMP_FSYNC_PREPARE(lck); \ KMP_INIT_YIELD(spins); \ @@ -659,8 +659,8 @@ extern int (*__kmp_acquire_user_lock_with_checks_)(kmp_user_lock_p lck, } else { \ KMP_YIELD_SPIN(spins); \ } \ - while (lck->tas.lk.poll != 0 || \ - !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ + while (lck->tas.lk.poll != 0 || !__kmp_atomic_compare_store_acq( \ + &lck->tas.lk.poll, 0, gtid + 1)) { \ if (TCR_4(__kmp_nth) > \ (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { \ KMP_YIELD(TRUE); \ @@ -702,7 +702,7 @@ static inline int __kmp_test_user_lock_with_checks(kmp_user_lock_p lck, } } return ((lck->tas.lk.poll == 0) && - __kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); + __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); } else { KMP_DEBUG_ASSERT(__kmp_test_user_lock_with_checks_ != NULL); return (*__kmp_test_user_lock_with_checks_)(lck, gtid); @@ -767,7 +767,7 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, *depth = KMP_LOCK_ACQUIRED_NEXT; \ } else { \ if ((lck->tas.lk.poll != 0) || \ - !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ kmp_uint32 spins; \ KMP_FSYNC_PREPARE(lck); \ KMP_INIT_YIELD(spins); \ @@ -777,8 +777,9 @@ extern int (*__kmp_acquire_nested_user_lock_with_checks_)(kmp_user_lock_p lck, } else { \ KMP_YIELD_SPIN(spins); \ } \ - while ((lck->tas.lk.poll != 0) || \ - !__kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ + while ( \ + (lck->tas.lk.poll != 0) || \ + !__kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)) { \ if (TCR_4(__kmp_nth) > \ (__kmp_avail_proc ? 
__kmp_avail_proc : __kmp_xproc)) { \ KMP_YIELD(TRUE); \ @@ -826,7 +827,7 @@ static inline int __kmp_test_nested_user_lock_with_checks(kmp_user_lock_p lck, return ++lck->tas.lk.depth_locked; /* same owner, depth increased */ } retval = ((lck->tas.lk.poll == 0) && - __kmp_compare_and_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); + __kmp_atomic_compare_store_acq(&lck->tas.lk.poll, 0, gtid + 1)); if (retval) { KMP_MB(); lck->tas.lk.depth_locked = 1; diff --git a/runtime/src/kmp_os.h b/runtime/src/kmp_os.h index 93743ad..3c2426b 100644 --- a/runtime/src/kmp_os.h +++ b/runtime/src/kmp_os.h @@ -86,9 +86,12 @@ 128-bit extended precision type yet */ typedef long double _Quad; #elif KMP_COMPILER_GCC +/* GCC on NetBSD lacks __multc3/__divtc3 builtins needed for quad */ +#if !KMP_OS_NETBSD typedef __float128 _Quad; #undef KMP_HAVE_QUAD #define KMP_HAVE_QUAD 1 +#endif #elif KMP_COMPILER_MSVC typedef long double _Quad; #endif @@ -100,7 +103,9 @@ typedef long double _Quad; #endif #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */ +#define KMP_USE_X87CONTROL 0 #if KMP_OS_WINDOWS +#define KMP_END_OF_LINE "\r\n" typedef char kmp_int8; typedef unsigned char kmp_uint8; typedef short kmp_int16; @@ -122,6 +127,10 @@ typedef struct kmp_struct64 kmp_int64; typedef struct kmp_struct64 kmp_uint64; /* Not sure what to use for KMP_[U]INT64_SPEC here */ #endif +#if KMP_ARCH_X86 && KMP_MSVC_COMPAT +#undef KMP_USE_X87CONTROL +#define KMP_USE_X87CONTROL 1 +#endif #if KMP_ARCH_X86_64 #define KMP_INTPTR 1 typedef __int64 kmp_intptr_t; @@ -132,6 +141,7 @@ typedef unsigned __int64 kmp_uintptr_t; #endif /* KMP_OS_WINDOWS */ #if KMP_OS_UNIX +#define KMP_END_OF_LINE "\n" typedef char kmp_int8; typedef unsigned char kmp_uint8; typedef short kmp_int16; @@ -246,7 +256,7 @@ template <> struct traits_t<unsigned long long> { #define KMP_EXPORT extern /* export declaration in guide libraries */ -#if __GNUC__ >= 4 +#if __GNUC__ >= 4 && !defined(__MINGW32__) #define __forceinline __inline #endif @@ -296,7 +306,7 @@ 
extern "C" { #define KMP_NORETURN __attribute__((noreturn)) #endif -#if KMP_OS_WINDOWS +#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT #define KMP_ALIGN(bytes) __declspec(align(bytes)) #define KMP_THREAD_LOCAL __declspec(thread) #define KMP_ALIAS /* Nothing */ @@ -356,10 +366,12 @@ enum kmp_mem_fence_type { #if KMP_ASM_INTRINS && KMP_OS_WINDOWS +#if KMP_MSVC_COMPAT && !KMP_COMPILER_CLANG #pragma intrinsic(InterlockedExchangeAdd) #pragma intrinsic(InterlockedCompareExchange) #pragma intrinsic(InterlockedExchange) #pragma intrinsic(InterlockedExchange64) +#endif // Using InterlockedIncrement / InterlockedDecrement causes a library loading // ordering problem, so we use InterlockedExchangeAdd instead. diff --git a/runtime/src/kmp_platform.h b/runtime/src/kmp_platform.h index 7610484..bb23de0 100644 --- a/runtime/src/kmp_platform.h +++ b/runtime/src/kmp_platform.h @@ -17,8 +17,10 @@ /* ---------------------- Operating system recognition ------------------- */ #define KMP_OS_LINUX 0 +#define KMP_OS_DRAGONFLY 0 #define KMP_OS_FREEBSD 0 #define KMP_OS_NETBSD 0 +#define KMP_OS_OPENBSD 0 #define KMP_OS_DARWIN 0 #define KMP_OS_WINDOWS 0 #define KMP_OS_CNK 0 @@ -45,6 +47,11 @@ #else #endif +#if (defined __DragonFly__) +#undef KMP_OS_DRAGONFLY +#define KMP_OS_DRAGONFLY 1 +#endif + #if (defined __FreeBSD__) #undef KMP_OS_FREEBSD #define KMP_OS_FREEBSD 1 @@ -55,6 +62,11 @@ #define KMP_OS_NETBSD 1 #endif +#if (defined __OpenBSD__) +#undef KMP_OS_OPENBSD +#define KMP_OS_OPENBSD 1 +#endif + #if (defined __bgq__) #undef KMP_OS_CNK #define KMP_OS_CNK 1 @@ -66,12 +78,13 @@ #endif #if (1 != \ - KMP_OS_LINUX + KMP_OS_FREEBSD + KMP_OS_NETBSD + KMP_OS_DARWIN + \ - KMP_OS_WINDOWS + KMP_OS_HURD) + KMP_OS_LINUX + KMP_OS_DRAGONFLY + KMP_OS_FREEBSD + KMP_OS_NETBSD + \ + KMP_OS_OPENBSD + KMP_OS_DARWIN + KMP_OS_WINDOWS + KMP_OS_HURD) #error Unknown OS #endif -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_DARWIN || KMP_OS_HURD +#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || 
KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_DARWIN || KMP_OS_HURD #undef KMP_OS_UNIX #define KMP_OS_UNIX 1 #endif @@ -88,7 +101,7 @@ #define KMP_ARCH_MIPS64 0 #if KMP_OS_WINDOWS -#if defined _M_AMD64 +#if defined(_M_AMD64) || defined(__x86_64) #undef KMP_ARCH_X86_64 #define KMP_ARCH_X86_64 1 #else diff --git a/runtime/src/kmp_runtime.cpp b/runtime/src/kmp_runtime.cpp index b861c06..3dd9ab6 100644 --- a/runtime/src/kmp_runtime.cpp +++ b/runtime/src/kmp_runtime.cpp @@ -1092,6 +1092,19 @@ static void __kmp_fork_team_threads(kmp_root_t *root, kmp_team_t *team, #endif } +#if OMP_50_ENABLED + if (__kmp_display_affinity && team->t.t_display_affinity != 1) { + for (i = 0; i < team->t.t_nproc; i++) { + kmp_info_t *thr = team->t.t_threads[i]; + if (thr->th.th_prev_num_threads != team->t.t_nproc || + thr->th.th_prev_level != team->t.t_level) { + team->t.t_display_affinity = 1; + break; + } + } + } +#endif + KMP_MB(); } @@ -1213,12 +1226,12 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { ompt_data_t *implicit_task_data; void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid); if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state != omp_state_overhead) { + this_thr->th.ompt_thread_info.state != ompt_state_overhead) { ompt_task_info_t *parent_task_info; parent_task_info = OMPT_CUR_TASK_INFO(this_thr); - parent_task_info->frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + parent_task_info->frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_parallel_begin) { int team_size = 1; @@ -1382,13 +1395,27 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { KMP_CHECK_UPDATE(serial_team->t.t_cancel_request, cancel_noreq); #endif +#if OMP_50_ENABLED + // Perform the display affinity functionality for + // serialized parallel regions + if (__kmp_display_affinity) { + if (this_thr->th.th_prev_level != serial_team->t.t_level || + this_thr->th.th_prev_num_threads != 1) { + // NULL means use the 
affinity-format-var ICV + __kmp_aux_display_affinity(global_tid, NULL); + this_thr->th.th_prev_level = serial_team->t.t_level; + this_thr->th.th_prev_num_threads = 1; + } + } +#endif + if (__kmp_env_consistency_check) __kmp_push_parallel(global_tid, NULL); #if OMPT_SUPPORT serial_team->t.ompt_team_info.master_return_address = codeptr; if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state != omp_state_overhead) { - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1); + this_thr->th.ompt_thread_info.state != ompt_state_overhead) { + OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); ompt_lw_taskteam_t lw_taskteam; __ompt_lw_taskteam_init(&lw_taskteam, this_thr, global_tid, @@ -1408,8 +1435,8 @@ void __kmp_serialized_parallel(ident_t *loc, kmp_int32 global_tid) { } /* OMPT state */ - this_thr->th.ompt_thread_info.state = omp_state_work_parallel; - OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = OMPT_GET_FRAME_ADDRESS(1); + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; + OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif } @@ -1478,7 +1505,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT ompt_data_t ompt_parallel_data = ompt_data_none; ompt_data_t *parent_task_data; - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; ompt_data_t *implicit_task_data; void *return_address = NULL; @@ -1518,7 +1545,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, parent_task_data, ompt_frame, &ompt_parallel_data, team_size, OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = omp_state_overhead; + master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif @@ -1558,7 +1585,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, if (ompt_enabled.enabled) { __ompt_lw_taskteam_init(&lw_taskteam, master_th, gtid, &ompt_parallel_data, return_address); - exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame); + 
exit_runtime_p = &(lw_taskteam.ompt_task_info.frame.exit_frame.ptr); __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); // don't use lw_taskteam after linking. content was swaped @@ -1574,7 +1601,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, } /* OMPT state */ - master_th->th.ompt_thread_info.state = omp_state_work_parallel; + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1594,7 +1621,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT *exit_runtime_p = NULL; if (ompt_enabled.enabled) { - OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = NULL; + OMPT_CUR_TASK_INFO(master_th)->frame.exit_frame = ompt_data_none; if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_end, NULL, implicit_task_data, 1, @@ -1607,7 +1634,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, OMPT_CUR_TEAM_DATA(master_th), OMPT_CUR_TASK_DATA(master_th), OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = omp_state_overhead; + master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif return TRUE; @@ -1776,7 +1803,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, // don't use lw_taskteam after linking. 
content was swaped task_info = OMPT_CUR_TASK_INFO(master_th); - exit_runtime_p = &(task_info->frame.exit_frame); + exit_runtime_p = &(task_info->frame.exit_frame.ptr); if (ompt_enabled.ompt_callback_implicit_task) { ompt_callbacks.ompt_callback(ompt_callback_implicit_task)( ompt_scope_begin, OMPT_CUR_TEAM_DATA(master_th), @@ -1786,7 +1813,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, } /* OMPT state */ - master_th->th.ompt_thread_info.state = omp_state_work_parallel; + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1819,7 +1846,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, OMPT_CUR_TEAM_DATA(master_th), parent_task_data, OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = omp_state_overhead; + master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif } else if (microtask == (microtask_t)__kmp_teams_master) { @@ -1874,7 +1901,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, __ompt_lw_taskteam_link(&lw_taskteam, master_th, 0); // don't use lw_taskteam after linking. 
content was swaped task_info = OMPT_CUR_TASK_INFO(master_th); - exit_runtime_p = &(task_info->frame.exit_frame); + exit_runtime_p = &(task_info->frame.exit_frame.ptr); /* OMPT implicit task begin */ implicit_task_data = OMPT_CUR_TASK_DATA(master_th); @@ -1887,7 +1914,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, } /* OMPT state */ - master_th->th.ompt_thread_info.state = omp_state_work_parallel; + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; } else { exit_runtime_p = &dummy; } @@ -1920,7 +1947,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, &ompt_parallel_data, parent_task_data, OMPT_INVOKER(call_context), return_address); } - master_th->th.ompt_thread_info.state = omp_state_overhead; + master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif #if OMP_40_ENABLED @@ -1932,7 +1959,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, __ompt_lw_taskteam_init(&lwt, master_th, gtid, &ompt_parallel_data, return_address); - lwt.ompt_task_info.frame.exit_frame = NULL; + lwt.ompt_task_info.frame.exit_frame = ompt_data_none; __ompt_lw_taskteam_link(&lwt, master_th, 1); // don't use lw_taskteam after linking. 
content was swaped #endif @@ -1948,7 +1975,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, KA_TRACE(20, ("__kmp_fork_call: T#%d serial exit\n", gtid)); KMP_MB(); return FALSE; - } + } // if (nthreads == 1) // GEH: only modify the executing flag in the case when not serialized // serialized case is handled in kmpc_serialized_parallel @@ -2132,6 +2159,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, master_th->th.th_task_state_top++; #if KMP_NESTED_HOT_TEAMS if (master_th->th.th_hot_teams && + active_level < __kmp_hot_teams_max_level && team == master_th->th.th_hot_teams[active_level].hot_team) { // Restore master's nested state if nested hot team master_th->th.th_task_state = @@ -2195,7 +2223,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, &master_th->th.th_current_task->td_icvs, loc); #if OMPT_SUPPORT - master_th->th.ompt_thread_info.state = omp_state_work_parallel; + master_th->th.ompt_thread_info.state = ompt_state_work_parallel; #endif __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock); @@ -2276,7 +2304,7 @@ int __kmp_fork_call(ident_t *loc, int gtid, #if OMPT_SUPPORT if (ompt_enabled.enabled) { - master_th->th.ompt_thread_info.state = omp_state_overhead; + master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif @@ -2288,8 +2316,8 @@ static inline void __kmp_join_restore_state(kmp_info_t *thread, kmp_team_t *team) { // restore state outside the region thread->th.ompt_thread_info.state = - ((team->t.t_serialized) ? omp_state_work_serial - : omp_state_work_parallel); + ((team->t.t_serialized) ? 
ompt_state_work_serial + : ompt_state_work_parallel); } static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, @@ -2302,7 +2330,7 @@ static inline void __kmp_join_ompt(int gtid, kmp_info_t *thread, codeptr); } - task_info->frame.enter_frame = NULL; + task_info->frame.enter_frame = ompt_data_none; __kmp_join_restore_state(thread, team); } #endif @@ -2337,7 +2365,7 @@ void __kmp_join_call(ident_t *loc, int gtid #if OMPT_SUPPORT if (ompt_enabled.enabled) { - master_th->th.ompt_thread_info.state = omp_state_overhead; + master_th->th.ompt_thread_info.state = ompt_state_overhead; } #endif @@ -2516,7 +2544,7 @@ void __kmp_join_call(ident_t *loc, int gtid OMPT_CUR_TASK_INFO(master_th)->thread_num); } - task_info->frame.exit_frame = NULL; + task_info->frame.exit_frame = ompt_data_none; task_info->task_data = ompt_data_none; } #endif @@ -2649,6 +2677,8 @@ void __kmp_set_num_threads(int new_nth, int gtid) { KMP_COUNT_VALUE(OMP_set_numthreads, new_nth); thread = __kmp_threads[gtid]; + if (thread->th.th_current_task->td_icvs.nproc == new_nth) + return; // nothing to do __kmp_save_internal_controls(thread); @@ -3816,6 +3846,8 @@ int __kmp_register_root(int initial_thread) { #endif /* KMP_AFFINITY_SUPPORTED */ #if OMP_50_ENABLED root_thread->th.th_def_allocator = __kmp_def_allocator; + root_thread->th.th_prev_level = 0; + root_thread->th.th_prev_num_threads = 1; #endif __kmp_root_counter++; @@ -3825,7 +3857,7 @@ int __kmp_register_root(int initial_thread) { kmp_info_t *root_thread = ompt_get_thread(); - ompt_set_thread_state(root_thread, omp_state_overhead); + ompt_set_thread_state(root_thread, ompt_state_overhead); if (ompt_enabled.ompt_callback_thread_begin) { ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( @@ -3839,7 +3871,7 @@ int __kmp_register_root(int initial_thread) { // initial task has nothing to return to } - ompt_set_thread_state(root_thread, omp_state_work_serial); + ompt_set_thread_state(root_thread, ompt_state_work_serial); } #endif @@ -3978,7 
+4010,7 @@ void __kmp_unregister_root_current_thread(int gtid) { if (task_team != NULL && task_team->tt.tt_found_proxy_tasks) { #if OMPT_SUPPORT // the runtime is shutting down so we won't report any events - thread->th.ompt_thread_info.state = omp_state_undefined; + thread->th.ompt_thread_info.state = ompt_state_undefined; #endif __kmp_task_team_wait(thread, team USE_ITT_BUILD_ARG(NULL)); } @@ -4357,6 +4389,8 @@ kmp_info_t *__kmp_allocate_thread(kmp_root_t *root, kmp_team_t *team, #endif #if OMP_50_ENABLED new_thr->th.th_def_allocator = __kmp_def_allocator; + new_thr->th.th_prev_level = 0; + new_thr->th.th_prev_num_threads = 1; #endif TCW_4(new_thr->th.th_in_pool, FALSE); @@ -4545,6 +4579,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { th->th.th_first_place = first_place; th->th.th_last_place = last_place; th->th.th_new_place = masters_place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && masters_place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif KA_TRACE(100, ("__kmp_partition_places: master: T#%d(%d:%d) place %d " "partition = [%d,%d]\n", @@ -4578,6 +4618,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { th->th.th_first_place = first_place; th->th.th_last_place = last_place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif KA_TRACE(100, ("__kmp_partition_places: close: T#%d(%d:%d) place %d " "partition = [%d,%d]\n", @@ -4599,6 +4645,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { th->th.th_first_place = first_place; th->th.th_last_place = last_place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; 
+ } +#endif s_count++; if ((s_count == S) && rem && (gap_ct == gap)) { @@ -4667,6 +4719,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { th->th.th_first_place = place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif s_count = 1; while (s_count < S) { if (place == last_place) { @@ -4758,7 +4816,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { th->th.th_first_place = first; th->th.th_new_place = place; th->th.th_last_place = last; - +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif KA_TRACE(100, ("__kmp_partition_places: spread: T#%d(%d:%d) place %d " "partition = [%d,%d], spacing = %.4f\n", @@ -4787,6 +4850,12 @@ static void __kmp_partition_places(kmp_team_t *team, int update_master_only) { th->th.th_first_place = place; th->th.th_last_place = place; th->th.th_new_place = place; +#if OMP_50_ENABLED + if (__kmp_display_affinity && place != th->th.th_current_place && + team->t.t_display_affinity != 1) { + team->t.t_display_affinity = 1; + } +#endif s_count++; if ((s_count == S) && rem && (gap_ct == gap)) { @@ -5601,7 +5670,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { thread_data = &(this_thr->th.ompt_thread_info.thread_data); *thread_data = ompt_data_none; - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; this_thr->th.ompt_thread_info.wait_id = 0; this_thr->th.ompt_thread_info.idle_frame = OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_thread_begin) { @@ -5613,7 +5682,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { #if OMPT_SUPPORT if (ompt_enabled.enabled) { - this_thr->th.ompt_thread_info.state = omp_state_idle; + 
this_thr->th.ompt_thread_info.state = ompt_state_idle; } #endif /* This is the place where threads wait for work */ @@ -5629,7 +5698,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { #if OMPT_SUPPORT if (ompt_enabled.enabled) { - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; } #endif @@ -5649,7 +5718,7 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { #if OMPT_SUPPORT if (ompt_enabled.enabled) { - this_thr->th.ompt_thread_info.state = omp_state_work_parallel; + this_thr->th.ompt_thread_info.state = ompt_state_work_parallel; } #endif @@ -5664,9 +5733,9 @@ void *__kmp_launch_thread(kmp_info_t *this_thr) { #if OMPT_SUPPORT if (ompt_enabled.enabled) { /* no frame set while outside task */ - __ompt_get_task_info_object(0)->frame.exit_frame = NULL; + __ompt_get_task_info_object(0)->frame.exit_frame = ompt_data_none; - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; } #endif /* join barrier after parallel region */ @@ -6960,7 +7029,7 @@ int __kmp_invoke_task_func(int gtid) { if (ompt_enabled.enabled) { exit_runtime_p = &( - team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame); + team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame.exit_frame.ptr); } else { exit_runtime_p = &dummy; } @@ -7200,10 +7269,10 @@ void __kmp_internal_join(ident_t *id, int gtid, kmp_team_t *team) { __kmp_join_barrier(gtid); /* wait for everyone */ #if OMPT_SUPPORT if (ompt_enabled.enabled && - this_thr->th.ompt_thread_info.state == omp_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state == ompt_state_wait_barrier_implicit) { int ds_tid = this_thr->th.th_info.ds.ds_tid; ompt_data_t *task_data = OMPT_CUR_TASK_DATA(this_thr); - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; #if OMPT_OPTIONAL void *codeptr = NULL; if (KMP_MASTER_TID(ds_tid) 
&& @@ -7407,6 +7476,12 @@ void __kmp_cleanup(void) { __kmp_nested_proc_bind.bind_types = NULL; __kmp_nested_proc_bind.size = 0; __kmp_nested_proc_bind.used = 0; +#if OMP_50_ENABLED + if (__kmp_affinity_format) { + KMP_INTERNAL_FREE(__kmp_affinity_format); + __kmp_affinity_format = NULL; + } +#endif __kmp_i18n_catclose(); @@ -7563,6 +7638,339 @@ void __kmp_aux_set_library(enum library_type arg) { } } +/* Getting team information common for all team API */ +// Returns NULL if not in teams construct +static kmp_team_t *__kmp_aux_get_team_info(int &teams_serialized) { + kmp_info_t *thr = __kmp_entry_thread(); + teams_serialized = 0; + if (thr->th.th_teams_microtask) { + kmp_team_t *team = thr->th.th_team; + int tlevel = thr->th.th_teams_level; // the level of the teams construct + int ii = team->t.t_level; + teams_serialized = team->t.t_serialized; + int level = tlevel + 1; + KMP_DEBUG_ASSERT(ii >= tlevel); + while (ii > level) { + for (teams_serialized = team->t.t_serialized; + (teams_serialized > 0) && (ii > level); teams_serialized--, ii--) { + } + if (team->t.t_serialized && (!teams_serialized)) { + team = team->t.t_parent; + continue; + } + if (ii > level) { + team = team->t.t_parent; + ii--; + } + } + return team; + } + return NULL; +} + +int __kmp_aux_get_team_num() { + int serialized; + kmp_team_t *team = __kmp_aux_get_team_info(serialized); + if (team) { + if (serialized > 1) { + return 0; // teams region is serialized ( 1 team of 1 thread ). 
+ } else { + return team->t.t_master_tid; + } + } + return 0; +} + +int __kmp_aux_get_num_teams() { + int serialized; + kmp_team_t *team = __kmp_aux_get_team_info(serialized); + if (team) { + if (serialized > 1) { + return 1; + } else { + return team->t.t_parent->t.t_nproc; + } + } + return 1; +} + +/* ------------------------------------------------------------------------ */ + +#if OMP_50_ENABLED +/* + * Affinity Format Parser + * + * Field is in form of: %[[[0].]size]type + * % and type are required (%% means print a literal '%') + * type is either single char or long name surrounded by {}, + * e.g., N or {num_threads} + * 0 => leading zeros + * . => right justified when size is specified + * by default output is left justified + * size is the *minimum* field length + * All other characters are printed as is + * + * Available field types: + * t {team_num} - team number + * T {num_teams} - number of teams + * L {nesting_level} - omp_get_level() + * n {thread_num} - omp_get_thread_num() + * N {num_threads} - omp_get_num_threads() + * a {ancestor_tnum} - omp_get_ancestor_thread_num(omp_get_level()-1) + * H {host} - name of host machine + * P {process_id} - process id (integer) + * i {native_thread_id} - native thread identifier (integer) + * A {thread_affinity} - comma separated list of integers or integer ranges + * (values of affinity mask; only if KMP_AFFINITY_SUPPORTED) + * + * Implementation-specific field types can be added + * If a type is unknown, print "undefined" +*/ + +// Structure holding the short name, long name, and corresponding data type +// for snprintf. A table of these will represent the entire valid keyword +// field types. 
+typedef struct kmp_affinity_format_field_t { + char short_name; // from spec e.g., L -> thread level + const char *long_name; // from spec thread_level -> thread level + char field_format; // data type for snprintf (typically 'd' or 's' + // for integer or string) +} kmp_affinity_format_field_t; + +static const kmp_affinity_format_field_t __kmp_affinity_format_table[] = { +#if KMP_AFFINITY_SUPPORTED + {'A', "thread_affinity", 's'}, +#endif + {'t', "team_num", 'd'}, + {'T', "num_teams", 'd'}, + {'L', "nesting_level", 'd'}, + {'n', "thread_num", 'd'}, + {'N', "num_threads", 'd'}, + {'a', "ancestor_tnum", 'd'}, + {'H', "host", 's'}, + {'P', "process_id", 'd'}, + {'i', "native_thread_id", 'd'}}; + +// Return the number of characters it takes to hold field +static int __kmp_aux_capture_affinity_field(int gtid, const kmp_info_t *th, + const char **ptr, + kmp_str_buf_t *field_buffer) { + int rc, format_index, field_value; + const char *width_left, *width_right; + bool pad_zeros, right_justify, parse_long_name, found_valid_name; + static const int FORMAT_SIZE = 20; + char format[FORMAT_SIZE] = {0}; + char absolute_short_name = 0; + + KMP_DEBUG_ASSERT(gtid >= 0); + KMP_DEBUG_ASSERT(th); + KMP_DEBUG_ASSERT(**ptr == '%'); + KMP_DEBUG_ASSERT(field_buffer); + + __kmp_str_buf_clear(field_buffer); + + // Skip the initial % + (*ptr)++; + + // Check for %% first + if (**ptr == '%') { + __kmp_str_buf_cat(field_buffer, "%", 1); + (*ptr)++; // skip over the second % + return 1; + } + + // Parse field modifiers if they are present + pad_zeros = false; + if (**ptr == '0') { + pad_zeros = true; + (*ptr)++; // skip over 0 + } + right_justify = false; + if (**ptr == '.') { + right_justify = true; + (*ptr)++; // skip over . 
+ } + // Parse width of field: [width_left, width_right) + width_left = width_right = NULL; + if (**ptr >= '0' && **ptr <= '9') { + width_left = *ptr; + SKIP_DIGITS(*ptr); + width_right = *ptr; + } + + // Create the format for KMP_SNPRINTF based on flags parsed above + format_index = 0; + format[format_index++] = '%'; + if (!right_justify) + format[format_index++] = '-'; + if (pad_zeros) + format[format_index++] = '0'; + if (width_left && width_right) { + int i = 0; + // Only allow 8 digit number widths. + // This also prevents overflowing format variable + while (i < 8 && width_left < width_right) { + format[format_index++] = *width_left; + width_left++; + i++; + } + } + + // Parse a name (long or short) + // Canonicalize the name into absolute_short_name + found_valid_name = false; + parse_long_name = (**ptr == '{'); + if (parse_long_name) + (*ptr)++; // skip initial left brace + for (size_t i = 0; i < sizeof(__kmp_affinity_format_table) / + sizeof(__kmp_affinity_format_table[0]); + ++i) { + char short_name = __kmp_affinity_format_table[i].short_name; + const char *long_name = __kmp_affinity_format_table[i].long_name; + char field_format = __kmp_affinity_format_table[i].field_format; + if (parse_long_name) { + int length = KMP_STRLEN(long_name); + if (strncmp(*ptr, long_name, length) == 0) { + found_valid_name = true; + (*ptr) += length; // skip the long name + } + } else if (**ptr == short_name) { + found_valid_name = true; + (*ptr)++; // skip the short name + } + if (found_valid_name) { + format[format_index++] = field_format; + format[format_index++] = '\0'; + absolute_short_name = short_name; + break; + } + } + if (parse_long_name) { + if (**ptr != '}') { + absolute_short_name = 0; + } else { + (*ptr)++; // skip over the right brace + } + } + + // Attempt to fill the buffer with the requested + // value using snprintf within __kmp_str_buf_print() + switch (absolute_short_name) { + case 't': + rc = __kmp_str_buf_print(field_buffer, format, 
__kmp_aux_get_team_num()); + break; + case 'T': + rc = __kmp_str_buf_print(field_buffer, format, __kmp_aux_get_num_teams()); + break; + case 'L': + rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_level); + break; + case 'n': + rc = __kmp_str_buf_print(field_buffer, format, __kmp_tid_from_gtid(gtid)); + break; + case 'H': { + static const int BUFFER_SIZE = 256; + char buf[BUFFER_SIZE]; + __kmp_expand_host_name(buf, BUFFER_SIZE); + rc = __kmp_str_buf_print(field_buffer, format, buf); + } break; + case 'P': + rc = __kmp_str_buf_print(field_buffer, format, getpid()); + break; + case 'i': + rc = __kmp_str_buf_print(field_buffer, format, __kmp_gettid()); + break; + case 'N': + rc = __kmp_str_buf_print(field_buffer, format, th->th.th_team->t.t_nproc); + break; + case 'a': + field_value = + __kmp_get_ancestor_thread_num(gtid, th->th.th_team->t.t_level - 1); + rc = __kmp_str_buf_print(field_buffer, format, field_value); + break; +#if KMP_AFFINITY_SUPPORTED + case 'A': { + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_affinity_str_buf_mask(&buf, th->th.th_affin_mask); + rc = __kmp_str_buf_print(field_buffer, format, buf.str); + __kmp_str_buf_free(&buf); + } break; +#endif + default: + // According to spec, If an implementation does not have info for field + // type, then "undefined" is printed + rc = __kmp_str_buf_print(field_buffer, "%s", "undefined"); + // Skip the field + if (parse_long_name) { + SKIP_TOKEN(*ptr); + if (**ptr == '}') + (*ptr)++; + } else { + (*ptr)++; + } + } + + KMP_ASSERT(format_index <= FORMAT_SIZE); + return rc; +} + +/* + * Return number of characters needed to hold the affinity string + * (not including null byte character) + * The resultant string is printed to buffer, which the caller can then + * handle afterwards +*/ +size_t __kmp_aux_capture_affinity(int gtid, const char *format, + kmp_str_buf_t *buffer) { + const char *parse_ptr; + size_t retval; + const kmp_info_t *th; + kmp_str_buf_t field; + + 
KMP_DEBUG_ASSERT(buffer); + KMP_DEBUG_ASSERT(gtid >= 0); + + __kmp_str_buf_init(&field); + __kmp_str_buf_clear(buffer); + + th = __kmp_threads[gtid]; + retval = 0; + + // If format is NULL or zero-length string, then we use + // affinity-format-var ICV + parse_ptr = format; + if (parse_ptr == NULL || *parse_ptr == '\0') { + parse_ptr = __kmp_affinity_format; + } + KMP_DEBUG_ASSERT(parse_ptr); + + while (*parse_ptr != '\0') { + // Parse a field + if (*parse_ptr == '%') { + // Put field in the buffer + int rc = __kmp_aux_capture_affinity_field(gtid, th, &parse_ptr, &field); + __kmp_str_buf_catbuf(buffer, &field); + retval += rc; + } else { + // Put literal character in buffer + __kmp_str_buf_cat(buffer, parse_ptr, 1); + retval++; + parse_ptr++; + } + } + __kmp_str_buf_free(&field); + return retval; +} + +// Displays the affinity string to stdout +void __kmp_aux_display_affinity(int gtid, const char *format) { + kmp_str_buf_t buf; + __kmp_str_buf_init(&buf); + __kmp_aux_capture_affinity(gtid, format, &buf); + __kmp_fprintf(kmp_out, "%s" KMP_END_OF_LINE, buf.str); + __kmp_str_buf_free(&buf); +} +#endif // OMP_50_ENABLED + /* ------------------------------------------------------------------------ */ void __kmp_aux_set_blocktime(int arg, kmp_info_t *thread, int tid) { @@ -7667,8 +8075,8 @@ __kmp_determine_reduction_method( #if KMP_ARCH_X86_64 || KMP_ARCH_PPC64 || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS64 -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || \ - KMP_OS_DARWIN || KMP_OS_HURD +#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD int teamsize_cutoff = 4; @@ -7691,8 +8099,8 @@ __kmp_determine_reduction_method( } #else #error "Unknown or unsupported OS" -#endif // KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_WINDOWS || -// KMP_OS_DARWIN +#endif // KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || + // KMP_OS_OPENBSD || 
KMP_OS_WINDOWS || KMP_OS_DARWIN || KMP_OS_HURD #elif KMP_ARCH_X86 || KMP_ARCH_ARM || KMP_ARCH_AARCH || KMP_ARCH_MIPS diff --git a/runtime/src/kmp_safe_c_api.h b/runtime/src/kmp_safe_c_api.h index 9d0da0b..d894fe3 100644 --- a/runtime/src/kmp_safe_c_api.h +++ b/runtime/src/kmp_safe_c_api.h @@ -11,11 +11,14 @@ #ifndef KMP_SAFE_C_API_H #define KMP_SAFE_C_API_H +#include "kmp_platform.h" +#include <string.h> + // Replacement for banned C API // Not every unsafe call listed here is handled now, but keeping everything // in one place should be handy for future maintenance. -#if KMP_OS_WINDOWS +#if KMP_OS_WINDOWS && KMP_MSVC_COMPAT #define RSIZE_MAX_STR (4UL << 10) // 4KB @@ -57,4 +60,16 @@ #endif // KMP_OS_WINDOWS +// Offer truncated version of strncpy +static inline void __kmp_strncpy_truncate(char *buffer, size_t buf_size, + char const *src, size_t src_size) { + if (src_size >= buf_size) { + src_size = buf_size - 1; + KMP_STRNCPY_S(buffer, buf_size, src, src_size); + buffer[buf_size - 1] = '\0'; + } else { + KMP_STRNCPY_S(buffer, buf_size, src, src_size); + } +} + #endif // KMP_SAFE_C_API_H diff --git a/runtime/src/kmp_settings.cpp b/runtime/src/kmp_settings.cpp index d855de8..6d049e4 100644 --- a/runtime/src/kmp_settings.cpp +++ b/runtime/src/kmp_settings.cpp @@ -410,7 +410,7 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value, int *out_range, char *out_routine, char *out_file, int *out_lb, int *out_ub) { - size_t len = KMP_STRLEN(value + 1); + size_t len = KMP_STRLEN(value) + 1; par_range_to_print = (char *)KMP_INTERNAL_MALLOC(len + 1); KMP_STRNCPY_S(par_range_to_print, len + 1, value, len + 1); __kmp_par_range = +1; @@ -418,7 +418,7 @@ static void __kmp_stg_parse_par_range(char const *name, char const *value, __kmp_par_range_ub = INT_MAX; for (;;) { unsigned int len; - if ((value == NULL) || (*value == '\0')) { + if (*value == '\0') { break; } if (!__kmp_strcasecmp_with_sentinel("routine", value, '=')) { @@ -3252,7 +3252,29 @@ static void 
__kmp_stg_print_proc_bind(kmp_str_buf_t *buffer, char const *name, #endif /* OMP_40_ENABLED */ #if OMP_50_ENABLED - +static void __kmp_stg_parse_display_affinity(char const *name, + char const *value, void *data) { + __kmp_stg_parse_bool(name, value, &__kmp_display_affinity); +} +static void __kmp_stg_print_display_affinity(kmp_str_buf_t *buffer, + char const *name, void *data) { + __kmp_stg_print_bool(buffer, name, __kmp_display_affinity); +} +static void __kmp_stg_parse_affinity_format(char const *name, char const *value, + void *data) { + size_t length = KMP_STRLEN(value); + __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, value, + length); +} +static void __kmp_stg_print_affinity_format(kmp_str_buf_t *buffer, + char const *name, void *data) { + if (__kmp_env_format) { + KMP_STR_BUF_PRINT_NAME_EX(name); + } else { + __kmp_str_buf_print(buffer, " %s='", name); + } + __kmp_str_buf_print(buffer, "%s'\n", __kmp_affinity_format); +} // OMP_ALLOCATOR sets default allocator static void __kmp_stg_parse_allocator(char const *name, char const *value, void *data) { @@ -4879,7 +4901,12 @@ static kmp_setting_t __kmp_stg_table[] = { #endif #endif // KMP_AFFINITY_SUPPORTED - +#if OMP_50_ENABLED + {"OMP_DISPLAY_AFFINITY", __kmp_stg_parse_display_affinity, + __kmp_stg_print_display_affinity, NULL, 0, 0}, + {"OMP_AFFINITY_FORMAT", __kmp_stg_parse_affinity_format, + __kmp_stg_print_affinity_format, NULL, 0, 0}, +#endif {"KMP_INIT_AT_FORK", __kmp_stg_parse_init_at_fork, __kmp_stg_print_init_at_fork, NULL, 0, 0}, {"KMP_SCHEDULE", __kmp_stg_parse_schedule, __kmp_stg_print_schedule, NULL, @@ -5409,6 +5436,21 @@ void __kmp_env_initialize(char const *string) { } #endif /* OMP_40_ENABLED */ +#if OMP_50_ENABLED + // Set up the affinity format ICV + // Grab the default affinity format string from the message catalog + kmp_msg_t m = + __kmp_msg_format(kmp_i18n_msg_AffFormatDefault, "%P", "%i", "%n", "%A"); + KMP_DEBUG_ASSERT(KMP_STRLEN(m.str) < 
KMP_AFFINITY_FORMAT_SIZE); + + if (__kmp_affinity_format == NULL) { + __kmp_affinity_format = + (char *)KMP_INTERNAL_MALLOC(sizeof(char) * KMP_AFFINITY_FORMAT_SIZE); + } + KMP_STRCPY_S(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE, m.str); + __kmp_str_free(&m.str); +#endif + // Now process all of the settings. for (i = 0; i < block.count; ++i) { __kmp_stg_parse(block.vars[i].name, block.vars[i].value); @@ -5513,7 +5555,7 @@ void __kmp_env_initialize(char const *string) { // then determine if it is equal to that single group. if (within_one_group) { DWORD num_bits_in_group = __kmp_GetActiveProcessorCount(group); - int num_bits_in_mask = 0; + DWORD num_bits_in_mask = 0; for (int bit = init_mask->begin(); bit != init_mask->end(); bit = init_mask->next(bit)) num_bits_in_mask++; diff --git a/runtime/src/kmp_str.cpp b/runtime/src/kmp_str.cpp index 04c4056..5338edf 100644 --- a/runtime/src/kmp_str.cpp +++ b/runtime/src/kmp_str.cpp @@ -143,13 +143,28 @@ void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len) { KMP_STR_BUF_INVARIANT(buffer); } // __kmp_str_buf_cat -void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, - va_list args) { +void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src) { + KMP_DEBUG_ASSERT(dest); + KMP_DEBUG_ASSERT(src); + KMP_STR_BUF_INVARIANT(dest); + KMP_STR_BUF_INVARIANT(src); + if (!src->str || !src->used) + return; + __kmp_str_buf_reserve(dest, dest->used + src->used + 1); + KMP_MEMCPY(dest->str + dest->used, src->str, src->used); + dest->str[dest->used + src->used] = 0; + dest->used += src->used; + KMP_STR_BUF_INVARIANT(dest); +} // __kmp_str_buf_catbuf + +// Return the number of characters written +int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, + va_list args) { + int rc; KMP_STR_BUF_INVARIANT(buffer); for (;;) { int const free = buffer->size - buffer->used; - int rc; int size; // Try to format string. 
@@ -198,13 +213,17 @@ void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, KMP_DEBUG_ASSERT(buffer->size > 0); KMP_STR_BUF_INVARIANT(buffer); + return rc; } // __kmp_str_buf_vprint -void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { +// Return the number of characters written +int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...) { + int rc; va_list args; va_start(args, format); - __kmp_str_buf_vprint(buffer, format, args); + rc = __kmp_str_buf_vprint(buffer, format, args); va_end(args); + return rc; } // __kmp_str_buf_print /* The function prints specified size to buffer. Size is expressed using biggest diff --git a/runtime/src/kmp_str.h b/runtime/src/kmp_str.h index 02a2032..c30255d 100644 --- a/runtime/src/kmp_str.h +++ b/runtime/src/kmp_str.h @@ -51,9 +51,10 @@ void __kmp_str_buf_reserve(kmp_str_buf_t *buffer, int size); void __kmp_str_buf_detach(kmp_str_buf_t *buffer); void __kmp_str_buf_free(kmp_str_buf_t *buffer); void __kmp_str_buf_cat(kmp_str_buf_t *buffer, char const *str, int len); -void __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, - va_list args); -void __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...); +void __kmp_str_buf_catbuf(kmp_str_buf_t *dest, const kmp_str_buf_t *src); +int __kmp_str_buf_vprint(kmp_str_buf_t *buffer, char const *format, + va_list args); +int __kmp_str_buf_print(kmp_str_buf_t *buffer, char const *format, ...); void __kmp_str_buf_print_size(kmp_str_buf_t *buffer, size_t size); /* File name parser. 
diff --git a/runtime/src/kmp_stub.cpp b/runtime/src/kmp_stub.cpp index e26e084..c1f3bf3 100644 --- a/runtime/src/kmp_stub.cpp +++ b/runtime/src/kmp_stub.cpp @@ -35,6 +35,10 @@ #define omp_set_num_threads ompc_set_num_threads #define omp_set_dynamic ompc_set_dynamic #define omp_set_nested ompc_set_nested +#define omp_set_affinity_format ompc_set_affinity_format +#define omp_get_affinity_format ompc_get_affinity_format +#define omp_display_affinity ompc_display_affinity +#define omp_capture_affinity ompc_capture_affinity #define kmp_set_stacksize kmpc_set_stacksize #define kmp_set_stacksize_s kmpc_set_stacksize_s #define kmp_set_blocktime kmpc_set_blocktime @@ -350,6 +354,17 @@ const omp_allocator_t *omp_low_lat_mem_alloc = (const omp_allocator_t *)5; const omp_allocator_t *omp_cgroup_mem_alloc = (const omp_allocator_t *)6; const omp_allocator_t *omp_pteam_mem_alloc = (const omp_allocator_t *)7; const omp_allocator_t *omp_thread_mem_alloc = (const omp_allocator_t *)8; +/* OpenMP 5.0 Affinity Format */ +void omp_set_affinity_format(char const *format) { i; } +size_t omp_get_affinity_format(char *buffer, size_t size) { + i; + return 0; +} +void omp_display_affinity(char const *format) { i; } +size_t omp_capture_affinity(char *buffer, size_t buf_size, char const *format) { + i; + return 0; +} #endif /* OMP_50_ENABLED */ // end of file // diff --git a/runtime/src/kmp_taskdeps.cpp b/runtime/src/kmp_taskdeps.cpp index b48c5b6..6c810dd 100644 --- a/runtime/src/kmp_taskdeps.cpp +++ b/runtime/src/kmp_taskdeps.cpp @@ -466,9 +466,9 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, #if OMPT_SUPPORT if (ompt_enabled.enabled) { OMPT_STORE_RETURN_ADDRESS(gtid); - if (!current_task->ompt_task_info.frame.enter_frame) - current_task->ompt_task_info.frame.enter_frame = - OMPT_GET_FRAME_ADDRESS(1); + if (!current_task->ompt_task_info.frame.enter_frame.ptr) + current_task->ompt_task_info.frame.enter_frame.ptr = + OMPT_GET_FRAME_ADDRESS(0); if 
(ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; ompt_callbacks.ompt_callback(ompt_callback_task_create)( @@ -479,7 +479,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, OMPT_LOAD_RETURN_ADDRESS(gtid)); } - new_taskdata->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(0); + new_taskdata->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #if OMPT_OPTIONAL @@ -566,7 +566,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, gtid, loc_ref, new_taskdata)); #if OMPT_SUPPORT if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = NULL; + current_task->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif return TASK_CURRENT_NOT_QUEUED; @@ -586,7 +586,7 @@ kmp_int32 __kmpc_omp_task_with_deps(ident_t *loc_ref, kmp_int32 gtid, kmp_int32 ret = __kmp_omp_task(gtid, new_task, true); #if OMPT_SUPPORT if (ompt_enabled.enabled) { - current_task->ompt_task_info.frame.enter_frame = NULL; + current_task->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif return ret; diff --git a/runtime/src/kmp_tasking.cpp b/runtime/src/kmp_tasking.cpp index 2d74686..9c61a12 100644 --- a/runtime/src/kmp_tasking.cpp +++ b/runtime/src/kmp_tasking.cpp @@ -547,8 +547,10 @@ static void __kmp_task_start(kmp_int32 gtid, kmp_task_t *task, static inline void __ompt_task_init(kmp_taskdata_t *task, int tid) { // The calls to __ompt_task_init already have the ompt_enabled condition. 
task->ompt_task_info.task_data.value = 0; - task->ompt_task_info.frame.exit_frame = NULL; - task->ompt_task_info.frame.enter_frame = NULL; + task->ompt_task_info.frame.exit_frame = ompt_data_none; + task->ompt_task_info.frame.enter_frame = ompt_data_none; + task->ompt_task_info.frame.exit_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; + task->ompt_task_info.frame.enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; #if OMP_40_ENABLED task->ompt_task_info.ndeps = 0; task->ompt_task_info.deps = NULL; @@ -627,9 +629,11 @@ static void __kmpc_omp_task_begin_if0_template(ident_t *loc_ref, kmp_int32 gtid, #if OMPT_SUPPORT if (ompt) { - if (current_task->ompt_task_info.frame.enter_frame == NULL) { - current_task->ompt_task_info.frame.enter_frame = - taskdata->ompt_task_info.frame.exit_frame = frame_address; + if (current_task->ompt_task_info.frame.enter_frame.ptr == NULL) { + current_task->ompt_task_info.frame.enter_frame.ptr = + taskdata->ompt_task_info.frame.exit_frame.ptr = frame_address; + current_task->ompt_task_info.frame.enter_frame_flags = + taskdata->ompt_task_info.frame.exit_frame_flags = ompt_frame_application | ompt_frame_framepointer; } if (ompt_enabled.ompt_callback_task_create) { ompt_task_info_t *parent_info = &(current_task->ompt_task_info); @@ -811,8 +815,10 @@ static void __kmp_task_finish(kmp_int32 gtid, kmp_task_t *task, kmp_taskdata_t *resumed_task) { kmp_taskdata_t *taskdata = KMP_TASK_TO_TASKDATA(task); kmp_info_t *thread = __kmp_threads[gtid]; +#if OMP_45_ENABLED kmp_task_team_t *task_team = thread->th.th_task_team; // might be NULL for serial teams... 
+#endif // OMP_45_ENABLED kmp_int32 children = 0; KA_TRACE(10, ("__kmp_task_finish(enter): T#%d finishing task %p and resuming " @@ -964,9 +970,10 @@ static void __kmpc_omp_task_complete_if0_template(ident_t *loc_ref, #if OMPT_SUPPORT if (ompt) { - omp_frame_t *ompt_frame; + ompt_frame_t *ompt_frame; __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL); - ompt_frame->enter_frame = NULL; + ompt_frame->enter_frame = ompt_data_none; + ompt_frame->enter_frame_flags = ompt_frame_runtime | ompt_frame_framepointer; } #endif @@ -1392,6 +1399,28 @@ kmp_task_t *__kmpc_omp_task_alloc(ident_t *loc_ref, kmp_int32 gtid, return retval; } +#if OMP_50_ENABLED +/*! +@ingroup TASKING +@param loc_ref location of the original task directive +@param gtid Global Thread ID of encountering thread +@param new_task task thunk allocated by __kmpc_omp_task_alloc() for the ''new +task'' +@param naffins Number of affinity items +@param affin_list List of affinity items +@return Returns non-zero if registering affinity information was not successful. + Returns 0 if registration was successful +This entry registers the affinity information attached to a task with the task +thunk structure kmp_taskdata_t. +*/ +kmp_int32 +__kmpc_omp_reg_task_with_affinity(ident_t *loc_ref, kmp_int32 gtid, + kmp_task_t *new_task, kmp_int32 naffins, + kmp_task_affinity_info_t *affin_list) { + return 0; +} +#endif + // __kmp_invoke_task: invoke the specified task // // gtid: global thread ID of caller @@ -1438,9 +1467,9 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, oldInfo = thread->th.ompt_thread_info; thread->th.ompt_thread_info.wait_id = 0; thread->th.ompt_thread_info.state = (thread->th.th_team_serialized) - ? omp_state_work_serial - : omp_state_work_parallel; - taskdata->ompt_task_info.frame.exit_frame = OMPT_GET_FRAME_ADDRESS(0); + ? 
ompt_state_work_serial + : ompt_state_work_parallel; + taskdata->ompt_task_info.frame.exit_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } #endif @@ -1566,7 +1595,7 @@ static void __kmp_invoke_task(kmp_int32 gtid, kmp_task_t *task, if (UNLIKELY(ompt_enabled.enabled)) { thread->th.ompt_thread_info = oldInfo; if (taskdata->td_flags.tiedness == TASK_TIED) { - taskdata->ompt_task_info.frame.exit_frame = NULL; + taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; } __kmp_task_finish<true>(gtid, task, current_task); } else @@ -1634,7 +1663,7 @@ kmp_int32 __kmpc_omp_task_parts(ident_t *loc_ref, kmp_int32 gtid, ANNOTATE_HAPPENS_BEFORE(new_task); #if OMPT_SUPPORT if (UNLIKELY(ompt_enabled.enabled)) { - parent->ompt_task_info.frame.enter_frame = NULL; + parent->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif return TASK_CURRENT_NOT_QUEUED; @@ -1703,8 +1732,8 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, if (!new_taskdata->td_flags.started) { OMPT_STORE_RETURN_ADDRESS(gtid); parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame) { - parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (!parent->ompt_task_info.frame.enter_frame.ptr) { + parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); } if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; @@ -1721,7 +1750,7 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, __ompt_task_finish(new_task, new_taskdata->ompt_task_info.scheduling_parent, ompt_task_switch); - new_taskdata->ompt_task_info.frame.exit_frame = NULL; + new_taskdata->ompt_task_info.frame.exit_frame = ompt_data_none; } } #endif @@ -1733,7 +1762,7 @@ kmp_int32 __kmpc_omp_task(ident_t *loc_ref, kmp_int32 gtid, gtid, loc_ref, new_taskdata)); #if OMPT_SUPPORT if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { - parent->ompt_task_info.frame.enter_frame = NULL; + parent->ompt_task_info.frame.enter_frame = ompt_data_none; } 
#endif return res; @@ -1767,8 +1796,8 @@ kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, kmp_taskdata_t *parent = NULL; if (UNLIKELY(ompt_enabled.enabled && !new_taskdata->td_flags.started)) { parent = new_taskdata->td_parent; - if (!parent->ompt_task_info.frame.enter_frame) - parent->ompt_task_info.frame.enter_frame = OMPT_GET_FRAME_ADDRESS(1); + if (!parent->ompt_task_info.frame.enter_frame.ptr) + parent->ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0); if (ompt_enabled.ompt_callback_task_create) { ompt_data_t task_data = ompt_data_none; ompt_callbacks.ompt_callback(ompt_callback_task_create)( @@ -1788,7 +1817,7 @@ kmp_int32 __kmp_omp_taskloop_task(ident_t *loc_ref, kmp_int32 gtid, gtid, loc_ref, new_taskdata)); #if OMPT_SUPPORT if (UNLIKELY(ompt_enabled.enabled && parent != NULL)) { - parent->ompt_task_info.frame.enter_frame = NULL; + parent->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif return res; @@ -1817,7 +1846,7 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, my_task_data = &(taskdata->ompt_task_info.task_data); my_parallel_data = OMPT_CUR_TEAM_DATA(thread); - taskdata->ompt_task_info.frame.enter_frame = frame_address; + taskdata->ompt_task_info.frame.enter_frame.ptr = frame_address; if (ompt_enabled.ompt_callback_sync_region) { ompt_callbacks.ompt_callback(ompt_callback_sync_region)( @@ -1886,7 +1915,7 @@ static kmp_int32 __kmpc_omp_taskwait_template(ident_t *loc_ref, kmp_int32 gtid, ompt_sync_region_taskwait, ompt_scope_end, my_parallel_data, my_task_data, return_address); } - taskdata->ompt_task_info.frame.enter_frame = NULL; + taskdata->ompt_task_info.frame.enter_frame = ompt_data_none; } #endif // OMPT_SUPPORT && OMPT_OPTIONAL @@ -1916,7 +1945,7 @@ kmp_int32 __kmpc_omp_taskwait(ident_t *loc_ref, kmp_int32 gtid) { #if OMPT_SUPPORT && OMPT_OPTIONAL if (UNLIKELY(ompt_enabled.enabled)) { OMPT_STORE_RETURN_ADDRESS(gtid); - return __kmpc_omp_taskwait_ompt(loc_ref, gtid, 
OMPT_GET_FRAME_ADDRESS(1), + return __kmpc_omp_taskwait_ompt(loc_ref, gtid, OMPT_GET_FRAME_ADDRESS(0), OMPT_LOAD_RETURN_ADDRESS(gtid)); } #endif diff --git a/runtime/src/kmp_utility.cpp b/runtime/src/kmp_utility.cpp index 06090e6..dc4c714 100644 --- a/runtime/src/kmp_utility.cpp +++ b/runtime/src/kmp_utility.cpp @@ -375,7 +375,11 @@ void __kmp_expand_file_name(char *result, size_t rlen, char *pattern) { case 'I': case 'i': { pid_t id = getpid(); +#if KMP_ARCH_X86_64 && defined(__MINGW32__) + snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*lld", width, id); +#else snp_result = KMP_SNPRINTF(pos, end - pos + 1, "%0*d", width, id); +#endif if (snp_result >= 0 && snp_result <= end - pos) { while (*pos) ++pos; diff --git a/runtime/src/kmp_wait_release.h b/runtime/src/kmp_wait_release.h index ec489d1..e2984a8 100644 --- a/runtime/src/kmp_wait_release.h +++ b/runtime/src/kmp_wait_release.h @@ -120,12 +120,12 @@ public: #if OMPT_SUPPORT static inline void __ompt_implicit_task_end(kmp_info_t *this_thr, - omp_state_t omp_state, + ompt_state_t ompt_state, ompt_data_t *tId, ompt_data_t *pId) { int ds_tid = this_thr->th.th_info.ds.ds_tid; - if (omp_state == omp_state_wait_barrier_implicit) { - this_thr->th.ompt_thread_info.state = omp_state_overhead; + if (ompt_state == ompt_state_wait_barrier_implicit) { + this_thr->th.ompt_thread_info.state = ompt_state_overhead; #if OMPT_OPTIONAL void *codeptr = NULL; if (ompt_enabled.ompt_callback_sync_region_wait) { @@ -143,9 +143,9 @@ static inline void __ompt_implicit_task_end(kmp_info_t *this_thr, ompt_scope_end, NULL, tId, 0, ds_tid); } // return to idle state - this_thr->th.ompt_thread_info.state = omp_state_idle; + this_thr->th.ompt_thread_info.state = ompt_state_idle; } else { - this_thr->th.ompt_thread_info.state = omp_state_overhead; + this_thr->th.ompt_thread_info.state = ompt_state_overhead; } } } @@ -199,27 +199,27 @@ THIS function is called from function. 
Events are triggered in the calling code (__kmp_barrier): - state := omp_state_overhead + state := ompt_state_overhead barrier-begin barrier-wait-begin - state := omp_state_wait_barrier + state := ompt_state_wait_barrier call join-barrier-implementation (finally arrive here) {} call fork-barrier-implementation (finally arrive here) {} - state := omp_state_overhead + state := ompt_state_overhead barrier-wait-end barrier-end - state := omp_state_work_parallel + state := ompt_state_work_parallel __kmp_fork_barrier (after thread creation, before executing implicit task) call fork-barrier-implementation (finally arrive here) - {} // worker arrive here with state = omp_state_idle + {} // worker arrive here with state = ompt_state_idle __kmp_join_barrier (implicit barrier at end of parallel region) - state := omp_state_barrier_implicit + state := ompt_state_barrier_implicit barrier-begin barrier-wait-begin call join-barrier-implementation (finally arrive here @@ -234,19 +234,19 @@ final_spin=FALSE) barrier-end implicit-task-end idle-begin - state := omp_state_idle + state := ompt_state_idle - Before leaving, if state = omp_state_idle + Before leaving, if state = ompt_state_idle idle-end - state := omp_state_overhead + state := ompt_state_overhead */ #if OMPT_SUPPORT - omp_state_t ompt_entry_state; + ompt_state_t ompt_entry_state; ompt_data_t *pId = NULL; ompt_data_t *tId; if (ompt_enabled.enabled) { ompt_entry_state = this_thr->th.ompt_thread_info.state; - if (!final_spin || ompt_entry_state != omp_state_wait_barrier_implicit || + if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit || KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) { ompt_lw_taskteam_t *team = this_thr->th.th_team->t.ompt_serialized_team_info; @@ -432,16 +432,16 @@ final_spin=FALSE) } #if OMPT_SUPPORT - omp_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state; - if (ompt_enabled.enabled && ompt_exit_state != omp_state_undefined) { + ompt_state_t ompt_exit_state = 
this_thr->th.ompt_thread_info.state; + if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) { #if OMPT_OPTIONAL if (final_spin) { __ompt_implicit_task_end(this_thr, ompt_exit_state, tId, pId); ompt_exit_state = this_thr->th.ompt_thread_info.state; } #endif - if (ompt_exit_state == omp_state_idle) { - this_thr->th.ompt_thread_info.state = omp_state_overhead; + if (ompt_exit_state == ompt_state_idle) { + this_thr->th.ompt_thread_info.state = ompt_state_overhead; } } #endif diff --git a/runtime/src/kmp_wrapper_getpid.h b/runtime/src/kmp_wrapper_getpid.h index 5b4081a..47e2728 100644 --- a/runtime/src/kmp_wrapper_getpid.h +++ b/runtime/src/kmp_wrapper_getpid.h @@ -24,6 +24,9 @@ #if KMP_OS_DARWIN // OS X #define __kmp_gettid() syscall(SYS_thread_selfid) +#elif KMP_OS_NETBSD +#include <lwp.h> +#define __kmp_gettid() _lwp_self() #elif defined(SYS_gettid) // Hopefully other Unix systems define SYS_gettid syscall for getting os thread // id @@ -39,7 +42,9 @@ // "process.h". #include <process.h> // Let us simulate Unix. +#if KMP_MSVC_COMPAT typedef int pid_t; +#endif #define getpid _getpid #define __kmp_gettid() GetCurrentThreadId() diff --git a/runtime/src/kmp_wrapper_malloc.h b/runtime/src/kmp_wrapper_malloc.h index cf6f2be..c8d2c70 100644 --- a/runtime/src/kmp_wrapper_malloc.h +++ b/runtime/src/kmp_wrapper_malloc.h @@ -93,8 +93,10 @@ // Include alloca() declaration. #if KMP_OS_WINDOWS #include <malloc.h> // Windows* OS: _alloca() declared in "malloc.h". +#if KMP_MSVC_COMPAT #define alloca _alloca // Allow to use alloca() with no underscore. -#elif KMP_OS_FREEBSD || KMP_OS_NETBSD +#endif +#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_OPENBSD // Declared in "stdlib.h". #elif KMP_OS_UNIX #include <alloca.h> // Linux* OS and OS X*: alloc() declared in "alloca". 
diff --git a/runtime/src/libomp.rc.var b/runtime/src/libomp.rc.var index cf6a9c9..32449e2 100644 --- a/runtime/src/libomp.rc.var +++ b/runtime/src/libomp.rc.var @@ -11,7 +11,7 @@ ////===----------------------------------------------------------------------===// // -#include "winres.h" +#include "winresrc.h" #include "kmp_config.h" LANGUAGE LANG_ENGLISH, SUBLANG_ENGLISH_US // English (U.S.) resources diff --git a/runtime/src/ompt-general.cpp b/runtime/src/ompt-general.cpp index 8da5610..705b452 100644 --- a/runtime/src/ompt-general.cpp +++ b/runtime/src/ompt-general.cpp @@ -52,8 +52,8 @@ typedef struct { const char *state_name; - omp_state_t state_id; -} omp_state_info_t; + ompt_state_t state_id; +} ompt_state_info_t; typedef struct { const char *name; @@ -73,10 +73,10 @@ enum tool_setting_e { ompt_callbacks_active_t ompt_enabled; -omp_state_info_t omp_state_info[] = { -#define omp_state_macro(state, code) {#state, state}, - FOREACH_OMP_STATE(omp_state_macro) -#undef omp_state_macro +ompt_state_info_t ompt_state_info[] = { +#define ompt_state_macro(state, code) {#state, state}, + FOREACH_OMPT_STATE(ompt_state_macro) +#undef ompt_state_macro }; kmp_mutex_impl_info_t kmp_mutex_impl_info[] = { @@ -353,7 +353,7 @@ void ompt_post_init() { kmp_info_t *root_thread = ompt_get_thread(); - ompt_set_thread_state(root_thread, omp_state_overhead); + ompt_set_thread_state(root_thread, ompt_state_overhead); if (ompt_enabled.ompt_callback_thread_begin) { ompt_callbacks.ompt_callback(ompt_callback_thread_begin)( @@ -366,7 +366,7 @@ void ompt_post_init() { NULL, NULL, task_data, ompt_task_initial, 0, NULL); } - ompt_set_thread_state(root_thread, omp_state_work_serial); + ompt_set_thread_state(root_thread, ompt_state_work_serial); } } @@ -388,13 +388,13 @@ void ompt_fini() { OMPT_API_ROUTINE int ompt_enumerate_states(int current_state, int *next_state, const char **next_state_name) { - const static int len = sizeof(omp_state_info) / sizeof(omp_state_info_t); + const static int len = 
sizeof(ompt_state_info) / sizeof(ompt_state_info_t); int i = 0; for (i = 0; i < len - 1; i++) { - if (omp_state_info[i].state_id == current_state) { - *next_state = omp_state_info[i + 1].state_id; - *next_state_name = omp_state_info[i + 1].state_name; + if (ompt_state_info[i].state_id == current_state) { + *next_state = ompt_state_info[i + 1].state_id; + *next_state_name = ompt_state_info[i + 1].state_name; return 1; } } @@ -482,11 +482,11 @@ OMPT_API_ROUTINE int ompt_get_parallel_info(int ancestor_level, team_size); } -OMPT_API_ROUTINE omp_state_t ompt_get_state(omp_wait_id_t *wait_id) { - omp_state_t thread_state = __ompt_get_state_internal(wait_id); +OMPT_API_ROUTINE ompt_state_t ompt_get_state(ompt_wait_id_t *wait_id) { + ompt_state_t thread_state = __ompt_get_state_internal(wait_id); - if (thread_state == omp_state_undefined) { - thread_state = omp_state_work_serial; + if (thread_state == ompt_state_undefined) { + thread_state = ompt_state_work_serial; } return thread_state; @@ -502,7 +502,7 @@ OMPT_API_ROUTINE ompt_data_t *ompt_get_thread_data(void) { OMPT_API_ROUTINE int ompt_get_task_info(int ancestor_level, int *type, ompt_data_t **task_data, - omp_frame_t **task_frame, + ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num) { return __ompt_get_task_info_internal(ancestor_level, type, task_data, diff --git a/runtime/src/ompt-internal.h b/runtime/src/ompt-internal.h index 6b92eaa..c6823fc 100644 --- a/runtime/src/ompt-internal.h +++ b/runtime/src/ompt-internal.h @@ -54,7 +54,7 @@ typedef struct ompt_callbacks_active_s { (info->td_flags.merged_if0 ? 
ompt_task_mergeable : 0x0) typedef struct { - omp_frame_t frame; + ompt_frame_t frame; ompt_data_t task_data; struct kmp_taskdata *scheduling_parent; int thread_num; @@ -81,8 +81,8 @@ typedef struct { ompt_data_t task_data; /* stored here from implicit barrier-begin until implicit-task-end */ void *return_address; /* stored here on entry of runtime */ - omp_state_t state; - omp_wait_id_t wait_id; + ompt_state_t state; + ompt_wait_id_t wait_id; int ompt_task_yielded; void *idle_frame; } ompt_thread_info_t; diff --git a/runtime/src/ompt-specific.cpp b/runtime/src/ompt-specific.cpp index 23d09aa..cc4f1de 100644 --- a/runtime/src/ompt-specific.cpp +++ b/runtime/src/ompt-specific.cpp @@ -211,10 +211,10 @@ ompt_data_t *__ompt_get_thread_data_internal() { void __ompt_thread_assign_wait_id(void *variable) { kmp_info_t *ti = ompt_get_thread(); - ti->th.ompt_thread_info.wait_id = (omp_wait_id_t)variable; + ti->th.ompt_thread_info.wait_id = (ompt_wait_id_t)variable; } -omp_state_t __ompt_get_state_internal(omp_wait_id_t *omp_wait_id) { +ompt_state_t __ompt_get_state_internal(ompt_wait_id_t *omp_wait_id) { kmp_info_t *ti = ompt_get_thread(); if (ti) { @@ -222,7 +222,7 @@ omp_state_t __ompt_get_state_internal(omp_wait_id_t *omp_wait_id) { *omp_wait_id = ti->th.ompt_thread_info.wait_id; return ti->th.ompt_thread_info.state; } - return omp_state_undefined; + return ompt_state_undefined; } //---------------------------------------------------------- @@ -259,8 +259,8 @@ void __ompt_lw_taskteam_init(ompt_lw_taskteam_t *lwt, kmp_info_t *thr, int gtid, lwt->ompt_team_info.parallel_data = *ompt_pid; lwt->ompt_team_info.master_return_address = codeptr; lwt->ompt_task_info.task_data.value = 0; - lwt->ompt_task_info.frame.enter_frame = NULL; - lwt->ompt_task_info.frame.exit_frame = NULL; + lwt->ompt_task_info.frame.enter_frame = ompt_data_none; + lwt->ompt_task_info.frame.exit_frame = ompt_data_none; lwt->ompt_task_info.scheduling_parent = NULL; lwt->ompt_task_info.deps = NULL; 
lwt->ompt_task_info.ndeps = 0; @@ -328,7 +328,7 @@ void __ompt_lw_taskteam_unlink(kmp_info_t *thr) { int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, - omp_frame_t **task_frame, + ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num) { if (__kmp_get_gtid() < 0) diff --git a/runtime/src/ompt-specific.h b/runtime/src/ompt-specific.h index 8cf7450..317580f 100644 --- a/runtime/src/ompt-specific.h +++ b/runtime/src/ompt-specific.h @@ -41,7 +41,7 @@ int __ompt_get_parallel_info_internal(int ancestor_level, int __ompt_get_task_info_internal(int ancestor_level, int *type, ompt_data_t **task_data, - omp_frame_t **task_frame, + ompt_frame_t **task_frame, ompt_data_t **parallel_data, int *thread_num); ompt_data_t *__ompt_get_thread_data_internal(); @@ -93,7 +93,7 @@ inline kmp_info_t *ompt_get_thread() { return ompt_get_thread_gtid(gtid); } -inline void ompt_set_thread_state(kmp_info_t *thread, omp_state_t state) { +inline void ompt_set_thread_state(kmp_info_t *thread, ompt_state_t state) { thread->th.ompt_thread_info.state = state; } diff --git a/runtime/src/thirdparty/ittnotify/ittnotify_static.c b/runtime/src/thirdparty/ittnotify/ittnotify_static.c index 75ef966..63e1b0c 100644 --- a/runtime/src/thirdparty/ittnotify/ittnotify_static.c +++ b/runtime/src/thirdparty/ittnotify/ittnotify_static.c @@ -12,7 +12,11 @@ #include "ittnotify_config.h" #if ITT_PLATFORM==ITT_PLATFORM_WIN +#if defined(__MINGW32__) +#include <limits.h> +#else #define PATH_MAX 512 +#endif #else /* ITT_PLATFORM!=ITT_PLATFORM_WIN */ #include <limits.h> #include <dlfcn.h> @@ -28,7 +32,9 @@ #include "ittnotify.h" #include "legacy/ittnotify.h" +#if KMP_MSVC_COMPAT #include "disable_warnings.h" +#endif static const char api_version[] = API_VERSION "\0\n@(#) $Revision: 481659 $\n"; @@ -194,7 +200,7 @@ static __itt_group_alias group_alias[] = { #pragma pack(pop) -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT 
#pragma warning(push) #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -217,7 +223,7 @@ static __itt_api_info api_list[] = { {NULL, NULL, NULL, NULL, __itt_group_none} }; -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT #pragma warning(pop) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -252,7 +258,7 @@ typedef void (__itt_api_fini_t)(__itt_global*); ITT_EXTERN_C void _N_(error_handler)(__itt_error_code, va_list args); #endif /* ITT_NOTIFY_EXT_REPORT */ -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT #pragma warning(push) #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -278,7 +284,7 @@ static void __itt_report_error(unsigned code_arg, ...) va_end(args); } -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT #pragma warning(pop) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ @@ -1013,7 +1019,7 @@ static void __itt_nullify_all_pointers(void) *_N_(_ittapi_global).api_list_ptr[i].func_ptr = _N_(_ittapi_global).api_list_ptr[i].null_func; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT #pragma warning(push) #pragma warning(disable: 4054) /* warning C4054: 'type cast' : from function pointer 'XXX' to data pointer 'void *' */ #pragma warning(disable: 4055) /* warning C4055: 'type cast' : from data pointer 'void *' to function pointer 'XXX' */ @@ -1191,7 +1197,6 @@ ITT_EXTERN_C __itt_error_handler_t* _N_(set_error_handler)(__itt_error_handler_t return prev; } -#if ITT_PLATFORM==ITT_PLATFORM_WIN +#if ITT_PLATFORM==ITT_PLATFORM_WIN && KMP_MSVC_COMPAT #pragma warning(pop) #endif /* ITT_PLATFORM==ITT_PLATFORM_WIN */ - diff --git a/runtime/src/z_Linux_util.cpp 
b/runtime/src/z_Linux_util.cpp index a8d9324..ab9c353 100644 --- a/runtime/src/z_Linux_util.cpp +++ b/runtime/src/z_Linux_util.cpp @@ -22,7 +22,7 @@ #include "kmp_wait_release.h" #include "kmp_wrapper_getpid.h" -#if !KMP_OS_FREEBSD && !KMP_OS_NETBSD +#if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD #include <alloca.h> #endif #include <math.h> // HUGE_VAL. @@ -50,8 +50,11 @@ #elif KMP_OS_DARWIN #include <mach/mach.h> #include <sys/sysctl.h> -#elif KMP_OS_FREEBSD +#elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD #include <pthread_np.h> +#elif KMP_OS_NETBSD +#include <sys/types.h> +#include <sys/sysctl.h> #endif #include <ctype.h> @@ -444,7 +447,8 @@ void __kmp_terminate_thread(int gtid) { determined exactly, FALSE if incremental refinement is necessary. */ static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) { int stack_data; -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD +#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_HURD pthread_attr_t attr; int status; size_t size = 0; @@ -458,7 +462,7 @@ static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) { /* Fetch the real thread attributes */ status = pthread_attr_init(&attr); KMP_CHECK_SYSFAIL("pthread_attr_init", status); -#if KMP_OS_FREEBSD || KMP_OS_NETBSD +#if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD status = pthread_attr_get_np(pthread_self(), &attr); KMP_CHECK_SYSFAIL("pthread_attr_get_np", status); #else @@ -482,7 +486,8 @@ static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) { TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE); return TRUE; } -#endif /* KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD */ +#endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || + KMP_OS_HURD */ /* Use incremental refinement starting from initial conservative estimate */ TCW_PTR(th->th.th_info.ds.ds_stacksize, 0); TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data); @@ -496,7 +501,8 @@ static 
void *__kmp_launch_worker(void *thr) { sigset_t new_set, old_set; #endif /* KMP_BLOCK_SIGNALS */ void *exit_val; -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD +#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_HURD void *volatile padding = 0; #endif int gtid; @@ -544,7 +550,8 @@ static void *__kmp_launch_worker(void *thr) { KMP_CHECK_SYSFAIL("pthread_sigmask", status); #endif /* KMP_BLOCK_SIGNALS */ -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD +#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD if (__kmp_stkoffset > 0 && gtid > 0) { padding = KMP_ALLOCA(gtid * __kmp_stkoffset); } @@ -1771,7 +1778,8 @@ static int __kmp_get_xproc(void) { int r = 0; -#if KMP_OS_LINUX || KMP_OS_FREEBSD || KMP_OS_NETBSD || KMP_OS_HURD +#if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \ + KMP_OS_OPENBSD || KMP_OS_HURD r = sysconf(_SC_NPROCESSORS_ONLN); @@ -1934,20 +1942,27 @@ void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; } kmp_uint64 __kmp_now_nsec() { struct timeval t; gettimeofday(&t, NULL); - return KMP_NSEC_PER_SEC * t.tv_sec + 1000 * t.tv_usec; + kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec + + (kmp_uint64)1000 * (kmp_uint64)t.tv_usec; + return nsec; } #if KMP_ARCH_X86 || KMP_ARCH_X86_64 /* Measure clock ticks per millisecond */ void __kmp_initialize_system_tick() { + kmp_uint64 now, nsec2, diff; kmp_uint64 delay = 100000; // 50~100 usec on most machines. 
kmp_uint64 nsec = __kmp_now_nsec(); kmp_uint64 goal = __kmp_hardware_timestamp() + delay; - kmp_uint64 now; while ((now = __kmp_hardware_timestamp()) < goal) ; - __kmp_ticks_per_msec = - (kmp_uint64)(1e6 * (delay + (now - goal)) / (__kmp_now_nsec() - nsec)); + nsec2 = __kmp_now_nsec(); + diff = nsec2 - nsec; + if (diff > 0) { + kmp_uint64 tpms = (kmp_uint64)(1e6 * (delay + (now - goal)) / diff); + if (tpms > 0) + __kmp_ticks_per_msec = tpms; + } } #endif @@ -2017,9 +2032,39 @@ int __kmp_is_address_mapped(void *addr) { found = 1; } -#elif KMP_OS_FREEBSD || KMP_OS_NETBSD +#elif KMP_OS_NETBSD + + int mib[5]; + mib[0] = CTL_VM; + mib[1] = VM_PROC; + mib[2] = VM_PROC_MAP; + mib[3] = getpid(); + mib[4] = sizeof(struct kinfo_vmentry); + + size_t size; + rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0); + KMP_ASSERT(!rc); + KMP_ASSERT(size); + + size = size * 4 / 3; + struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size); + KMP_ASSERT(kiv); + + rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0); + KMP_ASSERT(!rc); + KMP_ASSERT(size); + + for (size_t i = 0; i < size; i++) { + if (kiv[i].kve_start >= (uint64_t)addr && + kiv[i].kve_end <= (uint64_t)addr) { + found = 1; + break; + } + } + KMP_INTERNAL_FREE(kiv); +#elif KMP_OS_DRAGONFLY || KMP_OS_OPENBSD - // FIXME(FreeBSD, NetBSD): Implement this + // FIXME(DragonFly, OpenBSD): Implement this found = 1; #else @@ -2034,7 +2079,7 @@ int __kmp_is_address_mapped(void *addr) { #ifdef USE_LOAD_BALANCE -#if KMP_OS_DARWIN +#if KMP_OS_DARWIN || KMP_OS_NETBSD // The function returns the rounded value of the system load average // during given time interval which depends on the value of diff --git a/runtime/src/z_Windows_NT_util.cpp b/runtime/src/z_Windows_NT_util.cpp index e8ed660..f3d667f 100644 --- a/runtime/src/z_Windows_NT_util.cpp +++ b/runtime/src/z_Windows_NT_util.cpp @@ -887,6 +887,7 @@ kmp_uint64 __kmp_now_nsec() { return 1e9 * __kmp_win32_tick * now.QuadPart; } +extern "C" void *__stdcall 
__kmp_launch_worker(void *arg) { volatile void *stack_data; void *exit_val; diff --git a/runtime/test/affinity/format/affinity_display.1.c b/runtime/test/affinity/format/affinity_display.1.c new file mode 100644 index 0000000..b900c3c --- /dev/null +++ b/runtime/test/affinity/format/affinity_display.1.c @@ -0,0 +1,92 @@ +// RUN: %libomp-compile +// RUN: env OMP_DISPLAY_AFFINITY=TRUE OMP_NUM_THREADS=4 OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' %libomp-run | python %S/check.py -c 'CHECK' %s + +// Affinity Display examples +#include <stdio.h> +#include <stdlib.h> // also null is in <stddef.h> +#include <stddef.h> +#include <omp.h> +#include <string.h> + +// ENVIRONMENT +// OMP_DISPLAY_AFFINITY=TRUE +// OMP_NUM_THREADS=4 +// OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' + +// CHECK: num_threads=1 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0-7])|(0,1)|(undefined)\} +// CHECK: num_threads=4 Thread id [0-3] reporting in +// CHECK: num_threads=4 OMP: pid [0-9]+ tid [0-9]+ thread [0-4] bound to OS proc set \{([0-7])|([0246],[1357])|(undefined)\} +// CHECK: num_threads=1 Default Affinity Format is: +// CHECK: num_threads=1 Affinity Format set to: host=%20H tid=%0.4n binds_to=%A +// CHECK: num_threads=4 tid=[0-3] affinity:host=[a-zA-Z0-9_.-]+[ ]+tid=000[0-4][ ]+binds_to=(([0-7])|([0246],[1357])|(undefined)) + +#define FORMAT_STORE 80 +#define BUFFER_STORE 80 + +int main(int argc, char** argv) { + int i, n, tid, max_req_store = 0; + size_t nchars; + char default_format[FORMAT_STORE]; + char my_format[] = "host=%20H tid=%0.4n binds_to=%A"; + char **buffer; + + // CODE SEGMENT 1 AFFINITY DISPLAY + omp_display_affinity(NULL); + + // OMP_DISPLAY_AFFINITY=TRUE, + // Affinity reported for 1 parallel region + #pragma omp parallel + { + printf("Thread id %d reporting in.\n", omp_get_thread_num()); + } + + // Get and Display Default Affinity Format + nchars = omp_get_affinity_format(default_format, (size_t)FORMAT_STORE); + printf("Default Affinity Format is: %s\n", 
default_format); + + if (nchars > FORMAT_STORE) { + printf("Caution: Reported Format is truncated. Increase\n"); + printf(" FORMAT_STORE by %d.\n", (int)nchars - FORMAT_STORE); + } + + // Set Affinity Format + omp_set_affinity_format(my_format); + printf("Affinity Format set to: %s\n", my_format); + + // CODE SEGMENT 3 CAPTURE AFFINITY + // Set up buffer for affinity of n threads + n = omp_get_max_threads(); + buffer = (char **)malloc(sizeof(char *) * n); + for (i = 0; i < n; i++) { + buffer[i] = (char *)malloc(sizeof(char) * BUFFER_STORE); + } + + // Capture Affinity using Affinity Format set above. + // Use critical reduction to check size of buffer areas + #pragma omp parallel private(tid, nchars) + { + tid = omp_get_thread_num(); + nchars = omp_capture_affinity(buffer[tid], (size_t)BUFFER_STORE, NULL); + #pragma omp critical + { + if (nchars > max_req_store) + max_req_store = nchars; + } + } + + for (i = 0; i < n; i++) { + printf("tid=%d affinity:%s:\n", i, buffer[i]); + } + // for 4 threads with OMP_PLACES='{0,1},{2,3},{4,5},{6,7}' + // host=%20H tid=%0.4n binds_to=%A + // host=<hostname> tid=0000 binds_to=0,1 + // host=<hostname> tid=0001 binds_to=2,3 + // host=<hostname> tid=0002 binds_to=4,5 + // host=<hostname> tid=0003 binds_to=6,7 + + if (max_req_store > BUFFER_STORE) { + printf("Caution: Affinity string truncated. 
Increase\n"); + printf(" BUFFER_STORE by %d\n", max_req_store - BUFFER_STORE); + } + return 0; +} diff --git a/runtime/test/affinity/format/affinity_values.c b/runtime/test/affinity/format/affinity_values.c new file mode 100644 index 0000000..37ab210 --- /dev/null +++ b/runtime/test/affinity/format/affinity_values.c @@ -0,0 +1,135 @@ +// RUN: %libomp-compile +// RUN: env OMP_PROC_BIND=close OMP_PLACES=threads %libomp-run +// RUN: env OMP_PROC_BIND=close OMP_PLACES=cores %libomp-run +// RUN: env OMP_PROC_BIND=close OMP_PLACES=sockets %libomp-run +// RUN: env KMP_AFFINITY=compact %libomp-run +// RUN: env KMP_AFFINITY=scatter %libomp-run +// REQUIRES: affinity + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> + +#define XSTR(x) #x +#define STR(x) XSTR(x) + +#define streqls(s1, s2) (!strcmp(s1, s2)) + +#define check(condition) \ + if (!(condition)) { \ + fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#define DEBUG 0 + +#if DEBUG +#include <stdarg.h> +#endif + +#define BUFFER_SIZE 1024 + +char buf[BUFFER_SIZE]; +#pragma omp threadprivate(buf) + +static int debug_printf(const char* format, ...) 
{ + int retval = 0; +#if DEBUG + va_list args; + va_start(args, format); + retval = vprintf(format, args); + va_end(args); +#endif + return retval; +} + +static void display_affinity_environment() { +#if DEBUG + printf("Affinity Environment:\n"); + printf(" OMP_PROC_BIND=%s\n", getenv("OMP_PROC_BIND")); + printf(" OMP_PLACES=%s\n", getenv("OMP_PLACES")); + printf(" KMP_AFFINITY=%s\n", getenv("KMP_AFFINITY")); +#endif +} + +// Reads in a list of integers into ids array (not going past ids_size) +// e.g., if affinity = "0-4,6,8-10,14,16,17-20,23" +// then ids = [0,1,2,3,4,6,8,9,10,14,16,17,18,19,20,23] +void list_to_ids(const char* affinity, int* ids, int ids_size) { + int id, b, e, ids_index; + char *aff, *begin, *end, *absolute_end; + aff = strdup(affinity); + absolute_end = aff + strlen(aff); + ids_index = 0; + begin = end = aff; + while (end < absolute_end) { + end = begin; + while (*end != '\0' && *end != ',') + end++; + *end = '\0'; + if (strchr(begin, '-') != NULL) { + // Range + sscanf(begin, "%d-%d", &b, &e); + } else { + // Single Number + sscanf(begin, "%d", &b); + e = b; + } + for (id = b; id <= e; ++id) { + ids[ids_index++] = id; + if (ids_index >= ids_size) { + free(aff); + return; + } + } + begin = end + 1; + } + free(aff); +} + +void check_thread_affinity() { + int i; + const char *formats[2] = {"%{thread_affinity}", "%A"}; + for (i = 0; i < sizeof(formats) / sizeof(formats[0]); ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel + { + int j, k; + int place = omp_get_place_num(); + int num_procs = omp_get_place_num_procs(place); + int *ids = (int *)malloc(sizeof(int) * num_procs); + int *ids2 = (int *)malloc(sizeof(int) * num_procs); + char buf[256]; + size_t n = omp_capture_affinity(buf, 256, NULL); + check(n <= 256); + omp_get_place_proc_ids(place, ids); + list_to_ids(buf, ids2, num_procs); + + #pragma omp for schedule(static) ordered + for (k = 0; k < omp_get_num_threads(); ++k) { + #pragma omp ordered + { + debug_printf("Thread 
%d: captured affinity = %s\n", + omp_get_thread_num(), buf); + for (j = 0; j < num_procs; ++j) { + debug_printf("Thread %d: ids[%d] = %d ids2[%d] = %d\n", + omp_get_thread_num(), j, ids[j], j, ids2[j]); + check(ids[j] == ids2[j]); + } + } + } + + free(ids); + free(ids2); + } + } +} + +int main(int argc, char** argv) { + omp_set_nested(1); + display_affinity_environment(); + check_thread_affinity(); + return 0; +} diff --git a/runtime/test/affinity/format/api.c b/runtime/test/affinity/format/api.c new file mode 100644 index 0000000..df6be66 --- /dev/null +++ b/runtime/test/affinity/format/api.c @@ -0,0 +1,56 @@ +// RUN: %libomp-compile-and-run +// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> + +#define XSTR(x) #x +#define STR(x) XSTR(x) + +#define streqls(s1, s2) (!strcmp(s1, s2)) + +#define check(condition) \ + if (!(condition)) { \ + fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#define BUFFER_SIZE 1024 + +int main(int argc, char** argv) { + char buf[BUFFER_SIZE]; + size_t needed; + + omp_set_affinity_format("0123456789"); + + needed = omp_get_affinity_format(buf, BUFFER_SIZE); + check(streqls(buf, "0123456789")); + check(needed == 10) + + // Check that it is truncated properly + omp_get_affinity_format(buf, 5); + check(streqls(buf, "0123")); + + #pragma omp parallel + { + char my_buf[512]; + size_t needed = omp_capture_affinity(my_buf, 512, NULL); + check(streqls(my_buf, "0123456789")); + check(needed == 10); + // Check that it is truncated properly + omp_capture_affinity(my_buf, 5, NULL); + check(streqls(my_buf, "0123")); + } + + #pragma omp parallel num_threads(4) + { + omp_display_affinity(NULL); + } + + return 0; +} + +// CHECK: num_threads=4 0123456789 diff --git a/runtime/test/affinity/format/api2.c b/runtime/test/affinity/format/api2.c new file mode 100644 index 0000000..7b2d700 --- /dev/null +++ 
b/runtime/test/affinity/format/api2.c @@ -0,0 +1,84 @@ +// RUN: %libomp-compile-and-run +// RUN: %libomp-run | python %S/check.py -c 'CHECK' %s + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> + +#define XSTR(x) #x +#define STR(x) XSTR(x) + +#define streqls(s1, s2) (!strcmp(s1, s2)) + +#define check(condition) \ + if (!(condition)) { \ + fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#if defined(_WIN32) +#define snprintf _snprintf +#endif + +#define BUFFER_SIZE 1024 + +int main(int argc, char** argv) { + char buf[BUFFER_SIZE]; + size_t needed, length; + const char* format = "tl:%L tn:%n nt:%N an:%a"; + const char* second_format = "nesting_level:%{nesting_level} thread_num:%{thread_num} num_threads:%{num_threads} ancestor_tnum:%{ancestor_tnum}"; + + length = strlen(format); + omp_set_affinity_format(format); + + needed = omp_get_affinity_format(buf, BUFFER_SIZE); + check(streqls(buf, format)); + check(needed == length) + + // Check that it is truncated properly + omp_get_affinity_format(buf, 5); + check(streqls(buf, "tl:%")); + + #pragma omp parallel + { + char my_buf[512]; + char supposed[512]; + int tl, tn, nt, an; + size_t needed, needed2; + tl = omp_get_level(); + tn = omp_get_thread_num(); + nt = omp_get_num_threads(); + an = omp_get_ancestor_thread_num(omp_get_level()-1); + needed = omp_capture_affinity(my_buf, 512, NULL); + needed2 = (size_t)snprintf(supposed, 512, "tl:%d tn:%d nt:%d an:%d", tl, tn, nt, an); + check(streqls(my_buf, supposed)); + check(needed == needed2); + // Check that it is truncated properly + supposed[4] = '\0'; + omp_capture_affinity(my_buf, 5, NULL); + check(streqls(my_buf, supposed)); + + needed = omp_capture_affinity(my_buf, 512, second_format); + needed2 = (size_t)snprintf(supposed, 512, "nesting_level:%d thread_num:%d num_threads:%d ancestor_tnum:%d", tl, tn, nt, an); + check(streqls(my_buf, supposed)); + check(needed == needed2); + + // Check 
that it is truncated properly + supposed[25] = '\0'; + omp_capture_affinity(my_buf, 26, second_format); + check(streqls(my_buf, supposed)); + } + + #pragma omp parallel num_threads(4) + { + omp_display_affinity(NULL); + omp_display_affinity(second_format); + } + + return 0; +} + +// CHECK: num_threads=4 tl:[0-9]+ tn:[0-9]+ nt:[0-9]+ an:[0-9]+ +// CHECK: num_threads=4 nesting_level:[0-9]+ thread_num:[0-9]+ num_threads:[0-9]+ ancestor_tnum:[0-9]+ diff --git a/runtime/test/affinity/format/check.py b/runtime/test/affinity/format/check.py new file mode 100644 index 0000000..0adddbd --- /dev/null +++ b/runtime/test/affinity/format/check.py @@ -0,0 +1,73 @@ +import os +import sys +import argparse +import re + +class Checks(object): + class CheckError(Exception): + pass + + def __init__(self, filename, prefix): + self.checks = [] + self.lines = [] + self.check_no_output = False + self.filename = filename + self.prefix = prefix + def readStdin(self): + self.lines = [l.rstrip('\r\n') for l in sys.stdin.readlines()] + def readChecks(self): + with open(self.filename) as f: + for line in f: + match = re.search('{}: NO_OUTPUT'.format(self.prefix), line) + if match is not None: + self.check_no_output = True + return + match = re.search('{}: num_threads=([0-9]+) (.*)$'.format(self.prefix), line) + if match is not None: + num_threads = int(match.group(1)) + for i in range(num_threads): + self.checks.append(match.group(2)) + continue + def check(self): + # If no checks at all, then nothing to do + if len(self.checks) == 0 and not self.check_no_output: + print('Nothing to check for') + return + # Check if we are expecting no output + if self.check_no_output: + if len(self.lines) == 0: + return + else: + raise Checks.CheckError('{}: Output was found when expecting none.'.format(self.prefix)) + # Run through each check line and see if it exists in the output + # If it does, then delete the line from output and look for the + # next check line. 
+ # If you don't find the line then raise Checks.CheckError + # If there are extra lines of output then raise Checks.CheckError + for c in self.checks: + found = False + index = -1 + for idx, line in enumerate(self.lines): + if re.search(c, line) is not None: + found = True + index = idx + break + if not found: + raise Checks.CheckError('{}: Did not find: {}'.format(self.prefix, c)) + else: + del self.lines[index] + if len(self.lines) != 0: + raise Checks.CheckError('{}: Extra output: {}'.format(self.prefix, self.lines)) + +# Setup argument parsing +parser = argparse.ArgumentParser(description='''This script checks output of + a program against "CHECK" lines in filename''') +parser.add_argument('filename', default=None, help='filename to check against') +parser.add_argument('-c', '--check-prefix', dest='prefix', + default='CHECK', help='check prefix token default: %(default)s') +command_args = parser.parse_args() +# Do the checking +checks = Checks(command_args.filename, command_args.prefix) +checks.readStdin() +checks.readChecks() +checks.check() diff --git a/runtime/test/affinity/format/fields_modifiers.c b/runtime/test/affinity/format/fields_modifiers.c new file mode 100644 index 0000000..c180271 --- /dev/null +++ b/runtime/test/affinity/format/fields_modifiers.c @@ -0,0 +1,117 @@ +// RUN: %libomp-compile-and-run + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> + +#define XSTR(x) #x +#define STR(x) XSTR(x) + +#define streqls(s1, s2) (!strcmp(s1, s2)) + +#define check(condition) \ + if (!(condition)) { \ + fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#define BUFFER_SIZE 1024 + +char buf[BUFFER_SIZE]; +#pragma omp threadprivate(buf) + +char* get_string(size_t check_needed) { + size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL); + //printf("buf = %s\n", buf); + check(needed < BUFFER_SIZE); + if (check_needed != 0) { + check(needed == check_needed); + } + return buf; 
+} + +void check_thread_num_padded_rjustified() { + int i; + const char* formats[2] = {"%0.8{thread_num}", "%0.8n"}; + for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + int j; + int tid = omp_get_thread_num(); + char ctid = '0' + (char)tid; + char* s = get_string(8); + for (j = 0; j < 7; ++j) { + check(s[j] == '0'); + } + check(s[j] == ctid); + } + } +} + +void check_thread_num_rjustified() { + int i; + const char* formats[2] = {"%.12{thread_num}", "%.12n"}; + for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + int j; + int tid = omp_get_thread_num(); + char ctid = '0' + (char)tid; + char* s = get_string(12); + for (j = 0; j < 11; ++j) { + check(s[j] == ' '); + } + check(s[j] == ctid); + } + } +} + +void check_thread_num_ljustified() { + int i; + const char* formats[2] = {"%5{thread_num}", "%5n"}; + for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + int j; + int tid = omp_get_thread_num(); + char ctid = '0' + (char)tid; + char* s = get_string(5); + check(s[0] == ctid); + for (j = 1; j < 5; ++j) { + check(s[j] == ' '); + } + } + } +} + +void check_thread_num_padded_ljustified() { + int i; + const char* formats[2] = {"%018{thread_num}", "%018n"}; + for (i = 0; i < sizeof(formats)/sizeof(formats[0]); ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + int j; + int tid = omp_get_thread_num(); + char ctid = '0' + (char)tid; + char* s = get_string(18); + check(s[0] == ctid); + for (j = 1; j < 18; ++j) { + check(s[j] == ' '); + } + } + } +} + +int main(int argc, char** argv) { + check_thread_num_ljustified(); + check_thread_num_rjustified(); + check_thread_num_padded_ljustified(); + check_thread_num_padded_rjustified(); + return 0; +} diff --git 
a/runtime/test/affinity/format/fields_values.c b/runtime/test/affinity/format/fields_values.c new file mode 100644 index 0000000..e56ce27 --- /dev/null +++ b/runtime/test/affinity/format/fields_values.c @@ -0,0 +1,152 @@ +// RUN: %libomp-compile-and-run + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <omp.h> + +#define XSTR(x) #x +#define STR(x) XSTR(x) + +#define streqls(s1, s2) (!strcmp(s1, s2)) + +#define check(condition) \ + if (!(condition)) { \ + fprintf(stderr, "error: %s: %d: " STR(condition) "\n", __FILE__, \ + __LINE__); \ + exit(1); \ + } + +#if defined(_WIN32) +#include <windows.h> +#define getpid _getpid +typedef int pid_t; +#define gettid GetCurrentThreadId +#define my_gethostname(buf, sz) GetComputerNameA(buf, &(sz)) +#else +#include <unistd.h> +#include <sys/types.h> +#define my_gethostname(buf, sz) gethostname(buf, sz) +#endif + +#define BUFFER_SIZE 256 + +int get_integer() { + int n, retval; + char buf[BUFFER_SIZE]; + size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL); + check(needed < BUFFER_SIZE); + n = sscanf(buf, "%d", &retval); + check(n == 1); + return retval; +} + +char* get_string() { + int n, retval; + char buf[BUFFER_SIZE]; + size_t needed = omp_capture_affinity(buf, BUFFER_SIZE, NULL); + check(needed < BUFFER_SIZE); + return strdup(buf); +} + +void check_integer(const char* formats[2], int(*func)()) { + int i; + for (i = 0; i < 2; ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + check(get_integer() == func()); + #pragma omp parallel num_threads(3) + { + check(get_integer() == func()); + } + check(get_integer() == func()); + } + } +} + +void check_nesting_level() { + // Check %{nesting_level} and %L + const char* formats[2] = {"%{nesting_level}", "%L"}; + check_integer(formats, omp_get_level); +} + +void check_thread_num() { + // Check %{thread_num} and %n + const char* formats[2] = {"%{thread_num}", "%n"}; + check_integer(formats, omp_get_thread_num); +} + 
+void check_num_threads() { + // Check %{num_threads} and %N + const char* formats[2] = {"%{num_threads}", "%N"}; + check_integer(formats, omp_get_num_threads); +} + +int ancestor_helper() { + return omp_get_ancestor_thread_num(omp_get_level() - 1); +} +void check_ancestor_tnum() { + // Check %{ancestor_tnum} and %a + const char* formats[2] = {"%{ancestor_tnum}", "%a"}; + check_integer(formats, ancestor_helper); +} + +int my_get_pid() { return (int)getpid(); } +void check_process_id() { + // Check %{process_id} and %P + const char* formats[2] = {"%{process_id}", "%P"}; + check_integer(formats, my_get_pid); +} + +/* +int my_get_tid() { return (int)gettid(); } +void check_native_thread_id() { + // Check %{native_thread_id} and %i + const char* formats[2] = {"%{native_thread_id}", "%i"}; + check_integer(formats, my_get_tid); +} +*/ + +void check_host() { + int i; + int buffer_size = 256; + const char* formats[2] = {"%{host}", "%H"}; + char hostname[256]; + my_gethostname(hostname, buffer_size); + for (i = 0; i < 2; ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + char* host = get_string(); + check(streqls(host, hostname)); + free(host); + } + } +} + +void check_undefined() { + int i; + const char* formats[2] = {"%{foobar}", "%X"}; + for (i = 0; i < 2; ++i) { + omp_set_affinity_format(formats[i]); + #pragma omp parallel num_threads(8) + { + char* undef = get_string(); + check(streqls(undef, "undefined")); + free(undef); + } + } +} + +int main(int argc, char** argv) { + omp_set_nested(1); + check_nesting_level(); + check_num_threads(); + check_ancestor_tnum(); + check_process_id(); + //check_native_thread_id(); + check_host(); + check_undefined(); + return 0; +} diff --git a/runtime/test/affinity/format/increase.c b/runtime/test/affinity/format/increase.c new file mode 100644 index 0000000..46d8edb --- /dev/null +++ b/runtime/test/affinity/format/increase.c @@ -0,0 +1,36 @@ +// RUN: %libomp-compile && env 
OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N"); + // should print all for first parallel + omp_set_num_threads(4); + #pragma omp parallel + { } + // should print all because of new threads + omp_set_num_threads(8); + #pragma omp parallel + { } + // should not print anything here + omp_set_num_threads(6); + #pragma omp parallel + { } + // should print all because of new thread + omp_set_num_threads(9); + #pragma omp parallel + { } + // should not print anything here + omp_set_num_threads(2); + #pragma omp parallel + { } + return 0; +} + +// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 +// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 +// CHECK: num_threads=6 TESTER: tl:1 tn:[0-5] nt:6 +// CHECK: num_threads=9 TESTER: tl:1 tn:[0-8] nt:9 +// CHECK: num_threads=2 TESTER: tl:1 tn:[01] nt:2 diff --git a/runtime/test/affinity/format/lit.local.cfg b/runtime/test/affinity/format/lit.local.cfg new file mode 100644 index 0000000..80583af --- /dev/null +++ b/runtime/test/affinity/format/lit.local.cfg @@ -0,0 +1,2 @@ +if 'openmp-5.0' not in config.available_features: + config.unsupported = True diff --git a/runtime/test/affinity/format/nested.c b/runtime/test/affinity/format/nested.c new file mode 100644 index 0000000..502c1da --- /dev/null +++ b/runtime/test/affinity/format/nested.c @@ -0,0 +1,23 @@ +// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close %libomp-run | python %S/check.py -c 'CHECK' %s +// REQUIRES: affinity + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N"); + omp_set_nested(1); + #pragma omp parallel num_threads(4) + { + #pragma omp parallel num_threads(3) + { } + } + return 0; +} + +// CHECK: num_threads=4 TESTER: tl:1 at:0 
tn:[0-3] nt:4 +// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 +// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 +// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 +// CHECK: num_threads=3 TESTER: tl:2 at:[0-3] tn:[0-2] nt:3 diff --git a/runtime/test/affinity/format/nested2.c b/runtime/test/affinity/format/nested2.c new file mode 100644 index 0000000..3dd4956 --- /dev/null +++ b/runtime/test/affinity/format/nested2.c @@ -0,0 +1,29 @@ +// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES=threads OMP_PROC_BIND=spread,close KMP_HOT_TEAMS_MAX_LEVEL=2 %libomp-run | python %S/check.py -c 'CHECK' %s + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +// Currently, KMP_HOT_TEAMS_MAX_LEVEL has to be equal to the +// nest depth for intuitive behavior +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N"); + omp_set_nested(1); + #pragma omp parallel num_threads(4) + { + #pragma omp parallel num_threads(3) + { } + #pragma omp parallel num_threads(3) + { } + } + #pragma omp parallel num_threads(4) + { } + return 0; +} + +// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 +// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 +// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 +// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 +// CHECK: num_threads=3 TESTER: tl:2 tn:[0-2] nt:3 +// CHECK: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 diff --git a/runtime/test/affinity/format/nested_mixed.c b/runtime/test/affinity/format/nested_mixed.c new file mode 100644 index 0000000..a39b4fd --- /dev/null +++ b/runtime/test/affinity/format/nested_mixed.c @@ -0,0 +1,46 @@ +// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N"); + omp_set_nested(1); + #pragma omp parallel num_threads(1) + { + #pragma omp 
parallel num_threads(2) + { } + #pragma omp parallel num_threads(2) + { + #pragma omp parallel num_threads(1) + { + #pragma omp parallel num_threads(2) + { } + } + } + #pragma omp parallel num_threads(1) + { } + } + #pragma omp parallel num_threads(2) + { } + #pragma omp parallel num_threads(1) + { } + return 0; +} + +// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1 + +// CHECK: num_threads=2 TESTER: tl:2 at:[0-9] tn:[01] nt:2 + +// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1 +// CHECK: num_threads=1 TESTER: tl:3 at:[0-9] tn:0 nt:1 + +// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2 +// CHECK: num_threads=2 TESTER: tl:4 at:[0-9] tn:[01] nt:2 + +// CHECK: num_threads=1 TESTER: tl:2 at:[0-9] tn:0 nt:1 + +// CHECK: num_threads=2 TESTER: tl:1 at:[0-9] tn:[01] nt:2 + +// CHECK: num_threads=1 TESTER: tl:1 at:[0-9] tn:0 nt:1 diff --git a/runtime/test/affinity/format/nested_serial.c b/runtime/test/affinity/format/nested_serial.c new file mode 100644 index 0000000..87ff2bd --- /dev/null +++ b/runtime/test/affinity/format/nested_serial.c @@ -0,0 +1,35 @@ +// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true %libomp-run | python %S/check.py -c 'CHECK' %s + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L at:%a tn:%n nt:%N"); + omp_set_nested(1); + #pragma omp parallel num_threads(1) + { + #pragma omp parallel num_threads(1) + { } + #pragma omp parallel num_threads(1) + { } + #pragma omp parallel num_threads(1) + { + #pragma omp parallel num_threads(1) + { } + } + #pragma omp parallel num_threads(1) + { } + } + #pragma omp parallel num_threads(1) + { } + #pragma omp parallel num_threads(1) + { } + return 0; +} + +// CHECK: num_threads=1 TESTER: tl:1 at:0 tn:0 nt:1 +// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1 +// CHECK: num_threads=1 TESTER: tl:3 at:0 tn:0 nt:1 +// CHECK: num_threads=1 TESTER: tl:2 at:0 tn:0 nt:1 +// CHECK: num_threads=1 TESTER: tl:1 at:0 
tn:0 nt:1 diff --git a/runtime/test/affinity/format/proc_bind.c b/runtime/test/affinity/format/proc_bind.c new file mode 100644 index 0000000..e88e1aa --- /dev/null +++ b/runtime/test/affinity/format/proc_bind.c @@ -0,0 +1,31 @@ +// RUN: %libomp-compile && env OMP_DISPLAY_AFFINITY=true OMP_PLACES='{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0},{0,1},{0}' %libomp-run | python %S/check.py -c 'CHECK' %s +// REQUIRES: affinity + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N aff:{%A}"); + omp_set_num_threads(8); + // Initial parallel + #pragma omp parallel proc_bind(spread) + { } + #pragma omp parallel proc_bind(spread) + { } + // Affinity changes here + #pragma omp parallel proc_bind(close) + { } + #pragma omp parallel proc_bind(close) + { } + // Affinity changes here + #pragma omp parallel proc_bind(master) + { } + #pragma omp parallel proc_bind(master) + { } + return 0; +} + +// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff: +// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff: +// CHECK: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 aff: diff --git a/runtime/test/affinity/format/simple.c b/runtime/test/affinity/format/simple.c new file mode 100644 index 0000000..954aa74 --- /dev/null +++ b/runtime/test/affinity/format/simple.c @@ -0,0 +1,27 @@ +// RUN: %libomp-compile +// RUN: env OMP_DISPLAY_AFFINITY=false %libomp-run | python %S/check.py -c 'NOTHING' %s +// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=1 %libomp-run | python %S/check.py -c 'CHECK' %s +// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=2 %libomp-run | python %S/check.py -c 'CHECK-2' %s +// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=3 %libomp-run | python %S/check.py -c 'CHECK-3' %s +// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=4 %libomp-run | python %S/check.py -c 'CHECK-4' %s +// RUN: env OMP_DISPLAY_AFFINITY=true OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 
'CHECK-8' %s + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + omp_set_affinity_format("TESTER: tl:%L tn:%n nt:%N"); + #pragma omp parallel + { } + #pragma omp parallel + { } + return 0; +} + +// NOTHING: NO_OUTPUT +// CHECK: num_threads=1 TESTER: tl:1 tn:0 nt:1 +// CHECK-2: num_threads=2 TESTER: tl:1 tn:[01] nt:2 +// CHECK-3: num_threads=3 TESTER: tl:1 tn:[0-2] nt:3 +// CHECK-4: num_threads=4 TESTER: tl:1 tn:[0-3] nt:4 +// CHECK-8: num_threads=8 TESTER: tl:1 tn:[0-7] nt:8 diff --git a/runtime/test/affinity/format/simple_env.c b/runtime/test/affinity/format/simple_env.c new file mode 100644 index 0000000..7aab1cf --- /dev/null +++ b/runtime/test/affinity/format/simple_env.c @@ -0,0 +1,16 @@ +// RUN: %libomp-compile +// RUN: env OMP_DISPLAY_AFFINITY=true OMP_AFFINITY_FORMAT='TESTER-ENV: tl:%L tn:%n nt:%N' OMP_NUM_THREADS=8 %libomp-run | python %S/check.py -c 'CHECK-8' %s + +#include <stdio.h> +#include <stdlib.h> +#include <omp.h> + +int main(int argc, char** argv) { + #pragma omp parallel + { } + #pragma omp parallel + { } + return 0; +} + +// CHECK-8: num_threads=8 TESTER-ENV: tl:1 tn:[0-7] nt:8 diff --git a/runtime/test/api/omp_alloc.c b/runtime/test/api/omp_alloc.c index afad4a5..2002adb 100644 --- a/runtime/test/api/omp_alloc.c +++ b/runtime/test/api/omp_alloc.c @@ -1,4 +1,7 @@ // RUN: %libomp-compile-and-run + +// REQUIRES: openmp-5.0 + #include <stdio.h> #include <stdint.h> #include <omp.h> diff --git a/runtime/test/api/omp_get_wtick.c b/runtime/test/api/omp_get_wtick.c index 8b35226..11a320f 100644 --- a/runtime/test/api/omp_get_wtick.c +++ b/runtime/test/api/omp_get_wtick.c @@ -7,7 +7,7 @@ int test_omp_get_wtick() double tick; tick = -1.; tick = omp_get_wtick (); - return ((tick > 0.0) && (tick < 0.01)); + return ((tick > 0.0) && (tick <= 0.01)); } int main() diff --git a/runtime/test/api/omp_in_parallel.c b/runtime/test/api/omp_in_parallel.c index d09313e..5e9e635 100644 --- 
a/runtime/test/api/omp_in_parallel.c +++ b/runtime/test/api/omp_in_parallel.c @@ -30,6 +30,11 @@ int main() int i; int num_failed=0; + // the test requires more than 1 thread to pass + omp_set_dynamic(0); // disable dynamic adjustment of threads + if (omp_get_max_threads() == 1) + omp_set_num_threads(2); // set 2 threads if no HW resources available + for(i = 0; i < REPETITIONS; i++) { if(!test_omp_in_parallel()) { num_failed++; diff --git a/runtime/test/flush/omp_flush.c b/runtime/test/flush/omp_flush.c index 3fd3cdf..95a406d 100644 --- a/runtime/test/flush/omp_flush.c +++ b/runtime/test/flush/omp_flush.c @@ -36,6 +36,11 @@ int main() int i; int num_failed=0; + // the test requires more than 1 thread to pass + omp_set_dynamic(0); // disable dynamic adjustment of threads + if (omp_get_max_threads() == 1) + omp_set_num_threads(2); // set 2 threads if no HW resources available + for (i = 0; i < REPETITIONS; i++) { if(!test_omp_flush()) { num_failed++; diff --git a/runtime/test/lit.cfg b/runtime/test/lit.cfg index 9f0c059..066929e 100644 --- a/runtime/test/lit.cfg +++ b/runtime/test/lit.cfg @@ -55,7 +55,6 @@ if config.has_libatomic: libs += " -latomic" # Allow REQUIRES / UNSUPPORTED / XFAIL to work -config.target_triple = [ ] for feature in config.test_compiler_features: config.available_features.add(feature) @@ -91,9 +90,21 @@ if config.has_ompt: # for callback.h config.test_flags += " -I " + config.test_source_root + "/ompt" +if config.libomp_omp_version >= 50: + config.available_features.add("openmp-5.0") + +if config.libomp_omp_version >= 45: + config.available_features.add("openmp-4.5") + +if config.libomp_omp_version >= 40: + config.available_features.add("openmp-4.0") + if 'Linux' in config.operating_system: config.available_features.add("linux") +if config.operating_system in ['Linux', 'Windows']: + config.available_features.add('affinity') + # to run with icc INTEL_LICENSE_FILE must be set if 'INTEL_LICENSE_FILE' in os.environ: 
config.environment['INTEL_LICENSE_FILE'] = os.environ['INTEL_LICENSE_FILE'] @@ -116,7 +127,7 @@ config.substitutions.append(("%flags", config.test_flags)) if config.has_ompt: config.substitutions.append(("FileCheck", config.test_filecheck)) - config.substitutions.append(("%sort-threads", "sort --numeric-sort --stable")) + config.substitutions.append(("%sort-threads", "sort -n -s")) if config.operating_system == 'Windows': # No such environment variable on Windows. config.substitutions.append(("%preload-tool", "true ||")) diff --git a/runtime/test/lit.site.cfg.in b/runtime/test/lit.site.cfg.in index c2825ee..fe4a372 100644 --- a/runtime/test/lit.site.cfg.in +++ b/runtime/test/lit.site.cfg.in @@ -1,11 +1,13 @@ @AUTO_GEN_COMMENT@ +config.target_triple = "@TARGET_TRIPLE@" config.test_c_compiler = "@OPENMP_TEST_C_COMPILER@" config.test_cxx_compiler = "@OPENMP_TEST_CXX_COMPILER@" config.test_compiler_features = @OPENMP_TEST_COMPILER_FEATURES@ config.test_filecheck = "@OPENMP_FILECHECK_EXECUTABLE@" config.test_openmp_flags = "@OPENMP_TEST_OPENMP_FLAGS@" config.test_extra_flags = "@OPENMP_TEST_FLAGS@" +config.libomp_omp_version = @LIBOMP_OMP_VERSION@ config.libomp_obj_root = "@CMAKE_CURRENT_BINARY_DIR@" config.library_dir = "@LIBOMP_LIBRARY_DIR@" config.omp_header_directory = "@LIBOMP_BINARY_DIR@/src" diff --git a/runtime/test/ompt/callback.h b/runtime/test/ompt/callback.h index df83043..0304cff 100755 --- a/runtime/test/ompt/callback.h +++ b/runtime/test/ompt/callback.h @@ -79,7 +79,7 @@ static ompt_enumerate_mutex_impls_t ompt_enumerate_mutex_impls; static void print_ids(int level) { int task_type, thread_num; - omp_frame_t *frame; + ompt_frame_t *frame; ompt_data_t *task_parallel_data; ompt_data_t *task_data; int exists_task = ompt_get_task_info(level, &task_type, &task_data, &frame, @@ -92,8 +92,8 @@ static void print_ids(int level) "task_type=%s=%d, thread_num=%d\n", ompt_get_thread_data()->value, level, exists_task ? task_parallel_data->value : 0, - exists_task ? 
task_data->value : 0, frame->exit_frame, - frame->enter_frame, buffer, task_type, thread_num); + exists_task ? task_data->value : 0, frame->exit_frame.ptr, + frame->enter_frame.ptr, buffer, task_type, thread_num); } #define get_frame_address(level) __builtin_frame_address(level) @@ -197,7 +197,7 @@ on_ompt_callback_mutex_acquire( ompt_mutex_t kind, unsigned int hint, unsigned int impl, - omp_wait_id_t wait_id, + ompt_wait_id_t wait_id, const void *codeptr_ra) { switch(kind) @@ -225,7 +225,7 @@ on_ompt_callback_mutex_acquire( static void on_ompt_callback_mutex_acquired( ompt_mutex_t kind, - omp_wait_id_t wait_id, + ompt_wait_id_t wait_id, const void *codeptr_ra) { switch(kind) @@ -253,7 +253,7 @@ on_ompt_callback_mutex_acquired( static void on_ompt_callback_mutex_released( ompt_mutex_t kind, - omp_wait_id_t wait_id, + ompt_wait_id_t wait_id, const void *codeptr_ra) { switch(kind) @@ -281,7 +281,7 @@ on_ompt_callback_mutex_released( static void on_ompt_callback_nest_lock( ompt_scope_endpoint_t endpoint, - omp_wait_id_t wait_id, + ompt_wait_id_t wait_id, const void *codeptr_ra) { switch(endpoint) @@ -460,7 +460,7 @@ on_ompt_callback_lock_init( ompt_mutex_t kind, unsigned int hint, unsigned int impl, - omp_wait_id_t wait_id, + ompt_wait_id_t wait_id, const void *codeptr_ra) { switch(kind) @@ -479,7 +479,7 @@ on_ompt_callback_lock_init( static void on_ompt_callback_lock_destroy( ompt_mutex_t kind, - omp_wait_id_t wait_id, + ompt_wait_id_t wait_id, const void *codeptr_ra) { switch(kind) @@ -583,7 +583,7 @@ on_ompt_callback_master( static void on_ompt_callback_parallel_begin( ompt_data_t *encountering_task_data, - const omp_frame_t *encountering_task_frame, ompt_data_t *parallel_data, + const ompt_frame_t *encountering_task_frame, ompt_data_t *parallel_data, uint32_t requested_team_size, int flag, const void *codeptr_ra) { if(parallel_data->ptr) printf("0: parallel_data initially not null\n"); @@ -593,8 +593,8 @@ static void on_ompt_callback_parallel_begin( 
"parallel_id=%" PRIu64 ", requested_team_size=%" PRIu32 ", codeptr_ra=%p, invoker=%d\n", ompt_get_thread_data()->value, encountering_task_data->value, - encountering_task_frame->exit_frame, - encountering_task_frame->enter_frame, parallel_data->value, + encountering_task_frame->exit_frame.ptr, + encountering_task_frame->enter_frame.ptr, parallel_data->value, requested_team_size, codeptr_ra, flag); } @@ -610,7 +610,7 @@ static void on_ompt_callback_parallel_end(ompt_data_t *parallel_data, static void on_ompt_callback_task_create( ompt_data_t *encountering_task_data, - const omp_frame_t *encountering_task_frame, + const ompt_frame_t *encountering_task_frame, ompt_data_t* new_task_data, int type, int has_dependences, @@ -634,7 +634,7 @@ on_ompt_callback_task_create( parallel_data->value = ompt_get_unique_id(); } - printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame : NULL, encountering_task_frame ? encountering_task_frame->enter_frame : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? "yes" : "no"); + printf("%" PRIu64 ": ompt_event_task_create: parent_task_id=%" PRIu64 ", parent_task_frame.exit=%p, parent_task_frame.reenter=%p, new_task_id=%" PRIu64 ", codeptr_ra=%p, task_type=%s=%d, has_dependences=%s\n", ompt_get_thread_data()->value, encountering_task_data ? encountering_task_data->value : 0, encountering_task_frame ? encountering_task_frame->exit_frame.ptr : NULL, encountering_task_frame ? encountering_task_frame->enter_frame.ptr : NULL, new_task_data->value, codeptr_ra, buffer, type, has_dependences ? 
"yes" : "no"); } static void @@ -692,9 +692,9 @@ on_ompt_callback_control_tool( void *arg, const void *codeptr_ra) { - omp_frame_t* omptTaskFrame; + ompt_frame_t* omptTaskFrame; ompt_get_task_info(0, NULL, (ompt_data_t**) NULL, &omptTaskFrame, NULL, NULL); - printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame, omptTaskFrame->enter_frame); + printf("%" PRIu64 ": ompt_event_control_tool: command=%" PRIu64 ", modifier=%" PRIu64 ", arg=%p, codeptr_ra=%p, current_task_frame.exit=%p, current_task_frame.reenter=%p \n", ompt_get_thread_data()->value, command, modifier, arg, codeptr_ra, omptTaskFrame->exit_frame.ptr, omptTaskFrame->enter_frame.ptr); return 0; //success } diff --git a/runtime/test/ompt/misc/api_calls_from_other_thread.cpp b/runtime/test/ompt/misc/api_calls_from_other_thread.cpp index 470d7cd..e2ef1fc 100644 --- a/runtime/test/ompt/misc/api_calls_from_other_thread.cpp +++ b/runtime/test/ompt/misc/api_calls_from_other_thread.cpp @@ -31,12 +31,12 @@ void f() { printf("%" PRIu64 ": ompt_get_state()=%d\n", tvalue, ompt_get_state(NULL)); - int state = omp_state_undefined; + int state = ompt_state_undefined; const char *state_name; printf("%" PRIu64 ": ompt_enumerate_states()=%d\n", tvalue, ompt_enumerate_states(state, &state, &state_name)); - int impl = ompt_mutex_impl_unknown; + int impl = ompt_mutex_impl_none; const char *impl_name; printf("%" PRIu64 ": ompt_enumerate_mutex_impls()=%d\n", tvalue, ompt_enumerate_mutex_impls(impl, &impl, &impl_name)); diff --git a/runtime/test/ompt/misc/api_calls_misc.c b/runtime/test/ompt/misc/api_calls_misc.c index d567b1b..884421e 100644 --- a/runtime/test/ompt/misc/api_calls_misc.c +++ b/runtime/test/ompt/misc/api_calls_misc.c @@ -19,7 +19,7 @@ int main() { ompt_get_state(NULL)); // ompt_enumerate_states() - int 
state = omp_state_undefined; + int state = ompt_state_undefined; const char *state_name; int steps = 0; while (ompt_enumerate_states(state, &state, &state_name) && steps < 1000) { @@ -35,7 +35,7 @@ int main() { } // ompt_enumerate_mutex_impls() - int impl = ompt_mutex_impl_unknown; + int impl = ompt_mutex_impl_none; const char *impl_name; steps = 0; while (ompt_enumerate_mutex_impls(impl, &impl, &impl_name) && diff --git a/runtime/test/ompt/misc/api_calls_places.c b/runtime/test/ompt/misc/api_calls_places.c index ad338a7..3385c9c 100644 --- a/runtime/test/ompt/misc/api_calls_places.c +++ b/runtime/test/ompt/misc/api_calls_places.c @@ -42,7 +42,7 @@ int main() { int omp_nums[omp_nums_size]; omp_get_partition_place_nums(omp_nums); print_list("omp_get_partition_place_nums", omp_nums_size, omp_nums); - int ompt_nums_size = ompt_get_partition_place_nums(0, NULL); + int ompt_nums_size = ompt_get_partition_place_nums(0, omp_nums); int ompt_nums[ompt_nums_size]; ompt_get_partition_place_nums(ompt_nums_size, ompt_nums); print_list("ompt_get_partition_place_nums", ompt_nums_size, ompt_nums); diff --git a/runtime/test/ompt/misc/control_tool.c b/runtime/test/ompt/misc/control_tool.c index 2c59666..7995614 100644 --- a/runtime/test/ompt/misc/control_tool.c +++ b/runtime/test/ompt/misc/control_tool.c @@ -1,6 +1,7 @@ // RUN: %libomp-compile-and-run | FileCheck %s // REQUIRES: ompt // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +// XFAIL: powerpc64le, ppc64le #define TEST_NEED_PRINT_FRAME_FROM_OUTLINED_FN #include "callback.h" #include <omp.h> @@ -22,7 +23,7 @@ int main() // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address({{.}})=[[EXIT_FRAME:0x[0-f]*]] // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER_FRAME:0x[0-f]*]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter=[[REENTER_FRAME]] + // CHECK: 
{{^}}[[MASTER_ID]]: ompt_event_control_tool: command=3, modifier=1, arg=[[NULL]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]*]], current_task_frame.exit=[[EXIT_FRAME]], current_task_frame.reenter={{0x[0-f]*}} // CHECK-NEXT: {{^}}[[MASTER_ID]]: current_address={{.*}}[[RETURN_ADDRESS]] return 0; diff --git a/runtime/test/ompt/misc/control_tool_no_ompt_support.c b/runtime/test/ompt/misc/control_tool_no_ompt_support.c index ee64da0..23daf8b 100644 --- a/runtime/test/ompt/misc/control_tool_no_ompt_support.c +++ b/runtime/test/ompt/misc/control_tool_no_ompt_support.c @@ -1,4 +1,7 @@ // RUN: %libomp-compile-and-run + +// REQUIRES: openmp-5.0 + #include <omp.h> int main() diff --git a/runtime/test/ompt/misc/interoperability.cpp b/runtime/test/ompt/misc/interoperability.cpp index 102e6de..b07814e 100644 --- a/runtime/test/ompt/misc/interoperability.cpp +++ b/runtime/test/ompt/misc/interoperability.cpp @@ -3,7 +3,11 @@ #include <iostream> #include <thread> +#if !defined(__NetBSD__) #include <alloca.h> +#else +#include <cstdlib> +#endif #include "callback.h" #include "omp.h" diff --git a/runtime/test/ompt/parallel/nested.c b/runtime/test/ompt/parallel/nested.c index 035529c..d91597b 100644 --- a/runtime/test/ompt/parallel/nested.c +++ b/runtime/test/ompt/parallel/nested.c @@ -80,25 +80,25 @@ int main() // THREADS: {{^}}0: NULL_POINTER=[[NULL:.*$]] // THREADS: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], 
requested_team_size=4, codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] - // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] + // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], requested_team_size=4, codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, invoker=[[PARALLEL_INVOKER]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID:[0-9]+]] // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[NESTED_EXIT:0x[0-f]+]] // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] - // THREADS: 
{{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] - // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} + // THREADS: {{^}}[[MASTER_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // THREADS: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]] // THREADS-NOT: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end // explicit barrier // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], codeptr_ra=[[BARRIER_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} - // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] + // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=0x{{[0-f]+}} // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]] // THREADS: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[BARRIER_RETURN_ADDRESS]] // THREADS: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[NESTED_PARALLEL_ID]], task_id=[[NESTED_IMPLICIT_TASK_ID]], exit_frame=[[NESTED_EXIT]], reenter_frame=[[NULL]] diff --git a/runtime/test/ompt/parallel/nested_thread_num.c b/runtime/test/ompt/parallel/nested_thread_num.c index e952f80..f14f87a 100644 --- a/runtime/test/ompt/parallel/nested_thread_num.c +++ b/runtime/test/ompt/parallel/nested_thread_num.c @@ -80,7 +80,7 @@ int main() { 
// THREADS: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: // THREADS-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], // THREADS-SAME: parent_task_frame.exit=[[NULL]], -// THREADS-SAME: parent_task_frame.reenter=[[MAIN_REENTER]], +// THREADS-SAME: parent_task_frame.reenter=0x{{[0-f]+}}, // THREADS-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, // THREADS-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, // THREADS-SAME: invoker=[[PARALLEL_INVOKER:[0-9]+]] @@ -101,14 +101,14 @@ int main() { // THREADS: {{^}}[[MASTER_ID]]: task level 1: // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], // THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], -// THREADS-SAME: reenter_frame=[[MAIN_REENTER]] +// THREADS-SAME: reenter_frame=0x{{[0-f]+}} // THREADS: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // THREADS: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: // THREADS-SAME: parent_task_id=[[IMPLICIT_TASK_ID]], // THREADS-SAME: parent_task_frame.exit=[[EXIT]], -// THREADS-SAME: parent_task_frame.reenter=[[REENTER]], +// THREADS-SAME: parent_task_frame.reenter=0x{{[0-f]+}}, // THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID:[0-9]+]], // THREADS-SAME: requested_team_size=2, // THREADS-SAME: codeptr_ra=[[NESTED_RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}}, @@ -129,12 +129,12 @@ int main() { // THREADS: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], // THREADS-SAME: task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], -// THREADS-SAME: reenter_frame=[[REENTER]] +// THREADS-SAME: reenter_frame=0x{{[0-f]+}} // THREADS: {{^}}[[MASTER_ID]]: task level 2: // THREADS-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]], // THREADS-SAME: task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], -// THREADS-SAME: reenter_frame=[[MAIN_REENTER]] +// THREADS-SAME: reenter_frame=0x{{[0-f]+}} // THREADS: __builtin_frame_address(0)=[[NESTED_REENTER:0x[0-f]+]] @@ -149,7 +149,7 @@ int main() { // THREADS: {{^}}[[MASTER_ID]]: task level 0: // 
THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], // THREADS-SAME: task_id=[[NESTED_IMPLICIT_TASK_ID]], -// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=[[NESTED_REENTER]] +// THREADS-SAME: exit_frame=[[NESTED_EXIT]], reenter_frame=0x{{[0-f]+}} // THREADS: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: // THREADS-SAME: parallel_id=[[NESTED_PARALLEL_ID]], diff --git a/runtime/test/ompt/parallel/nested_threadnum.c b/runtime/test/ompt/parallel/nested_threadnum.c new file mode 100644 index 0000000..a248530 --- /dev/null +++ b/runtime/test/ompt/parallel/nested_threadnum.c @@ -0,0 +1,62 @@ +// RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s +// REQUIRES: ompt +#include <omp.h> +#include "callback.h" + +int main() { + omp_set_nested(1); +#pragma omp parallel num_threads(2) + { +#pragma omp barrier +#pragma omp parallel num_threads(2) + { print_frame(0); } + } + + // CHECK: 0: NULL_POINTER=[[NULL:.*$]] + + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM1:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: + // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1]] + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM1:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_end + // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM1]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + + // CHECK: {{^}}[[WORKER_ID1:[0-9]+]]: 
ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[PARALLEL_ID]] + // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM2:[0-9]+]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_parallel_begin: + // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2:[0-9]+]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_begin: + // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2]] + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM2:[0-9]+]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_end + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM2]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_parallel_end: + // CHECK-SAME: parallel_id=[[INNER_PARALLEL_ID2]] + // CHECK: {{^}}[[WORKER_ID1]]: ompt_event_implicit_task_end + // CHECK-SAME: thread_num=[[OUTER_THREAD_NUM2]] + + // CHECK: {{^}}[[WORKER_ID2:[0-9]+]]: ompt_event_implicit_task_begin: + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM3:[0-9]+]] + // CHECK: {{^}}[[WORKER_ID2]]: ompt_event_implicit_task_end + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM3]] + + // CHECK: {{^}}[[WORKER_ID3:[0-9]+]]: ompt_event_implicit_task_begin: + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM4:[0-9]+]] + // CHECK: {{^}}[[WORKER_ID3]]: ompt_event_implicit_task_end + // CHECK-SAME: thread_num=[[INNER_THREAD_NUM4]] + + return 0; +} diff --git a/runtime/test/ompt/synchronization/taskwait.c b/runtime/test/ompt/synchronization/taskwait.c index c431024..cb30f3b 100644 --- a/runtime/test/ompt/synchronization/taskwait.c +++ b/runtime/test/ompt/synchronization/taskwait.c @@ -1,6 +1,7 @@ // RUN: %libomp-compile-and-run | %sort-threads | FileCheck %s // REQUIRES: ompt // UNSUPPORTED: gcc-4, gcc-5, gcc-6, gcc-7 +// XFAIL: powerpc64le, ppc64le #include "callback.h" #include <omp.h> diff --git a/runtime/test/ompt/tasks/explicit_task.c b/runtime/test/ompt/tasks/explicit_task.c index 01fb3f8..a986c48 100644 --- a/runtime/test/ompt/tasks/explicit_task.c +++ b/runtime/test/ompt/tasks/explicit_task.c @@ -52,22 +52,22 @@ int main() // make sure initial data pointers are 
null // CHECK-NOT: 0: new_task_data initially not null - // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // CHECK--doesnotwork: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // <- ompt_event_task_create would be expected here - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], 
parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} // CHECK: {{^}}[[MASTER_ID]]: fuzzy_address={{.*}}[[RETURN_ADDRESS]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // explicit barrier after master // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // implicit barrier parallel // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] @@ -78,16 +78,16 @@ int main() // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], 
reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} // this is expected to come earlier and at MASTER: // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] - // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] diff --git a/runtime/test/ompt/tasks/serialized.c b/runtime/test/ompt/tasks/serialized.c index 12a0281..b1ef45d 100644 --- 
a/runtime/test/ompt/tasks/serialized.c +++ b/runtime/test/ompt/tasks/serialized.c @@ -58,7 +58,7 @@ int main() { // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin // CHECK-SAME: parent_task_id=[[PARENT_TASK_ID:[0-9]+]] // CHECK-SAME: parent_task_frame.exit=[[NULL]] - // CHECK-SAME: parent_task_frame.reenter=[[MAIN_REENTER]] + // CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}} // CHECK-SAME: parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2 // CHECK-SAME: codeptr_ra=0x{{[0-f]+}}, invoker={{[0-9]+}} @@ -76,13 +76,13 @@ int main() { // CHECK: {{^}}[[MASTER_ID]]: task level 1 // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]] // CHECK-SAME: task_id=[[PARENT_TASK_ID]], - // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create // CHECK-SAME: parent_task_id=[[IMPLICIT_TASK_ID]] // CHECK-SAME: parent_task_frame.exit=[[EXIT]] - // CHECK-SAME: parent_task_frame.reenter=[[REENTER]] + // CHECK-SAME: parent_task_frame.reenter=0x{{[0-f]+}} // CHECK-SAME: new_task_id=[[TASK_ID:[0-9]+]] // CHECK-SAME: codeptr_ra=[[RETURN_ADDRESS:0x[0-f]+]]{{[0-f][0-f]}} @@ -96,12 +96,12 @@ int main() { // CHECK: {{^}}[[MASTER_ID]]: task level 1 // CHECK-SAME: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK-SAME: exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: task level 2 // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] // CHECK-SAME: task_id=[[PARENT_TASK_ID]] - // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_schedule // CHECK-SAME: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] @@ -135,7 +135,7 @@ int main() 
{ // CHECK: {{^}}[[THREAD_ID]]: task level 1 // CHECK-SAME: parallel_id=[[IMPLICIT_PARALLEL_ID]] // CHECK-SAME: task_id=[[PARENT_TASK_ID]] - // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK-SAME: exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)={{0x[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin diff --git a/runtime/test/ompt/tasks/task_in_joinbarrier.c b/runtime/test/ompt/tasks/task_in_joinbarrier.c index 25b57a9..8228add 100644 --- a/runtime/test/ompt/tasks/task_in_joinbarrier.c +++ b/runtime/test/ompt/tasks/task_in_joinbarrier.c @@ -50,16 +50,16 @@ int main() // CHECK-NOT: 0: new_task_data initially not null // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: 
parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // <- ompt_event_task_create would be expected here - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // implicit barrier parallel // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] @@ -70,7 +70,7 @@ int main() // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // implicit barrier parallel // CHECK: 
{{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] @@ -79,7 +79,7 @@ int main() // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id={{[0-9]+}}, task_id=[[IMPLICIT_TASK_ID]] diff --git a/runtime/test/ompt/tasks/untied_task.c b/runtime/test/ompt/tasks/untied_task.c index e68fa26..4ee3f11 100644 --- a/runtime/test/ompt/tasks/untied_task.c +++ b/runtime/test/ompt/tasks/untied_task.c @@ -60,20 +60,20 @@ int main() // CHECK-NOT: 0: new_task_data initially not null // CHECK: {{^}}[[MASTER_ID:[0-9]+]]: __builtin_frame_address(0)=[[MAIN_REENTER:0x[0-f]+]] - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=[[MAIN_REENTER]], parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_parallel_begin: parent_task_id=[[PARENT_TASK_ID:[0-9]+]], parent_task_frame.exit=[[NULL]], parent_task_frame.reenter=0x{{[0-f]+}}, 
parallel_id=[[PARALLEL_ID:[0-9]+]], requested_team_size=2, codeptr_ra=0x{{[0-f]+}}, invoker=[[PARALLEL_INVOKER:[0-9]+]] // nested parallel masters // CHECK: {{^}}[[MASTER_ID]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID:[0-9]+]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // <- ompt_event_task_create would be expected here - // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=[[REENTER]], new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] + // CHECK: {{^}}[[MASTER_ID]]: ompt_event_task_create: parent_task_id=[[IMPLICIT_TASK_ID]], parent_task_frame.exit=[[EXIT]], parent_task_frame.reenter=0x{{[0-f]+}}, new_task_id=[[TASK_ID:[0-9]+]], codeptr_ra=[[TASK_FUNCTION:0x[0-f]+]] // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] // explicit barrier after master // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], 
exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[MASTER_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] // implicit barrier parallel // CHECK: {{^}}[[MASTER_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] @@ -84,16 +84,16 @@ int main() // CHECK: {{^}}[[THREAD_ID:[0-9]+]]: ompt_event_implicit_task_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID:[0-9]+]] // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address({{.}})=[[EXIT:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(0)=[[REENTER:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_begin: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] - // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} // this is expected to come earlier and at MASTER: // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[IMPLICIT_TASK_ID]], second_task_id=[[TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: __builtin_frame_address(1)=[[TASK_EXIT:0x[0-f]+]] // CHECK: {{^}}[[THREAD_ID]]: task level 0: parallel_id=[[PARALLEL_ID]], 
task_id=[[TASK_ID]], exit_frame=[[TASK_EXIT]], reenter_frame=[[NULL]] - // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=[[REENTER]] - // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=[[MAIN_REENTER]] + // CHECK: {{^}}[[THREAD_ID]]: task level 1: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]], exit_frame=[[EXIT]], reenter_frame=0x{{[0-f]+}} + // CHECK: {{^}}[[THREAD_ID]]: task level 2: parallel_id=[[IMPLICIT_PARALLEL_ID]], task_id=[[PARENT_TASK_ID]], exit_frame=[[NULL]], reenter_frame=0x{{[0-f]+}} // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_schedule: first_task_id=[[TASK_ID]], second_task_id=[[IMPLICIT_TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_task_end: task_id=[[TASK_ID]] // CHECK: {{^}}[[THREAD_ID]]: ompt_event_barrier_end: parallel_id=[[PARALLEL_ID]], task_id=[[IMPLICIT_TASK_ID]] diff --git a/runtime/test/parallel/omp_nested.c b/runtime/test/parallel/omp_nested.c index 8b78088..d2d5b08 100644 --- a/runtime/test/parallel/omp_nested.c +++ b/runtime/test/parallel/omp_nested.c @@ -12,6 +12,8 @@ int test_omp_nested() #ifdef _OPENMP if (omp_get_max_threads() > 4) omp_set_num_threads(4); + if (omp_get_max_threads() < 2) + omp_set_num_threads(2); #endif int counter = 0; diff --git a/runtime/test/tasking/bug_nested_proxy_task.c b/runtime/test/tasking/bug_nested_proxy_task.c index 6c00822..84e4dfd 100644 --- a/runtime/test/tasking/bug_nested_proxy_task.c +++ b/runtime/test/tasking/bug_nested_proxy_task.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile -lpthread && %libomp-run +// REQUIRES: openmp-4.5 // The runtime currently does not get dependency information from GCC. 
// UNSUPPORTED: gcc diff --git a/runtime/test/tasking/bug_proxy_task_dep_waiting.c b/runtime/test/tasking/bug_proxy_task_dep_waiting.c index e6dd895..fe8f18d 100644 --- a/runtime/test/tasking/bug_proxy_task_dep_waiting.c +++ b/runtime/test/tasking/bug_proxy_task_dep_waiting.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile -lpthread && %libomp-run +// REQUIRES: openmp-4.5 // The runtime currently does not get dependency information from GCC. // UNSUPPORTED: gcc diff --git a/runtime/test/tasking/kmp_task_reduction_nest.cpp b/runtime/test/tasking/kmp_task_reduction_nest.cpp index 63dffe4..019a9fe 100644 --- a/runtime/test/tasking/kmp_task_reduction_nest.cpp +++ b/runtime/test/tasking/kmp_task_reduction_nest.cpp @@ -1,5 +1,6 @@ // RUN: %libomp-cxx-compile-and-run // RUN: %libomp-cxx-compile -DFLG=1 && %libomp-run +// REQUIRES: openmp-5.0 // GCC-5 is needed for OpenMP 4.0 support (taskgroup) // XFAIL: gcc-4 #include <cstdio> diff --git a/runtime/test/tasking/kmp_taskloop.c b/runtime/test/tasking/kmp_taskloop.c index 4b13793..359f7a4 100644 --- a/runtime/test/tasking/kmp_taskloop.c +++ b/runtime/test/tasking/kmp_taskloop.c @@ -1,5 +1,6 @@ // RUN: %libomp-compile-and-run // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run +// REQUIRES: openmp-4.5 #include <stdio.h> #include <omp.h> #include "omp_my_sleep.h" diff --git a/runtime/test/tasking/omp_task.c b/runtime/test/tasking/omp_task.c index c534abe..5703225 100644 --- a/runtime/test/tasking/omp_task.c +++ b/runtime/test/tasking/omp_task.c @@ -43,6 +43,9 @@ int main() int i; int num_failed=0; + if (omp_get_max_threads() < 2) + omp_set_num_threads(8); + for(i = 0; i < REPETITIONS; i++) { if(!test_omp_task()) { num_failed++; diff --git a/runtime/test/tasking/omp_task_priority.c b/runtime/test/tasking/omp_task_priority.c index 7b62360..6acb4a8 100644 --- a/runtime/test/tasking/omp_task_priority.c +++ b/runtime/test/tasking/omp_task_priority.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile && env OMP_MAX_TASK_PRIORITY=42 
%libomp-run +// REQUIRES: openmp-4.5 // Test OMP 4.5 task priorities // Currently only API function and envirable parsing implemented. // Test environment sets envirable: OMP_MAX_TASK_PRIORITY=42 as tested below. diff --git a/runtime/test/tasking/omp_taskloop_grainsize.c b/runtime/test/tasking/omp_taskloop_grainsize.c index 0833073..c5756a4 100644 --- a/runtime/test/tasking/omp_taskloop_grainsize.c +++ b/runtime/test/tasking/omp_taskloop_grainsize.c @@ -1,5 +1,6 @@ // RUN: %libomp-compile-and-run // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run +// REQUIRES: openmp-4.5 // These compilers don't support the taskloop construct // UNSUPPORTED: gcc-4, gcc-5, icc-16 diff --git a/runtime/test/tasking/omp_taskloop_num_tasks.c b/runtime/test/tasking/omp_taskloop_num_tasks.c index 7c3c704..75efea6 100644 --- a/runtime/test/tasking/omp_taskloop_num_tasks.c +++ b/runtime/test/tasking/omp_taskloop_num_tasks.c @@ -1,5 +1,6 @@ // RUN: %libomp-compile-and-run // RUN: %libomp-compile && env KMP_TASKLOOP_MIN_TASKS=1 %libomp-run +// REQUIRES: openmp-4.5 // These compilers don't support the taskloop construct // UNSUPPORTED: gcc-4, gcc-5, icc-16 diff --git a/runtime/test/tasking/omp_taskyield.c b/runtime/test/tasking/omp_taskyield.c index 5bb6984..7f85413 100644 --- a/runtime/test/tasking/omp_taskyield.c +++ b/runtime/test/tasking/omp_taskyield.c @@ -49,6 +49,9 @@ int main() int i; int num_failed=0; + if (omp_get_max_threads() < 2) + omp_set_num_threads(8); + for(i = 0; i < REPETITIONS; i++) { if(!test_omp_taskyield()) { num_failed++; diff --git a/runtime/test/worksharing/for/kmp_doacross_check.c b/runtime/test/worksharing/for/kmp_doacross_check.c index 59b61e3..4eea328 100644 --- a/runtime/test/worksharing/for/kmp_doacross_check.c +++ b/runtime/test/worksharing/for/kmp_doacross_check.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile-and-run +// REQUIRES: openmp-4.5 // UNSUPPORTED: gcc // This test is incompatible with gcc because of the explicit call to // 
__kmpc_doacross_fini(). gcc relies on an implicit call to this function diff --git a/runtime/test/worksharing/for/kmp_sch_simd_guided.c b/runtime/test/worksharing/for/kmp_sch_simd_guided.c index 5c6f94b..6cf5d2f 100644 --- a/runtime/test/worksharing/for/kmp_sch_simd_guided.c +++ b/runtime/test/worksharing/for/kmp_sch_simd_guided.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile-and-run +// REQUIRES: openmp-4.5 /* Test for the 'schedule(simd:guided)' clause. Compiler needs to generate a dynamic dispatching and pass the schedule diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c index bb538d1..8b5f34a 100644 --- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c +++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_api.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile-and-run +// REQUIRES: openmp-4.5 // The test checks schedule(simd:runtime) // in combination with omp_set_schedule() @@ -66,6 +67,7 @@ run_loop( int ub; // Chunk upper bound. int st; // Chunk stride. int rc; + int nthreads = omp_get_num_threads(); int tid = omp_get_thread_num(); int gtid = __kmpc_global_thread_num(&loc); int last; @@ -134,7 +136,7 @@ run_loop( printf("Error with iter %d, %d, err %d\n", cur, max, ++err); // Update maximum for the next chunk. 
if (last) { - if (!no_chunk && cur > ch) + if (!no_chunk && cur > ch && nthreads > 1) printf("Error: too big last chunk %d (%d), tid %d, err %d\n", (int)cur, ch, tid, ++err); } else { diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c index d137831..142e9b3 100644 --- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c +++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_guided.c @@ -6,6 +6,7 @@ // RUN: env OMP_SCHEDULE=dynamic,1 %libomp-run 1 // RUN: env OMP_SCHEDULE=dynamic,2 %libomp-run 2 // RUN: env OMP_SCHEDULE=auto %libomp-run +// REQUIRES: openmp-4.5 // The test checks schedule(simd:runtime) // in combination with OMP_SCHEDULE=guided[,chunk] @@ -74,6 +75,7 @@ run_loop( int ub; // Chunk upper bound. int st; // Chunk stride. int rc; + int nthreads = omp_get_num_threads(); int tid = omp_get_thread_num(); int gtid = __kmpc_global_thread_num(&loc); int last; @@ -144,7 +146,7 @@ run_loop( if (!last && cur % ch) printf("Error with chunk %d, %d, ch %d, tid %d, err %d\n", chunk, (int)cur, ch, tid, ++err); - if (last && !no_chunk && cur > ch) + if (last && !no_chunk && cur > ch && nthreads > 1) printf("Error: too big last chunk %d (%d), tid %d, err %d\n", (int)cur, ch, tid, ++err); if (cur < max) diff --git a/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c b/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c index 4cb15d6..e2c878f 100644 --- a/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c +++ b/runtime/test/worksharing/for/kmp_sch_simd_runtime_static.c @@ -1,5 +1,6 @@ // RUN: %libomp-compile && %libomp-run // RUN: %libomp-run 1 && %libomp-run 2 +// REQUIRES: openmp-4.5 // The test checks schedule(simd:runtime) // in combination with OMP_SCHEDULE=static[,chunk] @@ -67,6 +68,7 @@ run_loop( int ub; // Chunk upper bound. int st; // Chunk stride. 
int rc; + int nthreads = omp_get_num_threads(); int tid = omp_get_thread_num(); int gtid = __kmpc_global_thread_num(&loc); int last; @@ -135,7 +137,7 @@ run_loop( printf("Error with iter %d, %d, err %d\n", cur, max, ++err); // Update maximum for the next chunk. if (last) { - if (!no_chunk && cur > ch) + if (!no_chunk && cur > ch && nthreads > 1) printf("Error: too big last chunk %d (%d), tid %d, err %d\n", (int)cur, ch, tid, ++err); } else { diff --git a/runtime/test/worksharing/for/omp_doacross.c b/runtime/test/worksharing/for/omp_doacross.c index 4187112..32e8e82 100644 --- a/runtime/test/worksharing/for/omp_doacross.c +++ b/runtime/test/worksharing/for/omp_doacross.c @@ -1,4 +1,5 @@ // RUN: %libomp-compile-and-run +// REQUIRES: openmp-4.5 // XFAIL: gcc-4, gcc-5, clang-3.7, clang-3.8, icc-15, icc-16 #include <stdio.h> #include <stdlib.h> @@ -51,6 +52,8 @@ int test_doacross() { int main(int argc, char **argv) { int i; int num_failed = 0; + if (omp_get_max_threads() < 2) + omp_set_num_threads(4); for (i = 0; i < REPETITIONS; i++) { if (!test_doacross()) { num_failed++; |