13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
18 #include "kmp_stats.h"
20 #include "ompt-specific.h"
// Returns the stored flag location pointer `loc` unchanged (still
// volatile-qualified).
55 volatile P *get() {
return loc; }
// Returns the flag location as an untyped `void *`. The pointer is first
// passed through CCAST to `P *` (dropping the volatile qualifier) and then
// through RCAST to `void *`.
// NOTE(review): CCAST/RCAST are project cast macros defined outside this
// view; presumably const_cast/reinterpret_cast wrappers -- confirm.
56 void *get_void_p() {
return RCAST(
void *, CCAST(P *, loc)); }
// Re-points this flag object at `new_loc`; the value stored at the location
// is not touched.
57 void set(
volatile P *new_loc) { loc = new_loc; }
// Reads the current flag value with a plain dereference of the volatile
// pointer (no explicit memory-ordering argument is used here).
59 P load() {
return *loc; }
// Writes `val` to the flag location with a plain assignment through the
// volatile pointer.
60 void store(P val) { *loc = val; }
// Returns the stored pointer to the std::atomic flag location.
77 std::atomic<P> *
get() {
return loc; }
// Re-points this flag object at a different std::atomic location; the
// atomic's value is not modified.
85 void set(std::atomic<P> *new_loc) {
loc = new_loc; }
// Reads the flag value using an atomic load with acquire ordering.
93 P
load() {
return loc->load(std::memory_order_acquire); }
// Writes `val` to the flag using an atomic store with release ordering.
97 void store(P val) {
loc->store(val, std::memory_order_release); }
// Reports the end of an implicit barrier / implicit task to OMPT tool
// callbacks for `this_thr`, and updates the thread's OMPT state.
//
// Visible parameters: `this_thr` (the thread descriptor) and `ompt_state`
// (the OMPT state to test). The body also uses `tId`, whose declaration is
// not visible in this view -- presumably a trailing task-data parameter.
//
// NOTE(review): closing braces and any `else` arms are not visible here, so
// the exact nesting of the statements below cannot be confirmed from this
// view.
122 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
123 ompt_state_t ompt_state,
125 int ds_tid = this_thr->th.th_info.ds.ds_tid;
// Work is only done when the thread was waiting in an implicit barrier.
126 if (ompt_state == ompt_state_wait_barrier_implicit) {
127 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
129 void *codeptr = NULL;
// Emit scope-end events for the implicit barrier, each guarded by its own
// "callback enabled" bit.
130 if (ompt_enabled.ompt_callback_sync_region_wait) {
131 ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
132 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
135 if (ompt_enabled.ompt_callback_sync_region) {
136 ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
137 ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
// Threads for which KMP_MASTER_TID(ds_tid) is false additionally report the
// end of their implicit task.
141 if (!KMP_MASTER_TID(ds_tid)) {
142 if (ompt_enabled.ompt_callback_implicit_task) {
// The task kind passed to the callback is ompt_task_initial when the
// recorded parallel_flags contain ompt_parallel_league, otherwise
// ompt_task_implicit.
143 int flags = this_thr->th.ompt_thread_info.parallel_flags;
144 flags = (flags & ompt_parallel_league) ? ompt_task_initial
145 : ompt_task_implicit;
146 ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
147 ompt_scope_end, NULL, tId, 0, ds_tid, flags);
// Two different final states are assigned below (idle, then overhead).
// NOTE(review): presumably these sit in separate branches (non-master vs.
// master) -- the separating `else` is not visible; confirm.
150 this_thr->th.ompt_thread_info.state = ompt_state_idle;
152 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
// Generic wait routine: thread `this_thr` waits until `flag` reports that it
// is done, polling flag->notdone_check() in a loop and optionally running
// tasks, yielding, and suspending in the meantime.
//
// Template parameters:
//   C           - the flag class; must provide get(), done_check(),
//                 notdone_check() and suspend() as used below.
//   final_spin  - compile-time switch tested in the OMPT and tasking paths.
//   cancellable - defaults to false.
//   sleepable   - defaults to true.
// Parameters:
//   this_thr     - descriptor of the waiting thread.
//   flag         - flag object being waited on.
//   itt_sync_obj - only present when USE_ITT_BUILD_ARG expands its argument.
//
// NOTE(review): this view is missing many interior lines (return type,
// several declarations, early returns, `else` arms, closing braces and
// `#endif`s). The comments below describe only what the visible statements
// do; control-flow nesting should be confirmed against the full file.
163 template <
class C,
int final_spin,
bool cancellable =
false,
164 bool sleepable =
true>
166 __kmp_wait_template(kmp_info_t *this_thr,
167 C *flag USE_ITT_BUILD_ARG(
void *itt_sync_obj)) {
168 #if USE_ITT_BUILD && USE_ITT_NOTIFY
// `spin` is the flag location, used as the identity passed to the
// KMP_FSYNC_* annotation macros below.
169 volatile void *spin = flag->get();
173 int tasks_completed = FALSE;
176 kmp_uint64 poll_count;
177 kmp_uint64 hibernate_goal;
179 kmp_uint32 hibernate;
182 KMP_FSYNC_SPIN_INIT(spin, NULL);
// Fast path: the flag is already in its done state before any spinning.
183 if (flag->done_check()) {
184 KMP_FSYNC_SPIN_ACQUIRED(CCAST(
void *, spin));
187 th_gtid = this_thr->th.th_info.ds.ds_gtid;
// Pre-wait check of the team's cancellation request against cancel_parallel.
// The action taken on a match is not visible in this view.
189 kmp_team_t *team = this_thr->th.th_team;
190 if (team && team->t.t_cancel_request == cancel_parallel)
// Publish (release store) that this thread is now in the blocking wait.
195 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking,
true);
198 (
"__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
199 #if KMP_STATS_ENABLED
// OMPT bookkeeping on entry: remember the thread's OMPT state and select the
// task-data pointer `tId` that is later handed to __ompt_implicit_task_end.
254 ompt_state_t ompt_entry_state;
256 if (ompt_enabled.enabled) {
257 ompt_entry_state = this_thr->th.ompt_thread_info.state;
258 if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
259 KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
// `tId` is taken from one of three sources below; the conditions that choose
// between them are not all visible here.
260 ompt_lw_taskteam_t *team =
261 this_thr->th.th_team->t.ompt_serialized_team_info;
263 tId = &(team->ompt_task_info.task_data);
265 tId = OMPT_CUR_TASK_DATA(this_thr);
268 tId = &(this_thr->th.ompt_thread_info.task_data);
// With final_spin set and either immediate-exec tasking or no task team,
// the implicit-task end is reported right away.
270 if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
271 this_thr->th.th_task_team == NULL)) {
273 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
278 KMP_INIT_YIELD(spins);
// The hibernate threshold is only set up when the blocktime is not
// KMP_MAX_BLOCKTIME, or when the runtime is soft-paused.
280 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
281 __kmp_pause_status == kmp_soft_paused) {
285 #ifdef KMP_ADJUST_BLOCKTIME
286 if (__kmp_pause_status == kmp_soft_paused ||
287 (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
292 hibernate = this_thr->th.th_team_bt_intervals;
294 hibernate = this_thr->th.th_team_bt_intervals;
// `hibernate` becomes an absolute value by adding the current global time
// counter to the interval count.
305 hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
306 KF_TRACE(20, (
"__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
307 th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
308 hibernate - __kmp_global.g.g_time.dt.t_value));
// Alternative goal based on KMP_NOW(): the goal is "now" when soft-paused,
// otherwise "now" plus the team's blocktime intervals.
// NOTE(review): the `#endif // KMP_USE_MONITOR` below suggests this is the
// non-monitor configuration; the matching #if/#else is not visible.
310 if (__kmp_pause_status == kmp_soft_paused) {
312 hibernate_goal = KMP_NOW();
314 hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
316 #endif // KMP_USE_MONITOR
// Oversubscription: more runtime threads than available processors.
319 oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
// Main wait loop: iterate until the flag stops reporting "not done".
323 while (flag->notdone_check()) {
324 kmp_task_team_t *task_team = NULL;
325 if (__kmp_tasking_mode != tskm_immediate_exec) {
326 task_team = this_thr->th.th_task_team;
// When a task team exists and is active, tasking work is attempted with the
// arguments shown; other paths mark the thread KMP_SAFE_TO_REAP and, in one
// of them, detach the thread from its task team.
334 if (task_team != NULL) {
335 if (TCR_SYNC_4(task_team->tt.tt_active)) {
336 if (KMP_TASKING_ENABLED(task_team))
338 this_thr, th_gtid, final_spin,
339 &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
341 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
343 KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
346 if (final_spin && ompt_enabled.enabled)
347 __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
349 this_thr->th.th_task_team = NULL;
350 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
353 this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
357 KMP_FSYNC_SPIN_PREPARE(CCAST(
void *, spin));
// Global shutdown check inside the loop; __kmp_abort_thread() is called when
// the abort flag is also set.
358 if (TCR_4(__kmp_global.g.g_done)) {
359 if (__kmp_global.g.g_abort)
360 __kmp_abort_thread();
366 KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
368 #if KMP_STATS_ENABLED
// Stats: a thread flagged idle while in FORK_JOIN_BARRIER state is switched
// to IDLE and the OMP_idle partitioned timer is pushed.
371 if (this_thr->th.th_stats->isIdle() &&
372 KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
373 KMP_SET_THREAD_STATE(IDLE);
374 KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
// In-loop cancellation check (action on match not visible).
379 kmp_team_t *team = this_thr->th.th_team;
380 if (team && team->t.t_cancel_request == cancel_parallel)
// The following tests are evaluated before the suspend call further down;
// their bodies are not visible in this view.
385 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
386 __kmp_pause_status != kmp_soft_paused)
390 if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
395 if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
398 if (KMP_BLOCKING(hibernate_goal, poll_count++))
406 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
407 __kmp_pause_status != kmp_soft_paused)
410 KF_TRACE(50, (
"__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
// th_blocking is cleared immediately before the suspend call and set again
// immediately after it returns.
414 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking,
false);
416 flag->suspend(th_gtid);
419 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking,
true);
// After waking: re-check global shutdown; otherwise, with tasking enabled, a
// thread previously marked KMP_SAFE_TO_REAP is marked KMP_NOT_SAFE_TO_REAP.
422 if (TCR_4(__kmp_global.g.g_done)) {
423 if (__kmp_global.g.g_abort)
424 __kmp_abort_thread();
426 }
else if (__kmp_tasking_mode != tskm_immediate_exec &&
427 this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
428 this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
// OMPT bookkeeping on exit: report the implicit-task end using the current
// state, re-read the state, and turn an idle state into overhead.
434 ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
435 if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
438 __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
439 ompt_exit_state = this_thr->th.ompt_thread_info.state;
442 if (ompt_exit_state == ompt_state_idle) {
443 this_thr->th.ompt_thread_info.state = ompt_state_overhead;
447 #if KMP_STATS_ENABLED
// Stats: undo the IDLE transition made inside the loop.
449 if (KMP_GET_THREAD_STATE() == IDLE) {
450 KMP_POP_PARTITIONED_TIMER();
451 KMP_SET_THREAD_STATE(thread_state);
452 this_thr->th.th_stats->resetIdleFlag();
// The wait is over: clear th_blocking and annotate the acquisition.
458 KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking,
false);
460 KMP_FSYNC_SPIN_ACQUIRED(CCAST(
void *, spin));
// Post-wait cancellation handling: if the parallel region was cancelled and
// this thread had already recorded tasks_completed, the task team's
// tt_unfinished_threads counter is atomically incremented again.
462 kmp_team_t *team = this_thr->th.th_team;
463 if (team && team->t.t_cancel_request == cancel_parallel) {
464 if (tasks_completed) {
467 kmp_task_team_t *task_team = this_thr->th.th_task_team;
468 std::atomic<kmp_int32> *unfinished_threads =
469 &(task_team->tt.tt_unfinished_threads);
470 KMP_ATOMIC_INC(unfinished_threads);
// Generic release routine: performs the flag's internal_release() and then,
// when the blocktime is not KMP_MAX_BLOCKTIME and the flag reports a sleeping
// waiter, calls flag->resume() for each registered waiter.
//
// Template parameter C is the flag class; it must provide get(),
// get_void_p(), internal_release(), is_any_sleeping(), get_num_waiters(),
// get_waiter() and resume() as used below.
//
// NOTE(review): closing braces and parts of the trace statements are not
// visible in this view.
482 template <
class C>
static inline void __kmp_release_template(C *flag) {
// gtid is only looked up once __kmp_init_gtid is set; otherwise -1 is used.
// In the visible code it is only passed to the trace macros.
484 int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
// NOTE(review): this trace formats flag->get() with %x; for the flag classes
// visible in this file get() returns a pointer, so %p looks like the
// matching specifier -- confirm.
486 KF_TRACE(20, (
"__kmp_release: T#%d releasing flag(%x)\n", gtid, flag->get()));
487 KMP_DEBUG_ASSERT(flag->get());
488 KMP_FSYNC_RELEASING(flag->get_void_p());
// The actual release is delegated to the flag class.
490 flag->internal_release();
// NOTE(review): the format string below has two conversion specifiers but
// at least three arguments follow it (the statement continues on a line not
// visible here) -- confirm the argument list against the format.
492 KF_TRACE(100, (
"__kmp_release: T#%d set new spin=%d\n", gtid, flag->get(),
// Waiters can only be asleep when the blocktime is not KMP_MAX_BLOCKTIME, so
// the wake-up scan is skipped otherwise.
495 if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
498 if (flag->is_any_sleeping()) {
499 for (
unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
501 kmp_info_t *waiter = flag->get_waiter(i);
503 int wait_gtid = waiter->th.th_info.ds.ds_gtid;
505 KF_TRACE(50, (
"__kmp_release: T#%d waking up thread T#%d since sleep "
507 gtid, wait_gtid, flag->get()));
508 flag->resume(wait_gtid);
// Primary template of flag_traits: intentionally empty. The per-width
// operations are supplied by the explicit specializations for kmp_uint32 and
// kmp_uint64.
515 template <
typename FlagType>
struct flag_traits {};
// flag_traits specialization for 32-bit flags: maps the generic operations
// (read, add, or, and) onto the 32-bit runtime macros.
// NOTE(review): closing braces of the member functions and of the struct are
// not visible in this view.
517 template <>
struct flag_traits<kmp_uint32> {
518 typedef kmp_uint32 flag_t;
// Reads a flag value through the TCR_4 macro.
520 static inline flag_t tcr(flag_t f) {
return TCR_4(f); }
// Applies KMP_TEST_THEN_ADD4_32 to *f (after reinterpreting the pointer as
// volatile kmp_int32 *) and returns the macro's result.
521 static inline flag_t test_then_add4(
volatile flag_t *f) {
522 return KMP_TEST_THEN_ADD4_32(RCAST(
volatile kmp_int32 *, f));
// Applies KMP_TEST_THEN_OR32 with mask v and returns the macro's result.
524 static inline flag_t test_then_or(
volatile flag_t *f, flag_t v) {
525 return KMP_TEST_THEN_OR32(f, v);
// Applies KMP_TEST_THEN_AND32 with mask v and returns the macro's result.
527 static inline flag_t test_then_and(
volatile flag_t *f, flag_t v) {
528 return KMP_TEST_THEN_AND32(f, v);
// flag_traits specialization for 64-bit flags: maps the generic operations
// (read, add, or, and) onto the 64-bit runtime macros.
// NOTE(review): closing braces of the member functions and of the struct are
// not visible in this view.
532 template <>
struct flag_traits<kmp_uint64> {
533 typedef kmp_uint64 flag_t;
// Reads a flag value through the TCR_8 macro.
535 static inline flag_t tcr(flag_t f) {
return TCR_8(f); }
// Applies KMP_TEST_THEN_ADD4_64 to *f (after reinterpreting the pointer as
// volatile kmp_int64 *) and returns the macro's result.
536 static inline flag_t test_then_add4(
volatile flag_t *f) {
537 return KMP_TEST_THEN_ADD4_64(RCAST(
volatile kmp_int64 *, f));
// Applies KMP_TEST_THEN_OR64 with mask v and returns the macro's result.
539 static inline flag_t test_then_or(
volatile flag_t *f, flag_t v) {
540 return KMP_TEST_THEN_OR64(f, v);
// Applies KMP_TEST_THEN_AND64 with mask v and returns the macro's result.
542 static inline flag_t test_then_and(
volatile flag_t *f, flag_t v) {
543 return KMP_TEST_THEN_AND64(f, v);
// kmp_basic_flag_native: basic flag built on kmp_flag_native<FlagType>, i.e.
// on a `volatile FlagType *` location, with all read-modify-write operations
// routed through flag_traits<FlagType>.
// It holds a `checker` value that the flag is compared against, and tracks
// waiting threads via `waiting_threads` / `num_waiting_threads`.
// NOTE(review): the `class` line, the member declarations and most closing
// braces are not visible in this view.
548 template <
typename FlagType>
550 typedef flag_traits<FlagType> traits_type;
// Constructor: flag at `p`, no waiters.
// NOTE(review): `checker` does not appear in the visible initializer list of
// this constructor or the next one -- confirm it is initialized elsewhere
// before done_check()/notdone_check() are used.
558 kmp_basic_flag_native(
volatile FlagType *p)
559 :
kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
// Constructor: flag at `p` with a single waiter `thr`.
560 kmp_basic_flag_native(
volatile FlagType *p, kmp_info_t *thr)
561 :
kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
562 waiting_threads[0] = thr;
// Constructor taking the value `c`; no waiters. The first part of its
// initializer list is not visible here (presumably it sets checker to c).
564 kmp_basic_flag_native(
volatile FlagType *p, FlagType c)
566 num_waiting_threads(0) {}
// Returns the i-th waiting thread; the index is range-checked only by a
// debug assertion.
571 kmp_info_t *get_waiter(kmp_uint32 i) {
572 KMP_DEBUG_ASSERT(i < num_waiting_threads);
573 return waiting_threads[i];
// Returns the number of registered waiting threads.
578 kmp_uint32 get_num_waiters() {
return num_waiting_threads; }
// Registers `thr` as the only waiter (slot 0, count forced to 1).
584 void set_waiter(kmp_info_t *thr) {
585 waiting_threads[0] = thr;
586 num_waiting_threads = 1;
// True when the current flag value (read via traits_type::tcr) equals
// checker.
591 bool done_check() {
return traits_type::tcr(*(this->get())) == checker; }
// Same comparison, applied to a previously read value instead of a fresh
// read.
596 bool done_check_val(FlagType old_loc) {
return old_loc == checker; }
// True when the current flag value differs from checker; this is the
// negation of done_check().
604 bool notdone_check() {
return traits_type::tcr(*(this->get())) != checker; }
// Releases waiters by applying traits_type::test_then_add4 to the flag; the
// returned value is discarded.
609 void internal_release() {
610 (void)traits_type::test_then_add4((
volatile FlagType *)this->get());
// Sets the KMP_BARRIER_SLEEP_STATE bit(s) via test_then_or and returns that
// helper's result.
617 FlagType set_sleeping() {
618 return traits_type::test_then_or((
volatile FlagType *)this->get(),
619 KMP_BARRIER_SLEEP_STATE);
// Clears the KMP_BARRIER_SLEEP_STATE bit(s) via test_then_and and returns
// that helper's result.
626 FlagType unset_sleeping() {
627 return traits_type::test_then_and((
volatile FlagType *)this->get(),
628 ~KMP_BARRIER_SLEEP_STATE);
// True when the sleep bit is set in the supplied value.
634 bool is_sleeping_val(FlagType old_loc) {
635 return old_loc & KMP_BARRIER_SLEEP_STATE;
// is_sleeping() and is_any_sleeping() both test the sleep bit of the
// current flag value; in this class they are identical.
640 bool is_sleeping() {
return is_sleeping_val(*(this->get())); }
641 bool is_any_sleeping() {
return is_sleeping_val(*(this->get())); }
// This flag type has no "stolen" byte; always returns NULL.
642 kmp_uint8 *get_stolen() {
return NULL; }
// This flag type always reports bs_last_barrier as its barrier type.
643 enum barrier_type get_bt() {
return bs_last_barrier; }
// kmp_basic_flag: basic flag built on kmp_flag<FlagType>, i.e. on a
// `std::atomic<FlagType> *` location. Reads go through this->load() and
// read-modify-write operations through the KMP_ATOMIC_* macros.
// It holds a `checker` value that the flag is compared against, and tracks
// waiting threads via `waiting_threads` / `num_waiting_threads`.
// NOTE(review): the member declarations and most closing braces are not
// visible in this view.
646 template <
typename FlagType>
class kmp_basic_flag :
public kmp_flag<FlagType> {
647 typedef flag_traits<FlagType> traits_type;
// Constructor: flag at `p`, no waiters.
// NOTE(review): `checker` does not appear in the visible initializer list of
// this constructor or the next one -- confirm it is initialized elsewhere
// before done_check()/notdone_check() are used.
655 kmp_basic_flag(std::atomic<FlagType> *p)
656 :
kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
// Constructor: flag at `p` with a single waiter `thr`.
657 kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
658 :
kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
659 waiting_threads[0] = thr;
// Constructor: flag at `p`, checker set to `c`, no waiters.
661 kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
662 :
kmp_flag<FlagType>(p, traits_type::t), checker(c),
663 num_waiting_threads(0) {}
// Returns the i-th waiting thread; the index is range-checked only by a
// debug assertion.
668 kmp_info_t *get_waiter(kmp_uint32 i) {
669 KMP_DEBUG_ASSERT(i < num_waiting_threads);
670 return waiting_threads[i];
// Returns the number of registered waiting threads.
675 kmp_uint32 get_num_waiters() {
return num_waiting_threads; }
// Registers `thr` as the only waiter (slot 0, count forced to 1).
681 void set_waiter(kmp_info_t *thr) {
682 waiting_threads[0] = thr;
683 num_waiting_threads = 1;
// True when the value returned by load() equals checker.
688 bool done_check() {
return this->
load() == checker; }
// Same comparison, applied to a previously read value instead of a fresh
// load.
693 bool done_check_val(FlagType old_loc) {
return old_loc == checker; }
// True when the value returned by load() differs from checker; this is the
// negation of done_check().
701 bool notdone_check() {
return this->
load() != checker; }
// Releases waiters by atomically adding 4 to the flag.
706 void internal_release() { KMP_ATOMIC_ADD(this->
get(), 4); }
// Atomically ORs KMP_BARRIER_SLEEP_STATE into the flag and returns the
// macro's result.
712 FlagType set_sleeping() {
713 return KMP_ATOMIC_OR(this->
get(), KMP_BARRIER_SLEEP_STATE);
// Atomically ANDs the flag with ~KMP_BARRIER_SLEEP_STATE and returns the
// macro's result.
720 FlagType unset_sleeping() {
721 return KMP_ATOMIC_AND(this->
get(), ~KMP_BARRIER_SLEEP_STATE);
// True when the sleep bit is set in the supplied value.
727 bool is_sleeping_val(FlagType old_loc) {
728 return old_loc & KMP_BARRIER_SLEEP_STATE;
// is_sleeping() and is_any_sleeping() both test the sleep bit of a fresh
// load(); in this class they are identical.
733 bool is_sleeping() {
return is_sleeping_val(this->
load()); }
734 bool is_any_sleeping() {
return is_sleeping_val(this->
load()); }
// This flag type has no "stolen" byte; always returns NULL.
735 kmp_uint8 *get_stolen() {
return NULL; }
// This flag type always reports bs_last_barrier as its barrier type.
736 enum barrier_type get_bt() {
return bs_last_barrier; }
// kmp_flag_32: concrete 32-bit flag over std::atomic<kmp_uint32>, derived
// from kmp_basic_flag<kmp_uint32>. It binds the generic wait/release
// templates and the 32-bit suspend/resume/execute_tasks entry points to this
// flag type.
// NOTE(review): access specifiers, some closing braces and the branch that
// selects between the two wait instantiations are not visible in this view.
739 class kmp_flag_32 :
public kmp_basic_flag<kmp_uint32> {
// The three constructors forward unchanged to the matching base-class
// constructors (location only / location + waiter / location + checker).
741 kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
742 kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
743 : kmp_basic_flag<kmp_uint32>(p, thr) {}
744 kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
745 : kmp_basic_flag<kmp_uint32>(p, c) {}
// Forwards to __kmp_suspend_32 for thread th_gtid on this flag.
746 void suspend(
int th_gtid) { __kmp_suspend_32(th_gtid,
this); }
// Forwards to __kmp_resume_32 for thread th_gtid on this flag.
747 void resume(
int th_gtid) { __kmp_resume_32(th_gtid,
this); }
// Forwards all arguments, plus this flag, to __kmp_execute_tasks_32 and
// returns its result.
748 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid,
int final_spin,
749 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
750 kmp_int32 is_constrained) {
751 return __kmp_execute_tasks_32(
752 this_thr, gtid,
this, final_spin,
753 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
// Waits on this flag through __kmp_wait_template, instantiated with
// final_spin as TRUE or FALSE. The run-time test choosing between the two
// calls is not visible here (presumably `if (final_spin)` -- confirm).
755 void wait(kmp_info_t *this_thr,
756 int final_spin USE_ITT_BUILD_ARG(
void *itt_sync_obj)) {
758 __kmp_wait_template<kmp_flag_32, TRUE>(
759 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
761 __kmp_wait_template<kmp_flag_32, FALSE>(
762 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
// Releases this flag through the generic release template.
764 void release() { __kmp_release_template(
this); }
// kmp_flag_64: concrete 64-bit flag over a `volatile kmp_uint64 *` location,
// derived from kmp_basic_flag_native<kmp_uint64>. It binds the generic
// wait/release templates and the 64-bit suspend/resume/execute_tasks entry
// points to this flag type, and adds a cancellable, non-sleeping wait.
// NOTE(review): access specifiers, some closing braces, part of one
// signature and the branches that select between wait instantiations are not
// visible in this view.
768 class kmp_flag_64 :
public kmp_basic_flag_native<kmp_uint64> {
// The three constructors forward unchanged to the matching base-class
// constructors (location only / location + waiter / location + checker).
770 kmp_flag_64(
volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
771 kmp_flag_64(
volatile kmp_uint64 *p, kmp_info_t *thr)
772 : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
773 kmp_flag_64(
volatile kmp_uint64 *p, kmp_uint64 c)
774 : kmp_basic_flag_native<kmp_uint64>(p, c) {}
// Forwards to __kmp_suspend_64 for thread th_gtid on this flag.
775 void suspend(
int th_gtid) { __kmp_suspend_64(th_gtid,
this); }
// Forwards to __kmp_resume_64 for thread th_gtid on this flag.
776 void resume(
int th_gtid) { __kmp_resume_64(th_gtid,
this); }
// Forwards all arguments, plus this flag, to __kmp_execute_tasks_64 and
// returns its result.
777 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid,
int final_spin,
778 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
779 kmp_int32 is_constrained) {
780 return __kmp_execute_tasks_64(
781 this_thr, gtid,
this, final_spin,
782 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
// Waits on this flag through __kmp_wait_template with the default
// cancellable/sleepable settings, instantiated with final_spin as TRUE or
// FALSE. The selecting test is not visible here (presumably
// `if (final_spin)` -- confirm).
784 void wait(kmp_info_t *this_thr,
785 int final_spin USE_ITT_BUILD_ARG(
void *itt_sync_obj)) {
787 __kmp_wait_template<kmp_flag_64, TRUE>(
788 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
790 __kmp_wait_template<kmp_flag_64, FALSE>(
791 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
// Variant of wait() that instantiates the template with cancellable = true
// and sleepable = false, and stores the template's result in `retval`.
// NOTE(review): the declaration of `retval` and the return statement are not
// visible; presumably `retval` is what this bool function returns.
793 bool wait_cancellable_nosleep(kmp_info_t *this_thr,
795 USE_ITT_BUILD_ARG(
void *itt_sync_obj)) {
798 retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
799 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
801 retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
802 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
// Releases this flag through the generic release template.
805 void release() { __kmp_release_template(
this); }
// Members and methods of a flag type that operates on a single byte (at
// index `offset`) inside a 64-bit word, while sleep-state handling still uses
// the whole 64-bit word. The constructor names below identify the class as
// kmp_flag_oncore.
// NOTE(review): the class header, some member declarations (e.g. checker,
// offset, flag_switch), parts of the constructor initializer lists, return
// statements and closing braces are not visible in this view.
// Storage for the single waiter slot and the waiter count.
812 kmp_info_t *waiting_threads[1];
813 kmp_uint32 num_waiting_threads;
// Barrier type and owning thread; both are used by notdone_check() to reach
// this_thr->th.th_bar[bt].
817 enum barrier_type bt;
818 kmp_info_t *this_thr;
// Returns a reference to the byte at index `offset` within the 64-bit word at
// `loc`, after casting away volatile (CCAST) and reinterpreting the word as
// an array of unsigned char (RCAST).
824 unsigned char &byteref(
volatile kmp_uint64 *loc,
size_t offset) {
825 return (RCAST(
unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
// Constructor: location only; flag_switch starts false.
829 kmp_flag_oncore(
volatile kmp_uint64 *p)
831 flag_switch(false) {}
// Constructor: location plus byte index `idx`; flag_switch starts false.
832 kmp_flag_oncore(
volatile kmp_uint64 *p, kmp_uint32 idx)
834 offset(idx), flag_switch(false) {}
// Constructor taking every setting: value `c`, byte index, barrier type,
// owning thread and (when ITT is built in) the ITT sync object. Starts with
// no waiters and flag_switch false.
835 kmp_flag_oncore(
volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
836 enum barrier_type bar_t,
837 kmp_info_t *thr USE_ITT_BUILD_ARG(
void *itt))
839 num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
840 this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
// Returns the i-th waiting thread; the index is range-checked only by a
// debug assertion.
841 kmp_info_t *get_waiter(kmp_uint32 i) {
842 KMP_DEBUG_ASSERT(i < num_waiting_threads);
843 return waiting_threads[i];
// Returns the number of registered waiting threads.
845 kmp_uint32 get_num_waiters() {
return num_waiting_threads; }
// Registers `thr` as the only waiter (slot 0, count forced to 1).
846 void set_waiter(kmp_info_t *thr) {
847 waiting_threads[0] = thr;
848 num_waiting_threads = 1;
// True when the byte at `offset` of the supplied 64-bit value equals
// checker. `old_loc` is a by-value copy, so byteref reads the local copy.
850 bool done_check_val(kmp_uint64 old_loc) {
851 return byteref(&old_loc, offset) == checker;
// Applies done_check_val to the current value of the flag word.
853 bool done_check() {
return done_check_val(*get()); }
// Reports whether waiting must continue. Visible steps:
//  - test the owning thread's barrier wait_flag against
//    KMP_BARRIER_SWITCH_TO_OWN_FLAG (the statement it guards is not visible;
//    presumably it sets flag_switch -- confirm);
//  - test "byte at offset != 1 and no switch requested" (guarded statement
//    not visible);
//  - otherwise, if a switch was requested, mark the barrier as
//    KMP_BARRIER_SWITCHING and wait on the thread's own b_go flag through a
//    temporary kmp_flag_64 with checker KMP_BARRIER_STATE_BUMP.
854 bool notdone_check() {
856 if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
858 if (byteref(get(), offset) != 1 && !flag_switch)
860 else if (flag_switch) {
861 this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
862 kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
863 (kmp_uint64)KMP_BARRIER_STATE_BUMP);
864 __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
// Releases the waiter by setting this flag's byte to 1. With blocktime equal
// to KMP_MAX_BLOCKTIME the byte is written directly; the other visible
// statements build a 64-bit `mask` with only that byte set and OR it into the
// word via KMP_TEST_THEN_OR64.
// NOTE(review): the `else` and the declaration of `mask` are not visible.
868 void internal_release() {
870 if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
871 byteref(get(), offset) = 1;
874 byteref(&mask, offset) = 1;
875 KMP_TEST_THEN_OR64(get(), mask);
// Sets KMP_BARRIER_SLEEP_STATE in the 64-bit word via KMP_TEST_THEN_OR64 and
// returns the macro's result.
878 kmp_uint64 set_sleeping() {
879 return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
// Clears KMP_BARRIER_SLEEP_STATE in the 64-bit word via KMP_TEST_THEN_AND64
// and returns the macro's result.
881 kmp_uint64 unset_sleeping() {
882 return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
// True when the sleep bit is set in the supplied value.
884 bool is_sleeping_val(kmp_uint64 old_loc) {
885 return old_loc & KMP_BARRIER_SLEEP_STATE;
// is_sleeping() and is_any_sleeping() both test the sleep bit of the
// current flag word; in this class they are identical.
887 bool is_sleeping() {
return is_sleeping_val(*get()); }
888 bool is_any_sleeping() {
return is_sleeping_val(*get()); }
// Waits on this flag through __kmp_wait_template, instantiated with
// final_spin as TRUE or FALSE; the ITT object passed is the member
// itt_sync_obj. The selecting test is not visible here (presumably
// `if (final_spin)` -- confirm).
889 void wait(kmp_info_t *this_thr,
int final_spin) {
891 __kmp_wait_template<kmp_flag_oncore, TRUE>(
892 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
894 __kmp_wait_template<kmp_flag_oncore, FALSE>(
895 this_thr,
this USE_ITT_BUILD_ARG(itt_sync_obj));
// Releases this flag through the generic release template.
897 void release() { __kmp_release_template(
this); }
// Forwards to __kmp_suspend_oncore for thread th_gtid on this flag.
898 void suspend(
int th_gtid) { __kmp_suspend_oncore(th_gtid,
this); }
// Forwards to __kmp_resume_oncore for thread th_gtid on this flag.
899 void resume(
int th_gtid) { __kmp_resume_oncore(th_gtid,
this); }
// Forwards all arguments, plus this flag, to __kmp_execute_tasks_oncore and
// returns its result.
900 int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid,
int final_spin,
901 int *thread_finished USE_ITT_BUILD_ARG(
void *itt_sync_obj),
902 kmp_int32 is_constrained) {
903 return __kmp_execute_tasks_oncore(
904 this_thr, gtid,
this, final_spin,
905 thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
// This flag type has no "stolen" byte; always returns NULL.
907 kmp_uint8 *get_stolen() {
return NULL; }
// Returns the barrier type stored in `bt`.
908 enum barrier_type get_bt() {
return bt; }
// Resumes thread `gtid` when the caller only has an untyped pointer to the
// flag. The pointer is viewed as a kmp_flag_64 * solely to call get_type();
// the result selects which resume routine is called, and each routine is
// passed NULL as its flag argument.
// NOTE(review): the `case` labels, `break`s and closing braces are not
// visible in this view, so the mapping from type value to routine cannot be
// confirmed from here.
914 static inline void __kmp_null_resume_wrapper(
int gtid,
volatile void *flag) {
918 switch (RCAST(kmp_flag_64 *, CCAST(
void *, flag))->get_type()) {
920 __kmp_resume_32(gtid, NULL);
923 __kmp_resume_64(gtid, NULL);
926 __kmp_resume_oncore(gtid, NULL);
935 #endif // KMP_WAIT_RELEASE_H