LLVM OpenMP* Runtime Library
kmp_wait_release.h
1 /*
2  * kmp_wait_release.h -- Wait/Release implementation
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_WAIT_RELEASE_H
14 #define KMP_WAIT_RELEASE_H
15 
16 #include "kmp.h"
17 #include "kmp_itt.h"
18 #include "kmp_stats.h"
19 #if OMPT_SUPPORT
20 #include "ompt-specific.h"
21 #endif
22 
39 enum flag_type {
40  flag32, // 32-bit flag
41  flag64, // 64-bit flag
42  flag_oncore // 64-bit flag used by the hierarchical on-core barrier
43 };
44 
48 template <typename P> class kmp_flag_native {
49  volatile P *loc;
50  flag_type t;
51 
52 public:
53  typedef P flag_t;
54  kmp_flag_native(volatile P *p, flag_type ft) : loc(p), t(ft) {}
55  volatile P *get() { return loc; }
56  void *get_void_p() { return RCAST(void *, CCAST(P *, loc)); }
57  void set(volatile P *new_loc) { loc = new_loc; }
58  flag_type get_type() { return t; }
59  P load() { return *loc; }
60  void store(P val) { *loc = val; }
61 };
62 
66 template <typename P> class kmp_flag {
68  std::atomic<P> *loc; // pointer to the flag storage, modified by another thread
70  flag_type t; // "type" of the flag stored at loc
71 public:
72  typedef P flag_t;
73  kmp_flag(std::atomic<P> *p, flag_type ft) : loc(p), t(ft) {}
77  std::atomic<P> *get() { return loc; }
81  void *get_void_p() { return RCAST(void *, loc); }
85  void set(std::atomic<P> *new_loc) { loc = new_loc; }
89  flag_type get_type() { return t; }
93  P load() { return loc->load(std::memory_order_acquire); }
97  void store(P val) { loc->store(val, std::memory_order_release); }
98  // Derived classes must provide the following:
99  /*
100  kmp_info_t * get_waiter(kmp_uint32 i);
101  kmp_uint32 get_num_waiters();
102  bool done_check();
103  bool done_check_val(P old_loc);
104  bool notdone_check();
105  P internal_release();
106  void suspend(int th_gtid);
107  void resume(int th_gtid);
108  P set_sleeping();
109  P unset_sleeping();
110  bool is_sleeping();
111  bool is_any_sleeping();
112  bool is_sleeping_val(P old_loc);
113  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
114  int *thread_finished
115  USE_ITT_BUILD_ARG(void * itt_sync_obj), kmp_int32
116  is_constrained);
117  */
118 };
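The wait and release templates below only ever touch the flag through load() and store(), so the acquire/release orderings chosen above are what make data written before a release visible to the thread that observes the flag. A minimal standalone sketch of that handshake, using only the C++ standard library (payload and go are hypothetical names, not part of the runtime):

#include <atomic>
#include <cassert>
#include <cstdint>
#include <thread>

// Standalone illustration of the acquire/release handshake used by
// kmp_flag::load()/store(): data written before the releasing store is
// visible to the thread whose acquiring load observes the flag value.
int main() {
  std::uint64_t payload = 0;                        // hypothetical shared data
  std::atomic<std::uint64_t> go{0};                 // stands in for the flag word

  std::thread producer([&] {
    payload = 42;                                   // ordinary write
    go.store(1, std::memory_order_release);         // analogue of kmp_flag::store()
  });
  std::thread consumer([&] {
    while (go.load(std::memory_order_acquire) != 1) // analogue of kmp_flag::load()
      std::this_thread::yield();
    assert(payload == 42);                          // guaranteed by acquire/release
  });

  producer.join();
  consumer.join();
  return 0;
}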
119 
120 #if OMPT_SUPPORT
121 OMPT_NOINLINE
122 static void __ompt_implicit_task_end(kmp_info_t *this_thr,
123  ompt_state_t ompt_state,
124  ompt_data_t *tId) {
125  int ds_tid = this_thr->th.th_info.ds.ds_tid;
126  if (ompt_state == ompt_state_wait_barrier_implicit) {
127  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
128 #if OMPT_OPTIONAL
129  void *codeptr = NULL;
130  if (ompt_enabled.ompt_callback_sync_region_wait) {
131  ompt_callbacks.ompt_callback(ompt_callback_sync_region_wait)(
132  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
133  codeptr);
134  }
135  if (ompt_enabled.ompt_callback_sync_region) {
136  ompt_callbacks.ompt_callback(ompt_callback_sync_region)(
137  ompt_sync_region_barrier_implicit, ompt_scope_end, NULL, tId,
138  codeptr);
139  }
140 #endif
141  if (!KMP_MASTER_TID(ds_tid)) {
142  if (ompt_enabled.ompt_callback_implicit_task) {
143  int flags = this_thr->th.ompt_thread_info.parallel_flags;
144  flags = (flags & ompt_parallel_league) ? ompt_task_initial
145  : ompt_task_implicit;
146  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
147  ompt_scope_end, NULL, tId, 0, ds_tid, flags);
148  }
149  // return to idle state
150  this_thr->th.ompt_thread_info.state = ompt_state_idle;
151  } else {
152  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
153  }
154  }
155 }
156 #endif
157 
158 /* Spin wait loop that first does pause/yield, then sleep. A thread that calls
159  __kmp_wait_* must make certain that another thread calls __kmp_release
160  to wake it back up to prevent deadlocks!
161 
162  NOTE: We may not belong to a team at this point. */
163 template <class C, int final_spin, bool cancellable = false,
164  bool sleepable = true>
165 static inline bool
166 __kmp_wait_template(kmp_info_t *this_thr,
167  C *flag USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
168 #if USE_ITT_BUILD && USE_ITT_NOTIFY
169  volatile void *spin = flag->get();
170 #endif
171  kmp_uint32 spins;
172  int th_gtid;
173  int tasks_completed = FALSE;
174  int oversubscribed;
175 #if !KMP_USE_MONITOR
176  kmp_uint64 poll_count;
177  kmp_uint64 hibernate_goal;
178 #else
179  kmp_uint32 hibernate;
180 #endif
181 
182  KMP_FSYNC_SPIN_INIT(spin, NULL);
183  if (flag->done_check()) {
184  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
185  return false;
186  }
187  th_gtid = this_thr->th.th_info.ds.ds_gtid;
188  if (cancellable) {
189  kmp_team_t *team = this_thr->th.th_team;
190  if (team && team->t.t_cancel_request == cancel_parallel)
191  return true;
192  }
193 #if KMP_OS_UNIX
194  if (final_spin)
195  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
196 #endif
197  KA_TRACE(20,
198  ("__kmp_wait_sleep: T#%d waiting for flag(%p)\n", th_gtid, flag));
199 #if KMP_STATS_ENABLED
200  stats_state_e thread_state = KMP_GET_THREAD_STATE();
201 #endif
202 
203 /* OMPT Behavior:
204 THIS function is called from
205  __kmp_barrier (2 times) (implicit or explicit barrier in parallel regions)
206  these have join / fork behavior
207 
208  In these cases, we don't change the state or trigger events in THIS
209 function.
210  Events are triggered in the calling code (__kmp_barrier):
211 
212  state := ompt_state_overhead
213  barrier-begin
214  barrier-wait-begin
215  state := ompt_state_wait_barrier
216  call join-barrier-implementation (finally arrive here)
217  {}
218  call fork-barrier-implementation (finally arrive here)
219  {}
220  state := ompt_state_overhead
221  barrier-wait-end
222  barrier-end
223  state := ompt_state_work_parallel
224 
225 
226  __kmp_fork_barrier (after thread creation, before executing implicit task)
227  call fork-barrier-implementation (finally arrive here)
228  {} // worker arrive here with state = ompt_state_idle
229 
230 
231  __kmp_join_barrier (implicit barrier at end of parallel region)
232  state := ompt_state_barrier_implicit
233  barrier-begin
234  barrier-wait-begin
235  call join-barrier-implementation (finally arrive here
236 final_spin=FALSE)
237  {
238  }
239  __kmp_fork_barrier (implicit barrier at end of parallel region)
240  call fork-barrier-implementation (finally arrive here final_spin=TRUE)
241 
242  Worker after task-team is finished:
243  barrier-wait-end
244  barrier-end
245  implicit-task-end
246  idle-begin
247  state := ompt_state_idle
248 
249  Before leaving, if state = ompt_state_idle
250  idle-end
251  state := ompt_state_overhead
252 */
253 #if OMPT_SUPPORT
254  ompt_state_t ompt_entry_state;
255  ompt_data_t *tId;
256  if (ompt_enabled.enabled) {
257  ompt_entry_state = this_thr->th.ompt_thread_info.state;
258  if (!final_spin || ompt_entry_state != ompt_state_wait_barrier_implicit ||
259  KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid)) {
260  ompt_lw_taskteam_t *team =
261  this_thr->th.th_team->t.ompt_serialized_team_info;
262  if (team) {
263  tId = &(team->ompt_task_info.task_data);
264  } else {
265  tId = OMPT_CUR_TASK_DATA(this_thr);
266  }
267  } else {
268  tId = &(this_thr->th.ompt_thread_info.task_data);
269  }
270  if (final_spin && (__kmp_tasking_mode == tskm_immediate_exec ||
271  this_thr->th.th_task_team == NULL)) {
272  // implicit task is done. Either no taskqueue, or task-team finished
273  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
274  }
275  }
276 #endif
277 
278  KMP_INIT_YIELD(spins); // Setup for waiting
279 
280  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME ||
281  __kmp_pause_status == kmp_soft_paused) {
282 #if KMP_USE_MONITOR
283 // The worker threads cannot rely on the team struct existing at this point.
284 // Use the bt values cached in the thread struct instead.
285 #ifdef KMP_ADJUST_BLOCKTIME
286  if (__kmp_pause_status == kmp_soft_paused ||
287  (__kmp_zero_bt && !this_thr->th.th_team_bt_set))
288  // Force immediate suspend if not set by user and more threads than
289  // available procs
290  hibernate = 0;
291  else
292  hibernate = this_thr->th.th_team_bt_intervals;
293 #else
294  hibernate = this_thr->th.th_team_bt_intervals;
295 #endif /* KMP_ADJUST_BLOCKTIME */
296 
297  /* If the blocktime is nonzero, we want to make sure that we spin wait for
298  the entirety of the specified #intervals, plus up to one interval more.
299  This increment makes certain that this thread doesn't go to sleep too
300  soon. */
301  if (hibernate != 0)
302  hibernate++;
303 
304  // Add in the current time value.
305  hibernate += TCR_4(__kmp_global.g.g_time.dt.t_value);
306  KF_TRACE(20, ("__kmp_wait_sleep: T#%d now=%d, hibernate=%d, intervals=%d\n",
307  th_gtid, __kmp_global.g.g_time.dt.t_value, hibernate,
308  hibernate - __kmp_global.g.g_time.dt.t_value));
309 #else
310  if (__kmp_pause_status == kmp_soft_paused) {
311  // Force immediate suspend
312  hibernate_goal = KMP_NOW();
313  } else
314  hibernate_goal = KMP_NOW() + this_thr->th.th_team_bt_intervals;
315  poll_count = 0;
316 #endif // KMP_USE_MONITOR
317  }
318 
319  oversubscribed = (TCR_4(__kmp_nth) > __kmp_avail_proc);
320  KMP_MB();
321 
322  // Main wait spin loop
323  while (flag->notdone_check()) {
324  kmp_task_team_t *task_team = NULL;
325  if (__kmp_tasking_mode != tskm_immediate_exec) {
326  task_team = this_thr->th.th_task_team;
327  /* If the thread's task team pointer is NULL, it means one of 3 things:
328  1) A newly-created thread is first being released by
329  __kmp_fork_barrier(), and its task team has not been set up yet.
330  2) All tasks have been executed to completion.
331  3) Tasking is off for this region. This could be because we are in a
332  serialized region (perhaps the outer one), or else tasking was manually
333  disabled (KMP_TASKING=0). */
334  if (task_team != NULL) {
335  if (TCR_SYNC_4(task_team->tt.tt_active)) {
336  if (KMP_TASKING_ENABLED(task_team))
337  flag->execute_tasks(
338  this_thr, th_gtid, final_spin,
339  &tasks_completed USE_ITT_BUILD_ARG(itt_sync_obj), 0);
340  else
341  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
342  } else {
343  KMP_DEBUG_ASSERT(!KMP_MASTER_TID(this_thr->th.th_info.ds.ds_tid));
344 #if OMPT_SUPPORT
345  // task team is done now; other cases should have been caught above
346  if (final_spin && ompt_enabled.enabled)
347  __ompt_implicit_task_end(this_thr, ompt_entry_state, tId);
348 #endif
349  this_thr->th.th_task_team = NULL;
350  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
351  }
352  } else {
353  this_thr->th.th_reap_state = KMP_SAFE_TO_REAP;
354  } // if
355  } // if
356 
357  KMP_FSYNC_SPIN_PREPARE(CCAST(void *, spin));
358  if (TCR_4(__kmp_global.g.g_done)) {
359  if (__kmp_global.g.g_abort)
360  __kmp_abort_thread();
361  break;
362  }
363 
364  // If we are oversubscribed, or have waited a bit (and
365  // KMP_LIBRARY=throughput), then yield
366  KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
367 
368 #if KMP_STATS_ENABLED
369  // Check if thread has been signalled to idle state
370  // This indicates that the logical "join-barrier" has finished
371  if (this_thr->th.th_stats->isIdle() &&
372  KMP_GET_THREAD_STATE() == FORK_JOIN_BARRIER) {
373  KMP_SET_THREAD_STATE(IDLE);
374  KMP_PUSH_PARTITIONED_TIMER(OMP_idle);
375  }
376 #endif
377  // Check if the barrier surrounding this wait loop has been cancelled
378  if (cancellable) {
379  kmp_team_t *team = this_thr->th.th_team;
380  if (team && team->t.t_cancel_request == cancel_parallel)
381  break;
382  }
383 
384  // Don't suspend if KMP_BLOCKTIME is set to "infinite"
385  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
386  __kmp_pause_status != kmp_soft_paused)
387  continue;
388 
389  // Don't suspend if there is a likelihood of new tasks being spawned.
390  if ((task_team != NULL) && TCR_4(task_team->tt.tt_found_tasks))
391  continue;
392 
393 #if KMP_USE_MONITOR
394  // If we have waited a bit more, fall asleep
395  if (TCR_4(__kmp_global.g.g_time.dt.t_value) < hibernate)
396  continue;
397 #else
398  if (KMP_BLOCKING(hibernate_goal, poll_count++))
399  continue;
400 #endif
401  // Don't suspend if wait loop designated non-sleepable
402  // in template parameters
403  if (!sleepable)
404  continue;
405 
406  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
407  __kmp_pause_status != kmp_soft_paused)
408  continue;
409 
410  KF_TRACE(50, ("__kmp_wait_sleep: T#%d suspend time reached\n", th_gtid));
411 
412 #if KMP_OS_UNIX
413  if (final_spin)
414  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
415 #endif
416  flag->suspend(th_gtid);
417 #if KMP_OS_UNIX
418  if (final_spin)
419  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, true);
420 #endif
421 
422  if (TCR_4(__kmp_global.g.g_done)) {
423  if (__kmp_global.g.g_abort)
424  __kmp_abort_thread();
425  break;
426  } else if (__kmp_tasking_mode != tskm_immediate_exec &&
427  this_thr->th.th_reap_state == KMP_SAFE_TO_REAP) {
428  this_thr->th.th_reap_state = KMP_NOT_SAFE_TO_REAP;
429  }
430  // TODO: If thread is done with work and times out, disband/free
431  }
432 
433 #if OMPT_SUPPORT
434  ompt_state_t ompt_exit_state = this_thr->th.ompt_thread_info.state;
435  if (ompt_enabled.enabled && ompt_exit_state != ompt_state_undefined) {
436 #if OMPT_OPTIONAL
437  if (final_spin) {
438  __ompt_implicit_task_end(this_thr, ompt_exit_state, tId);
439  ompt_exit_state = this_thr->th.ompt_thread_info.state;
440  }
441 #endif
442  if (ompt_exit_state == ompt_state_idle) {
443  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
444  }
445  }
446 #endif
447 #if KMP_STATS_ENABLED
448  // If we were put into idle state, pop that off the state stack
449  if (KMP_GET_THREAD_STATE() == IDLE) {
450  KMP_POP_PARTITIONED_TIMER();
451  KMP_SET_THREAD_STATE(thread_state);
452  this_thr->th.th_stats->resetIdleFlag();
453  }
454 #endif
455 
456 #if KMP_OS_UNIX
457  if (final_spin)
458  KMP_ATOMIC_ST_REL(&this_thr->th.th_blocking, false);
459 #endif
460  KMP_FSYNC_SPIN_ACQUIRED(CCAST(void *, spin));
461  if (cancellable) {
462  kmp_team_t *team = this_thr->th.th_team;
463  if (team && team->t.t_cancel_request == cancel_parallel) {
464  if (tasks_completed) {
465  // undo the previous decrement of unfinished_threads so that the
466  // thread can decrement at the join barrier with no problem
467  kmp_task_team_t *task_team = this_thr->th.th_task_team;
468  std::atomic<kmp_int32> *unfinished_threads =
469  &(task_team->tt.tt_unfinished_threads);
470  KMP_ATOMIC_INC(unfinished_threads);
471  }
472  return true;
473  }
474  }
475  return false;
476 }
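As the comments above describe, the wait loop escalates from busy spinning to yielding to an actual suspend, and it relies on the releasing thread to wake it. A hedged, standalone sketch of that spin / yield / sleep progression and of the required wait/release pairing, with a std::condition_variable standing in for the runtime's suspend/resume machinery (toy_sync and every other name here is hypothetical):

#include <atomic>
#include <chrono>
#include <condition_variable>
#include <cstdint>
#include <mutex>
#include <thread>

// Hypothetical, simplified wait/release pair: the waiter spins, then yields,
// then sleeps on a condition variable; the releaser must both publish the
// value and wake any sleeper, otherwise the waiter could block forever.
struct toy_sync {
  std::atomic<std::uint64_t> flag{0};
  std::mutex m;
  std::condition_variable cv;

  void wait_for(std::uint64_t checker) {
    int spins = 0;
    auto deadline = std::chrono::steady_clock::now() + std::chrono::milliseconds(1);
    while (flag.load(std::memory_order_acquire) != checker) {
      if (++spins < 1024)
        continue;                                   // 1) busy spin
      if (std::chrono::steady_clock::now() < deadline) {
        std::this_thread::yield();                  // 2) yield to other threads
        continue;
      }
      std::unique_lock<std::mutex> lk(m);           // 3) sleep until released
      cv.wait(lk, [&] {
        return flag.load(std::memory_order_acquire) == checker;
      });
    }
  }

  void release(std::uint64_t value) {
    {
      std::lock_guard<std::mutex> lk(m);            // store under the lock so a
      flag.store(value, std::memory_order_release); // waiter about to sleep can't miss it
    }
    cv.notify_all();                                // wake a potentially sleeping waiter
  }
};

int main() {
  toy_sync s;
  std::thread waiter([&] { s.wait_for(4); });
  std::thread releaser([&] { s.release(4); });
  waiter.join();
  releaser.join();
  return 0;
}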
477 
478 /* Release any threads specified as waiting on the flag by releasing the flag
479  and resuming the waiting thread if indicated by the sleep bit(s). A thread that
480  calls __kmp_wait_template must call this function to wake up the potentially
481  sleeping thread and prevent deadlocks! */
482 template <class C> static inline void __kmp_release_template(C *flag) {
483 #ifdef KMP_DEBUG
484  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
485 #endif
486  KF_TRACE(20, ("__kmp_release: T#%d releasing flag(%p)\n", gtid, flag->get()));
487  KMP_DEBUG_ASSERT(flag->get());
488  KMP_FSYNC_RELEASING(flag->get_void_p());
489 
490  flag->internal_release();
491 
492  KF_TRACE(100, ("__kmp_release: T#%d set new spin(%p)=%d\n", gtid, flag->get(),
493  flag->load()));
494 
495  if (__kmp_dflt_blocktime != KMP_MAX_BLOCKTIME) {
496  // Only need to check sleep stuff if infinite block time not set.
497  // Are *any* threads waiting on flag sleeping?
498  if (flag->is_any_sleeping()) {
499  for (unsigned int i = 0; i < flag->get_num_waiters(); ++i) {
500  // if sleeping waiter exists at i, sets current_waiter to i inside flag
501  kmp_info_t *waiter = flag->get_waiter(i);
502  if (waiter) {
503  int wait_gtid = waiter->th.th_info.ds.ds_gtid;
504  // Wake up thread if needed
505  KF_TRACE(50, ("__kmp_release: T#%d waking up thread T#%d since sleep "
506  "flag(%p) set\n",
507  gtid, wait_gtid, flag->get()));
508  flag->resume(wait_gtid); // unsets flag's current_waiter when done
509  }
510  }
511  }
512  }
513 }
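The release path above only pays for resume() when is_any_sleeping() reports a sleeper, which works because waiters advertise themselves by setting a sleep bit in the flag word before blocking (KMP_BARRIER_SLEEP_STATE, defined elsewhere in the runtime). A standalone sketch of that bookkeeping, assuming for illustration that the low bit marks a sleeper:

#include <atomic>
#include <cstdint>
#include <cstdio>

// Hypothetical sketch of the sleep-bit bookkeeping consulted by the release
// path: a waiter ORs a "sleeping" bit into the flag word before it blocks,
// and the releaser only performs the expensive wake-up when that bit is set.
constexpr std::uint64_t SLEEP_BIT = 1; // assumption: low bit marks a sleeper

std::uint64_t set_sleeping(std::atomic<std::uint64_t> &f) {
  return f.fetch_or(SLEEP_BIT, std::memory_order_acq_rel); // returns old value
}
std::uint64_t unset_sleeping(std::atomic<std::uint64_t> &f) {
  return f.fetch_and(~SLEEP_BIT, std::memory_order_acq_rel);
}
bool is_any_sleeping(const std::atomic<std::uint64_t> &f) {
  return (f.load(std::memory_order_acquire) & SLEEP_BIT) != 0;
}

int main() {
  std::atomic<std::uint64_t> flag{0};

  // Waiter side: announce the intent to sleep. The real suspend path then
  // re-checks the value returned here before actually blocking, which closes
  // the race with a concurrent release.
  std::uint64_t old_value = set_sleeping(flag);
  std::printf("value before sleep bit: %llu\n", (unsigned long long)old_value);

  // Releaser side: wake only if someone is (or may be) sleeping.
  if (is_any_sleeping(flag))
    unset_sleeping(flag); // stand-in for resuming the sleeping thread
  return 0;
}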
514 
515 template <typename FlagType> struct flag_traits {};
516 
517 template <> struct flag_traits<kmp_uint32> {
518  typedef kmp_uint32 flag_t;
519  static const flag_type t = flag32;
520  static inline flag_t tcr(flag_t f) { return TCR_4(f); }
521  static inline flag_t test_then_add4(volatile flag_t *f) {
522  return KMP_TEST_THEN_ADD4_32(RCAST(volatile kmp_int32 *, f));
523  }
524  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
525  return KMP_TEST_THEN_OR32(f, v);
526  }
527  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
528  return KMP_TEST_THEN_AND32(f, v);
529  }
530 };
531 
532 template <> struct flag_traits<kmp_uint64> {
533  typedef kmp_uint64 flag_t;
534  static const flag_type t = flag64;
535  static inline flag_t tcr(flag_t f) { return TCR_8(f); }
536  static inline flag_t test_then_add4(volatile flag_t *f) {
537  return KMP_TEST_THEN_ADD4_64(RCAST(volatile kmp_int64 *, f));
538  }
539  static inline flag_t test_then_or(volatile flag_t *f, flag_t v) {
540  return KMP_TEST_THEN_OR64(f, v);
541  }
542  static inline flag_t test_then_and(volatile flag_t *f, flag_t v) {
543  return KMP_TEST_THEN_AND64(f, v);
544  }
545 };
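These specializations exist so that a single generic flag implementation can be written once and instantiated for 32- and 64-bit words, with the width-specific atomic primitives funneled through the traits class. A standalone analogue of the same pattern over std::atomic (toy_traits and toy_generic_flag are hypothetical names, not the runtime's API):

#include <atomic>
#include <cstdint>
#include <cstdio>

// Standalone analogue of the flag_traits idea: one generic flag class works
// for both 32- and 64-bit words because the width-specific atomic primitives
// are funneled through a traits template.
template <typename T> struct toy_traits {
  static T test_then_add4(std::atomic<T> *f) { return f->fetch_add(4); }
  static T test_then_or(std::atomic<T> *f, T v) { return f->fetch_or(v); }
  static T test_then_and(std::atomic<T> *f, T v) { return f->fetch_and(v); }
};

template <typename T> class toy_generic_flag {
  std::atomic<T> *loc;
  T checker;

public:
  toy_generic_flag(std::atomic<T> *p, T c) : loc(p), checker(c) {}
  bool done_check() { return loc->load(std::memory_order_acquire) == checker; }
  void internal_release() { (void)toy_traits<T>::test_then_add4(loc); }
};

int main() {
  std::atomic<std::uint32_t> w32{0};
  std::atomic<std::uint64_t> w64{0};
  toy_generic_flag<std::uint32_t> f32(&w32, 4); // same code path, 32-bit word
  toy_generic_flag<std::uint64_t> f64(&w64, 4); // same code path, 64-bit word
  f32.internal_release();
  f64.internal_release();
  std::printf("done: %d %d\n", f32.done_check() ? 1 : 0, f64.done_check() ? 1 : 0);
  return 0;
}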
546 
547 // Basic flag that does not use C++11 std::atomic
548 template <typename FlagType>
549 class kmp_basic_flag_native : public kmp_flag_native<FlagType> {
550  typedef flag_traits<FlagType> traits_type;
551  FlagType checker; // value to compare the flag against to detect completion
553  kmp_info_t *waiting_threads[1]; // thread(s) waiting on this flag
555  kmp_uint32 num_waiting_threads; // number of threads waiting on this flag
557 public:
558  kmp_basic_flag_native(volatile FlagType *p)
559  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
560  kmp_basic_flag_native(volatile FlagType *p, kmp_info_t *thr)
561  : kmp_flag_native<FlagType>(p, traits_type::t), num_waiting_threads(1) {
562  waiting_threads[0] = thr;
563  }
564  kmp_basic_flag_native(volatile FlagType *p, FlagType c)
565  : kmp_flag_native<FlagType>(p, traits_type::t), checker(c),
566  num_waiting_threads(0) {}
571  kmp_info_t *get_waiter(kmp_uint32 i) {
572  KMP_DEBUG_ASSERT(i < num_waiting_threads);
573  return waiting_threads[i];
574  }
578  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
584  void set_waiter(kmp_info_t *thr) {
585  waiting_threads[0] = thr;
586  num_waiting_threads = 1;
587  }
591  bool done_check() { return traits_type::tcr(*(this->get())) == checker; }
596  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
604  bool notdone_check() { return traits_type::tcr(*(this->get())) != checker; }
609  void internal_release() {
610  (void)traits_type::test_then_add4((volatile FlagType *)this->get());
611  }
617  FlagType set_sleeping() {
618  return traits_type::test_then_or((volatile FlagType *)this->get(),
619  KMP_BARRIER_SLEEP_STATE);
620  }
626  FlagType unset_sleeping() {
627  return traits_type::test_then_and((volatile FlagType *)this->get(),
628  ~KMP_BARRIER_SLEEP_STATE);
629  }
634  bool is_sleeping_val(FlagType old_loc) {
635  return old_loc & KMP_BARRIER_SLEEP_STATE;
636  }
640  bool is_sleeping() { return is_sleeping_val(*(this->get())); }
641  bool is_any_sleeping() { return is_sleeping_val(*(this->get())); }
642  kmp_uint8 *get_stolen() { return NULL; }
643  enum barrier_type get_bt() { return bs_last_barrier; }
644 };
645 
646 template <typename FlagType> class kmp_basic_flag : public kmp_flag<FlagType> {
647  typedef flag_traits<FlagType> traits_type;
648  FlagType checker; // value to compare the flag against to detect completion
650  kmp_info_t *waiting_threads[1]; // thread(s) waiting on this flag
652  kmp_uint32 num_waiting_threads; // number of threads waiting on this flag
654 public:
655  kmp_basic_flag(std::atomic<FlagType> *p)
656  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(0) {}
657  kmp_basic_flag(std::atomic<FlagType> *p, kmp_info_t *thr)
658  : kmp_flag<FlagType>(p, traits_type::t), num_waiting_threads(1) {
659  waiting_threads[0] = thr;
660  }
661  kmp_basic_flag(std::atomic<FlagType> *p, FlagType c)
662  : kmp_flag<FlagType>(p, traits_type::t), checker(c),
663  num_waiting_threads(0) {}
668  kmp_info_t *get_waiter(kmp_uint32 i) {
669  KMP_DEBUG_ASSERT(i < num_waiting_threads);
670  return waiting_threads[i];
671  }
675  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
681  void set_waiter(kmp_info_t *thr) {
682  waiting_threads[0] = thr;
683  num_waiting_threads = 1;
684  }
688  bool done_check() { return this->load() == checker; }
693  bool done_check_val(FlagType old_loc) { return old_loc == checker; }
701  bool notdone_check() { return this->load() != checker; }
706  void internal_release() { KMP_ATOMIC_ADD(this->get(), 4); }
712  FlagType set_sleeping() {
713  return KMP_ATOMIC_OR(this->get(), KMP_BARRIER_SLEEP_STATE);
714  }
720  FlagType unset_sleeping() {
721  return KMP_ATOMIC_AND(this->get(), ~KMP_BARRIER_SLEEP_STATE);
722  }
727  bool is_sleeping_val(FlagType old_loc) {
728  return old_loc & KMP_BARRIER_SLEEP_STATE;
729  }
733  bool is_sleeping() { return is_sleeping_val(this->load()); }
734  bool is_any_sleeping() { return is_sleeping_val(this->load()); }
735  kmp_uint8 *get_stolen() { return NULL; }
736  enum barrier_type get_bt() { return bs_last_barrier; }
737 };
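The checker-based protocol above amounts to: a waiter is done when the flag word reaches the expected value, and internal_release() advances the word by 4 (matching KMP_ATOMIC_ADD(this->get(), 4)), leaving the low bits free for sleep-state bookkeeping. A tiny standalone illustration of that arithmetic (which low bits carry status is an assumption here, not taken from this header):

#include <atomic>
#include <cstdint>
#include <cstdio>

// Illustration of the checker/bump arithmetic: internal_release() advances
// the flag by 4, so a waiter whose checker equals "previous value + 4" sees
// done_check() become true, while the low two bits remain available for
// status bits such as the sleep flag (an assumption for this illustration).
int main() {
  std::atomic<std::uint64_t> go{0};
  std::uint64_t checker = go.load() + 4;        // value the waiter expects
  go.fetch_add(4, std::memory_order_release);   // analogue of internal_release()
  bool done = go.load(std::memory_order_acquire) == checker;
  std::printf("done_check: %d\n", done ? 1 : 0);
  return 0;
}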
738 
739 class kmp_flag_32 : public kmp_basic_flag<kmp_uint32> {
740 public:
741  kmp_flag_32(std::atomic<kmp_uint32> *p) : kmp_basic_flag<kmp_uint32>(p) {}
742  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_info_t *thr)
743  : kmp_basic_flag<kmp_uint32>(p, thr) {}
744  kmp_flag_32(std::atomic<kmp_uint32> *p, kmp_uint32 c)
745  : kmp_basic_flag<kmp_uint32>(p, c) {}
746  void suspend(int th_gtid) { __kmp_suspend_32(th_gtid, this); }
747  void resume(int th_gtid) { __kmp_resume_32(th_gtid, this); }
748  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
749  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
750  kmp_int32 is_constrained) {
751  return __kmp_execute_tasks_32(
752  this_thr, gtid, this, final_spin,
753  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
754  }
755  void wait(kmp_info_t *this_thr,
756  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
757  if (final_spin)
758  __kmp_wait_template<kmp_flag_32, TRUE>(
759  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
760  else
761  __kmp_wait_template<kmp_flag_32, FALSE>(
762  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
763  }
764  void release() { __kmp_release_template(this); }
765  flag_type get_ptr_type() { return flag32; }
766 };
767 
768 class kmp_flag_64 : public kmp_basic_flag_native<kmp_uint64> {
769 public:
770  kmp_flag_64(volatile kmp_uint64 *p) : kmp_basic_flag_native<kmp_uint64>(p) {}
771  kmp_flag_64(volatile kmp_uint64 *p, kmp_info_t *thr)
772  : kmp_basic_flag_native<kmp_uint64>(p, thr) {}
773  kmp_flag_64(volatile kmp_uint64 *p, kmp_uint64 c)
774  : kmp_basic_flag_native<kmp_uint64>(p, c) {}
775  void suspend(int th_gtid) { __kmp_suspend_64(th_gtid, this); }
776  void resume(int th_gtid) { __kmp_resume_64(th_gtid, this); }
777  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
778  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
779  kmp_int32 is_constrained) {
780  return __kmp_execute_tasks_64(
781  this_thr, gtid, this, final_spin,
782  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
783  }
784  void wait(kmp_info_t *this_thr,
785  int final_spin USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
786  if (final_spin)
787  __kmp_wait_template<kmp_flag_64, TRUE>(
788  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
789  else
790  __kmp_wait_template<kmp_flag_64, FALSE>(
791  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
792  }
793  bool wait_cancellable_nosleep(kmp_info_t *this_thr,
794  int final_spin
795  USE_ITT_BUILD_ARG(void *itt_sync_obj)) {
796  bool retval = false;
797  if (final_spin)
798  retval = __kmp_wait_template<kmp_flag_64, TRUE, true, false>(
799  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
800  else
801  retval = __kmp_wait_template<kmp_flag_64, FALSE, true, false>(
802  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
803  return retval;
804  }
805  void release() { __kmp_release_template(this); }
806  flag_type get_ptr_type() { return flag64; }
807 };
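kmp_flag_64::wait() passes final_spin as a template argument rather than a plain function parameter, and wait_cancellable_nosleep() selects the cancellable, non-sleepable instantiation, so these behavior switches are compile-time constants inside the hot spin loop, presumably letting the compiler specialize each variant. A standalone sketch of that pattern (toy_wait and its parameters are hypothetical):

#include <atomic>
#include <cstdint>
#include <thread>

// Non-type template parameters stand in for final_spin / cancellable /
// sleepable: each combination compiles to its own specialized loop with the
// branches folded away, instead of testing runtime flags on every iteration.
template <bool final_spin, bool cancellable>
bool toy_wait(const std::atomic<std::uint64_t> &flag, std::uint64_t checker,
              const std::atomic<bool> &cancel_requested) {
  while (flag.load(std::memory_order_acquire) != checker) {
    if (cancellable && cancel_requested.load(std::memory_order_relaxed))
      return true;            // cancelled (dead code when cancellable == false)
    if (final_spin) {
      // a final-spin waiter might additionally drain outstanding tasks here
    }
    std::this_thread::yield();
  }
  return false;               // flag reached the expected value
}

int main() {
  std::atomic<std::uint64_t> flag{4};
  std::atomic<bool> cancel{false};
  // Each call instantiates a distinct, specialized wait loop.
  bool a = toy_wait<true, false>(flag, 4, cancel);
  bool b = toy_wait<false, true>(flag, 4, cancel);
  return (a || b) ? 1 : 0;
}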
808 
809 // Hierarchical 64-bit on-core barrier instantiation
810 class kmp_flag_oncore : public kmp_flag_native<kmp_uint64> {
811  kmp_uint64 checker;
812  kmp_info_t *waiting_threads[1];
813  kmp_uint32 num_waiting_threads;
814  kmp_uint32 offset; // byte offset within the flag word that this thread checks
816  bool flag_switch;
817  enum barrier_type bt;
818  kmp_info_t *this_thr;
820 #if USE_ITT_BUILD
821  void *itt_sync_obj;
823 #endif
824  unsigned char &byteref(volatile kmp_uint64 *loc, size_t offset) {
825  return (RCAST(unsigned char *, CCAST(kmp_uint64 *, loc)))[offset];
826  }
827 
828 public:
829  kmp_flag_oncore(volatile kmp_uint64 *p)
830  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
831  flag_switch(false) {}
832  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint32 idx)
833  : kmp_flag_native<kmp_uint64>(p, flag_oncore), num_waiting_threads(0),
834  offset(idx), flag_switch(false) {}
835  kmp_flag_oncore(volatile kmp_uint64 *p, kmp_uint64 c, kmp_uint32 idx,
836  enum barrier_type bar_t,
837  kmp_info_t *thr USE_ITT_BUILD_ARG(void *itt))
838  : kmp_flag_native<kmp_uint64>(p, flag_oncore), checker(c),
839  num_waiting_threads(0), offset(idx), flag_switch(false), bt(bar_t),
840  this_thr(thr) USE_ITT_BUILD_ARG(itt_sync_obj(itt)) {}
841  kmp_info_t *get_waiter(kmp_uint32 i) {
842  KMP_DEBUG_ASSERT(i < num_waiting_threads);
843  return waiting_threads[i];
844  }
845  kmp_uint32 get_num_waiters() { return num_waiting_threads; }
846  void set_waiter(kmp_info_t *thr) {
847  waiting_threads[0] = thr;
848  num_waiting_threads = 1;
849  }
850  bool done_check_val(kmp_uint64 old_loc) {
851  return byteref(&old_loc, offset) == checker;
852  }
853  bool done_check() { return done_check_val(*get()); }
854  bool notdone_check() {
855  // Calculate flag_switch
856  if (this_thr->th.th_bar[bt].bb.wait_flag == KMP_BARRIER_SWITCH_TO_OWN_FLAG)
857  flag_switch = true;
858  if (byteref(get(), offset) != 1 && !flag_switch)
859  return true;
860  else if (flag_switch) {
861  this_thr->th.th_bar[bt].bb.wait_flag = KMP_BARRIER_SWITCHING;
862  kmp_flag_64 flag(&this_thr->th.th_bar[bt].bb.b_go,
863  (kmp_uint64)KMP_BARRIER_STATE_BUMP);
864  __kmp_wait_64(this_thr, &flag, TRUE USE_ITT_BUILD_ARG(itt_sync_obj));
865  }
866  return false;
867  }
868  void internal_release() {
869  // Other threads can write their own bytes simultaneously.
870  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
871  byteref(get(), offset) = 1;
872  } else {
873  kmp_uint64 mask = 0;
874  byteref(&mask, offset) = 1;
875  KMP_TEST_THEN_OR64(get(), mask);
876  }
877  }
878  kmp_uint64 set_sleeping() {
879  return KMP_TEST_THEN_OR64(get(), KMP_BARRIER_SLEEP_STATE);
880  }
881  kmp_uint64 unset_sleeping() {
882  return KMP_TEST_THEN_AND64(get(), ~KMP_BARRIER_SLEEP_STATE);
883  }
884  bool is_sleeping_val(kmp_uint64 old_loc) {
885  return old_loc & KMP_BARRIER_SLEEP_STATE;
886  }
887  bool is_sleeping() { return is_sleeping_val(*get()); }
888  bool is_any_sleeping() { return is_sleeping_val(*get()); }
889  void wait(kmp_info_t *this_thr, int final_spin) {
890  if (final_spin)
891  __kmp_wait_template<kmp_flag_oncore, TRUE>(
892  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
893  else
894  __kmp_wait_template<kmp_flag_oncore, FALSE>(
895  this_thr, this USE_ITT_BUILD_ARG(itt_sync_obj));
896  }
897  void release() { __kmp_release_template(this); }
898  void suspend(int th_gtid) { __kmp_suspend_oncore(th_gtid, this); }
899  void resume(int th_gtid) { __kmp_resume_oncore(th_gtid, this); }
900  int execute_tasks(kmp_info_t *this_thr, kmp_int32 gtid, int final_spin,
901  int *thread_finished USE_ITT_BUILD_ARG(void *itt_sync_obj),
902  kmp_int32 is_constrained) {
903  return __kmp_execute_tasks_oncore(
904  this_thr, gtid, this, final_spin,
905  thread_finished USE_ITT_BUILD_ARG(itt_sync_obj), is_constrained);
906  }
907  kmp_uint8 *get_stolen() { return NULL; }
908  enum barrier_type get_bt() { return bt; }
909  flag_type get_ptr_type() { return flag_oncore; }
910 };
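kmp_flag_oncore packs up to eight one-byte flags into a single 64-bit word, so each child of the hierarchical barrier signals completion by setting only its own byte; when other bits may be written concurrently, internal_release() does this with an atomic OR of a byte-wide mask. A standalone sketch of that byte-masking technique (release_byte and byte_done are hypothetical helpers, not runtime functions):

#include <atomic>
#include <cstdint>
#include <cstdio>
#include <cstring>

// Hypothetical helpers mirroring the byte-per-child encoding: byte 'offset'
// of a 64-bit word is set to 1 by ORing in a byte-wide mask, which preserves
// the other children's bytes (and any other bits) atomically.
static void release_byte(std::atomic<std::uint64_t> &word, std::size_t offset) {
  std::uint64_t mask = 0;
  reinterpret_cast<unsigned char *>(&mask)[offset] = 1; // build mask for that byte
  word.fetch_or(mask, std::memory_order_release);
}

static bool byte_done(const std::atomic<std::uint64_t> &word, std::size_t offset) {
  std::uint64_t v = word.load(std::memory_order_acquire);
  unsigned char b;
  std::memcpy(&b, reinterpret_cast<unsigned char *>(&v) + offset, 1);
  return b == 1;
}

int main() {
  std::atomic<std::uint64_t> w{0};
  release_byte(w, 3); // the child at offset 3 signals completion
  std::printf("byte 3 done: %d, byte 0 done: %d\n",
              byte_done(w, 3) ? 1 : 0, byte_done(w, 0) ? 1 : 0);
  return 0;
}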
911 
912 // Used to wake up threads; the volatile void *flag is usually the th_sleep_loc
913 // associated with the given gtid.
914 static inline void __kmp_null_resume_wrapper(int gtid, volatile void *flag) {
915  if (!flag)
916  return;
917 
918  switch (RCAST(kmp_flag_64 *, CCAST(void *, flag))->get_type()) {
919  case flag32:
920  __kmp_resume_32(gtid, NULL);
921  break;
922  case flag64:
923  __kmp_resume_64(gtid, NULL);
924  break;
925  case flag_oncore:
926  __kmp_resume_oncore(gtid, NULL);
927  break;
928  }
929 }
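This wrapper can interpret a type-erased sleep location because, as seen in the class definitions above, the flag classes share the same leading layout (the storage pointer followed by the flag_type tag), so get_type() read through any of them routes the wake-up to the matching resume routine. A standalone sketch of that tag-dispatch idea (all names here are hypothetical):

#include <cstdio>

// Sketch of tag dispatch over a common header: every flag begins with a
// location pointer and a type tag, so a void* can be inspected through that
// header and routed to the matching resume routine.
enum toy_flag_type { toy_flag32, toy_flag64, toy_flag_oncore };

struct toy_flag_header {
  void *loc;
  toy_flag_type type;
};

static void toy_resume(void *flag) {
  if (!flag)
    return;
  switch (static_cast<toy_flag_header *>(flag)->type) {
  case toy_flag32:
    std::puts("resume 32-bit waiter");
    break;
  case toy_flag64:
    std::puts("resume 64-bit waiter");
    break;
  case toy_flag_oncore:
    std::puts("resume on-core waiter");
    break;
  }
}

int main() {
  toy_flag_header h{nullptr, toy_flag64};
  toy_resume(&h);
  return 0;
}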
930 
935 #endif // KMP_WAIT_RELEASE_H