LLVM OpenMP* Runtime Library
kmp_csupport.cpp
1 /*
2  * kmp_csupport.cpp -- kfront linkage support for OpenMP.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #define __KMP_IMP
14 #include "omp.h" /* extern "C" declarations of user-visible routines */
15 #include "kmp.h"
16 #include "kmp_error.h"
17 #include "kmp_i18n.h"
18 #include "kmp_itt.h"
19 #include "kmp_lock.h"
20 #include "kmp_stats.h"
21 #include "ompt-specific.h"
22 
23 #define MAX_MESSAGE 512
24 
25 // flags will be used in the future, e.g. to implement openmp_strict library
26 // restrictions
27 
36 void __kmpc_begin(ident_t *loc, kmp_int32 flags) {
37  // By default __kmpc_begin() is a no-op.
38  char *env;
39  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
40  __kmp_str_match_true(env)) {
41  __kmp_middle_initialize();
42  KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
43  } else if (__kmp_ignore_mppbeg() == FALSE) {
44  // By default __kmp_ignore_mppbeg() returns TRUE.
45  __kmp_internal_begin();
46  KC_TRACE(10, ("__kmpc_begin: called\n"));
47  }
48 }
49 
58 void __kmpc_end(ident_t *loc) {
59  // By default, __kmp_ignore_mppend() returns TRUE, which makes the
60  // __kmpc_end() call a no-op. However, this can be overridden with the
61  // KMP_IGNORE_MPPEND environment variable. If KMP_IGNORE_MPPEND is 0,
62  // __kmp_ignore_mppend() returns FALSE and __kmpc_end() will unregister
63  // this root (which can cause library shutdown).
64  if (__kmp_ignore_mppend() == FALSE) {
65  KC_TRACE(10, ("__kmpc_end: called\n"));
66  KA_TRACE(30, ("__kmpc_end\n"));
67 
68  __kmp_internal_end_thread(-1);
69  }
70 #if KMP_OS_WINDOWS && OMPT_SUPPORT
71  // Normal exit process on Windows does not allow worker threads of the final
72  // parallel region to finish reporting their events, so shutting down the
73  // library here fixes the issue at least for the cases where __kmpc_end() is
74  // placed properly.
75  if (ompt_enabled.enabled)
76  __kmp_internal_end_library(__kmp_gtid_get_specific());
77 #endif
78 }
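
// Illustrative sketch (an assumption, not taken from this file): roughly how a
// compiler-generated program can bracket execution with these two entry
// points. The ident_t initializer and all example_* names are hypothetical.
#if 0 /* example only; not built */
static ident_t example_loc = {0, KMP_IDENT_KMPC, 0, 0,
                              ";example.c;main;1;1;;"};

int example_main(void) {
  // No-op by default; KMP_INITIAL_THREAD_BIND or KMP_IGNORE_MPPBEG=0 changes
  // that (see __kmpc_begin above).
  __kmpc_begin(&example_loc, 0);
  /* ... OpenMP program ... */
  // No-op unless KMP_IGNORE_MPPEND=0, in which case this root is unregistered.
  __kmpc_end(&example_loc);
  return 0;
}
#endif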
79 
98 kmp_int32 __kmpc_global_thread_num(ident_t *loc) {
99  kmp_int32 gtid = __kmp_entry_gtid();
100 
101  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));
102 
103  return gtid;
104 }
105 
120 kmp_int32 __kmpc_global_num_threads(ident_t *loc) {
121  KC_TRACE(10,
122  ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));
123 
124  return TCR_4(__kmp_all_nth);
125 }
126 
133 kmp_int32 __kmpc_bound_thread_num(ident_t *loc) {
134  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
135  return __kmp_tid_from_gtid(__kmp_entry_gtid());
136 }
137 
143 kmp_int32 __kmpc_bound_num_threads(ident_t *loc) {
144  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
145 
146  return __kmp_entry_thread()->th.th_team->t.t_nproc;
147 }
148 
155 kmp_int32 __kmpc_ok_to_fork(ident_t *loc) {
156 #ifndef KMP_DEBUG
157 
158  return TRUE;
159 
160 #else
161 
162  const char *semi2;
163  const char *semi3;
164  int line_no;
165 
166  if (__kmp_par_range == 0) {
167  return TRUE;
168  }
169  semi2 = loc->psource;
170  if (semi2 == NULL) {
171  return TRUE;
172  }
173  semi2 = strchr(semi2, ';');
174  if (semi2 == NULL) {
175  return TRUE;
176  }
177  semi2 = strchr(semi2 + 1, ';');
178  if (semi2 == NULL) {
179  return TRUE;
180  }
181  if (__kmp_par_range_filename[0]) {
182  const char *name = semi2 - 1;
183  while ((name > loc->psource) && (*name != '/') && (*name != ';')) {
184  name--;
185  }
186  if ((*name == '/') || (*name == ';')) {
187  name++;
188  }
189  if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
190  return __kmp_par_range < 0;
191  }
192  }
193  semi3 = strchr(semi2 + 1, ';');
194  if (__kmp_par_range_routine[0]) {
195  if ((semi3 != NULL) && (semi3 > semi2) &&
196  (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
197  return __kmp_par_range < 0;
198  }
199  }
200  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
201  if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
202  return __kmp_par_range > 0;
203  }
204  return __kmp_par_range < 0;
205  }
206  return TRUE;
207 
208 #endif /* KMP_DEBUG */
209 }
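
// Illustrative sketch (an assumption, not taken from this file): the psource
// string parsed above consists of semicolon-separated fields, roughly
// ";file;routine;first-line;last-line;;". The concrete values are made up.
#if 0 /* example only; not built */
static ident_t par_range_loc = {0, KMP_IDENT_KMPC, 0, 0,
                                ";work.c;compute_kernel;42;57;;"};
// With KMP_PAR_RANGE filtering active, __kmpc_ok_to_fork(&par_range_loc)
// compares "work.c" against __kmp_par_range_filename, "compute_kernel"
// against __kmp_par_range_routine, and 42 against the configured line range.
#endif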
210 
217 kmp_int32 __kmpc_in_parallel(ident_t *loc) {
218  return __kmp_entry_thread()->th.th_root->r.r_active;
219 }
220 
230 void __kmpc_push_num_threads(ident_t *loc, kmp_int32 global_tid,
231  kmp_int32 num_threads) {
232  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
233  global_tid, num_threads));
234 
235  __kmp_push_num_threads(loc, global_tid, num_threads);
236 }
237 
238 void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
239  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));
240 
241  /* the num_threads are automatically popped */
242 }
243 
244 void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
245  kmp_int32 proc_bind) {
246  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,
247  proc_bind));
248 
249  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
250 }
251 
262 void __kmpc_fork_call(ident_t *loc, kmp_int32 argc, kmpc_micro microtask, ...) {
263  int gtid = __kmp_entry_gtid();
264 
265 #if (KMP_STATS_ENABLED)
266  // If we were in a serial region, then stop the serial timer, record
267  // the event, and start parallel region timer
268  stats_state_e previous_state = KMP_GET_THREAD_STATE();
269  if (previous_state == stats_state_e::SERIAL_REGION) {
270  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);
271  } else {
272  KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);
273  }
274  int inParallel = __kmpc_in_parallel(loc);
275  if (inParallel) {
276  KMP_COUNT_BLOCK(OMP_NESTED_PARALLEL);
277  } else {
278  KMP_COUNT_BLOCK(OMP_PARALLEL);
279  }
280 #endif
281 
282  // maybe saving thr_state is enough here
283  {
284  va_list ap;
285  va_start(ap, microtask);
286 
287 #if OMPT_SUPPORT
288  ompt_frame_t *ompt_frame;
289  if (ompt_enabled.enabled) {
290  kmp_info_t *master_th = __kmp_threads[gtid];
291  kmp_team_t *parent_team = master_th->th.th_team;
292  ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;
293  if (lwt)
294  ompt_frame = &(lwt->ompt_task_info.frame);
295  else {
296  int tid = __kmp_tid_from_gtid(gtid);
297  ompt_frame = &(
298  parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);
299  }
300  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
301  OMPT_STORE_RETURN_ADDRESS(gtid);
302  }
303 #endif
304 
305 #if INCLUDE_SSC_MARKS
306  SSC_MARK_FORKING();
307 #endif
308  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
309  VOLATILE_CAST(microtask_t) microtask, // "wrapped" task
310  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
311 /* TODO: revert workaround for Intel(R) 64 tracker #96 */
312 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
313  &ap
314 #else
315  ap
316 #endif
317  );
318 #if INCLUDE_SSC_MARKS
319  SSC_MARK_JOINING();
320 #endif
321  __kmp_join_call(loc, gtid
322 #if OMPT_SUPPORT
323  ,
324  fork_context_intel
325 #endif
326  );
327 
328  va_end(ap);
329  }
330 
331 #if KMP_STATS_ENABLED
332  if (previous_state == stats_state_e::SERIAL_REGION) {
333  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
334  } else {
335  KMP_POP_PARTITIONED_TIMER();
336  }
337 #endif // KMP_STATS_ENABLED
338 }
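
// Illustrative sketch (an assumption, not taken from this file): roughly the
// code a compiler front end might emit for
//   #pragma omp parallel num_threads(4)
// with one shared variable. The example_* names are hypothetical.
#if 0 /* example only; not built */
static void example_parallel_body(kmp_int32 *gtid, kmp_int32 *btid,
                                  int *shared_counter) {
  /* body of the parallel region; *gtid and *btid identify the executing thread */
}

static void example_parallel_caller(void) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;caller;10;12;;"};
  int counter = 0;
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_push_num_threads(&loc, gtid, 4); // num_threads(4) clause
  // argc == 1 shared argument, forwarded through the varargs to the microtask
  __kmpc_fork_call(&loc, 1, (kmpc_micro)example_parallel_body, &counter);
}
#endif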
339 
351 void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
352  kmp_int32 num_teams, kmp_int32 num_threads) {
353  KA_TRACE(20,
354  ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
355  global_tid, num_teams, num_threads));
356 
357  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
358 }
359 
370 void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc, kmpc_micro microtask,
371  ...) {
372  int gtid = __kmp_entry_gtid();
373  kmp_info_t *this_thr = __kmp_threads[gtid];
374  va_list ap;
375  va_start(ap, microtask);
376 
377 #if KMP_STATS_ENABLED
378  KMP_COUNT_BLOCK(OMP_TEAMS);
379  stats_state_e previous_state = KMP_GET_THREAD_STATE();
380  if (previous_state == stats_state_e::SERIAL_REGION) {
381  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);
382  } else {
383  KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);
384  }
385 #endif
386 
387  // remember teams entry point and nesting level
388  this_thr->th.th_teams_microtask = microtask;
389  this_thr->th.th_teams_level =
390  this_thr->th.th_team->t.t_level; // AC: can be >0 on host
391 
392 #if OMPT_SUPPORT
393  kmp_team_t *parent_team = this_thr->th.th_team;
394  int tid = __kmp_tid_from_gtid(gtid);
395  if (ompt_enabled.enabled) {
396  parent_team->t.t_implicit_task_taskdata[tid]
397  .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
398  }
399  OMPT_STORE_RETURN_ADDRESS(gtid);
400 #endif
401 
402  // check if __kmpc_push_num_teams was called and, if not, set the default
403  // number of teams
404  if (this_thr->th.th_teams_size.nteams == 0) {
405  __kmp_push_num_teams(loc, gtid, 0, 0);
406  }
407  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
408  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
409  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);
410 
411  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
412  VOLATILE_CAST(microtask_t)
413  __kmp_teams_master, // "wrapped" task
414  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
415 #if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX
416  &ap
417 #else
418  ap
419 #endif
420  );
421  __kmp_join_call(loc, gtid
422 #if OMPT_SUPPORT
423  ,
424  fork_context_intel
425 #endif
426  );
427 
428  // Pop current CG root off list
429  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
430  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
431  this_thr->th.th_cg_roots = tmp->up;
432  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
433  " to node %p. cg_nthreads was %d\n",
434  this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
435  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
436  int i = tmp->cg_nthreads--;
437  if (i == 1) { // check if we are the last thread in CG (not always the case)
438  __kmp_free(tmp);
439  }
440  // Restore current task's thread_limit from CG root
441  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
442  this_thr->th.th_current_task->td_icvs.thread_limit =
443  this_thr->th.th_cg_roots->cg_thread_limit;
444 
445  this_thr->th.th_teams_microtask = NULL;
446  this_thr->th.th_teams_level = 0;
447  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;
448  va_end(ap);
449 #if KMP_STATS_ENABLED
450  if (previous_state == stats_state_e::SERIAL_REGION) {
451  KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);
452  } else {
453  KMP_POP_PARTITIONED_TIMER();
454  }
455 #endif // KMP_STATS_ENABLED
456 }
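
// Illustrative sketch (an assumption, not taken from this file): roughly the
// lowering of
//   #pragma omp teams num_teams(2) thread_limit(8)
// The example_* names are hypothetical.
#if 0 /* example only; not built */
static void example_teams_body(kmp_int32 *gtid, kmp_int32 *btid, int *data) {
  /* teams region body, executed by the master thread of each team */
}

static void example_teams_caller(int *data) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;teams;20;24;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  __kmpc_push_num_teams(&loc, gtid, 2, 8); // num_teams(2) thread_limit(8)
  __kmpc_fork_teams(&loc, 1, (kmpc_micro)example_teams_body, data);
}
#endif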
457 
458 // I don't think this function should ever have been exported.
459 // The __kmpc_ prefix was misapplied. I'm fairly certain that no generated
460 // openmp code ever called it, but it's been exported from the RTL for so
461 // long that I'm afraid to remove the definition.
462 int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }
463 
476 void __kmpc_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
477 // The implementation is now in kmp_runtime.cpp so that it can share static
478 // functions with kmp_fork_call since the tasks to be done are similar in
479 // each case.
480 #if OMPT_SUPPORT
481  OMPT_STORE_RETURN_ADDRESS(global_tid);
482 #endif
483  __kmp_serialized_parallel(loc, global_tid);
484 }
485 
493 void __kmpc_end_serialized_parallel(ident_t *loc, kmp_int32 global_tid) {
494  kmp_internal_control_t *top;
495  kmp_info_t *this_thr;
496  kmp_team_t *serial_team;
497 
498  KC_TRACE(10,
499  ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));
500 
501  /* skip all this code for autopar serialized loops since it results in
502  unacceptable overhead */
503  if (loc != NULL && (loc->flags & KMP_IDENT_AUTOPAR))
504  return;
505 
506  // Not autopar code
507  if (!TCR_4(__kmp_init_parallel))
508  __kmp_parallel_initialize();
509 
510  __kmp_resume_if_soft_paused();
511 
512  this_thr = __kmp_threads[global_tid];
513  serial_team = this_thr->th.th_serial_team;
514 
515  kmp_task_team_t *task_team = this_thr->th.th_task_team;
516  // we need to wait for the proxy tasks before finishing the thread
517  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
518  __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));
519 
520  KMP_MB();
521  KMP_DEBUG_ASSERT(serial_team);
522  KMP_ASSERT(serial_team->t.t_serialized);
523  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
524  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
525  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
526  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);
527 
528 #if OMPT_SUPPORT
529  if (ompt_enabled.enabled &&
530  this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
531  OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
532  if (ompt_enabled.ompt_callback_implicit_task) {
533  ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
534  ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
535  OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);
536  }
537 
538  // clear the task id only after unlinking the task
539  ompt_data_t *parent_task_data;
540  __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);
541 
542  if (ompt_enabled.ompt_callback_parallel_end) {
543  ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
544  &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
545  ompt_parallel_invoker_program | ompt_parallel_team,
546  OMPT_LOAD_RETURN_ADDRESS(global_tid));
547  }
548  __ompt_lw_taskteam_unlink(this_thr);
549  this_thr->th.ompt_thread_info.state = ompt_state_overhead;
550  }
551 #endif
552 
553  /* If necessary, pop the internal control stack values and replace the team
554  * values */
555  top = serial_team->t.t_control_stack_top;
556  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
557  copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
558  serial_team->t.t_control_stack_top = top->next;
559  __kmp_free(top);
560  }
561 
562  // if( serial_team -> t.t_serialized > 1 )
563  serial_team->t.t_level--;
564 
565  /* pop dispatch buffers stack */
566  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);
567  {
568  dispatch_private_info_t *disp_buffer =
569  serial_team->t.t_dispatch->th_disp_buffer;
570  serial_team->t.t_dispatch->th_disp_buffer =
571  serial_team->t.t_dispatch->th_disp_buffer->next;
572  __kmp_free(disp_buffer);
573  }
574  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator; // restore
575 
576  --serial_team->t.t_serialized;
577  if (serial_team->t.t_serialized == 0) {
578 
579 /* return to the parallel section */
580 
581 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
582  if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
583  __kmp_clear_x87_fpu_status_word();
584  __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
585  __kmp_load_mxcsr(&serial_team->t.t_mxcsr);
586  }
587 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
588 
589  this_thr->th.th_team = serial_team->t.t_parent;
590  this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;
591 
592  /* restore values cached in the thread */
593  this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc; /* JPH */
594  this_thr->th.th_team_master =
595  serial_team->t.t_parent->t.t_threads[0]; /* JPH */
596  this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;
597 
598  /* TODO the below shouldn't need to be adjusted for serialized teams */
599  this_thr->th.th_dispatch =
600  &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];
601 
602  __kmp_pop_current_task_from_thread(this_thr);
603 
604  KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
605  this_thr->th.th_current_task->td_flags.executing = 1;
606 
607  if (__kmp_tasking_mode != tskm_immediate_exec) {
608  // Copy the task team from the new child / old parent team to the thread.
609  this_thr->th.th_task_team =
610  this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];
611  KA_TRACE(20,
612  ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "
613  "team %p\n",
614  global_tid, this_thr->th.th_task_team, this_thr->th.th_team));
615  }
616  } else {
617  if (__kmp_tasking_mode != tskm_immediate_exec) {
618  KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
619  "depth of serial team %p to %d\n",
620  global_tid, serial_team, serial_team->t.t_serialized));
621  }
622  }
623 
624  if (__kmp_env_consistency_check)
625  __kmp_pop_parallel(global_tid, NULL);
626 #if OMPT_SUPPORT
627  if (ompt_enabled.enabled)
628  this_thr->th.ompt_thread_info.state =
629  ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
630  : ompt_state_work_parallel);
631 #endif
632 }
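
// Illustrative sketch (an assumption, not taken from this file): roughly how a
// compiler serializes a parallel region whose if() clause evaluates to false,
// e.g. "#pragma omp parallel if(0)". The example_* names are hypothetical.
#if 0 /* example only; not built */
static void example_serial_body(kmp_int32 *gtid, kmp_int32 *btid) {
  /* body of the (serialized) parallel region */
}

static void example_serialized(void) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;serial;30;32;;"};
  kmp_int32 gtid = __kmpc_global_thread_num(&loc);
  kmp_int32 zero = 0;
  __kmpc_serialized_parallel(&loc, gtid);
  example_serial_body(&gtid, &zero); // the outlined body simply runs inline
  __kmpc_end_serialized_parallel(&loc, gtid);
}
#endif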
633 
642 void __kmpc_flush(ident_t *loc) {
643  KC_TRACE(10, ("__kmpc_flush: called\n"));
644 
645  /* need an explicit __mf() here since the library uses volatile instead */
646  KMP_MB(); /* Flush all pending memory write invalidates. */
647 
648 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
649 #if KMP_MIC
650 // fence-style instructions do not exist, but lock; xaddl $0,(%rsp) can be used.
651 // We shouldn't need it, though, since the ABI rules require that
652 // * If the compiler generates NGO stores it also generates the fence
653 // * If users hand-code NGO stores they should insert the fence
654 // therefore no incomplete unordered stores should be visible.
655 #else
656  // C74404
657  // This is to address non-temporal store instructions (sfence needed).
658  // The clflush instruction is also addressed (mfence needed).
659  // Probably the non-temporal load movntdqa instruction should also be
660  // addressed.
661  // mfence is an SSE2 instruction; do not execute it if the CPU does not support SSE2.
662  if (!__kmp_cpuinfo.initialized) {
663  __kmp_query_cpuid(&__kmp_cpuinfo);
664  }
665  if (!__kmp_cpuinfo.sse2) {
666  // CPU cannot execute SSE2 instructions.
667  } else {
668 #if KMP_COMPILER_ICC
669  _mm_mfence();
670 #elif KMP_COMPILER_MSVC
671  MemoryBarrier();
672 #else
673  __sync_synchronize();
674 #endif // KMP_COMPILER_ICC
675  }
676 #endif // KMP_MIC
677 #elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \
678  KMP_ARCH_RISCV64)
679 // Nothing to see here; move along.
680 #elif KMP_ARCH_PPC64
681 // Nothing needed here (we have a real MB above).
682 #if KMP_OS_CNK
683  // The flushing thread needs to yield here; this prevents a
684  // busy-waiting thread from saturating the pipeline. flush is
685  // often used in loops like this:
686  // while (!flag) {
687  // #pragma omp flush(flag)
688  // }
689  // and adding the yield here is good for at least a 10x speedup
690  // when running >2 threads per core (on the NAS LU benchmark).
691  __kmp_yield();
692 #endif
693 #else
694 #error Unknown or unsupported architecture
695 #endif
696 
697 #if OMPT_SUPPORT && OMPT_OPTIONAL
698  if (ompt_enabled.ompt_callback_flush) {
699  ompt_callbacks.ompt_callback(ompt_callback_flush)(
700  __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
701  }
702 #endif
703 }
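
// Illustrative sketch (an assumption, not taken from this file): a spin loop
// of the kind mentioned in the PPC64 comment above, with the runtime call a
// "#pragma omp flush" typically lowers to.
#if 0 /* example only; not built */
static void example_wait_for_flag(volatile int *flag) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;wait;40;43;;"};
  while (!*flag) {
    __kmpc_flush(&loc); // #pragma omp flush(flag)
  }
}
#endif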
704 
705 /* -------------------------------------------------------------------------- */
713 void __kmpc_barrier(ident_t *loc, kmp_int32 global_tid) {
714  KMP_COUNT_BLOCK(OMP_BARRIER);
715  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));
716 
717  if (!TCR_4(__kmp_init_parallel))
718  __kmp_parallel_initialize();
719 
720  __kmp_resume_if_soft_paused();
721 
722  if (__kmp_env_consistency_check) {
723  if (loc == 0) {
724  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
725  }
726  __kmp_check_barrier(global_tid, ct_barrier, loc);
727  }
728 
729 #if OMPT_SUPPORT
730  ompt_frame_t *ompt_frame;
731  if (ompt_enabled.enabled) {
732  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
733  if (ompt_frame->enter_frame.ptr == NULL)
734  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
735  OMPT_STORE_RETURN_ADDRESS(global_tid);
736  }
737 #endif
738  __kmp_threads[global_tid]->th.th_ident = loc;
739  // TODO: explicit barrier_wait_id:
740  // this function is called when the 'barrier' directive is present or at
741  // the implicit barrier at the end of a worksharing construct.
742  // 1) better to add a per-thread barrier counter to a thread data structure
743  // 2) set to 0 when a new team is created
744  // 3) no sync is required
745 
746  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
747 #if OMPT_SUPPORT && OMPT_OPTIONAL
748  if (ompt_enabled.enabled) {
749  ompt_frame->enter_frame = ompt_data_none;
750  }
751 #endif
752 }
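
// Illustrative sketch (an assumption, not taken from this file): the lowering
// of an explicit "#pragma omp barrier". The names are hypothetical.
#if 0 /* example only; not built */
static void example_barrier(void) {
  static ident_t loc = {0, KMP_IDENT_KMPC, 0, 0, ";example.c;body;50;50;;"};
  __kmpc_barrier(&loc, __kmpc_global_thread_num(&loc));
}
#endif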
753 
754 /* The BARRIER for a MASTER section is always explicit */
761 kmp_int32 __kmpc_master(ident_t *loc, kmp_int32 global_tid) {
762  int status = 0;
763 
764  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));
765 
766  if (!TCR_4(__kmp_init_parallel))
767  __kmp_parallel_initialize();
768 
769  __kmp_resume_if_soft_paused();
770 
771  if (KMP_MASTER_GTID(global_tid)) {
772  KMP_COUNT_BLOCK(OMP_MASTER);
773  KMP_PUSH_PARTITIONED_TIMER(OMP_master);
774  status = 1;
775  }
776 
777 #if OMPT_SUPPORT && OMPT_OPTIONAL
778  if (status) {
779  if (ompt_enabled.ompt_callback_master) {
780  kmp_info_t *this_thr = __kmp_threads[global_tid];
781  kmp_team_t *team = this_thr->th.th_team;
782 
783  int tid = __kmp_tid_from_gtid(global_tid);
784  ompt_callbacks.ompt_callback(ompt_callback_master)(
785  ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
786  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
787  OMPT_GET_RETURN_ADDRESS(0));
788  }
789  }
790 #endif
791 
792  if (__kmp_env_consistency_check) {
793 #if KMP_USE_DYNAMIC_LOCK
794  if (status)
795  __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);
796  else
797  __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);
798 #else
799  if (status)
800  __kmp_push_sync(global_tid, ct_master, loc, NULL);
801  else
802  __kmp_check_sync(global_tid, ct_master, loc, NULL);
803 #endif
804  }
805 
806  return status;
807 }
808 
817 void __kmpc_end_master(ident_t *loc, kmp_int32 global_tid) {
818  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));
819 
820  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
821  KMP_POP_PARTITIONED_TIMER();
822 
823 #if OMPT_SUPPORT && OMPT_OPTIONAL
824  kmp_info_t *this_thr = __kmp_threads[global_tid];
825  kmp_team_t *team = this_thr->th.th_team;
826  if (ompt_enabled.ompt_callback_master) {
827  int tid = __kmp_tid_from_gtid(global_tid);
828  ompt_callbacks.ompt_callback(ompt_callback_master)(
829  ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
830  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
831  OMPT_GET_RETURN_ADDRESS(0));
832  }
833 #endif
834 
835  if (__kmp_env_consistency_check) {
836  if (global_tid < 0)
837  KMP_WARNING(ThreadIdentInvalid);
838 
839  if (KMP_MASTER_GTID(global_tid))
840  __kmp_pop_sync(global_tid, ct_master, loc);
841  }
842 }
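
// Illustrative sketch (an assumption, not taken from this file): the typical
// lowering of "#pragma omp master". Only the thread for which __kmpc_master()
// returns 1 executes the block and the matching __kmpc_end_master() call.
#if 0 /* example only; not built */
static void example_master(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_master(loc, gtid)) {
    /* master-only work */
    __kmpc_end_master(loc, gtid);
  }
  // note: the master construct has no implied barrier
}
#endif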
843 
851 void __kmpc_ordered(ident_t *loc, kmp_int32 gtid) {
852  int cid = 0;
853  kmp_info_t *th;
854  KMP_DEBUG_ASSERT(__kmp_init_serial);
855 
856  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));
857 
858  if (!TCR_4(__kmp_init_parallel))
859  __kmp_parallel_initialize();
860 
861  __kmp_resume_if_soft_paused();
862 
863 #if USE_ITT_BUILD
864  __kmp_itt_ordered_prep(gtid);
865 // TODO: ordered_wait_id
866 #endif /* USE_ITT_BUILD */
867 
868  th = __kmp_threads[gtid];
869 
870 #if OMPT_SUPPORT && OMPT_OPTIONAL
871  kmp_team_t *team;
872  ompt_wait_id_t lck;
873  void *codeptr_ra;
874  if (ompt_enabled.enabled) {
875  OMPT_STORE_RETURN_ADDRESS(gtid);
876  team = __kmp_team_from_gtid(gtid);
877  lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;
878  /* OMPT state update */
879  th->th.ompt_thread_info.wait_id = lck;
880  th->th.ompt_thread_info.state = ompt_state_wait_ordered;
881 
882  /* OMPT event callback */
883  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
884  if (ompt_enabled.ompt_callback_mutex_acquire) {
885  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
886  ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,
887  codeptr_ra);
888  }
889  }
890 #endif
891 
892  if (th->th.th_dispatch->th_deo_fcn != 0)
893  (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);
894  else
895  __kmp_parallel_deo(&gtid, &cid, loc);
896 
897 #if OMPT_SUPPORT && OMPT_OPTIONAL
898  if (ompt_enabled.enabled) {
899  /* OMPT state update */
900  th->th.ompt_thread_info.state = ompt_state_work_parallel;
901  th->th.ompt_thread_info.wait_id = 0;
902 
903  /* OMPT event callback */
904  if (ompt_enabled.ompt_callback_mutex_acquired) {
905  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
906  ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
907  }
908  }
909 #endif
910 
911 #if USE_ITT_BUILD
912  __kmp_itt_ordered_start(gtid);
913 #endif /* USE_ITT_BUILD */
914 }
915 
923 void __kmpc_end_ordered(ident_t *loc, kmp_int32 gtid) {
924  int cid = 0;
925  kmp_info_t *th;
926 
927  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));
928 
929 #if USE_ITT_BUILD
930  __kmp_itt_ordered_end(gtid);
931 // TODO: ordered_wait_id
932 #endif /* USE_ITT_BUILD */
933 
934  th = __kmp_threads[gtid];
935 
936  if (th->th.th_dispatch->th_dxo_fcn != 0)
937  (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);
938  else
939  __kmp_parallel_dxo(&gtid, &cid, loc);
940 
941 #if OMPT_SUPPORT && OMPT_OPTIONAL
942  OMPT_STORE_RETURN_ADDRESS(gtid);
943  if (ompt_enabled.ompt_callback_mutex_released) {
944  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
945  ompt_mutex_ordered,
946  (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
947  ->t.t_ordered.dt.t_value,
948  OMPT_LOAD_RETURN_ADDRESS(gtid));
949  }
950 #endif
951 }
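
// Illustrative sketch (an assumption, not taken from this file): how the body
// of a "#pragma omp ordered" region inside an ordered loop is bracketed.
#if 0 /* example only; not built */
static void example_ordered_iteration(ident_t *loc, kmp_int32 gtid) {
  __kmpc_ordered(loc, gtid); // waits until it is this iteration's turn
  /* ordered region body */
  __kmpc_end_ordered(loc, gtid); // lets the next iteration in order proceed
}
#endif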
952 
953 #if KMP_USE_DYNAMIC_LOCK
954 
955 static __forceinline void
956 __kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
957  kmp_int32 gtid, kmp_indirect_locktag_t tag) {
958  // A pointer to the allocated indirect lock is written to crit; the index
959  // is ignored.
960  void *idx;
961  kmp_indirect_lock_t **lck;
962  lck = (kmp_indirect_lock_t **)crit;
963  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
964  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
965  KMP_SET_I_LOCK_LOCATION(ilk, loc);
966  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);
967  KA_TRACE(20,
968  ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));
969 #if USE_ITT_BUILD
970  __kmp_itt_critical_creating(ilk->lock, loc);
971 #endif
972  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);
973  if (status == 0) {
974 #if USE_ITT_BUILD
975  __kmp_itt_critical_destroyed(ilk->lock);
976 #endif
977  // We don't really need to destroy the unclaimed lock here since it will be
978  // cleaned up at program exit.
979  // KMP_D_LOCK_FUNC(&idx, destroy)((kmp_dyna_lock_t *)&idx);
980  }
981  KMP_DEBUG_ASSERT(*lck != NULL);
982 }
983 
984 // Fast-path acquire tas lock
985 #define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
986  { \
987  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
988  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
989  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
990  if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
991  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
992  kmp_uint32 spins; \
993  KMP_FSYNC_PREPARE(l); \
994  KMP_INIT_YIELD(spins); \
995  kmp_backoff_t backoff = __kmp_spin_backoff_params; \
996  do { \
997  if (TCR_4(__kmp_nth) > \
998  (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
999  KMP_YIELD(TRUE); \
1000  } else { \
1001  KMP_YIELD_SPIN(spins); \
1002  } \
1003  __kmp_spin_backoff(&backoff); \
1004  } while ( \
1005  KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
1006  !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
1007  } \
1008  KMP_FSYNC_ACQUIRED(l); \
1009  }
1010 
1011 // Fast-path test tas lock
1012 #define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
1013  { \
1014  kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
1015  kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
1016  kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
1017  rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
1018  __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \
1019  }
1020 
1021 // Fast-path release tas lock
1022 #define KMP_RELEASE_TAS_LOCK(lock, gtid) \
1023  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
1024 
1025 #if KMP_USE_FUTEX
1026 
1027 #include <sys/syscall.h>
1028 #include <unistd.h>
1029 #ifndef FUTEX_WAIT
1030 #define FUTEX_WAIT 0
1031 #endif
1032 #ifndef FUTEX_WAKE
1033 #define FUTEX_WAKE 1
1034 #endif
1035 
1036 // Fast-path acquire futex lock
1037 #define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
1038  { \
1039  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1040  kmp_int32 gtid_code = (gtid + 1) << 1; \
1041  KMP_MB(); \
1042  KMP_FSYNC_PREPARE(ftx); \
1043  kmp_int32 poll_val; \
1044  while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
1045  &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1046  KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
1047  kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
1048  if (!cond) { \
1049  if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
1050  poll_val | \
1051  KMP_LOCK_BUSY(1, futex))) { \
1052  continue; \
1053  } \
1054  poll_val |= KMP_LOCK_BUSY(1, futex); \
1055  } \
1056  kmp_int32 rc; \
1057  if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
1058  NULL, NULL, 0)) != 0) { \
1059  continue; \
1060  } \
1061  gtid_code |= 1; \
1062  } \
1063  KMP_FSYNC_ACQUIRED(ftx); \
1064  }
1065 
1066 // Fast-path test futex lock
1067 #define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
1068  { \
1069  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1070  if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
1071  KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
1072  KMP_FSYNC_ACQUIRED(ftx); \
1073  rc = TRUE; \
1074  } else { \
1075  rc = FALSE; \
1076  } \
1077  }
1078 
1079 // Fast-path release futex lock
1080 #define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
1081  { \
1082  kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
1083  KMP_MB(); \
1084  KMP_FSYNC_RELEASING(ftx); \
1085  kmp_int32 poll_val = \
1086  KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
1087  if (KMP_LOCK_STRIP(poll_val) & 1) { \
1088  syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
1089  KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
1090  } \
1091  KMP_MB(); \
1092  KMP_YIELD_OVERSUB(); \
1093  }
1094 
1095 #endif // KMP_USE_FUTEX
1096 
1097 #else // KMP_USE_DYNAMIC_LOCK
1098 
1099 static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,
1100  ident_t const *loc,
1101  kmp_int32 gtid) {
1102  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;
1103 
1104  // Because of the double-check, the following load doesn't need to be volatile
1105  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1106 
1107  if (lck == NULL) {
1108  void *idx;
1109 
1110  // Allocate & initialize the lock.
1111  // Remember alloc'ed locks in table in order to free them in __kmp_cleanup()
1112  lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
1113  __kmp_init_user_lock_with_checks(lck);
1114  __kmp_set_user_lock_location(lck, loc);
1115 #if USE_ITT_BUILD
1116  __kmp_itt_critical_creating(lck);
1117 // __kmp_itt_critical_creating() should be called *before* the first usage
1118 // of the underlying lock. It is the only place where we can guarantee it.
1119 // There is a chance the lock will be destroyed with no usage, but that is
1120 // not a problem, because this is not a real event seen by the user, just
1121 // setting a name for the object (lock). See more details in kmp_itt.h.
1122 #endif /* USE_ITT_BUILD */
1123 
1124  // Use a cmpxchg instruction to slam the start of the critical section with
1125  // the lock pointer. If another thread beat us to it, deallocate the lock,
1126  // and use the lock that the other thread allocated.
1127  int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);
1128 
1129  if (status == 0) {
1130 // Deallocate the lock and reload the value.
1131 #if USE_ITT_BUILD
1132  __kmp_itt_critical_destroyed(lck);
1133 // Let ITT know the lock is destroyed and the same memory location may be reused
1134 // for another purpose.
1135 #endif /* USE_ITT_BUILD */
1136  __kmp_destroy_user_lock_with_checks(lck);
1137  __kmp_user_lock_free(&idx, gtid, lck);
1138  lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
1139  KMP_DEBUG_ASSERT(lck != NULL);
1140  }
1141  }
1142  return lck;
1143 }
1144 
1145 #endif // KMP_USE_DYNAMIC_LOCK
1146 
1157 void __kmpc_critical(ident_t *loc, kmp_int32 global_tid,
1158  kmp_critical_name *crit) {
1159 #if KMP_USE_DYNAMIC_LOCK
1160 #if OMPT_SUPPORT && OMPT_OPTIONAL
1161  OMPT_STORE_RETURN_ADDRESS(global_tid);
1162 #endif // OMPT_SUPPORT
1163  __kmpc_critical_with_hint(loc, global_tid, crit, omp_lock_hint_none);
1164 #else
1165  KMP_COUNT_BLOCK(OMP_CRITICAL);
1166 #if OMPT_SUPPORT && OMPT_OPTIONAL
1167  ompt_state_t prev_state = ompt_state_undefined;
1168  ompt_thread_info_t ti;
1169 #endif
1170  kmp_user_lock_p lck;
1171 
1172  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1173 
1174  // TODO: add THR_OVHD_STATE
1175 
1176  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1177  KMP_CHECK_USER_LOCK_INIT();
1178 
1179  if ((__kmp_user_lock_kind == lk_tas) &&
1180  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1181  lck = (kmp_user_lock_p)crit;
1182  }
1183 #if KMP_USE_FUTEX
1184  else if ((__kmp_user_lock_kind == lk_futex) &&
1185  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1186  lck = (kmp_user_lock_p)crit;
1187  }
1188 #endif
1189  else { // ticket, queuing or drdpa
1190  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
1191  }
1192 
1193  if (__kmp_env_consistency_check)
1194  __kmp_push_sync(global_tid, ct_critical, loc, lck);
1195 
1196 // Since the critical directive binds to all threads, not just the current
1197 // team, we have to check this even if we are in a serialized team.
1198 // Also, even if we are the uber thread, we still have to acquire the lock,
1199 // as we have to contend with sibling threads.
1200 
1201 #if USE_ITT_BUILD
1202  __kmp_itt_critical_acquiring(lck);
1203 #endif /* USE_ITT_BUILD */
1204 #if OMPT_SUPPORT && OMPT_OPTIONAL
1205  OMPT_STORE_RETURN_ADDRESS(global_tid);
1206  void *codeptr_ra = NULL;
1207  if (ompt_enabled.enabled) {
1208  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1209  /* OMPT state update */
1210  prev_state = ti.state;
1211  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1212  ti.state = ompt_state_wait_critical;
1213 
1214  /* OMPT event callback */
1215  codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1216  if (ompt_enabled.ompt_callback_mutex_acquire) {
1217  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1218  ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
1219  (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1220  }
1221  }
1222 #endif
1223  // The value of 'crit' should be usable as the critical_id of the critical
1224  // section directive.
1225  __kmp_acquire_user_lock_with_checks(lck, global_tid);
1226 
1227 #if USE_ITT_BUILD
1228  __kmp_itt_critical_acquired(lck);
1229 #endif /* USE_ITT_BUILD */
1230 #if OMPT_SUPPORT && OMPT_OPTIONAL
1231  if (ompt_enabled.enabled) {
1232  /* OMPT state update */
1233  ti.state = prev_state;
1234  ti.wait_id = 0;
1235 
1236  /* OMPT event callback */
1237  if (ompt_enabled.ompt_callback_mutex_acquired) {
1238  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1239  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);
1240  }
1241  }
1242 #endif
1243  KMP_POP_PARTITIONED_TIMER();
1244 
1245  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1246  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1247 #endif // KMP_USE_DYNAMIC_LOCK
1248 }
1249 
1250 #if KMP_USE_DYNAMIC_LOCK
1251 
1252 // Converts the given hint to an internal lock implementation
1253 static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {
1254 #if KMP_USE_TSX
1255 #define KMP_TSX_LOCK(seq) lockseq_##seq
1256 #else
1257 #define KMP_TSX_LOCK(seq) __kmp_user_lock_seq
1258 #endif
1259 
1260 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1261 #define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)
1262 #else
1263 #define KMP_CPUINFO_RTM 0
1264 #endif
1265 
1266  // Hints that do not require further logic
1267  if (hint & kmp_lock_hint_hle)
1268  return KMP_TSX_LOCK(hle);
1269  if (hint & kmp_lock_hint_rtm)
1270  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
1271  if (hint & kmp_lock_hint_adaptive)
1272  return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;
1273 
1274  // Rule out conflicting hints first by returning the default lock
1275  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
1276  return __kmp_user_lock_seq;
1277  if ((hint & omp_lock_hint_speculative) &&
1278  (hint & omp_lock_hint_nonspeculative))
1279  return __kmp_user_lock_seq;
1280 
1281  // Do not even consider speculation when it appears to be contended
1282  if (hint & omp_lock_hint_contended)
1283  return lockseq_queuing;
1284 
1285  // Uncontended lock without speculation
1286  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))
1287  return lockseq_tas;
1288 
1289  // HLE lock for speculation
1290  if (hint & omp_lock_hint_speculative)
1291  return KMP_TSX_LOCK(hle);
1292 
1293  return __kmp_user_lock_seq;
1294 }
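
// Illustrative sketch (an assumption, not taken from this file): the
// user-level OpenMP lock API whose hint argument is eventually mapped by
// __kmp_map_hint_to_lock(). Per the code above, a speculative hint selects an
// HLE lock when TSX is available and falls back to the default lock otherwise.
#if 0 /* example only; not built */
static omp_lock_t example_lock;

static void example_hinted_lock(void) {
  omp_init_lock_with_hint(&example_lock, omp_lock_hint_speculative);
  omp_set_lock(&example_lock);
  /* ... protected work ... */
  omp_unset_lock(&example_lock);
  omp_destroy_lock(&example_lock);
}
#endif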
1295 
1296 #if OMPT_SUPPORT && OMPT_OPTIONAL
1297 #if KMP_USE_DYNAMIC_LOCK
1298 static kmp_mutex_impl_t
1299 __ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
1300  if (user_lock) {
1301  switch (KMP_EXTRACT_D_TAG(user_lock)) {
1302  case 0:
1303  break;
1304 #if KMP_USE_FUTEX
1305  case locktag_futex:
1306  return kmp_mutex_impl_queuing;
1307 #endif
1308  case locktag_tas:
1309  return kmp_mutex_impl_spin;
1310 #if KMP_USE_TSX
1311  case locktag_hle:
1312  return kmp_mutex_impl_speculative;
1313 #endif
1314  default:
1315  return kmp_mutex_impl_none;
1316  }
1317  ilock = KMP_LOOKUP_I_LOCK(user_lock);
1318  }
1319  KMP_ASSERT(ilock);
1320  switch (ilock->type) {
1321 #if KMP_USE_TSX
1322  case locktag_adaptive:
1323  case locktag_rtm:
1324  return kmp_mutex_impl_speculative;
1325 #endif
1326  case locktag_nested_tas:
1327  return kmp_mutex_impl_spin;
1328 #if KMP_USE_FUTEX
1329  case locktag_nested_futex:
1330 #endif
1331  case locktag_ticket:
1332  case locktag_queuing:
1333  case locktag_drdpa:
1334  case locktag_nested_ticket:
1335  case locktag_nested_queuing:
1336  case locktag_nested_drdpa:
1337  return kmp_mutex_impl_queuing;
1338  default:
1339  return kmp_mutex_impl_none;
1340  }
1341 }
1342 #else
1343 // For locks without dynamic binding
1344 static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
1345  switch (__kmp_user_lock_kind) {
1346  case lk_tas:
1347  return kmp_mutex_impl_spin;
1348 #if KMP_USE_FUTEX
1349  case lk_futex:
1350 #endif
1351  case lk_ticket:
1352  case lk_queuing:
1353  case lk_drdpa:
1354  return kmp_mutex_impl_queuing;
1355 #if KMP_USE_TSX
1356  case lk_hle:
1357  case lk_rtm:
1358  case lk_adaptive:
1359  return kmp_mutex_impl_speculative;
1360 #endif
1361  default:
1362  return kmp_mutex_impl_none;
1363  }
1364 }
1365 #endif // KMP_USE_DYNAMIC_LOCK
1366 #endif // OMPT_SUPPORT && OMPT_OPTIONAL
1367 
1381 void __kmpc_critical_with_hint(ident_t *loc, kmp_int32 global_tid,
1382  kmp_critical_name *crit, uint32_t hint) {
1383  KMP_COUNT_BLOCK(OMP_CRITICAL);
1384  kmp_user_lock_p lck;
1385 #if OMPT_SUPPORT && OMPT_OPTIONAL
1386  ompt_state_t prev_state = ompt_state_undefined;
1387  ompt_thread_info_t ti;
1388  // This is the case if called from __kmpc_critical:
1389  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);
1390  if (!codeptr)
1391  codeptr = OMPT_GET_RETURN_ADDRESS(0);
1392 #endif
1393 
1394  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));
1395 
1396  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
1397  // Check if it is initialized.
1398  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
1399  if (*lk == 0) {
1400  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
1401  if (KMP_IS_D_LOCK(lckseq)) {
1402  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
1403  KMP_GET_D_TAG(lckseq));
1404  } else {
1405  __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));
1406  }
1407  }
1408  // Branch for accessing the actual lock object and set operation. This
1409  // branching is inevitable since this lock initialization does not follow the
1410  // normal dispatch path (lock table is not used).
1411  if (KMP_EXTRACT_D_TAG(lk) != 0) {
1412  lck = (kmp_user_lock_p)lk;
1413  if (__kmp_env_consistency_check) {
1414  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1415  __kmp_map_hint_to_lock(hint));
1416  }
1417 #if USE_ITT_BUILD
1418  __kmp_itt_critical_acquiring(lck);
1419 #endif
1420 #if OMPT_SUPPORT && OMPT_OPTIONAL
1421  if (ompt_enabled.enabled) {
1422  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1423  /* OMPT state update */
1424  prev_state = ti.state;
1425  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1426  ti.state = ompt_state_wait_critical;
1427 
1428  /* OMPT event callback */
1429  if (ompt_enabled.ompt_callback_mutex_acquire) {
1430  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1431  ompt_mutex_critical, (unsigned int)hint,
1432  __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,
1433  codeptr);
1434  }
1435  }
1436 #endif
1437 #if KMP_USE_INLINED_TAS
1438  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1439  KMP_ACQUIRE_TAS_LOCK(lck, global_tid);
1440  } else
1441 #elif KMP_USE_INLINED_FUTEX
1442  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1443  KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);
1444  } else
1445 #endif
1446  {
1447  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
1448  }
1449  } else {
1450  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
1451  lck = ilk->lock;
1452  if (__kmp_env_consistency_check) {
1453  __kmp_push_sync(global_tid, ct_critical, loc, lck,
1454  __kmp_map_hint_to_lock(hint));
1455  }
1456 #if USE_ITT_BUILD
1457  __kmp_itt_critical_acquiring(lck);
1458 #endif
1459 #if OMPT_SUPPORT && OMPT_OPTIONAL
1460  if (ompt_enabled.enabled) {
1461  ti = __kmp_threads[global_tid]->th.ompt_thread_info;
1462  /* OMPT state update */
1463  prev_state = ti.state;
1464  ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
1465  ti.state = ompt_state_wait_critical;
1466 
1467  /* OMPT event callback */
1468  if (ompt_enabled.ompt_callback_mutex_acquire) {
1469  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
1470  ompt_mutex_critical, (unsigned int)hint,
1471  __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,
1472  codeptr);
1473  }
1474  }
1475 #endif
1476  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
1477  }
1478  KMP_POP_PARTITIONED_TIMER();
1479 
1480 #if USE_ITT_BUILD
1481  __kmp_itt_critical_acquired(lck);
1482 #endif /* USE_ITT_BUILD */
1483 #if OMPT_SUPPORT && OMPT_OPTIONAL
1484  if (ompt_enabled.enabled) {
1485  /* OMPT state update */
1486  ti.state = prev_state;
1487  ti.wait_id = 0;
1488 
1489  /* OMPT event callback */
1490  if (ompt_enabled.ompt_callback_mutex_acquired) {
1491  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
1492  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
1493  }
1494  }
1495 #endif
1496 
1497  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
1498  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
1499 } // __kmpc_critical_with_hint
1500 
1501 #endif // KMP_USE_DYNAMIC_LOCK
1502 
1512 void __kmpc_end_critical(ident_t *loc, kmp_int32 global_tid,
1513  kmp_critical_name *crit) {
1514  kmp_user_lock_p lck;
1515 
1516  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));
1517 
1518 #if KMP_USE_DYNAMIC_LOCK
1519  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
1520  lck = (kmp_user_lock_p)crit;
1521  KMP_ASSERT(lck != NULL);
1522  if (__kmp_env_consistency_check) {
1523  __kmp_pop_sync(global_tid, ct_critical, loc);
1524  }
1525 #if USE_ITT_BUILD
1526  __kmp_itt_critical_releasing(lck);
1527 #endif
1528 #if KMP_USE_INLINED_TAS
1529  if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
1530  KMP_RELEASE_TAS_LOCK(lck, global_tid);
1531  } else
1532 #elif KMP_USE_INLINED_FUTEX
1533  if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
1534  KMP_RELEASE_FUTEX_LOCK(lck, global_tid);
1535  } else
1536 #endif
1537  {
1538  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
1539  }
1540  } else {
1541  kmp_indirect_lock_t *ilk =
1542  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
1543  KMP_ASSERT(ilk != NULL);
1544  lck = ilk->lock;
1545  if (__kmp_env_consistency_check) {
1546  __kmp_pop_sync(global_tid, ct_critical, loc);
1547  }
1548 #if USE_ITT_BUILD
1549  __kmp_itt_critical_releasing(lck);
1550 #endif
1551  KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);
1552  }
1553 
1554 #else // KMP_USE_DYNAMIC_LOCK
1555 
1556  if ((__kmp_user_lock_kind == lk_tas) &&
1557  (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
1558  lck = (kmp_user_lock_p)crit;
1559  }
1560 #if KMP_USE_FUTEX
1561  else if ((__kmp_user_lock_kind == lk_futex) &&
1562  (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
1563  lck = (kmp_user_lock_p)crit;
1564  }
1565 #endif
1566  else { // ticket, queuing or drdpa
1567  lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));
1568  }
1569 
1570  KMP_ASSERT(lck != NULL);
1571 
1572  if (__kmp_env_consistency_check)
1573  __kmp_pop_sync(global_tid, ct_critical, loc);
1574 
1575 #if USE_ITT_BUILD
1576  __kmp_itt_critical_releasing(lck);
1577 #endif /* USE_ITT_BUILD */
1578  // The value of 'crit' should be usable as the critical_id of the critical
1579  // section directive.
1580  __kmp_release_user_lock_with_checks(lck, global_tid);
1581 
1582 #endif // KMP_USE_DYNAMIC_LOCK
1583 
1584 #if OMPT_SUPPORT && OMPT_OPTIONAL
1585  /* OMPT release event triggers after lock is released; place here to trigger
1586  * for all #if branches */
1587  OMPT_STORE_RETURN_ADDRESS(global_tid);
1588  if (ompt_enabled.ompt_callback_mutex_released) {
1589  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
1590  ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
1591  OMPT_LOAD_RETURN_ADDRESS(0));
1592  }
1593 #endif
1594 
1595  KMP_POP_PARTITIONED_TIMER();
1596  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
1597 }
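
// Illustrative sketch (an assumption, not taken from this file): the typical
// lowering of "#pragma omp critical (example_name)". Each named critical
// section uses one statically allocated, zero-initialized kmp_critical_name
// that the runtime lazily turns into a lock (see above).
#if 0 /* example only; not built */
static kmp_critical_name example_name_crit; // one lock-word array per name

static void example_critical(ident_t *loc, kmp_int32 gtid) {
  __kmpc_critical(loc, gtid, &example_name_crit);
  /* critical section body */
  __kmpc_end_critical(loc, gtid, &example_name_crit);
}
#endif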
1598 
1608 kmp_int32 __kmpc_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1609  int status;
1610 
1611  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));
1612 
1613  if (!TCR_4(__kmp_init_parallel))
1614  __kmp_parallel_initialize();
1615 
1616  __kmp_resume_if_soft_paused();
1617 
1618  if (__kmp_env_consistency_check)
1619  __kmp_check_barrier(global_tid, ct_barrier, loc);
1620 
1621 #if OMPT_SUPPORT
1622  ompt_frame_t *ompt_frame;
1623  if (ompt_enabled.enabled) {
1624  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1625  if (ompt_frame->enter_frame.ptr == NULL)
1626  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1627  OMPT_STORE_RETURN_ADDRESS(global_tid);
1628  }
1629 #endif
1630 #if USE_ITT_NOTIFY
1631  __kmp_threads[global_tid]->th.th_ident = loc;
1632 #endif
1633  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
1634 #if OMPT_SUPPORT && OMPT_OPTIONAL
1635  if (ompt_enabled.enabled) {
1636  ompt_frame->enter_frame = ompt_data_none;
1637  }
1638 #endif
1639 
1640  return (status != 0) ? 0 : 1;
1641 }
1642 
1652 void __kmpc_end_barrier_master(ident_t *loc, kmp_int32 global_tid) {
1653  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));
1654 
1655  __kmp_end_split_barrier(bs_plain_barrier, global_tid);
1656 }
1657 
1668 kmp_int32 __kmpc_barrier_master_nowait(ident_t *loc, kmp_int32 global_tid) {
1669  kmp_int32 ret;
1670 
1671  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));
1672 
1673  if (!TCR_4(__kmp_init_parallel))
1674  __kmp_parallel_initialize();
1675 
1676  __kmp_resume_if_soft_paused();
1677 
1678  if (__kmp_env_consistency_check) {
1679  if (loc == 0) {
1680  KMP_WARNING(ConstructIdentInvalid); // ??? What does it mean for the user?
1681  }
1682  __kmp_check_barrier(global_tid, ct_barrier, loc);
1683  }
1684 
1685 #if OMPT_SUPPORT
1686  ompt_frame_t *ompt_frame;
1687  if (ompt_enabled.enabled) {
1688  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
1689  if (ompt_frame->enter_frame.ptr == NULL)
1690  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
1691  OMPT_STORE_RETURN_ADDRESS(global_tid);
1692  }
1693 #endif
1694 #if USE_ITT_NOTIFY
1695  __kmp_threads[global_tid]->th.th_ident = loc;
1696 #endif
1697  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
1698 #if OMPT_SUPPORT && OMPT_OPTIONAL
1699  if (ompt_enabled.enabled) {
1700  ompt_frame->enter_frame = ompt_data_none;
1701  }
1702 #endif
1703 
1704  ret = __kmpc_master(loc, global_tid);
1705 
1706  if (__kmp_env_consistency_check) {
1707  /* there is no __kmpc_end_master call, so the (stats) */
1708  /* actions of __kmpc_end_master are done here */
1709 
1710  if (global_tid < 0) {
1711  KMP_WARNING(ThreadIdentInvalid);
1712  }
1713  if (ret) {
1714  /* only one thread should do the pop since only */
1715  /* one did the push (see __kmpc_master()) */
1716 
1717  __kmp_pop_sync(global_tid, ct_master, loc);
1718  }
1719  }
1720 
1721  return (ret);
1722 }
1723 
1724 /* The BARRIER for a SINGLE process section is always explicit */
1736 kmp_int32 __kmpc_single(ident_t *loc, kmp_int32 global_tid) {
1737  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);
1738 
1739  if (rc) {
1740  // We are going to execute the single statement, so we should count it.
1741  KMP_COUNT_BLOCK(OMP_SINGLE);
1742  KMP_PUSH_PARTITIONED_TIMER(OMP_single);
1743  }
1744 
1745 #if OMPT_SUPPORT && OMPT_OPTIONAL
1746  kmp_info_t *this_thr = __kmp_threads[global_tid];
1747  kmp_team_t *team = this_thr->th.th_team;
1748  int tid = __kmp_tid_from_gtid(global_tid);
1749 
1750  if (ompt_enabled.enabled) {
1751  if (rc) {
1752  if (ompt_enabled.ompt_callback_work) {
1753  ompt_callbacks.ompt_callback(ompt_callback_work)(
1754  ompt_work_single_executor, ompt_scope_begin,
1755  &(team->t.ompt_team_info.parallel_data),
1756  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1757  1, OMPT_GET_RETURN_ADDRESS(0));
1758  }
1759  } else {
1760  if (ompt_enabled.ompt_callback_work) {
1761  ompt_callbacks.ompt_callback(ompt_callback_work)(
1762  ompt_work_single_other, ompt_scope_begin,
1763  &(team->t.ompt_team_info.parallel_data),
1764  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1765  1, OMPT_GET_RETURN_ADDRESS(0));
1766  ompt_callbacks.ompt_callback(ompt_callback_work)(
1767  ompt_work_single_other, ompt_scope_end,
1768  &(team->t.ompt_team_info.parallel_data),
1769  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
1770  1, OMPT_GET_RETURN_ADDRESS(0));
1771  }
1772  }
1773  }
1774 #endif
1775 
1776  return rc;
1777 }
1778 
1788 void __kmpc_end_single(ident_t *loc, kmp_int32 global_tid) {
1789  __kmp_exit_single(global_tid);
1790  KMP_POP_PARTITIONED_TIMER();
1791 
1792 #if OMPT_SUPPORT && OMPT_OPTIONAL
1793  kmp_info_t *this_thr = __kmp_threads[global_tid];
1794  kmp_team_t *team = this_thr->th.th_team;
1795  int tid = __kmp_tid_from_gtid(global_tid);
1796 
1797  if (ompt_enabled.ompt_callback_work) {
1798  ompt_callbacks.ompt_callback(ompt_callback_work)(
1799  ompt_work_single_executor, ompt_scope_end,
1800  &(team->t.ompt_team_info.parallel_data),
1801  &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
1802  OMPT_GET_RETURN_ADDRESS(0));
1803  }
1804 #endif
1805 }
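
// Illustrative sketch (an assumption, not taken from this file): the typical
// lowering of "#pragma omp single" without a nowait clause.
#if 0 /* example only; not built */
static void example_single(ident_t *loc, kmp_int32 gtid) {
  if (__kmpc_single(loc, gtid)) {
    /* single region body, executed by exactly one thread */
    __kmpc_end_single(loc, gtid);
  }
  __kmpc_barrier(loc, gtid); // implied barrier (omitted with nowait)
}
#endif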
1806 
1814 void __kmpc_for_static_fini(ident_t *loc, kmp_int32 global_tid) {
1815  KMP_POP_PARTITIONED_TIMER();
1816  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));
1817 
1818 #if OMPT_SUPPORT && OMPT_OPTIONAL
1819  if (ompt_enabled.ompt_callback_work) {
1820  ompt_work_t ompt_work_type = ompt_work_loop;
1821  ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
1822  ompt_task_info_t *task_info = __ompt_get_task_info_object(0);
1823  // Determine workshare type
1824  if (loc != NULL) {
1825  if ((loc->flags & KMP_IDENT_WORK_LOOP) != 0) {
1826  ompt_work_type = ompt_work_loop;
1827  } else if ((loc->flags & KMP_IDENT_WORK_SECTIONS) != 0) {
1828  ompt_work_type = ompt_work_sections;
1829  } else if ((loc->flags & KMP_IDENT_WORK_DISTRIBUTE) != 0) {
1830  ompt_work_type = ompt_work_distribute;
1831  } else {
1832  // use default set above.
1833  // a warning about this case is provided in __kmpc_for_static_init
1834  }
1835  KMP_DEBUG_ASSERT(ompt_work_type);
1836  }
1837  ompt_callbacks.ompt_callback(ompt_callback_work)(
1838  ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
1839  &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));
1840  }
1841 #endif
1842  if (__kmp_env_consistency_check)
1843  __kmp_pop_workshare(global_tid, ct_pdo, loc);
1844 }
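
// Illustrative sketch (an assumption, not taken from this file): the
// init/fini pair a compiler might emit for "#pragma omp for schedule(static)"
// over iterations 0..99. __kmpc_for_static_init_4() is defined in
// kmp_sched.cpp; its use here reflects the typical lowering, not this file.
#if 0 /* example only; not built */
static void example_static_for(ident_t *loc, kmp_int32 gtid) {
  kmp_int32 lower = 0, upper = 99, stride = 1, last = 0;
  __kmpc_for_static_init_4(loc, gtid, kmp_sch_static, &last, &lower, &upper,
                           &stride, /*incr=*/1, /*chunk=*/1);
  for (kmp_int32 i = lower; i <= upper; ++i) {
    /* loop body for this thread's chunk */
  }
  __kmpc_for_static_fini(loc, gtid);
  __kmpc_barrier(loc, gtid); // implied barrier (omitted with nowait)
}
#endif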
1845 
1846 // User routines which take C-style arguments (call by value)
1847 // different from the Fortran equivalent routines
1848 
1849 void ompc_set_num_threads(int arg) {
1850  // !!!!! TODO: check the per-task binding
1851  __kmp_set_num_threads(arg, __kmp_entry_gtid());
1852 }
1853 
1854 void ompc_set_dynamic(int flag) {
1855  kmp_info_t *thread;
1856 
1857  /* For the thread-private implementation of the internal controls */
1858  thread = __kmp_entry_thread();
1859 
1860  __kmp_save_internal_controls(thread);
1861 
1862  set__dynamic(thread, flag ? TRUE : FALSE);
1863 }
1864 
1865 void ompc_set_nested(int flag) {
1866  kmp_info_t *thread;
1867 
1868  /* For the thread-private internal controls implementation */
1869  thread = __kmp_entry_thread();
1870 
1871  __kmp_save_internal_controls(thread);
1872 
1873  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);
1874 }
1875 
1876 void ompc_set_max_active_levels(int max_active_levels) {
1877  /* TO DO */
1878  /* we want per-task implementation of this internal control */
1879 
1880  /* For the per-thread internal controls implementation */
1881  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);
1882 }
1883 
1884 void ompc_set_schedule(omp_sched_t kind, int modifier) {
1885  // !!!!! TODO: check the per-task binding
1886  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);
1887 }
1888 
1889 int ompc_get_ancestor_thread_num(int level) {
1890  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);
1891 }
1892 
1893 int ompc_get_team_size(int level) {
1894  return __kmp_get_team_size(__kmp_entry_gtid(), level);
1895 }
1896 
1897 /* OpenMP 5.0 Affinity Format API */
1898 
1899 void ompc_set_affinity_format(char const *format) {
1900  if (!__kmp_init_serial) {
1901  __kmp_serial_initialize();
1902  }
1903  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
1904  format, KMP_STRLEN(format) + 1);
1905 }
1906 
1907 size_t ompc_get_affinity_format(char *buffer, size_t size) {
1908  size_t format_size;
1909  if (!__kmp_init_serial) {
1910  __kmp_serial_initialize();
1911  }
1912  format_size = KMP_STRLEN(__kmp_affinity_format);
1913  if (buffer && size) {
1914  __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,
1915  format_size + 1);
1916  }
1917  return format_size;
1918 }
1919 
1920 void ompc_display_affinity(char const *format) {
1921  int gtid;
1922  if (!TCR_4(__kmp_init_middle)) {
1923  __kmp_middle_initialize();
1924  }
1925  gtid = __kmp_get_gtid();
1926  __kmp_aux_display_affinity(gtid, format);
1927 }
1928 
1929 size_t ompc_capture_affinity(char *buffer, size_t buf_size,
1930  char const *format) {
1931  int gtid;
1932  size_t num_required;
1933  kmp_str_buf_t capture_buf;
1934  if (!TCR_4(__kmp_init_middle)) {
1935  __kmp_middle_initialize();
1936  }
1937  gtid = __kmp_get_gtid();
1938  __kmp_str_buf_init(&capture_buf);
1939  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
1940  if (buffer && buf_size) {
1941  __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
1942  capture_buf.used + 1);
1943  }
1944  __kmp_str_buf_free(&capture_buf);
1945  return num_required;
1946 }
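
// Illustrative sketch (an assumption, not taken from this file): the OpenMP
// 5.0 user-level affinity-format API declared in omp.h, which is serviced by
// the ompc_* entry points above. The format string is just an example.
#if 0 /* example only; not built */
static void example_affinity_format(void) {
  omp_set_affinity_format("thread %n is bound to %A");
  char buf[256];
  // A NULL (or empty) format means "use the current affinity-format ICV".
  size_t needed = omp_capture_affinity(buf, sizeof(buf), NULL);
  if (needed < sizeof(buf))
    omp_display_affinity(NULL); // print using the current format
}
#endif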
1947 
1948 void kmpc_set_stacksize(int arg) {
1949  // __kmp_aux_set_stacksize initializes the library if needed
1950  __kmp_aux_set_stacksize(arg);
1951 }
1952 
1953 void kmpc_set_stacksize_s(size_t arg) {
1954  // __kmp_aux_set_stacksize initializes the library if needed
1955  __kmp_aux_set_stacksize(arg);
1956 }
1957 
1958 void kmpc_set_blocktime(int arg) {
1959  int gtid, tid;
1960  kmp_info_t *thread;
1961 
1962  gtid = __kmp_entry_gtid();
1963  tid = __kmp_tid_from_gtid(gtid);
1964  thread = __kmp_thread_from_gtid(gtid);
1965 
1966  __kmp_aux_set_blocktime(arg, thread, tid);
1967 }
1968 
1969 void kmpc_set_library(int arg) {
1970  // __kmp_user_set_library initializes the library if needed
1971  __kmp_user_set_library((enum library_type)arg);
1972 }
1973 
1974 void kmpc_set_defaults(char const *str) {
1975  // __kmp_aux_set_defaults initializes the library if needed
1976  __kmp_aux_set_defaults(str, KMP_STRLEN(str));
1977 }
1978 
1979 void kmpc_set_disp_num_buffers(int arg) {
1980  // ignore after initialization because some teams have already
1981  // allocated dispatch buffers
1982  if (__kmp_init_serial == 0 && arg > 0)
1983  __kmp_dispatch_num_buffers = arg;
1984 }
1985 
1986 int kmpc_set_affinity_mask_proc(int proc, void **mask) {
1987 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1988  return -1;
1989 #else
1990  if (!TCR_4(__kmp_init_middle)) {
1991  __kmp_middle_initialize();
1992  }
1993  return __kmp_aux_set_affinity_mask_proc(proc, mask);
1994 #endif
1995 }
1996 
1997 int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
1998 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
1999  return -1;
2000 #else
2001  if (!TCR_4(__kmp_init_middle)) {
2002  __kmp_middle_initialize();
2003  }
2004  return __kmp_aux_unset_affinity_mask_proc(proc, mask);
2005 #endif
2006 }
2007 
2008 int kmpc_get_affinity_mask_proc(int proc, void **mask) {
2009 #if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED
2010  return -1;
2011 #else
2012  if (!TCR_4(__kmp_init_middle)) {
2013  __kmp_middle_initialize();
2014  }
2015  return __kmp_aux_get_affinity_mask_proc(proc, mask);
2016 #endif
2017 }
2018 
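// The kmpc_* affinity-mask entry points above back the kmp_*_affinity_mask_proc()
// extension API; on KMP_STUB builds or platforms without affinity support they
// simply return -1. A minimal sketch, assuming the kmp_* affinity extensions
// declared in this library's omp.h:
#if 0 /* usage sketch only; not compiled into the runtime */
#include <omp.h>

static int pin_calling_thread_to_proc(int proc) {
  kmp_affinity_mask_t mask;
  kmp_create_affinity_mask(&mask);
  if (kmp_set_affinity_mask_proc(proc, &mask) != 0) { // -1 when unsupported
    kmp_destroy_affinity_mask(&mask);
    return -1;
  }
  int rc = kmp_set_affinity(&mask); // bind the calling thread to the mask
  kmp_destroy_affinity_mask(&mask);
  return rc;
}
#endif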
2019 /* -------------------------------------------------------------------------- */
2064 void __kmpc_copyprivate(ident_t *loc, kmp_int32 gtid, size_t cpy_size,
2065  void *cpy_data, void (*cpy_func)(void *, void *),
2066  kmp_int32 didit) {
2067  void **data_ptr;
2068 
2069  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));
2070 
2071  KMP_MB();
2072 
2073  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;
2074 
2075  if (__kmp_env_consistency_check) {
2076  if (loc == 0) {
2077  KMP_WARNING(ConstructIdentInvalid);
2078  }
2079  }
2080 
2081  // ToDo: Optimize the following two barriers into some kind of split barrier
2082 
2083  if (didit)
2084  *data_ptr = cpy_data;
2085 
2086 #if OMPT_SUPPORT
2087  ompt_frame_t *ompt_frame;
2088  if (ompt_enabled.enabled) {
2089  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
2090  if (ompt_frame->enter_frame.ptr == NULL)
2091  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
2092  OMPT_STORE_RETURN_ADDRESS(gtid);
2093  }
2094 #endif
2095 /* This barrier is not a barrier region boundary */
2096 #if USE_ITT_NOTIFY
2097  __kmp_threads[gtid]->th.th_ident = loc;
2098 #endif
2099  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2100 
2101  if (!didit)
2102  (*cpy_func)(cpy_data, *data_ptr);
2103 
2104 // Consider next barrier a user-visible barrier for barrier region boundaries
2105 // Nesting checks are already handled by the single construct checks
2106 
2107 #if OMPT_SUPPORT
2108  if (ompt_enabled.enabled) {
2109  OMPT_STORE_RETURN_ADDRESS(gtid);
2110  }
2111 #endif
2112 #if USE_ITT_NOTIFY
2113  __kmp_threads[gtid]->th.th_ident = loc; // TODO: check if it is needed (e.g.
2114 // tasks can overwrite the location)
2115 #endif
2116  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
2117 #if OMPT_SUPPORT && OMPT_OPTIONAL
2118  if (ompt_enabled.enabled) {
2119  ompt_frame->enter_frame = ompt_data_none;
2120  }
2121 #endif
2122 }
2123 
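// Shape of the code a compiler typically emits for
//   #pragma omp single copyprivate(x)
// using the entry point above. This is a hand-written sketch of the contract
// only (one thread publishes its data pointer and sets didit; the others copy
// from it in cpy_func between the two barriers), not actual compiler output;
// it assumes the __kmpc_single/__kmpc_end_single entry points defined
// elsewhere in this file:
#if 0 /* usage sketch only; not compiled into the runtime */
static void copy_int(void *dst, void *src) { // cpy_func: dst <- src
  *(int *)dst = *(int *)src;
}

static void single_copyprivate_sketch(ident_t *loc, kmp_int32 gtid, int *x) {
  kmp_int32 didit = __kmpc_single(loc, gtid); // nonzero on the executing thread
  if (didit) {
    *x = 42; // body of the single region
    __kmpc_end_single(loc, gtid);
  }
  // Every thread calls copyprivate; the executing thread broadcasts &x and
  // the others receive the value through copy_int().
  __kmpc_copyprivate(loc, gtid, sizeof(int), x, copy_int, didit);
}
#endif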
2124 /* -------------------------------------------------------------------------- */
2125 
2126 #define INIT_LOCK __kmp_init_user_lock_with_checks
2127 #define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
2128 #define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
2129 #define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
2130 #define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
2131 #define ACQUIRE_NESTED_LOCK_TIMED \
2132  __kmp_acquire_nested_user_lock_with_checks_timed
2133 #define RELEASE_LOCK __kmp_release_user_lock_with_checks
2134 #define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
2135 #define TEST_LOCK __kmp_test_user_lock_with_checks
2136 #define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
2137 #define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
2138 #define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
2139 
2140 // TODO: Make check abort messages use location info & pass it into
2141 // with_checks routines
2142 
2143 #if KMP_USE_DYNAMIC_LOCK
2144 
2145 // internal lock initializer
2146 static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
2147  kmp_dyna_lockseq_t seq) {
2148  if (KMP_IS_D_LOCK(seq)) {
2149  KMP_INIT_D_LOCK(lock, seq);
2150 #if USE_ITT_BUILD
2151  __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);
2152 #endif
2153  } else {
2154  KMP_INIT_I_LOCK(lock, seq);
2155 #if USE_ITT_BUILD
2156  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2157  __kmp_itt_lock_creating(ilk->lock, loc);
2158 #endif
2159  }
2160 }
2161 
2162 // internal nest lock initializer
2163 static __forceinline void
2164 __kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
2165  kmp_dyna_lockseq_t seq) {
2166 #if KMP_USE_TSX
2167  // Don't have nested lock implementation for speculative locks
2168  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
2169  seq = __kmp_user_lock_seq;
2170 #endif
2171  switch (seq) {
2172  case lockseq_tas:
2173  seq = lockseq_nested_tas;
2174  break;
2175 #if KMP_USE_FUTEX
2176  case lockseq_futex:
2177  seq = lockseq_nested_futex;
2178  break;
2179 #endif
2180  case lockseq_ticket:
2181  seq = lockseq_nested_ticket;
2182  break;
2183  case lockseq_queuing:
2184  seq = lockseq_nested_queuing;
2185  break;
2186  case lockseq_drdpa:
2187  seq = lockseq_nested_drdpa;
2188  break;
2189  default:
2190  seq = lockseq_nested_queuing;
2191  }
2192  KMP_INIT_I_LOCK(lock, seq);
2193 #if USE_ITT_BUILD
2194  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
2195  __kmp_itt_lock_creating(ilk->lock, loc);
2196 #endif
2197 }
2198 
2199 /* initialize the lock with a hint */
2200 void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,
2201  uintptr_t hint) {
2202  KMP_DEBUG_ASSERT(__kmp_init_serial);
2203  if (__kmp_env_consistency_check && user_lock == NULL) {
2204  KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");
2205  }
2206 
2207  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2208 
2209 #if OMPT_SUPPORT && OMPT_OPTIONAL
2210  // This is the case, if called from omp_init_lock_with_hint:
2211  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2212  if (!codeptr)
2213  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2214  if (ompt_enabled.ompt_callback_lock_init) {
2215  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2216  ompt_mutex_lock, (omp_lock_hint_t)hint,
2217  __ompt_get_mutex_impl_type(user_lock),
2218  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2219  }
2220 #endif
2221 }
2222 
2223 /* initialize the lock with a hint */
2224 void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
2225  void **user_lock, uintptr_t hint) {
2226  KMP_DEBUG_ASSERT(__kmp_init_serial);
2227  if (__kmp_env_consistency_check && user_lock == NULL) {
2228  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");
2229  }
2230 
2231  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));
2232 
2233 #if OMPT_SUPPORT && OMPT_OPTIONAL
2234  // This is the case, if called from omp_init_lock_with_hint:
2235  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2236  if (!codeptr)
2237  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2238  if (ompt_enabled.ompt_callback_lock_init) {
2239  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2240  ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
2241  __ompt_get_mutex_impl_type(user_lock),
2242  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2243  }
2244 #endif
2245 }
2246 
2247 #endif // KMP_USE_DYNAMIC_LOCK
2248 
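// Hint-based lock initialization as seen from user code: the hint is advisory,
// and __kmp_map_hint_to_lock() picks the closest available lock sequence
// (falling back silently, e.g. when speculation is unavailable). A minimal
// sketch, assuming the omp_* lock-with-hint API declared in omp.h:
#if 0 /* usage sketch only; not compiled into the runtime */
#include <omp.h>

static omp_lock_t hinted_lock;

static void init_hinted_lock(void) {
  // Ask for a speculative implementation where the hardware supports it.
  omp_init_lock_with_hint(&hinted_lock, omp_lock_hint_speculative);
}
#endif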
2249 /* initialize the lock */
2250 void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2251 #if KMP_USE_DYNAMIC_LOCK
2252 
2253  KMP_DEBUG_ASSERT(__kmp_init_serial);
2254  if (__kmp_env_consistency_check && user_lock == NULL) {
2255  KMP_FATAL(LockIsUninitialized, "omp_init_lock");
2256  }
2257  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2258 
2259 #if OMPT_SUPPORT && OMPT_OPTIONAL
2260  // This is the case, if called from omp_init_lock_with_hint:
2261  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2262  if (!codeptr)
2263  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2264  if (ompt_enabled.ompt_callback_lock_init) {
2265  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2266  ompt_mutex_lock, omp_lock_hint_none,
2267  __ompt_get_mutex_impl_type(user_lock),
2268  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2269  }
2270 #endif
2271 
2272 #else // KMP_USE_DYNAMIC_LOCK
2273 
2274  static char const *const func = "omp_init_lock";
2275  kmp_user_lock_p lck;
2276  KMP_DEBUG_ASSERT(__kmp_init_serial);
2277 
2278  if (__kmp_env_consistency_check) {
2279  if (user_lock == NULL) {
2280  KMP_FATAL(LockIsUninitialized, func);
2281  }
2282  }
2283 
2284  KMP_CHECK_USER_LOCK_INIT();
2285 
2286  if ((__kmp_user_lock_kind == lk_tas) &&
2287  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2288  lck = (kmp_user_lock_p)user_lock;
2289  }
2290 #if KMP_USE_FUTEX
2291  else if ((__kmp_user_lock_kind == lk_futex) &&
2292  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2293  lck = (kmp_user_lock_p)user_lock;
2294  }
2295 #endif
2296  else {
2297  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2298  }
2299  INIT_LOCK(lck);
2300  __kmp_set_user_lock_location(lck, loc);
2301 
2302 #if OMPT_SUPPORT && OMPT_OPTIONAL
2303  // This is the case, if called from omp_init_lock_with_hint:
2304  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2305  if (!codeptr)
2306  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2307  if (ompt_enabled.ompt_callback_lock_init) {
2308  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2309  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2310  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2311  }
2312 #endif
2313 
2314 #if USE_ITT_BUILD
2315  __kmp_itt_lock_creating(lck);
2316 #endif /* USE_ITT_BUILD */
2317 
2318 #endif // KMP_USE_DYNAMIC_LOCK
2319 } // __kmpc_init_lock
2320 
2321 /* initialize the lock */
2322 void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2323 #if KMP_USE_DYNAMIC_LOCK
2324 
2325  KMP_DEBUG_ASSERT(__kmp_init_serial);
2326  if (__kmp_env_consistency_check && user_lock == NULL) {
2327  KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");
2328  }
2329  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);
2330 
2331 #if OMPT_SUPPORT && OMPT_OPTIONAL
2332  // This is the case, if called from omp_init_lock_with_hint:
2333  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2334  if (!codeptr)
2335  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2336  if (ompt_enabled.ompt_callback_lock_init) {
2337  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2338  ompt_mutex_nest_lock, omp_lock_hint_none,
2339  __ompt_get_mutex_impl_type(user_lock),
2340  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2341  }
2342 #endif
2343 
2344 #else // KMP_USE_DYNAMIC_LOCK
2345 
2346  static char const *const func = "omp_init_nest_lock";
2347  kmp_user_lock_p lck;
2348  KMP_DEBUG_ASSERT(__kmp_init_serial);
2349 
2350  if (__kmp_env_consistency_check) {
2351  if (user_lock == NULL) {
2352  KMP_FATAL(LockIsUninitialized, func);
2353  }
2354  }
2355 
2356  KMP_CHECK_USER_LOCK_INIT();
2357 
2358  if ((__kmp_user_lock_kind == lk_tas) &&
2359  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2360  OMP_NEST_LOCK_T_SIZE)) {
2361  lck = (kmp_user_lock_p)user_lock;
2362  }
2363 #if KMP_USE_FUTEX
2364  else if ((__kmp_user_lock_kind == lk_futex) &&
2365  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2366  OMP_NEST_LOCK_T_SIZE)) {
2367  lck = (kmp_user_lock_p)user_lock;
2368  }
2369 #endif
2370  else {
2371  lck = __kmp_user_lock_allocate(user_lock, gtid, 0);
2372  }
2373 
2374  INIT_NESTED_LOCK(lck);
2375  __kmp_set_user_lock_location(lck, loc);
2376 
2377 #if OMPT_SUPPORT && OMPT_OPTIONAL
2378  // This is the case, if called from omp_init_lock_with_hint:
2379  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2380  if (!codeptr)
2381  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2382  if (ompt_enabled.ompt_callback_lock_init) {
2383  ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
2384  ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2385  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2386  }
2387 #endif
2388 
2389 #if USE_ITT_BUILD
2390  __kmp_itt_lock_creating(lck);
2391 #endif /* USE_ITT_BUILD */
2392 
2393 #endif // KMP_USE_DYNAMIC_LOCK
2394 } // __kmpc_init_nest_lock
2395 
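// Nestable-lock semantics behind __kmpc_init_nest_lock / __kmpc_set_nest_lock:
// the owner may re-acquire the lock and must release it as many times as it
// acquired it (the depth_locked counter above). A minimal sketch, assuming the
// omp_nest_lock API from omp.h:
#if 0 /* usage sketch only; not compiled into the runtime */
#include <omp.h>

static omp_nest_lock_t cnt_lock;
static int cnt;

static void bump(int n) {
  omp_set_nest_lock(&cnt_lock); // re-entry by the owner only bumps the depth
  cnt += n;
  omp_unset_nest_lock(&cnt_lock);
}

static void bump_twice(void) {
  omp_set_nest_lock(&cnt_lock);
  bump(1); // nested acquire by the same thread does not deadlock
  bump(2);
  omp_unset_nest_lock(&cnt_lock); // depth drops back to zero here
}
#endif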
2396 void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2397 #if KMP_USE_DYNAMIC_LOCK
2398 
2399 #if USE_ITT_BUILD
2400  kmp_user_lock_p lck;
2401  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2402  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2403  } else {
2404  lck = (kmp_user_lock_p)user_lock;
2405  }
2406  __kmp_itt_lock_destroyed(lck);
2407 #endif
2408 #if OMPT_SUPPORT && OMPT_OPTIONAL
2409  // This is the case, if called from omp_init_lock_with_hint:
2410  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2411  if (!codeptr)
2412  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2413  if (ompt_enabled.ompt_callback_lock_destroy) {
2414  kmp_user_lock_p lck;
2415  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
2416  lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;
2417  } else {
2418  lck = (kmp_user_lock_p)user_lock;
2419  }
2420  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2421  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2422  }
2423 #endif
2424  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2425 #else
2426  kmp_user_lock_p lck;
2427 
2428  if ((__kmp_user_lock_kind == lk_tas) &&
2429  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2430  lck = (kmp_user_lock_p)user_lock;
2431  }
2432 #if KMP_USE_FUTEX
2433  else if ((__kmp_user_lock_kind == lk_futex) &&
2434  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2435  lck = (kmp_user_lock_p)user_lock;
2436  }
2437 #endif
2438  else {
2439  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");
2440  }
2441 
2442 #if OMPT_SUPPORT && OMPT_OPTIONAL
2443  // This is the case, if called from omp_init_lock_with_hint:
2444  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2445  if (!codeptr)
2446  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2447  if (ompt_enabled.ompt_callback_lock_destroy) {
2448  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2449  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2450  }
2451 #endif
2452 
2453 #if USE_ITT_BUILD
2454  __kmp_itt_lock_destroyed(lck);
2455 #endif /* USE_ITT_BUILD */
2456  DESTROY_LOCK(lck);
2457 
2458  if ((__kmp_user_lock_kind == lk_tas) &&
2459  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2460  ;
2461  }
2462 #if KMP_USE_FUTEX
2463  else if ((__kmp_user_lock_kind == lk_futex) &&
2464  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2465  ;
2466  }
2467 #endif
2468  else {
2469  __kmp_user_lock_free(user_lock, gtid, lck);
2470  }
2471 #endif // KMP_USE_DYNAMIC_LOCK
2472 } // __kmpc_destroy_lock
2473 
2474 /* destroy the lock */
2475 void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2476 #if KMP_USE_DYNAMIC_LOCK
2477 
2478 #if USE_ITT_BUILD
2479  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
2480  __kmp_itt_lock_destroyed(ilk->lock);
2481 #endif
2482 #if OMPT_SUPPORT && OMPT_OPTIONAL
2483  // This is the case, if called from omp_init_lock_with_hint:
2484  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2485  if (!codeptr)
2486  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2487  if (ompt_enabled.ompt_callback_lock_destroy) {
2488  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2489  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2490  }
2491 #endif
2492  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);
2493 
2494 #else // KMP_USE_DYNAMIC_LOCK
2495 
2496  kmp_user_lock_p lck;
2497 
2498  if ((__kmp_user_lock_kind == lk_tas) &&
2499  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2500  OMP_NEST_LOCK_T_SIZE)) {
2501  lck = (kmp_user_lock_p)user_lock;
2502  }
2503 #if KMP_USE_FUTEX
2504  else if ((__kmp_user_lock_kind == lk_futex) &&
2505  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2506  OMP_NEST_LOCK_T_SIZE)) {
2507  lck = (kmp_user_lock_p)user_lock;
2508  }
2509 #endif
2510  else {
2511  lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");
2512  }
2513 
2514 #if OMPT_SUPPORT && OMPT_OPTIONAL
2515  // This is the case, if called from omp_init_lock_with_hint:
2516  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2517  if (!codeptr)
2518  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2519  if (ompt_enabled.ompt_callback_lock_destroy) {
2520  ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
2521  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2522  }
2523 #endif
2524 
2525 #if USE_ITT_BUILD
2526  __kmp_itt_lock_destroyed(lck);
2527 #endif /* USE_ITT_BUILD */
2528 
2529  DESTROY_NESTED_LOCK(lck);
2530 
2531  if ((__kmp_user_lock_kind == lk_tas) &&
2532  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2533  OMP_NEST_LOCK_T_SIZE)) {
2534  ;
2535  }
2536 #if KMP_USE_FUTEX
2537  else if ((__kmp_user_lock_kind == lk_futex) &&
2538  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2539  OMP_NEST_LOCK_T_SIZE)) {
2540  ;
2541  }
2542 #endif
2543  else {
2544  __kmp_user_lock_free(user_lock, gtid, lck);
2545  }
2546 #endif // KMP_USE_DYNAMIC_LOCK
2547 } // __kmpc_destroy_nest_lock
2548 
2549 void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2550  KMP_COUNT_BLOCK(OMP_set_lock);
2551 #if KMP_USE_DYNAMIC_LOCK
2552  int tag = KMP_EXTRACT_D_TAG(user_lock);
2553 #if USE_ITT_BUILD
2554  __kmp_itt_lock_acquiring(
2555  (kmp_user_lock_p)
2556  user_lock); // itt function will get to the right lock object.
2557 #endif
2558 #if OMPT_SUPPORT && OMPT_OPTIONAL
2559  // This is the case, if called from omp_init_lock_with_hint:
2560  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2561  if (!codeptr)
2562  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2563  if (ompt_enabled.ompt_callback_mutex_acquire) {
2564  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2565  ompt_mutex_lock, omp_lock_hint_none,
2566  __ompt_get_mutex_impl_type(user_lock),
2567  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2568  }
2569 #endif
2570 #if KMP_USE_INLINED_TAS
2571  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2572  KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);
2573  } else
2574 #elif KMP_USE_INLINED_FUTEX
2575  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2576  KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);
2577  } else
2578 #endif
2579  {
2580  __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2581  }
2582 #if USE_ITT_BUILD
2583  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2584 #endif
2585 #if OMPT_SUPPORT && OMPT_OPTIONAL
2586  if (ompt_enabled.ompt_callback_mutex_acquired) {
2587  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2588  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2589  }
2590 #endif
2591 
2592 #else // KMP_USE_DYNAMIC_LOCK
2593 
2594  kmp_user_lock_p lck;
2595 
2596  if ((__kmp_user_lock_kind == lk_tas) &&
2597  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2598  lck = (kmp_user_lock_p)user_lock;
2599  }
2600 #if KMP_USE_FUTEX
2601  else if ((__kmp_user_lock_kind == lk_futex) &&
2602  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2603  lck = (kmp_user_lock_p)user_lock;
2604  }
2605 #endif
2606  else {
2607  lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");
2608  }
2609 
2610 #if USE_ITT_BUILD
2611  __kmp_itt_lock_acquiring(lck);
2612 #endif /* USE_ITT_BUILD */
2613 #if OMPT_SUPPORT && OMPT_OPTIONAL
2614  // This is the case, if called from omp_init_lock_with_hint:
2615  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2616  if (!codeptr)
2617  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2618  if (ompt_enabled.ompt_callback_mutex_acquire) {
2619  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2620  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
2621  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2622  }
2623 #endif
2624 
2625  ACQUIRE_LOCK(lck, gtid);
2626 
2627 #if USE_ITT_BUILD
2628  __kmp_itt_lock_acquired(lck);
2629 #endif /* USE_ITT_BUILD */
2630 
2631 #if OMPT_SUPPORT && OMPT_OPTIONAL
2632  if (ompt_enabled.ompt_callback_mutex_acquired) {
2633  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2634  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2635  }
2636 #endif
2637 
2638 #endif // KMP_USE_DYNAMIC_LOCK
2639 }
2640 
2641 void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2642 #if KMP_USE_DYNAMIC_LOCK
2643 
2644 #if USE_ITT_BUILD
2645  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2646 #endif
2647 #if OMPT_SUPPORT && OMPT_OPTIONAL
2648  // This is the case, if called from omp_init_lock_with_hint:
2649  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2650  if (!codeptr)
2651  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2652  if (ompt_enabled.enabled) {
2653  if (ompt_enabled.ompt_callback_mutex_acquire) {
2654  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2655  ompt_mutex_nest_lock, omp_lock_hint_none,
2656  __ompt_get_mutex_impl_type(user_lock),
2657  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2658  }
2659  }
2660 #endif
2661  int acquire_status =
2662  KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
2663  (void) acquire_status;
2664 #if USE_ITT_BUILD
2665  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
2666 #endif
2667 
2668 #if OMPT_SUPPORT && OMPT_OPTIONAL
2669  if (ompt_enabled.enabled) {
2670  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2671  if (ompt_enabled.ompt_callback_mutex_acquired) {
2672  // lock_first
2673  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2674  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2675  codeptr);
2676  }
2677  } else {
2678  if (ompt_enabled.ompt_callback_nest_lock) {
2679  // lock_next
2680  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2681  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2682  }
2683  }
2684  }
2685 #endif
2686 
2687 #else // KMP_USE_DYNAMIC_LOCK
2688  int acquire_status;
2689  kmp_user_lock_p lck;
2690 
2691  if ((__kmp_user_lock_kind == lk_tas) &&
2692  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2693  OMP_NEST_LOCK_T_SIZE)) {
2694  lck = (kmp_user_lock_p)user_lock;
2695  }
2696 #if KMP_USE_FUTEX
2697  else if ((__kmp_user_lock_kind == lk_futex) &&
2698  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2699  OMP_NEST_LOCK_T_SIZE)) {
2700  lck = (kmp_user_lock_p)user_lock;
2701  }
2702 #endif
2703  else {
2704  lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");
2705  }
2706 
2707 #if USE_ITT_BUILD
2708  __kmp_itt_lock_acquiring(lck);
2709 #endif /* USE_ITT_BUILD */
2710 #if OMPT_SUPPORT && OMPT_OPTIONAL
2711  // This is the case, if called from omp_init_lock_with_hint:
2712  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2713  if (!codeptr)
2714  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2715  if (ompt_enabled.enabled) {
2716  if (ompt_enabled.ompt_callback_mutex_acquire) {
2717  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2718  ompt_mutex_nest_lock, omp_lock_hint_none,
2719  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
2720  codeptr);
2721  }
2722  }
2723 #endif
2724 
2725  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);
2726 
2727 #if USE_ITT_BUILD
2728  __kmp_itt_lock_acquired(lck);
2729 #endif /* USE_ITT_BUILD */
2730 
2731 #if OMPT_SUPPORT && OMPT_OPTIONAL
2732  if (ompt_enabled.enabled) {
2733  if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
2734  if (ompt_enabled.ompt_callback_mutex_acquired) {
2735  // lock_first
2736  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
2737  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2738  }
2739  } else {
2740  if (ompt_enabled.ompt_callback_nest_lock) {
2741  // lock_next
2742  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2743  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2744  }
2745  }
2746  }
2747 #endif
2748 
2749 #endif // KMP_USE_DYNAMIC_LOCK
2750 }
2751 
2752 void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2753 #if KMP_USE_DYNAMIC_LOCK
2754 
2755  int tag = KMP_EXTRACT_D_TAG(user_lock);
2756 #if USE_ITT_BUILD
2757  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2758 #endif
2759 #if KMP_USE_INLINED_TAS
2760  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2761  KMP_RELEASE_TAS_LOCK(user_lock, gtid);
2762  } else
2763 #elif KMP_USE_INLINED_FUTEX
2764  if (tag == locktag_futex && !__kmp_env_consistency_check) {
2765  KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
2766  } else
2767 #endif
2768  {
2769  __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
2770  }
2771 
2772 #if OMPT_SUPPORT && OMPT_OPTIONAL
2773  // This is the case, if called from omp_init_lock_with_hint:
2774  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2775  if (!codeptr)
2776  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2777  if (ompt_enabled.ompt_callback_mutex_released) {
2778  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2779  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2780  }
2781 #endif
2782 
2783 #else // KMP_USE_DYNAMIC_LOCK
2784 
2785  kmp_user_lock_p lck;
2786 
2787  /* Can't use serial interval since not block structured */
2788  /* release the lock */
2789 
2790  if ((__kmp_user_lock_kind == lk_tas) &&
2791  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
2792 #if KMP_OS_LINUX && \
2793  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2794 // "fast" path implemented to fix customer performance issue
2795 #if USE_ITT_BUILD
2796  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2797 #endif /* USE_ITT_BUILD */
2798  TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);
2799  KMP_MB();
2800 
2801 #if OMPT_SUPPORT && OMPT_OPTIONAL
2802  // This is the case, if called from omp_init_lock_with_hint:
2803  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2804  if (!codeptr)
2805  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2806  if (ompt_enabled.ompt_callback_mutex_released) {
2807  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2808  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2809  }
2810 #endif
2811 
2812  return;
2813 #else
2814  lck = (kmp_user_lock_p)user_lock;
2815 #endif
2816  }
2817 #if KMP_USE_FUTEX
2818  else if ((__kmp_user_lock_kind == lk_futex) &&
2819  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
2820  lck = (kmp_user_lock_p)user_lock;
2821  }
2822 #endif
2823  else {
2824  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
2825  }
2826 
2827 #if USE_ITT_BUILD
2828  __kmp_itt_lock_releasing(lck);
2829 #endif /* USE_ITT_BUILD */
2830 
2831  RELEASE_LOCK(lck, gtid);
2832 
2833 #if OMPT_SUPPORT && OMPT_OPTIONAL
2834  // This is the case, if called from omp_init_lock_with_hint:
2835  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2836  if (!codeptr)
2837  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2838  if (ompt_enabled.ompt_callback_mutex_released) {
2839  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2840  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2841  }
2842 #endif
2843 
2844 #endif // KMP_USE_DYNAMIC_LOCK
2845 }
2846 
2847 /* release the lock */
2848 void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2849 #if KMP_USE_DYNAMIC_LOCK
2850 
2851 #if USE_ITT_BUILD
2852  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2853 #endif
2854  int release_status =
2855  KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
2856  (void) release_status;
2857 
2858 #if OMPT_SUPPORT && OMPT_OPTIONAL
2859  // This is the case, if called from omp_init_lock_with_hint:
2860  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2861  if (!codeptr)
2862  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2863  if (ompt_enabled.enabled) {
2864  if (release_status == KMP_LOCK_RELEASED) {
2865  if (ompt_enabled.ompt_callback_mutex_released) {
2866  // release_lock_last
2867  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2868  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
2869  codeptr);
2870  }
2871  } else if (ompt_enabled.ompt_callback_nest_lock) {
2872  // release_lock_prev
2873  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2874  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2875  }
2876  }
2877 #endif
2878 
2879 #else // KMP_USE_DYNAMIC_LOCK
2880 
2881  kmp_user_lock_p lck;
2882 
2883  /* Can't use serial interval since not block structured */
2884 
2885  if ((__kmp_user_lock_kind == lk_tas) &&
2886  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
2887  OMP_NEST_LOCK_T_SIZE)) {
2888 #if KMP_OS_LINUX && \
2889  (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
2890  // "fast" path implemented to fix customer performance issue
2891  kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
2892 #if USE_ITT_BUILD
2893  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
2894 #endif /* USE_ITT_BUILD */
2895 
2896 #if OMPT_SUPPORT && OMPT_OPTIONAL
2897  int release_status = KMP_LOCK_STILL_HELD;
2898 #endif
2899 
2900  if (--(tl->lk.depth_locked) == 0) {
2901  TCW_4(tl->lk.poll, 0);
2902 #if OMPT_SUPPORT && OMPT_OPTIONAL
2903  release_status = KMP_LOCK_RELEASED;
2904 #endif
2905  }
2906  KMP_MB();
2907 
2908 #if OMPT_SUPPORT && OMPT_OPTIONAL
2909  // This is the case, if called from omp_init_lock_with_hint:
2910  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2911  if (!codeptr)
2912  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2913  if (ompt_enabled.enabled) {
2914  if (release_status == KMP_LOCK_RELEASED) {
2915  if (ompt_enabled.ompt_callback_mutex_released) {
2916  // release_lock_last
2917  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2918  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2919  }
2920  } else if (ompt_enabled.ompt_callback_nest_lock) {
2921  // release_lock_previous
2922  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2923  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2924  }
2925  }
2926 #endif
2927 
2928  return;
2929 #else
2930  lck = (kmp_user_lock_p)user_lock;
2931 #endif
2932  }
2933 #if KMP_USE_FUTEX
2934  else if ((__kmp_user_lock_kind == lk_futex) &&
2935  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
2936  OMP_NEST_LOCK_T_SIZE)) {
2937  lck = (kmp_user_lock_p)user_lock;
2938  }
2939 #endif
2940  else {
2941  lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
2942  }
2943 
2944 #if USE_ITT_BUILD
2945  __kmp_itt_lock_releasing(lck);
2946 #endif /* USE_ITT_BUILD */
2947 
2948  int release_status;
2949  release_status = RELEASE_NESTED_LOCK(lck, gtid);
2950 #if OMPT_SUPPORT && OMPT_OPTIONAL
2951  // This is the case, if called from omp_init_lock_with_hint:
2952  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2953  if (!codeptr)
2954  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2955  if (ompt_enabled.enabled) {
2956  if (release_status == KMP_LOCK_RELEASED) {
2957  if (ompt_enabled.ompt_callback_mutex_released) {
2958  // release_lock_last
2959  ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
2960  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2961  }
2962  } else if (ompt_enabled.ompt_callback_nest_lock) {
2963  // release_lock_previous
2964  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
2965  ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
2966  }
2967  }
2968 #endif
2969 
2970 #endif // KMP_USE_DYNAMIC_LOCK
2971 }
2972 
2973 /* try to acquire the lock */
2974 int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
2975  KMP_COUNT_BLOCK(OMP_test_lock);
2976 
2977 #if KMP_USE_DYNAMIC_LOCK
2978  int rc;
2979  int tag = KMP_EXTRACT_D_TAG(user_lock);
2980 #if USE_ITT_BUILD
2981  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
2982 #endif
2983 #if OMPT_SUPPORT && OMPT_OPTIONAL
2984  // This is the case, if called from omp_init_lock_with_hint:
2985  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
2986  if (!codeptr)
2987  codeptr = OMPT_GET_RETURN_ADDRESS(0);
2988  if (ompt_enabled.ompt_callback_mutex_acquire) {
2989  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
2990  ompt_mutex_lock, omp_lock_hint_none,
2991  __ompt_get_mutex_impl_type(user_lock),
2992  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
2993  }
2994 #endif
2995 #if KMP_USE_INLINED_TAS
2996  if (tag == locktag_tas && !__kmp_env_consistency_check) {
2997  KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
2998  } else
2999 #elif KMP_USE_INLINED_FUTEX
3000  if (tag == locktag_futex && !__kmp_env_consistency_check) {
3001  KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
3002  } else
3003 #endif
3004  {
3005  rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
3006  }
3007  if (rc) {
3008 #if USE_ITT_BUILD
3009  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3010 #endif
3011 #if OMPT_SUPPORT && OMPT_OPTIONAL
3012  if (ompt_enabled.ompt_callback_mutex_acquired) {
3013  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3014  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3015  }
3016 #endif
3017  return FTN_TRUE;
3018  } else {
3019 #if USE_ITT_BUILD
3020  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3021 #endif
3022  return FTN_FALSE;
3023  }
3024 
3025 #else // KMP_USE_DYNAMIC_LOCK
3026 
3027  kmp_user_lock_p lck;
3028  int rc;
3029 
3030  if ((__kmp_user_lock_kind == lk_tas) &&
3031  (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
3032  lck = (kmp_user_lock_p)user_lock;
3033  }
3034 #if KMP_USE_FUTEX
3035  else if ((__kmp_user_lock_kind == lk_futex) &&
3036  (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
3037  lck = (kmp_user_lock_p)user_lock;
3038  }
3039 #endif
3040  else {
3041  lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
3042  }
3043 
3044 #if USE_ITT_BUILD
3045  __kmp_itt_lock_acquiring(lck);
3046 #endif /* USE_ITT_BUILD */
3047 #if OMPT_SUPPORT && OMPT_OPTIONAL
3048  // This is the case, if called from omp_init_lock_with_hint:
3049  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3050  if (!codeptr)
3051  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3052  if (ompt_enabled.ompt_callback_mutex_acquire) {
3053  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3054  ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
3055  (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3056  }
3057 #endif
3058 
3059  rc = TEST_LOCK(lck, gtid);
3060 #if USE_ITT_BUILD
3061  if (rc) {
3062  __kmp_itt_lock_acquired(lck);
3063  } else {
3064  __kmp_itt_lock_cancelled(lck);
3065  }
3066 #endif /* USE_ITT_BUILD */
3067 #if OMPT_SUPPORT && OMPT_OPTIONAL
3068  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
3069  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3070  ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3071  }
3072 #endif
3073 
3074  return (rc ? FTN_TRUE : FTN_FALSE);
3075 
3076 /* Can't use serial interval since not block structured */
3077 
3078 #endif // KMP_USE_DYNAMIC_LOCK
3079 }
3080 
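// User-level view of the set/unset/test entry points above: omp_test_lock()
// returns nonzero exactly when the lock was acquired (FTN_TRUE/FTN_FALSE
// here). A minimal sketch, assuming the omp_lock API from omp.h:
#if 0 /* usage sketch only; not compiled into the runtime */
#include <omp.h>

static omp_lock_t work_lock;

static int try_do_work(void (*work)(void)) {
  if (!omp_test_lock(&work_lock)) // cf. __kmpc_test_lock returning FTN_FALSE
    return 0;                     // lock is busy; caller can do something else
  work();                         // protected work, cf. __kmpc_set_lock path
  omp_unset_lock(&work_lock);     // cf. __kmpc_unset_lock
  return 1;
}
#endif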
3081 /* try to acquire the lock */
3082 int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
3083 #if KMP_USE_DYNAMIC_LOCK
3084  int rc;
3085 #if USE_ITT_BUILD
3086  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
3087 #endif
3088 #if OMPT_SUPPORT && OMPT_OPTIONAL
3089  // This is the case, if called from omp_init_lock_with_hint:
3090  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3091  if (!codeptr)
3092  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3093  if (ompt_enabled.ompt_callback_mutex_acquire) {
3094  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3095  ompt_mutex_nest_lock, omp_lock_hint_none,
3096  __ompt_get_mutex_impl_type(user_lock),
3097  (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3098  }
3099 #endif
3100  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
3101 #if USE_ITT_BUILD
3102  if (rc) {
3103  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
3104  } else {
3105  __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
3106  }
3107 #endif
3108 #if OMPT_SUPPORT && OMPT_OPTIONAL
3109  if (ompt_enabled.enabled && rc) {
3110  if (rc == 1) {
3111  if (ompt_enabled.ompt_callback_mutex_acquired) {
3112  // lock_first
3113  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3114  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
3115  codeptr);
3116  }
3117  } else {
3118  if (ompt_enabled.ompt_callback_nest_lock) {
3119  // lock_next
3120  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3121  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
3122  }
3123  }
3124  }
3125 #endif
3126  return rc;
3127 
3128 #else // KMP_USE_DYNAMIC_LOCK
3129 
3130  kmp_user_lock_p lck;
3131  int rc;
3132 
3133  if ((__kmp_user_lock_kind == lk_tas) &&
3134  (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
3135  OMP_NEST_LOCK_T_SIZE)) {
3136  lck = (kmp_user_lock_p)user_lock;
3137  }
3138 #if KMP_USE_FUTEX
3139  else if ((__kmp_user_lock_kind == lk_futex) &&
3140  (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
3141  OMP_NEST_LOCK_T_SIZE)) {
3142  lck = (kmp_user_lock_p)user_lock;
3143  }
3144 #endif
3145  else {
3146  lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
3147  }
3148 
3149 #if USE_ITT_BUILD
3150  __kmp_itt_lock_acquiring(lck);
3151 #endif /* USE_ITT_BUILD */
3152 
3153 #if OMPT_SUPPORT && OMPT_OPTIONAL
3154  // This is the case, if called from omp_init_lock_with_hint:
3155  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
3156  if (!codeptr)
3157  codeptr = OMPT_GET_RETURN_ADDRESS(0);
3158  if (ompt_enabled.enabled &&
3159      ompt_enabled.ompt_callback_mutex_acquire) {
3160  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
3161  ompt_mutex_nest_lock, omp_lock_hint_none,
3162  __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
3163  codeptr);
3164  }
3165 #endif
3166 
3167  rc = TEST_NESTED_LOCK(lck, gtid);
3168 #if USE_ITT_BUILD
3169  if (rc) {
3170  __kmp_itt_lock_acquired(lck);
3171  } else {
3172  __kmp_itt_lock_cancelled(lck);
3173  }
3174 #endif /* USE_ITT_BUILD */
3175 #if OMPT_SUPPORT && OMPT_OPTIONAL
3176  if (ompt_enabled.enabled && rc) {
3177  if (rc == 1) {
3178  if (ompt_enabled.ompt_callback_mutex_acquired) {
3179  // lock_first
3180  ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
3181  ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3182  }
3183  } else {
3184  if (ompt_enabled.ompt_callback_nest_lock) {
3185  // lock_next
3186  ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
3187  ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
3188  }
3189  }
3190  }
3191 #endif
3192  return rc;
3193 
3194 /* Can't use serial interval since not block structured */
3195 
3196 #endif // KMP_USE_DYNAMIC_LOCK
3197 }
3198 
3199 // Interface to fast scalable reduce methods routines
3200 
3201 // keep the selected method in a thread local structure for cross-function
3202 // usage: will be used in __kmpc_end_reduce* functions;
3203 // another solution: to re-determine the method one more time in
3204 // __kmpc_end_reduce* functions (new prototype required then)
3205 // AT: which solution is better?
3206 #define __KMP_SET_REDUCTION_METHOD(gtid, rmethod) \
3207  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))
3208 
3209 #define __KMP_GET_REDUCTION_METHOD(gtid) \
3210  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
3211 
3212 // description of the packed_reduction_method variable: look at the macros in
3213 // kmp.h
3214 
3215 // used in a critical section reduce block
3216 static __forceinline void
3217 __kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3218  kmp_critical_name *crit) {
3219 
3220  // this lock was visible to a customer and to the threading profile tool as a
3221  // serial overhead span (although it's used for an internal purpose only)
3222  // why was it visible in the previous implementation?
3223  // should we keep it visible in the new reduce block?
3224  kmp_user_lock_p lck;
3225 
3226 #if KMP_USE_DYNAMIC_LOCK
3227 
3228  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
3229  // Check if it is initialized.
3230  if (*lk == 0) {
3231  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3232  KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
3233  KMP_GET_D_TAG(__kmp_user_lock_seq));
3234  } else {
3235  __kmp_init_indirect_csptr(crit, loc, global_tid,
3236  KMP_GET_I_TAG(__kmp_user_lock_seq));
3237  }
3238  }
3239  // Branch for accessing the actual lock object and set operation. This
3240  // branching is inevitable since this lock initialization does not follow the
3241  // normal dispatch path (lock table is not used).
3242  if (KMP_EXTRACT_D_TAG(lk) != 0) {
3243  lck = (kmp_user_lock_p)lk;
3244  KMP_DEBUG_ASSERT(lck != NULL);
3245  if (__kmp_env_consistency_check) {
3246  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3247  }
3248  KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
3249  } else {
3250  kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
3251  lck = ilk->lock;
3252  KMP_DEBUG_ASSERT(lck != NULL);
3253  if (__kmp_env_consistency_check) {
3254  __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
3255  }
3256  KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
3257  }
3258 
3259 #else // KMP_USE_DYNAMIC_LOCK
3260 
3261  // We know that the fast reduction code is only emitted by Intel compilers
3262  // with 32 byte critical sections. If there isn't enough space, then we
3263  // have to use a pointer.
3264  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
3265  lck = (kmp_user_lock_p)crit;
3266  } else {
3267  lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
3268  }
3269  KMP_DEBUG_ASSERT(lck != NULL);
3270 
3271  if (__kmp_env_consistency_check)
3272  __kmp_push_sync(global_tid, ct_critical, loc, lck);
3273 
3274  __kmp_acquire_user_lock_with_checks(lck, global_tid);
3275 
3276 #endif // KMP_USE_DYNAMIC_LOCK
3277 }
3278 
3279 // used in a critical section reduce block
3280 static __forceinline void
3281 __kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
3282  kmp_critical_name *crit) {
3283 
3284  kmp_user_lock_p lck;
3285 
3286 #if KMP_USE_DYNAMIC_LOCK
3287 
3288  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
3289  lck = (kmp_user_lock_p)crit;
3290  if (__kmp_env_consistency_check)
3291  __kmp_pop_sync(global_tid, ct_critical, loc);
3292  KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
3293  } else {
3294  kmp_indirect_lock_t *ilk =
3295  (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
3296  if (__kmp_env_consistency_check)
3297  __kmp_pop_sync(global_tid, ct_critical, loc);
3298  KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
3299  }
3300 
3301 #else // KMP_USE_DYNAMIC_LOCK
3302 
3303  // We know that the fast reduction code is only emitted by Intel compilers
3304  // with 32 byte critical sections. If there isn't enough space, then we have
3305  // to use a pointer.
3306  if (__kmp_base_user_lock_size > 32) {
3307  lck = *((kmp_user_lock_p *)crit);
3308  KMP_ASSERT(lck != NULL);
3309  } else {
3310  lck = (kmp_user_lock_p)crit;
3311  }
3312 
3313  if (__kmp_env_consistency_check)
3314  __kmp_pop_sync(global_tid, ct_critical, loc);
3315 
3316  __kmp_release_user_lock_with_checks(lck, global_tid);
3317 
3318 #endif // KMP_USE_DYNAMIC_LOCK
3319 } // __kmp_end_critical_section_reduce_block
3320 
3321 static __forceinline int
3322 __kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
3323  int *task_state) {
3324  kmp_team_t *team;
3325 
3326  // Check if we are inside the teams construct?
3327  if (th->th.th_teams_microtask) {
3328  *team_p = team = th->th.th_team;
3329  if (team->t.t_level == th->th.th_teams_level) {
3330  // This is reduction at teams construct.
3331  KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // AC: check that tid == 0
3332  // Let's swap teams temporarily for the reduction.
3333  th->th.th_info.ds.ds_tid = team->t.t_master_tid;
3334  th->th.th_team = team->t.t_parent;
3335  th->th.th_team_nproc = th->th.th_team->t.t_nproc;
3336  th->th.th_task_team = th->th.th_team->t.t_task_team[0];
3337  *task_state = th->th.th_task_state;
3338  th->th.th_task_state = 0;
3339 
3340  return 1;
3341  }
3342  }
3343  return 0;
3344 }
3345 
3346 static __forceinline void
3347 __kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
3348  // Restore thread structure swapped in __kmp_swap_teams_for_teams_reduction.
3349  th->th.th_info.ds.ds_tid = 0;
3350  th->th.th_team = team;
3351  th->th.th_team_nproc = team->t.t_nproc;
3352  th->th.th_task_team = team->t.t_task_team[task_state];
3353  th->th.th_task_state = task_state;
3354 }
3355 
3356 /* 2.a.i. Reduce Block without a terminating barrier */
3372 kmp_int32
3373 __kmpc_reduce_nowait(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3374  size_t reduce_size, void *reduce_data,
3375  void (*reduce_func)(void *lhs_data, void *rhs_data),
3376  kmp_critical_name *lck) {
3377 
3378  KMP_COUNT_BLOCK(REDUCE_nowait);
3379  int retval = 0;
3380  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3381  kmp_info_t *th;
3382  kmp_team_t *team;
3383  int teams_swapped = 0, task_state;
3384  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));
3385 
3386  // why do we need this initialization here at all?
3387  // Reduction clause cannot be used as a stand-alone directive.
3388 
3389  // do not call __kmp_serial_initialize(), it will be called by
3390  // __kmp_parallel_initialize() if needed
3391  // possible detection of false-positive race by the threadchecker ???
3392  if (!TCR_4(__kmp_init_parallel))
3393  __kmp_parallel_initialize();
3394 
3395  __kmp_resume_if_soft_paused();
3396 
3397 // check correctness of reduce block nesting
3398 #if KMP_USE_DYNAMIC_LOCK
3399  if (__kmp_env_consistency_check)
3400  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3401 #else
3402  if (__kmp_env_consistency_check)
3403  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3404 #endif
3405 
3406  th = __kmp_thread_from_gtid(global_tid);
3407  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3408 
3409  // the packed_reduction_method value will be reused by the __kmpc_end_reduce*
3410  // functions, so the value should be kept in a variable
3411  // the variable should be either a construct-specific or thread-specific
3412  // property, not a team-specific property
3413  // (a thread can reach the next reduce block on the next construct, reduce
3414  // method may differ on the next construct)
3415  // an ident_t "loc" parameter could be used as a construct-specific property
3416  // (what if loc == 0?)
3417  // (if both construct-specific and team-specific variables were shared,
3418  // then unnecessary extra syncs would be needed)
3419  // a thread-specific variable is better regarding two issues above (next
3420  // construct and extra syncs)
3421  // a thread-specific "th_local.reduction_method" variable is used currently
3422  // each thread executes the 'determine' and 'set' lines (no need to restrict
3423  // this to one thread, which would require unnecessary extra syncs)
3424 
3425  packed_reduction_method = __kmp_determine_reduction_method(
3426  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3427  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3428 
3429  OMPT_REDUCTION_DECL(th, global_tid);
3430  if (packed_reduction_method == critical_reduce_block) {
3431 
3432  OMPT_REDUCTION_BEGIN;
3433 
3434  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3435  retval = 1;
3436 
3437  } else if (packed_reduction_method == empty_reduce_block) {
3438 
3439  OMPT_REDUCTION_BEGIN;
3440 
3441  // usage: if team size == 1, no synchronization is required ( Intel
3442  // platforms only )
3443  retval = 1;
3444 
3445  } else if (packed_reduction_method == atomic_reduce_block) {
3446 
3447  retval = 2;
3448 
3449  // all threads should do this pop here (because __kmpc_end_reduce_nowait()
3450  // won't be called by the code gen)
3451  // (this is not ideal, because the checking block has been closed by
3452  // this 'pop',
3453  // but the atomic operation has not been executed yet; it will be executed
3454  // slightly later, literally on the next instruction)
3455  if (__kmp_env_consistency_check)
3456  __kmp_pop_sync(global_tid, ct_reduce, loc);
3457 
3458  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3459  tree_reduce_block)) {
3460 
3461 // AT: performance issue: a real barrier here
3462 // AT: (if the master is slow, other threads are blocked here waiting for the
3463 // master to arrive and release them)
3464 // AT: (it's not what a customer might expect when specifying the NOWAIT clause)
3465 // AT: (specifying NOWAIT won't result in a performance improvement; it'll
3466 // be confusing to a customer)
3467 // AT: another implementation of *barrier_gather*nowait() (or some other design)
3468 // might go faster and be more in line with the sense of NOWAIT
3469 // AT: TO DO: run the EPCC benchmark and compare times
3470 
3471 // this barrier should be invisible to a customer and to the threading profile
3472 // tool (it's neither a terminating barrier nor customer's code, it's
3473 // used for an internal purpose)
3474 #if OMPT_SUPPORT
3475  // JP: can this barrier potentially lead to task scheduling?
3476  // JP: as long as there is a barrier in the implementation, OMPT should and
3477  // will provide the barrier events,
3478  // so we set up the necessary frame/return addresses.
3479  ompt_frame_t *ompt_frame;
3480  if (ompt_enabled.enabled) {
3481  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3482  if (ompt_frame->enter_frame.ptr == NULL)
3483  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3484  OMPT_STORE_RETURN_ADDRESS(global_tid);
3485  }
3486 #endif
3487 #if USE_ITT_NOTIFY
3488  __kmp_threads[global_tid]->th.th_ident = loc;
3489 #endif
3490  retval =
3491  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3492  global_tid, FALSE, reduce_size, reduce_data, reduce_func);
3493  retval = (retval != 0) ? (0) : (1);
3494 #if OMPT_SUPPORT && OMPT_OPTIONAL
3495  if (ompt_enabled.enabled) {
3496  ompt_frame->enter_frame = ompt_data_none;
3497  }
3498 #endif
3499 
3500  // all other workers except master should do this pop here
3501  // (none of the other workers will get to __kmpc_end_reduce_nowait())
3502  if (__kmp_env_consistency_check) {
3503  if (retval == 0) {
3504  __kmp_pop_sync(global_tid, ct_reduce, loc);
3505  }
3506  }
3507 
3508  } else {
3509 
3510  // should never reach this block
3511  KMP_ASSERT(0); // "unexpected method"
3512  }
3513  if (teams_swapped) {
3514  __kmp_restore_swapped_teams(th, team, task_state);
3515  }
3516  KA_TRACE(
3517  10,
3518  ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
3519  global_tid, packed_reduction_method, retval));
3520 
3521  return retval;
3522 }
3523 
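// Shape of the code a compiler typically emits around __kmpc_reduce_nowait
// (and, with a trailing barrier, __kmpc_reduce) for something like
//   #pragma omp for reduction(+:sum) nowait
// This is a hand-written sketch of the return-value contract only (1: do the
// reduction here and call __kmpc_end_reduce_nowait; 2: use atomics; 0: the
// tree barrier already combined the data via reduce_func), not actual
// compiler output; the names are illustrative:
#if 0 /* usage sketch only; not compiled into the runtime */
static void add_doubles(void *lhs, void *rhs) { // reduce_func for the tree path
  *(double *)lhs += *(double *)rhs;
}

static void reduce_sketch(ident_t *loc, kmp_int32 gtid, double *shared_sum,
                          double my_sum, kmp_critical_name *crit) {
  switch (__kmpc_reduce_nowait(loc, gtid, /*num_vars=*/1, sizeof(double),
                               &my_sum, add_doubles, crit)) {
  case 1: // critical/empty method: combine here, then close the region
    *shared_sum += my_sum;
    __kmpc_end_reduce_nowait(loc, gtid, crit);
    break;
  case 2: // atomic method: combine with an atomic update, no end call
#pragma omp atomic
    *shared_sum += my_sum;
    break;
  default: // 0: combined inside the barrier; nothing left to do
    break;
  }
}
#endif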
3532 void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
3533  kmp_critical_name *lck) {
3534 
3535  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3536 
3537  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));
3538 
3539  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3540 
3541  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);
3542 
3543  if (packed_reduction_method == critical_reduce_block) {
3544 
3545  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3546  OMPT_REDUCTION_END;
3547 
3548  } else if (packed_reduction_method == empty_reduce_block) {
3549 
3550  // usage: if team size == 1, no synchronization is required ( on Intel
3551  // platforms only )
3552 
3553  OMPT_REDUCTION_END;
3554 
3555  } else if (packed_reduction_method == atomic_reduce_block) {
3556 
3557  // neither master nor other workers should get here
3558  // (code gen does not generate this call in case 2: atomic reduce block)
3559  // actually it would be better to remove this else-if entirely;
3560  // after removal this value would be caught by the 'else' branch and assert
3561 
3562  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3563  tree_reduce_block)) {
3564 
3565  // only master gets here
3566  // OMPT: tree reduction is annotated in the barrier code
3567 
3568  } else {
3569 
3570  // should never reach this block
3571  KMP_ASSERT(0); // "unexpected method"
3572  }
3573 
3574  if (__kmp_env_consistency_check)
3575  __kmp_pop_sync(global_tid, ct_reduce, loc);
3576 
3577  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
3578  global_tid, packed_reduction_method));
3579 
3580  return;
3581 }
3582 
3583 /* 2.a.ii. Reduce Block with a terminating barrier */
3584 
3600 kmp_int32 __kmpc_reduce(ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars,
3601  size_t reduce_size, void *reduce_data,
3602  void (*reduce_func)(void *lhs_data, void *rhs_data),
3603  kmp_critical_name *lck) {
3604  KMP_COUNT_BLOCK(REDUCE_wait);
3605  int retval = 0;
3606  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3607  kmp_info_t *th;
3608  kmp_team_t *team;
3609  int teams_swapped = 0, task_state;
3610 
3611  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));
3612 
3613  // why do we need this initialization here at all?
3614  // Reduction clause cannot be a stand-alone directive.
3615 
3616  // do not call __kmp_serial_initialize(), it will be called by
3617  // __kmp_parallel_initialize() if needed
3618  // possible detection of false-positive race by the threadchecker ???
3619  if (!TCR_4(__kmp_init_parallel))
3620  __kmp_parallel_initialize();
3621 
3622  __kmp_resume_if_soft_paused();
3623 
3624 // check correctness of reduce block nesting
3625 #if KMP_USE_DYNAMIC_LOCK
3626  if (__kmp_env_consistency_check)
3627  __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
3628 #else
3629  if (__kmp_env_consistency_check)
3630  __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
3631 #endif
3632 
3633  th = __kmp_thread_from_gtid(global_tid);
3634  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3635 
3636  packed_reduction_method = __kmp_determine_reduction_method(
3637  loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
3638  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);
3639 
3640  OMPT_REDUCTION_DECL(th, global_tid);
3641 
3642  if (packed_reduction_method == critical_reduce_block) {
3643 
3644  OMPT_REDUCTION_BEGIN;
3645  __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
3646  retval = 1;
3647 
3648  } else if (packed_reduction_method == empty_reduce_block) {
3649 
3650  OMPT_REDUCTION_BEGIN;
3651  // usage: if team size == 1, no synchronization is required ( Intel
3652  // platforms only )
3653  retval = 1;
3654 
3655  } else if (packed_reduction_method == atomic_reduce_block) {
3656 
3657  retval = 2;
3658 
3659  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3660  tree_reduce_block)) {
3661 
3662 // case tree_reduce_block:
3663 // this barrier should be visible to a customer and to the threading profile
3664 // tool (it's a terminating barrier on constructs if NOWAIT not specified)
3665 #if OMPT_SUPPORT
3666  ompt_frame_t *ompt_frame;
3667  if (ompt_enabled.enabled) {
3668  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3669  if (ompt_frame->enter_frame.ptr == NULL)
3670  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3671  OMPT_STORE_RETURN_ADDRESS(global_tid);
3672  }
3673 #endif
3674 #if USE_ITT_NOTIFY
3675  __kmp_threads[global_tid]->th.th_ident =
3676  loc; // needed for correct notification of frames
3677 #endif
3678  retval =
3679  __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3680  global_tid, TRUE, reduce_size, reduce_data, reduce_func);
3681  retval = (retval != 0) ? (0) : (1);
3682 #if OMPT_SUPPORT && OMPT_OPTIONAL
3683  if (ompt_enabled.enabled) {
3684  ompt_frame->enter_frame = ompt_data_none;
3685  }
3686 #endif
3687 
3688  // all workers except the master should do this pop here
3689  // (workers other than the master will not enter __kmpc_end_reduce())
3690  if (__kmp_env_consistency_check) {
3691  if (retval == 0) { // 0: all other workers; 1: master
3692  __kmp_pop_sync(global_tid, ct_reduce, loc);
3693  }
3694  }
3695 
3696  } else {
3697 
3698  // should never reach this block
3699  KMP_ASSERT(0); // "unexpected method"
3700  }
3701  if (teams_swapped) {
3702  __kmp_restore_swapped_teams(th, team, task_state);
3703  }
3704 
3705  KA_TRACE(10,
3706  ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
3707  global_tid, packed_reduction_method, retval));
3708  return retval;
3709 }
3710 
3721 void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
3722  kmp_critical_name *lck) {
3723 
3724  PACKED_REDUCTION_METHOD_T packed_reduction_method;
3725  kmp_info_t *th;
3726  kmp_team_t *team;
3727  int teams_swapped = 0, task_state;
3728 
3729  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));
3730 
3731  th = __kmp_thread_from_gtid(global_tid);
3732  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);
3733 
3734  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);
3735 
3736  // this barrier should be visible to the user and to the threading profile
3737  // tool (it is a terminating barrier on constructs if NOWAIT is not specified)
3738  OMPT_REDUCTION_DECL(th, global_tid);
3739 
3740  if (packed_reduction_method == critical_reduce_block) {
3741  __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
3742 
3743  OMPT_REDUCTION_END;
3744 
3745 // TODO: implicit barrier: should be exposed
3746 #if OMPT_SUPPORT
3747  ompt_frame_t *ompt_frame;
3748  if (ompt_enabled.enabled) {
3749  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3750  if (ompt_frame->enter_frame.ptr == NULL)
3751  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3752  OMPT_STORE_RETURN_ADDRESS(global_tid);
3753  }
3754 #endif
3755 #if USE_ITT_NOTIFY
3756  __kmp_threads[global_tid]->th.th_ident = loc;
3757 #endif
3758  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3759 #if OMPT_SUPPORT && OMPT_OPTIONAL
3760  if (ompt_enabled.enabled) {
3761  ompt_frame->enter_frame = ompt_data_none;
3762  }
3763 #endif
3764 
3765  } else if (packed_reduction_method == empty_reduce_block) {
3766 
3767  OMPT_REDUCTION_END;
3768 
3769 // usage: if team size == 1, no synchronization is required (Intel platforms only)
3770 
3771 // TODO: implicit barrier: should be exposed
3772 #if OMPT_SUPPORT
3773  ompt_frame_t *ompt_frame;
3774  if (ompt_enabled.enabled) {
3775  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3776  if (ompt_frame->enter_frame.ptr == NULL)
3777  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3778  OMPT_STORE_RETURN_ADDRESS(global_tid);
3779  }
3780 #endif
3781 #if USE_ITT_NOTIFY
3782  __kmp_threads[global_tid]->th.th_ident = loc;
3783 #endif
3784  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3785 #if OMPT_SUPPORT && OMPT_OPTIONAL
3786  if (ompt_enabled.enabled) {
3787  ompt_frame->enter_frame = ompt_data_none;
3788  }
3789 #endif
3790 
3791  } else if (packed_reduction_method == atomic_reduce_block) {
3792 
3793 #if OMPT_SUPPORT
3794  ompt_frame_t *ompt_frame;
3795  if (ompt_enabled.enabled) {
3796  __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
3797  if (ompt_frame->enter_frame.ptr == NULL)
3798  ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
3799  OMPT_STORE_RETURN_ADDRESS(global_tid);
3800  }
3801 #endif
3802 // TODO: implicit barrier: should be exposed
3803 #if USE_ITT_NOTIFY
3804  __kmp_threads[global_tid]->th.th_ident = loc;
3805 #endif
3806  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
3807 #if OMPT_SUPPORT && OMPT_OPTIONAL
3808  if (ompt_enabled.enabled) {
3809  ompt_frame->enter_frame = ompt_data_none;
3810  }
3811 #endif
3812 
3813  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
3814  tree_reduce_block)) {
3815 
3816  // only master executes here (master releases all other workers)
3817  __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
3818  global_tid);
3819 
3820  } else {
3821 
3822  // should never reach this block
3823  KMP_ASSERT(0); // "unexpected method"
3824  }
3825  if (teams_swapped) {
3826  __kmp_restore_swapped_teams(th, team, task_state);
3827  }
3828 
3829  if (__kmp_env_consistency_check)
3830  __kmp_pop_sync(global_tid, ct_reduce, loc);
3831 
3832  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
3833  global_tid, packed_reduction_method));
3834 
3835  return;
3836 }
3837 
3838 #undef __KMP_GET_REDUCTION_METHOD
3839 #undef __KMP_SET_REDUCTION_METHOD
3840 
3841 /* end of interface to fast scalable reduce routines */
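
/* Illustrative sketch (not part of the runtime): a plausible compiler lowering
   of "#pragma omp parallel for reduction(+ : sum)" onto the blocking interface
   above. The names my_reduce_func, my_lock, reduce_example and partial_sum are
   hypothetical. The shape follows the return-value contract of __kmpc_reduce
   (1 = combine on the critical/empty/tree path, 2 = use atomics, 0 = nothing
   to do), with every non-zero case ending in __kmpc_end_reduce; the nowait
   variant would skip __kmpc_end_reduce_nowait in case 2, as noted in that
   function's comments. */
static void my_reduce_func(void *lhs_data, void *rhs_data) {
  *(int *)lhs_data += *(int *)rhs_data; // combine two per-thread partial sums
}
static kmp_critical_name my_lock; // critical name/lock passed to the runtime
static void reduce_example(ident_t *loc, kmp_int32 gtid, int *sum,
                           int partial_sum) {
  switch (__kmpc_reduce(loc, gtid, /*num_vars=*/1, sizeof(int), &partial_sum,
                        my_reduce_func, &my_lock)) {
  case 1: // critical / empty / tree path: combine into the original variable
    *sum += partial_sum;
    __kmpc_end_reduce(loc, gtid, &my_lock);
    break;
  case 2: // atomic path: the update of the original variable must be atomic
    __atomic_fetch_add(sum, partial_sum, __ATOMIC_RELAXED);
    __kmpc_end_reduce(loc, gtid, &my_lock);
    break;
  default: // 0: this thread has nothing left to do
    break;
  }
}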
3842 
3843 kmp_uint64 __kmpc_get_taskid() {
3844 
3845  kmp_int32 gtid;
3846  kmp_info_t *thread;
3847 
3848  gtid = __kmp_get_gtid();
3849  if (gtid < 0) {
3850  return 0;
3851  }
3852  thread = __kmp_thread_from_gtid(gtid);
3853  return thread->th.th_current_task->td_task_id;
3854 
3855 } // __kmpc_get_taskid
3856 
3857 kmp_uint64 __kmpc_get_parent_taskid() {
3858 
3859  kmp_int32 gtid;
3860  kmp_info_t *thread;
3861  kmp_taskdata_t *parent_task;
3862 
3863  gtid = __kmp_get_gtid();
3864  if (gtid < 0) {
3865  return 0;
3866  }
3867  thread = __kmp_thread_from_gtid(gtid);
3868  parent_task = thread->th.th_current_task->td_parent;
3869  return (parent_task == NULL ? 0 : parent_task->td_task_id);
3870 
3871 } // __kmpc_get_parent_taskid
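
/* Illustrative sketch (assumption, not part of the library): a debugging aid
   could use the two exported helpers above to tag its trace records with the
   current and parent task identifiers; a return value of 0 means the calling
   thread is unknown to the runtime or has no parent task. <stdio.h> is assumed
   to be available. */
static void trace_current_task(const char *tag) {
  kmp_uint64 task_id = __kmpc_get_taskid();          // 0 if thread is unknown
  kmp_uint64 parent_id = __kmpc_get_parent_taskid(); // 0 if no parent task
  printf("%s: task %llu (parent %llu)\n", tag, (unsigned long long)task_id,
         (unsigned long long)parent_id);
}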
3872 
3884 void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
3885  const struct kmp_dim *dims) {
3886  int j, idx;
3887  kmp_int64 last, trace_count;
3888  kmp_info_t *th = __kmp_threads[gtid];
3889  kmp_team_t *team = th->th.th_team;
3890  kmp_uint32 *flags;
3891  kmp_disp_t *pr_buf = th->th.th_dispatch;
3892  dispatch_shared_info_t *sh_buf;
3893 
3894  KA_TRACE(
3895  20,
3896  ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
3897  gtid, num_dims, !team->t.t_serialized));
3898  KMP_DEBUG_ASSERT(dims != NULL);
3899  KMP_DEBUG_ASSERT(num_dims > 0);
3900 
3901  if (team->t.t_serialized) {
3902  KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
3903  return; // no dependencies if team is serialized
3904  }
3905  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
3906  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer for
3907  // the next loop
3908  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
3909 
3910  // Save bounds info into allocated private buffer
3911  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
3912  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
3913  th, sizeof(kmp_int64) * (4 * num_dims + 1));
3914  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
3915  pr_buf->th_doacross_info[0] =
3916  (kmp_int64)num_dims; // first element is number of dimensions
3917  // Also save the address of num_done so it can be accessed later without
3918  // knowing the buffer index
3919  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
3920  pr_buf->th_doacross_info[2] = dims[0].lo;
3921  pr_buf->th_doacross_info[3] = dims[0].up;
3922  pr_buf->th_doacross_info[4] = dims[0].st;
3923  last = 5;
3924  for (j = 1; j < num_dims; ++j) {
3925  // range of the current dimension (kept for all dimensions but dims[0])
3926  kmp_int64 range_length;
3927  if (dims[j].st == 1) { // most common case
3928  // AC: should we care about ranges bigger than LLONG_MAX? (not for now)
3929  range_length = dims[j].up - dims[j].lo + 1;
3930  } else {
3931  if (dims[j].st > 0) {
3932  KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
3933  range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
3934  } else { // negative increment
3935  KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
3936  range_length =
3937  (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
3938  }
3939  }
3940  pr_buf->th_doacross_info[last++] = range_length;
3941  pr_buf->th_doacross_info[last++] = dims[j].lo;
3942  pr_buf->th_doacross_info[last++] = dims[j].up;
3943  pr_buf->th_doacross_info[last++] = dims[j].st;
3944  }
3945 
3946  // Compute total trip count.
3947  // Start with range of dims[0] which we don't need to keep in the buffer.
3948  if (dims[0].st == 1) { // most common case
3949  trace_count = dims[0].up - dims[0].lo + 1;
3950  } else if (dims[0].st > 0) {
3951  KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
3952  trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
3953  } else { // negative increment
3954  KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
3955  trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
3956  }
3957  for (j = 1; j < num_dims; ++j) {
3958  trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
3959  }
3960  KMP_DEBUG_ASSERT(trace_count > 0);
3961 
3962  // Check if the shared buffer is still occupied by another loop (the one
3963  // with index idx - __kmp_dispatch_num_buffers)
3964  if (idx != sh_buf->doacross_buf_idx) {
3965  // Shared buffer is occupied, wait for it to be free
3966  __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
3967  __kmp_eq_4, NULL);
3968  }
3969 #if KMP_32_BIT_ARCH
3970  // Check if we are the first thread. After the CAS the first thread gets 0,
3971  // others get 1 if initialization is in progress, allocated pointer otherwise.
3972  // Treat pointer as volatile integer (value 0 or 1) until memory is allocated.
3973  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
3974  (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
3975 #else
3976  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
3977  (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
3978 #endif
3979  if (flags == NULL) {
3980  // we are the first thread, allocate the array of flags
3981  size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
3982  flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
3983  KMP_MB();
3984  sh_buf->doacross_flags = flags;
3985  } else if (flags == (kmp_uint32 *)1) {
3986 #if KMP_32_BIT_ARCH
3987  // initialization is still in progress, need to wait
3988  while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
3989 #else
3990  while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
3991 #endif
3992  KMP_YIELD(TRUE);
3993  KMP_MB();
3994  } else {
3995  KMP_MB();
3996  }
3997  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
3998  pr_buf->th_doacross_flags =
3999  sh_buf->doacross_flags; // save a private copy so the shared buffer
4000  // is not touched on each iteration
4001  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
4002 }
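
/* Illustrative sketch (derived from the code above, not a runtime entry): the
   private th_doacross_info buffer built by __kmpc_doacross_init is laid out as
   [0] = num_dims, [1] = &doacross_num_done, [2..4] = lo/up/st of dims[0], then
   four entries {range_length, lo, up, st} per remaining dimension, so the
   block for dimension j (j >= 1) starts at index 4*j + 1. A hypothetical
   helper reading it back could look like this: */
static kmp_int64 doacross_range_length(const kmp_int64 *info, int j) {
  KMP_DEBUG_ASSERT(j >= 1 && j < (int)info[0]);
  return info[4 * j + 1]; // range_length of dimension j, as stored above
}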
4003 
4004 void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
4005  kmp_int32 shft, num_dims, i;
4006  kmp_uint32 flag;
4007  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4008  kmp_info_t *th = __kmp_threads[gtid];
4009  kmp_team_t *team = th->th.th_team;
4010  kmp_disp_t *pr_buf;
4011  kmp_int64 lo, up, st;
4012 
4013  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
4014  if (team->t.t_serialized) {
4015  KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
4016  return; // no dependencies if team is serialized
4017  }
4018 
4019  // calculate sequential iteration number and check out-of-bounds condition
4020  pr_buf = th->th.th_dispatch;
4021  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4022  num_dims = pr_buf->th_doacross_info[0];
4023  lo = pr_buf->th_doacross_info[2];
4024  up = pr_buf->th_doacross_info[3];
4025  st = pr_buf->th_doacross_info[4];
4026  if (st == 1) { // most common case
4027  if (vec[0] < lo || vec[0] > up) {
4028  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4029  "bounds [%lld,%lld]\n",
4030  gtid, vec[0], lo, up));
4031  return;
4032  }
4033  iter_number = vec[0] - lo;
4034  } else if (st > 0) {
4035  if (vec[0] < lo || vec[0] > up) {
4036  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4037  "bounds [%lld,%lld]\n",
4038  gtid, vec[0], lo, up));
4039  return;
4040  }
4041  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4042  } else { // negative increment
4043  if (vec[0] > lo || vec[0] < up) {
4044  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4045  "bounds [%lld,%lld]\n",
4046  gtid, vec[0], lo, up));
4047  return;
4048  }
4049  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4050  }
4051  for (i = 1; i < num_dims; ++i) {
4052  kmp_int64 iter, ln;
4053  kmp_int32 j = i * 4;
4054  ln = pr_buf->th_doacross_info[j + 1];
4055  lo = pr_buf->th_doacross_info[j + 2];
4056  up = pr_buf->th_doacross_info[j + 3];
4057  st = pr_buf->th_doacross_info[j + 4];
4058  if (st == 1) {
4059  if (vec[i] < lo || vec[i] > up) {
4060  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4061  "bounds [%lld,%lld]\n",
4062  gtid, vec[i], lo, up));
4063  return;
4064  }
4065  iter = vec[i] - lo;
4066  } else if (st > 0) {
4067  if (vec[i] < lo || vec[i] > up) {
4068  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4069  "bounds [%lld,%lld]\n",
4070  gtid, vec[i], lo, up));
4071  return;
4072  }
4073  iter = (kmp_uint64)(vec[i] - lo) / st;
4074  } else { // st < 0
4075  if (vec[i] > lo || vec[i] < up) {
4076  KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
4077  "bounds [%lld,%lld]\n",
4078  gtid, vec[i], lo, up));
4079  return;
4080  }
4081  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4082  }
4083  iter_number = iter + ln * iter_number;
4084  }
4085  shft = iter_number % 32; // use 32-bit granularity
4086  iter_number >>= 5; // divided by 32
4087  flag = 1 << shft;
4088  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
4089  KMP_YIELD(TRUE);
4090  }
4091  KMP_MB();
4092  KA_TRACE(20,
4093  ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
4094  gtid, (iter_number << 5) + shft));
4095 }
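
/* Worked example (illustrative numbers): for a 2-D doacross nest with
   dims[0] = {lo 0, up 9, st 1} and dims[1] = {lo 0, up 99, st 1}, the stored
   range of dimension 1 is ln = 100, so vec = {3, 5} linearizes to
   iter_number = 5 + 100 * 3 = 305. With the 32-bit flag granularity above this
   maps to word 305 >> 5 = 9 and bit 305 % 32 = 17, i.e. the wait loop spins on
   (th_doacross_flags[9] & (1 << 17)) until __kmpc_doacross_post() sets it. */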
4096 
4097 void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
4098  kmp_int32 shft, num_dims, i;
4099  kmp_uint32 flag;
4100  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
4101  kmp_info_t *th = __kmp_threads[gtid];
4102  kmp_team_t *team = th->th.th_team;
4103  kmp_disp_t *pr_buf;
4104  kmp_int64 lo, st;
4105 
4106  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
4107  if (team->t.t_serialized) {
4108  KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
4109  return; // no dependencies if team is serialized
4110  }
4111 
4112  // calculate sequential iteration number (same as in "wait" but no
4113  // out-of-bounds checks)
4114  pr_buf = th->th.th_dispatch;
4115  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
4116  num_dims = pr_buf->th_doacross_info[0];
4117  lo = pr_buf->th_doacross_info[2];
4118  st = pr_buf->th_doacross_info[4];
4119  if (st == 1) { // most common case
4120  iter_number = vec[0] - lo;
4121  } else if (st > 0) {
4122  iter_number = (kmp_uint64)(vec[0] - lo) / st;
4123  } else { // negative increment
4124  iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
4125  }
4126  for (i = 1; i < num_dims; ++i) {
4127  kmp_int64 iter, ln;
4128  kmp_int32 j = i * 4;
4129  ln = pr_buf->th_doacross_info[j + 1];
4130  lo = pr_buf->th_doacross_info[j + 2];
4131  st = pr_buf->th_doacross_info[j + 4];
4132  if (st == 1) {
4133  iter = vec[i] - lo;
4134  } else if (st > 0) {
4135  iter = (kmp_uint64)(vec[i] - lo) / st;
4136  } else { // st < 0
4137  iter = (kmp_uint64)(lo - vec[i]) / (-st);
4138  }
4139  iter_number = iter + ln * iter_number;
4140  }
4141  shft = iter_number % 32; // use 32-bit granularity
4142  iter_number >>= 5; // divided by 32
4143  flag = 1 << shft;
4144  KMP_MB();
4145  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
4146  KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
4147  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
4148  (iter_number << 5) + shft));
4149 }
4150 
4151 void __kmpc_doacross_fini(ident_t *loc, int gtid) {
4152  kmp_int32 num_done;
4153  kmp_info_t *th = __kmp_threads[gtid];
4154  kmp_team_t *team = th->th.th_team;
4155  kmp_disp_t *pr_buf = th->th.th_dispatch;
4156 
4157  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
4158  if (team->t.t_serialized) {
4159  KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
4160  return; // nothing to do
4161  }
4162  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
4163  if (num_done == th->th.th_team_nproc) {
4164  // we are the last thread, need to free shared resources
4165  int idx = pr_buf->th_doacross_buf_idx - 1;
4166  dispatch_shared_info_t *sh_buf =
4167  &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
4168  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
4169  (kmp_int64)&sh_buf->doacross_num_done);
4170  KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
4171  KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
4172  __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
4173  sh_buf->doacross_flags = NULL;
4174  sh_buf->doacross_num_done = 0;
4175  sh_buf->doacross_buf_idx +=
4176  __kmp_dispatch_num_buffers; // free buffer for future re-use
4177  }
4178  // free private resources (need to keep buffer index forever)
4179  pr_buf->th_doacross_flags = NULL;
4180  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
4181  pr_buf->th_doacross_info = NULL;
4182  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
4183 }
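
/* Illustrative sketch (assumption about typical codegen, not taken from this
   file): a compiler lowering "#pragma omp for ordered(1)" with
   "depend(sink: i-1)" / "depend(source)" clauses would bracket the loop with
   the doacross entry points above, roughly as follows. The names and bounds
   are hypothetical; note that the out-of-bounds check in __kmpc_doacross_wait
   makes the sink wait for the very first iteration a no-op. */
static void doacross_example(ident_t *loc, int gtid, kmp_int64 lb,
                             kmp_int64 ub) {
  struct kmp_dim dims;
  dims.lo = lb; // lower bound of the single dimension
  dims.up = ub; // upper bound
  dims.st = 1;  // stride
  __kmpc_doacross_init(loc, gtid, /*num_dims=*/1, &dims);
  for (kmp_int64 i = lb; i <= ub; ++i) { // iterations assigned to this thread
    kmp_int64 sink_vec = i - 1;
    __kmpc_doacross_wait(loc, gtid, &sink_vec); // depend(sink: i-1)
    /* ... ordered loop body ... */
    kmp_int64 source_vec = i;
    __kmpc_doacross_post(loc, gtid, &source_vec); // depend(source)
  }
  __kmpc_doacross_fini(loc, gtid);
}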
4184 
4185 /* omp_alloc/omp_free are only defined for C/C++, not for Fortran */
4186 void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
4187  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
4188 }
4189 
4190 void omp_free(void *ptr, omp_allocator_handle_t allocator) {
4191  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
4192 }
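
/* Illustrative usage (standard OpenMP 5.0 allocator API; the size and the
   choice of omp_default_mem_alloc are just examples): */
void allocator_example(void) {
  // request 1024 doubles from the default memory allocator
  double *buf =
      (double *)omp_alloc(1024 * sizeof(double), omp_default_mem_alloc);
  if (buf != NULL) {
    buf[0] = 42.0; /* ... use the memory ... */
    omp_free(buf, omp_default_mem_alloc); // release via the same allocator
  }
}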
4193 
4194 int __kmpc_get_target_offload(void) {
4195  if (!__kmp_init_serial) {
4196  __kmp_serial_initialize();
4197  }
4198  return __kmp_target_offload;
4199 }
4200 
4201 int __kmpc_pause_resource(kmp_pause_status_t level) {
4202  if (!__kmp_init_serial) {
4203  return 1; // Can't pause if runtime is not initialized
4204  }
4205  return __kmp_pause_resource(level);
4206 }
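
/* Illustrative usage (assumption): applications normally reach this entry
   point through the standard OpenMP 5.0 API rather than calling it directly,
   e.g. to release runtime resources before a long non-OpenMP phase. After a
   soft pause the runtime resumes lazily on the next OpenMP construct (see the
   __kmp_resume_if_soft_paused() call in __kmpc_reduce above). */
void pause_example(void) {
  if (omp_pause_resource_all(omp_pause_soft) != 0) {
    /* non-zero return: the runtime could not honor the request */
  }
  /* ... long serial or non-OpenMP phase ... */
}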