#include "kmp_error.h"
#include "kmp_stats.h"
#include "ompt-specific.h"

#define MAX_MESSAGE 512
  if ((env = getenv("KMP_INITIAL_THREAD_BIND")) != NULL &&
      __kmp_str_match_true(env)) {
    __kmp_middle_initialize();
    KC_TRACE(10, ("__kmpc_begin: middle initialization called\n"));
  } else if (__kmp_ignore_mppbeg() == FALSE) {
    __kmp_internal_begin();
    KC_TRACE(10, ("__kmpc_begin: called\n"));
  if (__kmp_ignore_mppend() == FALSE) {
    KC_TRACE(10, ("__kmpc_end: called\n"));
    KA_TRACE(30, ("__kmpc_end\n"));

    __kmp_internal_end_thread(-1);

#if KMP_OS_WINDOWS && OMPT_SUPPORT
  if (ompt_enabled.enabled)
    __kmp_internal_end_library(__kmp_gtid_get_specific());
  kmp_int32 gtid = __kmp_entry_gtid();

  KC_TRACE(10, ("__kmpc_global_thread_num: T#%d\n", gtid));

           ("__kmpc_global_num_threads: num_threads = %d\n", __kmp_all_nth));

  return TCR_4(__kmp_all_nth);

  KC_TRACE(10, ("__kmpc_bound_thread_num: called\n"));
  return __kmp_tid_from_gtid(__kmp_entry_gtid());

  KC_TRACE(10, ("__kmpc_bound_num_threads: called\n"));
  return __kmp_entry_thread()->th.th_team->t.t_nproc;
  if (__kmp_par_range == 0) {

  semi2 = strchr(semi2, ';');

  semi2 = strchr(semi2 + 1, ';');

  if (__kmp_par_range_filename[0]) {
    const char *name = semi2 - 1;
    while ((name > loc->psource) && (*name != '/') && (*name != ';')) {

    if ((*name == '/') || (*name == ';')) {

    if (strncmp(__kmp_par_range_filename, name, semi2 - name)) {
      return __kmp_par_range < 0;

  semi3 = strchr(semi2 + 1, ';');
  if (__kmp_par_range_routine[0]) {
    if ((semi3 != NULL) && (semi3 > semi2) &&
        (strncmp(__kmp_par_range_routine, semi2 + 1, semi3 - semi2 - 1))) {
      return __kmp_par_range < 0;

  if (KMP_SSCANF(semi3 + 1, "%d", &line_no) == 1) {
    if ((line_no >= __kmp_par_range_lb) && (line_no <= __kmp_par_range_ub)) {
      return __kmp_par_range > 0;

    return __kmp_par_range < 0;
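// Note on the filtering above: loc->psource is a semicolon-separated location
// string (roughly ";file;routine;line;..."), and KMP_PAR_RANGE narrows which
// parallel regions may actually fork by matching the optional filename and
// routine filters and then the [__kmp_par_range_lb, __kmp_par_range_ub] line
// range; a negative __kmp_par_range inverts the decision. The exact psource
// layout is an assumption documented here from the parsing logic, not from a
// separate specification.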
  return __kmp_entry_thread()->th.th_root->r.r_active;
                            kmp_int32 num_threads) {
  KA_TRACE(20, ("__kmpc_push_num_threads: enter T#%d num_threads=%d\n",
                global_tid, num_threads));

  __kmp_push_num_threads(loc, global_tid, num_threads);

void __kmpc_pop_num_threads(ident_t *loc, kmp_int32 global_tid) {
  KA_TRACE(20, ("__kmpc_pop_num_threads: enter\n"));

void __kmpc_push_proc_bind(ident_t *loc, kmp_int32 global_tid,
                           kmp_int32 proc_bind) {
  KA_TRACE(20, ("__kmpc_push_proc_bind: enter T#%d proc_bind=%d\n", global_tid,

  __kmp_push_proc_bind(loc, global_tid, (kmp_proc_bind_t)proc_bind);
  int gtid = __kmp_entry_gtid();

#if (KMP_STATS_ENABLED)
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_parallel_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_parallel_overhead);

  va_start(ap, microtask);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    kmp_info_t *master_th = __kmp_threads[gtid];
    kmp_team_t *parent_team = master_th->th.th_team;
    ompt_lw_taskteam_t *lwt = parent_team->t.ompt_serialized_team_info;

      ompt_frame = &(lwt->ompt_task_info.frame);

      int tid = __kmp_tid_from_gtid(gtid);

          parent_team->t.t_implicit_task_taskdata[tid].ompt_task_info.frame);

    ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);

#if INCLUDE_SSC_MARKS

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t) microtask,
                  VOLATILE_CAST(launch_t) __kmp_invoke_task_func,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

#if INCLUDE_SSC_MARKS

  __kmp_join_call(loc, gtid

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);

    KMP_POP_PARTITIONED_TIMER();

#endif // KMP_STATS_ENABLED
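// For reference, a compiler typically lowers "#pragma omp parallel" into a
// call of this form (illustrative sketch only, not code emitted by this
// runtime; names are hypothetical):
//
//   void outlined(kmp_int32 *gtid, kmp_int32 *bound_tid, int *a, float *b);
//   __kmpc_fork_call(&loc, 2, (kmpc_micro)outlined, &a_var, &b_var);
//
// where argc counts the shared-variable pointers forwarded through varargs to
// the outlined microtask. The exact outlining convention is compiler-dependent.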
                          kmp_int32 num_teams, kmp_int32 num_threads) {
           ("__kmpc_push_num_teams: enter T#%d num_teams=%d num_threads=%d\n",
            global_tid, num_teams, num_threads));

  __kmp_push_num_teams(loc, global_tid, num_teams, num_threads);
  int gtid = __kmp_entry_gtid();
  kmp_info_t *this_thr = __kmp_threads[gtid];

  va_start(ap, microtask);

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_teams_overhead);

    KMP_PUSH_PARTITIONED_TIMER(OMP_teams_overhead);

  this_thr->th.th_teams_microtask = microtask;
  this_thr->th.th_teams_level =
      this_thr->th.th_team->t.t_level;

  kmp_team_t *parent_team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(gtid);
  if (ompt_enabled.enabled) {
    parent_team->t.t_implicit_task_taskdata[tid]
        .ompt_task_info.frame.enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);

  OMPT_STORE_RETURN_ADDRESS(gtid);

  if (this_thr->th.th_teams_size.nteams == 0) {
    __kmp_push_num_teams(loc, gtid, 0, 0);

  KMP_DEBUG_ASSERT(this_thr->th.th_set_nproc >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nteams >= 1);
  KMP_DEBUG_ASSERT(this_thr->th.th_teams_size.nth >= 1);

  __kmp_fork_call(loc, gtid, fork_context_intel, argc,
                  VOLATILE_CAST(microtask_t)
                  VOLATILE_CAST(launch_t) __kmp_invoke_teams_master,
#if (KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64) && KMP_OS_LINUX

  __kmp_join_call(loc, gtid

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  kmp_cg_root_t *tmp = this_thr->th.th_cg_roots;
  this_thr->th.th_cg_roots = tmp->up;
  KA_TRACE(100, ("__kmpc_fork_teams: Thread %p popping node %p and moving up"
                 " to node %p. cg_nthreads was %d\n",
                 this_thr, tmp, this_thr->th.th_cg_roots, tmp->cg_nthreads));
  KMP_DEBUG_ASSERT(tmp->cg_nthreads);
  int i = tmp->cg_nthreads--;

  KMP_DEBUG_ASSERT(this_thr->th.th_cg_roots);
  this_thr->th.th_current_task->td_icvs.thread_limit =
      this_thr->th.th_cg_roots->cg_thread_limit;

  this_thr->th.th_teams_microtask = NULL;
  this_thr->th.th_teams_level = 0;
  *(kmp_int64 *)(&this_thr->th.th_teams_size) = 0L;

#if KMP_STATS_ENABLED
  if (previous_state == stats_state_e::SERIAL_REGION) {
    KMP_EXCHANGE_PARTITIONED_TIMER(OMP_serial);

    KMP_POP_PARTITIONED_TIMER();

#endif // KMP_STATS_ENABLED
int __kmpc_invoke_task_func(int gtid) { return __kmp_invoke_task_func(gtid); }

  OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_serialized_parallel(loc, global_tid);
  kmp_internal_control_t *top;
  kmp_info_t *this_thr;
  kmp_team_t *serial_team;

      ("__kmpc_end_serialized_parallel: called by T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  this_thr = __kmp_threads[global_tid];
  serial_team = this_thr->th.th_serial_team;

  kmp_task_team_t *task_team = this_thr->th.th_task_team;

  if (task_team != NULL && task_team->tt.tt_found_proxy_tasks)
    __kmp_task_team_wait(this_thr, serial_team USE_ITT_BUILD_ARG(NULL));

  KMP_DEBUG_ASSERT(serial_team);
  KMP_ASSERT(serial_team->t.t_serialized);
  KMP_DEBUG_ASSERT(this_thr->th.th_team == serial_team);
  KMP_DEBUG_ASSERT(serial_team != this_thr->th.th_root->r.r_root_team);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads);
  KMP_DEBUG_ASSERT(serial_team->t.t_threads[0] == this_thr);

  if (ompt_enabled.enabled &&
      this_thr->th.ompt_thread_info.state != ompt_state_overhead) {
    OMPT_CUR_TASK_INFO(this_thr)->frame.exit_frame = ompt_data_none;
    if (ompt_enabled.ompt_callback_implicit_task) {
      ompt_callbacks.ompt_callback(ompt_callback_implicit_task)(
          ompt_scope_end, NULL, OMPT_CUR_TASK_DATA(this_thr), 1,
          OMPT_CUR_TASK_INFO(this_thr)->thread_num, ompt_task_implicit);

    ompt_data_t *parent_task_data;
    __ompt_get_task_info_internal(1, NULL, &parent_task_data, NULL, NULL, NULL);

    if (ompt_enabled.ompt_callback_parallel_end) {
      ompt_callbacks.ompt_callback(ompt_callback_parallel_end)(
          &(serial_team->t.ompt_team_info.parallel_data), parent_task_data,
          ompt_parallel_invoker_program | ompt_parallel_team,
          OMPT_LOAD_RETURN_ADDRESS(global_tid));

    __ompt_lw_taskteam_unlink(this_thr);
    this_thr->th.ompt_thread_info.state = ompt_state_overhead;

  top = serial_team->t.t_control_stack_top;
  if (top && top->serial_nesting_level == serial_team->t.t_serialized) {
    copy_icvs(&serial_team->t.t_threads[0]->th.th_current_task->td_icvs, top);
    serial_team->t.t_control_stack_top = top->next;

  serial_team->t.t_level--;

  KMP_DEBUG_ASSERT(serial_team->t.t_dispatch->th_disp_buffer);

  dispatch_private_info_t *disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer;
  serial_team->t.t_dispatch->th_disp_buffer =
      serial_team->t.t_dispatch->th_disp_buffer->next;
  __kmp_free(disp_buffer);

  this_thr->th.th_def_allocator = serial_team->t.t_def_allocator;

  --serial_team->t.t_serialized;
  if (serial_team->t.t_serialized == 0) {

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
    if (__kmp_inherit_fp_control && serial_team->t.t_fp_control_saved) {
      __kmp_clear_x87_fpu_status_word();
      __kmp_load_x87_fpu_control_word(&serial_team->t.t_x87_fpu_control_word);
      __kmp_load_mxcsr(&serial_team->t.t_mxcsr);

    this_thr->th.th_team = serial_team->t.t_parent;
    this_thr->th.th_info.ds.ds_tid = serial_team->t.t_master_tid;

    this_thr->th.th_team_nproc = serial_team->t.t_parent->t.t_nproc;
    this_thr->th.th_team_master =
        serial_team->t.t_parent->t.t_threads[0];
    this_thr->th.th_team_serialized = this_thr->th.th_team->t.t_serialized;

    this_thr->th.th_dispatch =
        &this_thr->th.th_team->t.t_dispatch[serial_team->t.t_master_tid];

    __kmp_pop_current_task_from_thread(this_thr);

    KMP_ASSERT(this_thr->th.th_current_task->td_flags.executing == 0);
    this_thr->th.th_current_task->td_flags.executing = 1;

    if (__kmp_tasking_mode != tskm_immediate_exec) {

      this_thr->th.th_task_team =
          this_thr->th.th_team->t.t_task_team[this_thr->th.th_task_state];

          ("__kmpc_end_serialized_parallel: T#%d restoring task_team %p / "

           global_tid, this_thr->th.th_task_team, this_thr->th.th_team));

    if (__kmp_tasking_mode != tskm_immediate_exec) {
      KA_TRACE(20, ("__kmpc_end_serialized_parallel: T#%d decreasing nesting "
                    "depth of serial team %p to %d\n",
                    global_tid, serial_team, serial_team->t.t_serialized));

  if (__kmp_env_consistency_check)
    __kmp_pop_parallel(global_tid, NULL);

  if (ompt_enabled.enabled)
    this_thr->th.ompt_thread_info.state =
        ((this_thr->th.th_team_serialized) ? ompt_state_work_serial
                                           : ompt_state_work_parallel);
  KC_TRACE(10, ("__kmpc_flush: called\n"));

#if (KMP_ARCH_X86 || KMP_ARCH_X86_64)

  if (!__kmp_cpuinfo.initialized) {
    __kmp_query_cpuid(&__kmp_cpuinfo);

  if (!__kmp_cpuinfo.sse2) {

#elif KMP_COMPILER_MSVC

  __sync_synchronize();
#endif // KMP_COMPILER_ICC

#elif (KMP_ARCH_ARM || KMP_ARCH_AARCH64 || KMP_ARCH_MIPS || KMP_ARCH_MIPS64 || \

#error Unknown or unsupported architecture

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_flush) {
    ompt_callbacks.ompt_callback(ompt_callback_flush)(
        __ompt_get_thread_data_internal(), OMPT_GET_RETURN_ADDRESS(0));
  KC_TRACE(10, ("__kmpc_barrier: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
  KC_TRACE(10, ("__kmpc_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (KMP_MASTER_GTID(global_tid)) {

    KMP_PUSH_PARTITIONED_TIMER(OMP_master);

#if OMPT_SUPPORT && OMPT_OPTIONAL

    if (ompt_enabled.ompt_callback_master) {
      kmp_info_t *this_thr = __kmp_threads[global_tid];
      kmp_team_t *team = this_thr->th.th_team;

      int tid = __kmp_tid_from_gtid(global_tid);
      ompt_callbacks.ompt_callback(ompt_callback_master)(
          ompt_scope_begin, &(team->t.ompt_team_info.parallel_data),
          &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
          OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {
#if KMP_USE_DYNAMIC_LOCK

      __kmp_push_sync(global_tid, ct_master, loc, NULL, 0);

      __kmp_check_sync(global_tid, ct_master, loc, NULL, 0);

      __kmp_push_sync(global_tid, ct_master, loc, NULL);

      __kmp_check_sync(global_tid, ct_master, loc, NULL);

  KC_TRACE(10, ("__kmpc_end_master: called T#%d\n", global_tid));

  KMP_DEBUG_ASSERT(KMP_MASTER_GTID(global_tid));
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  if (ompt_enabled.ompt_callback_master) {
    int tid = __kmp_tid_from_gtid(global_tid);
    ompt_callbacks.ompt_callback(ompt_callback_master)(
        ompt_scope_end, &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
        OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ThreadIdentInvalid);

    if (KMP_MASTER_GTID(global_tid))
      __kmp_pop_sync(global_tid, ct_master, loc);
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  KC_TRACE(10, ("__kmpc_ordered: called T#%d\n", gtid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  __kmp_itt_ordered_prep(gtid);

  th = __kmp_threads[gtid];

#if OMPT_SUPPORT && OMPT_OPTIONAL

  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);
    team = __kmp_team_from_gtid(gtid);
    lck = (ompt_wait_id_t)(uintptr_t)&team->t.t_ordered.dt.t_value;

    th->th.ompt_thread_info.wait_id = lck;
    th->th.ompt_thread_info.state = ompt_state_wait_ordered;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_ordered, omp_lock_hint_none, kmp_mutex_impl_spin, lck,

  if (th->th.th_dispatch->th_deo_fcn != 0)
    (*th->th.th_dispatch->th_deo_fcn)(&gtid, &cid, loc);

    __kmp_parallel_deo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    th->th.ompt_thread_info.state = ompt_state_work_parallel;
    th->th.ompt_thread_info.wait_id = 0;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_ordered, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_itt_ordered_start(gtid);

  KC_TRACE(10, ("__kmpc_end_ordered: called T#%d\n", gtid));

  __kmp_itt_ordered_end(gtid);

  th = __kmp_threads[gtid];

  if (th->th.th_dispatch->th_dxo_fcn != 0)
    (*th->th.th_dispatch->th_dxo_fcn)(&gtid, &cid, loc);

    __kmp_parallel_dxo(&gtid, &cid, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(

        (ompt_wait_id_t)(uintptr_t)&__kmp_team_from_gtid(gtid)
            ->t.t_ordered.dt.t_value,
        OMPT_LOAD_RETURN_ADDRESS(gtid));
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void
__kmp_init_indirect_csptr(kmp_critical_name *crit, ident_t const *loc,
                          kmp_int32 gtid, kmp_indirect_locktag_t tag) {

  kmp_indirect_lock_t **lck;
  lck = (kmp_indirect_lock_t **)crit;
  kmp_indirect_lock_t *ilk = __kmp_allocate_indirect_lock(&idx, gtid, tag);
  KMP_I_LOCK_FUNC(ilk, init)(ilk->lock);
  KMP_SET_I_LOCK_LOCATION(ilk, loc);
  KMP_SET_I_LOCK_FLAGS(ilk, kmp_lf_critical_section);

      ("__kmp_init_indirect_csptr: initialized indirect lock #%d\n", tag));

  __kmp_itt_critical_creating(ilk->lock, loc);

  int status = KMP_COMPARE_AND_STORE_PTR(lck, nullptr, ilk);

    __kmp_itt_critical_destroyed(ilk->lock);

  KMP_DEBUG_ASSERT(*lck != NULL);
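// The compare-and-store above lazily publishes exactly one indirect lock per
// critical-section name: every racing thread allocates and initializes a
// candidate, but only the first CAS into *crit succeeds; losing threads are
// expected to destroy and free their candidate (handled in the branch elided
// here), which is why the routine only asserts that *lck is non-NULL on exit.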
#define KMP_ACQUIRE_TAS_LOCK(lock, gtid) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    if (KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
        !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)) { \
      KMP_FSYNC_PREPARE(l); \
      KMP_INIT_YIELD(spins); \
      kmp_backoff_t backoff = __kmp_spin_backoff_params; \
        if (TCR_4(__kmp_nth) > \
            (__kmp_avail_proc ? __kmp_avail_proc : __kmp_xproc)) { \
          KMP_YIELD_SPIN(spins); \
        __kmp_spin_backoff(&backoff); \
             KMP_ATOMIC_LD_RLX(&l->lk.poll) != tas_free || \
             !__kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy)); \
    KMP_FSYNC_ACQUIRED(l); \

#define KMP_TEST_TAS_LOCK(lock, gtid, rc) \
    kmp_tas_lock_t *l = (kmp_tas_lock_t *)lock; \
    kmp_int32 tas_free = KMP_LOCK_FREE(tas); \
    kmp_int32 tas_busy = KMP_LOCK_BUSY(gtid + 1, tas); \
    rc = KMP_ATOMIC_LD_RLX(&l->lk.poll) == tas_free && \
         __kmp_atomic_compare_store_acq(&l->lk.poll, tas_free, tas_busy); \

#define KMP_RELEASE_TAS_LOCK(lock, gtid) \
  { KMP_ATOMIC_ST_REL(&((kmp_tas_lock_t *)lock)->lk.poll, KMP_LOCK_FREE(tas)); }
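// The three TAS helpers above inline the test-and-set lock fast path so that
// routines such as __kmpc_set_lock and __kmpc_critical can avoid an indirect
// call when KMP_USE_INLINED_TAS is enabled: acquire first tries a relaxed load
// plus an acquire compare-and-store, and only falls back to the spin loop with
// KMP_YIELD_SPIN and __kmp_spin_backoff when the lock is busy; release is a
// single store-release of KMP_LOCK_FREE(tas).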
#include <sys/syscall.h>

#define FUTEX_WAIT 0

#define FUTEX_WAKE 1

#define KMP_ACQUIRE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    kmp_int32 gtid_code = (gtid + 1) << 1; \
    KMP_FSYNC_PREPARE(ftx); \
    kmp_int32 poll_val; \
    while ((poll_val = KMP_COMPARE_AND_STORE_RET32( \
                &(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                KMP_LOCK_BUSY(gtid_code, futex))) != KMP_LOCK_FREE(futex)) { \
      kmp_int32 cond = KMP_LOCK_STRIP(poll_val) & 1; \
        if (!KMP_COMPARE_AND_STORE_RET32(&(ftx->lk.poll), poll_val, \
                                         KMP_LOCK_BUSY(1, futex))) { \
        poll_val |= KMP_LOCK_BUSY(1, futex); \
      if ((rc = syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAIT, poll_val, \
                        NULL, NULL, 0)) != 0) { \
    KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_TEST_FUTEX_LOCK(lock, gtid, rc) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    if (KMP_COMPARE_AND_STORE_ACQ32(&(ftx->lk.poll), KMP_LOCK_FREE(futex), \
                                    KMP_LOCK_BUSY(gtid + 1 << 1, futex))) { \
      KMP_FSYNC_ACQUIRED(ftx); \

#define KMP_RELEASE_FUTEX_LOCK(lock, gtid) \
    kmp_futex_lock_t *ftx = (kmp_futex_lock_t *)lock; \
    KMP_FSYNC_RELEASING(ftx); \
    kmp_int32 poll_val = \
        KMP_XCHG_FIXED32(&(ftx->lk.poll), KMP_LOCK_FREE(futex)); \
    if (KMP_LOCK_STRIP(poll_val) & 1) { \
      syscall(__NR_futex, &(ftx->lk.poll), FUTEX_WAKE, \
              KMP_LOCK_BUSY(1, futex), NULL, NULL, 0); \
    KMP_YIELD_OVERSUB(); \

#endif // KMP_USE_FUTEX
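// The futex variants keep the lock word in ftx->lk.poll: the owner stores
// (gtid + 1) << 1 and the low bit is set once a waiter has gone to sleep.
// Acquire CASes the word and, on contention, marks the waiter bit before
// calling FUTEX_WAIT; release exchanges the word back to KMP_LOCK_FREE(futex)
// and issues FUTEX_WAKE only if the waiter bit was observed, so an
// uncontended lock/unlock pair never enters the kernel.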
#else // KMP_USE_DYNAMIC_LOCK

static kmp_user_lock_p __kmp_get_critical_section_ptr(kmp_critical_name *crit,

  kmp_user_lock_p *lck_pp = (kmp_user_lock_p *)crit;

  kmp_user_lock_p lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);

    lck = __kmp_user_lock_allocate(&idx, gtid, kmp_lf_critical_section);
    __kmp_init_user_lock_with_checks(lck);
    __kmp_set_user_lock_location(lck, loc);

    __kmp_itt_critical_creating(lck);

    int status = KMP_COMPARE_AND_STORE_PTR(lck_pp, 0, lck);

      __kmp_itt_critical_destroyed(lck);

      __kmp_destroy_user_lock_with_checks(lck);
      __kmp_user_lock_free(&idx, gtid, lck);
      lck = (kmp_user_lock_p)TCR_PTR(*lck_pp);
      KMP_DEBUG_ASSERT(lck != NULL);

#endif // KMP_USE_DYNAMIC_LOCK
                     kmp_critical_name *crit) {
#if KMP_USE_DYNAMIC_LOCK
#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(global_tid);
#endif // OMPT_SUPPORT

#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);
  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  OMPT_STORE_RETURN_ADDRESS(gtid);
  void *codeptr_ra = NULL;
  if (ompt_enabled.enabled) {
    ti = __kmp_threads[global_tid]->th.ompt_thread_info;

    prev_state = ti.state;
    ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
    ti.state = ompt_state_wait_critical;

    codeptr_ra = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_critical, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
          (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr_ra);

  KMP_POP_PARTITIONED_TIMER();

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));
#endif // KMP_USE_DYNAMIC_LOCK
#if KMP_USE_DYNAMIC_LOCK

static __forceinline kmp_dyna_lockseq_t __kmp_map_hint_to_lock(uintptr_t hint) {

#define KMP_TSX_LOCK(seq) lockseq_##seq

#define KMP_TSX_LOCK(seq) __kmp_user_lock_seq

#if KMP_ARCH_X86 || KMP_ARCH_X86_64
#define KMP_CPUINFO_RTM (__kmp_cpuinfo.rtm)

#define KMP_CPUINFO_RTM 0

  if (hint & kmp_lock_hint_hle)
    return KMP_TSX_LOCK(hle);
  if (hint & kmp_lock_hint_rtm)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(rtm) : __kmp_user_lock_seq;
  if (hint & kmp_lock_hint_adaptive)
    return KMP_CPUINFO_RTM ? KMP_TSX_LOCK(adaptive) : __kmp_user_lock_seq;

  if ((hint & omp_lock_hint_contended) && (hint & omp_lock_hint_uncontended))
    return __kmp_user_lock_seq;
  if ((hint & omp_lock_hint_speculative) &&
      (hint & omp_lock_hint_nonspeculative))
    return __kmp_user_lock_seq;

  if (hint & omp_lock_hint_contended)
    return lockseq_queuing;

  if ((hint & omp_lock_hint_uncontended) && !(hint & omp_lock_hint_speculative))

  if (hint & omp_lock_hint_speculative)
    return KMP_TSX_LOCK(hle);

  return __kmp_user_lock_seq;
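// Summary of the mapping above: explicit KMP hints (hle/rtm/adaptive) request
// a TSX-based sequence when the CPU reports RTM, contradictory OpenMP hints
// fall back to the default __kmp_user_lock_seq, "contended" prefers the
// queuing lock, and "speculative" prefers HLE where it is compiled in.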
#if OMPT_SUPPORT && OMPT_OPTIONAL
#if KMP_USE_DYNAMIC_LOCK
static kmp_mutex_impl_t
__ompt_get_mutex_impl_type(void *user_lock, kmp_indirect_lock_t *ilock = 0) {
  switch (KMP_EXTRACT_D_TAG(user_lock)) {

    return kmp_mutex_impl_queuing;

    return kmp_mutex_impl_spin;

    return kmp_mutex_impl_speculative;

    return kmp_mutex_impl_none;

  ilock = KMP_LOOKUP_I_LOCK(user_lock);

  switch (ilock->type) {

  case locktag_adaptive:

    return kmp_mutex_impl_speculative;

  case locktag_nested_tas:
    return kmp_mutex_impl_spin;

  case locktag_nested_futex:

  case locktag_ticket:
  case locktag_queuing:

  case locktag_nested_ticket:
  case locktag_nested_queuing:
  case locktag_nested_drdpa:
    return kmp_mutex_impl_queuing;

    return kmp_mutex_impl_none;

static kmp_mutex_impl_t __ompt_get_mutex_impl_type() {
  switch (__kmp_user_lock_kind) {

    return kmp_mutex_impl_spin;

    return kmp_mutex_impl_queuing;

    return kmp_mutex_impl_speculative;

    return kmp_mutex_impl_none;

#endif // KMP_USE_DYNAMIC_LOCK
#endif // OMPT_SUPPORT && OMPT_OPTIONAL
                               kmp_critical_name *crit, uint32_t hint) {

  kmp_user_lock_p lck;
#if OMPT_SUPPORT && OMPT_OPTIONAL
  ompt_state_t prev_state = ompt_state_undefined;
  ompt_thread_info_t ti;

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(global_tid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);

  KC_TRACE(10, ("__kmpc_critical: called T#%d\n", global_tid));

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical_wait);

  kmp_dyna_lockseq_t lckseq = __kmp_map_hint_to_lock(hint);
  if (KMP_IS_D_LOCK(lckseq)) {
    KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                KMP_GET_D_TAG(lckseq));

    __kmp_init_indirect_csptr(crit, loc, global_tid, KMP_GET_I_TAG(lckseq));

  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(crit), (ompt_wait_id_t)(uintptr_t)lck,

#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_TAS_LOCK(lck, global_tid);

#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_ACQUIRE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);

    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);

    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck,
                      __kmp_map_hint_to_lock(hint));

    __kmp_itt_critical_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ti = __kmp_threads[global_tid]->th.ompt_thread_info;

      prev_state = ti.state;
      ti.wait_id = (ompt_wait_id_t)(uintptr_t)lck;
      ti.state = ompt_state_wait_critical;

      if (ompt_enabled.ompt_callback_mutex_acquire) {
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
            ompt_mutex_critical, (unsigned int)hint,
            __ompt_get_mutex_impl_type(0, ilk), (ompt_wait_id_t)(uintptr_t)lck,

    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);

  KMP_POP_PARTITIONED_TIMER();

  __kmp_itt_critical_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {

    ti.state = prev_state;

    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  KMP_PUSH_PARTITIONED_TIMER(OMP_critical);
  KA_TRACE(15, ("__kmpc_critical: done T#%d\n", global_tid));

#endif // KMP_USE_DYNAMIC_LOCK
                         kmp_critical_name *crit) {
  kmp_user_lock_p lck;

  KC_TRACE(10, ("__kmpc_end_critical: called T#%d\n", global_tid));

#if KMP_USE_DYNAMIC_LOCK
  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    KMP_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

#if KMP_USE_INLINED_TAS
    if (__kmp_user_lock_seq == lockseq_tas && !__kmp_env_consistency_check) {
      KMP_RELEASE_TAS_LOCK(lck, global_tid);

#elif KMP_USE_INLINED_FUTEX
    if (__kmp_user_lock_seq == lockseq_futex && !__kmp_env_consistency_check) {
      KMP_RELEASE_FUTEX_LOCK(lck, global_tid);

      KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);

    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    KMP_ASSERT(ilk != NULL);

    if (__kmp_env_consistency_check) {
      __kmp_pop_sync(global_tid, ct_critical, loc);

    __kmp_itt_critical_releasing(lck);

    KMP_I_LOCK_FUNC(ilk, unset)(lck, global_tid);

#else // KMP_USE_DYNAMIC_LOCK

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_CRITICAL_SIZE)) {
    lck = (kmp_user_lock_p)crit;

    lck = (kmp_user_lock_p)TCR_PTR(*((kmp_user_lock_p *)crit));

  KMP_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_itt_critical_releasing(lck);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK

#if OMPT_SUPPORT && OMPT_OPTIONAL

  OMPT_STORE_RETURN_ADDRESS(global_tid);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_critical, (ompt_wait_id_t)(uintptr_t)lck,
        OMPT_LOAD_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KA_TRACE(15, ("__kmpc_end_critical: done T#%d\n", global_tid));
  KC_TRACE(10, ("__kmpc_barrier_master: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check)
    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  status = __kmp_barrier(bs_plain_barrier, global_tid, TRUE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  return (status != 0) ? 0 : 1;
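// The barrier is entered in split form here (the TRUE argument): the status
// returned by __kmp_barrier encodes whether this thread should run the
// combined master block, and the conversion above maps that to the documented
// result of __kmpc_barrier_master, 1 for "execute the master block" and 0 for
// "skip it".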
  KC_TRACE(10, ("__kmpc_end_barrier_master: called T#%d\n", global_tid));

  __kmp_end_split_barrier(bs_plain_barrier, global_tid);

  KC_TRACE(10, ("__kmpc_barrier_master_nowait: called T#%d\n", global_tid));

  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

    __kmp_check_barrier(global_tid, ct_barrier, loc);

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(global_tid);

  __kmp_threads[global_tid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;

  if (__kmp_env_consistency_check) {

    if (global_tid < 0) {
      KMP_WARNING(ThreadIdentInvalid);

      __kmp_pop_sync(global_tid, ct_master, loc);
  kmp_int32 rc = __kmp_enter_single(global_tid, loc, TRUE);

    KMP_PUSH_PARTITIONED_TIMER(OMP_single);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.enabled) {

      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_executor, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));

      if (ompt_enabled.ompt_callback_work) {
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_begin,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));
        ompt_callbacks.ompt_callback(ompt_callback_work)(
            ompt_work_single_other, ompt_scope_end,
            &(team->t.ompt_team_info.parallel_data),
            &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data),
            1, OMPT_GET_RETURN_ADDRESS(0));

  __kmp_exit_single(global_tid);
  KMP_POP_PARTITIONED_TIMER();

#if OMPT_SUPPORT && OMPT_OPTIONAL
  kmp_info_t *this_thr = __kmp_threads[global_tid];
  kmp_team_t *team = this_thr->th.th_team;
  int tid = __kmp_tid_from_gtid(global_tid);

  if (ompt_enabled.ompt_callback_work) {
    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_single_executor, ompt_scope_end,
        &(team->t.ompt_team_info.parallel_data),
        &(team->t.t_implicit_task_taskdata[tid].ompt_task_info.task_data), 1,
        OMPT_GET_RETURN_ADDRESS(0));

  KMP_POP_PARTITIONED_TIMER();
  KE_TRACE(10, ("__kmpc_for_static_fini called T#%d\n", global_tid));

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_work) {
    ompt_work_t ompt_work_type = ompt_work_loop;
    ompt_team_info_t *team_info = __ompt_get_teaminfo(0, NULL);
    ompt_task_info_t *task_info = __ompt_get_task_info_object(0);

      ompt_work_type = ompt_work_loop;

      ompt_work_type = ompt_work_sections;

      ompt_work_type = ompt_work_distribute;

      KMP_DEBUG_ASSERT(ompt_work_type);

    ompt_callbacks.ompt_callback(ompt_callback_work)(
        ompt_work_type, ompt_scope_end, &(team_info->parallel_data),
        &(task_info->task_data), 0, OMPT_GET_RETURN_ADDRESS(0));

  if (__kmp_env_consistency_check)
    __kmp_pop_workshare(global_tid, ct_pdo, loc);
void ompc_set_num_threads(int arg) {

  __kmp_set_num_threads(arg, __kmp_entry_gtid());

void ompc_set_dynamic(int flag) {

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__dynamic(thread, flag ? TRUE : FALSE);

void ompc_set_nested(int flag) {

  thread = __kmp_entry_thread();

  __kmp_save_internal_controls(thread);

  set__max_active_levels(thread, flag ? __kmp_dflt_max_active_levels : 1);

void ompc_set_max_active_levels(int max_active_levels) {

  __kmp_set_max_active_levels(__kmp_entry_gtid(), max_active_levels);

void ompc_set_schedule(omp_sched_t kind, int modifier) {

  __kmp_set_schedule(__kmp_entry_gtid(), (kmp_sched_t)kind, modifier);

int ompc_get_ancestor_thread_num(int level) {
  return __kmp_get_ancestor_thread_num(__kmp_entry_gtid(), level);

int ompc_get_team_size(int level) {
  return __kmp_get_team_size(__kmp_entry_gtid(), level);
void ompc_set_affinity_format(char const *format) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  __kmp_strncpy_truncate(__kmp_affinity_format, KMP_AFFINITY_FORMAT_SIZE,
                         format, KMP_STRLEN(format) + 1);

size_t ompc_get_affinity_format(char *buffer, size_t size) {

  if (!__kmp_init_serial) {
    __kmp_serial_initialize();

  format_size = KMP_STRLEN(__kmp_affinity_format);
  if (buffer && size) {
    __kmp_strncpy_truncate(buffer, size, __kmp_affinity_format,

void ompc_display_affinity(char const *format) {

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  gtid = __kmp_get_gtid();
  __kmp_aux_display_affinity(gtid, format);

size_t ompc_capture_affinity(char *buffer, size_t buf_size,
                             char const *format) {

  size_t num_required;
  kmp_str_buf_t capture_buf;
  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  gtid = __kmp_get_gtid();
  __kmp_str_buf_init(&capture_buf);
  num_required = __kmp_aux_capture_affinity(gtid, format, &capture_buf);
  if (buffer && buf_size) {
    __kmp_strncpy_truncate(buffer, buf_size, capture_buf.str,
                           capture_buf.used + 1);

  __kmp_str_buf_free(&capture_buf);
  return num_required;
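// ompc_capture_affinity follows an snprintf-like contract: the formatted
// affinity string is built in a temporary kmp_str_buf_t, copied (with
// truncation) into the caller's buffer only when buffer and buf_size are
// non-zero, and the size required for the full string is returned so callers
// can allocate and retry.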
void kmpc_set_stacksize(int arg) {

  __kmp_aux_set_stacksize(arg);

void kmpc_set_stacksize_s(size_t arg) {

  __kmp_aux_set_stacksize(arg);

void kmpc_set_blocktime(int arg) {

  gtid = __kmp_entry_gtid();
  tid = __kmp_tid_from_gtid(gtid);
  thread = __kmp_thread_from_gtid(gtid);

  __kmp_aux_set_blocktime(arg, thread, tid);

void kmpc_set_library(int arg) {

  __kmp_user_set_library((enum library_type)arg);

void kmpc_set_defaults(char const *str) {

  __kmp_aux_set_defaults(str, KMP_STRLEN(str));

void kmpc_set_disp_num_buffers(int arg) {

  if (__kmp_init_serial == 0 && arg > 0)
    __kmp_dispatch_num_buffers = arg;

int kmpc_set_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  return __kmp_aux_set_affinity_mask_proc(proc, mask);

int kmpc_unset_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  return __kmp_aux_unset_affinity_mask_proc(proc, mask);

int kmpc_get_affinity_mask_proc(int proc, void **mask) {
#if defined(KMP_STUB) || !KMP_AFFINITY_SUPPORTED

  if (!TCR_4(__kmp_init_middle)) {
    __kmp_middle_initialize();

  return __kmp_aux_get_affinity_mask_proc(proc, mask);
                        void *cpy_data, void (*cpy_func)(void *, void *),

  KC_TRACE(10, ("__kmpc_copyprivate: called T#%d\n", gtid));

  data_ptr = &__kmp_team_from_gtid(gtid)->t.t_copypriv_data;

  if (__kmp_env_consistency_check) {

      KMP_WARNING(ConstructIdentInvalid);

    *data_ptr = cpy_data;

  ompt_frame_t *ompt_frame;
  if (ompt_enabled.enabled) {
    __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
    if (ompt_frame->enter_frame.ptr == NULL)
      ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
    OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);

  (*cpy_func)(cpy_data, *data_ptr);

  if (ompt_enabled.enabled) {
    OMPT_STORE_RETURN_ADDRESS(gtid);

  __kmp_threads[gtid]->th.th_ident = loc;

  __kmp_barrier(bs_plain_barrier, gtid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    ompt_frame->enter_frame = ompt_data_none;
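// Copyprivate protocol, as implemented above: the thread that executed the
// single region publishes its data pointer through the team's t_copypriv_data
// slot, a barrier makes that pointer visible to the whole team, every thread
// then runs the compiler-generated cpy_func(its_own_data, published_data), and
// a second barrier keeps the source data alive until all copies have finished.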
#define INIT_LOCK __kmp_init_user_lock_with_checks
#define INIT_NESTED_LOCK __kmp_init_nested_user_lock_with_checks
#define ACQUIRE_LOCK __kmp_acquire_user_lock_with_checks
#define ACQUIRE_LOCK_TIMED __kmp_acquire_user_lock_with_checks_timed
#define ACQUIRE_NESTED_LOCK __kmp_acquire_nested_user_lock_with_checks
#define ACQUIRE_NESTED_LOCK_TIMED \
  __kmp_acquire_nested_user_lock_with_checks_timed
#define RELEASE_LOCK __kmp_release_user_lock_with_checks
#define RELEASE_NESTED_LOCK __kmp_release_nested_user_lock_with_checks
#define TEST_LOCK __kmp_test_user_lock_with_checks
#define TEST_NESTED_LOCK __kmp_test_nested_user_lock_with_checks
#define DESTROY_LOCK __kmp_destroy_user_lock_with_checks
#define DESTROY_NESTED_LOCK __kmp_destroy_nested_user_lock_with_checks
#if KMP_USE_DYNAMIC_LOCK

static __forceinline void __kmp_init_lock_with_hint(ident_t *loc, void **lock,
                                                    kmp_dyna_lockseq_t seq) {
  if (KMP_IS_D_LOCK(seq)) {
    KMP_INIT_D_LOCK(lock, seq);

    __kmp_itt_lock_creating((kmp_user_lock_p)lock, NULL);

    KMP_INIT_I_LOCK(lock, seq);

    kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
    __kmp_itt_lock_creating(ilk->lock, loc);

static __forceinline void
__kmp_init_nest_lock_with_hint(ident_t *loc, void **lock,
                               kmp_dyna_lockseq_t seq) {

  if (seq == lockseq_hle || seq == lockseq_rtm || seq == lockseq_adaptive)
    seq = __kmp_user_lock_seq;

    seq = lockseq_nested_tas;

    seq = lockseq_nested_futex;

  case lockseq_ticket:
    seq = lockseq_nested_ticket;

  case lockseq_queuing:
    seq = lockseq_nested_queuing;

    seq = lockseq_nested_drdpa;

    seq = lockseq_nested_queuing;

  KMP_INIT_I_LOCK(lock, seq);

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(lock);
  __kmp_itt_lock_creating(ilk->lock, loc);
void __kmpc_init_lock_with_hint(ident_t *loc, kmp_int32 gtid, void **user_lock,

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock_with_hint");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

void __kmpc_init_nest_lock_with_hint(ident_t *loc, kmp_int32 gtid,
                                     void **user_lock, uintptr_t hint) {
  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock_with_hint");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_map_hint_to_lock(hint));

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_init_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_lock");

  __kmp_init_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_init_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  KMP_DEBUG_ASSERT(__kmp_init_serial);
  if (__kmp_env_consistency_check && user_lock == NULL) {
    KMP_FATAL(LockIsUninitialized, "omp_init_nest_lock");

  __kmp_init_nest_lock_with_hint(loc, user_lock, __kmp_user_lock_seq);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, (omp_lock_hint_t)hint,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#else // KMP_USE_DYNAMIC_LOCK

  static char const *const func = "omp_init_nest_lock";
  kmp_user_lock_p lck;
  KMP_DEBUG_ASSERT(__kmp_init_serial);

  if (__kmp_env_consistency_check) {
    if (user_lock == NULL) {
      KMP_FATAL(LockIsUninitialized, func);

  KMP_CHECK_USER_LOCK_INIT();

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_user_lock_allocate(user_lock, gtid, 0);

  INIT_NESTED_LOCK(lck);
  __kmp_set_user_lock_location(lck, loc);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_init) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_init)(
        ompt_mutex_nest_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_creating(lck);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_destroy_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
    lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;

    lck = (kmp_user_lock_p)user_lock;

  __kmp_itt_lock_destroyed(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    kmp_user_lock_p lck;
    if (KMP_EXTRACT_D_TAG(user_lock) == 0) {
      lck = ((kmp_indirect_lock_t *)KMP_LOOKUP_I_LOCK(user_lock))->lock;

      lck = (kmp_user_lock_p)user_lock;

    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_destroy_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  kmp_indirect_lock_t *ilk = KMP_LOOKUP_I_LOCK(user_lock);
  __kmp_itt_lock_destroyed(ilk->lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  KMP_D_LOCK_FUNC(user_lock, destroy)((kmp_dyna_lock_t *)user_lock);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_destroy_nest_lock");

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_lock_destroy) {
    ompt_callbacks.ompt_callback(ompt_callback_lock_destroy)(
        ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  __kmp_itt_lock_destroyed(lck);

  DESTROY_NESTED_LOCK(lck);

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {

    __kmp_user_lock_free(user_lock, gtid, lck);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_set_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {

#if KMP_USE_DYNAMIC_LOCK
  int tag = KMP_EXTRACT_D_TAG(user_lock);

  __kmp_itt_lock_acquiring(

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_TAS_LOCK(user_lock, gtid);

#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_ACQUIRE_FUTEX_LOCK(user_lock, gtid);

    __kmp_direct_set[tag]((kmp_dyna_lock_t *)user_lock, gtid);

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);

  ACQUIRE_LOCK(lck, gtid);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_set_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(user_lock),
          (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

  int acquire_status =
      KMP_D_LOCK_FUNC(user_lock, set)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)acquire_status;

  __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {

        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,

      if (ompt_enabled.ompt_callback_nest_lock) {

        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

  else if ((__kmp_user_lock_kind == lk_futex) &&
           (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
            OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;

    lck = __kmp_lookup_user_lock(user_lock, "omp_set_nest_lock");

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL

  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);

    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (ompt_enabled.ompt_callback_mutex_acquire) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
          ompt_mutex_nest_lock, omp_lock_hint_none,
          __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,

  ACQUIRE_NESTED_LOCK(lck, gtid, &acquire_status);

  __kmp_itt_lock_acquired(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled) {
    if (acquire_status == KMP_LOCK_ACQUIRED_FIRST) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {

        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

      if (ompt_enabled.ompt_callback_nest_lock) {

        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);

#endif // KMP_USE_DYNAMIC_LOCK
void __kmpc_unset_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_RELEASE_TAS_LOCK(user_lock, gtid);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_RELEASE_FUTEX_LOCK(user_lock, gtid);
  } else
#endif
  {
    __kmp_direct_unset[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" release path: clear the TAS poll word directly
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
    TCW_4(((kmp_user_lock_p)user_lock)->tas.lk.poll, 0);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.ompt_callback_mutex_released) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_lock");
  }

  __kmp_itt_lock_releasing(lck);

  RELEASE_LOCK(lck, gtid);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_released) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
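/* __kmpc_unset_nest_lock backs omp_unset_nest_lock(). The OMPT callbacks
   distinguish the release that drops the last nesting level
   (ompt_callback_mutex_released) from one that merely decrements the depth
   (ompt_callback_nest_lock with ompt_scope_end). */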
void __kmpc_unset_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK

  __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);
  int release_status =
      KMP_D_LOCK_FUNC(user_lock, unset)((kmp_dyna_lock_t *)user_lock, gtid);
  (void)release_status;

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
  }
#endif

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
#if KMP_OS_LINUX &&                                                            \
    (KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_ARCH_ARM || KMP_ARCH_AARCH64)
    // "fast" release path: decrement the nesting depth directly
    kmp_tas_lock_t *tl = (kmp_tas_lock_t *)user_lock;
    __kmp_itt_lock_releasing((kmp_user_lock_p)user_lock);

#if OMPT_SUPPORT && OMPT_OPTIONAL
    int release_status = KMP_LOCK_STILL_HELD;
#endif

    if (--(tl->lk.depth_locked) == 0) {
      TCW_4(tl->lk.poll, 0);
#if OMPT_SUPPORT && OMPT_OPTIONAL
      release_status = KMP_LOCK_RELEASED;
#endif
    }

#if OMPT_SUPPORT && OMPT_OPTIONAL
    // This is the case, if called from omp_init_lock_with_hint:
    void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
    if (!codeptr)
      codeptr = OMPT_GET_RETURN_ADDRESS(0);
    if (ompt_enabled.enabled) {
      if (release_status == KMP_LOCK_RELEASED) {
        if (ompt_enabled.ompt_callback_mutex_released) {
          // release_lock_last
          ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
              ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
        }
      } else if (ompt_enabled.ompt_callback_nest_lock) {
        // release_lock_prev
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
#endif

    return;
#else
    lck = (kmp_user_lock_p)user_lock;
#endif
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_unset_nest_lock");
  }

  __kmp_itt_lock_releasing(lck);

  int release_status;
  release_status = RELEASE_NESTED_LOCK(lck, gtid);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled) {
    if (release_status == KMP_LOCK_RELEASED) {
      if (ompt_enabled.ompt_callback_mutex_released) {
        // release_lock_last
        ompt_callbacks.ompt_callback(ompt_callback_mutex_released)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else if (ompt_enabled.ompt_callback_nest_lock) {
      // release_lock_prev
      ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
          ompt_scope_end, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
    }
  }
#endif

#endif // KMP_USE_DYNAMIC_LOCK
}
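/* __kmpc_test_lock backs omp_test_lock(): it tries to acquire the lock without
   blocking and reports success to the caller (nonzero on success). */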
int __kmpc_test_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  int tag = KMP_EXTRACT_D_TAG(user_lock);
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
#if KMP_USE_INLINED_TAS
  if (tag == locktag_tas && !__kmp_env_consistency_check) {
    KMP_TEST_TAS_LOCK(user_lock, gtid, rc);
  } else
#elif KMP_USE_INLINED_FUTEX
  if (tag == locktag_futex && !__kmp_env_consistency_check) {
    KMP_TEST_FUTEX_LOCK(user_lock, gtid, rc);
  } else
#endif
  {
    rc = __kmp_direct_test[tag]((kmp_dyna_lock_t *)user_lock, gtid);
  }
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.ompt_callback_mutex_acquired) {
      ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
          ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
    }
#endif
    return FTN_TRUE;
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
    return FTN_FALSE;
  }

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) <= OMP_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_lock");
  }

  __kmp_itt_lock_acquiring(lck);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_lock, omp_lock_hint_none, __ompt_get_mutex_impl_type(),
        (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  rc = TEST_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (rc && ompt_enabled.ompt_callback_mutex_acquired) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
        ompt_mutex_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
  }
#endif

  return (rc ? FTN_TRUE : FTN_FALSE);

#endif // KMP_USE_DYNAMIC_LOCK
}
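/* __kmpc_test_nest_lock backs omp_test_nest_lock(); the return value is the
   new nesting count on success and 0 on failure, which is also what the OMPT
   code below uses to tell "lock_first" (rc == 1) from "lock_next". */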
int __kmpc_test_nest_lock(ident_t *loc, kmp_int32 gtid, void **user_lock) {
#if KMP_USE_DYNAMIC_LOCK
  int rc;
  __kmp_itt_lock_acquiring((kmp_user_lock_p)user_lock);
#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(user_lock),
        (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
  }
#endif
  rc = KMP_D_LOCK_FUNC(user_lock, test)((kmp_dyna_lock_t *)user_lock, gtid);
  if (rc) {
    __kmp_itt_lock_acquired((kmp_user_lock_p)user_lock);
  } else {
    __kmp_itt_lock_cancelled((kmp_user_lock_p)user_lock);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)user_lock,
            codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)user_lock, codeptr);
      }
    }
  }
#endif
  return rc;

#else // KMP_USE_DYNAMIC_LOCK

  kmp_user_lock_p lck;
  int rc;

  if ((__kmp_user_lock_kind == lk_tas) &&
      (sizeof(lck->tas.lk.poll) + sizeof(lck->tas.lk.depth_locked) <=
       OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else if ((__kmp_user_lock_kind == lk_futex) &&
             (sizeof(lck->futex.lk.poll) + sizeof(lck->futex.lk.depth_locked) <=
              OMP_NEST_LOCK_T_SIZE)) {
    lck = (kmp_user_lock_p)user_lock;
  } else {
    lck = __kmp_lookup_user_lock(user_lock, "omp_test_nest_lock");
  }

  __kmp_itt_lock_acquiring(lck);

#if OMPT_SUPPORT && OMPT_OPTIONAL
  // This is the case, if called from omp_init_lock_with_hint:
  void *codeptr = OMPT_LOAD_RETURN_ADDRESS(gtid);
  if (!codeptr)
    codeptr = OMPT_GET_RETURN_ADDRESS(0);
  if (ompt_enabled.enabled &&
      ompt_enabled.ompt_callback_mutex_acquire) {
    ompt_callbacks.ompt_callback(ompt_callback_mutex_acquire)(
        ompt_mutex_nest_lock, omp_lock_hint_none,
        __ompt_get_mutex_impl_type(), (ompt_wait_id_t)(uintptr_t)lck,
        codeptr);
  }
#endif

  rc = TEST_NESTED_LOCK(lck, gtid);
  if (rc) {
    __kmp_itt_lock_acquired(lck);
  } else {
    __kmp_itt_lock_cancelled(lck);
  }
#if OMPT_SUPPORT && OMPT_OPTIONAL
  if (ompt_enabled.enabled && rc) {
    if (rc == 1) {
      if (ompt_enabled.ompt_callback_mutex_acquired) {
        // lock_first
        ompt_callbacks.ompt_callback(ompt_callback_mutex_acquired)(
            ompt_mutex_nest_lock, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    } else {
      if (ompt_enabled.ompt_callback_nest_lock) {
        // lock_next
        ompt_callbacks.ompt_callback(ompt_callback_nest_lock)(
            ompt_scope_begin, (ompt_wait_id_t)(uintptr_t)lck, codeptr);
      }
    }
  }
#endif
  return rc;

#endif // KMP_USE_DYNAMIC_LOCK
}
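/* Fast scalable reduction support: the method chosen by
   __kmp_determine_reduction_method() is cached per thread so the matching
   __kmpc_end_reduce*() call can retrieve it. */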
#define __KMP_SET_REDUCTION_METHOD(gtid, rmethod)                              \
  ((__kmp_threads[(gtid)]->th.th_local.packed_reduction_method) = (rmethod))

#define __KMP_GET_REDUCTION_METHOD(gtid)                                       \
  (__kmp_threads[(gtid)]->th.th_local.packed_reduction_method)
static __forceinline void
__kmp_enter_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                          kmp_critical_name *crit) {

  // this lock was visible to a customer and to the threading profile tool as
  // a serial overhead span (although it's used for an internal purpose only)
  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  kmp_dyna_lock_t *lk = (kmp_dyna_lock_t *)crit;
  // Check if it is initialized.
  if (*lk == 0) {
    if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
      KMP_COMPARE_AND_STORE_ACQ32((volatile kmp_int32 *)crit, 0,
                                  KMP_GET_D_TAG(__kmp_user_lock_seq));
    } else {
      __kmp_init_indirect_csptr(crit, loc, global_tid,
                                KMP_GET_I_TAG(__kmp_user_lock_seq));
    }
  }
  // Branch for accessing the actual lock object and set operation. This
  // branching is inevitable since this lock initialization does not follow
  // the normal dispatch path (lock table is not used).
  if (KMP_EXTRACT_D_TAG(lk) != 0) {
    lck = (kmp_user_lock_p)lk;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_D_LOCK_FUNC(lk, set)(lk, global_tid);
  } else {
    kmp_indirect_lock_t *ilk = *((kmp_indirect_lock_t **)lk);
    lck = ilk->lock;
    KMP_DEBUG_ASSERT(lck != NULL);
    if (__kmp_env_consistency_check) {
      __kmp_push_sync(global_tid, ct_critical, loc, lck, __kmp_user_lock_seq);
    }
    KMP_I_LOCK_FUNC(ilk, set)(lck, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // The fast reduction code is only emitted by Intel compilers with 32 byte
  // critical sections. If there isn't enough space, use a pointer instead.
  if (__kmp_base_user_lock_size <= INTEL_CRITICAL_SIZE) {
    lck = (kmp_user_lock_p)crit;
  } else {
    lck = __kmp_get_critical_section_ptr(crit, loc, global_tid);
  }
  KMP_DEBUG_ASSERT(lck != NULL);

  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_critical, loc, lck);

  __kmp_acquire_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
static __forceinline void
__kmp_end_critical_section_reduce_block(ident_t *loc, kmp_int32 global_tid,
                                        kmp_critical_name *crit) {

  kmp_user_lock_p lck;

#if KMP_USE_DYNAMIC_LOCK

  if (KMP_IS_D_LOCK(__kmp_user_lock_seq)) {
    lck = (kmp_user_lock_p)crit;
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_D_LOCK_FUNC(lck, unset)((kmp_dyna_lock_t *)lck, global_tid);
  } else {
    kmp_indirect_lock_t *ilk =
        (kmp_indirect_lock_t *)TCR_PTR(*((kmp_indirect_lock_t **)crit));
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_critical, loc);
    KMP_I_LOCK_FUNC(ilk, unset)(ilk->lock, global_tid);
  }

#else // KMP_USE_DYNAMIC_LOCK

  // A 32-byte critical section fits the lock directly; otherwise it holds a
  // pointer to the actual lock object.
  if (__kmp_base_user_lock_size > 32) {
    lck = *((kmp_user_lock_p *)crit);
    KMP_ASSERT(lck != NULL);
  } else {
    lck = (kmp_user_lock_p)crit;
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_critical, loc);

  __kmp_release_user_lock_with_checks(lck, global_tid);

#endif // KMP_USE_DYNAMIC_LOCK
}
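/* Helpers for reductions inside a teams construct: the reducing thread
   temporarily swaps itself into the parent team so the reduction is performed
   across the league, then restores its original team state afterwards. */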
static __forceinline int
__kmp_swap_teams_for_teams_reduction(kmp_info_t *th, kmp_team_t **team_p,
                                     int *task_state) {
  kmp_team_t *team;

  // Check if we are inside the teams construct
  if (th->th.th_teams_microtask) {
    *team_p = team = th->th.th_team;
    if (team->t.t_level == th->th.th_teams_level) {
      // this is reduction at teams construct
      KMP_DEBUG_ASSERT(!th->th.th_info.ds.ds_tid); // check that tid == 0
      // Swap teams temporarily for the reduction
      th->th.th_info.ds.ds_tid = team->t.t_master_tid;
      th->th.th_team = team->t.t_parent;
      th->th.th_team_nproc = th->th.th_team->t.t_nproc;
      th->th.th_task_team = th->th.th_team->t.t_task_team[0];
      *task_state = th->th.th_task_state;
      th->th.th_task_state = 0;
      return 1;
    }
  }
  return 0;
}
static __forceinline void
__kmp_restore_swapped_teams(kmp_info_t *th, kmp_team_t *team, int task_state) {
  // Restore thread structure
  th->th.th_info.ds.ds_tid = 0;
  th->th.th_team = team;
  th->th.th_team_nproc = team->t.t_nproc;
  th->th.th_task_team = team->t.t_task_team[task_state];
  th->th.th_task_state = task_state;
}
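/* __kmpc_reduce_nowait: entry point for a reduction without a terminating
   barrier.
     loc          source location, global_tid caller's global thread id
     num_vars     number of variables in the reduction clause
     reduce_size  total size of the reduction data, reduce_data pointer to it
     reduce_func  callback that folds rhs_data into lhs_data
     lck          lock used by the critical-section fallback method
   Returns 1 for the thread that should perform the reduction (and later call
   __kmpc_end_reduce_nowait), 2 if the atomic method is used, 0 otherwise. */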
kmp_int32 __kmpc_reduce_nowait(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;
  KA_TRACE(10, ("__kmpc_reduce_nowait() enter: called T#%d\n", global_tid));

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);
  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;

    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

    // all threads should do this pop here (because __kmpc_end_reduce_nowait()
    // won't be called by the code gen)
    if (__kmp_env_consistency_check)
      __kmp_pop_sync(global_tid, ct_reduce, loc);

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif

    __kmp_threads[global_tid]->th.th_ident = loc;
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, FALSE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers will get to __kmpc_end_reduce_nowait())
    if (__kmp_env_consistency_check) {
      if (retval == 0) {
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }
  KA_TRACE(
      10,
      ("__kmpc_reduce_nowait() exit: called T#%d: method %08x, returns %08x\n",
       global_tid, packed_reduction_method, retval));

  return retval;
}
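/* __kmpc_end_reduce_nowait: closes the reduction opened by
   __kmpc_reduce_nowait() for the thread that received return value 1. */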
void __kmpc_end_reduce_nowait(ident_t *loc, kmp_int32 global_tid,
                              kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() enter: called T#%d\n", global_tid));

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  OMPT_REDUCTION_DECL(__kmp_thread_from_gtid(global_tid), global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);
  } else if (packed_reduction_method == empty_reduce_block) {
    // usage: if team size == 1, no synchronization is required
  } else if (packed_reduction_method == atomic_reduce_block) {
    // neither master nor other workers should get here
    // (code gen does not generate this call in case 2: atomic reduce block)
  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {
    // only master gets here
  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce_nowait() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
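/* __kmpc_reduce: blocking variant of __kmpc_reduce_nowait(); same parameters
   and return values, but the tree-reduction path uses a full (waiting)
   barrier because the construct has no nowait clause. */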
kmp_int32 __kmpc_reduce(
    ident_t *loc, kmp_int32 global_tid, kmp_int32 num_vars, size_t reduce_size,
    void *reduce_data, void (*reduce_func)(void *lhs_data, void *rhs_data),
    kmp_critical_name *lck) {

  int retval = 0;
  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_reduce() enter: called T#%d\n", global_tid));

  // do not call __kmp_serial_initialize(), it will be called by
  // __kmp_parallel_initialize() if needed
  if (!TCR_4(__kmp_init_parallel))
    __kmp_parallel_initialize();

  __kmp_resume_if_soft_paused();

// check correctness of reduce block nesting
#if KMP_USE_DYNAMIC_LOCK
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL, 0);
#else
  if (__kmp_env_consistency_check)
    __kmp_push_sync(global_tid, ct_reduce, loc, NULL);
#endif

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __kmp_determine_reduction_method(
      loc, global_tid, num_vars, reduce_size, reduce_data, reduce_func, lck);
  __KMP_SET_REDUCTION_METHOD(global_tid, packed_reduction_method);

  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    __kmp_enter_critical_section_reduce_block(loc, global_tid, lck);
    retval = 1;

  } else if (packed_reduction_method == empty_reduce_block) {

    OMPT_REDUCTION_BEGIN;
    // usage: if team size == 1, no synchronization is required
    retval = 1;

  } else if (packed_reduction_method == atomic_reduce_block) {

    retval = 2;

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

// this barrier should be visible to a customer and to the threading profile
// tool (it's a terminating barrier on constructs if NOWAIT not specified)
#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident =
        loc; // needed for correct notification of frames
    retval =
        __kmp_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                      global_tid, TRUE, reduce_size, reduce_data, reduce_func);
    retval = (retval != 0) ? (0) : (1);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

    // all other workers except master should do this pop here
    // (none of the other workers except master will enter __kmpc_end_reduce())
    if (__kmp_env_consistency_check) {
      if (retval == 0) { // 0: all other workers; 1: master
        __kmp_pop_sync(global_tid, ct_reduce, loc);
      }
    }

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  KA_TRACE(10,
           ("__kmpc_reduce() exit: called T#%d: method %08x, returns %08x\n",
            global_tid, packed_reduction_method, retval));
  return retval;
}
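/* __kmpc_end_reduce: closes a blocking reduction; unlike the nowait variant it
   also performs the implicit barrier that terminates the construct. */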
void __kmpc_end_reduce(ident_t *loc, kmp_int32 global_tid,
                       kmp_critical_name *lck) {

  PACKED_REDUCTION_METHOD_T packed_reduction_method;
  kmp_info_t *th;
  kmp_team_t *team;
  int teams_swapped = 0, task_state;

  KA_TRACE(10, ("__kmpc_end_reduce() enter: called T#%d\n", global_tid));

  th = __kmp_thread_from_gtid(global_tid);
  teams_swapped = __kmp_swap_teams_for_teams_reduction(th, &team, &task_state);

  packed_reduction_method = __KMP_GET_REDUCTION_METHOD(global_tid);

  // this barrier should be visible to a customer and to the threading profile
  // tool (it's a terminating barrier on constructs if NOWAIT not specified)
  OMPT_REDUCTION_DECL(th, global_tid);

  if (packed_reduction_method == critical_reduce_block) {
    __kmp_end_critical_section_reduce_block(loc, global_tid, lck);

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == empty_reduce_block) {

    // usage: if team size == 1, no synchronization is required

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (packed_reduction_method == atomic_reduce_block) {

#if OMPT_SUPPORT
    ompt_frame_t *ompt_frame;
    if (ompt_enabled.enabled) {
      __ompt_get_task_info_internal(0, NULL, NULL, &ompt_frame, NULL, NULL);
      if (ompt_frame->enter_frame.ptr == NULL)
        ompt_frame->enter_frame.ptr = OMPT_GET_FRAME_ADDRESS(0);
      OMPT_STORE_RETURN_ADDRESS(global_tid);
    }
#endif
    __kmp_threads[global_tid]->th.th_ident = loc;
    __kmp_barrier(bs_plain_barrier, global_tid, FALSE, 0, NULL, NULL);
#if OMPT_SUPPORT && OMPT_OPTIONAL
    if (ompt_enabled.enabled) {
      ompt_frame->enter_frame = ompt_data_none;
    }
#endif

  } else if (TEST_REDUCTION_METHOD(packed_reduction_method,
                                   tree_reduce_block)) {

    // only master executes here (master releases all other workers)
    __kmp_end_split_barrier(UNPACK_REDUCTION_BARRIER(packed_reduction_method),
                            global_tid);

  } else {
    // should never reach this block
    KMP_ASSERT(0); // "unexpected method"
  }
  if (teams_swapped) {
    __kmp_restore_swapped_teams(th, team, task_state);
  }

  if (__kmp_env_consistency_check)
    __kmp_pop_sync(global_tid, ct_reduce, loc);

  KA_TRACE(10, ("__kmpc_end_reduce() exit: called T#%d: method %08x\n",
                global_tid, packed_reduction_method));
}
#undef __KMP_GET_REDUCTION_METHOD
#undef __KMP_SET_REDUCTION_METHOD
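/* Task identifier queries used by debuggers and tools; they return 0 when
   called from a thread that is not known to the runtime. */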
kmp_uint64 __kmpc_get_taskid() {
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  return thread->th.th_current_task->td_task_id;
}
kmp_uint64 __kmpc_get_parent_taskid() {
  kmp_taskdata_t *parent_task;
  kmp_int32 gtid = __kmp_get_gtid();
  if (gtid < 0)
    return 0;
  kmp_info_t *thread = __kmp_thread_from_gtid(gtid);
  parent_task = thread->th.th_current_task->td_parent;
  return (parent_task == NULL ? 0 : parent_task->td_task_id);
}
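/* OpenMP 4.5 doacross loop support. Illustrative only (the exact lowering is
   compiler-dependent): a loop such as

     #pragma omp for ordered(1)
     for (i = 1; i < n; i++) {
       #pragma omp ordered depend(sink : i - 1)
       a[i] = a[i - 1] + 1;
       #pragma omp ordered depend(source)
     }

   is compiled into calls to __kmpc_doacross_init (once per thread per loop),
   __kmpc_doacross_wait / __kmpc_doacross_post for each dependence, and
   __kmpc_doacross_fini. Iteration completion is tracked in a shared bit
   array, one bit per iteration of the collapsed loop nest. */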
void __kmpc_doacross_init(ident_t *loc, int gtid, int num_dims,
                          const struct kmp_dim *dims) {
  int j, idx;
  kmp_int64 last, trace_count;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_uint32 *flags;
  kmp_disp_t *pr_buf = th->th.th_dispatch;
  dispatch_shared_info_t *sh_buf;

  KA_TRACE(
      20,
      ("__kmpc_doacross_init() enter: called T#%d, num dims %d, active %d\n",
       gtid, num_dims, !team->t.t_serialized));
  KMP_DEBUG_ASSERT(dims != NULL);
  KMP_DEBUG_ASSERT(num_dims > 0);

  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_init() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }
  KMP_DEBUG_ASSERT(team->t.t_nproc > 1);
  idx = pr_buf->th_doacross_buf_idx++; // Increment index of shared buffer
  sh_buf = &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];

  // Save bounds info into the allocated private buffer
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info == NULL);
  pr_buf->th_doacross_info = (kmp_int64 *)__kmp_thread_malloc(
      th, sizeof(kmp_int64) * (4 * num_dims + 1));
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  pr_buf->th_doacross_info[0] =
      (kmp_int64)num_dims; // first element is number of dimensions
  // Save also the address of num_done in order to access it later without
  // knowing the buffer index
  pr_buf->th_doacross_info[1] = (kmp_int64)&sh_buf->doacross_num_done;
  pr_buf->th_doacross_info[2] = dims[0].lo;
  pr_buf->th_doacross_info[3] = dims[0].up;
  pr_buf->th_doacross_info[4] = dims[0].st;
  last = 5;
  for (j = 1; j < num_dims; ++j) {
    kmp_int64 range_length; // range of this dimension
    if (dims[j].st == 1) { // most common case
      range_length = dims[j].up - dims[j].lo + 1;
    } else {
      if (dims[j].st > 0) {
        KMP_DEBUG_ASSERT(dims[j].up > dims[j].lo);
        range_length = (kmp_uint64)(dims[j].up - dims[j].lo) / dims[j].st + 1;
      } else { // negative increment
        KMP_DEBUG_ASSERT(dims[j].lo > dims[j].up);
        range_length =
            (kmp_uint64)(dims[j].lo - dims[j].up) / (-dims[j].st) + 1;
      }
    }
    pr_buf->th_doacross_info[last++] = range_length;
    pr_buf->th_doacross_info[last++] = dims[j].lo;
    pr_buf->th_doacross_info[last++] = dims[j].up;
    pr_buf->th_doacross_info[last++] = dims[j].st;
  }

  // Compute the total trip count of the collapsed loop nest, starting with
  // the range of dims[0] which is not kept in the buffer
  if (dims[0].st == 1) { // most common case
    trace_count = dims[0].up - dims[0].lo + 1;
  } else if (dims[0].st > 0) {
    KMP_DEBUG_ASSERT(dims[0].up > dims[0].lo);
    trace_count = (kmp_uint64)(dims[0].up - dims[0].lo) / dims[0].st + 1;
  } else { // negative increment
    KMP_DEBUG_ASSERT(dims[0].lo > dims[0].up);
    trace_count = (kmp_uint64)(dims[0].lo - dims[0].up) / (-dims[0].st) + 1;
  }
  for (j = 1; j < num_dims; ++j) {
    trace_count *= pr_buf->th_doacross_info[4 * j + 1]; // use kept ranges
  }
  KMP_DEBUG_ASSERT(trace_count > 0);

  // Check if the shared buffer is still occupied by a previous loop
  if (idx != sh_buf->doacross_buf_idx) {
    // Shared buffer is occupied, wait for it to be free
    __kmp_wait_4((volatile kmp_uint32 *)&sh_buf->doacross_buf_idx, idx,
                 __kmp_eq_4, NULL);
  }
#if KMP_32_BIT_ARCH
  // Check if we are the first thread. After the CAS the first thread gets 0,
  // others get 1 if initialization is in progress, allocated pointer otherwise.
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET32(
      (volatile kmp_int32 *)&sh_buf->doacross_flags, NULL, 1);
#else
  flags = (kmp_uint32 *)KMP_COMPARE_AND_STORE_RET64(
      (volatile kmp_int64 *)&sh_buf->doacross_flags, NULL, 1LL);
#endif
  if (flags == NULL) {
    // we are the first thread, allocate the array of flags
    size_t size = trace_count / 8 + 8; // in bytes, use single bit per iteration
    flags = (kmp_uint32 *)__kmp_thread_calloc(th, size, 1);
    sh_buf->doacross_flags = flags;
  } else if (flags == (kmp_uint32 *)1) { // initialization still in progress
#if KMP_32_BIT_ARCH
    while (*(volatile kmp_int32 *)&sh_buf->doacross_flags == 1)
#else
    while (*(volatile kmp_int64 *)&sh_buf->doacross_flags == 1LL)
#endif
      KMP_YIELD(TRUE);
  }
  KMP_DEBUG_ASSERT(sh_buf->doacross_flags > (kmp_uint32 *)1); // check ptr value
  pr_buf->th_doacross_flags =
      sh_buf->doacross_flags; // save private copy to avoid touching the
                              // shared buffer on each iteration
  KA_TRACE(20, ("__kmpc_doacross_init() exit: T#%d\n", gtid));
}
void __kmpc_doacross_wait(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, up, st;

  KA_TRACE(20, ("__kmpc_doacross_wait() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_wait() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate sequential iteration number and check out-of-bounds condition
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  up = pr_buf->th_doacross_info[3];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    if (vec[0] < lo || vec[0] > up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    if (vec[0] > lo || vec[0] < up) {
      KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                    "bounds [%lld,%lld]\n",
                    gtid, vec[0], lo, up));
      return;
    }
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    up = pr_buf->th_doacross_info[j + 3];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = vec[i] - lo;
    } else if (st > 0) {
      if (vec[i] < lo || vec[i] > up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      if (vec[i] > lo || vec[i] < up) {
        KA_TRACE(20, ("__kmpc_doacross_wait() exit: T#%d iter %lld is out of "
                      "bounds [%lld,%lld]\n",
                      gtid, vec[i], lo, up));
        return;
      }
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  while ((flag & pr_buf->th_doacross_flags[iter_number]) == 0) {
    KMP_YIELD(TRUE);
  }
  KA_TRACE(20,
           ("__kmpc_doacross_wait() exit: T#%d wait for iter %lld completed\n",
            gtid, (iter_number << 5) + shft));
}
void __kmpc_doacross_post(ident_t *loc, int gtid, const kmp_int64 *vec) {
  kmp_int32 shft, num_dims, i;
  kmp_uint32 flag;
  kmp_int64 iter_number; // iteration number of "collapsed" loop nest
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf;
  kmp_int64 lo, st;

  KA_TRACE(20, ("__kmpc_doacross_post() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_post() exit: serialized team\n"));
    return; // no dependencies if team is serialized
  }

  // calculate the sequential iteration number (same as in "wait" but without
  // the out-of-bounds checks)
  pr_buf = th->th.th_dispatch;
  KMP_DEBUG_ASSERT(pr_buf->th_doacross_info != NULL);
  num_dims = pr_buf->th_doacross_info[0];
  lo = pr_buf->th_doacross_info[2];
  st = pr_buf->th_doacross_info[4];
  if (st == 1) { // most common case
    iter_number = vec[0] - lo;
  } else if (st > 0) {
    iter_number = (kmp_uint64)(vec[0] - lo) / st;
  } else { // negative increment
    iter_number = (kmp_uint64)(lo - vec[0]) / (-st);
  }
  for (i = 1; i < num_dims; ++i) {
    kmp_int64 iter, ln;
    kmp_int32 j = i * 4;
    ln = pr_buf->th_doacross_info[j + 1];
    lo = pr_buf->th_doacross_info[j + 2];
    st = pr_buf->th_doacross_info[j + 4];
    if (st == 1) {
      iter = vec[i] - lo;
    } else if (st > 0) {
      iter = (kmp_uint64)(vec[i] - lo) / st;
    } else { // st < 0
      iter = (kmp_uint64)(lo - vec[i]) / (-st);
    }
    iter_number = iter + ln * iter_number;
  }
  shft = iter_number % 32; // use 32-bit granularity
  iter_number >>= 5; // divided by 32
  flag = 1 << shft;
  if ((flag & pr_buf->th_doacross_flags[iter_number]) == 0)
    KMP_TEST_THEN_OR32(&pr_buf->th_doacross_flags[iter_number], flag);
  KA_TRACE(20, ("__kmpc_doacross_post() exit: T#%d iter %lld posted\n", gtid,
                (iter_number << 5) + shft));
}
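/* __kmpc_doacross_fini releases the per-thread bounds buffer; the last thread
   to arrive also recycles the shared flags buffer for future loops. */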
void __kmpc_doacross_fini(ident_t *loc, int gtid) {
  kmp_int32 num_done;
  kmp_info_t *th = __kmp_threads[gtid];
  kmp_team_t *team = th->th.th_team;
  kmp_disp_t *pr_buf = th->th.th_dispatch;

  KA_TRACE(20, ("__kmpc_doacross_fini() enter: called T#%d\n", gtid));
  if (team->t.t_serialized) {
    KA_TRACE(20, ("__kmpc_doacross_fini() exit: serialized team %p\n", team));
    return; // nothing to do
  }
  num_done = KMP_TEST_THEN_INC32((kmp_int32 *)pr_buf->th_doacross_info[1]) + 1;
  if (num_done == th->th.th_team_nproc) {
    // we are the last thread, need to free shared resources
    int idx = pr_buf->th_doacross_buf_idx - 1;
    dispatch_shared_info_t *sh_buf =
        &team->t.t_disp_buffer[idx % __kmp_dispatch_num_buffers];
    KMP_DEBUG_ASSERT(pr_buf->th_doacross_info[1] ==
                     (kmp_int64)&sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(num_done == sh_buf->doacross_num_done);
    KMP_DEBUG_ASSERT(idx == sh_buf->doacross_buf_idx);
    __kmp_thread_free(th, CCAST(kmp_uint32 *, sh_buf->doacross_flags));
    sh_buf->doacross_flags = NULL;
    sh_buf->doacross_num_done = 0;
    sh_buf->doacross_buf_idx +=
        __kmp_dispatch_num_buffers; // free buffer for future re-use
  }
  // free private resources (need to keep buffer index forever)
  pr_buf->th_doacross_flags = NULL;
  __kmp_thread_free(th, (void *)pr_buf->th_doacross_info);
  pr_buf->th_doacross_info = NULL;
  KA_TRACE(20, ("__kmpc_doacross_fini() exit: T#%d\n", gtid));
}
void *omp_alloc(size_t size, omp_allocator_handle_t allocator) {
  return __kmpc_alloc(__kmp_entry_gtid(), size, allocator);
}

void omp_free(void *ptr, omp_allocator_handle_t allocator) {
  __kmpc_free(__kmp_entry_gtid(), ptr, allocator);
}
int __kmpc_get_target_offload(void) {
  if (!__kmp_init_serial) {
    __kmp_serial_initialize();
  }
  return __kmp_target_offload;
}
int __kmpc_pause_resource(kmp_pause_status_t level) {
  if (!__kmp_init_serial) {
    return 1; // can't pause if runtime is not initialized
  }
  return __kmp_pause_resource(level);
}