#ifndef KMP_DISPATCH_H
#define KMP_DISPATCH_H
#include "kmp.h"
#include "kmp_error.h"
#include "kmp_i18n.h"
#include "kmp_itt.h"
#include "kmp_stats.h"
#include "kmp_str.h"
#if KMP_OS_WINDOWS && KMP_ARCH_X86
#include <float.h>
#endif
#if OMPT_SUPPORT
#include "ompt-internal.h"
#include "ompt-specific.h"
#endif
#if KMP_USE_HIER_SCHED
// Forward declarations of hierarchical scheduling data structures
template <typename T> struct kmp_hier_t;
template <typename T> struct kmp_hier_top_unit_t;
#endif // KMP_USE_HIER_SCHED
template <typename T> struct dispatch_shared_info_template;
template <typename T> struct dispatch_private_info_template;
template <typename T>
extern void __kmp_dispatch_init_algorithm(ident_t *loc, int gtid,
                                          dispatch_private_info_template<T> *pr,
                                          enum sched_type schedule, T lb, T ub,
                                          typename traits_t<T>::signed_t st,
#if USE_ITT_BUILD
                                          kmp_uint64 *cur_chunk,
#endif
                                          typename traits_t<T>::signed_t chunk,
                                          T nproc, T unit_id);
template <typename T>
extern int __kmp_dispatch_next_algorithm(
    int gtid, dispatch_private_info_template<T> *pr,
    dispatch_shared_info_template<T> volatile *sh, kmp_int32 *p_last, T *p_lb,
    T *p_ub, typename traits_t<T>::signed_t *p_st, T nproc, T unit_id);
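// Taken together, the two declarations above split dispatch into two phases:
// __kmp_dispatch_init_algorithm fills in the per-thread descriptor pr for the
// chosen schedule, and __kmp_dispatch_next_algorithm hands out the next chunk
// as [*p_lb, *p_ub], returning nonzero while iterations remain.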
void __kmp_dispatch_dxo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
void __kmp_dispatch_deo_error(int *gtid_ref, int *cid_ref, ident_t *loc_ref);
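// The *_error variants above are non-template fallbacks; based on their use
// in kmp_dispatch.cpp, they appear to be installed as the enter/exit-ordered
// handlers for loops without an ordered clause, so reaching them reports a
// consistency error (hedged description).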
#if KMP_STATIC_STEAL_ENABLED

// Replaces the dispatch_private_info{32,64} structures and types
// (static-steal variant).
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  UT count; // current chunk index
  T ub;
  /* Adding KMP_ALIGN_CACHE here doesn't help / can hurt performance */
  T lb;
  ST st; // signed stride
  UT tc; // unsigned trip count
  T static_steal_counter; // for static_steal only
  // KMP_ALIGN(32) ensures parm3 is properly aligned and parm1-4 share a
  // cache line, since they are used together.
  struct KMP_ALIGN(32) { // compiler does not accept unnamed structure
    T parm1, parm2, parm3, parm4;
  };
  UT ordered_lower;
  UT ordered_upper;
};
#else /* KMP_STATIC_STEAL_ENABLED */

// Replaces the dispatch_private_info{32,64} structures and types
// (non-stealing variant).
template <typename T> struct dispatch_private_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  typedef typename traits_t<T>::signed_t ST;
  T lb;
  T ub;
  ST st; // signed stride
  UT tc; // unsigned trip count
  T parm1, parm2, parm3, parm4;
  UT count; // current chunk index
  UT ordered_lower;
  UT ordered_upper;
};

#endif /* KMP_STATIC_STEAL_ENABLED */
template <typename T> struct KMP_ALIGN_CACHE dispatch_private_info_template {
  // Duplicate the alignment here; otherwise the structure size is wrong.
  union KMP_ALIGN_CACHE private_info_tmpl {
    dispatch_private_infoXX_template<T> p;
    dispatch_private_info64_t p64;
  } u;
  enum sched_type schedule; /* scheduling algorithm */
  kmp_sched_flags_t flags; /* flags (e.g., ordered, nomerge) */
  kmp_uint32 ordered_bumped;
  // Dummy to retain the structure size after ordered_bumped was made scalar
  kmp_int32 ordered_dummy[KMP_MAX_ORDERED - 3];
  dispatch_private_info *next; /* stack of buffers for nest of serial regions */
  kmp_uint32 type_size;
#if KMP_USE_HIER_SCHED
  kmp_int32 hier_id;
  kmp_hier_top_unit_t<T> *hier_parent;
  // member functions
  kmp_int32 get_hier_id() const { return hier_id; }
  kmp_hier_top_unit_t<T> *get_parent() { return hier_parent; }
#endif
  enum cons_type pushed_ws;
};
// Replaces the dispatch_shared_info{32,64} structures and types.
template <typename T> struct dispatch_shared_infoXX_template {
  typedef typename traits_t<T>::unsigned_t UT;
  volatile UT iteration; // shared iteration/chunk counter
  volatile UT num_done; // number of threads that finished
  volatile UT ordered_iteration;
  // Dummy to retain the structure size after ordered_iteration was made scalar
  UT ordered_dummy[KMP_MAX_ORDERED - 3];
};
// Replaces the dispatch_shared_info structure and dispatch_shared_info_t type.
template <typename T> struct dispatch_shared_info_template {
  typedef typename traits_t<T>::unsigned_t UT;
  // we need union here to keep the structure size
  union shared_info_tmpl {
    dispatch_shared_infoXX_template<UT> s;
    dispatch_shared_info64_t s64;
  } u;
  volatile kmp_uint32 buffer_index;
  volatile kmp_int32 doacross_buf_idx; // teamwise index
  kmp_uint32 *doacross_flags; // array of iteration flags (0/1)
  kmp_int32 doacross_num_done; // count of finished threads
#if KMP_USE_HIER_SCHED
  kmp_hier_t<T> *hier;
#endif
};
#undef USE_TEST_LOCKS
// test_then_add template (the general template should NOT be used)
template <typename T> static __forceinline T test_then_add(volatile T *p, T d);
template <>
__forceinline kmp_int32 test_then_add<kmp_int32>(volatile kmp_int32 *p,
                                                 kmp_int32 d) {
  kmp_int32 r;
  r = KMP_TEST_THEN_ADD32(p, d);
  return r;
}
template <>
__forceinline kmp_int64 test_then_add<kmp_int64>(volatile kmp_int64 *p,
                                                 kmp_int64 d) {
  kmp_int64 r;
  r = KMP_TEST_THEN_ADD64(p, d);
  return r;
}
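// Hedged usage sketch (illustrative only; `counter` and `chunk` are
// hypothetical names, not part of this header): test_then_add atomically
// advances a shared counter and returns the previous value, so a thread can
// claim an exclusive iteration range in one step:
//
//   kmp_int64 prev = test_then_add<kmp_int64>(&counter, (kmp_int64)chunk);
//   // iterations prev .. prev + chunk - 1 now belong to this thread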
// test_then_inc_acq template (the general template should NOT be used)
template <typename T> static __forceinline T test_then_inc_acq(volatile T *p);
template <>
__forceinline kmp_int32 test_then_inc_acq<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC_ACQ32(p);
  return r;
}
template <>
__forceinline kmp_int64 test_then_inc_acq<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC_ACQ64(p);
  return r;
}
// test_then_inc template (the general template should NOT be used)
template <typename T> static __forceinline T test_then_inc(volatile T *p);
template <>
__forceinline kmp_int32 test_then_inc<kmp_int32>(volatile kmp_int32 *p) {
  kmp_int32 r;
  r = KMP_TEST_THEN_INC32(p);
  return r;
}
template <>
__forceinline kmp_int64 test_then_inc<kmp_int64>(volatile kmp_int64 *p) {
  kmp_int64 r;
  r = KMP_TEST_THEN_INC64(p);
  return r;
}
// compare_and_swap template (the general template should NOT be used)
template <typename T>
static __forceinline kmp_int32 compare_and_swap(volatile T *p, T c, T s);
template <>
__forceinline kmp_int32 compare_and_swap<kmp_int32>(volatile kmp_int32 *p,
                                                    kmp_int32 c, kmp_int32 s) {
  return KMP_COMPARE_AND_STORE_REL32(p, c, s);
}
template <>
__forceinline kmp_int32 compare_and_swap<kmp_int64>(volatile kmp_int64 *p,
                                                    kmp_int64 c, kmp_int64 s) {
  return KMP_COMPARE_AND_STORE_REL64(p, c, s);
}
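// Hedged usage sketch (illustrative only; `remaining` and `claim` are
// hypothetical names): compare_and_swap returns nonzero on success, which
// supports the classic retry loop used to claim work under contention:
//
//   kmp_int32 old_val, new_val;
//   do {
//     old_val = *remaining;      // snapshot the shared value
//     new_val = old_val - claim; // value we want to install
//   } while (!compare_and_swap<kmp_int32>(remaining, old_val, new_val));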
template <typename T> kmp_uint32 __kmp_ge(T value, T checker) {
  return value >= checker;
}
template <typename T> kmp_uint32 __kmp_eq(T value, T checker) {
  return value == checker;
}
/* Spin-wait loop that first pauses, then yields. Waits until the predicate
   returns non-zero when called with *spinner and checker. Does NOT put
   threads to sleep. */
template <typename UT>
static UT __kmp_wait(volatile UT *spinner, UT checker,
                     kmp_uint32 (*pred)(UT, UT)
                         USE_ITT_BUILD_ARG(void *obj)) {
  // note: we may not belong to a team at this point
  volatile UT *spin = spinner;
  UT check = checker;
  kmp_uint32 spins;
  kmp_uint32 (*f)(UT, UT) = pred;
  UT r;

  KMP_FSYNC_SPIN_INIT(obj, CCAST(UT *, spin));
  KMP_INIT_YIELD(spins);
  // main wait spin loop
  while (!f(r = *spin, check)) {
    KMP_FSYNC_SPIN_PREPARE(obj);
    // If oversubscribed, or if we have waited a bit, then yield.
    KMP_YIELD_OVERSUB_ELSE_SPIN(spins);
  }
  KMP_FSYNC_SPIN_ACQUIRED(obj);
  return r;
}
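// Usage note: the predicates __kmp_ge/__kmp_eq above are the `pred` arguments
// for __kmp_wait. For example, the ordered-entry code later in this header
// waits for the shared ordered_iteration counter to reach this thread's
// lower bound:
//
//   __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
//                  __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));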
// Dispatch enter-ordered: wait until it is this thread's turn to enter the
// ordered region.
template <typename UT>
void __kmp_dispatch_deo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
#if KMP_USE_DYNAMIC_LOCK
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL, 0);
#else
      __kmp_push_sync(gtid, ct_ordered_in_pdo, loc_ref, NULL);
#endif
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);
    UT lower;

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }
    lower = pr->u.p.ordered_lower;

#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    KMP_MB();
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d before wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }
#endif
    __kmp_wait<UT>(&sh->u.s.ordered_iteration, lower,
                   __kmp_ge<UT> USE_ITT_BUILD_ARG(NULL));
    KMP_MB(); /* is this necessary? */
#ifdef KMP_DEBUG
    {
      char *buff;
      // create format specifiers before the debug output
      buff = __kmp_str_format("__kmp_dispatch_deo: T#%%d after wait: "
                              "ordered_iter:%%%s lower:%%%s\n",
                              traits_t<UT>::spec, traits_t<UT>::spec);
      KD_TRACE(1000, (buff, gtid, sh->u.s.ordered_iteration, lower));
      __kmp_str_free(&buff);
    }
#endif
  }
  KD_TRACE(100, ("__kmp_dispatch_deo: T#%d returned\n", gtid));
}
// Dispatch exit-ordered: bump the shared counter to release the next thread.
template <typename UT>
void __kmp_dispatch_dxo(int *gtid_ref, int *cid_ref, ident_t *loc_ref) {
  typedef typename traits_t<UT>::signed_t ST;
  dispatch_private_info_template<UT> *pr;

  int gtid = *gtid_ref;
  kmp_info_t *th = __kmp_threads[gtid];
  KMP_DEBUG_ASSERT(th->th.th_dispatch);

  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d called\n", gtid));
  if (__kmp_env_consistency_check) {
    pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
        th->th.th_dispatch->th_dispatch_pr_current);
    if (pr->pushed_ws != ct_none) {
      __kmp_pop_sync(gtid, ct_ordered_in_pdo, loc_ref);
    }
  }

  if (!th->th.th_team->t.t_serialized) {
    dispatch_shared_info_template<UT> *sh =
        reinterpret_cast<dispatch_shared_info_template<UT> *>(
            th->th.th_dispatch->th_dispatch_sh_current);

    if (!__kmp_env_consistency_check) {
      pr = reinterpret_cast<dispatch_private_info_template<UT> *>(
          th->th.th_dispatch->th_dispatch_pr_current);
    }

    KMP_FSYNC_RELEASING(CCAST(UT *, &sh->u.s.ordered_iteration));
#if !defined(KMP_GOMP_COMPAT)
    if (__kmp_env_consistency_check) {
      if (pr->ordered_bumped != 0) {
        struct cons_header *p = __kmp_threads[gtid]->th.th_cons;
        __kmp_error_construct2(kmp_i18n_msg_CnsMultipleNesting,
                               ct_ordered_in_pdo, loc_ref,
                               &p->stack_data[p->w_top]);
      }
    }
#endif /* !defined(KMP_GOMP_COMPAT) */

    KMP_MB(); /* Flush all pending memory write invalidates. */

    pr->ordered_bumped += 1;

    KD_TRACE(1000,
             ("__kmp_dispatch_dxo: T#%d bumping ordered ordered_bumped=%d\n",
              gtid, pr->ordered_bumped));

    KMP_MB(); /* Flush all pending memory write invalidates. */

    /* TODO use general release procedure? */
    test_then_inc<ST>((volatile ST *)&sh->u.s.ordered_iteration);

    KMP_MB(); /* Flush all pending memory write invalidates. */
  }
  KD_TRACE(100, ("__kmp_dispatch_dxo: T#%d returned\n", gtid));
}
/* Computes and returns x to the power of y, where y must be a non-negative
   integer. */
template <typename UT>
static __forceinline long double __kmp_pow(long double x, UT y) {
  long double s = 1.0L;

  KMP_DEBUG_ASSERT(x > 0.0 && x < 1.0);
  // y is unsigned, so y >= 0 holds by construction
  while (y) { // exponentiation by squaring
    if (y & 1)
      s *= x;
    x *= x;
    y >>= 1;
  }
  return s;
}
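// Worked example (illustrative): __kmp_pow(0.5L, 3) runs square-and-multiply
// as follows: y=3 sets s=0.5, x=0.25; y=1 sets s=0.125, x=0.0625; y=0 stops.
// Result: 0.125, i.e. 0.5^3.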
/* Computes and returns the number of unassigned iterations after idx chunks
   have been assigned, i.e. the total remaining trip count over all chunk
   indices >= idx. Deliberately __inline rather than __forceinline:
   force-inlining this function changes the observed behavior (the unit test
   sch_guided_analytical_basic.cpp fails). */
template <typename T>
static __inline typename traits_t<T>::unsigned_t
__kmp_dispatch_guided_remaining(T tc, typename traits_t<T>::floating_t base,
                                typename traits_t<T>::unsigned_t idx) {
  typedef typename traits_t<T>::unsigned_t UT;

  long double x = tc * __kmp_pow<UT>(base, idx);
  UT r = (UT)x;
  if (x == r)
    return r;
  return r + 1; // round any fractional remainder up
}
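// Worked example (illustrative): with tc = 1000 iterations and base = 0.5
// (each chunk takes half of what remains), after idx = 3 assigned chunks the
// remaining count is ceil(1000 * 0.5^3) = 125.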
// Parameters of the guided-iterative algorithm:
//   p2 = n * nproc * (chunk + 1)  // point of switching to dynamic
//   p3 = 1 / (n * nproc)          // remaining iterations multiplier
// By default n = 2; larger n gives a flatter chunk-size distribution, and
// n = 1 makes the first chunk the same as for a static schedule (trip/nproc).
static const int guided_int_param = 2;
static const double guided_flt_param = 0.5; // = 1.0 / guided_int_param
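// Hedged numeric example: with nproc = 8 threads and the default n = 2, the
// multiplier is p3 = 1/(2*8) = 1/16, so each dispatched chunk is roughly
// remaining/16 iterations (equivalently remaining * guided_flt_param / nproc).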
#endif // KMP_DISPATCH_H