1 /*
2  * z_Linux_util.cpp -- platform specific routines.
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #include "kmp.h"
14 #include "kmp_affinity.h"
15 #include "kmp_i18n.h"
16 #include "kmp_io.h"
17 #include "kmp_itt.h"
18 #include "kmp_lock.h"
19 #include "kmp_stats.h"
20 #include "kmp_str.h"
21 #include "kmp_wait_release.h"
22 #include "kmp_wrapper_getpid.h"
23 
24 #if !KMP_OS_DRAGONFLY && !KMP_OS_FREEBSD && !KMP_OS_NETBSD && !KMP_OS_OPENBSD
25 #include <alloca.h>
26 #endif
27 #include <math.h> // HUGE_VAL.
28 #include <sys/resource.h>
29 #include <sys/syscall.h>
30 #include <sys/time.h>
31 #include <sys/times.h>
32 #include <unistd.h>
33 
34 #if KMP_OS_LINUX && !KMP_OS_CNK
35 #include <sys/sysinfo.h>
36 #if KMP_USE_FUTEX
37 // We should really include <futex.h>, but that causes compatibility problems on
38 // different Linux* OS distributions that either require that you include (or
39 // break when you try to include) <pci/types.h>. Since all we need is the two
40 // macros below (which are part of the kernel ABI, so they can't change), we just
41 // define the constants here and don't include <futex.h>.
42 #ifndef FUTEX_WAIT
43 #define FUTEX_WAIT 0
44 #endif
45 #ifndef FUTEX_WAKE
46 #define FUTEX_WAKE 1
47 #endif
48 #endif
49 #elif KMP_OS_DARWIN
50 #include <mach/mach.h>
51 #include <sys/sysctl.h>
52 #elif KMP_OS_DRAGONFLY || KMP_OS_FREEBSD
53 #include <sys/types.h>
54 #include <sys/sysctl.h>
55 #include <sys/user.h>
56 #include <pthread_np.h>
57 #elif KMP_OS_NETBSD || KMP_OS_OPENBSD
58 #include <sys/types.h>
59 #include <sys/sysctl.h>
60 #endif
61 
62 #include <ctype.h>
63 #include <dirent.h>
64 #include <fcntl.h>
65 
66 #include "tsan_annotations.h"
67 
68 struct kmp_sys_timer {
69  struct timespec start;
70 };
71 
72 // Convert timespec to nanoseconds.
73 #define TS2NS(timespec) (((timespec).tv_sec * 1e9) + (timespec).tv_nsec)
74 
75 static struct kmp_sys_timer __kmp_sys_timer_data;
76 
77 #if KMP_HANDLE_SIGNALS
78 typedef void (*sig_func_t)(int);
79 STATIC_EFI2_WORKAROUND struct sigaction __kmp_sighldrs[NSIG];
80 static sigset_t __kmp_sigset;
81 #endif
82 
83 static int __kmp_init_runtime = FALSE;
84 
85 static int __kmp_fork_count = 0;
86 
87 static pthread_condattr_t __kmp_suspend_cond_attr;
88 static pthread_mutexattr_t __kmp_suspend_mutex_attr;
89 
90 static kmp_cond_align_t __kmp_wait_cv;
91 static kmp_mutex_align_t __kmp_wait_mx;
92 
93 kmp_uint64 __kmp_ticks_per_msec = 1000000;
94 
95 #ifdef DEBUG_SUSPEND
96 static void __kmp_print_cond(char *buffer, kmp_cond_align_t *cond) {
97  KMP_SNPRINTF(buffer, 128, "(cond (lock (%ld, %d)), (descr (%p)))",
98  cond->c_cond.__c_lock.__status, cond->c_cond.__c_lock.__spinlock,
99  cond->c_cond.__c_waiting);
100 }
101 #endif
102 
103 #if ((KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED)
104 
105 /* Affinity support */
106 
107 void __kmp_affinity_bind_thread(int which) {
108  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
109  "Illegal set affinity operation when not capable");
110 
111  kmp_affin_mask_t *mask;
112  KMP_CPU_ALLOC_ON_STACK(mask);
113  KMP_CPU_ZERO(mask);
114  KMP_CPU_SET(which, mask);
115  __kmp_set_system_affinity(mask, TRUE);
116  KMP_CPU_FREE_FROM_STACK(mask);
117 }
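// Roughly what the wrapper above amounts to on the Linux* OS code path, shown
// with the plain glibc affinity API (illustrative sketch only; the runtime's
// KMP_CPU_* macros also handle dynamically sized masks and the other
// supported OSes):
//
//   cpu_set_t cpuset;                    // <sched.h>, needs _GNU_SOURCE
//   CPU_ZERO(&cpuset);
//   CPU_SET(which, &cpuset);             // keep only logical CPU 'which'
//   sched_setaffinity(0, sizeof(cpuset), &cpuset);  // 0 == calling thread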
118 
119 /* Determine if we can access affinity functionality on this version of
120  * Linux* OS by checking __NR_sched_{get,set}affinity system calls, and set
121  * __kmp_affin_mask_size to the appropriate value (0 means not capable). */
122 void __kmp_affinity_determine_capable(const char *env_var) {
123 // Check and see if the OS supports thread affinity.
124 
125 #if KMP_OS_LINUX
126 #define KMP_CPU_SET_SIZE_LIMIT (1024 * 1024)
127 #elif KMP_OS_FREEBSD
128 #define KMP_CPU_SET_SIZE_LIMIT (sizeof(cpuset_t))
129 #endif
130 
131 
132 #if KMP_OS_LINUX
133  // If Linux* OS:
134  // If the syscall fails or returns a suggestion for the size,
135  // then we don't have to search for an appropriate size.
136  int gCode;
137  int sCode;
138  unsigned char *buf;
139  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
140  gCode = syscall(__NR_sched_getaffinity, 0, KMP_CPU_SET_SIZE_LIMIT, buf);
141  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
142  "initial getaffinity call returned %d errno = %d\n",
143  gCode, errno));
144 
145  // if ((gCode < 0) && (errno == ENOSYS))
146  if (gCode < 0) {
147  // System call not supported
148  if (__kmp_affinity_verbose ||
149  (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
150  (__kmp_affinity_type != affinity_default) &&
151  (__kmp_affinity_type != affinity_disabled))) {
152  int error = errno;
153  kmp_msg_t err_code = KMP_ERR(error);
154  __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
155  err_code, __kmp_msg_null);
156  if (__kmp_generate_warnings == kmp_warnings_off) {
157  __kmp_str_free(&err_code.str);
158  }
159  }
160  KMP_AFFINITY_DISABLE();
161  KMP_INTERNAL_FREE(buf);
162  return;
163  }
164  if (gCode > 0) { // Linux* OS only
165  // The optimal situation: the OS returns the size of the buffer it expects.
166  //
167  // A verification of correct behavior is that setaffinity on a NULL
168  // buffer with the same size fails with errno set to EFAULT.
169  sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
170  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
171  "setaffinity for mask size %d returned %d errno = %d\n",
172  gCode, sCode, errno));
173  if (sCode < 0) {
174  if (errno == ENOSYS) {
175  if (__kmp_affinity_verbose ||
176  (__kmp_affinity_warnings &&
177  (__kmp_affinity_type != affinity_none) &&
178  (__kmp_affinity_type != affinity_default) &&
179  (__kmp_affinity_type != affinity_disabled))) {
180  int error = errno;
181  kmp_msg_t err_code = KMP_ERR(error);
182  __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
183  err_code, __kmp_msg_null);
184  if (__kmp_generate_warnings == kmp_warnings_off) {
185  __kmp_str_free(&err_code.str);
186  }
187  }
188  KMP_AFFINITY_DISABLE();
189  KMP_INTERNAL_FREE(buf);
190  }
191  if (errno == EFAULT) {
192  KMP_AFFINITY_ENABLE(gCode);
193  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
194  "affinity supported (mask size %d)\n",
195  (int)__kmp_affin_mask_size));
196  KMP_INTERNAL_FREE(buf);
197  return;
198  }
199  }
200  }
201 
202  // Call the getaffinity system call repeatedly with increasing set sizes
203  // until we succeed, or reach an upper bound on the search.
204  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
205  "searching for proper set size\n"));
206  int size;
207  for (size = 1; size <= KMP_CPU_SET_SIZE_LIMIT; size *= 2) {
208  gCode = syscall(__NR_sched_getaffinity, 0, size, buf);
209  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
210  "getaffinity for mask size %d returned %d errno = %d\n",
211  size, gCode, errno));
212 
213  if (gCode < 0) {
214  if (errno == ENOSYS) {
215  // We shouldn't get here
216  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
217  "inconsistent OS call behavior: errno == ENOSYS for mask "
218  "size %d\n",
219  size));
220  if (__kmp_affinity_verbose ||
221  (__kmp_affinity_warnings &&
222  (__kmp_affinity_type != affinity_none) &&
223  (__kmp_affinity_type != affinity_default) &&
224  (__kmp_affinity_type != affinity_disabled))) {
225  int error = errno;
226  kmp_msg_t err_code = KMP_ERR(error);
227  __kmp_msg(kmp_ms_warning, KMP_MSG(GetAffSysCallNotSupported, env_var),
228  err_code, __kmp_msg_null);
229  if (__kmp_generate_warnings == kmp_warnings_off) {
230  __kmp_str_free(&err_code.str);
231  }
232  }
233  KMP_AFFINITY_DISABLE();
234  KMP_INTERNAL_FREE(buf);
235  return;
236  }
237  continue;
238  }
239 
240  sCode = syscall(__NR_sched_setaffinity, 0, gCode, NULL);
241  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
242  "setaffinity for mask size %d returned %d errno = %d\n",
243  gCode, sCode, errno));
244  if (sCode < 0) {
245  if (errno == ENOSYS) { // Linux* OS only
246  // We shouldn't get here
247  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
248  "inconsistent OS call behavior: errno == ENOSYS for mask "
249  "size %d\n",
250  size));
251  if (__kmp_affinity_verbose ||
252  (__kmp_affinity_warnings &&
253  (__kmp_affinity_type != affinity_none) &&
254  (__kmp_affinity_type != affinity_default) &&
255  (__kmp_affinity_type != affinity_disabled))) {
256  int error = errno;
257  kmp_msg_t err_code = KMP_ERR(error);
258  __kmp_msg(kmp_ms_warning, KMP_MSG(SetAffSysCallNotSupported, env_var),
259  err_code, __kmp_msg_null);
260  if (__kmp_generate_warnings == kmp_warnings_off) {
261  __kmp_str_free(&err_code.str);
262  }
263  }
264  KMP_AFFINITY_DISABLE();
265  KMP_INTERNAL_FREE(buf);
266  return;
267  }
268  if (errno == EFAULT) {
269  KMP_AFFINITY_ENABLE(gCode);
270  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
271  "affinity supported (mask size %d)\n",
272  (int)__kmp_affin_mask_size));
273  KMP_INTERNAL_FREE(buf);
274  return;
275  }
276  }
277  }
278 #elif KMP_OS_FREEBSD
279  int gCode;
280  unsigned char *buf;
281  buf = (unsigned char *)KMP_INTERNAL_MALLOC(KMP_CPU_SET_SIZE_LIMIT);
282  gCode = pthread_getaffinity_np(pthread_self(), KMP_CPU_SET_SIZE_LIMIT, reinterpret_cast<cpuset_t *>(buf));
283  KA_TRACE(30, ("__kmp_affinity_determine_capable: "
284  "initial getaffinity call returned %d errno = %d\n",
285  gCode, errno));
286  if (gCode == 0) {
287  KMP_AFFINITY_ENABLE(KMP_CPU_SET_SIZE_LIMIT);
288  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
289  "affinity supported (mask size %d)\n"<
290  (int)__kmp_affin_mask_size));
291  KMP_INTERNAL_FREE(buf);
292  return;
293  }
294 #endif
295  // save uncaught error code
296  // int error = errno;
297  KMP_INTERNAL_FREE(buf);
298  // restore uncaught error code, will be printed at the next KMP_WARNING below
299  // errno = error;
300 
301  // Affinity is not supported
302  KMP_AFFINITY_DISABLE();
303  KA_TRACE(10, ("__kmp_affinity_determine_capable: "
304  "cannot determine mask size - affinity not supported\n"));
305  if (__kmp_affinity_verbose ||
306  (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none) &&
307  (__kmp_affinity_type != affinity_default) &&
308  (__kmp_affinity_type != affinity_disabled))) {
309  KMP_WARNING(AffCantGetMaskSize, env_var);
310  }
311 }
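// Summary of the probing strategy above: the first sched_getaffinity call may
// itself report the mask size the kernel wants (return value > 0); that size
// is confirmed by calling sched_setaffinity with a NULL buffer, which must
// fail with EFAULT if the size is acceptable. Otherwise the loop retries with
// mask sizes 1, 2, 4, ... bytes up to KMP_CPU_SET_SIZE_LIMIT. A standalone
// probe would look roughly like this (illustrative sketch, not the runtime's
// code):
//
//   long r = syscall(__NR_sched_getaffinity, 0, size, buf);
//   if (r > 0 && syscall(__NR_sched_setaffinity, 0, r, NULL) < 0 &&
//       errno == EFAULT) {
//     // 'r' bytes is a usable affinity mask size
//   }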
312 
313 #endif // (KMP_OS_LINUX || KMP_OS_FREEBSD) && KMP_AFFINITY_SUPPORTED
314 
315 #if KMP_USE_FUTEX
316 
317 int __kmp_futex_determine_capable() {
318  int loc = 0;
319  int rc = syscall(__NR_futex, &loc, FUTEX_WAKE, 1, NULL, NULL, 0);
320  int retval = (rc == 0) || (errno != ENOSYS);
321 
322  KA_TRACE(10,
323  ("__kmp_futex_determine_capable: rc = %d errno = %d\n", rc, errno));
324  KA_TRACE(10, ("__kmp_futex_determine_capable: futex syscall%s supported\n",
325  retval ? "" : " not"));
326 
327  return retval;
328 }
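// How the probe above works: FUTEX_WAKE on a private word that nobody is
// waiting on is harmless. If the futex syscall exists it returns 0 (no
// waiters woken); if it does not exist it fails with ENOSYS; any other errno
// still proves the syscall is wired up. Illustration of the wait/wake pair
// the runtime's futex locks rely on (simplified sketch, not the actual lock
// code):
//
//   // waiter: sleep only while *addr still equals 'val'
//   syscall(__NR_futex, addr, FUTEX_WAIT, val, NULL, NULL, 0);
//   // waker: wake up to one thread blocked on 'addr'
//   syscall(__NR_futex, addr, FUTEX_WAKE, 1, NULL, NULL, 0);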
329 
330 #endif // KMP_USE_FUTEX
331 
332 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (!KMP_ASM_INTRINS)
333 /* Only 32-bit "add-exchange" instruction on IA-32 architecture causes us to
334  use compare_and_store for these routines */
335 
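// Each routine below is a standard compare-and-swap retry loop: read the old
// value, compute old|d, old&d or old+d, and try to publish it with
// KMP_COMPARE_AND_STORE_RELnn. If another thread changed *p in the meantime
// the store fails, so the thread pauses briefly (KMP_CPU_PAUSE) and retries.
// The original value is returned, giving fetch-and-op semantics.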
336 kmp_int8 __kmp_test_then_or8(volatile kmp_int8 *p, kmp_int8 d) {
337  kmp_int8 old_value, new_value;
338 
339  old_value = TCR_1(*p);
340  new_value = old_value | d;
341 
342  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
343  KMP_CPU_PAUSE();
344  old_value = TCR_1(*p);
345  new_value = old_value | d;
346  }
347  return old_value;
348 }
349 
350 kmp_int8 __kmp_test_then_and8(volatile kmp_int8 *p, kmp_int8 d) {
351  kmp_int8 old_value, new_value;
352 
353  old_value = TCR_1(*p);
354  new_value = old_value & d;
355 
356  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
357  KMP_CPU_PAUSE();
358  old_value = TCR_1(*p);
359  new_value = old_value & d;
360  }
361  return old_value;
362 }
363 
364 kmp_uint32 __kmp_test_then_or32(volatile kmp_uint32 *p, kmp_uint32 d) {
365  kmp_uint32 old_value, new_value;
366 
367  old_value = TCR_4(*p);
368  new_value = old_value | d;
369 
370  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
371  KMP_CPU_PAUSE();
372  old_value = TCR_4(*p);
373  new_value = old_value | d;
374  }
375  return old_value;
376 }
377 
378 kmp_uint32 __kmp_test_then_and32(volatile kmp_uint32 *p, kmp_uint32 d) {
379  kmp_uint32 old_value, new_value;
380 
381  old_value = TCR_4(*p);
382  new_value = old_value & d;
383 
384  while (!KMP_COMPARE_AND_STORE_REL32(p, old_value, new_value)) {
385  KMP_CPU_PAUSE();
386  old_value = TCR_4(*p);
387  new_value = old_value & d;
388  }
389  return old_value;
390 }
391 
392 #if KMP_ARCH_X86
393 kmp_int8 __kmp_test_then_add8(volatile kmp_int8 *p, kmp_int8 d) {
394  kmp_int8 old_value, new_value;
395 
396  old_value = TCR_1(*p);
397  new_value = old_value + d;
398 
399  while (!KMP_COMPARE_AND_STORE_REL8(p, old_value, new_value)) {
400  KMP_CPU_PAUSE();
401  old_value = TCR_1(*p);
402  new_value = old_value + d;
403  }
404  return old_value;
405 }
406 
407 kmp_int64 __kmp_test_then_add64(volatile kmp_int64 *p, kmp_int64 d) {
408  kmp_int64 old_value, new_value;
409 
410  old_value = TCR_8(*p);
411  new_value = old_value + d;
412 
413  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
414  KMP_CPU_PAUSE();
415  old_value = TCR_8(*p);
416  new_value = old_value + d;
417  }
418  return old_value;
419 }
420 #endif /* KMP_ARCH_X86 */
421 
422 kmp_uint64 __kmp_test_then_or64(volatile kmp_uint64 *p, kmp_uint64 d) {
423  kmp_uint64 old_value, new_value;
424 
425  old_value = TCR_8(*p);
426  new_value = old_value | d;
427  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
428  KMP_CPU_PAUSE();
429  old_value = TCR_8(*p);
430  new_value = old_value | d;
431  }
432  return old_value;
433 }
434 
435 kmp_uint64 __kmp_test_then_and64(volatile kmp_uint64 *p, kmp_uint64 d) {
436  kmp_uint64 old_value, new_value;
437 
438  old_value = TCR_8(*p);
439  new_value = old_value & d;
440  while (!KMP_COMPARE_AND_STORE_REL64(p, old_value, new_value)) {
441  KMP_CPU_PAUSE();
442  old_value = TCR_8(*p);
443  new_value = old_value & d;
444  }
445  return old_value;
446 }
447 
448 #endif /* (KMP_ARCH_X86 || KMP_ARCH_X86_64) && (! KMP_ASM_INTRINS) */
449 
450 void __kmp_terminate_thread(int gtid) {
451  int status;
452  kmp_info_t *th = __kmp_threads[gtid];
453 
454  if (!th)
455  return;
456 
457 #ifdef KMP_CANCEL_THREADS
458  KA_TRACE(10, ("__kmp_terminate_thread: kill (%d)\n", gtid));
459  status = pthread_cancel(th->th.th_info.ds.ds_thread);
460  if (status != 0 && status != ESRCH) {
461  __kmp_fatal(KMP_MSG(CantTerminateWorkerThread), KMP_ERR(status),
462  __kmp_msg_null);
463  }
464 #endif
465  KMP_YIELD(TRUE);
466 } // __kmp_terminate_thread
467 
468 /* Set thread stack info according to values returned by pthread_getattr_np().
469  If values are unreasonable, assume call failed and use incremental stack
470  refinement method instead. Returns TRUE if the stack parameters could be
471  determined exactly, FALSE if incremental refinement is necessary. */
472 static kmp_int32 __kmp_set_stack_info(int gtid, kmp_info_t *th) {
473  int stack_data;
474 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
475  KMP_OS_HURD
476  pthread_attr_t attr;
477  int status;
478  size_t size = 0;
479  void *addr = 0;
480 
481  /* Always do incremental stack refinement for ubermaster threads since the
482  initial thread stack range can be reduced by sibling thread creation so
483  pthread_attr_getstack may cause thread gtid aliasing */
484  if (!KMP_UBER_GTID(gtid)) {
485 
486  /* Fetch the real thread attributes */
487  status = pthread_attr_init(&attr);
488  KMP_CHECK_SYSFAIL("pthread_attr_init", status);
489 #if KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD
490  status = pthread_attr_get_np(pthread_self(), &attr);
491  KMP_CHECK_SYSFAIL("pthread_attr_get_np", status);
492 #else
493  status = pthread_getattr_np(pthread_self(), &attr);
494  KMP_CHECK_SYSFAIL("pthread_getattr_np", status);
495 #endif
496  status = pthread_attr_getstack(&attr, &addr, &size);
497  KMP_CHECK_SYSFAIL("pthread_attr_getstack", status);
498  KA_TRACE(60,
499  ("__kmp_set_stack_info: T#%d pthread_attr_getstack returned size:"
500  " %lu, low addr: %p\n",
501  gtid, size, addr));
502  status = pthread_attr_destroy(&attr);
503  KMP_CHECK_SYSFAIL("pthread_attr_destroy", status);
504  }
505 
506  if (size != 0 && addr != 0) { // was stack parameter determination successful?
507  /* Store the correct base and size */
508  TCW_PTR(th->th.th_info.ds.ds_stackbase, (((char *)addr) + size));
509  TCW_PTR(th->th.th_info.ds.ds_stacksize, size);
510  TCW_4(th->th.th_info.ds.ds_stackgrow, FALSE);
511  return TRUE;
512  }
513 #endif /* KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD ||
514  KMP_OS_HURD */
515  /* Use incremental refinement starting from initial conservative estimate */
516  TCW_PTR(th->th.th_info.ds.ds_stacksize, 0);
517  TCW_PTR(th->th.th_info.ds.ds_stackbase, &stack_data);
518  TCW_4(th->th.th_info.ds.ds_stackgrow, TRUE);
519  return FALSE;
520 }
521 
522 static void *__kmp_launch_worker(void *thr) {
523  int status, old_type, old_state;
524 #ifdef KMP_BLOCK_SIGNALS
525  sigset_t new_set, old_set;
526 #endif /* KMP_BLOCK_SIGNALS */
527  void *exit_val;
528 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
529  KMP_OS_OPENBSD || KMP_OS_HURD
530  void *volatile padding = 0;
531 #endif
532  int gtid;
533 
534  gtid = ((kmp_info_t *)thr)->th.th_info.ds.ds_gtid;
535  __kmp_gtid_set_specific(gtid);
536 #ifdef KMP_TDATA_GTID
537  __kmp_gtid = gtid;
538 #endif
539 #if KMP_STATS_ENABLED
540  // set thread local index to point to thread-specific stats
541  __kmp_stats_thread_ptr = ((kmp_info_t *)thr)->th.th_stats;
542  __kmp_stats_thread_ptr->startLife();
543  KMP_SET_THREAD_STATE(IDLE);
544  KMP_INIT_PARTITIONED_TIMERS(OMP_idle);
545 #endif
546 
547 #if USE_ITT_BUILD
548  __kmp_itt_thread_name(gtid);
549 #endif /* USE_ITT_BUILD */
550 
551 #if KMP_AFFINITY_SUPPORTED
552  __kmp_affinity_set_init_mask(gtid, FALSE);
553 #endif
554 
555 #ifdef KMP_CANCEL_THREADS
556  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
557  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
558  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
559  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
560  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
561 #endif
562 
563 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
564  // Set FP control regs to be a copy of the parallel initialization thread's.
565  __kmp_clear_x87_fpu_status_word();
566  __kmp_load_x87_fpu_control_word(&__kmp_init_x87_fpu_control_word);
567  __kmp_load_mxcsr(&__kmp_init_mxcsr);
568 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
569 
570 #ifdef KMP_BLOCK_SIGNALS
571  status = sigfillset(&new_set);
572  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
573  status = pthread_sigmask(SIG_BLOCK, &new_set, &old_set);
574  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
575 #endif /* KMP_BLOCK_SIGNALS */
576 
577 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
578  KMP_OS_OPENBSD
579  if (__kmp_stkoffset > 0 && gtid > 0) {
580  padding = KMP_ALLOCA(gtid * __kmp_stkoffset);
581  }
582 #endif
583 
584  KMP_MB();
585  __kmp_set_stack_info(gtid, (kmp_info_t *)thr);
586 
587  __kmp_check_stack_overlap((kmp_info_t *)thr);
588 
589  exit_val = __kmp_launch_thread((kmp_info_t *)thr);
590 
591 #ifdef KMP_BLOCK_SIGNALS
592  status = pthread_sigmask(SIG_SETMASK, &old_set, NULL);
593  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
594 #endif /* KMP_BLOCK_SIGNALS */
595 
596  return exit_val;
597 }
598 
599 #if KMP_USE_MONITOR
600 /* The monitor thread controls all of the threads in the complex */
601 
602 static void *__kmp_launch_monitor(void *thr) {
603  int status, old_type, old_state;
604 #ifdef KMP_BLOCK_SIGNALS
605  sigset_t new_set;
606 #endif /* KMP_BLOCK_SIGNALS */
607  struct timespec interval;
608 
609  KMP_MB(); /* Flush all pending memory write invalidates. */
610 
611  KA_TRACE(10, ("__kmp_launch_monitor: #1 launched\n"));
612 
613  /* register us as the monitor thread */
614  __kmp_gtid_set_specific(KMP_GTID_MONITOR);
615 #ifdef KMP_TDATA_GTID
616  __kmp_gtid = KMP_GTID_MONITOR;
617 #endif
618 
619  KMP_MB();
620 
621 #if USE_ITT_BUILD
622  // Instruct Intel(R) Threading Tools to ignore monitor thread.
623  __kmp_itt_thread_ignore();
624 #endif /* USE_ITT_BUILD */
625 
626  __kmp_set_stack_info(((kmp_info_t *)thr)->th.th_info.ds.ds_gtid,
627  (kmp_info_t *)thr);
628 
629  __kmp_check_stack_overlap((kmp_info_t *)thr);
630 
631 #ifdef KMP_CANCEL_THREADS
632  status = pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &old_type);
633  KMP_CHECK_SYSFAIL("pthread_setcanceltype", status);
634  // josh todo: isn't PTHREAD_CANCEL_ENABLE default for newly-created threads?
635  status = pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &old_state);
636  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
637 #endif
638 
639 #if KMP_REAL_TIME_FIX
640  // This is a potential fix which allows applications with a real-time
641  // scheduling policy to work. However, the decision about the fix has not been
642  // made yet, so it is disabled by default.
643  { // Was the program started with a real-time scheduling policy?
644  int sched = sched_getscheduler(0);
645  if (sched == SCHED_FIFO || sched == SCHED_RR) {
646  // Yes, we are part of a real-time application. Try to increase the
647  // priority of the monitor.
648  struct sched_param param;
649  int max_priority = sched_get_priority_max(sched);
650  int rc;
651  KMP_WARNING(RealTimeSchedNotSupported);
652  sched_getparam(0, &param);
653  if (param.sched_priority < max_priority) {
654  param.sched_priority += 1;
655  rc = sched_setscheduler(0, sched, &param);
656  if (rc != 0) {
657  int error = errno;
658  kmp_msg_t err_code = KMP_ERR(error);
659  __kmp_msg(kmp_ms_warning, KMP_MSG(CantChangeMonitorPriority),
660  err_code, KMP_MSG(MonitorWillStarve), __kmp_msg_null);
661  if (__kmp_generate_warnings == kmp_warnings_off) {
662  __kmp_str_free(&err_code.str);
663  }
664  }
665  } else {
666  // We cannot abort here, because the number of CPUs may be enough for all
667  // the threads, including the monitor thread, so the application could
668  // potentially work...
669  __kmp_msg(kmp_ms_warning, KMP_MSG(RunningAtMaxPriority),
670  KMP_MSG(MonitorWillStarve), KMP_HNT(RunningAtMaxPriority),
671  __kmp_msg_null);
672  }
673  }
674  // AC: free the thread that waits for the monitor to start
675  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
676  }
677 #endif // KMP_REAL_TIME_FIX
678 
679  KMP_MB(); /* Flush all pending memory write invalidates. */
680 
681  if (__kmp_monitor_wakeups == 1) {
682  interval.tv_sec = 1;
683  interval.tv_nsec = 0;
684  } else {
685  interval.tv_sec = 0;
686  interval.tv_nsec = (KMP_NSEC_PER_SEC / __kmp_monitor_wakeups);
687  }
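  // For example, __kmp_monitor_wakeups == 4 yields interval = 0 s and
  // 250,000,000 ns, i.e. the monitor wakes roughly every 250 ms.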
688 
689  KA_TRACE(10, ("__kmp_launch_monitor: #2 monitor\n"));
690 
691  while (!TCR_4(__kmp_global.g.g_done)) {
692  struct timespec now;
693  struct timeval tval;
694 
695  /* This thread monitors the state of the system */
696 
697  KA_TRACE(15, ("__kmp_launch_monitor: update\n"));
698 
699  status = gettimeofday(&tval, NULL);
700  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
701  TIMEVAL_TO_TIMESPEC(&tval, &now);
702 
703  now.tv_sec += interval.tv_sec;
704  now.tv_nsec += interval.tv_nsec;
705 
706  if (now.tv_nsec >= KMP_NSEC_PER_SEC) {
707  now.tv_sec += 1;
708  now.tv_nsec -= KMP_NSEC_PER_SEC;
709  }
710 
711  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
712  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
713  // AC: the monitor should not fall asleep if g_done has been set
714  if (!TCR_4(__kmp_global.g.g_done)) { // check once more under mutex
715  status = pthread_cond_timedwait(&__kmp_wait_cv.c_cond,
716  &__kmp_wait_mx.m_mutex, &now);
717  if (status != 0) {
718  if (status != ETIMEDOUT && status != EINTR) {
719  KMP_SYSFAIL("pthread_cond_timedwait", status);
720  }
721  }
722  }
723  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
724  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
725 
726  TCW_4(__kmp_global.g.g_time.dt.t_value,
727  TCR_4(__kmp_global.g.g_time.dt.t_value) + 1);
728 
729  KMP_MB(); /* Flush all pending memory write invalidates. */
730  }
731 
732  KA_TRACE(10, ("__kmp_launch_monitor: #3 cleanup\n"));
733 
734 #ifdef KMP_BLOCK_SIGNALS
735  status = sigfillset(&new_set);
736  KMP_CHECK_SYSFAIL_ERRNO("sigfillset", status);
737  status = pthread_sigmask(SIG_UNBLOCK, &new_set, NULL);
738  KMP_CHECK_SYSFAIL("pthread_sigmask", status);
739 #endif /* KMP_BLOCK_SIGNALS */
740 
741  KA_TRACE(10, ("__kmp_launch_monitor: #4 finished\n"));
742 
743  if (__kmp_global.g.g_abort != 0) {
744  /* now we need to terminate the worker threads */
745  /* the value of t_abort is the signal we caught */
746 
747  int gtid;
748 
749  KA_TRACE(10, ("__kmp_launch_monitor: #5 terminate sig=%d\n",
750  __kmp_global.g.g_abort));
751 
752  /* terminate the OpenMP worker threads */
753  /* TODO this is not valid for sibling threads!!
754  * the uber master might not be 0 anymore.. */
755  for (gtid = 1; gtid < __kmp_threads_capacity; ++gtid)
756  __kmp_terminate_thread(gtid);
757 
758  __kmp_cleanup();
759 
760  KA_TRACE(10, ("__kmp_launch_monitor: #6 raise sig=%d\n",
761  __kmp_global.g.g_abort));
762 
763  if (__kmp_global.g.g_abort > 0)
764  raise(__kmp_global.g.g_abort);
765  }
766 
767  KA_TRACE(10, ("__kmp_launch_monitor: #7 exit\n"));
768 
769  return thr;
770 }
771 #endif // KMP_USE_MONITOR
772 
773 void __kmp_create_worker(int gtid, kmp_info_t *th, size_t stack_size) {
774  pthread_t handle;
775  pthread_attr_t thread_attr;
776  int status;
777 
778  th->th.th_info.ds.ds_gtid = gtid;
779 
780 #if KMP_STATS_ENABLED
781  // sets up worker thread stats
782  __kmp_acquire_tas_lock(&__kmp_stats_lock, gtid);
783 
784  // th->th.th_stats is used to transfer thread-specific stats-pointer to
785  // __kmp_launch_worker. So when the thread is created (goes into
786  // __kmp_launch_worker) it will set its thread-local pointer to
787  // th->th.th_stats
788  if (!KMP_UBER_GTID(gtid)) {
789  th->th.th_stats = __kmp_stats_list->push_back(gtid);
790  } else {
791  // For root threads, __kmp_stats_thread_ptr is set in __kmp_register_root(),
792  // so set the th->th.th_stats field to it.
793  th->th.th_stats = __kmp_stats_thread_ptr;
794  }
795  __kmp_release_tas_lock(&__kmp_stats_lock, gtid);
796 
797 #endif // KMP_STATS_ENABLED
798 
799  if (KMP_UBER_GTID(gtid)) {
800  KA_TRACE(10, ("__kmp_create_worker: uber thread (%d)\n", gtid));
801  th->th.th_info.ds.ds_thread = pthread_self();
802  __kmp_set_stack_info(gtid, th);
803  __kmp_check_stack_overlap(th);
804  return;
805  }
806 
807  KA_TRACE(10, ("__kmp_create_worker: try to create thread (%d)\n", gtid));
808 
809  KMP_MB(); /* Flush all pending memory write invalidates. */
810 
811 #ifdef KMP_THREAD_ATTR
812  status = pthread_attr_init(&thread_attr);
813  if (status != 0) {
814  __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
815  }
816  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
817  if (status != 0) {
818  __kmp_fatal(KMP_MSG(CantSetWorkerState), KMP_ERR(status), __kmp_msg_null);
819  }
820 
821  /* Set stack size for this thread now.
822  The multiple of 2 is there because on some machines, requesting an unusual
823  stacksize causes the thread to have an offset before the dummy alloca()
824  takes place to create the offset. Since we want the user to have a
825  sufficient stacksize AND support a stack offset, we alloca() twice the
826  offset so that the upcoming alloca() does not eliminate any premade offset,
827  and also gives the user the stack space they requested for all threads */
828  stack_size += gtid * __kmp_stkoffset * 2;
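  // Worked example with illustrative numbers (not necessarily the defaults):
  // if __kmp_stkoffset were 8 KiB, T#3 would request an extra
  // 3 * 8 KiB * 2 = 48 KiB here, leaving room both for the 24 KiB alloca()
  // offset done in __kmp_launch_worker and for the full user-requested stack.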
829 
830 #if defined(__ANDROID__) && __ANDROID_API__ < 19
831  // Round the stack size to a multiple of the page size. Older versions of
832  // Android (until KitKat) would fail pthread_attr_setstacksize with EINVAL
833  // if the stack size was not a multiple of the page size.
834  stack_size = (stack_size + PAGE_SIZE - 1) & ~(PAGE_SIZE - 1);
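  // For example, with PAGE_SIZE = 4096 a request of 1,048,600 bytes
  // (1 MiB + 24) rounds up to 1,052,672 bytes (257 pages).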
835 #endif
836 
837  KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
838  "__kmp_stksize = %lu bytes, final stacksize = %lu bytes\n",
839  gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
840 
841 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
842  status = pthread_attr_setstacksize(&thread_attr, stack_size);
843 #ifdef KMP_BACKUP_STKSIZE
844  if (status != 0) {
845  if (!__kmp_env_stksize) {
846  stack_size = KMP_BACKUP_STKSIZE + gtid * __kmp_stkoffset;
847  __kmp_stksize = KMP_BACKUP_STKSIZE;
848  KA_TRACE(10, ("__kmp_create_worker: T#%d, default stacksize = %lu bytes, "
849  "__kmp_stksize = %lu bytes, (backup) final stacksize = %lu "
850  "bytes\n",
851  gtid, KMP_DEFAULT_STKSIZE, __kmp_stksize, stack_size));
852  status = pthread_attr_setstacksize(&thread_attr, stack_size);
853  }
854  }
855 #endif /* KMP_BACKUP_STKSIZE */
856  if (status != 0) {
857  __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
858  KMP_HNT(ChangeWorkerStackSize), __kmp_msg_null);
859  }
860 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
861 
862 #endif /* KMP_THREAD_ATTR */
863 
864  status =
865  pthread_create(&handle, &thread_attr, __kmp_launch_worker, (void *)th);
866  if (status != 0 || !handle) { // ??? Why do we check handle??
867 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
868  if (status == EINVAL) {
869  __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
870  KMP_HNT(IncreaseWorkerStackSize), __kmp_msg_null);
871  }
872  if (status == ENOMEM) {
873  __kmp_fatal(KMP_MSG(CantSetWorkerStackSize, stack_size), KMP_ERR(status),
874  KMP_HNT(DecreaseWorkerStackSize), __kmp_msg_null);
875  }
876 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
877  if (status == EAGAIN) {
878  __kmp_fatal(KMP_MSG(NoResourcesForWorkerThread), KMP_ERR(status),
879  KMP_HNT(Decrease_NUM_THREADS), __kmp_msg_null);
880  }
881  KMP_SYSFAIL("pthread_create", status);
882  }
883 
884  th->th.th_info.ds.ds_thread = handle;
885 
886 #ifdef KMP_THREAD_ATTR
887  status = pthread_attr_destroy(&thread_attr);
888  if (status) {
889  kmp_msg_t err_code = KMP_ERR(status);
890  __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
891  __kmp_msg_null);
892  if (__kmp_generate_warnings == kmp_warnings_off) {
893  __kmp_str_free(&err_code.str);
894  }
895  }
896 #endif /* KMP_THREAD_ATTR */
897 
898  KMP_MB(); /* Flush all pending memory write invalidates. */
899 
900  KA_TRACE(10, ("__kmp_create_worker: done creating thread (%d)\n", gtid));
901 
902 } // __kmp_create_worker
903 
904 #if KMP_USE_MONITOR
905 void __kmp_create_monitor(kmp_info_t *th) {
906  pthread_t handle;
907  pthread_attr_t thread_attr;
908  size_t size;
909  int status;
910  int auto_adj_size = FALSE;
911 
912  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME) {
913  // We don't need monitor thread in case of MAX_BLOCKTIME
914  KA_TRACE(10, ("__kmp_create_monitor: skipping monitor thread because of "
915  "MAX blocktime\n"));
916  th->th.th_info.ds.ds_tid = 0; // this makes reap_monitor no-op
917  th->th.th_info.ds.ds_gtid = 0;
918  return;
919  }
920  KA_TRACE(10, ("__kmp_create_monitor: try to create monitor\n"));
921 
922  KMP_MB(); /* Flush all pending memory write invalidates. */
923 
924  th->th.th_info.ds.ds_tid = KMP_GTID_MONITOR;
925  th->th.th_info.ds.ds_gtid = KMP_GTID_MONITOR;
926 #if KMP_REAL_TIME_FIX
927  TCW_4(__kmp_global.g.g_time.dt.t_value,
928  -1); // Will use it for synchronization a bit later.
929 #else
930  TCW_4(__kmp_global.g.g_time.dt.t_value, 0);
931 #endif // KMP_REAL_TIME_FIX
932 
933 #ifdef KMP_THREAD_ATTR
934  if (__kmp_monitor_stksize == 0) {
935  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
936  auto_adj_size = TRUE;
937  }
938  status = pthread_attr_init(&thread_attr);
939  if (status != 0) {
940  __kmp_fatal(KMP_MSG(CantInitThreadAttrs), KMP_ERR(status), __kmp_msg_null);
941  }
942  status = pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE);
943  if (status != 0) {
944  __kmp_fatal(KMP_MSG(CantSetMonitorState), KMP_ERR(status), __kmp_msg_null);
945  }
946 
947 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
948  status = pthread_attr_getstacksize(&thread_attr, &size);
949  KMP_CHECK_SYSFAIL("pthread_attr_getstacksize", status);
950 #else
951  size = __kmp_sys_min_stksize;
952 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
953 #endif /* KMP_THREAD_ATTR */
954 
955  if (__kmp_monitor_stksize == 0) {
956  __kmp_monitor_stksize = KMP_DEFAULT_MONITOR_STKSIZE;
957  }
958  if (__kmp_monitor_stksize < __kmp_sys_min_stksize) {
959  __kmp_monitor_stksize = __kmp_sys_min_stksize;
960  }
961 
962  KA_TRACE(10, ("__kmp_create_monitor: default stacksize = %lu bytes,"
963  "requested stacksize = %lu bytes\n",
964  size, __kmp_monitor_stksize));
965 
966 retry:
967 
968 /* Set stack size for this thread now. */
969 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
970  KA_TRACE(10, ("__kmp_create_monitor: setting stacksize = %lu bytes,",
971  __kmp_monitor_stksize));
972  status = pthread_attr_setstacksize(&thread_attr, __kmp_monitor_stksize);
973  if (status != 0) {
974  if (auto_adj_size) {
975  __kmp_monitor_stksize *= 2;
976  goto retry;
977  }
978  kmp_msg_t err_code = KMP_ERR(status);
979  __kmp_msg(kmp_ms_warning, // should this be fatal? BB
980  KMP_MSG(CantSetMonitorStackSize, (long int)__kmp_monitor_stksize),
981  err_code, KMP_HNT(ChangeMonitorStackSize), __kmp_msg_null);
982  if (__kmp_generate_warnings == kmp_warnings_off) {
983  __kmp_str_free(&err_code.str);
984  }
985  }
986 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
987 
988  status =
989  pthread_create(&handle, &thread_attr, __kmp_launch_monitor, (void *)th);
990 
991  if (status != 0) {
992 #ifdef _POSIX_THREAD_ATTR_STACKSIZE
993  if (status == EINVAL) {
994  if (auto_adj_size && (__kmp_monitor_stksize < (size_t)0x40000000)) {
995  __kmp_monitor_stksize *= 2;
996  goto retry;
997  }
998  __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
999  KMP_ERR(status), KMP_HNT(IncreaseMonitorStackSize),
1000  __kmp_msg_null);
1001  }
1002  if (status == ENOMEM) {
1003  __kmp_fatal(KMP_MSG(CantSetMonitorStackSize, __kmp_monitor_stksize),
1004  KMP_ERR(status), KMP_HNT(DecreaseMonitorStackSize),
1005  __kmp_msg_null);
1006  }
1007 #endif /* _POSIX_THREAD_ATTR_STACKSIZE */
1008  if (status == EAGAIN) {
1009  __kmp_fatal(KMP_MSG(NoResourcesForMonitorThread), KMP_ERR(status),
1010  KMP_HNT(DecreaseNumberOfThreadsInUse), __kmp_msg_null);
1011  }
1012  KMP_SYSFAIL("pthread_create", status);
1013  }
1014 
1015  th->th.th_info.ds.ds_thread = handle;
1016 
1017 #if KMP_REAL_TIME_FIX
1018  // Wait until the monitor thread has really started and set its *priority*.
1019  KMP_DEBUG_ASSERT(sizeof(kmp_uint32) ==
1020  sizeof(__kmp_global.g.g_time.dt.t_value));
1021  __kmp_wait_4((kmp_uint32 volatile *)&__kmp_global.g.g_time.dt.t_value, -1,
1022  &__kmp_neq_4, NULL);
1023 #endif // KMP_REAL_TIME_FIX
1024 
1025 #ifdef KMP_THREAD_ATTR
1026  status = pthread_attr_destroy(&thread_attr);
1027  if (status != 0) {
1028  kmp_msg_t err_code = KMP_ERR(status);
1029  __kmp_msg(kmp_ms_warning, KMP_MSG(CantDestroyThreadAttrs), err_code,
1030  __kmp_msg_null);
1031  if (__kmp_generate_warnings == kmp_warnings_off) {
1032  __kmp_str_free(&err_code.str);
1033  }
1034  }
1035 #endif
1036 
1037  KMP_MB(); /* Flush all pending memory write invalidates. */
1038 
1039  KA_TRACE(10, ("__kmp_create_monitor: monitor created %#.8lx\n",
1040  th->th.th_info.ds.ds_thread));
1041 
1042 } // __kmp_create_monitor
1043 #endif // KMP_USE_MONITOR
1044 
1045 void __kmp_exit_thread(int exit_status) {
1046  pthread_exit((void *)(intptr_t)exit_status);
1047 } // __kmp_exit_thread
1048 
1049 #if KMP_USE_MONITOR
1050 void __kmp_resume_monitor();
1051 
1052 void __kmp_reap_monitor(kmp_info_t *th) {
1053  int status;
1054  void *exit_val;
1055 
1056  KA_TRACE(10, ("__kmp_reap_monitor: try to reap monitor thread with handle"
1057  " %#.8lx\n",
1058  th->th.th_info.ds.ds_thread));
1059 
1060  // If monitor has been created, its tid and gtid should be KMP_GTID_MONITOR.
1061  // If both tid and gtid are 0, it means the monitor did not ever start.
1062  // If both tid and gtid are KMP_GTID_DNE, the monitor has been shut down.
1063  KMP_DEBUG_ASSERT(th->th.th_info.ds.ds_tid == th->th.th_info.ds.ds_gtid);
1064  if (th->th.th_info.ds.ds_gtid != KMP_GTID_MONITOR) {
1065  KA_TRACE(10, ("__kmp_reap_monitor: monitor did not start, returning\n"));
1066  return;
1067  }
1068 
1069  KMP_MB(); /* Flush all pending memory write invalidates. */
1070 
1071  /* First, check to see whether the monitor thread exists to wake it up. This
1072  is to avoid a performance problem when the monitor sleeps during a
1073  blocktime-sized interval */
1074 
1075  status = pthread_kill(th->th.th_info.ds.ds_thread, 0);
1076  if (status != ESRCH) {
1077  __kmp_resume_monitor(); // Wake up the monitor thread
1078  }
1079  KA_TRACE(10, ("__kmp_reap_monitor: try to join with monitor\n"));
1080  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1081  if (exit_val != th) {
1082  __kmp_fatal(KMP_MSG(ReapMonitorError), KMP_ERR(status), __kmp_msg_null);
1083  }
1084 
1085  th->th.th_info.ds.ds_tid = KMP_GTID_DNE;
1086  th->th.th_info.ds.ds_gtid = KMP_GTID_DNE;
1087 
1088  KA_TRACE(10, ("__kmp_reap_monitor: done reaping monitor thread with handle"
1089  " %#.8lx\n",
1090  th->th.th_info.ds.ds_thread));
1091 
1092  KMP_MB(); /* Flush all pending memory write invalidates. */
1093 }
1094 #endif // KMP_USE_MONITOR
1095 
1096 void __kmp_reap_worker(kmp_info_t *th) {
1097  int status;
1098  void *exit_val;
1099 
1100  KMP_MB(); /* Flush all pending memory write invalidates. */
1101 
1102  KA_TRACE(
1103  10, ("__kmp_reap_worker: try to reap T#%d\n", th->th.th_info.ds.ds_gtid));
1104 
1105  status = pthread_join(th->th.th_info.ds.ds_thread, &exit_val);
1106 #ifdef KMP_DEBUG
1107  /* Don't expose these to the user until we understand when they trigger */
1108  if (status != 0) {
1109  __kmp_fatal(KMP_MSG(ReapWorkerError), KMP_ERR(status), __kmp_msg_null);
1110  }
1111  if (exit_val != th) {
1112  KA_TRACE(10, ("__kmp_reap_worker: worker T#%d did not reap properly, "
1113  "exit_val = %p\n",
1114  th->th.th_info.ds.ds_gtid, exit_val));
1115  }
1116 #endif /* KMP_DEBUG */
1117 
1118  KA_TRACE(10, ("__kmp_reap_worker: done reaping T#%d\n",
1119  th->th.th_info.ds.ds_gtid));
1120 
1121  KMP_MB(); /* Flush all pending memory write invalidates. */
1122 }
1123 
1124 #if KMP_HANDLE_SIGNALS
1125 
1126 static void __kmp_null_handler(int signo) {
1127  // Do nothing, for doing SIG_IGN-type actions.
1128 } // __kmp_null_handler
1129 
1130 static void __kmp_team_handler(int signo) {
1131  if (__kmp_global.g.g_abort == 0) {
1132 /* Stage 1 signal handler, let's shut down all of the threads */
1133 #ifdef KMP_DEBUG
1134  __kmp_debug_printf("__kmp_team_handler: caught signal = %d\n", signo);
1135 #endif
1136  switch (signo) {
1137  case SIGHUP:
1138  case SIGINT:
1139  case SIGQUIT:
1140  case SIGILL:
1141  case SIGABRT:
1142  case SIGFPE:
1143  case SIGBUS:
1144  case SIGSEGV:
1145 #ifdef SIGSYS
1146  case SIGSYS:
1147 #endif
1148  case SIGTERM:
1149  if (__kmp_debug_buf) {
1150  __kmp_dump_debug_buffer();
1151  }
1152  KMP_MB(); // Flush all pending memory write invalidates.
1153  TCW_4(__kmp_global.g.g_abort, signo);
1154  KMP_MB(); // Flush all pending memory write invalidates.
1155  TCW_4(__kmp_global.g.g_done, TRUE);
1156  KMP_MB(); // Flush all pending memory write invalidates.
1157  break;
1158  default:
1159 #ifdef KMP_DEBUG
1160  __kmp_debug_printf("__kmp_team_handler: unknown signal type");
1161 #endif
1162  break;
1163  }
1164  }
1165 } // __kmp_team_handler
1166 
1167 static void __kmp_sigaction(int signum, const struct sigaction *act,
1168  struct sigaction *oldact) {
1169  int rc = sigaction(signum, act, oldact);
1170  KMP_CHECK_SYSFAIL_ERRNO("sigaction", rc);
1171 }
1172 
1173 static void __kmp_install_one_handler(int sig, sig_func_t handler_func,
1174  int parallel_init) {
1175  KMP_MB(); // Flush all pending memory write invalidates.
1176  KB_TRACE(60,
1177  ("__kmp_install_one_handler( %d, ..., %d )\n", sig, parallel_init));
1178  if (parallel_init) {
1179  struct sigaction new_action;
1180  struct sigaction old_action;
1181  new_action.sa_handler = handler_func;
1182  new_action.sa_flags = 0;
1183  sigfillset(&new_action.sa_mask);
1184  __kmp_sigaction(sig, &new_action, &old_action);
1185  if (old_action.sa_handler == __kmp_sighldrs[sig].sa_handler) {
1186  sigaddset(&__kmp_sigset, sig);
1187  } else {
1188  // Restore/keep user's handler if one previously installed.
1189  __kmp_sigaction(sig, &old_action, NULL);
1190  }
1191  } else {
1192  // Save initial/system signal handlers to see if user handlers installed.
1193  __kmp_sigaction(sig, NULL, &__kmp_sighldrs[sig]);
1194  }
1195  KMP_MB(); // Flush all pending memory write invalidates.
1196 } // __kmp_install_one_handler
1197 
1198 static void __kmp_remove_one_handler(int sig) {
1199  KB_TRACE(60, ("__kmp_remove_one_handler( %d )\n", sig));
1200  if (sigismember(&__kmp_sigset, sig)) {
1201  struct sigaction old;
1202  KMP_MB(); // Flush all pending memory write invalidates.
1203  __kmp_sigaction(sig, &__kmp_sighldrs[sig], &old);
1204  if ((old.sa_handler != __kmp_team_handler) &&
1205  (old.sa_handler != __kmp_null_handler)) {
1206  // Restore the user's signal handler.
1207  KB_TRACE(10, ("__kmp_remove_one_handler: oops, not our handler, "
1208  "restoring: sig=%d\n",
1209  sig));
1210  __kmp_sigaction(sig, &old, NULL);
1211  }
1212  sigdelset(&__kmp_sigset, sig);
1213  KMP_MB(); // Flush all pending memory write invalidates.
1214  }
1215 } // __kmp_remove_one_handler
1216 
1217 void __kmp_install_signals(int parallel_init) {
1218  KB_TRACE(10, ("__kmp_install_signals( %d )\n", parallel_init));
1219  if (__kmp_handle_signals || !parallel_init) {
1220  // If ! parallel_init, we do not install handlers, just save original
1221  // handlers. Let us do it even if __kmp_handle_signals is 0.
1222  sigemptyset(&__kmp_sigset);
1223  __kmp_install_one_handler(SIGHUP, __kmp_team_handler, parallel_init);
1224  __kmp_install_one_handler(SIGINT, __kmp_team_handler, parallel_init);
1225  __kmp_install_one_handler(SIGQUIT, __kmp_team_handler, parallel_init);
1226  __kmp_install_one_handler(SIGILL, __kmp_team_handler, parallel_init);
1227  __kmp_install_one_handler(SIGABRT, __kmp_team_handler, parallel_init);
1228  __kmp_install_one_handler(SIGFPE, __kmp_team_handler, parallel_init);
1229  __kmp_install_one_handler(SIGBUS, __kmp_team_handler, parallel_init);
1230  __kmp_install_one_handler(SIGSEGV, __kmp_team_handler, parallel_init);
1231 #ifdef SIGSYS
1232  __kmp_install_one_handler(SIGSYS, __kmp_team_handler, parallel_init);
1233 #endif // SIGSYS
1234  __kmp_install_one_handler(SIGTERM, __kmp_team_handler, parallel_init);
1235 #ifdef SIGPIPE
1236  __kmp_install_one_handler(SIGPIPE, __kmp_team_handler, parallel_init);
1237 #endif // SIGPIPE
1238  }
1239 } // __kmp_install_signals
1240 
1241 void __kmp_remove_signals(void) {
1242  int sig;
1243  KB_TRACE(10, ("__kmp_remove_signals()\n"));
1244  for (sig = 1; sig < NSIG; ++sig) {
1245  __kmp_remove_one_handler(sig);
1246  }
1247 } // __kmp_remove_signals
1248 
1249 #endif // KMP_HANDLE_SIGNALS
1250 
1251 void __kmp_enable(int new_state) {
1252 #ifdef KMP_CANCEL_THREADS
1253  int status, old_state;
1254  status = pthread_setcancelstate(new_state, &old_state);
1255  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
1256  KMP_DEBUG_ASSERT(old_state == PTHREAD_CANCEL_DISABLE);
1257 #endif
1258 }
1259 
1260 void __kmp_disable(int *old_state) {
1261 #ifdef KMP_CANCEL_THREADS
1262  int status;
1263  status = pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, old_state);
1264  KMP_CHECK_SYSFAIL("pthread_setcancelstate", status);
1265 #endif
1266 }
1267 
1268 static void __kmp_atfork_prepare(void) {
1269  __kmp_acquire_bootstrap_lock(&__kmp_initz_lock);
1270  __kmp_acquire_bootstrap_lock(&__kmp_forkjoin_lock);
1271 }
1272 
1273 static void __kmp_atfork_parent(void) {
1274  __kmp_release_bootstrap_lock(&__kmp_initz_lock);
1275  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1276 }
1277 
1278 /* Reset the library so execution in the child starts "all over again" with
1279  clean data structures in initial states. Don't worry about freeing memory
1280  allocated by parent, just abandon it to be safe. */
1281 static void __kmp_atfork_child(void) {
1282  __kmp_release_bootstrap_lock(&__kmp_forkjoin_lock);
1283  /* TODO make sure this is done right for nested/sibling */
1284  // ATT: Memory leaks are here? TODO: Check it and fix.
1285  /* KMP_ASSERT( 0 ); */
1286 
1287  ++__kmp_fork_count;
1288 
1289 #if KMP_AFFINITY_SUPPORTED
1290 #if KMP_OS_LINUX || KMP_OS_FREEBSD
1291  // reset the affinity in the child to the initial thread
1292  // affinity in the parent
1293  kmp_set_thread_affinity_mask_initial();
1294 #endif
1295  // Set default not to bind threads tightly in the child (we're expecting
1296  // over-subscription after the fork and this can improve things for
1297  // scripting languages that use OpenMP inside process-parallel code).
1298  __kmp_affinity_type = affinity_none;
1299  if (__kmp_nested_proc_bind.bind_types != NULL) {
1300  __kmp_nested_proc_bind.bind_types[0] = proc_bind_false;
1301  }
1302 #endif // KMP_AFFINITY_SUPPORTED
1303 
1304  __kmp_init_runtime = FALSE;
1305 #if KMP_USE_MONITOR
1306  __kmp_init_monitor = 0;
1307 #endif
1308  __kmp_init_parallel = FALSE;
1309  __kmp_init_middle = FALSE;
1310  __kmp_init_serial = FALSE;
1311  TCW_4(__kmp_init_gtid, FALSE);
1312  __kmp_init_common = FALSE;
1313 
1314  TCW_4(__kmp_init_user_locks, FALSE);
1315 #if !KMP_USE_DYNAMIC_LOCK
1316  __kmp_user_lock_table.used = 1;
1317  __kmp_user_lock_table.allocated = 0;
1318  __kmp_user_lock_table.table = NULL;
1319  __kmp_lock_blocks = NULL;
1320 #endif
1321 
1322  __kmp_all_nth = 0;
1323  TCW_4(__kmp_nth, 0);
1324 
1325  __kmp_thread_pool = NULL;
1326  __kmp_thread_pool_insert_pt = NULL;
1327  __kmp_team_pool = NULL;
1328 
1329  /* Must actually zero all the *cache arguments passed to __kmpc_threadprivate
1330  here so threadprivate doesn't use stale data */
1331  KA_TRACE(10, ("__kmp_atfork_child: checking cache address list %p\n",
1332  __kmp_threadpriv_cache_list));
1333 
1334  while (__kmp_threadpriv_cache_list != NULL) {
1335 
1336  if (*__kmp_threadpriv_cache_list->addr != NULL) {
1337  KC_TRACE(50, ("__kmp_atfork_child: zeroing cache at address %p\n",
1338  &(*__kmp_threadpriv_cache_list->addr)));
1339 
1340  *__kmp_threadpriv_cache_list->addr = NULL;
1341  }
1342  __kmp_threadpriv_cache_list = __kmp_threadpriv_cache_list->next;
1343  }
1344 
1345  __kmp_init_runtime = FALSE;
1346 
1347  /* reset statically initialized locks */
1348  __kmp_init_bootstrap_lock(&__kmp_initz_lock);
1349  __kmp_init_bootstrap_lock(&__kmp_stdio_lock);
1350  __kmp_init_bootstrap_lock(&__kmp_console_lock);
1351  __kmp_init_bootstrap_lock(&__kmp_task_team_lock);
1352 
1353 #if USE_ITT_BUILD
1354  __kmp_itt_reset(); // reset ITT's global state
1355 #endif /* USE_ITT_BUILD */
1356 
1357  /* This is necessary to make sure no stale data is left around */
1358  /* AC: customers complain that we use unsafe routines in the atfork
1359  handler. Mathworks: dlsym() is unsafe. We call dlsym and dlopen
1360  in dynamic_link when checking the presence of the shared tbbmalloc library.
1361  The suggestion is to make the library initialization lazier, similar
1362  to what is done for __kmpc_begin(). */
1363  // TODO: synchronize all static initializations with regular library
1364  // startup; look at kmp_global.cpp and etc.
1365  //__kmp_internal_begin ();
1366 }
1367 
1368 void __kmp_register_atfork(void) {
1369  if (__kmp_need_register_atfork) {
1370  int status = pthread_atfork(__kmp_atfork_prepare, __kmp_atfork_parent,
1371  __kmp_atfork_child);
1372  KMP_CHECK_SYSFAIL("pthread_atfork", status);
1373  __kmp_need_register_atfork = FALSE;
1374  }
1375 }
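// Illustrative scenario (user code, not part of the runtime) that these
// pthread_atfork handlers exist for: a process runs an OpenMP region, fork()s,
// and the child wants to use OpenMP again. __kmp_atfork_child() resets the
// library so the child can re-initialize cleanly:
//
//   #pragma omp parallel
//   { /* parent work */ }
//   if (fork() == 0) {
//     #pragma omp parallel   // usable because the atfork child handler
//     { /* child work */ }   // reset the runtime state after the fork
//   }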
1376 
1377 void __kmp_suspend_initialize(void) {
1378  int status;
1379  status = pthread_mutexattr_init(&__kmp_suspend_mutex_attr);
1380  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1381  status = pthread_condattr_init(&__kmp_suspend_cond_attr);
1382  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1383 }
1384 
1385 void __kmp_suspend_initialize_thread(kmp_info_t *th) {
1386  ANNOTATE_HAPPENS_AFTER(&th->th.th_suspend_init_count);
1387  int old_value = KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count);
1388  int new_value = __kmp_fork_count + 1;
1389  // Return if already initialized
1390  if (old_value == new_value)
1391  return;
1392  // Wait, then return if being initialized
1393  if (old_value == -1 ||
1394  !__kmp_atomic_compare_store(&th->th.th_suspend_init_count, old_value,
1395  -1)) {
1396  while (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) != new_value) {
1397  KMP_CPU_PAUSE();
1398  }
1399  } else {
1400  // Claim to be the initializer and do initializations
1401  int status;
1402  status = pthread_cond_init(&th->th.th_suspend_cv.c_cond,
1403  &__kmp_suspend_cond_attr);
1404  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1405  status = pthread_mutex_init(&th->th.th_suspend_mx.m_mutex,
1406  &__kmp_suspend_mutex_attr);
1407  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1408  KMP_ATOMIC_ST_REL(&th->th.th_suspend_init_count, new_value);
1409  ANNOTATE_HAPPENS_BEFORE(&th->th.th_suspend_init_count);
1410  }
1411 }
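// The routine above is a small lock-free state machine on
// th_suspend_init_count: __kmp_fork_count means "not yet initialized in this
// fork generation", -1 means "another thread is initializing right now" (spin
// until it finishes), and __kmp_fork_count + 1 means "condvar and mutex are
// ready". The compare-and-store claims the -1 slot atomically, so the pthread
// objects are created exactly once per fork generation.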
1412 
1413 void __kmp_suspend_uninitialize_thread(kmp_info_t *th) {
1414  if (KMP_ATOMIC_LD_ACQ(&th->th.th_suspend_init_count) > __kmp_fork_count) {
1415  /* this means we have initialized the suspension pthread objects for this
1416  thread in this instance of the process */
1417  int status;
1418 
1419  status = pthread_cond_destroy(&th->th.th_suspend_cv.c_cond);
1420  if (status != 0 && status != EBUSY) {
1421  KMP_SYSFAIL("pthread_cond_destroy", status);
1422  }
1423  status = pthread_mutex_destroy(&th->th.th_suspend_mx.m_mutex);
1424  if (status != 0 && status != EBUSY) {
1425  KMP_SYSFAIL("pthread_mutex_destroy", status);
1426  }
1427  --th->th.th_suspend_init_count;
1428  KMP_DEBUG_ASSERT(KMP_ATOMIC_LD_RLX(&th->th.th_suspend_init_count) ==
1429  __kmp_fork_count);
1430  }
1431 }
1432 
1433 // return true if lock obtained, false otherwise
1434 int __kmp_try_suspend_mx(kmp_info_t *th) {
1435  return (pthread_mutex_trylock(&th->th.th_suspend_mx.m_mutex) == 0);
1436 }
1437 
1438 void __kmp_lock_suspend_mx(kmp_info_t *th) {
1439  int status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1440  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1441 }
1442 
1443 void __kmp_unlock_suspend_mx(kmp_info_t *th) {
1444  int status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1445  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1446 }
1447 
1448 /* This routine puts the calling thread to sleep after setting the
1449  sleep bit for the indicated flag variable to true. */
1450 template <class C>
1451 static inline void __kmp_suspend_template(int th_gtid, C *flag) {
1452  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_suspend);
1453  kmp_info_t *th = __kmp_threads[th_gtid];
1454  int status;
1455  typename C::flag_t old_spin;
1456 
1457  KF_TRACE(30, ("__kmp_suspend_template: T#%d enter for flag = %p\n", th_gtid,
1458  flag->get()));
1459 
1460  __kmp_suspend_initialize_thread(th);
1461 
1462  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1463  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1464 
1465  KF_TRACE(10, ("__kmp_suspend_template: T#%d setting sleep bit for spin(%p)\n",
1466  th_gtid, flag->get()));
1467 
1468  /* TODO: shouldn't this use release semantics to ensure that
1469  __kmp_suspend_initialize_thread gets called first? */
1470  old_spin = flag->set_sleeping();
1471  if (__kmp_dflt_blocktime == KMP_MAX_BLOCKTIME &&
1472  __kmp_pause_status != kmp_soft_paused) {
1473  flag->unset_sleeping();
1474  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1475  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1476  return;
1477  }
1478  KF_TRACE(5, ("__kmp_suspend_template: T#%d set sleep bit for spin(%p)==%x,"
1479  " was %x\n",
1480  th_gtid, flag->get(), flag->load(), old_spin));
1481 
1482  if (flag->done_check_val(old_spin)) {
1483  old_spin = flag->unset_sleeping();
1484  KF_TRACE(5, ("__kmp_suspend_template: T#%d false alarm, reset sleep bit "
1485  "for spin(%p)\n",
1486  th_gtid, flag->get()));
1487  } else {
1488  /* Encapsulate in a loop as the documentation states that this may
1489  "with low probability" return when the condition variable has
1490  not been signaled or broadcast */
1491  int deactivated = FALSE;
1492  TCW_PTR(th->th.th_sleep_loc, (void *)flag);
1493 
1494  while (flag->is_sleeping()) {
1495 #ifdef DEBUG_SUSPEND
1496  char buffer[128];
1497  __kmp_suspend_count++;
1498  __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1499  __kmp_printf("__kmp_suspend_template: suspending T#%d: %s\n", th_gtid,
1500  buffer);
1501 #endif
1502  // Mark the thread as no longer active (only in the first iteration of the
1503  // loop).
1504  if (!deactivated) {
1505  th->th.th_active = FALSE;
1506  if (th->th.th_active_in_pool) {
1507  th->th.th_active_in_pool = FALSE;
1508  KMP_ATOMIC_DEC(&__kmp_thread_pool_active_nth);
1509  KMP_DEBUG_ASSERT(TCR_4(__kmp_thread_pool_active_nth) >= 0);
1510  }
1511  deactivated = TRUE;
1512  }
1513 
1514 #if USE_SUSPEND_TIMEOUT
1515  struct timespec now;
1516  struct timeval tval;
1517  int msecs;
1518 
1519  status = gettimeofday(&tval, NULL);
1520  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1521  TIMEVAL_TO_TIMESPEC(&tval, &now);
1522 
1523  msecs = (4 * __kmp_dflt_blocktime) + 200;
1524  now.tv_sec += msecs / 1000;
1525  now.tv_nsec += (msecs % 1000) * 1000;
1526 
1527  KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform "
1528  "pthread_cond_timedwait\n",
1529  th_gtid));
1530  status = pthread_cond_timedwait(&th->th.th_suspend_cv.c_cond,
1531  &th->th.th_suspend_mx.m_mutex, &now);
1532 #else
1533  KF_TRACE(15, ("__kmp_suspend_template: T#%d about to perform"
1534  " pthread_cond_wait\n",
1535  th_gtid));
1536  status = pthread_cond_wait(&th->th.th_suspend_cv.c_cond,
1537  &th->th.th_suspend_mx.m_mutex);
1538 #endif
1539 
1540  if ((status != 0) && (status != EINTR) && (status != ETIMEDOUT)) {
1541  KMP_SYSFAIL("pthread_cond_wait", status);
1542  }
1543 #ifdef KMP_DEBUG
1544  if (status == ETIMEDOUT) {
1545  if (flag->is_sleeping()) {
1546  KF_TRACE(100,
1547  ("__kmp_suspend_template: T#%d timeout wakeup\n", th_gtid));
1548  } else {
1549  KF_TRACE(2, ("__kmp_suspend_template: T#%d timeout wakeup, sleep bit "
1550  "not set!\n",
1551  th_gtid));
1552  }
1553  } else if (flag->is_sleeping()) {
1554  KF_TRACE(100,
1555  ("__kmp_suspend_template: T#%d spurious wakeup\n", th_gtid));
1556  }
1557 #endif
1558  } // while
1559 
1560  // Mark the thread as active again (if it was previously marked as inactive)
1561  if (deactivated) {
1562  th->th.th_active = TRUE;
1563  if (TCR_4(th->th.th_in_pool)) {
1564  KMP_ATOMIC_INC(&__kmp_thread_pool_active_nth);
1565  th->th.th_active_in_pool = TRUE;
1566  }
1567  }
1568  }
1569 #ifdef DEBUG_SUSPEND
1570  {
1571  char buffer[128];
1572  __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1573  __kmp_printf("__kmp_suspend_template: T#%d has awakened: %s\n", th_gtid,
1574  buffer);
1575  }
1576 #endif
1577 
1578  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1579  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1580  KF_TRACE(30, ("__kmp_suspend_template: T#%d exit\n", th_gtid));
1581 }
1582 
1583 void __kmp_suspend_32(int th_gtid, kmp_flag_32 *flag) {
1584  __kmp_suspend_template(th_gtid, flag);
1585 }
1586 void __kmp_suspend_64(int th_gtid, kmp_flag_64 *flag) {
1587  __kmp_suspend_template(th_gtid, flag);
1588 }
1589 void __kmp_suspend_oncore(int th_gtid, kmp_flag_oncore *flag) {
1590  __kmp_suspend_template(th_gtid, flag);
1591 }
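// The suspend/resume pair is a classic condition-variable handshake: the
// sleeper sets the flag's sleep bit and waits on its per-thread condvar while
// holding th_suspend_mx; __kmp_resume_* (below) takes the same mutex, clears
// the sleep bit and signals the condvar. Holding the mutex across both sides
// ensures a wake-up cannot be lost between the flag check and the
// pthread_cond_wait call.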
1592 
1593 /* This routine signals the thread specified by target_gtid to wake up
1594  after setting the sleep bit indicated by the flag argument to FALSE.
1595  The target thread must already have called __kmp_suspend_template() */
1596 template <class C>
1597 static inline void __kmp_resume_template(int target_gtid, C *flag) {
1598  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1599  kmp_info_t *th = __kmp_threads[target_gtid];
1600  int status;
1601 
1602 #ifdef KMP_DEBUG
1603  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1604 #endif
1605 
1606  KF_TRACE(30, ("__kmp_resume_template: T#%d wants to wakeup T#%d enter\n",
1607  gtid, target_gtid));
1608  KMP_DEBUG_ASSERT(gtid != target_gtid);
1609 
1610  __kmp_suspend_initialize_thread(th);
1611 
1612  status = pthread_mutex_lock(&th->th.th_suspend_mx.m_mutex);
1613  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1614 
1615  if (!flag) { // coming from __kmp_null_resume_wrapper
1616  flag = (C *)CCAST(void *, th->th.th_sleep_loc);
1617  }
1618 
1619  // First, check if the flag is null or its type has changed. If so, someone
1620  // else woke it up.
1621  if (!flag || flag->get_type() != flag->get_ptr_type()) { // get_ptr_type
1622  // simply shows what
1623  // flag was cast to
1624  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1625  "awake: flag(%p)\n",
1626  gtid, target_gtid, NULL));
1627  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1628  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1629  return;
1630  } else { // if multiple threads are sleeping, flag should be internally
1631  // referring to a specific thread here
1632  typename C::flag_t old_spin = flag->unset_sleeping();
1633  if (!flag->is_sleeping_val(old_spin)) {
1634  KF_TRACE(5, ("__kmp_resume_template: T#%d exiting, thread T#%d already "
1635  "awake: flag(%p): "
1636  "%u => %u\n",
1637  gtid, target_gtid, flag->get(), old_spin, flag->load()));
1638  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1639  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1640  return;
1641  }
1642  KF_TRACE(5, ("__kmp_resume_template: T#%d about to wakeup T#%d, reset "
1643  "sleep bit for flag's loc(%p): "
1644  "%u => %u\n",
1645  gtid, target_gtid, flag->get(), old_spin, flag->load()));
1646  }
1647  TCW_PTR(th->th.th_sleep_loc, NULL);
1648 
1649 #ifdef DEBUG_SUSPEND
1650  {
1651  char buffer[128];
1652  __kmp_print_cond(buffer, &th->th.th_suspend_cv);
1653  __kmp_printf("__kmp_resume_template: T#%d resuming T#%d: %s\n", gtid,
1654  target_gtid, buffer);
1655  }
1656 #endif
1657  status = pthread_cond_signal(&th->th.th_suspend_cv.c_cond);
1658  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1659  status = pthread_mutex_unlock(&th->th.th_suspend_mx.m_mutex);
1660  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1661  KF_TRACE(30, ("__kmp_resume_template: T#%d exiting after signaling wake up"
1662  " for T#%d\n",
1663  gtid, target_gtid));
1664 }
1665 
1666 void __kmp_resume_32(int target_gtid, kmp_flag_32 *flag) {
1667  __kmp_resume_template(target_gtid, flag);
1668 }
1669 void __kmp_resume_64(int target_gtid, kmp_flag_64 *flag) {
1670  __kmp_resume_template(target_gtid, flag);
1671 }
1672 void __kmp_resume_oncore(int target_gtid, kmp_flag_oncore *flag) {
1673  __kmp_resume_template(target_gtid, flag);
1674 }
1675 
1676 #if KMP_USE_MONITOR
1677 void __kmp_resume_monitor() {
1678  KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(USER_resume);
1679  int status;
1680 #ifdef KMP_DEBUG
1681  int gtid = TCR_4(__kmp_init_gtid) ? __kmp_get_gtid() : -1;
1682  KF_TRACE(30, ("__kmp_resume_monitor: T#%d wants to wakeup T#%d enter\n", gtid,
1683  KMP_GTID_MONITOR));
1684  KMP_DEBUG_ASSERT(gtid != KMP_GTID_MONITOR);
1685 #endif
1686  status = pthread_mutex_lock(&__kmp_wait_mx.m_mutex);
1687  KMP_CHECK_SYSFAIL("pthread_mutex_lock", status);
1688 #ifdef DEBUG_SUSPEND
1689  {
1690  char buffer[128];
1691  __kmp_print_cond(buffer, &__kmp_wait_cv.c_cond);
1692  __kmp_printf("__kmp_resume_monitor: T#%d resuming T#%d: %s\n", gtid,
1693  KMP_GTID_MONITOR, buffer);
1694  }
1695 #endif
1696  status = pthread_cond_signal(&__kmp_wait_cv.c_cond);
1697  KMP_CHECK_SYSFAIL("pthread_cond_signal", status);
1698  status = pthread_mutex_unlock(&__kmp_wait_mx.m_mutex);
1699  KMP_CHECK_SYSFAIL("pthread_mutex_unlock", status);
1700  KF_TRACE(30, ("__kmp_resume_monitor: T#%d exiting after signaling wake up"
1701  " for T#%d\n",
1702  gtid, KMP_GTID_MONITOR));
1703 }
1704 #endif // KMP_USE_MONITOR
1705 
1706 void __kmp_yield() { sched_yield(); }
1707 
1708 void __kmp_gtid_set_specific(int gtid) {
1709  if (__kmp_init_gtid) {
1710  int status;
1711  status = pthread_setspecific(__kmp_gtid_threadprivate_key,
1712  (void *)(intptr_t)(gtid + 1));
1713  KMP_CHECK_SYSFAIL("pthread_setspecific", status);
1714  } else {
1715  KA_TRACE(50, ("__kmp_gtid_set_specific: runtime shutdown, returning\n"));
1716  }
1717 }
1718 
1719 int __kmp_gtid_get_specific() {
1720  int gtid;
1721  if (!__kmp_init_gtid) {
1722  KA_TRACE(50, ("__kmp_gtid_get_specific: runtime shutdown, returning "
1723  "KMP_GTID_SHUTDOWN\n"));
1724  return KMP_GTID_SHUTDOWN;
1725  }
1726  gtid = (int)(size_t)pthread_getspecific(__kmp_gtid_threadprivate_key);
1727  if (gtid == 0) {
1728  gtid = KMP_GTID_DNE;
1729  } else {
1730  gtid--;
1731  }
1732  KA_TRACE(50, ("__kmp_gtid_get_specific: key:%d gtid:%d\n",
1733  __kmp_gtid_threadprivate_key, gtid));
1734  return gtid;
1735 }
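/* The gtid is stored biased by one because pthread_getspecific() returns NULL
   (i.e. 0) both for "key never set" and for a stored value of 0. With the
   bias, a raw value of 0 unambiguously means "does not exist":

     set: pthread_setspecific(key, (void *)(intptr_t)(gtid + 1));
     get: raw == 0 ? KMP_GTID_DNE : raw - 1;   // e.g. stored 1 -> gtid 0
*/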
1736 
1737 double __kmp_read_cpu_time(void) {
1738  /*clock_t t;*/
1739  struct tms buffer;
1740 
1741  /*t =*/times(&buffer);
1742 
1743  return (buffer.tms_utime + buffer.tms_cutime) / (double)CLOCKS_PER_SEC;
1744 }
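/* Note: POSIX defines the unit of the tms_* fields as sysconf(_SC_CLK_TCK)
   clock ticks; CLOCKS_PER_SEC is the unit used by clock(), and the two may
   differ. A sketch of the conversion using the POSIX unit would be:

     long tck = sysconf(_SC_CLK_TCK);
     return (buffer.tms_utime + buffer.tms_cutime) / (double)tck;
*/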
1745 
1746 int __kmp_read_system_info(struct kmp_sys_info *info) {
1747  int status;
1748  struct rusage r_usage;
1749 
1750  memset(info, 0, sizeof(*info));
1751 
1752  status = getrusage(RUSAGE_SELF, &r_usage);
1753  KMP_CHECK_SYSFAIL_ERRNO("getrusage", status);
1754 
1755  // The maximum resident set size utilized (in kilobytes)
1756  info->maxrss = r_usage.ru_maxrss;
1757  // The number of page faults serviced without any I/O
1758  info->minflt = r_usage.ru_minflt;
1759  // The number of page faults serviced that required I/O
1760  info->majflt = r_usage.ru_majflt;
1761  // The number of times a process was "swapped" out of memory
1762  info->nswap = r_usage.ru_nswap;
1763  // The number of times the file system had to perform input
1764  info->inblock = r_usage.ru_inblock;
1765  // The number of times the file system had to perform output
1766  info->oublock = r_usage.ru_oublock;
1767  // The number of times a context switch was voluntarily
1768  info->nvcsw = r_usage.ru_nvcsw;
1769  // The number of times a context switch was forced
1770  info->nivcsw = r_usage.ru_nivcsw;
1771 
1772  return (status != 0);
1773 }
1774 
1775 void __kmp_read_system_time(double *delta) {
1776  double t_ns;
1777  struct timeval tval;
1778  struct timespec stop;
1779  int status;
1780 
1781  status = gettimeofday(&tval, NULL);
1782  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1783  TIMEVAL_TO_TIMESPEC(&tval, &stop);
1784  t_ns = TS2NS(stop) - TS2NS(__kmp_sys_timer_data.start);
1785  *delta = (t_ns * 1e-9);
1786 }
1787 
1788 void __kmp_clear_system_time(void) {
1789  struct timeval tval;
1790  int status;
1791  status = gettimeofday(&tval, NULL);
1792  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1793  TIMEVAL_TO_TIMESPEC(&tval, &__kmp_sys_timer_data.start);
1794 }
1795 
1796 static int __kmp_get_xproc(void) {
1797 
1798  int r = 0;
1799 
1800 #if KMP_OS_LINUX || KMP_OS_DRAGONFLY || KMP_OS_FREEBSD || KMP_OS_NETBSD || \
1801  KMP_OS_OPENBSD || KMP_OS_HURD
1802 
1803  r = sysconf(_SC_NPROCESSORS_ONLN);
1804 
1805 #elif KMP_OS_DARWIN
1806 
1807  // Bug C77011 High "OpenMP Threads and number of active cores".
1808 
1809  // Find the number of available CPUs.
1810  kern_return_t rc;
1811  host_basic_info_data_t info;
1812  mach_msg_type_number_t num = HOST_BASIC_INFO_COUNT;
1813  rc = host_info(mach_host_self(), HOST_BASIC_INFO, (host_info_t)&info, &num);
1814  if (rc == 0 && num == HOST_BASIC_INFO_COUNT) {
1815  // Cannot use KA_TRACE() here because this code works before trace support
1816  // is initialized.
1817  r = info.avail_cpus;
1818  } else {
1819  KMP_WARNING(CantGetNumAvailCPU);
1820  KMP_INFORM(AssumedNumCPU);
1821  }
1822 
1823 #else
1824 
1825 #error "Unknown or unsupported OS."
1826 
1827 #endif
1828 
1829  return r > 0 ? r : 2; /* guess value of 2 if OS told us 0 */
1830 
1831 } // __kmp_get_xproc
1832 
1833 int __kmp_read_from_file(char const *path, char const *format, ...) {
1834  int result;
1835  va_list args;
1836 
1837  FILE *f = fopen(path, "rb");
1838  if (f == NULL)
1839    return 0;
1840  va_start(args, format);
1841  result = vfscanf(f, format, args);
1842  va_end(args);
1843  fclose(f);
1844  return result;
1845 }
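/* Typical use is scanf-style parsing of a small pseudo-file; for example
   (the path below is illustrative only):

     int value;
     if (__kmp_read_from_file("/proc/sys/kernel/threads-max", "%d", &value) == 1) {
       // exactly one field was converted
     }

   The return value is the vfscanf() result (the number of fields converted,
   or EOF), or 0 if the file could not be opened. */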
1846 
1847 void __kmp_runtime_initialize(void) {
1848  int status;
1849  pthread_mutexattr_t mutex_attr;
1850  pthread_condattr_t cond_attr;
1851 
1852  if (__kmp_init_runtime) {
1853  return;
1854  }
1855 
1856 #if (KMP_ARCH_X86 || KMP_ARCH_X86_64)
1857  if (!__kmp_cpuinfo.initialized) {
1858  __kmp_query_cpuid(&__kmp_cpuinfo);
1859  }
1860 #endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
1861 
1862  __kmp_xproc = __kmp_get_xproc();
1863 
1864 #if ! KMP_32_BIT_ARCH
1865  struct rlimit rlim;
1866  // read stack size of calling thread, save it as default for worker threads;
1867  // this should be done before reading environment variables
1868  status = getrlimit(RLIMIT_STACK, &rlim);
1869  if (status == 0) { // success?
1870  __kmp_stksize = rlim.rlim_cur;
1871  __kmp_check_stksize(&__kmp_stksize); // check value and adjust if needed
1872  }
1873 #endif /* ! KMP_32_BIT_ARCH */
1874 
1875  if (sysconf(_SC_THREADS)) {
1876 
1877  /* Query the maximum number of threads */
1878  __kmp_sys_max_nth = sysconf(_SC_THREAD_THREADS_MAX);
1879  if (__kmp_sys_max_nth == -1) {
1880  /* Unlimited threads for NPTL */
1881  __kmp_sys_max_nth = INT_MAX;
1882  } else if (__kmp_sys_max_nth <= 1) {
1883  /* Can't tell, just use PTHREAD_THREADS_MAX */
1884  __kmp_sys_max_nth = KMP_MAX_NTH;
1885  }
1886 
1887  /* Query the minimum stack size */
1888  __kmp_sys_min_stksize = sysconf(_SC_THREAD_STACK_MIN);
1889  if (__kmp_sys_min_stksize <= 1) {
1890  __kmp_sys_min_stksize = KMP_MIN_STKSIZE;
1891  }
1892  }
1893 
1894  /* Set up minimum number of threads to switch to TLS gtid */
1895  __kmp_tls_gtid_min = KMP_TLS_GTID_MIN;
1896 
1897  status = pthread_key_create(&__kmp_gtid_threadprivate_key,
1898  __kmp_internal_end_dest);
1899  KMP_CHECK_SYSFAIL("pthread_key_create", status);
1900  status = pthread_mutexattr_init(&mutex_attr);
1901  KMP_CHECK_SYSFAIL("pthread_mutexattr_init", status);
1902  status = pthread_mutex_init(&__kmp_wait_mx.m_mutex, &mutex_attr);
1903  KMP_CHECK_SYSFAIL("pthread_mutex_init", status);
1904  status = pthread_condattr_init(&cond_attr);
1905  KMP_CHECK_SYSFAIL("pthread_condattr_init", status);
1906  status = pthread_cond_init(&__kmp_wait_cv.c_cond, &cond_attr);
1907  KMP_CHECK_SYSFAIL("pthread_cond_init", status);
1908 #if USE_ITT_BUILD
1909  __kmp_itt_initialize();
1910 #endif /* USE_ITT_BUILD */
1911 
1912  __kmp_init_runtime = TRUE;
1913 }
1914 
1915 void __kmp_runtime_destroy(void) {
1916  int status;
1917 
1918  if (!__kmp_init_runtime) {
1919  return; // Nothing to do.
1920  }
1921 
1922 #if USE_ITT_BUILD
1923  __kmp_itt_destroy();
1924 #endif /* USE_ITT_BUILD */
1925 
1926  status = pthread_key_delete(__kmp_gtid_threadprivate_key);
1927  KMP_CHECK_SYSFAIL("pthread_key_delete", status);
1928 
1929  status = pthread_mutex_destroy(&__kmp_wait_mx.m_mutex);
1930  if (status != 0 && status != EBUSY) {
1931  KMP_SYSFAIL("pthread_mutex_destroy", status);
1932  }
1933  status = pthread_cond_destroy(&__kmp_wait_cv.c_cond);
1934  if (status != 0 && status != EBUSY) {
1935  KMP_SYSFAIL("pthread_cond_destroy", status);
1936  }
1937 #if KMP_AFFINITY_SUPPORTED
1938  __kmp_affinity_uninitialize();
1939 #endif
1940 
1941  __kmp_init_runtime = FALSE;
1942 }
1943 
1944 /* Put the thread to sleep for a time period */
1945 /* NOTE: not currently used anywhere */
1946 void __kmp_thread_sleep(int millis) { sleep((millis + 500) / 1000); }
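/* Because (millis + 500) / 1000 rounds to whole seconds, requests shorter
   than 500 ms sleep for zero seconds. If sub-second resolution were ever
   needed, a sketch using nanosleep() could look like:

     struct timespec req = {millis / 1000, (long)(millis % 1000) * 1000000L};
     nanosleep(&req, NULL);
*/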
1947 
1948 /* Calculate the elapsed wall clock time for the user */
1949 void __kmp_elapsed(double *t) {
1950  int status;
1951 #ifdef FIX_SGI_CLOCK
1952  struct timespec ts;
1953 
1954  status = clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts);
1955  KMP_CHECK_SYSFAIL_ERRNO("clock_gettime", status);
1956  *t =
1957  (double)ts.tv_nsec * (1.0 / (double)KMP_NSEC_PER_SEC) + (double)ts.tv_sec;
1958 #else
1959  struct timeval tv;
1960 
1961  status = gettimeofday(&tv, NULL);
1962  KMP_CHECK_SYSFAIL_ERRNO("gettimeofday", status);
1963  *t =
1964  (double)tv.tv_usec * (1.0 / (double)KMP_USEC_PER_SEC) + (double)tv.tv_sec;
1965 #endif
1966 }
1967 
1968 /* Calculate the elapsed wall clock tick for the user */
1969 void __kmp_elapsed_tick(double *t) { *t = 1 / (double)CLOCKS_PER_SEC; }
1970 
1971 /* Return the current time stamp in nsec */
1972 kmp_uint64 __kmp_now_nsec() {
1973  struct timeval t;
1974  gettimeofday(&t, NULL);
1975  kmp_uint64 nsec = (kmp_uint64)KMP_NSEC_PER_SEC * (kmp_uint64)t.tv_sec +
1976  (kmp_uint64)1000 * (kmp_uint64)t.tv_usec;
1977  return nsec;
1978 }
1979 
1980 #if KMP_ARCH_X86 || KMP_ARCH_X86_64
1981 /* Measure clock ticks per millisecond */
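/* The calibration below spins for roughly `delay` hardware ticks while
   measuring the elapsed wall-clock time in nanoseconds with __kmp_now_nsec(),
   then sets
       __kmp_ticks_per_msec = elapsed_ticks * 1e6 / elapsed_nsec
   where elapsed_ticks = delay + (now - goal) accounts for overshooting the
   spin target and 1e6 converts from "per nanosecond" to "per millisecond". */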
1982 void __kmp_initialize_system_tick() {
1983  kmp_uint64 now, nsec2, diff;
1984  kmp_uint64 delay = 100000; // 50~100 usec on most machines.
1985  kmp_uint64 nsec = __kmp_now_nsec();
1986  kmp_uint64 goal = __kmp_hardware_timestamp() + delay;
1987  while ((now = __kmp_hardware_timestamp()) < goal)
1988  ;
1989  nsec2 = __kmp_now_nsec();
1990  diff = nsec2 - nsec;
1991  if (diff > 0) {
1992  kmp_uint64 tpms = (kmp_uint64)(1e6 * (delay + (now - goal)) / diff);
1993  if (tpms > 0)
1994  __kmp_ticks_per_msec = tpms;
1995  }
1996 }
1997 #endif
1998 
1999 /* Determine whether the given address is mapped into the current address
2000  space. */
2001 
2002 int __kmp_is_address_mapped(void *addr) {
2003 
2004  int found = 0;
2005  int rc;
2006 
2007 #if KMP_OS_LINUX || KMP_OS_HURD
2008 
2009  /* On GNUish OSes, read the /proc/<pid>/maps pseudo-file to get all the address
2010  ranges mapped into the address space. */
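/* Each line of the maps pseudo-file has the form

     7f2c8a000000-7f2c8a021000 rw-p 00000000 00:00 0     [optional path]

   i.e. "start-end perms offset dev inode [path]". The fscanf() format below
   reads the two addresses and the 4-character permission string and discards
   the remainder of the line. */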
2011 
2012  char *name = __kmp_str_format("/proc/%d/maps", getpid());
2013  FILE *file = NULL;
2014 
2015  file = fopen(name, "r");
2016  KMP_ASSERT(file != NULL);
2017 
2018  for (;;) {
2019 
2020  void *beginning = NULL;
2021  void *ending = NULL;
2022  char perms[5];
2023 
2024  rc = fscanf(file, "%p-%p %4s %*[^\n]\n", &beginning, &ending, perms);
2025  if (rc == EOF) {
2026  break;
2027  }
2028  KMP_ASSERT(rc == 3 &&
2029  KMP_STRLEN(perms) == 4); // Make sure all fields are read.
2030 
2031  // Ending address is not included in the region, but beginning is.
2032  if ((addr >= beginning) && (addr < ending)) {
2033  perms[2] = 0; // 3rd and 4th characters do not matter.
2034  if (strcmp(perms, "rw") == 0) {
2035  // Memory we are looking for should be readable and writable.
2036  found = 1;
2037  }
2038  break;
2039  }
2040  }
2041 
2042  // Free resources.
2043  fclose(file);
2044  KMP_INTERNAL_FREE(name);
2045 #elif KMP_OS_FREEBSD
2046  char *buf;
2047  size_t lstsz;
2048  int mib[] = {CTL_KERN, KERN_PROC, KERN_PROC_VMMAP, getpid()};
2049  rc = sysctl(mib, 4, NULL, &lstsz, NULL, 0);
2050  if (rc < 0)
2051  return 0;
2052  // The first sysctl call reports the current size of the vm entry list;
2053  // pad it by one third in case the map grows before the second call.
2054  lstsz = lstsz * 4 / 3;
2055  buf = reinterpret_cast<char *>(kmpc_malloc(lstsz));
2056  rc = sysctl(mib, 4, buf, &lstsz, NULL, 0);
2057  if (rc < 0) {
2058  kmpc_free(buf);
2059  return 0;
2060  }
2061 
2062  char *lw = buf;
2063  char *up = buf + lstsz;
2064 
2065  while (lw < up) {
2066  struct kinfo_vmentry *cur = reinterpret_cast<struct kinfo_vmentry *>(lw);
2067  size_t cursz = cur->kve_structsize;
2068  if (cursz == 0)
2069  break;
2070  void *start = reinterpret_cast<void *>(cur->kve_start);
2071  void *end = reinterpret_cast<void *>(cur->kve_end);
2072  // Readable/Writable addresses within current map entry
2073  if ((addr >= start) && (addr < end)) {
2074  if ((cur->kve_protection & KVME_PROT_READ) != 0 &&
2075  (cur->kve_protection & KVME_PROT_WRITE) != 0) {
2076  found = 1;
2077  break;
2078  }
2079  }
2080  lw += cursz;
2081  }
2082  kmpc_free(buf);
2083 
2084 #elif KMP_OS_DARWIN
2085 
2086  /* On OS X*, /proc pseudo filesystem is not available. Try to read memory
2087  using vm interface. */
2088 
2089  int buffer;
2090  vm_size_t count;
2091  rc = vm_read_overwrite(
2092  mach_task_self(), // Task to read memory of.
2093  (vm_address_t)(addr), // Address to read from.
2094  1, // Number of bytes to be read.
2095  (vm_address_t)(&buffer), // Address of buffer to save read bytes in.
2096  &count // Address of var to save number of read bytes in.
2097  );
2098  if (rc == 0) {
2099  // Memory successfully read.
2100  found = 1;
2101  }
2102 
2103 #elif KMP_OS_NETBSD
2104 
2105  int mib[5];
2106  mib[0] = CTL_VM;
2107  mib[1] = VM_PROC;
2108  mib[2] = VM_PROC_MAP;
2109  mib[3] = getpid();
2110  mib[4] = sizeof(struct kinfo_vmentry);
2111 
2112  size_t size;
2113  rc = sysctl(mib, __arraycount(mib), NULL, &size, NULL, 0);
2114  KMP_ASSERT(!rc);
2115  KMP_ASSERT(size);
2116 
2117  size = size * 4 / 3;
2118  struct kinfo_vmentry *kiv = (struct kinfo_vmentry *)KMP_INTERNAL_MALLOC(size);
2119  KMP_ASSERT(kiv);
2120 
2121  rc = sysctl(mib, __arraycount(mib), kiv, &size, NULL, 0);
2122  KMP_ASSERT(!rc);
2123  KMP_ASSERT(size);
2124 
2125  for (size_t i = 0; i < size / sizeof(struct kinfo_vmentry); i++) {
2126  if (kiv[i].kve_start <= (uint64_t)addr &&
2127  (uint64_t)addr < kiv[i].kve_end) {
2128  found = 1;
2129  break;
2130  }
2131  }
2132  KMP_INTERNAL_FREE(kiv);
2133 #elif KMP_OS_OPENBSD
2134 
2135  int mib[3];
2136  mib[0] = CTL_KERN;
2137  mib[1] = KERN_PROC_VMMAP;
2138  mib[2] = getpid();
2139 
2140  size_t size;
2141  uint64_t end;
2142  rc = sysctl(mib, 3, NULL, &size, NULL, 0);
2143  KMP_ASSERT(!rc);
2144  KMP_ASSERT(size);
2145  end = size;
2146 
2147  struct kinfo_vmentry kiv = {.kve_start = 0};
2148 
2149  while ((rc = sysctl(mib, 3, &kiv, &size, NULL, 0)) == 0) {
2150  KMP_ASSERT(size);
2151  if (kiv.kve_end == end)
2152  break;
2153 
2154  if (kiv.kve_start <= (uint64_t)addr && (uint64_t)addr < kiv.kve_end) {
2155  found = 1;
2156  break;
2157  }
2158  kiv.kve_start += 1;
2159  }
2160 #elif KMP_OS_DRAGONFLY
2161 
2162  // FIXME(DragonFly): Implement this
2163  found = 1;
2164 
2165 #else
2166 
2167 #error "Unknown or unsupported OS"
2168 
2169 #endif
2170 
2171  return found;
2172 
2173 } // __kmp_is_address_mapped
2174 
2175 #ifdef USE_LOAD_BALANCE
2176 
2177 #if KMP_OS_DARWIN || KMP_OS_NETBSD
2178 
2179 // The function returns the rounded value of the system load average
2180 // during given time interval which depends on the value of
2181 // __kmp_load_balance_interval variable (default is 60 sec, other values
2182 // may be 300 sec or 900 sec).
2183 // It returns -1 in case of error.
2184 int __kmp_get_load_balance(int max) {
2185  double averages[3];
2186  int ret_avg = 0;
2187 
2188  int res = getloadavg(averages, 3);
2189 
2190  // Check __kmp_load_balance_interval to determine which of averages to use.
2191  // getloadavg() may return fewer samples than requested, i.e. fewer than 3.
2193  if (__kmp_load_balance_interval < 180 && (res >= 1)) {
2194  ret_avg = averages[0]; // 1 min
2195  } else if ((__kmp_load_balance_interval >= 180 &&
2196  __kmp_load_balance_interval < 600) &&
2197  (res >= 2)) {
2198  ret_avg = averages[1]; // 5 min
2199  } else if ((__kmp_load_balance_interval >= 600) && (res == 3)) {
2200  ret_avg = averages[2]; // 15 min
2201  } else { // Error occurred
2202  return -1;
2203  }
2204 
2205  return ret_avg;
2206 }
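/* For example, with the default __kmp_load_balance_interval of 60 seconds the
   1-minute average (averages[0]) is used, an interval of 300 selects the
   5-minute average, and 900 selects the 15-minute average. */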
2207 
2208 #else // Linux* OS
2209 
2210 // The function returns the number of running (not sleeping) threads, or -1 in
2211 // case of error. An error is reported if the Linux* OS kernel is too old (i.e.
2212 // lacks "/proc" support). Counting stops once max running threads have been
2213 // encountered.
2214 int __kmp_get_load_balance(int max) {
2215  static int permanent_error = 0;
2216  static int glb_running_threads = 0; // Saved count of the running threads for
2217  // the thread balance algorithm
2218  static double glb_call_time = 0; /* Thread balance algorithm call time */
2219 
2220  int running_threads = 0; // Number of running threads in the system.
2221 
2222  DIR *proc_dir = NULL; // Handle of "/proc/" directory.
2223  struct dirent *proc_entry = NULL;
2224 
2225  kmp_str_buf_t task_path; // "/proc/<pid>/task/<tid>/" path.
2226  DIR *task_dir = NULL; // Handle of "/proc/<pid>/task/<tid>/" directory.
2227  struct dirent *task_entry = NULL;
2228  int task_path_fixed_len;
2229 
2230  kmp_str_buf_t stat_path; // "/proc/<pid>/task/<tid>/stat" path.
2231  int stat_file = -1;
2232  int stat_path_fixed_len;
2233 
2234  int total_processes = 0; // Total number of processes in system.
2235  int total_threads = 0; // Total number of threads in system.
2236 
2237  double call_time = 0.0;
2238 
2239  __kmp_str_buf_init(&task_path);
2240  __kmp_str_buf_init(&stat_path);
2241 
2242  __kmp_elapsed(&call_time);
2243 
2244  if (glb_call_time &&
2245  (call_time - glb_call_time < __kmp_load_balance_interval)) {
2246  running_threads = glb_running_threads;
2247  goto finish;
2248  }
2249 
2250  glb_call_time = call_time;
2251 
2252  // Do not spend time on scanning "/proc/" if we have a permanent error.
2253  if (permanent_error) {
2254  running_threads = -1;
2255  goto finish;
2256  }
2257 
2258  if (max <= 0) {
2259  max = INT_MAX;
2260  }
2261 
2262  // Open "/proc/" directory.
2263  proc_dir = opendir("/proc");
2264  if (proc_dir == NULL) {
2265  // Cannot open "/proc/". Probably the kernel does not support it. Return an
2266  // error now and in subsequent calls.
2267  running_threads = -1;
2268  permanent_error = 1;
2269  goto finish;
2270  }
2271 
2272  // Initialize fixed part of task_path. This part will not change.
2273  __kmp_str_buf_cat(&task_path, "/proc/", 6);
2274  task_path_fixed_len = task_path.used; // Remember number of used characters.
2275 
2276  proc_entry = readdir(proc_dir);
2277  while (proc_entry != NULL) {
2278  // Proc entry is a directory and name starts with a digit. Assume it is a
2279  // process' directory.
2280  if (proc_entry->d_type == DT_DIR && isdigit(proc_entry->d_name[0])) {
2281 
2282  ++total_processes;
2283  // Make sure init process is the very first in "/proc", so we can replace
2284  // strcmp( proc_entry->d_name, "1" ) == 0 with simpler total_processes ==
2285  // 1. We are going to check that total_processes == 1 => d_name == "1" is
2286  // true (where "=>" is implication). Since C++ does not have => operator,
2287  // let us replace it with its equivalent: a => b == ! a || b.
2288  KMP_DEBUG_ASSERT(total_processes != 1 ||
2289  strcmp(proc_entry->d_name, "1") == 0);
2290 
2291  // Construct task_path.
2292  task_path.used = task_path_fixed_len; // Reset task_path to "/proc/".
2293  __kmp_str_buf_cat(&task_path, proc_entry->d_name,
2294  KMP_STRLEN(proc_entry->d_name));
2295  __kmp_str_buf_cat(&task_path, "/task", 5);
2296 
2297  task_dir = opendir(task_path.str);
2298  if (task_dir == NULL) {
2299  // Process can finish between reading "/proc/" directory entry and
2300  // opening process' "task/" directory. So, in general case we should not
2301  // complain, but have to skip this process and read the next one. But on
2302  // systems with no "task/" support we will spend lot of time to scan
2303  // "/proc/" tree again and again without any benefit. "init" process
2304  // (its pid is 1) should exist always, so, if we cannot open
2305  // "/proc/1/task/" directory, it means "task/" is not supported by
2306  // kernel. Report an error now and in the future.
2307  if (strcmp(proc_entry->d_name, "1") == 0) {
2308  running_threads = -1;
2309  permanent_error = 1;
2310  goto finish;
2311  }
2312  } else {
2313  // Construct fixed part of stat file path.
2314  __kmp_str_buf_clear(&stat_path);
2315  __kmp_str_buf_cat(&stat_path, task_path.str, task_path.used);
2316  __kmp_str_buf_cat(&stat_path, "/", 1);
2317  stat_path_fixed_len = stat_path.used;
2318 
2319  task_entry = readdir(task_dir);
2320  while (task_entry != NULL) {
2321  // It is a directory and name starts with a digit.
2322  if (task_entry->d_type == DT_DIR && isdigit(task_entry->d_name[0])) {
2323  ++total_threads;
2324 
2325  // Construct complete stat file path. Easiest way would be:
2326  // __kmp_str_buf_print( & stat_path, "%s/%s/stat", task_path.str,
2327  // task_entry->d_name );
2328  // but a series of __kmp_str_buf_cat calls works a bit faster.
2329  stat_path.used =
2330  stat_path_fixed_len; // Reset stat path to its fixed part.
2331  __kmp_str_buf_cat(&stat_path, task_entry->d_name,
2332  KMP_STRLEN(task_entry->d_name));
2333  __kmp_str_buf_cat(&stat_path, "/stat", 5);
2334 
2335  // Note: Low-level API (open/read/close) is used. High-level API
2336  // (fopen/fclose) works ~ 30 % slower.
2337  stat_file = open(stat_path.str, O_RDONLY);
2338  if (stat_file == -1) {
2339  // We cannot report an error because task (thread) can terminate
2340  // just before reading this file.
2341  } else {
2342  /* Content of "stat" file looks like:
2343  24285 (program) S ...
2344 
2345  It is a single line (if the program name does not include funny
2346  symbols). The first number is the thread id, then the executable file
2347  name in parentheses, then the state of the thread. We need just the
2348  thread state.
2349 
2350  Good news: Length of program name is 15 characters max. Longer
2351  names are truncated.
2352 
2353  Thus, we need a rather short buffer: 15 chars for the program name +
2354  2 parentheses + 3 spaces + ~7 digits of pid = 37.
2355 
2356  Bad news: The program name may contain special symbols like space,
2357  closing parenthesis, or even newline. This makes parsing the
2358  "stat" file not 100% reliable. For funny program names, parsing
2359  may fail (reporting an incorrect thread state).
2360 
2361  Parsing the "status" file looks more promising (due to its different
2362  structure and escaping of special symbols), but reading and parsing
2363  "status" is slower.
2364  -- ln
2365  */
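/* For example, a stat line such as

     24285 (a.out) R 24280 ...

   is handled by locating the ") " that terminates the (possibly truncated)
   program name; the character that follows it ('R' here) is the thread
   state, and only 'R' (running) threads are counted. */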
2366  char buffer[65];
2367  int len;
2368  len = read(stat_file, buffer, sizeof(buffer) - 1);
2369  if (len >= 0) {
2370  buffer[len] = 0;
2371  // Using scanf:
2372  // sscanf( buffer, "%*d (%*s) %c ", & state );
2373  // looks very nice, but searching for a closing parenthesis
2374  // works a bit faster.
2375  char *close_parent = strstr(buffer, ") ");
2376  if (close_parent != NULL) {
2377  char state = *(close_parent + 2);
2378  if (state == 'R') {
2379  ++running_threads;
2380  if (running_threads >= max) {
2381  goto finish;
2382  }
2383  }
2384  }
2385  }
2386  close(stat_file);
2387  stat_file = -1;
2388  }
2389  }
2390  task_entry = readdir(task_dir);
2391  }
2392  closedir(task_dir);
2393  task_dir = NULL;
2394  }
2395  }
2396  proc_entry = readdir(proc_dir);
2397  }
2398 
2399  // There _might_ be a timing hole where the thread executing this
2400  // code gets skipped in the load balance, and running_threads is 0.
2401  // Assert in the debug builds only!!!
2402  KMP_DEBUG_ASSERT(running_threads > 0);
2403  if (running_threads <= 0) {
2404  running_threads = 1;
2405  }
2406 
2407 finish: // Clean up and exit.
2408  if (proc_dir != NULL) {
2409  closedir(proc_dir);
2410  }
2411  __kmp_str_buf_free(&task_path);
2412  if (task_dir != NULL) {
2413  closedir(task_dir);
2414  }
2415  __kmp_str_buf_free(&stat_path);
2416  if (stat_file != -1) {
2417  close(stat_file);
2418  }
2419 
2420  glb_running_threads = running_threads;
2421 
2422  return running_threads;
2423 
2424 } // __kmp_get_load_balance
2425 
2426 #endif // KMP_OS_DARWIN || KMP_OS_NETBSD
2427 
2428 #endif // USE_LOAD_BALANCE
2429 
2430 #if !(KMP_ARCH_X86 || KMP_ARCH_X86_64 || KMP_MIC || \
2431  ((KMP_OS_LINUX || KMP_OS_DARWIN) && KMP_ARCH_AARCH64) || \
2432  KMP_ARCH_PPC64 || KMP_ARCH_RISCV64)
2433 
2434 // We really only need the case with 1 argument, because CLANG always builds
2435 // a struct of pointers to shared variables referenced in the outlined function.
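// For illustration (the names below are hypothetical), the single-argument
// form corresponds to an outlined function of the shape
//   void outlined(int *global_tid, int *bound_tid, void *captured_vars);
// which is invoked through this routine as
//   (*pkfn)(&gtid, &tid, p_argv[0]);
// the remaining cases simply forward up to 15 individual arguments.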
2436 int __kmp_invoke_microtask(microtask_t pkfn, int gtid, int tid, int argc,
2437  void *p_argv[]
2438 #if OMPT_SUPPORT
2439  ,
2440  void **exit_frame_ptr
2441 #endif
2442  ) {
2443 #if OMPT_SUPPORT
2444  *exit_frame_ptr = OMPT_GET_FRAME_ADDRESS(0);
2445 #endif
2446 
2447  switch (argc) {
2448  default:
2449  fprintf(stderr, "Too many args to microtask: %d!\n", argc);
2450  fflush(stderr);
2451  exit(-1);
2452  case 0:
2453  (*pkfn)(&gtid, &tid);
2454  break;
2455  case 1:
2456  (*pkfn)(&gtid, &tid, p_argv[0]);
2457  break;
2458  case 2:
2459  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1]);
2460  break;
2461  case 3:
2462  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2]);
2463  break;
2464  case 4:
2465  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3]);
2466  break;
2467  case 5:
2468  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4]);
2469  break;
2470  case 6:
2471  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2472  p_argv[5]);
2473  break;
2474  case 7:
2475  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2476  p_argv[5], p_argv[6]);
2477  break;
2478  case 8:
2479  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2480  p_argv[5], p_argv[6], p_argv[7]);
2481  break;
2482  case 9:
2483  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2484  p_argv[5], p_argv[6], p_argv[7], p_argv[8]);
2485  break;
2486  case 10:
2487  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2488  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9]);
2489  break;
2490  case 11:
2491  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2492  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10]);
2493  break;
2494  case 12:
2495  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2496  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2497  p_argv[11]);
2498  break;
2499  case 13:
2500  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2501  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2502  p_argv[11], p_argv[12]);
2503  break;
2504  case 14:
2505  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2506  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2507  p_argv[11], p_argv[12], p_argv[13]);
2508  break;
2509  case 15:
2510  (*pkfn)(&gtid, &tid, p_argv[0], p_argv[1], p_argv[2], p_argv[3], p_argv[4],
2511  p_argv[5], p_argv[6], p_argv[7], p_argv[8], p_argv[9], p_argv[10],
2512  p_argv[11], p_argv[12], p_argv[13], p_argv[14]);
2513  break;
2514  }
2515 
2516  return 1;
2517 }
2518 
2519 #endif
2520 
2521 // end of file //