LLVM OpenMP* Runtime Library
kmp_affinity.h
1 /*
2  * kmp_affinity.h -- header for affinity management
3  */
4 
5 //===----------------------------------------------------------------------===//
6 //
7 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
8 // See https://llvm.org/LICENSE.txt for license information.
9 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
10 //
11 //===----------------------------------------------------------------------===//
12 
13 #ifndef KMP_AFFINITY_H
14 #define KMP_AFFINITY_H
15 
16 #include "kmp.h"
17 #include "kmp_os.h"
18 
19 #if KMP_AFFINITY_SUPPORTED
20 #if KMP_USE_HWLOC
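// Affinity implementation layered on the hwloc library: the mask is an
// hwloc_cpuset_t bitmap, and binding goes through the hwloc cpubind API
// (hwloc_get_cpubind/hwloc_set_cpubind) on the shared __kmp_hwloc_topology.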
21 class KMPHwlocAffinity : public KMPAffinity {
22 public:
23  class Mask : public KMPAffinity::Mask {
24  hwloc_cpuset_t mask;
25 
26  public:
27  Mask() {
28  mask = hwloc_bitmap_alloc();
29  this->zero();
30  }
31  ~Mask() { hwloc_bitmap_free(mask); }
32  void set(int i) override { hwloc_bitmap_set(mask, i); }
33  bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
34  void clear(int i) override { hwloc_bitmap_clr(mask, i); }
35  void zero() override { hwloc_bitmap_zero(mask); }
36  void copy(const KMPAffinity::Mask *src) override {
37  const Mask *convert = static_cast<const Mask *>(src);
38  hwloc_bitmap_copy(mask, convert->mask);
39  }
40  void bitwise_and(const KMPAffinity::Mask *rhs) override {
41  const Mask *convert = static_cast<const Mask *>(rhs);
42  hwloc_bitmap_and(mask, mask, convert->mask);
43  }
44  void bitwise_or(const KMPAffinity::Mask *rhs) override {
45  const Mask *convert = static_cast<const Mask *>(rhs);
46  hwloc_bitmap_or(mask, mask, convert->mask);
47  }
48  void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
49  int begin() const override { return hwloc_bitmap_first(mask); }
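  // hwloc_bitmap_next() returns -1 once no further bit is set, so end() below
  // uses -1 as the end-of-iteration sentinel.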
50  int end() const override { return -1; }
51  int next(int previous) const override {
52  return hwloc_bitmap_next(mask, previous);
53  }
54  int get_system_affinity(bool abort_on_error) override {
55  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
56  "Illegal get affinity operation when not capable");
57  int retval =
58  hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
59  if (retval >= 0) {
60  return 0;
61  }
62  int error = errno;
63  if (abort_on_error) {
64  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
65  }
66  return error;
67  }
68  int set_system_affinity(bool abort_on_error) const override {
69  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
70  "Illegal get affinity operation when not capable");
71  int retval =
72  hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
73  if (retval >= 0) {
74  return 0;
75  }
76  int error = errno;
77  if (abort_on_error) {
78  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
79  }
80  return error;
81  }
82  int get_proc_group() const override {
83  int group = -1;
84 #if KMP_OS_WINDOWS
85  if (__kmp_num_proc_groups == 1) {
86  return 1;
87  }
88  for (int i = 0; i < __kmp_num_proc_groups; i++) {
89  // On Windows, the long type is always 32 bits, so each processor group spans two 32-bit ulongs of the bitmap
90  unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
91  unsigned long second_32_bits =
92  hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
93  if (first_32_bits == 0 && second_32_bits == 0) {
94  continue;
95  }
96  if (group >= 0) {
97  return -1;
98  }
99  group = i;
100  }
101 #endif /* KMP_OS_WINDOWS */
102  return group;
103  }
104  };
105  void determine_capable(const char *var) override {
106  const hwloc_topology_support *topology_support;
107  if (__kmp_hwloc_topology == NULL) {
108  if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
109  __kmp_hwloc_error = TRUE;
110  if (__kmp_affinity_verbose)
111  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
112  }
113  if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
114  __kmp_hwloc_error = TRUE;
115  if (__kmp_affinity_verbose)
116  KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
117  }
118  }
119  topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
120  // Is the system capable of setting/getting this thread's affinity?
121  // Also, is topology discovery possible? (pu indicates ability to discover
122  // processing units). And finally, were there no errors when calling any
123  // hwloc_* API functions?
124  if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
125  topology_support->cpubind->get_thisthread_cpubind &&
126  topology_support->discovery->pu && !__kmp_hwloc_error) {
127  // enables affinity according to KMP_AFFINITY_CAPABLE() macro
128  KMP_AFFINITY_ENABLE(TRUE);
129  } else {
130  // indicate that hwloc didn't work and disable affinity
131  __kmp_hwloc_error = TRUE;
132  KMP_AFFINITY_DISABLE();
133  }
134  }
135  void bind_thread(int which) override {
136  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
137  "Illegal set affinity operation when not capable");
138  KMPAffinity::Mask *mask;
139  KMP_CPU_ALLOC_ON_STACK(mask);
140  KMP_CPU_ZERO(mask);
141  KMP_CPU_SET(which, mask);
142  __kmp_set_system_affinity(mask, TRUE);
143  KMP_CPU_FREE_FROM_STACK(mask);
144  }
145  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
146  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
147  KMPAffinity::Mask *allocate_mask_array(int num) override {
148  return new Mask[num];
149  }
150  void deallocate_mask_array(KMPAffinity::Mask *array) override {
151  Mask *hwloc_array = static_cast<Mask *>(array);
152  delete[] hwloc_array;
153  }
154  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
155  int index) override {
156  Mask *hwloc_array = static_cast<Mask *>(array);
157  return &(hwloc_array[index]);
158  }
159  api_type get_api_type() const override { return HWLOC; }
160 };
161 #endif /* KMP_USE_HWLOC */
162 
163 #if KMP_OS_LINUX || KMP_OS_FREEBSD
164 #if KMP_OS_LINUX
165 /* On some of the older OSes that we build on, these constants aren't present
166  in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
167  all systems of the same arch where they are defined, and they cannot change;
168  they are carved in stone forever. */
169 #include <sys/syscall.h>
170 #if KMP_ARCH_X86 || KMP_ARCH_ARM
171 #ifndef __NR_sched_setaffinity
172 #define __NR_sched_setaffinity 241
173 #elif __NR_sched_setaffinity != 241
174 #error Wrong code for setaffinity system call.
175 #endif /* __NR_sched_setaffinity */
176 #ifndef __NR_sched_getaffinity
177 #define __NR_sched_getaffinity 242
178 #elif __NR_sched_getaffinity != 242
179 #error Wrong code for getaffinity system call.
180 #endif /* __NR_sched_getaffinity */
181 #elif KMP_ARCH_AARCH64
182 #ifndef __NR_sched_setaffinity
183 #define __NR_sched_setaffinity 122
184 #elif __NR_sched_setaffinity != 122
185 #error Wrong code for setaffinity system call.
186 #endif /* __NR_sched_setaffinity */
187 #ifndef __NR_sched_getaffinity
188 #define __NR_sched_getaffinity 123
189 #elif __NR_sched_getaffinity != 123
190 #error Wrong code for getaffinity system call.
191 #endif /* __NR_sched_getaffinity */
192 #elif KMP_ARCH_X86_64
193 #ifndef __NR_sched_setaffinity
194 #define __NR_sched_setaffinity 203
195 #elif __NR_sched_setaffinity != 203
196 #error Wrong code for setaffinity system call.
197 #endif /* __NR_sched_setaffinity */
198 #ifndef __NR_sched_getaffinity
199 #define __NR_sched_getaffinity 204
200 #elif __NR_sched_getaffinity != 204
201 #error Wrong code for getaffinity system call.
202 #endif /* __NR_sched_getaffinity */
203 #elif KMP_ARCH_PPC64
204 #ifndef __NR_sched_setaffinity
205 #define __NR_sched_setaffinity 222
206 #elif __NR_sched_setaffinity != 222
207 #error Wrong code for setaffinity system call.
208 #endif /* __NR_sched_setaffinity */
209 #ifndef __NR_sched_getaffinity
210 #define __NR_sched_getaffinity 223
211 #elif __NR_sched_getaffinity != 223
212 #error Wrong code for getaffinity system call.
213 #endif /* __NR_sched_getaffinity */
214 #elif KMP_ARCH_MIPS
215 #ifndef __NR_sched_setaffinity
216 #define __NR_sched_setaffinity 4239
217 #elif __NR_sched_setaffinity != 4239
218 #error Wrong code for setaffinity system call.
219 #endif /* __NR_sched_setaffinity */
220 #ifndef __NR_sched_getaffinity
221 #define __NR_sched_getaffinity 4240
222 #elif __NR_sched_getaffinity != 4240
223 #error Wrong code for getaffinity system call.
224 #endif /* __NR_sched_getaffinity */
225 #elif KMP_ARCH_MIPS64
226 #ifndef __NR_sched_setaffinity
227 #define __NR_sched_setaffinity 5195
228 #elif __NR_sched_setaffinity != 5195
229 #error Wrong code for setaffinity system call.
230 #endif /* __NR_sched_setaffinity */
231 #ifndef __NR_sched_getaffinity
232 #define __NR_sched_getaffinity 5196
233 #elif __NR_sched_getaffinity != 5196
234 #error Wrong code for getaffinity system call.
235 #endif /* __NR_sched_getaffinity */
236 #else
237 #error Unknown or unsupported architecture
238 #endif /* KMP_ARCH_* */
239 #elif KMP_OS_FREEBSD
240 #include <pthread.h>
241 #include <pthread_np.h>
242 #endif
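// Affinity implementation using the native OS interface: the mask is a raw
// bit vector of __kmp_affin_mask_size bytes, and binding goes through the
// sched_{get,set}affinity syscalls on Linux or pthread_{get,set}affinity_np
// on FreeBSD.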
243 class KMPNativeAffinity : public KMPAffinity {
244  class Mask : public KMPAffinity::Mask {
245  typedef unsigned char mask_t;
246  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
247 
248  public:
249  mask_t *mask;
250  Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
251  ~Mask() {
252  if (mask)
253  __kmp_free(mask);
254  }
255  void set(int i) override {
256  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
257  }
258  bool is_set(int i) const override {
259  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
260  }
261  void clear(int i) override {
262  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
263  }
264  void zero() override {
265  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
266  mask[i] = 0;
267  }
268  void copy(const KMPAffinity::Mask *src) override {
269  const Mask *convert = static_cast<const Mask *>(src);
270  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
271  mask[i] = convert->mask[i];
272  }
273  void bitwise_and(const KMPAffinity::Mask *rhs) override {
274  const Mask *convert = static_cast<const Mask *>(rhs);
275  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
276  mask[i] &= convert->mask[i];
277  }
278  void bitwise_or(const KMPAffinity::Mask *rhs) override {
279  const Mask *convert = static_cast<const Mask *>(rhs);
280  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
281  mask[i] |= convert->mask[i];
282  }
283  void bitwise_not() override {
284  for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
285  mask[i] = ~(mask[i]);
286  }
287  int begin() const override {
288  int retval = 0;
289  while (retval < end() && !is_set(retval))
290  ++retval;
291  return retval;
292  }
293  int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
294  int next(int previous) const override {
295  int retval = previous + 1;
296  while (retval < end() && !is_set(retval))
297  ++retval;
298  return retval;
299  }
300  int get_system_affinity(bool abort_on_error) override {
301  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
302  "Illegal get affinity operation when not capable");
303 #if KMP_OS_LINUX
304  int retval =
305  syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
306 #elif KMP_OS_FREEBSD
307  int retval =
308  pthread_getaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
309 #endif
310  if (retval >= 0) {
311  return 0;
312  }
313  int error = errno;
314  if (abort_on_error) {
315  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
316  }
317  return error;
318  }
319  int set_system_affinity(bool abort_on_error) const override {
320  KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
321  "Illegal get affinity operation when not capable");
322 #if KMP_OS_LINUX
323  int retval =
324  syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
325 #elif KMP_OS_FREEBSD
326  int retval =
327  pthread_setaffinity_np(pthread_self(), __kmp_affin_mask_size, reinterpret_cast<cpuset_t *>(mask));
328 #endif
329  if (retval >= 0) {
330  return 0;
331  }
332  int error = errno;
333  if (abort_on_error) {
334  __kmp_fatal(KMP_MSG(FatalSysError), KMP_ERR(error), __kmp_msg_null);
335  }
336  return error;
337  }
338  };
339  void determine_capable(const char *env_var) override {
340  __kmp_affinity_determine_capable(env_var);
341  }
342  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
343  KMPAffinity::Mask *allocate_mask() override {
344  KMPNativeAffinity::Mask *retval = new Mask();
345  return retval;
346  }
347  void deallocate_mask(KMPAffinity::Mask *m) override {
348  KMPNativeAffinity::Mask *native_mask =
349  static_cast<KMPNativeAffinity::Mask *>(m);
350  delete native_mask;
351  }
352  KMPAffinity::Mask *allocate_mask_array(int num) override {
353  return new Mask[num];
354  }
355  void deallocate_mask_array(KMPAffinity::Mask *array) override {
356  Mask *linux_array = static_cast<Mask *>(array);
357  delete[] linux_array;
358  }
359  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
360  int index) override {
361  Mask *linux_array = static_cast<Mask *>(array);
362  return &(linux_array[index]);
363  }
364  api_type get_api_type() const override { return NATIVE_OS; }
365 };
366 #endif /* KMP_OS_LINUX || KMP_OS_FREEBSD */
367 
368 #if KMP_OS_WINDOWS
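// Windows native affinity implementation. The mask holds one ULONG_PTR per
// processor group. With a single group, SetThreadAffinityMask is used; with
// multiple groups, the mask must fall within one group and is applied through
// a GROUP_AFFINITY struct via __kmp_SetThreadGroupAffinity.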
369 class KMPNativeAffinity : public KMPAffinity {
370  class Mask : public KMPAffinity::Mask {
371  typedef ULONG_PTR mask_t;
372  static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
373  mask_t *mask;
374 
375  public:
376  Mask() {
377  mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
378  }
379  ~Mask() {
380  if (mask)
381  __kmp_free(mask);
382  }
383  void set(int i) override {
384  mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
385  }
386  bool is_set(int i) const override {
387  return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
388  }
389  void clear(int i) override {
390  mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
391  }
392  void zero() override {
393  for (int i = 0; i < __kmp_num_proc_groups; ++i)
394  mask[i] = 0;
395  }
396  void copy(const KMPAffinity::Mask *src) override {
397  const Mask *convert = static_cast<const Mask *>(src);
398  for (int i = 0; i < __kmp_num_proc_groups; ++i)
399  mask[i] = convert->mask[i];
400  }
401  void bitwise_and(const KMPAffinity::Mask *rhs) override {
402  const Mask *convert = static_cast<const Mask *>(rhs);
403  for (int i = 0; i < __kmp_num_proc_groups; ++i)
404  mask[i] &= convert->mask[i];
405  }
406  void bitwise_or(const KMPAffinity::Mask *rhs) override {
407  const Mask *convert = static_cast<const Mask *>(rhs);
408  for (int i = 0; i < __kmp_num_proc_groups; ++i)
409  mask[i] |= convert->mask[i];
410  }
411  void bitwise_not() override {
412  for (int i = 0; i < __kmp_num_proc_groups; ++i)
413  mask[i] = ~(mask[i]);
414  }
415  int begin() const override {
416  int retval = 0;
417  while (retval < end() && !is_set(retval))
418  ++retval;
419  return retval;
420  }
421  int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
422  int next(int previous) const override {
423  int retval = previous + 1;
424  while (retval < end() && !is_set(retval))
425  ++retval;
426  return retval;
427  }
428  int set_system_affinity(bool abort_on_error) const override {
429  if (__kmp_num_proc_groups > 1) {
430  // Check for a valid mask.
431  GROUP_AFFINITY ga;
432  int group = get_proc_group();
433  if (group < 0) {
434  if (abort_on_error) {
435  KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
436  }
437  return -1;
438  }
439  // Transform the bit vector into a GROUP_AFFINITY struct
440  // and make the system call to set affinity.
441  ga.Group = group;
442  ga.Mask = mask[group];
443  ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;
444 
445  KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
446  if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
447  DWORD error = GetLastError();
448  if (abort_on_error) {
449  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
450  __kmp_msg_null);
451  }
452  return error;
453  }
454  } else {
455  if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
456  DWORD error = GetLastError();
457  if (abort_on_error) {
458  __kmp_fatal(KMP_MSG(CantSetThreadAffMask), KMP_ERR(error),
459  __kmp_msg_null);
460  }
461  return error;
462  }
463  }
464  return 0;
465  }
466  int get_system_affinity(bool abort_on_error) override {
467  if (__kmp_num_proc_groups > 1) {
468  this->zero();
469  GROUP_AFFINITY ga;
470  KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
471  if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
472  DWORD error = GetLastError();
473  if (abort_on_error) {
474  __kmp_fatal(KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
475  KMP_ERR(error), __kmp_msg_null);
476  }
477  return error;
478  }
479  if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
480  (ga.Mask == 0)) {
481  return -1;
482  }
483  mask[ga.Group] = ga.Mask;
484  } else {
485  mask_t newMask, sysMask, retval;
486  if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
487  DWORD error = GetLastError();
488  if (abort_on_error) {
489  __kmp_fatal(KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
490  KMP_ERR(error), __kmp_msg_null);
491  }
492  return error;
493  }
494  retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
495  if (!retval) {
496  DWORD error = GetLastError();
497  if (abort_on_error) {
498  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
499  KMP_ERR(error), __kmp_msg_null);
500  }
501  return error;
502  }
503  newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
504  if (!newMask) {
505  DWORD error = GetLastError();
506  if (abort_on_error) {
507  __kmp_fatal(KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
508  KMP_ERR(error), __kmp_msg_null);
509  }
510  }
511  *mask = retval;
512  }
513  return 0;
514  }
515  int get_proc_group() const override {
516  int group = -1;
517  if (__kmp_num_proc_groups == 1) {
518  return 1;
519  }
520  for (int i = 0; i < __kmp_num_proc_groups; i++) {
521  if (mask[i] == 0)
522  continue;
523  if (group >= 0)
524  return -1;
525  group = i;
526  }
527  return group;
528  }
529  };
530  void determine_capable(const char *env_var) override {
531  __kmp_affinity_determine_capable(env_var);
532  }
533  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
534  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
535  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
536  KMPAffinity::Mask *allocate_mask_array(int num) override {
537  return new Mask[num];
538  }
539  void deallocate_mask_array(KMPAffinity::Mask *array) override {
540  Mask *windows_array = static_cast<Mask *>(array);
541  delete[] windows_array;
542  }
543  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
544  int index) override {
545  Mask *windows_array = static_cast<Mask *>(array);
546  return &(windows_array[index]);
547  }
548  api_type get_api_type() const override { return NATIVE_OS; }
549 };
550 #endif /* KMP_OS_WINDOWS */
551 #endif /* KMP_AFFINITY_SUPPORTED */
552 
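// An Address identifies a position in the machine topology as a tuple of
// labels, one per hierarchy level (typically package, core, thread), ordered
// from outermost to innermost; childNums records each node's index among its
// parent's children at the corresponding level.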
553 class Address {
554 public:
555  static const unsigned maxDepth = 32;
556  unsigned labels[maxDepth];
557  unsigned childNums[maxDepth];
558  unsigned depth;
559  unsigned leader;
560  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
561  Address &operator=(const Address &b) {
562  depth = b.depth;
563  for (unsigned i = 0; i < depth; i++) {
564  labels[i] = b.labels[i];
565  childNums[i] = b.childNums[i];
566  }
567  leader = FALSE;
568  return *this;
569  }
570  bool operator==(const Address &b) const {
571  if (depth != b.depth)
572  return false;
573  for (unsigned i = 0; i < depth; i++)
574  if (labels[i] != b.labels[i])
575  return false;
576  return true;
577  }
578  bool isClose(const Address &b, int level) const {
579  if (depth != b.depth)
580  return false;
581  if ((unsigned)level >= depth)
582  return true;
583  for (unsigned i = 0; i < (depth - level); i++)
584  if (labels[i] != b.labels[i])
585  return false;
586  return true;
587  }
588  bool operator!=(const Address &b) const { return !operator==(b); }
589  void print() const {
590  unsigned i;
591  printf("Depth: %u --- ", depth);
592  for (i = 0; i < depth; i++) {
593  printf("%u ", labels[i]);
594  }
595  }
596 };
597 
598 class AddrUnsPair {
599 public:
600  Address first;
601  unsigned second;
602  AddrUnsPair(Address _first, unsigned _second)
603  : first(_first), second(_second) {}
604  AddrUnsPair &operator=(const AddrUnsPair &b) {
605  first = b.first;
606  second = b.second;
607  return *this;
608  }
609  void print() const {
610  printf("first = ");
611  first.print();
612  printf(" --- second = %u", second);
613  }
614  bool operator==(const AddrUnsPair &b) const {
615  if (first != b.first)
616  return false;
617  if (second != b.second)
618  return false;
619  return true;
620  }
621  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
622 };
623 
624 static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
625  const Address *aa = &(((const AddrUnsPair *)a)->first);
626  const Address *bb = &(((const AddrUnsPair *)b)->first);
627  unsigned depth = aa->depth;
628  unsigned i;
629  KMP_DEBUG_ASSERT(depth == bb->depth);
630  for (i = 0; i < depth; i++) {
631  if (aa->labels[i] < bb->labels[i])
632  return -1;
633  if (aa->labels[i] > bb->labels[i])
634  return 1;
635  }
636  return 0;
637 }
638 
639 /* A structure for holding machine-specific hierarchy info to be computed once
640  at init. This structure represents a mapping of threads to the actual machine
641  hierarchy, or to our best guess at what the hierarchy might be, for the
642  purpose of performing an efficient barrier. In the worst case, when there is
643  no machine hierarchy information, it produces a tree suitable for a barrier,
644  similar to the tree used in the hyper barrier. */
645 class hierarchy_info {
646 public:
647  /* Good default values for number of leaves and branching factor, given no
648  affinity information. Behaves a bit like hyper barrier. */
649  static const kmp_uint32 maxLeaves = 4;
650  static const kmp_uint32 minBranch = 4;
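  // Maximum number of levels the numPerLevel/skipPerLevel arrays can hold.
  // When the machine is oversubscribed, extra levels are added, doubling the
  // capacity of the hierarchy per added level, and resize() grows this limit
  // as needed.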
656  kmp_uint32 maxLevels;
657 
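  // Depth of the machine hierarchy actually in use: the number of significant
  // entries in numPerLevel, excluding all but one trailing 1.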
662  kmp_uint32 depth;
663  kmp_uint32 base_num_threads;
664  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
665  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
666  // 2=initialization in progress
667  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing
668 
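  // numPerLevel[i] is the branching factor at level i (level 0 = leaves);
  // skipPerLevel[i] is the stride between subtree roots at level i, i.e. the
  // product numPerLevel[0] * ... * numPerLevel[i-1]. For example, a machine
  // with 4 packages, 4 cores per package, and 2 threads per core gives
  // numPerLevel = {2, 4, 4, 1, ...} and skipPerLevel = {1, 2, 8, 32, ...}.
  // Unused levels are left at 1.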
673  kmp_uint32 *numPerLevel;
674  kmp_uint32 *skipPerLevel;
675 
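  // Derive numPerLevel[] from the address table: numPerLevel[0] comes from the
  // innermost (last) level of the addresses, and each entry is one more than
  // the largest childNum observed at that level.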
676  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
677  int hier_depth = adr2os[0].first.depth;
678  int level = 0;
679  for (int i = hier_depth - 1; i >= 0; --i) {
680  int max = -1;
681  for (int j = 0; j < num_addrs; ++j) {
682  int next = adr2os[j].first.childNums[i];
683  if (next > max)
684  max = next;
685  }
686  numPerLevel[level] = max + 1;
687  ++level;
688  }
689  }
690 
691  hierarchy_info()
692  : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}
693 
694  void fini() {
695  if (!uninitialized && numPerLevel) {
696  __kmp_free(numPerLevel);
697  numPerLevel = NULL;
698  uninitialized = not_initialized;
699  }
700  }
701 
702  void init(AddrUnsPair *adr2os, int num_addrs) {
703  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
704  &uninitialized, not_initialized, initializing);
705  if (bool_result == 0) { // Wait for initialization
706  while (TCR_1(uninitialized) != initialized)
707  KMP_CPU_PAUSE();
708  return;
709  }
710  KMP_DEBUG_ASSERT(bool_result == 1);
711 
712  /* Explicitly initialize the data fields here to prevent use of dirty
713  values observed when the static library is re-initialized multiple times
714  (e.g. when a non-OpenMP thread repeatedly launches/joins a thread that
715  uses OpenMP). */
716  depth = 1;
717  resizing = 0;
718  maxLevels = 7;
719  numPerLevel =
720  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
721  skipPerLevel = &(numPerLevel[maxLevels]);
722  for (kmp_uint32 i = 0; i < maxLevels;
723  ++i) { // init numPerLevel[*] to 1 item per level
724  numPerLevel[i] = 1;
725  skipPerLevel[i] = 1;
726  }
727 
728  // Sort table by physical ID
729  if (adr2os) {
730  qsort(adr2os, num_addrs, sizeof(*adr2os),
731  __kmp_affinity_cmp_Address_labels);
732  deriveLevels(adr2os, num_addrs);
733  } else {
734  numPerLevel[0] = maxLeaves;
735  numPerLevel[1] = num_addrs / maxLeaves;
736  if (num_addrs % maxLeaves)
737  numPerLevel[1]++;
738  }
739 
740  base_num_threads = num_addrs;
741  for (int i = maxLevels - 1; i >= 0;
742  --i) // count non-empty levels to get depth
743  if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
744  depth++;
745 
746  kmp_uint32 branch = minBranch;
747  if (numPerLevel[0] == 1)
748  branch = num_addrs / maxLeaves;
749  if (branch < minBranch)
750  branch = minBranch;
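  // Narrow any level whose fan-out exceeds the branch factor (or maxLeaves at
  // level 0) by halving it (rounding up) and doubling the level above it,
  // growing depth whenever a previously empty level becomes populated.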
751  for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
752  while (numPerLevel[d] > branch ||
753  (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
754  if (numPerLevel[d] & 1)
755  numPerLevel[d]++;
756  numPerLevel[d] = numPerLevel[d] >> 1;
757  if (numPerLevel[d + 1] == 1)
758  depth++;
759  numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
760  }
761  if (numPerLevel[0] == 1) {
762  branch = branch >> 1;
763  if (branch < 4)
764  branch = minBranch;
765  }
766  }
767 
768  for (kmp_uint32 i = 1; i < depth; ++i)
769  skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
770  // Fill in hierarchy in the case of oversubscription
771  for (kmp_uint32 i = depth; i < maxLevels; ++i)
772  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
773 
774  uninitialized = initialized; // One writer
775  }
776 
777  // Resize the hierarchy if nproc changes to something larger than before
778  void resize(kmp_uint32 nproc) {
779  kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
780  while (bool_result == 0) { // someone else is trying to resize
781  KMP_CPU_PAUSE();
782  if (nproc <= base_num_threads) // happy with other thread's resize
783  return;
784  else // try to resize
785  bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
786  }
787  KMP_DEBUG_ASSERT(bool_result != 0);
788  if (nproc <= base_num_threads)
789  return; // happy with other thread's resize
790 
791  // Calculate new maxLevels
792  kmp_uint32 old_sz = skipPerLevel[depth - 1];
793  kmp_uint32 incs = 0, old_maxLevels = maxLevels;
794  // First see if old maxLevels is enough to contain new size
795  for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
796  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
797  numPerLevel[i - 1] *= 2;
798  old_sz *= 2;
799  depth++;
800  }
801  if (nproc > old_sz) { // Not enough space, need to expand hierarchy
802  while (nproc > old_sz) {
803  old_sz *= 2;
804  incs++;
805  depth++;
806  }
807  maxLevels += incs;
808 
809  // Resize arrays
810  kmp_uint32 *old_numPerLevel = numPerLevel;
811  kmp_uint32 *old_skipPerLevel = skipPerLevel;
812  numPerLevel = skipPerLevel = NULL;
813  numPerLevel =
814  (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
815  skipPerLevel = &(numPerLevel[maxLevels]);
816 
817  // Copy old elements from old arrays
818  for (kmp_uint32 i = 0; i < old_maxLevels;
819  ++i) { // init numPerLevel[*] to 1 item per level
820  numPerLevel[i] = old_numPerLevel[i];
821  skipPerLevel[i] = old_skipPerLevel[i];
822  }
823 
824  // Init new elements in arrays to 1
825  for (kmp_uint32 i = old_maxLevels; i < maxLevels;
826  ++i) { // init numPerLevel[*] to 1 item per level
827  numPerLevel[i] = 1;
828  skipPerLevel[i] = 1;
829  }
830 
831  // Free old arrays
832  __kmp_free(old_numPerLevel);
833  }
834 
835  // Fill in oversubscription levels of hierarchy
836  for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
837  skipPerLevel[i] = 2 * skipPerLevel[i - 1];
838 
839  base_num_threads = nproc;
840  resizing = 0; // One writer
841  }
842 };
843 #endif // KMP_AFFINITY_H
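// Usage sketch (illustrative): the typical pattern for pinning the calling
// thread to one logical processor with this interface mirrors bind_thread()
// above and assumes affinity has already been determined capable:
//
//   KMPAffinity::Mask *mask;
//   KMP_CPU_ALLOC_ON_STACK(mask);          // mask of the active affinity kind
//   KMP_CPU_ZERO(mask);                    // clear all bits
//   KMP_CPU_SET(which, mask);              // select logical processor 'which'
//   __kmp_set_system_affinity(mask, TRUE); // bind; abort on failure
//   KMP_CPU_FREE_FROM_STACK(mask);         // release the mask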