LLVM OpenMP* Runtime Library
kmp_affinity.h
/*
 * kmp_affinity.h -- header for affinity management
 */

//===----------------------------------------------------------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is dual licensed under the MIT and the University of Illinois Open
// Source Licenses. See LICENSE.txt for details.
//
//===----------------------------------------------------------------------===//

#ifndef KMP_AFFINITY_H
#define KMP_AFFINITY_H

#include "kmp.h"
#include "kmp_os.h"

#if KMP_AFFINITY_SUPPORTED
#if KMP_USE_HWLOC
class KMPHwlocAffinity : public KMPAffinity {
public:
  class Mask : public KMPAffinity::Mask {
    hwloc_cpuset_t mask;

  public:
    Mask() {
      mask = hwloc_bitmap_alloc();
      this->zero();
    }
    ~Mask() { hwloc_bitmap_free(mask); }
    void set(int i) override { hwloc_bitmap_set(mask, i); }
    bool is_set(int i) const override { return hwloc_bitmap_isset(mask, i); }
    void clear(int i) override { hwloc_bitmap_clr(mask, i); }
    void zero() override { hwloc_bitmap_zero(mask); }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      hwloc_bitmap_copy(mask, convert->mask);
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_and(mask, mask, convert->mask);
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      hwloc_bitmap_or(mask, mask, convert->mask);
    }
    void bitwise_not() override { hwloc_bitmap_not(mask, mask); }
    int begin() const override { return hwloc_bitmap_first(mask); }
    int end() const override { return -1; }
    int next(int previous) const override {
      return hwloc_bitmap_next(mask, previous);
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          hwloc_get_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          hwloc_set_cpubind(__kmp_hwloc_topology, mask, HWLOC_CPUBIND_THREAD);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
    int get_proc_group() const override {
      int i;
      int group = -1;
#if KMP_OS_WINDOWS
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (i = 0; i < __kmp_num_proc_groups; i++) {
        // On Windows, the long type is always 32 bits
        unsigned long first_32_bits = hwloc_bitmap_to_ith_ulong(mask, i * 2);
        unsigned long second_32_bits =
            hwloc_bitmap_to_ith_ulong(mask, i * 2 + 1);
        if (first_32_bits == 0 && second_32_bits == 0) {
          continue;
        }
        if (group >= 0) {
          return -1;
        }
        group = i;
      }
#endif /* KMP_OS_WINDOWS */
      return group;
    }
  };
  void determine_capable(const char *var) override {
    const hwloc_topology_support *topology_support;
    if (__kmp_hwloc_topology == NULL) {
      if (hwloc_topology_init(&__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_init()");
      }
      if (hwloc_topology_load(__kmp_hwloc_topology) < 0) {
        __kmp_hwloc_error = TRUE;
        if (__kmp_affinity_verbose)
          KMP_WARNING(AffHwlocErrorOccurred, var, "hwloc_topology_load()");
      }
    }
    topology_support = hwloc_topology_get_support(__kmp_hwloc_topology);
    // Is the system capable of setting/getting this thread's affinity?
    // Also, is topology discovery possible? (pu indicates ability to discover
    // processing units). And finally, were there no errors when calling any
    // hwloc_* API functions?
    if (topology_support && topology_support->cpubind->set_thisthread_cpubind &&
        topology_support->cpubind->get_thisthread_cpubind &&
        topology_support->discovery->pu && !__kmp_hwloc_error) {
      // enables affinity according to KMP_AFFINITY_CAPABLE() macro
      KMP_AFFINITY_ENABLE(TRUE);
    } else {
      // indicate that hwloc didn't work and disable affinity
      __kmp_hwloc_error = TRUE;
      KMP_AFFINITY_DISABLE();
    }
  }
  void bind_thread(int which) override {
    KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                "Illegal set affinity operation when not capable");
    KMPAffinity::Mask *mask;
    KMP_CPU_ALLOC_ON_STACK(mask);
    KMP_CPU_ZERO(mask);
    KMP_CPU_SET(which, mask);
    __kmp_set_system_affinity(mask, TRUE);
    KMP_CPU_FREE_FROM_STACK(mask);
  }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    delete[] hwloc_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *hwloc_array = static_cast<Mask *>(array);
    return &(hwloc_array[index]);
  }
  api_type get_api_type() const override { return HWLOC; }
};
#endif /* KMP_USE_HWLOC */
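
// A note on the Mask iteration protocol used by every implementation in this
// file: begin() returns the first set bit, next(i) returns the next set bit
// after i, and end() is the sentinel value that next() eventually produces
// (-1 for hwloc, matching hwloc_bitmap_next(); one past the highest
// representable bit for the native masks below). A minimal sketch of walking
// a mask, assuming `m` points at any concrete Mask:
//
//   for (int proc = m->begin(); proc != m->end(); proc = m->next(proc)) {
//     // `proc` is an OS processor id present in the mask
//   }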

#if KMP_OS_LINUX
/* On some of the older OSes that we build on, these constants aren't present
   in <asm/unistd.h> #included from <sys/syscall.h>. They must be the same on
   all systems of the same arch where they are defined, and they cannot
   change; they are effectively set in stone. */
#include <sys/syscall.h>
#if KMP_ARCH_X86 || KMP_ARCH_ARM
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 241
#elif __NR_sched_setaffinity != 241
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 242
#elif __NR_sched_getaffinity != 242
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_AARCH64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 122
#elif __NR_sched_setaffinity != 122
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 123
#elif __NR_sched_getaffinity != 123
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_X86_64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 203
#elif __NR_sched_setaffinity != 203
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 204
#elif __NR_sched_getaffinity != 204
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_PPC64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 222
#elif __NR_sched_setaffinity != 222
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 223
#elif __NR_sched_getaffinity != 223
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 4239
#elif __NR_sched_setaffinity != 4239
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 4240
#elif __NR_sched_getaffinity != 4240
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#elif KMP_ARCH_MIPS64
#ifndef __NR_sched_setaffinity
#define __NR_sched_setaffinity 5195
#elif __NR_sched_setaffinity != 5195
#error Wrong code for setaffinity system call.
#endif /* __NR_sched_setaffinity */
#ifndef __NR_sched_getaffinity
#define __NR_sched_getaffinity 5196
#elif __NR_sched_getaffinity != 5196
#error Wrong code for getaffinity system call.
#endif /* __NR_sched_getaffinity */
#else
#error Unknown or unsupported architecture
#endif /* KMP_ARCH_* */
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef unsigned char mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;

  public:
    mask_t *mask;
    Mask() { mask = (mask_t *)__kmp_allocate(__kmp_affin_mask_size); }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_affin_mask_size; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_affin_mask_size * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int get_system_affinity(bool abort_on_error) override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal get affinity operation when not capable");
      int retval =
          syscall(__NR_sched_getaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
    int set_system_affinity(bool abort_on_error) const override {
      KMP_ASSERT2(KMP_AFFINITY_CAPABLE(),
                  "Illegal set affinity operation when not capable");
      int retval =
          syscall(__NR_sched_setaffinity, 0, __kmp_affin_mask_size, mask);
      if (retval >= 0) {
        return 0;
      }
      int error = errno;
      if (abort_on_error) {
        __kmp_msg(kmp_ms_fatal, KMP_MSG(FatalSysError), KMP_ERR(error),
                  __kmp_msg_null);
      }
      return error;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override {
    KMPNativeAffinity::Mask *retval = new Mask();
    return retval;
  }
  void deallocate_mask(KMPAffinity::Mask *m) override {
    KMPNativeAffinity::Mask *native_mask =
        static_cast<KMPNativeAffinity::Mask *>(m);
    delete native_mask;
  }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *linux_array = static_cast<Mask *>(array);
    delete[] linux_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *linux_array = static_cast<Mask *>(array);
    return &(linux_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_LINUX */

#if KMP_OS_WINDOWS
class KMPNativeAffinity : public KMPAffinity {
  class Mask : public KMPAffinity::Mask {
    typedef ULONG_PTR mask_t;
    static const int BITS_PER_MASK_T = sizeof(mask_t) * CHAR_BIT;
    mask_t *mask;

  public:
    Mask() {
      mask = (mask_t *)__kmp_allocate(sizeof(mask_t) * __kmp_num_proc_groups);
    }
    ~Mask() {
      if (mask)
        __kmp_free(mask);
    }
    void set(int i) override {
      mask[i / BITS_PER_MASK_T] |= ((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    bool is_set(int i) const override {
      return (mask[i / BITS_PER_MASK_T] & ((mask_t)1 << (i % BITS_PER_MASK_T)));
    }
    void clear(int i) override {
      mask[i / BITS_PER_MASK_T] &= ~((mask_t)1 << (i % BITS_PER_MASK_T));
    }
    void zero() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = 0;
    }
    void copy(const KMPAffinity::Mask *src) override {
      const Mask *convert = static_cast<const Mask *>(src);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = convert->mask[i];
    }
    void bitwise_and(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] &= convert->mask[i];
    }
    void bitwise_or(const KMPAffinity::Mask *rhs) override {
      const Mask *convert = static_cast<const Mask *>(rhs);
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] |= convert->mask[i];
    }
    void bitwise_not() override {
      for (size_t i = 0; i < __kmp_num_proc_groups; ++i)
        mask[i] = ~(mask[i]);
    }
    int begin() const override {
      int retval = 0;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int end() const override { return __kmp_num_proc_groups * BITS_PER_MASK_T; }
    int next(int previous) const override {
      int retval = previous + 1;
      while (retval < end() && !is_set(retval))
        ++retval;
      return retval;
    }
    int set_system_affinity(bool abort_on_error) const override {
      if (__kmp_num_proc_groups > 1) {
        // Check for a valid mask.
        GROUP_AFFINITY ga;
        int group = get_proc_group();
        if (group < 0) {
          if (abort_on_error) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
          }
          return -1;
        }
        // Transform the bit vector into a GROUP_AFFINITY struct
        // and make the system call to set affinity.
        ga.Group = group;
        ga.Mask = mask[group];
        ga.Reserved[0] = ga.Reserved[1] = ga.Reserved[2] = 0;

        KMP_DEBUG_ASSERT(__kmp_SetThreadGroupAffinity != NULL);
        if (__kmp_SetThreadGroupAffinity(GetCurrentThread(), &ga, NULL) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
      } else {
        if (!SetThreadAffinityMask(GetCurrentThread(), *mask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal, KMP_MSG(CantSetThreadAffMask),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
      }
      return 0;
    }
    int get_system_affinity(bool abort_on_error) override {
      if (__kmp_num_proc_groups > 1) {
        this->zero();
        GROUP_AFFINITY ga;
        KMP_DEBUG_ASSERT(__kmp_GetThreadGroupAffinity != NULL);
        if (__kmp_GetThreadGroupAffinity(GetCurrentThread(), &ga) == 0) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "GetThreadGroupAffinity()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        if ((ga.Group < 0) || (ga.Group > __kmp_num_proc_groups) ||
            (ga.Mask == 0)) {
          return -1;
        }
        mask[ga.Group] = ga.Mask;
      } else {
        mask_t newMask, sysMask, retval;
        if (!GetProcessAffinityMask(GetCurrentProcess(), &newMask, &sysMask)) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "GetProcessAffinityMask()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        retval = SetThreadAffinityMask(GetCurrentThread(), newMask);
        if (!retval) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
          return error;
        }
        newMask = SetThreadAffinityMask(GetCurrentThread(), retval);
        if (!newMask) {
          DWORD error = GetLastError();
          if (abort_on_error) {
            __kmp_msg(kmp_ms_fatal,
                      KMP_MSG(FunctionError, "SetThreadAffinityMask()"),
                      KMP_ERR(error), __kmp_msg_null);
          }
        }
        *mask = retval;
      }
      return 0;
    }
    int get_proc_group() const override {
      int group = -1;
      if (__kmp_num_proc_groups == 1) {
        return 1;
      }
      for (int i = 0; i < __kmp_num_proc_groups; i++) {
        if (mask[i] == 0)
          continue;
        if (group >= 0)
          return -1;
        group = i;
      }
      return group;
    }
  };
  void determine_capable(const char *env_var) override {
    __kmp_affinity_determine_capable(env_var);
  }
  void bind_thread(int which) override { __kmp_affinity_bind_thread(which); }
  KMPAffinity::Mask *allocate_mask() override { return new Mask(); }
  void deallocate_mask(KMPAffinity::Mask *m) override { delete m; }
  KMPAffinity::Mask *allocate_mask_array(int num) override {
    return new Mask[num];
  }
  void deallocate_mask_array(KMPAffinity::Mask *array) override {
    Mask *windows_array = static_cast<Mask *>(array);
    delete[] windows_array;
  }
  KMPAffinity::Mask *index_mask_array(KMPAffinity::Mask *array,
                                      int index) override {
    Mask *windows_array = static_cast<Mask *>(array);
    return &(windows_array[index]);
  }
  api_type get_api_type() const override { return NATIVE_OS; }
};
#endif /* KMP_OS_WINDOWS */
#endif /* KMP_AFFINITY_SUPPORTED */
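
// All of the implementations above expose the same small KMPAffinity
// interface; the runtime selects one back end at startup. A minimal usage
// sketch, assuming `affinity` points at whichever back end was selected and
// that affinity is capable on this system:
//
//   KMPAffinity::Mask *mask = affinity->allocate_mask();
//   mask->zero();
//   mask->set(3);                     // restrict this thread to OS proc 3
//   mask->set_system_affinity(TRUE);  // TRUE => abort on failure
//   affinity->deallocate_mask(mask);
//
// The runtime itself normally goes through the KMP_CPU_* macros (as
// bind_thread() above does) rather than calling these methods directly, but
// the underlying call sequence is the same.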

class Address {
public:
  static const unsigned maxDepth = 32;
  unsigned labels[maxDepth];
  unsigned childNums[maxDepth];
  unsigned depth;
  unsigned leader;
  Address(unsigned _depth) : depth(_depth), leader(FALSE) {}
  Address &operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
      labels[i] = b.labels[i];
      childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
  }
  bool operator==(const Address &b) const {
    if (depth != b.depth)
      return false;
    for (unsigned i = 0; i < depth; i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool isClose(const Address &b, int level) const {
    if (depth != b.depth)
      return false;
    if ((unsigned)level >= depth)
      return true;
    for (unsigned i = 0; i < (depth - level); i++)
      if (labels[i] != b.labels[i])
        return false;
    return true;
  }
  bool operator!=(const Address &b) const { return !operator==(b); }
  void print() const {
    unsigned i;
    printf("Depth: %u --- ", depth);
    for (i = 0; i < depth; i++) {
      printf("%u ", labels[i]);
    }
  }
};
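
// The labels in an Address run from the outermost topology level down to the
// innermost one, so on a 2-socket machine with 4 cores per socket and 2
// hardware threads per core, the second hardware thread of core 3 on socket 1
// might be recorded as (values illustrative):
//
//   Address addr(/*_depth=*/3);
//   addr.labels[0] = 1; // socket
//   addr.labels[1] = 3; // core within the socket
//   addr.labels[2] = 1; // hardware thread within the core
//
// childNums[i] is the node's ordinal among its siblings at level i; it is
// what hierarchy_info::deriveLevels() below uses to size each level of the
// barrier tree.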

class AddrUnsPair {
public:
  Address first;
  unsigned second;
  AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {}
  AddrUnsPair &operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
  }
  void print() const {
    printf("first = ");
    first.print();
    printf(" --- second = %u", second);
  }
  bool operator==(const AddrUnsPair &b) const {
    if (first != b.first)
      return false;
    if (second != b.second)
      return false;
    return true;
  }
  bool operator!=(const AddrUnsPair &b) const { return !operator==(b); }
};

static int __kmp_affinity_cmp_Address_labels(const void *a, const void *b) {
  const Address *aa = &(((const AddrUnsPair *)a)->first);
  const Address *bb = &(((const AddrUnsPair *)b)->first);
  unsigned depth = aa->depth;
  unsigned i;
  KMP_DEBUG_ASSERT(depth == bb->depth);
  for (i = 0; i < depth; i++) {
    if (aa->labels[i] < bb->labels[i])
      return -1;
    if (aa->labels[i] > bb->labels[i])
      return 1;
  }
  return 0;
}

/* A structure for holding machine-specific hierarchy info to be computed once
   at init. This structure represents a mapping of threads to the actual
   machine hierarchy, or to our best guess at what the hierarchy might be, for
   the purpose of performing an efficient barrier. In the worst case, when
   there is no machine hierarchy information, it produces a tree suitable for
   a barrier, similar to the tree used in the hyper barrier. */
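
// As a concrete, illustrative example: on a machine with 2 sockets, 4 cores
// per socket and 2 hardware threads per core, init() below would end up with
//
//   numPerLevel  = {2, 4, 2, 1, 1, 1, 1}   // fan-out per level, leaves first
//   skipPerLevel = {1, 2, 8, 16, 32, ...}  // leaves under a node at each level
//
// so a core subtree spans 2 threads, a socket subtree 8, and the whole
// machine 16; the extra doubling levels past the real depth absorb
// oversubscription.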
class hierarchy_info {
public:
  /* Good default values for number of leaves and branching factor, given no
     affinity information. Behaves a bit like hyper barrier. */
  static const kmp_uint32 maxLeaves = 4;
  static const kmp_uint32 minBranch = 4;
  // Maximum number of levels the numPerLevel/skipPerLevel arrays can hold.
  kmp_uint32 maxLevels;

  // Depth of the hierarchy actually in use (number of non-trivial levels).
  kmp_uint32 depth;
  // Number of threads the hierarchy was sized for; see resize().
  kmp_uint32 base_num_threads;
  enum init_status { initialized = 0, not_initialized = 1, initializing = 2 };
  volatile kmp_int8 uninitialized; // 0=initialized, 1=not initialized,
                                   // 2=initialization in progress
  volatile kmp_int8 resizing; // 0=not resizing, 1=resizing

  // Level 0 corresponds to the leaves. numPerLevel[i] is the fan-out at level
  // i (how many level-i nodes share a common parent); skipPerLevel[i] is the
  // number of leaves contained in a subtree rooted at level i.
  kmp_uint32 *numPerLevel;
  kmp_uint32 *skipPerLevel;

  void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
    int hier_depth = adr2os[0].first.depth;
    int level = 0;
    for (int i = hier_depth - 1; i >= 0; --i) {
      int max = -1;
      for (int j = 0; j < num_addrs; ++j) {
        int next = adr2os[j].first.childNums[i];
        if (next > max)
          max = next;
      }
      numPerLevel[level] = max + 1;
      ++level;
    }
  }

  hierarchy_info()
      : maxLevels(7), depth(1), uninitialized(not_initialized), resizing(0) {}

  void fini() {
    if (!uninitialized && numPerLevel) {
      __kmp_free(numPerLevel);
      numPerLevel = NULL;
      uninitialized = not_initialized;
    }
  }

  void init(AddrUnsPair *adr2os, int num_addrs) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(
        &uninitialized, not_initialized, initializing);
    if (bool_result == 0) { // Wait for initialization
      while (TCR_1(uninitialized) != initialized)
        KMP_CPU_PAUSE();
      return;
    }
    KMP_DEBUG_ASSERT(bool_result == 1);

    /* Explicitly initialize the data fields here to prevent use of dirty
       values observed when the static library is re-initialized multiple
       times (e.g. when a non-OpenMP thread repeatedly launches/joins a thread
       that uses OpenMP). */
    depth = 1;
    resizing = 0;
    maxLevels = 7;
    numPerLevel =
        (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
    skipPerLevel = &(numPerLevel[maxLevels]);
    for (kmp_uint32 i = 0; i < maxLevels;
         ++i) { // init numPerLevel[*] to 1 item per level
      numPerLevel[i] = 1;
      skipPerLevel[i] = 1;
    }

    // Sort table by physical ID
    if (adr2os) {
      qsort(adr2os, num_addrs, sizeof(*adr2os),
            __kmp_affinity_cmp_Address_labels);
      deriveLevels(adr2os, num_addrs);
    } else {
      numPerLevel[0] = maxLeaves;
      numPerLevel[1] = num_addrs / maxLeaves;
      if (num_addrs % maxLeaves)
        numPerLevel[1]++;
    }

    base_num_threads = num_addrs;
    for (int i = maxLevels - 1; i >= 0;
         --i) // count non-empty levels to get depth
      if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
        depth++;

    kmp_uint32 branch = minBranch;
    if (numPerLevel[0] == 1)
      branch = num_addrs / maxLeaves;
    if (branch < minBranch)
      branch = minBranch;
    for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
      while (numPerLevel[d] > branch ||
             (d == 0 && numPerLevel[d] > maxLeaves)) { // max 4 on level 0!
        if (numPerLevel[d] & 1)
          numPerLevel[d]++;
        numPerLevel[d] = numPerLevel[d] >> 1;
        if (numPerLevel[d + 1] == 1)
          depth++;
        numPerLevel[d + 1] = numPerLevel[d + 1] << 1;
      }
      if (numPerLevel[0] == 1) {
        branch = branch >> 1;
        if (branch < 4)
          branch = minBranch;
      }
    }

    for (kmp_uint32 i = 1; i < depth; ++i)
      skipPerLevel[i] = numPerLevel[i - 1] * skipPerLevel[i - 1];
    // Fill in hierarchy in the case of oversubscription
    for (kmp_uint32 i = depth; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    uninitialized = initialized; // One writer
  }
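
  // For example (illustrative): init(NULL, 64), i.e. no address table and 64
  // threads, settles on numPerLevel = {4, 4, 4, 1, ...}, depth = 4 and
  // skipPerLevel = {1, 4, 16, 64, ...}: a 4-ary tree wide enough to cover all
  // 64 threads, which the hierarchical barrier code then walks.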

  // Resize the hierarchy if nproc changes to something larger than before
  void resize(kmp_uint32 nproc) {
    kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    while (bool_result == 0) { // someone else is trying to resize
      KMP_CPU_PAUSE();
      if (nproc <= base_num_threads) // happy with other thread's resize
        return;
      else // try to resize
        bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
    }
    KMP_DEBUG_ASSERT(bool_result != 0);
    if (nproc <= base_num_threads)
      return; // happy with other thread's resize

    // Calculate new maxLevels
    kmp_uint32 old_sz = skipPerLevel[depth - 1];
    kmp_uint32 incs = 0, old_maxLevels = maxLevels;
    // First see if old maxLevels is enough to contain new size
    for (kmp_uint32 i = depth; i < maxLevels && nproc > old_sz; ++i) {
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];
      numPerLevel[i - 1] *= 2;
      old_sz *= 2;
      depth++;
    }
    if (nproc > old_sz) { // Not enough space, need to expand hierarchy
      while (nproc > old_sz) {
        old_sz *= 2;
        incs++;
        depth++;
      }
      maxLevels += incs;

      // Resize arrays
      kmp_uint32 *old_numPerLevel = numPerLevel;
      kmp_uint32 *old_skipPerLevel = skipPerLevel;
      numPerLevel = skipPerLevel = NULL;
      numPerLevel =
          (kmp_uint32 *)__kmp_allocate(maxLevels * 2 * sizeof(kmp_uint32));
      skipPerLevel = &(numPerLevel[maxLevels]);

      // Copy old elements from old arrays
      for (kmp_uint32 i = 0; i < old_maxLevels; ++i) {
        numPerLevel[i] = old_numPerLevel[i];
        skipPerLevel[i] = old_skipPerLevel[i];
      }

      // Init new elements in arrays to 1
      for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i) {
        numPerLevel[i] = 1;
        skipPerLevel[i] = 1;
      }

      // Free old arrays
      __kmp_free(old_numPerLevel);
    }

    // Fill in oversubscription levels of hierarchy
    for (kmp_uint32 i = old_maxLevels; i < maxLevels; ++i)
      skipPerLevel[i] = 2 * skipPerLevel[i - 1];

    base_num_threads = nproc;
    resizing = 0; // One writer
  }
};
#endif // KMP_AFFINITY_H