#include "kmp_affinity.h"
#include "kmp_wrapper_getpid.h"

static hierarchy_info machine_hierarchy;

void __kmp_cleanup_hierarchy() { machine_hierarchy.fini(); }
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
  kmp_uint32 depth;

  if (TCR_1(machine_hierarchy.uninitialized))
    machine_hierarchy.init(NULL, nproc);

  // Adjust the hierarchy in case num threads exceeds original.
  if (nproc > machine_hierarchy.base_num_threads)
    machine_hierarchy.resize(nproc);

  depth = machine_hierarchy.depth;
  KMP_DEBUG_ASSERT(depth > 0);

  thr_bar->depth = depth;
  thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
  thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
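// Note: the barrier state fields filled in above mirror the hierarchy shape:
// `depth` levels, `numPerLevel[0] - 1` leaf children per lowest-level parent,
// and the per-level skip distances used to locate a thread's parent at each
// level of the hierarchical barrier.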
#if KMP_AFFINITY_SUPPORTED

bool KMPAffinity::picked_api = false;

void *KMPAffinity::Mask::operator new(size_t n) { return __kmp_allocate(n); }
void *KMPAffinity::Mask::operator new[](size_t n) { return __kmp_allocate(n); }
void KMPAffinity::Mask::operator delete(void *p) { __kmp_free(p); }
void KMPAffinity::Mask::operator delete[](void *p) { __kmp_free(p); }
void *KMPAffinity::operator new(size_t n) { return __kmp_allocate(n); }
void KMPAffinity::operator delete(void *p) { __kmp_free(p); }
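// Routing these operators through __kmp_allocate()/__kmp_free() keeps mask
// storage in the runtime's internal allocator (with its alignment and
// bookkeeping behavior) rather than the default heap.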
void KMPAffinity::pick_api() {
  KMPAffinity *affinity_dispatch;
  if (picked_api)
    return;
#if KMP_USE_HWLOC
  // Only use hwloc if the user explicitly requested the hwloc topology
  // method and affinity is not disabled.
  if (__kmp_affinity_top_method == affinity_top_method_hwloc &&
      __kmp_affinity_type != affinity_disabled) {
    affinity_dispatch = new KMPHwlocAffinity();
  } else
#endif
  {
    affinity_dispatch = new KMPNativeAffinity();
  }
  __kmp_affinity_dispatch = affinity_dispatch;
  picked_api = true;
}

void KMPAffinity::destroy_api() {
  if (__kmp_affinity_dispatch != NULL) {
    delete __kmp_affinity_dispatch;
    __kmp_affinity_dispatch = NULL;
    picked_api = false;
  }
}
// Print the affinity mask to the character array in a pretty format.
char *__kmp_affinity_print_mask(char *buf, int buf_len,
                                kmp_affin_mask_t *mask) {
  KMP_ASSERT(buf_len >= 40);
  char *scan = buf;
  char *end = buf + buf_len - 1;

  // Find first element / check for empty set.
  int i = mask->begin();
  if (i == mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, "{<empty>}");
    while (*scan != '\0')
      scan++;
    KMP_ASSERT(scan <= end);
    return buf;
  }

  KMP_SNPRINTF(scan, end - scan + 1, "{%ld", (long)i);
  while (*scan != '\0')
    scan++;
  i++;
  for (; i != mask->end(); i = mask->next(i)) {
    if (!KMP_CPU_ISSET(i, mask)) {
      continue;
    }

    // Check for buffer overflow. A string of the form ",<n>" will have at
    // most 10 characters, plus we want to leave room to print ",...}" if the
    // set is too large to print, for a total of 15 characters. We already
    // left room for '\0' in setting end.
    if (end - scan < 15) {
      break;
    }
    KMP_SNPRINTF(scan, end - scan + 1, ",%-ld", (long)i);
    while (*scan != '\0')
      scan++;
  }
  if (i != mask->end()) {
    KMP_SNPRINTF(scan, end - scan + 1, ",...");
    while (*scan != '\0')
      scan++;
  }
  KMP_SNPRINTF(scan, end - scan + 1, "}");
  while (*scan != '\0')
    scan++;
  KMP_ASSERT(scan <= end);
  return buf;
}
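// Typical use, as seen in the verbose reporting later in this file: format a
// mask into a stack buffer of the canonical size, then pass it to an
// informational message, e.g.
//   char buf[KMP_AFFIN_MASK_PRINT_LEN];
//   __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
//   KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);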
void __kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask) {
  KMP_CPU_ZERO(mask);

#if KMP_GROUP_AFFINITY

  if (__kmp_num_proc_groups > 1) {
    int group;
    KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
    for (group = 0; group < __kmp_num_proc_groups; group++) {
      int i;
      int num = __kmp_GetActiveProcessorCount(group);
      for (i = 0; i < num; i++) {
        KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
      }
    }
  } else

#endif /* KMP_GROUP_AFFINITY */

  {
    int proc;
    for (proc = 0; proc < __kmp_xproc; proc++) {
      KMP_CPU_SET(proc, mask);
    }
  }
}
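// On Windows, each processor group spans CHAR_BIT * sizeof(DWORD_PTR) bits
// (64 on a 64-bit OS), so the global proc id for local index i in group g is
// i + g * 64; the loop above sets exactly those bits.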
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to assign a child number to each address within each level.
static void __kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
                                             int numAddrs) {
  KMP_DEBUG_ASSERT(numAddrs > 0);
  int depth = address2os->first.depth;
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  int labCt;
  for (labCt = 0; labCt < depth; labCt++) {
    address2os[0].first.childNums[labCt] = counts[labCt] = 0;
    lastLabel[labCt] = address2os[0].first.labels[labCt];
  }
  int i;
  for (i = 1; i < numAddrs; i++) {
    for (labCt = 0; labCt < depth; labCt++) {
      if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
        int labCt2;
        for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
          counts[labCt2] = 0;
          lastLabel[labCt2] = address2os[i].first.labels[labCt2];
        }
        counts[labCt]++;
        lastLabel[labCt] = address2os[i].first.labels[labCt];
        break;
      }
    }
    for (labCt = 0; labCt < depth; labCt++) {
      address2os[i].first.childNums[labCt] = counts[labCt];
    }
    for (; labCt < (int)Address::maxDepth; labCt++) {
      address2os[i].first.childNums[labCt] = 0;
    }
  }
  __kmp_free(lastLabel);
  __kmp_free(counts);
}
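// Example: for sorted addresses (pkg, core) = (0,0) (0,1) (1,0) (1,1), the
// cores get child numbers 0 and 1 within package 0; the core counter resets
// when the package label changes, yielding child numbers 0 and 1 again
// within package 1.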
kmp_affin_mask_t *__kmp_affin_fullMask = NULL;

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif
static int *__kmp_pu_os_idx = NULL;

// __kmp_affinity_uniform_topology() doesn't work when called from
// places which support arbitrarily many levels in the machine topology
// map, i.e. the non-default cases in __kmp_affinity_create_cpuinfo_map().
inline static bool __kmp_affinity_uniform_topology() {
  return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
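// Example: 2 packages x 8 cores x 2 hardware threads is uniform only if all
// 32 logical processors are available; masking out even one PU makes this
// check report a non-uniform topology.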
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
static void __kmp_affinity_print_topology(AddrUnsPair *address2os, int len,
                                          int depth, int pkgLevel,
                                          int coreLevel, int threadLevel) {
  int proc;

  KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
  for (proc = 0; proc < len; proc++) {
    int level;
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);
    for (level = 0; level < depth; level++) {
      if (level == threadLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
      } else if (level == coreLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
      } else if (level == pkgLevel) {
        __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
      } else if (level > pkgLevel) {
        __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                            level - pkgLevel - 1);
      } else {
        __kmp_str_buf_print(&buf, "L%d ", level);
      }
      __kmp_str_buf_print(&buf, "%d ", address2os[proc].first.labels[level]);
    }
    KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
               buf.str);
    __kmp_str_buf_free(&buf);
  }
}
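// A line of the resulting map typically reads like:
//   OS proc 3 maps to package 0 core 1 thread 1
// with one name/label pair printed per modeled topology level.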
// Remove levels in the machine topology which don't add any information,
// i.e. levels with radix 1. The package level is always kept.
static int __kmp_affinity_remove_radix_one_levels(AddrUnsPair *address2os,
                                                  int nActiveThreads, int depth,
                                                  int *pkgLevel, int *coreLevel,
                                                  int *threadLevel) {
  int level;
  int i;
  int radix1_detected;

  for (level = depth - 1; level >= 0; --level) {
    // Always keep the package level.
    if (level == *pkgLevel)
      continue;
    // Detect if this level is radix 1.
    radix1_detected = 1;
    for (i = 1; i < nActiveThreads; ++i) {
      if (address2os[0].first.labels[level] !=
          address2os[i].first.labels[level]) {
        // There are differing label values for this level so it stays.
        radix1_detected = 0;
        break;
      }
    }
    if (!radix1_detected)
      continue;
    // Radix 1 was detected.
    if (level == *threadLevel) {
      // If only one thread per core, just decrement the depth, which removes
      // the thread level from address2os.
      for (i = 0; i < nActiveThreads; ++i) {
        address2os[i].first.depth--;
      }
      *threadLevel = -1;
    } else if (level == *coreLevel) {
      // For the core level, move the thread labels over if they are still
      // valid (*threadLevel != -1), and also reduce the depth another level.
      for (i = 0; i < nActiveThreads; ++i) {
        if (*threadLevel != -1) {
          address2os[i].first.labels[*coreLevel] =
              address2os[i].first.labels[*threadLevel];
        }
        address2os[i].first.depth--;
      }
      *coreLevel = -1;
    }
  }
  return address2os[0].first.depth;
}
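// Example: on a machine with one hardware thread per core, every address has
// the same thread label, so the thread level is radix-1 and is removed,
// shrinking each address from (pkg, core, thread) to (pkg, core).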
#if KMP_USE_HWLOC

// Returns the number of objects of type 'type' below 'obj' within the
// topology tree structure.
static int __kmp_hwloc_get_nobjs_under_obj(hwloc_obj_t obj,
                                           hwloc_obj_type_t type) {
  int retval = 0;
  hwloc_obj_t first;
  for (first = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, obj->type,
                                           obj->logical_index, type, 0);
       first != NULL &&
       hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, obj->type,
                                      first) == obj;
       first = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, first->type,
                                          first)) {
    ++retval;
  }
  return retval;
}
static int __kmp_affinity_create_hwloc_map(AddrUnsPair **address2os,
                                           kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    // Hack to try and infer the machine topology using only the data
    // available from the hwloc topology on the current thread, and
    // __kmp_xproc.
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    nCoresPerPkg = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0),
        HWLOC_OBJ_CORE);
    __kmp_nThreadsPerCore = __kmp_hwloc_get_nobjs_under_obj(
        hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE, 0),
        HWLOC_OBJ_PU);
    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);

  // The hwloc map models three levels: package, core, PU.
  int depth = 3;
  int pkgLevel = 0, coreLevel = 1, threadLevel = 2;
  int nActiveThreads = 0;
  int socket_identifier = 0;
  // Re-calculate globals to count only accessible resources.
  __kmp_ncores = nPackages = nCoresPerPkg = __kmp_nThreadsPerCore = 0;
  hwloc_obj_t socket, core, pu;
  for (socket =
           hwloc_get_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, 0);
       socket != NULL; socket = hwloc_get_next_obj_by_type(
                           __kmp_hwloc_topology, HWLOC_OBJ_PACKAGE, socket),
      socket_identifier++) {
    int core_identifier = 0;
    int num_active_cores = 0;
    for (core = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, socket->type,
                                            socket->logical_index,
                                            HWLOC_OBJ_CORE, 0);
         core != NULL &&
         hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, socket->type,
                                        core) == socket;
         core = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_CORE,
                                           core),
        core_identifier++) {
      int pu_identifier = 0;
      int num_active_threads = 0;
      for (pu = hwloc_get_obj_below_by_type(__kmp_hwloc_topology, core->type,
                                            core->logical_index, HWLOC_OBJ_PU,
                                            0);
           pu != NULL &&
           hwloc_get_ancestor_obj_by_type(__kmp_hwloc_topology, core->type,
                                          pu) == core;
           pu = hwloc_get_next_obj_by_type(__kmp_hwloc_topology, HWLOC_OBJ_PU,
                                           pu),
          pu_identifier++) {
        Address addr(3);
        if (!KMP_CPU_ISSET(pu->os_index, __kmp_affin_fullMask))
          continue; // skip inactive (inaccessible) unit
        KA_TRACE(20,
                 ("Hwloc inserting %d (%d) %d (%d) %d (%d) into address2os\n",
                  socket->os_index, socket->logical_index, core->os_index,
                  core->logical_index, pu->os_index, pu->logical_index));
        addr.labels[0] = socket_identifier; // package
        addr.labels[1] = core_identifier; // core
        addr.labels[2] = pu_identifier; // pu
        retval[nActiveThreads] = AddrUnsPair(addr, pu->os_index);
        __kmp_pu_os_idx[nActiveThreads] =
            pu->os_index; // keep OS index for each active pu
        nActiveThreads++;
        ++num_active_threads; // count active threads per core
      }
      if (num_active_threads) { // were there any active threads on the core?
        ++__kmp_ncores; // count total active cores
        ++num_active_cores; // count active cores per socket
        if (num_active_threads > __kmp_nThreadsPerCore)
          __kmp_nThreadsPerCore = num_active_threads; // calc maximum
      }
    }
    if (num_active_cores) { // were there any active cores on the socket?
      ++nPackages; // count total active packages
      if (num_active_cores > nCoresPerPkg)
        nCoresPerPkg = num_active_cores; // calc maximum
    }
  }
  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, just return).
  KMP_DEBUG_ASSERT(nActiveThreads == __kmp_avail_proc);
  KMP_ASSERT(nActiveThreads > 0);
  if (nActiveThreads == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the table by physical Id.
  qsort(retval, nActiveThreads, sizeof(*retval),
        __kmp_affinity_cmp_Address_labels);

  // Check to see if the machine topology is uniform.
  int uniform =
      (nPackages * nCoresPerPkg * __kmp_nThreadsPerCore == nActiveThreads);

  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUsingHwloc, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", nPackages);
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }

  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  depth = __kmp_affinity_remove_radix_one_levels(
      retval, nActiveThreads, depth, &pkgLevel, &coreLevel, &threadLevel);

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nActiveThreads, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}
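// __kmp_affinity_gran_levels counts how many of the innermost map levels are
// collapsed into one place. E.g. granularity=core on a (package, core,
// thread) map collapses one level, so both hardware threads of a core share
// a single affinity mask.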
#endif // KMP_USE_HWLOC

// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread id's <-> processor id's.
static int __kmp_affinity_create_flat_map(AddrUnsPair **address2os,
                                          kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Even if __kmp_affinity_type == affinity_none, this routine might still
  // be called to set __kmp_ncores, as well as __kmp_nThreadsPerCore,
  // nCoresPerPkg, & nPackages.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);
    __kmp_ncores = nPackages = __kmp_xproc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = nPackages = __kmp_avail_proc;
  __kmp_nThreadsPerCore = nCoresPerPkg = 1;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                              __kmp_affin_fullMask);

    KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    KMP_INFORM(Uniform, "KMP_AFFINITY");
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  if (__kmp_affinity_type == affinity_none) {
    int avail_ct = 0;
    int i;
    KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
      if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask))
        continue;
      __kmp_pu_os_idx[avail_ct++] = i; // suppose indices are flat
    }
    return 0;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(1);
    addr.labels[0] = i;
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);
  }
  if (__kmp_affinity_verbose) {
    KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Only the package level is modeled in the machine topology map,
    // so the #levels of granularity is either 0 or 1.
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels = 1;
    } else {
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 1;
}
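// In the flat model each available OS proc is its own "package": its single
// label is its own OS id, so granularity can only distinguish "all procs in
// one place" (gran_levels = 1) from "one proc per place" (gran_levels = 0).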
#if KMP_GROUP_AFFINITY

// If multiple Windows* OS processor groups exist, we can create a 2-level
// topology map with the groups at level 0 and the individual procs at level
// 1. This facilitates letting the threads float among all procs in a group,
// if granularity=group (the default when there are multiple groups).
static int __kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
                                                kmp_i18n_id_t *const msg_id) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // If we aren't affinity capable, then return now.
  // The flat mapping will be used.
  if (!KMP_AFFINITY_CAPABLE()) {
    return -1;
  }

  // Construct the data structure to be returned.
  *address2os =
      (AddrUnsPair *)__kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  int avail_ct = 0;
  int i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    __kmp_pu_os_idx[avail_ct] = i; // suppose indices are flat
    Address addr(2);
    addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
    addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
    (*address2os)[avail_ct++] = AddrUnsPair(addr, i);

    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
                 addr.labels[1]);
    }
  }

  if (__kmp_affinity_gran_levels < 0) {
    if (__kmp_affinity_gran == affinity_gran_group) {
      __kmp_affinity_gran_levels = 1;
    } else if ((__kmp_affinity_gran == affinity_gran_fine) ||
               (__kmp_affinity_gran == affinity_gran_thread)) {
      __kmp_affinity_gran_levels = 0;
    } else {
      const char *gran_str = NULL;
      if (__kmp_affinity_gran == affinity_gran_core) {
        gran_str = "core";
      } else if (__kmp_affinity_gran == affinity_gran_package) {
        gran_str = "package";
      } else if (__kmp_affinity_gran == affinity_gran_node) {
        gran_str = "node";
      } else {
        KMP_ASSERT(0);
      }

      // This granularity setting isn't supported with the group topology
      // method; fall back to granularity=thread.
      __kmp_affinity_gran_levels = 0;
    }
  }
  return 2;
}

#endif /* KMP_GROUP_AFFINITY */
#if KMP_ARCH_X86 || KMP_ARCH_X86_64

static int __kmp_cpuid_mask_width(int count) {
  int r = 0;

  while ((1 << r) < count)
    ++r;
  return r;
}
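// Example: __kmp_cpuid_mask_width(6) == 3, since 1 << 3 = 8 is the smallest
// power of two >= 6. Legacy APIC ids pack each topology field into such a
// minimal power-of-two-wide bit field.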
class apicThreadInfo {
public:
  unsigned osId; // param to __kmp_affinity_bind_thread
  unsigned apicId; // from cpuid after binding
  unsigned maxCoresPerPkg; // ""
  unsigned maxThreadsPerPkg; // ""
  unsigned pkgId; // inferred from above values
  unsigned coreId; // ""
  unsigned threadId; // ""
};

static int __kmp_affinity_cmp_apicThreadInfo_os_id(const void *a,
                                                   const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->osId < bb->osId)
    return -1;
  if (aa->osId > bb->osId)
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a,
                                                     const void *b) {
  const apicThreadInfo *aa = (const apicThreadInfo *)a;
  const apicThreadInfo *bb = (const apicThreadInfo *)b;
  if (aa->pkgId < bb->pkgId)
    return -1;
  if (aa->pkgId > bb->pkgId)
    return 1;
  if (aa->coreId < bb->coreId)
    return -1;
  if (aa->coreId > bb->coreId)
    return 1;
  if (aa->threadId < bb->threadId)
    return -1;
  if (aa->threadId > bb->threadId)
    return 1;
  return 0;
}
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieves the Apic
// Id for each thread context using the cpuid instruction.
static int __kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
                                            kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check if cpuid leaf 4 support is available.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 4) {
    *msg_id = kmp_i18n_str_NoLeaf4Support;
    return -1;
  }

  // The algorithm below requires binding to each proc, so if we aren't
  // affinity capable, infer what we can from cpuid on the current thread.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    // Get an upper bound on the number of threads per package using cpuid(1).
    // The ebx[23:16] field is the max number of addressable IDs per package.
    __kmp_x86_cpuid(1, 0, &buf);
    int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (maxThreadsPerPkg == 0) {
      maxThreadsPerPkg = 1;
    }

    // The max cores per package comes from cpuid(4); 1 must be added to the
    // encoded value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      nCoresPerPkg = 1;
    }

    __kmp_ncores = __kmp_xproc;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    __kmp_nThreadsPerCore = 1;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // From here on, we can assume that it is safe to call
  // __kmp_get_system_affinity() and __kmp_set_system_affinity(), even if
  // __kmp_affinity_type = affinity_none.

  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  KMP_ASSERT(oldMask != NULL);
  __kmp_get_system_affinity(oldMask, TRUE);
  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
  unsigned nApics = 0;
  unsigned i;
  KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(i);
    threadInfo[nApics].osId = i;

    // The apic id and max threads per pkg come from cpuid(1).
    __kmp_x86_cpuid(1, 0, &buf);
    if (((buf.edx >> 9) & 1) == 0) {
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_ApicNotPresent;
      return -1;
    }
    threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
    threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
    if (threadInfo[nApics].maxThreadsPerPkg == 0) {
      threadInfo[nApics].maxThreadsPerPkg = 1;
    }

    // Max cores per pkg comes from cpuid(4); 1 must be added to the encoded
    // value.
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax >= 4) {
      __kmp_x86_cpuid(4, 0, &buf);
      threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
    } else {
      threadInfo[nApics].maxCoresPerPkg = 1;
    }

    // Infer the pkgId / coreId / threadId using only the info obtained
    // locally.
    int widthCT = __kmp_cpuid_mask_width(threadInfo[nApics].maxThreadsPerPkg);
    threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

    int widthC = __kmp_cpuid_mask_width(threadInfo[nApics].maxCoresPerPkg);
    int widthT = widthCT - widthC;
    if (widthT < 0) {
      // This could only happen if the cpuid info on a chip is really screwed
      // up. Make sure to restore the affinity mask before bailing out.
      __kmp_set_system_affinity(oldMask, TRUE);
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }

    int maskC = (1 << widthC) - 1;
    threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT) & maskC;

    int maskT = (1 << widthT) - 1;
    threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

    nApics++;
  }
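// Worked example of the decomposition above: with maxThreadsPerPkg = 8
// (widthCT = 3) and maxCoresPerPkg = 4 (widthC = 2, so widthT = 1), an apicId
// of 0b1011 yields pkgId = 0b1011 >> 3 = 1, coreId = (0b1011 >> 1) & 0b11 = 1,
// and threadId = 0b1011 & 0b1 = 1.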
  // We've collected all the info we need. Restore the old affinity mask for
  // this thread.
  __kmp_set_system_affinity(oldMask, TRUE);

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, just
  // return).
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0].pkgId;
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, nApics, sizeof(*threadInfo),
        __kmp_affinity_cmp_apicThreadInfo_phys_id);
  // The table is now sorted by pkgId / coreId / threadId, but we really don't
  // know the radix of any of the fields: pkgIds may be sparsely assigned, and
  // coreIds/threadIds may not be dense. Determine the radii now, and also
  // check that all threads in a package report consistent cpuid info.
  nPackages = 1;
  nCoresPerPkg = 1;
  __kmp_nThreadsPerCore = 1;
  unsigned nCores = 1;

  unsigned pkgCt = 1; // to determine radii
  unsigned lastPkgId = threadInfo[0].pkgId;
  unsigned coreCt = 1;
  unsigned lastCoreId = threadInfo[0].coreId;
  unsigned threadCt = 1;
  unsigned lastThreadId = threadInfo[0].threadId;

  // intra-pkg consistency checks
  unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
  unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

  for (i = 1; i < nApics; i++) {
    if (threadInfo[i].pkgId != lastPkgId) {
      nCores++;
      pkgCt++;
      lastPkgId = threadInfo[i].pkgId;
      if ((int)coreCt > nCoresPerPkg)
        nCoresPerPkg = coreCt;
      coreCt = 1;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;

      // This is a different package, so go on to the next iteration without
      // doing any consistency checks. Reset the consistency check vars,
      // though.
      prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
      prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
      continue;
    }

    if (threadInfo[i].coreId != lastCoreId) {
      nCores++;
      coreCt++;
      lastCoreId = threadInfo[i].coreId;
      if ((int)threadCt > __kmp_nThreadsPerCore)
        __kmp_nThreadsPerCore = threadCt;
      threadCt = 1;
      lastThreadId = threadInfo[i].threadId;
    } else if (threadInfo[i].threadId != lastThreadId) {
      threadCt++;
      lastThreadId = threadInfo[i].threadId;
    } else {
      // All the ids are equal: duplicate entries in the apic table.
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
      return -1;
    }

    // Check that maxCoresPerPkg and maxThreadsPerPkg agree between all the
    // threads bound to a given package.
    if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg) ||
        (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
      __kmp_free(threadInfo);
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }
  }
  nPackages = pkgCt;
  if ((int)coreCt > nCoresPerPkg)
    nCoresPerPkg = coreCt;
  if ((int)threadCt > __kmp_nThreadsPerCore)
    __kmp_nThreadsPerCore = threadCt;
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = nCores;
  if (__kmp_affinity_verbose) {
    char buf[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (__kmp_affinity_uniform_topology()) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }
    KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < nApics; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i].osId;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  // Now that we've determined the number of packages, the number of cores
  // per package, and the number of threads per core, we can construct the
  // data structure that is to be returned.
  int pkgLevel = 0;
  int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
  int threadLevel =
      (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
  unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

  KMP_ASSERT(depth > 0);
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

  for (i = 0; i < nApics; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i].osId;
    int d = 0;

    if (pkgLevel >= 0) {
      addr.labels[d++] = threadInfo[i].pkgId;
    }
    if (coreLevel >= 0) {
      addr.labels[d++] = threadInfo[i].coreId;
    }
    if (threadLevel >= 0) {
      addr.labels[d++] = threadInfo[i].threadId;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }

  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled in the
    // machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(threadInfo);
  KMP_CPU_FREE(oldMask);
  return depth;
}
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
static int __kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
                                              kmp_i18n_id_t *const msg_id) {
  kmp_cpuid buf;
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Check to see if cpuid leaf 11 is supported.
  __kmp_x86_cpuid(0, 0, &buf);
  if (buf.eax < 11) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }
  __kmp_x86_cpuid(11, 0, &buf);
  if (buf.ebx == 0) {
    *msg_id = kmp_i18n_str_NoLeaf11Support;
    return -1;
  }
  // Find the number of levels in the machine topology. While we're at it, get
  // the default values for __kmp_nThreadsPerCore & nCoresPerPkg. We will try
  // to get more accurate values later by explicitly counting them, but get
  // reasonable defaults now, in case we return early.
  int level;
  int threadLevel = -1;
  int coreLevel = -1;
  int pkgLevel = -1;
  __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

  for (level = 0;; level++) {
    if (level > 31) {
      // If level is this big then something went wrong; reject based on the
      // loop trip count rather than trust the reported x2APIC settings.
      *msg_id = kmp_i18n_str_InvalidCpuidInfo;
      return -1;
    }
    __kmp_x86_cpuid(11, level, &buf);
    if (buf.ebx == 0) {
      if (pkgLevel < 0) {
        // Will infer nPackages from __kmp_xproc.
        pkgLevel = level;
        level++;
      }
      break;
    }
    int kind = (buf.ecx >> 8) & 0xff;
    if (kind == 1) { // SMT level
      threadLevel = level;
      coreLevel = -1;
      pkgLevel = -1;
      __kmp_nThreadsPerCore = buf.ebx & 0xffff;
      if (__kmp_nThreadsPerCore == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else if (kind == 2) { // core level
      coreLevel = level;
      pkgLevel = -1;
      nCoresPerPkg = buf.ebx & 0xffff;
      if (nCoresPerPkg == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    } else {
      if (level <= 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
      if (pkgLevel >= 0) {
        continue;
      }
      pkgLevel = level;
      nPackages = buf.ebx & 0xffff;
      if (nPackages == 0) {
        *msg_id = kmp_i18n_str_InvalidCpuidInfo;
        return -1;
      }
    }
  }
  int depth = level;

  // In the above loop, "level" counted from the finest level (usually
  // thread) up to the coarsest. The labels in (*address2os)[].first.labels[]
  // go in the inverse order, so invert which index means what.
  if (threadLevel >= 0) {
    threadLevel = depth - threadLevel - 1;
  }
  if (coreLevel >= 0) {
    coreLevel = depth - coreLevel - 1;
  }
  KMP_DEBUG_ASSERT(pkgLevel >= 0);
  pkgLevel = depth - pkgLevel - 1;
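// Example: leaf 11 usually enumerates level 0 = SMT and level 1 = core; the
// package is the first level reporting ebx == 0 (here level 2, giving
// depth 3). After inversion, pkgLevel = 0, coreLevel = 1, threadLevel = 2,
// i.e. labels[] runs from package down to hardware thread.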
  // The algorithm used starts by setting the affinity to each available
  // thread and retrieving info from the cpuid instruction, so if we aren't
  // affinity capable, infer what we can from cpuid on the current thread.
  if (!KMP_AFFINITY_CAPABLE()) {
    KMP_ASSERT(__kmp_affinity_type == affinity_none);

    __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
    nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
    if (__kmp_affinity_verbose) {
      KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (__kmp_affinity_uniform_topology()) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }
    return 0;
  }
  // Save the affinity mask for the current thread.
  kmp_affin_mask_t *oldMask;
  KMP_CPU_ALLOC(oldMask);
  __kmp_get_system_affinity(oldMask, TRUE);

  // Allocate the data structure to be returned.
  AddrUnsPair *retval =
      (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
  // Run through each of the available contexts, binding the current thread
  // to it, and obtaining the pertinent information using the cpuid instr.
  unsigned proc;
  int nApics = 0;
  KMP_CPU_SET_ITERATE(proc, __kmp_affin_fullMask) {
    // Skip this proc if it is not included in the machine model.
    if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
      continue;
    }
    KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

    __kmp_affinity_dispatch->bind_thread(proc);

    // Extract the labels for each level in the machine topology map from the
    // Apic ID.
    Address addr(depth);
    int prev_shift = 0;

    for (level = 0; level < depth; level++) {
      __kmp_x86_cpuid(11, level, &buf);
      unsigned apicId = buf.edx;
      if (buf.ebx == 0) {
        if (level != depth - 1) {
          KMP_CPU_FREE(oldMask);
          *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
          return -1;
        }
        addr.labels[depth - level - 1] = apicId >> prev_shift;
        level++;
        break;
      }
      int shift = buf.eax & 0x1f;
      int mask = (1 << shift) - 1;
      addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
      prev_shift = shift;
    }
    if (level != depth) {
      KMP_CPU_FREE(oldMask);
      *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
      return -1;
    }

    retval[nApics] = AddrUnsPair(addr, proc);
    nApics++;
  }

  // We've collected all the info we need. Restore the old affinity mask for
  // this thread.
  __kmp_set_system_affinity(oldMask, TRUE);
  // If there's only one thread context to bind to, return now.
  KMP_ASSERT(nApics > 0);
  if (nApics == 1) {
    __kmp_ncores = nPackages = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

      KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      KMP_INFORM(Uniform, "KMP_AFFINITY");
      KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
                 __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
      __kmp_free(retval);
      KMP_CPU_FREE(oldMask);
      return 0;
    }

    // Form an Address object which only includes the package level.
    Address addr(1);
    addr.labels[0] = retval[0].first.labels[pkgLevel];
    retval[0].first = addr;

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
    }

    *address2os = retval;
    KMP_CPU_FREE(oldMask);
    return 1;
  }
  // Sort the table by physical Id.
  qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

  // Find the radix at each of the levels.
  unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
  for (level = 0; level < depth; level++) {
    totals[level] = 1;
    maxCt[level] = 1;
    counts[level] = 1;
    last[level] = retval[0].first.labels[level];
  }
  // From here on, the iteration variable "level" runs from the finest level
  // to the coarsest, i.e. we iterate forward through
  // (*address2os)[].first.labels[] - in the inverse order in which the info
  // was unpacked from the cpuid instruction.
  for (proc = 1; (int)proc < nApics; proc++) {
    for (level = 0; level < depth; level++) {
      if (retval[proc].first.labels[level] != last[level]) {
        int j;
        for (j = level + 1; j < depth; j++) {
          totals[j]++;
          counts[j] = 1;
          maxCt[j] = 1;
          last[j] = retval[proc].first.labels[j];
        }
        totals[level]++;
        counts[level]++;
        if (counts[level] > maxCt[level]) {
          maxCt[level] = counts[level];
        }
        last[level] = retval[proc].first.labels[level];
        break;
      } else if (level == depth - 1) {
        // All labels are identical: the x2APIC ids are not unique.
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
        return -1;
      }
    }
  }
  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  if (threadLevel >= 0) {
    __kmp_nThreadsPerCore = maxCt[threadLevel];
  } else {
    __kmp_nThreadsPerCore = 1;
  }
  nPackages = totals[pkgLevel];

  if (coreLevel >= 0) {
    __kmp_ncores = totals[coreLevel];
    nCoresPerPkg = maxCt[coreLevel];
  } else {
    __kmp_ncores = nPackages;
    nCoresPerPkg = 1;
  }

  // Check to see if the machine topology is uniform.
  unsigned prod = maxCt[0];
  for (level = 1; level < depth; level++) {
    prod *= maxCt[level];
  }
  bool uniform = (prod == totals[level - 1]);
  // Print the machine topology summary.
  if (__kmp_affinity_verbose) {
    char mask[KMP_AFFIN_MASK_PRINT_LEN];
    __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

    KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
    if (__kmp_affinity_respect_mask) {
      KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
    } else {
      KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
    }
    KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
    if (uniform) {
      KMP_INFORM(Uniform, "KMP_AFFINITY");
    } else {
      KMP_INFORM(NonUniform, "KMP_AFFINITY");
    }

    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[0]);
    for (level = 1; level <= pkgLevel; level++) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
               __kmp_nThreadsPerCore, __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(nApics == __kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (proc = 0; (int)proc < nApics; ++proc) {
    __kmp_pu_os_idx[proc] = retval[proc].second;
  }
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    __kmp_free(retval);
    KMP_CPU_FREE(oldMask);
    return 0;
  }
  // Find any levels with radix 1, and remove them from the map
  // (except for the package level).
  int new_depth = 0;
  for (level = 0; level < depth; level++) {
    if ((maxCt[level] == 1) && (level != pkgLevel)) {
      continue;
    }
    new_depth++;
  }

  // If we are removing any levels, allocate a new vector to return,
  // and copy the relevant information to it.
  if (new_depth != depth) {
    AddrUnsPair *new_retval =
        (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * nApics);
    for (proc = 0; (int)proc < nApics; proc++) {
      Address addr(new_depth);
      new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
    }
    int new_level = 0;
    int newPkgLevel = -1;
    int newCoreLevel = -1;
    int newThreadLevel = -1;
    for (level = 0; level < depth; level++) {
      if ((maxCt[level] == 1) && (level != pkgLevel)) {
        // Remove this level. Never remove the package level.
        continue;
      }
      if (level == pkgLevel) {
        newPkgLevel = new_level;
      }
      if (level == coreLevel) {
        newCoreLevel = new_level;
      }
      if (level == threadLevel) {
        newThreadLevel = new_level;
      }
      for (proc = 0; (int)proc < nApics; proc++) {
        new_retval[proc].first.labels[new_level] =
            retval[proc].first.labels[level];
      }
      new_level++;
    }

    __kmp_free(retval);
    retval = new_retval;
    depth = new_depth;
    pkgLevel = newPkgLevel;
    coreLevel = newCoreLevel;
    threadLevel = newThreadLevel;
  }
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    __kmp_affinity_gran_levels = 0;
    if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
      __kmp_affinity_gran_levels++;
    }
    if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
      __kmp_affinity_gran_levels++;
    }
    if (__kmp_affinity_gran > affinity_gran_package) {
      __kmp_affinity_gran_levels++;
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel, coreLevel,
                                  threadLevel);
  }

  __kmp_free(last);
  __kmp_free(maxCt);
  __kmp_free(counts);
  __kmp_free(totals);
  KMP_CPU_FREE(oldMask);
  *address2os = retval;
  return depth;
}

#endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
#define osIdIndex 0
#define threadIdIndex 1
#define coreIdIndex 2
#define pkgIdIndex 3
#define nodeIdIndex 4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int __kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b) {
  const unsigned *aa = (const unsigned *)a;
  const unsigned *bb = (const unsigned *)b;
  if (aa[osIdIndex] < bb[osIdIndex])
    return -1;
  if (aa[osIdIndex] > bb[osIdIndex])
    return 1;
  return 0;
}

static int __kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a,
                                                  const void *b) {
  unsigned i;
  const unsigned *aa = *(unsigned *const *)a;
  const unsigned *bb = *(unsigned *const *)b;
  for (i = maxIndex;; i--) {
    if (aa[i] < bb[i])
      return -1;
    if (aa[i] > bb[i])
      return 1;
    if (i == osIdIndex)
      break;
  }
  return 0;
}
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain the
// machine topology map.
static int __kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os,
                                             int *line,
                                             kmp_i18n_id_t *const msg_id,
                                             FILE *f) {
  *address2os = NULL;
  *msg_id = kmp_i18n_null;

  // Scan the file once, counting the number of "processor" fields and
  // finding the highest level of <n> values (for node_<n> id records).
  char buf[256];
  unsigned num_records = 0;
  while (!feof(f)) {
    buf[sizeof(buf) - 1] = 1;
    if (!fgets(buf, sizeof(buf), f)) {
      // Read errors presumably because of EOF.
      break;
    }
    char s1[] = "processor";
    if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
      num_records++;
      continue;
    }

    unsigned level;
    if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
      if (nodeIdIndex + level >= maxIndex) {
        maxIndex = nodeIdIndex + level;
      }
    }
  }

  // Check for empty file / no valid processor records, or too many. The
  // number of records can't exceed the number of valid bits in the affinity
  // mask.
  if (num_records == 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_NoProcRecords;
    return -1;
  }
  if (num_records > (unsigned)__kmp_xproc) {
    *line = 0;
    *msg_id = kmp_i18n_str_TooManyProcRecords;
    return -1;
  }

  // Set the file pointer back to the beginning, so that we can scan the file
  // again, this time performing a full parse of the data.
  if (fseek(f, 0, SEEK_SET) != 0) {
    *line = 0;
    *msg_id = kmp_i18n_str_CantRewindCpuinfo;
    return -1;
  }
  // Allocate the array of records to store the proc info in. The dummy
  // element at the end makes the logic in filling them out easier to code.
  unsigned **threadInfo =
      (unsigned **)__kmp_allocate((num_records + 1) * sizeof(unsigned *));
  unsigned i;
  for (i = 0; i <= num_records; i++) {
    threadInfo[i] =
        (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  }

#define CLEANUP_THREAD_INFO                                                    \
  for (i = 0; i <= num_records; i++) {                                         \
    __kmp_free(threadInfo[i]);                                                 \
  }                                                                            \
  __kmp_free(threadInfo);

  // A value of UINT_MAX means that we didn't find the field.
  unsigned __index;

#define INIT_PROC_INFO(p)                                                      \
  for (__index = 0; __index <= maxIndex; __index++) {                          \
    (p)[__index] = UINT_MAX;                                                   \
  }

  for (i = 0; i <= num_records; i++) {
    INIT_PROC_INFO(threadInfo[i]);
  }
  unsigned num_avail = 0;
  *line = 0;
  while (!feof(f)) {
    // Create an inner scoping level, so that all the goto targets at the end
    // of the loop appear in an outer scoping level.
    {
      buf[sizeof(buf) - 1] = 1;
      bool long_line = false;
      if (!fgets(buf, sizeof(buf), f)) {
        // Read errors presumably because of EOF. If there is valid data in
        // threadInfo[num_avail], then fake a blank line to ensure that the
        // last address gets parsed.
        bool valid = false;
        for (i = 0; i <= maxIndex; i++) {
          if (threadInfo[num_avail][i] != UINT_MAX) {
            valid = true;
          }
        }
        if (!valid) {
          break;
        }
        buf[0] = 0;
      } else if (!buf[sizeof(buf) - 1]) {
        // The line is longer than the buffer. Set a flag and don't emit an
        // error if we run out of buffer space before seeing the end.
        long_line = true;

#define CHECK_LINE                                                             \
  if (long_line) {                                                             \
    CLEANUP_THREAD_INFO;                                                       \
    *msg_id = kmp_i18n_str_LongLineCpuinfo;                                    \
    return -1;                                                                 \
  }
      }
      (*line)++;

      char s1[] = "processor";
      if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s1) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][osIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
        char path[256];
        KMP_SNPRINTF(
            path, sizeof(path),
            "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
            threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

        KMP_SNPRINTF(path, sizeof(path),
                     "/sys/devices/system/cpu/cpu%u/topology/core_id",
                     threadInfo[num_avail][osIdIndex]);
        __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
        continue;
#else
      }
      char s2[] = "physical id";
      if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s2) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][pkgIdIndex] = val;
        continue;
      }
      char s3[] = "core id";
      if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s3) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][coreIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][coreIdIndex] = val;
        continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
      }
      char s4[] = "thread id";
      if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        if (threadInfo[num_avail][threadIdIndex] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][threadIdIndex] = val;
        continue;
      }
      unsigned level;
      if (KMP_SSCANF(buf, "node_%u id", &level) == 1) {
        CHECK_LINE;
        char *p = strchr(buf + sizeof(s4) - 1, ':');
        unsigned val;
        if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1))
          goto no_val;
        KMP_ASSERT(nodeIdIndex + level <= maxIndex);
        if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX)
          goto dup_field;
        threadInfo[num_avail][nodeIdIndex + level] = val;
        continue;
      }
      // We didn't recognize the leading token on the line. There are lots of
      // leading tokens that we don't recognize - if the line isn't empty, go
      // on to the next line.
      if ((*buf != 0) && (*buf != '\n')) {
        // If the line is longer than the buffer, read characters
        // until we find a newline.
        if (long_line) {
          int ch;
          while (((ch = fgetc(f)) != EOF) && (ch != '\n'))
            ;
        }
        continue;
      }

      // A newline has signalled the end of the processor record.
      // Check that there aren't too many procs specified.
      if ((int)num_avail == __kmp_xproc) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_TooManyEntries;
        return -1;
      }

      // Check for missing fields. The osId field must be there, and we
      // currently require that the physical id field is specified, also.
      if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingProcField;
        return -1;
      }
      if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingPhysicalIDField;
        return -1;
      }

      // Skip this proc if it is not included in the machine model.
      if (!KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex],
                         __kmp_affin_fullMask)) {
        INIT_PROC_INFO(threadInfo[num_avail]);
        continue;
      }

      // We have a successful parse of this proc's info.
      // Increment the counter, and prepare for the next proc.
      num_avail++;
      KMP_ASSERT(num_avail <= num_records);
      INIT_PROC_INFO(threadInfo[num_avail]);
    }
    continue;

  no_val:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_MissingValCpuinfo;
    return -1;

  dup_field:
    CLEANUP_THREAD_INFO;
    *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
    return -1;
  }
  *line = 0;
#if KMP_MIC && REDUCE_TEAM_SIZE
  unsigned teamSize = 0;
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  // If there's only one thread context to bind to, form an Address object
  // with depth 1 and return immediately (or, if affinity is off, set
  // address2os to NULL and return).
  KMP_ASSERT(num_avail > 0);
  KMP_ASSERT(num_avail <= num_records);
  if (num_avail == 1) {
    __kmp_ncores = 1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
    if (__kmp_affinity_verbose) {
      if (!KMP_AFFINITY_CAPABLE()) {
        KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                  __kmp_affin_fullMask);
        KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
          KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        } else {
          KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      }
      int index;
      kmp_str_buf_t buf;
      __kmp_str_buf_init(&buf);
      __kmp_str_buf_print(&buf, "1");
      for (index = maxIndex - 1; index > pkgIdIndex; index--) {
        __kmp_str_buf_print(&buf, " x 1");
      }
      KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
      __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
      CLEANUP_THREAD_INFO;
      return 0;
    }

    *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair));
    Address addr(1);
    addr.labels[0] = threadInfo[0][pkgIdIndex];
    (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

    if (__kmp_affinity_gran_levels < 0) {
      __kmp_affinity_gran_levels = 0;
    }

    if (__kmp_affinity_verbose) {
      __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
    }

    CLEANUP_THREAD_INFO;
    return 1;
  }
  // Sort the threadInfo table by physical Id.
  qsort(threadInfo, num_avail, sizeof(*threadInfo),
        __kmp_affinity_cmp_ProcCpuInfo_phys_id);
  // Now that the records are sorted, determine the radix of each field and
  // perform a radix-style consistency check on the ids.
  unsigned *counts =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *maxCt =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *totals =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));
  unsigned *lastId =
      (unsigned *)__kmp_allocate((maxIndex + 1) * sizeof(unsigned));

  bool assign_thread_ids = false;
  unsigned threadIdCt;
  unsigned index;

restart_radix_check:
  threadIdCt = 0;

  // Initialize the counter arrays with data from threadInfo[0].
  if (assign_thread_ids) {
    if (threadInfo[0][threadIdIndex] == UINT_MAX) {
      threadInfo[0][threadIdIndex] = threadIdCt++;
    } else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
      threadIdCt = threadInfo[0][threadIdIndex] + 1;
    }
  }
  for (index = 0; index <= maxIndex; index++) {
    counts[index] = 1;
    maxCt[index] = 1;
    totals[index] = 1;
    lastId[index] = threadInfo[0][index];
  }
  // Run through the rest of the records, popping the counter "stack" as
  // needed: whenever a more-significant id changes, the counts of all
  // less-significant levels are folded into their maxima and restarted.
  for (i = 1; i < num_avail; i++) {
    for (index = maxIndex; index >= threadIdIndex; index--) {
      if (assign_thread_ids && (index == threadIdIndex)) {
        // Auto-assign the thread id field if it wasn't specified.
        if (threadInfo[i][threadIdIndex] == UINT_MAX) {
          threadInfo[i][threadIdIndex] = threadIdCt++;
        }
        // Apparently the thread id field was specified for some entries and
        // not others. Start the thread id counter off at the next higher
        // thread id.
        else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
          threadIdCt = threadInfo[i][threadIdIndex] + 1;
        }
      }
      if (threadInfo[i][index] != lastId[index]) {
        // Run through all indices which are less significant, and reset the
        // counts to 1. At all levels up to and including index, we need to
        // increment the totals and record the last id.
        unsigned index2;
        for (index2 = threadIdIndex; index2 < index; index2++) {
          totals[index2]++;
          if (counts[index2] > maxCt[index2]) {
            maxCt[index2] = counts[index2];
          }
          counts[index2] = 1;
          lastId[index2] = threadInfo[i][index2];
        }
        counts[index]++;
        totals[index]++;
        lastId[index] = threadInfo[i][index];

        if (assign_thread_ids && (index > threadIdIndex)) {
#if KMP_MIC && REDUCE_TEAM_SIZE
          // The default team size is the total #threads in the machine
          // minus 1 thread for every core that has 3 or more threads.
          teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

          // Restart the thread counter, as we are on a new core.
          threadIdCt = 0;

          // Auto-assign the thread id field if it wasn't specified.
          if (threadInfo[i][threadIdIndex] == UINT_MAX) {
            threadInfo[i][threadIdIndex] = threadIdCt++;
          }
          // Apparently the thread id field was specified for some entries
          // and not others. Start the thread id counter off at the next
          // higher thread id.
          else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
            threadIdCt = threadInfo[i][threadIdIndex] + 1;
          }
        }
        break;
      }
    }
    if (index < threadIdIndex) {
      // If thread ids were specified, it is an error if they are not unique.
      // Also, check that we haven't already restarted the loop (to be safe -
      // shouldn't need to).
      if ((threadInfo[i][threadIdIndex] != UINT_MAX) || assign_thread_ids) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
        return -1;
      }

      // If the thread ids were not specified and we see entries that are
      // duplicates, start the loop over and assign the thread ids manually.
      assign_thread_ids = true;
      goto restart_radix_check;
    }
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  // The default team size is the total #threads in the machine
  // minus 1 thread for every core that has 3 or more threads.
  teamSize += (threadIdCt <= 2) ? (threadIdCt) : (threadIdCt - 1);
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (counts[index] > maxCt[index]) {
      maxCt[index] = counts[index];
    }
  }

  __kmp_nThreadsPerCore = maxCt[threadIdIndex];
  nCoresPerPkg = maxCt[coreIdIndex];
  nPackages = totals[pkgIdIndex];

  // Check to see if the machine topology is uniform.
  unsigned prod = totals[maxIndex];
  for (index = threadIdIndex; index < maxIndex; index++) {
    prod *= maxCt[index];
  }
  bool uniform = (prod == totals[threadIdIndex]);

  // When affinity is off, this routine will still be called to set
  // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, & nPackages.
  __kmp_ncores = totals[coreIdIndex];
  if (__kmp_affinity_verbose) {
    if (!KMP_AFFINITY_CAPABLE()) {
      KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    } else {
      char buf[KMP_AFFIN_MASK_PRINT_LEN];
      __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                                __kmp_affin_fullMask);
      KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
      if (__kmp_affinity_respect_mask) {
        KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
      } else {
        KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
      }
      KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
      if (uniform) {
        KMP_INFORM(Uniform, "KMP_AFFINITY");
      } else {
        KMP_INFORM(NonUniform, "KMP_AFFINITY");
      }
    }
    kmp_str_buf_t buf;
    __kmp_str_buf_init(&buf);

    __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
    for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
      __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
    }
    KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
               maxCt[threadIdIndex], __kmp_ncores);

    __kmp_str_buf_free(&buf);
  }
#if KMP_MIC && REDUCE_TEAM_SIZE
  // Set the default team size.
  if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
    __kmp_dflt_team_nth = teamSize;
    KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting "
                  "__kmp_dflt_team_nth = %d\n",
                  __kmp_dflt_team_nth));
  }
#endif // KMP_MIC && REDUCE_TEAM_SIZE

  KMP_DEBUG_ASSERT(__kmp_pu_os_idx == NULL);
  KMP_DEBUG_ASSERT(num_avail == (unsigned)__kmp_avail_proc);
  __kmp_pu_os_idx = (int *)__kmp_allocate(sizeof(int) * __kmp_avail_proc);
  for (i = 0; i < num_avail; ++i) {
    __kmp_pu_os_idx[i] = threadInfo[i][osIdIndex];
  }

  // If affinity is off, we are done: the needed globals are all set.
  if (__kmp_affinity_type == affinity_none) {
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return 0;
  }
  // Count the number of levels which have more nodes at that level than at
  // the parent's level (with there being an implicit root node of the top
  // level). This is equivalent to saying that there is at least one node at
  // this level which has a sibling. These levels are in the map, and the
  // package level is always in the map.
  bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
  for (index = threadIdIndex; index < maxIndex; index++) {
    KMP_ASSERT(totals[index] >= totals[index + 1]);
    inMap[index] = (totals[index] > totals[index + 1]);
  }
  inMap[maxIndex] = (totals[maxIndex] > 1);
  inMap[pkgIdIndex] = true;

  int depth = 0;
  for (index = threadIdIndex; index <= maxIndex; index++) {
    if (inMap[index]) {
      depth++;
    }
  }
  KMP_ASSERT(depth > 0);
  // Construct the data structure that is to be returned.
  *address2os = (AddrUnsPair *)__kmp_allocate(sizeof(AddrUnsPair) * num_avail);
  int pkgLevel = -1;
  int coreLevel = -1;
  int threadLevel = -1;

  for (i = 0; i < num_avail; ++i) {
    Address addr(depth);
    unsigned os = threadInfo[i][osIdIndex];
    int src_index;
    int dst_index = 0;

    for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
      if (!inMap[src_index]) {
        continue;
      }
      addr.labels[dst_index] = threadInfo[i][src_index];
      if (src_index == pkgIdIndex) {
        pkgLevel = dst_index;
      } else if (src_index == coreIdIndex) {
        coreLevel = dst_index;
      } else if (src_index == threadIdIndex) {
        threadLevel = dst_index;
      }
      dst_index++;
    }
    (*address2os)[i] = AddrUnsPair(addr, os);
  }
  if (__kmp_affinity_gran_levels < 0) {
    // Set the granularity level based on what levels are modeled
    // in the machine topology map.
    int src_index;
    __kmp_affinity_gran_levels = 0;
    for (src_index = threadIdIndex; src_index <= (int)maxIndex; src_index++) {
      if (!inMap[src_index]) {
        continue;
      }
      switch (src_index) {
      case threadIdIndex:
        if (__kmp_affinity_gran > affinity_gran_thread) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case coreIdIndex:
        if (__kmp_affinity_gran > affinity_gran_core) {
          __kmp_affinity_gran_levels++;
        }
        break;
      case pkgIdIndex:
        if (__kmp_affinity_gran > affinity_gran_package) {
          __kmp_affinity_gran_levels++;
        }
        break;
      }
    }
  }

  if (__kmp_affinity_verbose) {
    __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
                                  coreLevel, threadLevel);
  }

  __kmp_free(inMap);
  __kmp_free(lastId);
  __kmp_free(totals);
  __kmp_free(maxCt);
  __kmp_free(counts);
  CLEANUP_THREAD_INFO;
  return depth;
}
// Form a table of affinity masks indexed by OS thread ID. This routine
// handles OR'ing together all the affinity masks of threads that are
// sufficiently close, if granularity > fine.
static kmp_affin_mask_t *__kmp_create_masks(unsigned *maxIndex,
                                            unsigned *numUnique,
                                            AddrUnsPair *address2os,
                                            unsigned numAddrs) {
  // First form a table of affinity masks in order of OS thread id.
  unsigned depth;
  unsigned maxOsId;
  unsigned i;

  KMP_ASSERT(numAddrs > 0);
  depth = address2os[0].first.depth;

  maxOsId = 0;
  for (i = 0; i < numAddrs; i++) {
    unsigned osId = address2os[i].second;
    if (osId > maxOsId) {
      maxOsId = osId;
    }
  }
  kmp_affin_mask_t *osId2Mask;
  KMP_CPU_ALLOC_ARRAY(osId2Mask, (maxOsId + 1));

  // Sort the address2os table according to physical order. Doing so will put
  // all threads on the same core/package/node in consecutive locations.
  qsort(address2os, numAddrs, sizeof(*address2os),
        __kmp_affinity_cmp_Address_labels);

  KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
  if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
    KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
  }
  if (__kmp_affinity_gran_levels >= (int)depth) {
    if (__kmp_affinity_verbose ||
        (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
      KMP_WARNING(AffThreadsMayMigrate);
    }
  }
  // Run through the table, forming the masks for all threads on each core.
  // Threads on the same core will have identical Address objects, not
  // considering the last level, which must be the thread id. All threads on a
  // core will appear consecutively.
  unsigned unique = 0;
  unsigned j = 0; // index of 1st thread on core
  unsigned leader = 0;
  Address *leaderAddr = &(address2os[0].first);
  kmp_affin_mask_t *sum;
  KMP_CPU_ALLOC_ON_STACK(sum);
  KMP_CPU_ZERO(sum);
  KMP_CPU_SET(address2os[0].second, sum);
  for (i = 1; i < numAddrs; i++) {
    // If this thread is sufficiently close to the leader (within the
    // granularity setting), then set the bit for this os thread in the
    // affinity mask for this group, and go on to the next thread.
    if (leaderAddr->isClose(address2os[i].first, __kmp_affinity_gran_levels)) {
      KMP_CPU_SET(address2os[i].second, sum);
      continue;
    }

    // For every thread in this group, copy the mask to the thread's entry in
    // the osId2Mask table. Mark the first address as a leader.
    for (; j < i; j++) {
      unsigned osId = address2os[j].second;
      KMP_DEBUG_ASSERT(osId <= maxOsId);
      kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
      KMP_CPU_COPY(mask, sum);
      address2os[j].first.leader = (j == leader);
    }
    unique++;

    // Start a new mask.
    leader = i;
    leaderAddr = &(address2os[i].first);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[i].second, sum);
  }

  // For every thread in the last group, copy the mask to the thread's
  // entry in the osId2Mask table.
  for (; j < i; j++) {
    unsigned osId = address2os[j].second;
    KMP_DEBUG_ASSERT(osId <= maxOsId);
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
    KMP_CPU_COPY(mask, sum);
    address2os[j].first.leader = (j == leader);
  }
  unique++;
  KMP_CPU_FREE_FROM_STACK(sum);

  *maxIndex = maxOsId;
  *numUnique = unique;
  return osId2Mask;
}
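// Example: with granularity=core (gran_levels = 1) on a 2-way SMT machine,
// the two hyperthreads of a core compare "close" (their addresses differ
// only in the last label), so both get one shared mask with both PU bits set.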
// Stuff for the affinity proclist parsers. It's easier to declare these vars
// as file-static than to try and pass them through the calling sequence of
// the recursive-descent OMP_PLACES parser.
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask)                                                        \
  {                                                                            \
    if (nextNewMask >= numNewMasks) {                                          \
      int i;                                                                   \
      numNewMasks *= 2;                                                        \
      kmp_affin_mask_t *temp;                                                  \
      KMP_CPU_INTERNAL_ALLOC_ARRAY(temp, numNewMasks);                         \
      for (i = 0; i < numNewMasks / 2; i++) {                                  \
        kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);                    \
        kmp_affin_mask_t *dest = KMP_CPU_INDEX(temp, i);                       \
        KMP_CPU_COPY(dest, src);                                               \
      }                                                                        \
      KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks / 2);                  \
      newMasks = temp;                                                         \
    }                                                                          \
    KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));               \
    nextNewMask++;                                                             \
  }

#define ADD_MASK_OSID(_osId, _osId2Mask, _maxOsId)                             \
  {                                                                            \
    if (((_osId) > _maxOsId) ||                                                \
        (!KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) {     \
      if (__kmp_affinity_verbose ||                                            \
          (__kmp_affinity_warnings &&                                          \
           (__kmp_affinity_type != affinity_none))) {                          \
        KMP_WARNING(AffIgnoreInvalidProcID, _osId);                            \
      }                                                                        \
    } else {                                                                   \
      ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));                            \
    }                                                                          \
  }
2494 unsigned int *out_numMasks,
2495 const char *proclist,
2496 kmp_affin_mask_t *osId2Mask,
2499 const char *scan = proclist;
2500 const char *next = proclist;
2505 KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
2507 kmp_affin_mask_t *sumMask;
2508 KMP_CPU_ALLOC(sumMask);
2512 int start, end, stride;
2516 if (*next ==
'\0') {
2528 KMP_ASSERT2((*next >=
'0') && (*next <=
'9'),
"bad proclist");
2530 num = __kmp_str_to_int(scan, *next);
2531 KMP_ASSERT2(num >= 0,
"bad explicit proc list");
2534 if ((num > maxOsId) ||
2535 (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
2536 if (__kmp_affinity_verbose ||
2537 (__kmp_affinity_warnings &&
2538 (__kmp_affinity_type != affinity_none))) {
2539 KMP_WARNING(AffIgnoreInvalidProcID, num);
2541 KMP_CPU_ZERO(sumMask);
2543 KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
      for (;;) {
        // Check for end of set.
        SKIP_WS(next);
        if (*next == '}') {
          next++; // skip '}'
          break;
        }

        // Skip optional comma.
        if (*next == ',') {
          next++;
        }
        SKIP_WS(next);

        // Read the next integer in the set.
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                    "bad explicit proc list");
        SKIP_DIGITS(next);
        num = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(num >= 0, "bad explicit proc list");

        // Add the mask for that osId to the sum mask.
        if ((num > maxOsId) ||
            (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, num);
          }
        } else {
          KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
          setSize++;
        }
      }
      if (setSize > 0) {
        ADD_MASK(sumMask);
      }

      SKIP_WS(next);
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }

    // Read the first integer.
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(start >= 0, "bad explicit proc list");
    SKIP_WS(next);

    // If this isn't a range, then add a mask to the list and go on.
    if (*next != '-') {
      ADD_MASK_OSID(start, osId2Mask, maxOsId);

      // Skip optional comma.
      if (*next == ',') {
        next++;
      }
      scan = next;
      continue;
    }
    // This is a range. Skip over the '-' and read in the 2nd int.
    next++; // skip '-'
    SKIP_WS(next);
    scan = next;
    KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
    SKIP_DIGITS(next);
    end = __kmp_str_to_int(scan, *next);
    KMP_ASSERT2(end >= 0, "bad explicit proc list");

    // Check for a stride parameter.
    stride = 1;
    if (*next == ':') {
      // A stride is specified. Skip over the ':' and read the 3rd int.
      int sign = +1;
      next++; // skip ':'
      SKIP_WS(next);
      scan = next;
      if (*next == '-') {
        sign = -1;
        next++;
        SKIP_WS(next);
        scan = next;
      }
      KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_ASSERT2(stride >= 0, "bad explicit proc list");
      stride *= sign;
    }

    // Do some range checks.
    KMP_ASSERT2(stride != 0, "bad explicit proc list");
    if (stride > 0) {
      KMP_ASSERT2(start <= end, "bad explicit proc list");
    } else {
      KMP_ASSERT2(start >= end, "bad explicit proc list");
    }
    KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

    // Add the mask for each OS proc # to the list.
    if (stride > 0) {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start <= end);
    } else {
      do {
        ADD_MASK_OSID(start, osId2Mask, maxOsId);
        start += stride;
      } while (start >= end);
    }

    // Skip optional comma.
    SKIP_WS(next);
    if (*next == ',') {
      next++;
    }
    scan = next;
  }
  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
  KMP_CPU_FREE(sumMask);
}
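// Example: KMP_AFFINITY="explicit,proclist=[0,4-7,{2,3}]" yields one mask for
// proc 0, one mask per proc for the range 4-7, and a single union mask
// containing both procs 2 and 3 for the {2,3} set.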
static void __kmp_process_subplace_list(const char **scan,
                                        kmp_affin_mask_t *osId2Mask,
                                        int maxOsId, kmp_affin_mask_t *tempMask,
                                        int *setSize) {
  const char *next;

  for (;;) {
    int start, count, stride, i;

    // Read in the starting proc id.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    start = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(start >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      if ((start > maxOsId) ||
          (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
        if (__kmp_affinity_verbose ||
            (__kmp_affinity_warnings &&
             (__kmp_affinity_type != affinity_none))) {
          KMP_WARNING(AffIgnoreInvalidProcID, start);
        }
      } else {
        KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
        (*setSize)++;
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'
    // Read count parameter.
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(count >= 0);
    *scan = next;

    // Valid follow sets are ',' ':' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start++;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }
    KMP_ASSERT2(**scan == ':', "bad explicit places list");
    (*scan)++; // skip ':'
    // Read stride parameter.
    int sign = +1;
    for (;;) {
      SKIP_WS(*scan);
      if (**scan == '+') {
        (*scan)++; // skip '+'
        continue;
      }
      if (**scan == '-') {
        sign *= -1;
        (*scan)++; // skip '-'
        continue;
      }
      break;
    }
    SKIP_WS(*scan);
    KMP_ASSERT2((**scan >= '0') && (**scan <= '9'), "bad explicit places list");
    next = *scan;
    SKIP_DIGITS(next);
    stride = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(stride >= 0);
    *scan = next;
    stride *= sign;

    // Valid follow sets are ',' and '}'.
    SKIP_WS(*scan);
    if (**scan == '}' || **scan == ',') {
      for (i = 0; i < count; i++) {
        if ((start > maxOsId) ||
            (!KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
          if (__kmp_affinity_verbose ||
              (__kmp_affinity_warnings &&
               (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffIgnoreInvalidProcID, start);
          }
          break; // don't proliferate warnings for large count
        } else {
          KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
          start += stride;
          (*setSize)++;
        }
      }
      if (**scan == '}') {
        break;
      }
      (*scan)++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
}
static void __kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
                                int maxOsId, kmp_affin_mask_t *tempMask,
                                int *setSize) {
  const char *next;

  // Valid follow sets are '{' '!' and num.
  SKIP_WS(*scan);
  if (**scan == '{') {
    (*scan)++; // skip '{'
    __kmp_process_subplace_list(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_ASSERT2(**scan == '}', "bad explicit places list");
    (*scan)++; // skip '}'
  } else if (**scan == '!') {
    (*scan)++; // skip '!'
    __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
    KMP_CPU_COMPLEMENT(maxOsId, tempMask);
  } else if ((**scan >= '0') && (**scan <= '9')) {
    next = *scan;
    SKIP_DIGITS(next);
    int num = __kmp_str_to_int(*scan, *next);
    KMP_ASSERT(num >= 0);
    if ((num > maxOsId) ||
        (!KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
      if (__kmp_affinity_verbose ||
          (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
        KMP_WARNING(AffIgnoreInvalidProcID, num);
      }
    } else {
      KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
      (*setSize)++;
    }
    *scan = next; // skip num
  } else {
    KMP_ASSERT2(0, "bad explicit places list");
  }
}
void __kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
                                      unsigned int *out_numMasks,
                                      const char *placelist,
                                      kmp_affin_mask_t *osId2Mask,
                                      int maxOsId) {
  int i, j, count, stride, sign;
  const char *scan = placelist;
  const char *next = placelist;

  numNewMasks = 2;
  KMP_CPU_INTERNAL_ALLOC_ARRAY(newMasks, numNewMasks);
  nextNewMask = 0;

  // tempMask is modified based on the previous or initial place to form the
  // current place; previousMask contains the previous place.
  kmp_affin_mask_t *tempMask;
  kmp_affin_mask_t *previousMask;
  KMP_CPU_ALLOC(tempMask);
  KMP_CPU_ZERO(tempMask);
  KMP_CPU_ALLOC(previousMask);
  KMP_CPU_ZERO(previousMask);
  int setSize = 0;
  for (;;) {
    __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      if (setSize > 0) {
        ADD_MASK(tempMask);
      }
      setSize = 0;
      KMP_CPU_ZERO(tempMask);
      if (*scan == '\0') {
        break;
      }
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(*scan == ':', "bad explicit places list");
    scan++; // skip ':'

    // Read count parameter.
    SKIP_WS(scan);
    KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
    next = scan;
    SKIP_DIGITS(next);
    count = __kmp_str_to_int(scan, *next);
    KMP_ASSERT(count >= 0);
    scan = next;

    // Valid follow sets are ',' ':' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0' || *scan == ',') {
      stride = +1;
    } else {
      KMP_ASSERT2(*scan == ':', "bad explicit places list");
      scan++; // skip ':'

      // Read stride parameter.
      sign = +1;
      for (;;) {
        SKIP_WS(scan);
        if (*scan == '+') {
          scan++; // skip '+'
          continue;
        }
        if (*scan == '-') {
          sign *= -1;
          scan++; // skip '-'
          continue;
        }
        break;
      }
      SKIP_WS(scan);
      KMP_ASSERT2((*scan >= '0') && (*scan <= '9'), "bad explicit places list");
      next = scan;
      SKIP_DIGITS(next);
      stride = __kmp_str_to_int(scan, *next);
      KMP_DEBUG_ASSERT(stride >= 0);
      scan = next;
      stride *= sign;
    }
    // Add places determined by initial_place : count : stride.
    for (i = 0; i < count; i++) {
      if (setSize == 0) {
        break;
      }
      // Add the current place, then build the next place (tempMask) from it.
      KMP_CPU_COPY(previousMask, tempMask);
      ADD_MASK(previousMask);
      KMP_CPU_ZERO(tempMask);
      setSize = 0;
      KMP_CPU_SET_ITERATE(j, previousMask) {
        if (!KMP_CPU_ISSET(j, previousMask)) {
          continue;
        }
        if ((j + stride > maxOsId) || (j + stride < 0) ||
            (!KMP_CPU_ISSET(j, __kmp_affin_fullMask)) ||
            (!KMP_CPU_ISSET(j + stride,
                            KMP_CPU_INDEX(osId2Mask, j + stride)))) {
          if ((__kmp_affinity_verbose ||
               (__kmp_affinity_warnings &&
                (__kmp_affinity_type != affinity_none))) &&
              i < count - 1) {
            KMP_WARNING(AffIgnoreInvalidProcID, j + stride);
          }
          continue;
        }
        KMP_CPU_SET(j + stride, tempMask);
        setSize++;
      }
    }
    KMP_CPU_ZERO(tempMask);
    setSize = 0;

    // Valid follow sets are ',' and EOL.
    SKIP_WS(scan);
    if (*scan == '\0') {
      break;
    }
    if (*scan == ',') {
      scan++; // skip ','
      continue;
    }

    KMP_ASSERT2(0, "bad explicit places list");
  }
  *out_numMasks = nextNewMask;
  if (nextNewMask == 0) {
    *out_masks = NULL;
    KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
    return;
  }
  KMP_CPU_ALLOC_ARRAY((*out_masks), nextNewMask);
  KMP_CPU_FREE(tempMask);
  KMP_CPU_FREE(previousMask);
  for (i = 0; i < nextNewMask; i++) {
    kmp_affin_mask_t *src = KMP_CPU_INDEX(newMasks, i);
    kmp_affin_mask_t *dest = KMP_CPU_INDEX((*out_masks), i);
    KMP_CPU_COPY(dest, src);
  }
  KMP_CPU_INTERNAL_FREE_ARRAY(newMasks, numNewMasks);
}
3034 #undef ADD_MASK_OSID 3037 static int __kmp_hwloc_count_children_by_type(hwloc_topology_t t, hwloc_obj_t o,
3038 hwloc_obj_type_t type,
3040 if (!hwloc_compare_types(o->type, type)) {
3046 for (
unsigned i = 0; i < o->arity; i++)
3047 sum += __kmp_hwloc_count_children_by_type(t, o->children[i], type, f);
3051 static int __kmp_hwloc_count_children_by_depth(hwloc_topology_t t,
3052 hwloc_obj_t o,
unsigned depth,
3054 if (o->depth == depth) {
3060 for (
unsigned i = 0; i < o->arity; i++)
3061 sum += __kmp_hwloc_count_children_by_depth(t, o->children[i], depth, f);
3065 static int __kmp_hwloc_skip_PUs_obj(hwloc_topology_t t, hwloc_obj_t o) {
3068 hwloc_obj_t hT = NULL;
3069 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
3070 for (
int i = 0; i < N; ++i) {
3071 KMP_DEBUG_ASSERT(hT);
3072 unsigned idx = hT->os_index;
3073 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3074 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3075 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3078 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3083 static int __kmp_hwloc_obj_has_PUs(hwloc_topology_t t, hwloc_obj_t o) {
3085 hwloc_obj_t hT = NULL;
3086 int N = __kmp_hwloc_count_children_by_type(t, o, HWLOC_OBJ_PU, &hT);
3087 for (
int i = 0; i < N; ++i) {
3088 KMP_DEBUG_ASSERT(hT);
3089 unsigned idx = hT->os_index;
3090 if (KMP_CPU_ISSET(idx, __kmp_affin_fullMask))
3092 hT = hwloc_get_next_obj_by_type(t, HWLOC_OBJ_PU, hT);
3096 #endif // KMP_USE_HWLOC 3098 static void __kmp_apply_thread_places(AddrUnsPair **pAddr,
int depth) {
3099 AddrUnsPair *newAddr;
3100 if (__kmp_hws_requested == 0)
3103 if (__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3107 hwloc_topology_t tp = __kmp_hwloc_topology;
3108 int nS = 0, nN = 0, nL = 0, nC = 0,
3110 int nCr = 0, nTr = 0;
3111 int nPkg = 0, nCo = 0, n_new = 0, n_old = 0, nCpP = 0, nTpC = 0;
3112 hwloc_obj_t hT, hC, hL, hN, hS;
3116 int numa_support = 0, tile_support = 0;
3117 if (__kmp_pu_os_idx)
3118 hT = hwloc_get_pu_obj_by_os_index(tp,
3119 __kmp_pu_os_idx[__kmp_avail_proc - 1]);
3121 hT = hwloc_get_obj_by_type(tp, HWLOC_OBJ_PU, __kmp_avail_proc - 1);
3123 KMP_WARNING(AffHWSubsetUnsupported);
3127 hN = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hT);
3128 hS = hwloc_get_ancestor_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hT);
3129 if (hN != NULL && hN->depth > hS->depth) {
3131 }
else if (__kmp_hws_node.num > 0) {
3133 KMP_WARNING(AffHWSubsetUnsupported);
3137 L2depth = hwloc_get_cache_type_depth(tp, 2, HWLOC_OBJ_CACHE_UNIFIED);
3138 hL = hwloc_get_ancestor_obj_by_depth(tp, L2depth, hT);
3140 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC) > 1) {
3142 }
else if (__kmp_hws_tile.num > 0) {
3143 if (__kmp_hws_core.num == 0) {
3144 __kmp_hws_core = __kmp_hws_tile;
3145 __kmp_hws_tile.num = 0;
3148 KMP_WARNING(AffHWSubsetInvalid);
3155 if (__kmp_hws_socket.num == 0)
3156 __kmp_hws_socket.num = nPackages;
3157 if (__kmp_hws_socket.offset >= nPackages) {
3158 KMP_WARNING(AffHWSubsetManySockets);
3162 int NN = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE,
3164 if (__kmp_hws_node.num == 0)
3165 __kmp_hws_node.num = NN;
3166 if (__kmp_hws_node.offset >= NN) {
3167 KMP_WARNING(AffHWSubsetManyNodes);
3172 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3173 if (__kmp_hws_tile.num == 0) {
3174 __kmp_hws_tile.num = NL + 1;
3176 if (__kmp_hws_tile.offset >= NL) {
3177 KMP_WARNING(AffHWSubsetManyTiles);
3180 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3182 if (__kmp_hws_core.num == 0)
3183 __kmp_hws_core.num = NC;
3184 if (__kmp_hws_core.offset >= NC) {
3185 KMP_WARNING(AffHWSubsetManyCores);
3189 int NC = __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE,
3191 if (__kmp_hws_core.num == 0)
3192 __kmp_hws_core.num = NC;
3193 if (__kmp_hws_core.offset >= NC) {
3194 KMP_WARNING(AffHWSubsetManyCores);
3201 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3202 if (__kmp_hws_tile.num == 0)
3203 __kmp_hws_tile.num = NL;
3204 if (__kmp_hws_tile.offset >= NL) {
3205 KMP_WARNING(AffHWSubsetManyTiles);
3208 int NC = __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE,
3210 if (__kmp_hws_core.num == 0)
3211 __kmp_hws_core.num = NC;
3212 if (__kmp_hws_core.offset >= NC) {
3213 KMP_WARNING(AffHWSubsetManyCores);
3217 int NC = __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE,
3219 if (__kmp_hws_core.num == 0)
3220 __kmp_hws_core.num = NC;
3221 if (__kmp_hws_core.offset >= NC) {
3222 KMP_WARNING(AffHWSubsetManyCores);
3227 if (__kmp_hws_proc.num == 0)
3228 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3229 if (__kmp_hws_proc.offset >= __kmp_nThreadsPerCore) {
3230 KMP_WARNING(AffHWSubsetManyProcs);
3236 newAddr = (AddrUnsPair *)__kmp_allocate(
sizeof(AddrUnsPair) *
3240 int NP = hwloc_get_nbobjs_by_type(tp, HWLOC_OBJ_PACKAGE);
3241 for (
int s = 0; s < NP; ++s) {
3243 hS = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PACKAGE, hS);
3244 if (!__kmp_hwloc_obj_has_PUs(tp, hS))
3247 if (nS <= __kmp_hws_socket.offset ||
3248 nS > __kmp_hws_socket.num + __kmp_hws_socket.offset) {
3249 n_old += __kmp_hwloc_skip_PUs_obj(tp, hS);
3260 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_NUMANODE, &hN);
3261 for (
int n = 0; n < NN; ++n) {
3263 if (!__kmp_hwloc_obj_has_PUs(tp, hN)) {
3264 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3268 if (nN <= __kmp_hws_node.offset ||
3269 nN > __kmp_hws_node.num + __kmp_hws_node.offset) {
3271 n_old += __kmp_hwloc_skip_PUs_obj(tp, hN);
3272 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3279 int NL = __kmp_hwloc_count_children_by_depth(tp, hN, L2depth, &hL);
3280 for (
int l = 0; l < NL; ++l) {
3282 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3283 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3287 if (nL <= __kmp_hws_tile.offset ||
3288 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3290 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3291 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3298 int NC = __kmp_hwloc_count_children_by_type(tp, hL,
3299 HWLOC_OBJ_CORE, &hC);
3300 for (
int c = 0; c < NC; ++c) {
3302 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3303 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3307 if (nC <= __kmp_hws_core.offset ||
3308 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3310 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3311 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3319 int NT = __kmp_hwloc_count_children_by_type(tp, hC,
3321 for (
int t = 0; t < NT; ++t) {
3324 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3325 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3329 if (nT <= __kmp_hws_proc.offset ||
3330 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3332 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3334 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3335 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3340 newAddr[n_new] = (*pAddr)[n_old];
3343 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3351 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3353 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3361 __kmp_hwloc_count_children_by_type(tp, hN, HWLOC_OBJ_CORE, &hC);
3362 for (
int c = 0; c < NC; ++c) {
3364 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3365 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3369 if (nC <= __kmp_hws_core.offset ||
3370 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3372 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3373 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3381 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3382 for (
int t = 0; t < NT; ++t) {
3385 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3386 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3390 if (nT <= __kmp_hws_proc.offset ||
3391 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3393 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3395 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3396 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3401 newAddr[n_new] = (*pAddr)[n_old];
3404 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3412 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3415 hN = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_NUMANODE, hN);
3423 int NL = __kmp_hwloc_count_children_by_depth(tp, hS, L2depth, &hL);
3424 for (
int l = 0; l < NL; ++l) {
3426 if (!__kmp_hwloc_obj_has_PUs(tp, hL)) {
3427 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3431 if (nL <= __kmp_hws_tile.offset ||
3432 nL > __kmp_hws_tile.num + __kmp_hws_tile.offset) {
3434 n_old += __kmp_hwloc_skip_PUs_obj(tp, hL);
3435 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3443 __kmp_hwloc_count_children_by_type(tp, hL, HWLOC_OBJ_CORE, &hC);
3444 for (
int c = 0; c < NC; ++c) {
3446 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3447 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3451 if (nC <= __kmp_hws_core.offset ||
3452 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3454 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3455 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3464 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3465 for (
int t = 0; t < NT; ++t) {
3468 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3469 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3473 if (nT <= __kmp_hws_proc.offset ||
3474 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3476 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3478 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3479 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3484 newAddr[n_new] = (*pAddr)[n_old];
3487 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3495 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3497 hL = hwloc_get_next_obj_by_depth(tp, L2depth, hL);
3505 __kmp_hwloc_count_children_by_type(tp, hS, HWLOC_OBJ_CORE, &hC);
3506 for (
int c = 0; c < NC; ++c) {
3508 if (!__kmp_hwloc_obj_has_PUs(tp, hC)) {
3509 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3513 if (nC <= __kmp_hws_core.offset ||
3514 nC > __kmp_hws_core.num + __kmp_hws_core.offset) {
3516 n_old += __kmp_hwloc_skip_PUs_obj(tp, hC);
3517 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3526 __kmp_hwloc_count_children_by_type(tp, hC, HWLOC_OBJ_PU, &hT);
3527 for (
int t = 0; t < NT; ++t) {
3530 if (!KMP_CPU_ISSET(idx, __kmp_affin_fullMask)) {
3531 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3535 if (nT <= __kmp_hws_proc.offset ||
3536 nT > __kmp_hws_proc.num + __kmp_hws_proc.offset) {
3538 KMP_CPU_CLR(idx, __kmp_affin_fullMask);
3540 KC_TRACE(200, (
"KMP_HW_SUBSET: skipped proc %d\n", idx));
3541 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3546 newAddr[n_new] = (*pAddr)[n_old];
3549 hT = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_PU, hT);
3557 hC = hwloc_get_next_obj_by_type(tp, HWLOC_OBJ_CORE, hC);
3569 KMP_DEBUG_ASSERT(n_old == __kmp_avail_proc);
3570 KMP_DEBUG_ASSERT(nPkg > 0);
3571 KMP_DEBUG_ASSERT(nCpP > 0);
3572 KMP_DEBUG_ASSERT(nTpC > 0);
3573 KMP_DEBUG_ASSERT(nCo > 0);
3574 KMP_DEBUG_ASSERT(nPkg <= nPackages);
3575 KMP_DEBUG_ASSERT(nCpP <= nCoresPerPkg);
3576 KMP_DEBUG_ASSERT(nTpC <= __kmp_nThreadsPerCore);
3577 KMP_DEBUG_ASSERT(nCo <= __kmp_ncores);
3580 nCoresPerPkg = nCpP;
3581 __kmp_nThreadsPerCore = nTpC;
3582 __kmp_avail_proc = n_new;
3586 #endif // KMP_USE_HWLOC 3588 int n_old = 0, n_new = 0, proc_num = 0;
3589 if (__kmp_hws_node.num > 0 || __kmp_hws_tile.num > 0) {
3590 KMP_WARNING(AffHWSubsetNoHWLOC);
3593 if (__kmp_hws_socket.num == 0)
3594 __kmp_hws_socket.num = nPackages;
3595 if (__kmp_hws_core.num == 0)
3596 __kmp_hws_core.num = nCoresPerPkg;
3597 if (__kmp_hws_proc.num == 0 || __kmp_hws_proc.num > __kmp_nThreadsPerCore)
3598 __kmp_hws_proc.num = __kmp_nThreadsPerCore;
3599 if (!__kmp_affinity_uniform_topology()) {
3600 KMP_WARNING(AffHWSubsetNonUniform);
3604 KMP_WARNING(AffHWSubsetNonThreeLevel);
3607 if (__kmp_hws_socket.offset + __kmp_hws_socket.num > nPackages) {
3608 KMP_WARNING(AffHWSubsetManySockets);
3611 if (__kmp_hws_core.offset + __kmp_hws_core.num > nCoresPerPkg) {
3612 KMP_WARNING(AffHWSubsetManyCores);
3617 newAddr = (AddrUnsPair *)__kmp_allocate(
3618 sizeof(AddrUnsPair) * __kmp_hws_socket.num * __kmp_hws_core.num *
3619 __kmp_hws_proc.num);
3620 for (
int i = 0; i < nPackages; ++i) {
3621 if (i < __kmp_hws_socket.offset ||
3622 i >= __kmp_hws_socket.offset + __kmp_hws_socket.num) {
3624 n_old += nCoresPerPkg * __kmp_nThreadsPerCore;
3625 if (__kmp_pu_os_idx != NULL) {
3627 for (
int j = 0; j < nCoresPerPkg; ++j) {
3628 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3629 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3636 for (
int j = 0; j < nCoresPerPkg; ++j) {
3637 if (j < __kmp_hws_core.offset ||
3638 j >= __kmp_hws_core.offset +
3639 __kmp_hws_core.num) {
3640 n_old += __kmp_nThreadsPerCore;
3641 if (__kmp_pu_os_idx != NULL) {
3642 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3643 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3649 for (
int k = 0; k < __kmp_nThreadsPerCore; ++k) {
3650 if (k < __kmp_hws_proc.num) {
3652 newAddr[n_new] = (*pAddr)[n_old];
3655 if (__kmp_pu_os_idx != NULL)
3656 KMP_CPU_CLR(__kmp_pu_os_idx[proc_num], __kmp_affin_fullMask);
3665 KMP_DEBUG_ASSERT(n_old == nPackages * nCoresPerPkg * __kmp_nThreadsPerCore);
3666 KMP_DEBUG_ASSERT(n_new ==
3667 __kmp_hws_socket.num * __kmp_hws_core.num *
3668 __kmp_hws_proc.num);
3669 nPackages = __kmp_hws_socket.num;
3670 nCoresPerPkg = __kmp_hws_core.num;
3671 __kmp_nThreadsPerCore = __kmp_hws_proc.num;
3672 __kmp_avail_proc = n_new;
3673 __kmp_ncores = nPackages * __kmp_hws_core.num;
3679 if (__kmp_affinity_verbose) {
3680 char m[KMP_AFFIN_MASK_PRINT_LEN];
3681 __kmp_affinity_print_mask(m, KMP_AFFIN_MASK_PRINT_LEN,
3682 __kmp_affin_fullMask);
3683 if (__kmp_affinity_respect_mask) {
3684 KMP_INFORM(InitOSProcSetRespect,
"KMP_HW_SUBSET", m);
3686 KMP_INFORM(InitOSProcSetNotRespect,
"KMP_HW_SUBSET", m);
3688 KMP_INFORM(AvailableOSProc,
"KMP_HW_SUBSET", __kmp_avail_proc);
3690 __kmp_str_buf_init(&buf);
3691 __kmp_str_buf_print(&buf,
"%d", nPackages);
3692 KMP_INFORM(TopologyExtra,
"KMP_HW_SUBSET", buf.str, nCoresPerPkg,
3693 __kmp_nThreadsPerCore, __kmp_ncores);
3694 __kmp_str_buf_free(&buf);
3697 if (__kmp_pu_os_idx != NULL) {
3698 __kmp_free(__kmp_pu_os_idx);
3699 __kmp_pu_os_idx = NULL;
3705 static int __kmp_affinity_find_core_level(
const AddrUnsPair *address2os,
3706 int nprocs,
int bottom_level) {
3709 for (
int i = 0; i < nprocs; i++) {
3710 for (
int j = bottom_level; j > 0; j--) {
3711 if (address2os[i].first.labels[j] > 0) {
3712 if (core_level < (j - 1)) {
3722 static int __kmp_affinity_compute_ncores(
const AddrUnsPair *address2os,
3723 int nprocs,
int bottom_level,
3729 for (i = 0; i < nprocs; i++) {
3730 for (j = bottom_level; j > core_level; j--) {
3731 if ((i + 1) < nprocs) {
3732 if (address2os[i + 1].first.labels[j] > 0) {
3737 if (j == core_level) {
3741 if (j > core_level) {
3750 static int __kmp_affinity_find_core(
const AddrUnsPair *address2os,
int proc,
3751 int bottom_level,
int core_level) {
3752 return __kmp_affinity_compute_ncores(address2os, proc + 1, bottom_level,
3759 static int __kmp_affinity_max_proc_per_core(
const AddrUnsPair *address2os,
3760 int nprocs,
int bottom_level,
3762 int maxprocpercore = 0;
3764 if (core_level < bottom_level) {
3765 for (
int i = 0; i < nprocs; i++) {
3766 int percore = address2os[i].first.labels[core_level + 1] + 1;
3768 if (percore > maxprocpercore) {
3769 maxprocpercore = percore;
3775 return maxprocpercore;
3778 static AddrUnsPair *address2os = NULL;
3779 static int *procarr = NULL;
3780 static int __kmp_aff_depth = 0;
3782 #define KMP_EXIT_AFF_NONE \ 3783 KMP_ASSERT(__kmp_affinity_type == affinity_none); \ 3784 KMP_ASSERT(address2os == NULL); \ 3785 __kmp_apply_thread_places(NULL, 0); \ 3788 static int __kmp_affinity_cmp_Address_child_num(
const void *a,
const void *b) {
3789 const Address *aa = &(((
const AddrUnsPair *)a)->first);
3790 const Address *bb = &(((
const AddrUnsPair *)b)->first);
3791 unsigned depth = aa->depth;
3793 KMP_DEBUG_ASSERT(depth == bb->depth);
3794 KMP_DEBUG_ASSERT((
unsigned)__kmp_affinity_compact <= depth);
3795 KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
3796 for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
3797 int j = depth - i - 1;
3798 if (aa->childNums[j] < bb->childNums[j])
3800 if (aa->childNums[j] > bb->childNums[j])
3803 for (; i < depth; i++) {
3804 int j = i - __kmp_affinity_compact;
3805 if (aa->childNums[j] < bb->childNums[j])
3807 if (aa->childNums[j] > bb->childNums[j])
3813 static void __kmp_aux_affinity_initialize(
void) {
3814 if (__kmp_affinity_masks != NULL) {
3815 KMP_ASSERT(__kmp_affin_fullMask != NULL);
3823 if (__kmp_affin_fullMask == NULL) {
3824 KMP_CPU_ALLOC(__kmp_affin_fullMask);
3826 if (KMP_AFFINITY_CAPABLE()) {
3827 if (__kmp_affinity_respect_mask) {
3828 __kmp_get_system_affinity(__kmp_affin_fullMask, TRUE);
3832 __kmp_avail_proc = 0;
3833 KMP_CPU_SET_ITERATE(i, __kmp_affin_fullMask) {
3834 if (!KMP_CPU_ISSET(i, __kmp_affin_fullMask)) {
3839 if (__kmp_avail_proc > __kmp_xproc) {
3840 if (__kmp_affinity_verbose ||
3841 (__kmp_affinity_warnings &&
3842 (__kmp_affinity_type != affinity_none))) {
3843 KMP_WARNING(ErrorInitializeAffinity);
3845 __kmp_affinity_type = affinity_none;
3846 KMP_AFFINITY_DISABLE();
3850 __kmp_affinity_entire_machine_mask(__kmp_affin_fullMask);
3851 __kmp_avail_proc = __kmp_xproc;
3856 kmp_i18n_id_t msg_id = kmp_i18n_null;
3860 if ((__kmp_cpuinfo_file != NULL) &&
3861 (__kmp_affinity_top_method == affinity_top_method_all)) {
3862 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
3865 if (__kmp_affinity_top_method == affinity_top_method_all) {
3869 const char *file_name = NULL;
3873 __kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC) {
3874 if (__kmp_affinity_verbose) {
3875 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
3877 if (!__kmp_hwloc_error) {
3878 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
3881 }
else if (depth < 0 && __kmp_affinity_verbose) {
3882 KMP_INFORM(AffIgnoringHwloc,
"KMP_AFFINITY");
3884 }
else if (__kmp_affinity_verbose) {
3885 KMP_INFORM(AffIgnoringHwloc,
"KMP_AFFINITY");
3890 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3893 if (__kmp_affinity_verbose) {
3894 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
3898 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
3904 if (__kmp_affinity_verbose) {
3905 if (msg_id != kmp_i18n_null) {
3906 KMP_INFORM(AffInfoStrStr,
"KMP_AFFINITY",
3907 __kmp_i18n_catgets(msg_id),
3908 KMP_I18N_STR(DecodingLegacyAPIC));
3910 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY",
3911 KMP_I18N_STR(DecodingLegacyAPIC));
3916 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
3928 if (__kmp_affinity_verbose) {
3929 if (msg_id != kmp_i18n_null) {
3930 KMP_INFORM(AffStrParseFilename,
"KMP_AFFINITY",
3931 __kmp_i18n_catgets(msg_id),
"/proc/cpuinfo");
3933 KMP_INFORM(AffParseFilename,
"KMP_AFFINITY",
"/proc/cpuinfo");
3937 FILE *f = fopen(
"/proc/cpuinfo",
"r");
3939 msg_id = kmp_i18n_str_CantOpenCpuinfo;
3941 file_name =
"/proc/cpuinfo";
3943 __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
3953 #if KMP_GROUP_AFFINITY 3955 if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
3956 if (__kmp_affinity_verbose) {
3957 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
3960 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
3961 KMP_ASSERT(depth != 0);
3967 if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
3968 if (file_name == NULL) {
3969 KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
3970 }
else if (line == 0) {
3971 KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
3973 KMP_INFORM(UsingFlatOSFileLine, file_name, line,
3974 __kmp_i18n_catgets(msg_id));
3980 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
3984 KMP_ASSERT(depth > 0);
3985 KMP_ASSERT(address2os != NULL);
3993 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 3995 else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
3996 if (__kmp_affinity_verbose) {
3997 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
4000 depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
4005 KMP_ASSERT(msg_id != kmp_i18n_null);
4006 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4008 }
else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
4009 if (__kmp_affinity_verbose) {
4010 KMP_INFORM(AffInfoStr,
"KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
4013 depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
4018 KMP_ASSERT(msg_id != kmp_i18n_null);
4019 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4025 else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
4026 const char *filename;
4027 if (__kmp_cpuinfo_file != NULL) {
4028 filename = __kmp_cpuinfo_file;
4030 filename =
"/proc/cpuinfo";
4033 if (__kmp_affinity_verbose) {
4034 KMP_INFORM(AffParseFilename,
"KMP_AFFINITY", filename);
4037 FILE *f = fopen(filename,
"r");
4040 if (__kmp_cpuinfo_file != NULL) {
4041 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
4042 KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
4045 __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
4046 KMP_ERR(code), __kmp_msg_null);
4050 depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
4053 KMP_ASSERT(msg_id != kmp_i18n_null);
4055 KMP_FATAL(FileLineMsgExiting, filename, line,
4056 __kmp_i18n_catgets(msg_id));
4058 KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
4061 if (__kmp_affinity_type == affinity_none) {
4062 KMP_ASSERT(depth == 0);
4067 #if KMP_GROUP_AFFINITY 4069 else if (__kmp_affinity_top_method == affinity_top_method_group) {
4070 if (__kmp_affinity_verbose) {
4071 KMP_INFORM(AffWindowsProcGroupMap,
"KMP_AFFINITY");
4074 depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
4075 KMP_ASSERT(depth != 0);
4077 KMP_ASSERT(msg_id != kmp_i18n_null);
4078 KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
4084 else if (__kmp_affinity_top_method == affinity_top_method_flat) {
4085 if (__kmp_affinity_verbose) {
4086 KMP_INFORM(AffUsingFlatOS,
"KMP_AFFINITY");
4089 depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
4094 KMP_ASSERT(depth > 0);
4095 KMP_ASSERT(address2os != NULL);
4099 else if (__kmp_affinity_top_method == affinity_top_method_hwloc) {
4100 KMP_ASSERT(__kmp_affinity_dispatch->get_api_type() == KMPAffinity::HWLOC);
4101 if (__kmp_affinity_verbose) {
4102 KMP_INFORM(AffUsingHwloc,
"KMP_AFFINITY");
4104 depth = __kmp_affinity_create_hwloc_map(&address2os, &msg_id);
4109 #endif // KMP_USE_HWLOC 4111 if (address2os == NULL) {
4112 if (KMP_AFFINITY_CAPABLE() &&
4113 (__kmp_affinity_verbose ||
4114 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none)))) {
4115 KMP_WARNING(ErrorInitializeAffinity);
4117 __kmp_affinity_type = affinity_none;
4118 KMP_AFFINITY_DISABLE();
4122 __kmp_apply_thread_places(&address2os, depth);
4127 kmp_affin_mask_t *osId2Mask =
4128 __kmp_create_masks(&maxIndex, &numUnique, address2os, __kmp_avail_proc);
4129 if (__kmp_affinity_gran_levels == 0) {
4130 KMP_DEBUG_ASSERT((
int)numUnique == __kmp_avail_proc);
4136 __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
4138 switch (__kmp_affinity_type) {
4140 case affinity_explicit:
4141 KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
4143 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4146 __kmp_affinity_process_proclist(
4147 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4148 __kmp_affinity_proclist, osId2Mask, maxIndex);
4152 __kmp_affinity_process_placelist(
4153 &__kmp_affinity_masks, &__kmp_affinity_num_masks,
4154 __kmp_affinity_proclist, osId2Mask, maxIndex);
4157 if (__kmp_affinity_num_masks == 0) {
4158 if (__kmp_affinity_verbose ||
4159 (__kmp_affinity_warnings && (__kmp_affinity_type != affinity_none))) {
4160 KMP_WARNING(AffNoValidProcID);
4162 __kmp_affinity_type = affinity_none;
4172 case affinity_logical:
4173 __kmp_affinity_compact = 0;
4174 if (__kmp_affinity_offset) {
4175 __kmp_affinity_offset =
4176 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4180 case affinity_physical:
4181 if (__kmp_nThreadsPerCore > 1) {
4182 __kmp_affinity_compact = 1;
4183 if (__kmp_affinity_compact >= depth) {
4184 __kmp_affinity_compact = 0;
4187 __kmp_affinity_compact = 0;
4189 if (__kmp_affinity_offset) {
4190 __kmp_affinity_offset =
4191 __kmp_nThreadsPerCore * __kmp_affinity_offset % __kmp_avail_proc;
4195 case affinity_scatter:
4196 if (__kmp_affinity_compact >= depth) {
4197 __kmp_affinity_compact = 0;
4199 __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
4203 case affinity_compact:
4204 if (__kmp_affinity_compact >= depth) {
4205 __kmp_affinity_compact = depth - 1;
4209 case affinity_balanced:
4211 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4212 KMP_WARNING(AffBalancedNotAvail,
"KMP_AFFINITY");
4214 __kmp_affinity_type = affinity_none;
4216 }
else if (__kmp_affinity_uniform_topology()) {
4221 __kmp_aff_depth = depth;
4223 int core_level = __kmp_affinity_find_core_level(
4224 address2os, __kmp_avail_proc, depth - 1);
4225 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4226 depth - 1, core_level);
4227 int maxprocpercore = __kmp_affinity_max_proc_per_core(
4228 address2os, __kmp_avail_proc, depth - 1, core_level);
4230 int nproc = ncores * maxprocpercore;
4231 if ((nproc < 2) || (nproc < __kmp_avail_proc)) {
4232 if (__kmp_affinity_verbose || __kmp_affinity_warnings) {
4233 KMP_WARNING(AffBalancedNotAvail,
"KMP_AFFINITY");
4235 __kmp_affinity_type = affinity_none;
4239 procarr = (
int *)__kmp_allocate(
sizeof(
int) * nproc);
4240 for (
int i = 0; i < nproc; i++) {
4246 for (
int i = 0; i < __kmp_avail_proc; i++) {
4247 int proc = address2os[i].second;
4249 __kmp_affinity_find_core(address2os, i, depth - 1, core_level);
4251 if (core == lastcore) {
4258 procarr[core * maxprocpercore + inlastcore] = proc;
4266 if (__kmp_affinity_dups) {
4267 __kmp_affinity_num_masks = __kmp_avail_proc;
4269 __kmp_affinity_num_masks = numUnique;
4273 if ((__kmp_nested_proc_bind.bind_types[0] != proc_bind_intel) &&
4274 (__kmp_affinity_num_places > 0) &&
4275 ((
unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks)) {
4276 __kmp_affinity_num_masks = __kmp_affinity_num_places;
4280 KMP_CPU_ALLOC_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4284 qsort(address2os, __kmp_avail_proc,
sizeof(*address2os),
4285 __kmp_affinity_cmp_Address_child_num);
4289 for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
4290 if ((!__kmp_affinity_dups) && (!address2os[i].first.leader)) {
4293 unsigned osId = address2os[i].second;
4294 kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
4295 kmp_affin_mask_t *dest = KMP_CPU_INDEX(__kmp_affinity_masks, j);
4296 KMP_ASSERT(KMP_CPU_ISSET(osId, src));
4297 KMP_CPU_COPY(dest, src);
4298 if (++j >= __kmp_affinity_num_masks) {
4302 KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
4307 KMP_ASSERT2(0,
"Unexpected affinity setting");
4310 KMP_CPU_FREE_ARRAY(osId2Mask, maxIndex + 1);
4311 machine_hierarchy.init(address2os, __kmp_avail_proc);
4313 #undef KMP_EXIT_AFF_NONE 4315 void __kmp_affinity_initialize(
void) {
4324 int disabled = (__kmp_affinity_type == affinity_disabled);
4325 if (!KMP_AFFINITY_CAPABLE()) {
4326 KMP_ASSERT(disabled);
4329 __kmp_affinity_type = affinity_none;
4331 __kmp_aux_affinity_initialize();
4333 __kmp_affinity_type = affinity_disabled;
4337 void __kmp_affinity_uninitialize(
void) {
4338 if (__kmp_affinity_masks != NULL) {
4339 KMP_CPU_FREE_ARRAY(__kmp_affinity_masks, __kmp_affinity_num_masks);
4340 __kmp_affinity_masks = NULL;
4342 if (__kmp_affin_fullMask != NULL) {
4343 KMP_CPU_FREE(__kmp_affin_fullMask);
4344 __kmp_affin_fullMask = NULL;
4346 __kmp_affinity_num_masks = 0;
4347 __kmp_affinity_type = affinity_default;
4349 __kmp_affinity_num_places = 0;
4351 if (__kmp_affinity_proclist != NULL) {
4352 __kmp_free(__kmp_affinity_proclist);
4353 __kmp_affinity_proclist = NULL;
4355 if (address2os != NULL) {
4356 __kmp_free(address2os);
4359 if (procarr != NULL) {
4360 __kmp_free(procarr);
4364 if (__kmp_hwloc_topology != NULL) {
4365 hwloc_topology_destroy(__kmp_hwloc_topology);
4366 __kmp_hwloc_topology = NULL;
4369 KMPAffinity::destroy_api();
4372 void __kmp_affinity_set_init_mask(
int gtid,
int isa_root) {
4373 if (!KMP_AFFINITY_CAPABLE()) {
4377 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4378 if (th->th.th_affin_mask == NULL) {
4379 KMP_CPU_ALLOC(th->th.th_affin_mask);
4381 KMP_CPU_ZERO(th->th.th_affin_mask);
4388 kmp_affin_mask_t *mask;
4392 if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
4395 if ((__kmp_affinity_type == affinity_none) ||
4396 (__kmp_affinity_type == affinity_balanced)) {
4397 #if KMP_GROUP_AFFINITY 4398 if (__kmp_num_proc_groups > 1) {
4402 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4404 mask = __kmp_affin_fullMask;
4406 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4407 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4408 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4414 (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
4415 #if KMP_GROUP_AFFINITY 4416 if (__kmp_num_proc_groups > 1) {
4420 KMP_ASSERT(__kmp_affin_fullMask != NULL);
4422 mask = __kmp_affin_fullMask;
4426 KMP_DEBUG_ASSERT(__kmp_affinity_num_masks > 0);
4427 i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
4428 mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
4434 th->th.th_current_place = i;
4436 th->th.th_new_place = i;
4437 th->th.th_first_place = 0;
4438 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4441 if (i == KMP_PLACE_ALL) {
4442 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to all places\n",
4445 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
4452 (
"__kmp_affinity_set_init_mask: binding T#%d to __kmp_affin_fullMask\n",
4455 KA_TRACE(100, (
"__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
4460 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4462 if (__kmp_affinity_verbose) {
4463 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4464 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4465 th->th.th_affin_mask);
4466 KMP_INFORM(BoundToOSProcSet,
"KMP_AFFINITY", (kmp_int32)getpid(),
4467 __kmp_gettid(), gtid, buf);
4474 if (__kmp_affinity_type == affinity_none) {
4475 __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
4478 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4483 void __kmp_affinity_set_place(
int gtid) {
4486 if (!KMP_AFFINITY_CAPABLE()) {
4490 kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
4492 KA_TRACE(100, (
"__kmp_affinity_set_place: binding T#%d to place %d (current " 4494 gtid, th->th.th_new_place, th->th.th_current_place));
4497 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4498 KMP_ASSERT(th->th.th_new_place >= 0);
4499 KMP_ASSERT((
unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
4500 if (th->th.th_first_place <= th->th.th_last_place) {
4501 KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place) &&
4502 (th->th.th_new_place <= th->th.th_last_place));
4504 KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place) ||
4505 (th->th.th_new_place >= th->th.th_last_place));
4510 kmp_affin_mask_t *mask =
4511 KMP_CPU_INDEX(__kmp_affinity_masks, th->th.th_new_place);
4512 KMP_CPU_COPY(th->th.th_affin_mask, mask);
4513 th->th.th_current_place = th->th.th_new_place;
4515 if (__kmp_affinity_verbose) {
4516 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4517 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4518 th->th.th_affin_mask);
4519 KMP_INFORM(BoundToOSProcSet,
"OMP_PROC_BIND", (kmp_int32)getpid(),
4520 __kmp_gettid(), gtid, buf);
4522 __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
4527 int __kmp_aux_set_affinity(
void **mask) {
4532 if (!KMP_AFFINITY_CAPABLE()) {
4536 gtid = __kmp_entry_gtid();
4538 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4539 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4540 (kmp_affin_mask_t *)(*mask));
4542 "kmp_set_affinity: setting affinity mask for thread %d = %s\n", gtid,
4546 if (__kmp_env_consistency_check) {
4547 if ((mask == NULL) || (*mask == NULL)) {
4548 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4553 KMP_CPU_SET_ITERATE(proc, ((kmp_affin_mask_t *)(*mask))) {
4554 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4555 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4557 if (!KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
4562 if (num_procs == 0) {
4563 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4566 #if KMP_GROUP_AFFINITY 4567 if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
4568 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity");
4574 th = __kmp_threads[gtid];
4575 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4576 retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4578 KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
4582 th->th.th_current_place = KMP_PLACE_UNDEFINED;
4583 th->th.th_new_place = KMP_PLACE_UNDEFINED;
4584 th->th.th_first_place = 0;
4585 th->th.th_last_place = __kmp_affinity_num_masks - 1;
4588 th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;
4594 int __kmp_aux_get_affinity(
void **mask) {
4599 if (!KMP_AFFINITY_CAPABLE()) {
4603 gtid = __kmp_entry_gtid();
4604 th = __kmp_threads[gtid];
4605 KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
4608 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4609 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4610 th->th.th_affin_mask);
4611 __kmp_printf(
"kmp_get_affinity: stored affinity mask for thread %d = %s\n",
4615 if (__kmp_env_consistency_check) {
4616 if ((mask == NULL) || (*mask == NULL)) {
4617 KMP_FATAL(AffinityInvalidMask,
"kmp_get_affinity");
4623 retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
4625 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4626 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4627 (kmp_affin_mask_t *)(*mask));
4628 __kmp_printf(
"kmp_get_affinity: system affinity mask for thread %d = %s\n",
4635 KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
4641 int __kmp_aux_get_affinity_max_proc() {
4642 if (!KMP_AFFINITY_CAPABLE()) {
4645 #if KMP_GROUP_AFFINITY 4646 if (__kmp_num_proc_groups > 1) {
4647 return (
int)(__kmp_num_proc_groups *
sizeof(DWORD_PTR) * CHAR_BIT);
4653 int __kmp_aux_set_affinity_mask_proc(
int proc,
void **mask) {
4656 if (!KMP_AFFINITY_CAPABLE()) {
4661 int gtid = __kmp_entry_gtid();
4662 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4663 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4664 (kmp_affin_mask_t *)(*mask));
4665 __kmp_debug_printf(
"kmp_set_affinity_mask_proc: setting proc %d in " 4666 "affinity mask for thread %d = %s\n",
4670 if (__kmp_env_consistency_check) {
4671 if ((mask == NULL) || (*mask == NULL)) {
4672 KMP_FATAL(AffinityInvalidMask,
"kmp_set_affinity_mask_proc");
4676 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4679 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4683 KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
4687 int __kmp_aux_unset_affinity_mask_proc(
int proc,
void **mask) {
4690 if (!KMP_AFFINITY_CAPABLE()) {
4695 int gtid = __kmp_entry_gtid();
4696 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4697 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4698 (kmp_affin_mask_t *)(*mask));
4699 __kmp_debug_printf(
"kmp_unset_affinity_mask_proc: unsetting proc %d in " 4700 "affinity mask for thread %d = %s\n",
4704 if (__kmp_env_consistency_check) {
4705 if ((mask == NULL) || (*mask == NULL)) {
4706 KMP_FATAL(AffinityInvalidMask,
"kmp_unset_affinity_mask_proc");
4710 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4713 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4717 KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
4721 int __kmp_aux_get_affinity_mask_proc(
int proc,
void **mask) {
4724 if (!KMP_AFFINITY_CAPABLE()) {
4729 int gtid = __kmp_entry_gtid();
4730 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4731 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
4732 (kmp_affin_mask_t *)(*mask));
4733 __kmp_debug_printf(
"kmp_get_affinity_mask_proc: getting proc %d in " 4734 "affinity mask for thread %d = %s\n",
4738 if (__kmp_env_consistency_check) {
4739 if ((mask == NULL) || (*mask == NULL)) {
4740 KMP_FATAL(AffinityInvalidMask,
"kmp_get_affinity_mask_proc");
4744 if ((proc < 0) || (proc >= __kmp_aux_get_affinity_max_proc())) {
4747 if (!KMP_CPU_ISSET(proc, __kmp_affin_fullMask)) {
4751 return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
4755 void __kmp_balanced_affinity(
int tid,
int nthreads) {
4756 bool fine_gran =
true;
4758 switch (__kmp_affinity_gran) {
4759 case affinity_gran_fine:
4760 case affinity_gran_thread:
4762 case affinity_gran_core:
4763 if (__kmp_nThreadsPerCore > 1) {
4767 case affinity_gran_package:
4768 if (nCoresPerPkg > 1) {
4776 if (__kmp_affinity_uniform_topology()) {
4780 int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
4782 int ncores = __kmp_ncores;
4783 if ((nPackages > 1) && (__kmp_nth_per_core <= 1)) {
4784 __kmp_nth_per_core = __kmp_avail_proc / nPackages;
4788 int chunk = nthreads / ncores;
4790 int big_cores = nthreads % ncores;
4792 int big_nth = (chunk + 1) * big_cores;
4793 if (tid < big_nth) {
4794 coreID = tid / (chunk + 1);
4795 threadID = (tid % (chunk + 1)) % __kmp_nth_per_core;
4797 coreID = (tid - big_cores) / chunk;
4798 threadID = ((tid - big_cores) % chunk) % __kmp_nth_per_core;
4801 KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
4802 "Illegal set affinity operation when not capable");
4804 kmp_affin_mask_t *mask;
4805 KMP_CPU_ALLOC_ON_STACK(mask);
4809 int osID = address2os[coreID * __kmp_nth_per_core + threadID].second;
4810 KMP_CPU_SET(osID, mask);
4812 for (
int i = 0; i < __kmp_nth_per_core; i++) {
4814 osID = address2os[coreID * __kmp_nth_per_core + i].second;
4815 KMP_CPU_SET(osID, mask);
4818 if (__kmp_affinity_verbose) {
4819 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4820 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4821 KMP_INFORM(BoundToOSProcSet,
"KMP_AFFINITY", (kmp_int32)getpid(),
4822 __kmp_gettid(), tid, buf);
4824 __kmp_set_system_affinity(mask, TRUE);
4825 KMP_CPU_FREE_FROM_STACK(mask);
4828 kmp_affin_mask_t *mask;
4829 KMP_CPU_ALLOC_ON_STACK(mask);
4832 int core_level = __kmp_affinity_find_core_level(
4833 address2os, __kmp_avail_proc, __kmp_aff_depth - 1);
4834 int ncores = __kmp_affinity_compute_ncores(address2os, __kmp_avail_proc,
4835 __kmp_aff_depth - 1, core_level);
4836 int nth_per_core = __kmp_affinity_max_proc_per_core(
4837 address2os, __kmp_avail_proc, __kmp_aff_depth - 1, core_level);
4841 if (nthreads == __kmp_avail_proc) {
4843 int osID = address2os[tid].second;
4844 KMP_CPU_SET(osID, mask);
4846 int core = __kmp_affinity_find_core(address2os, tid,
4847 __kmp_aff_depth - 1, core_level);
4848 for (
int i = 0; i < __kmp_avail_proc; i++) {
4849 int osID = address2os[i].second;
4850 if (__kmp_affinity_find_core(address2os, i, __kmp_aff_depth - 1,
4851 core_level) == core) {
4852 KMP_CPU_SET(osID, mask);
4856 }
else if (nthreads <= ncores) {
4859 for (
int i = 0; i < ncores; i++) {
4862 for (
int j = 0; j < nth_per_core; j++) {
4863 if (procarr[i * nth_per_core + j] != -1) {
4870 for (
int j = 0; j < nth_per_core; j++) {
4871 int osID = procarr[i * nth_per_core + j];
4873 KMP_CPU_SET(osID, mask);
4889 int *nproc_at_core = (
int *)KMP_ALLOCA(
sizeof(
int) * ncores);
4891 int *ncores_with_x_procs =
4892 (
int *)KMP_ALLOCA(
sizeof(
int) * (nth_per_core + 1));
4894 int *ncores_with_x_to_max_procs =
4895 (
int *)KMP_ALLOCA(
sizeof(
int) * (nth_per_core + 1));
4897 for (
int i = 0; i <= nth_per_core; i++) {
4898 ncores_with_x_procs[i] = 0;
4899 ncores_with_x_to_max_procs[i] = 0;
4902 for (
int i = 0; i < ncores; i++) {
4904 for (
int j = 0; j < nth_per_core; j++) {
4905 if (procarr[i * nth_per_core + j] != -1) {
4909 nproc_at_core[i] = cnt;
4910 ncores_with_x_procs[cnt]++;
4913 for (
int i = 0; i <= nth_per_core; i++) {
4914 for (
int j = i; j <= nth_per_core; j++) {
4915 ncores_with_x_to_max_procs[i] += ncores_with_x_procs[j];
4920 int nproc = nth_per_core * ncores;
4922 int *newarr = (
int *)__kmp_allocate(
sizeof(
int) * nproc);
4923 for (
int i = 0; i < nproc; i++) {
4930 for (
int j = 1; j <= nth_per_core; j++) {
4931 int cnt = ncores_with_x_to_max_procs[j];
4932 for (
int i = 0; i < ncores; i++) {
4934 if (nproc_at_core[i] == 0) {
4937 for (
int k = 0; k < nth_per_core; k++) {
4938 if (procarr[i * nth_per_core + k] != -1) {
4939 if (newarr[i * nth_per_core + k] == 0) {
4940 newarr[i * nth_per_core + k] = 1;
4946 newarr[i * nth_per_core + k]++;
4954 if (cnt == 0 || nth == 0) {
4965 for (
int i = 0; i < nproc; i++) {
4969 int osID = procarr[i];
4970 KMP_CPU_SET(osID, mask);
4972 int coreID = i / nth_per_core;
4973 for (
int ii = 0; ii < nth_per_core; ii++) {
4974 int osID = procarr[coreID * nth_per_core + ii];
4976 KMP_CPU_SET(osID, mask);
4986 if (__kmp_affinity_verbose) {
4987 char buf[KMP_AFFIN_MASK_PRINT_LEN];
4988 __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
4989 KMP_INFORM(BoundToOSProcSet,
"KMP_AFFINITY", (kmp_int32)getpid(),
4990 __kmp_gettid(), tid, buf);
4992 __kmp_set_system_affinity(mask, TRUE);
4993 KMP_CPU_FREE_FROM_STACK(mask);
5011 kmp_set_thread_affinity_mask_initial()
5016 int gtid = __kmp_get_gtid();
5019 KA_TRACE(30, (
"kmp_set_thread_affinity_mask_initial: " 5020 "non-omp thread, returning\n"));
5023 if (!KMP_AFFINITY_CAPABLE() || !__kmp_init_middle) {
5024 KA_TRACE(30, (
"kmp_set_thread_affinity_mask_initial: " 5025 "affinity not initialized, returning\n"));
5028 KA_TRACE(30, (
"kmp_set_thread_affinity_mask_initial: " 5029 "set full mask for thread %d\n",
5031 KMP_DEBUG_ASSERT(__kmp_affin_fullMask != NULL);
5032 return __kmp_set_system_affinity(__kmp_affin_fullMask, FALSE);
5036 #endif // KMP_AFFINITY_SUPPORTED