#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED

//
// Print the affinity mask to the character array in a pretty format.
//
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    KMP_SNPRINTF(scan, buf_len, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        //
        if (end - scan < 15) {
            break;
        }
        KMP_SNPRINTF(scan, buf_len, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        KMP_SNPRINTF(scan, buf_len, ",...");
        while (*scan != '\0') scan++;
    }
    KMP_SNPRINTF(scan, buf_len, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
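//
// Illustrative note (not in the original source): for a mask with bits
// 0, 1, 2 and 5 set, the routine above fills buf with "{0,1,2,5}"; an
// empty mask yields "{<empty>}", and a set too large for buf_len is
// truncated as, e.g., "{0,1,2,...}".
//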
//
// Form a mask covering every processor known to the machine model.
//
static void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_GROUP_AFFINITY

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_GROUP_AFFINITY */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
# if !defined(KMP_DEBUG) && !defined(COVER)

//
// In the non-debug build the Address members are defined inline.
//
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    //
    // Two addresses are "close" if they agree on all but the innermost
    // `level` levels of the topology map.
    //
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b)
    {
        first = b.first;
        second = b.second;
        return *this;
    }
};
# else

//
// In the debug/coverage build the members are defined out of line.
//
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
{
    depth = _depth;
    leader = FALSE;
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}
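//
// Illustrative note (not in the original source): isClose() ignores the
// innermost `level` layers of the map.  Given depth-3 addresses
// a = {pkg 0, core 1, thread 0} and b = {pkg 0, core 1, thread 1},
// a.isClose(b, 1) is true (same core), while a.isClose(b, 0) reduces to
// operator==() and is false.
//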
class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b)
{
    first = b.first;
    second = b.second;
    return *this;
}

# endif /* !defined(KMP_DEBUG) && !defined(COVER) */
//
// Compare two AddrUnsPair objects lexicographically by topology labels,
// from the most significant (package) level down.
//
static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}
//
// Compare by child numbers, with the __kmp_affinity_compact innermost
// levels treated as most significant.
//
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
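//
// Illustrative note (not in the original source): with depth 3
// (package/core/thread) and __kmp_affinity_compact == 1, the thread level
// (j == depth-1) is compared first, then package, then core, so sorting
// spreads consecutive entries across packages before filling sibling
// hardware threads.  __kmp_affinity_compact == 0 degenerates to a plain
// lexicographic sort.
//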
//
// Machine-topology hierarchy used by the hierarchical barrier.
//
class hierarchy_info {
public:
    kmp_uint32 maxLevels;

    // Depth of the machine hierarchy: the number of levels along the
    // longest path from root to any leaf.
    kmp_uint32 depth;
    kmp_uint32 base_num_threads;
    volatile kmp_int8 uninitialized;  // 0=initialized, 1=uninitialized, 2=initialization in progress
    volatile kmp_int8 resizing;       // 0=not resizing, 1=resizing

    // numPerLevel[i] is the fan-out at level i; skipPerLevel[i] is the
    // stride between consecutive subtrees rooted at level i.
    kmp_uint32 *numPerLevel;
    kmp_uint32 *skipPerLevel;

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i=hier_depth-1; i>=0; --i) {
            int max = -1;
            for (int j=0; j<num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level++] = max+1;
        }
    }

    hierarchy_info() : maxLevels(7), depth(1), uninitialized(1), resizing(0) {}

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&uninitialized, 1, 2);
        if (bool_result == 0) { // Wait for initialization by another thread.
            while (TCR_1(uninitialized) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result==1);

        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);
        for (kmp_uint32 i=0; i<maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        if (adr2os) { // derive levels from the sorted topology table
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else { // no table available: default to groups of 4 leaves
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i=maxLevels-1; i>=0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) {
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];
        // Fill in the remaining levels for the oversubscription case.
        for (kmp_uint32 i=depth; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        uninitialized = 0; // release waiting threads
    }
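    //
    // Illustrative note (not in the original source): for num_addrs == 16
    // with no address table, init() above yields numPerLevel = {4,4,1,...}
    // and skipPerLevel = {1,4,16,32,...}: a tree with 4 leaves per branch,
    // where skipPerLevel[i] is the distance between thread ids whose paths
    // first diverge at level i.
    //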
    // Resize the hierarchy if nproc grows past base_num_threads.
    void resize(kmp_uint32 nproc)
    {
        kmp_int8 bool_result = KMP_COMPARE_AND_STORE_ACQ8(&resizing, 0, 1);
        if (bool_result == 0) { // Someone else is resizing; wait for them.
            while (TCR_1(resizing) != 0) KMP_CPU_PAUSE();
            return;
        }
        KMP_DEBUG_ASSERT(bool_result!=0);
        KMP_DEBUG_ASSERT(nproc > base_num_threads);

        // Calculate the new maxLevels.
        kmp_uint32 old_sz = skipPerLevel[depth-1];
        kmp_uint32 incs = 0, old_maxLevels = maxLevels;
        while (nproc > old_sz) {
            old_sz *= 2;
            incs++;
        }
        maxLevels += incs;

        // Resize the arrays, copying over the old contents.
        kmp_uint32 *old_numPerLevel = numPerLevel;
        kmp_uint32 *old_skipPerLevel = skipPerLevel;
        numPerLevel = skipPerLevel = NULL;
        numPerLevel = (kmp_uint32 *)__kmp_allocate(maxLevels*2*sizeof(kmp_uint32));
        skipPerLevel = &(numPerLevel[maxLevels]);

        for (kmp_uint32 i=0; i<old_maxLevels; ++i) {
            numPerLevel[i] = old_numPerLevel[i];
            skipPerLevel[i] = old_skipPerLevel[i];
        }

        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i) {
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        __kmp_free(old_numPerLevel);

        // Fill in the new oversubscription levels.
        for (kmp_uint32 i=old_maxLevels; i<maxLevels; ++i)
            skipPerLevel[i] = 2*skipPerLevel[i-1];

        base_num_threads = nproc;
        resizing = 0; // release waiting threads
    }
};
static hierarchy_info machine_hierarchy;

void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    kmp_uint32 depth;
    // Needs to be initialized on first use of the hierarchical barrier,
    // even when affinity is available but set to "none".
    if (TCR_1(machine_hierarchy.uninitialized))
        machine_hierarchy.init(NULL, nproc);

    // Adjust the hierarchy in case the number of threads grew.
    if (nproc > machine_hierarchy.base_num_threads)
        machine_hierarchy.resize(nproc);

    depth = machine_hierarchy.depth;
    KMP_DEBUG_ASSERT(depth > 0);
    // Adjust the depth in the oversubscription case.
    while (nproc > machine_hierarchy.skipPerLevel[depth-1])
        depth++;

    thr_bar->depth = depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0]-1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
//
// When sorting by labels, __kmp_affinity_assign_child_nums() must first be
// called to renumber the labels from [0..n] and place them into the
// childNums vector of each Address, since the raw labels used under one
// node of the hierarchy may differ from those used under another node at
// the same level.
//
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os,
  int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
    __kmp_free(lastLabel);
    __kmp_free(counts);
}
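//
// Illustrative note (not in the original source): if node 0 holds packages
// labeled 601 and 602 while node 1 holds 603 and 604, the renumbering above
// produces childNums {0,0}, {0,1}, {1,0}, {1,1}, so a "scatter" sort sees
// ordinal positions rather than the raw, node-specific labels.
//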
//
// The fullMask is the set of hardware threads available to the process.
//
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }

static int nCoresPerPkg, nPackages;
static int __kmp_nThreadsPerCore;
#ifndef KMP_DFLT_NTH_CORES
static int __kmp_ncores;
#endif

//
// The topology is uniform if every package has the same number of cores
// and every core has the same number of hardware threads.
//
static int
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
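//
// Illustrative note (not in the original source): with 2 packages x 4 cores
// x 2 hardware threads, the product is 16; the topology is reported uniform
// only if all 16 OS procs are available to the process.
//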
//
// Print out the detailed machine topology map, i.e. the physical location
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;

    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}
//
// If we don't know how to retrieve the machine's processor topology, or
// encounter an error in doing so, this routine is called to form a "flat"
// mapping of os thread ids <-> processor ids.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if __kmp_affinity_type == affinity_none, this routine might
    // still be called to set __kmp_ncores, __kmp_nThreadsPerCore,
    // nCoresPerPkg, & nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is either 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}
# if KMP_GROUP_AFFINITY

//
// On Windows* OS with multiple processor groups, form a 2-level map with
// levels "processor group" and "proc".
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If we aren't affinity capable, or there is only one processor group,
    // return now - the flat mapping will be used.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }

        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // The requested granularity cannot be used with the group
            // topology method; fall back to "thread" granularity.
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}

# endif /* KMP_GROUP_AFFINITY */
# if KMP_ARCH_X86 || KMP_ARCH_X86_64

//
// Return the number of bits needed to represent `count` distinct ids.
//
static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
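//
// Illustrative note (not in the original source):
// __kmp_cpuid_mask_width(6) == 3, since 1<<2 == 4 < 6 but 1<<3 == 8 >= 6;
// __kmp_cpuid_mask_width(1) == 0.
//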
class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from the above values
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};

static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}
//
// On IA-32 architecture and Intel(R) 64 architecture, we attempt to use an
// algorithm which cycles through the available os threads, setting the
// current thread's affinity mask to that thread, and then retrieves the
// Apic Id for each thread context using the cpuid instruction.
//
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // If we aren't capable of binding threads, use the defaults that can
    // be calculated by issuing cpuid without binding to each proc.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        //
        // Infer the machine topology using only the data available from
        // cpuid on the current thread, and __kmp_xproc.
        //
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get an upper bound on the number of threads per package using
        // cpuid(1).
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // The num cores per pkg comes from cpuid(4); 1 must be added to
        // the encoded value.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        //
        // There is no reliable way to tell if HT is enabled without binding
        // to each thread, so assume it is off.
        //
        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }
    //
    // From here on, it is safe to call __kmp_get_system_affinity() and
    // __kmp_set_system_affinity(), even if __kmp_affinity_type ==
    // affinity_none.  Save the affinity mask for the current thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The apic id and max threads per pkg come from cpuid(1).
        // Fail if bit 9 of edx says there is no APIC on chip.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (! ((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per pkg comes from cpuid(4), when supported.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId by splitting the fields of
        // the apic id.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            //
            // The cpuid info is inconsistent: more cores than thread
            // contexts per package.
            //
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }
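    //
    // Illustrative note (not in the original source): with
    // maxThreadsPerPkg == 16 (widthCT == 4) and maxCoresPerPkg == 8
    // (widthC == 3, so widthT == 1), apicId 45 (0b101101) decodes to
    // pkgId = 45 >> 4 = 2, coreId = (45 >> 1) & 7 = 6, threadId = 45 & 1 = 1.
    //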
    //
    // We've collected all the info we need; restore the old affinity mask
    // for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }
    //
    // Sort the threadInfo table by physical Id, then count the packages,
    // cores, and threads.  nPackages, nCoresPerPkg, and
    // __kmp_nThreadsPerCore end up as the number of unique pkgIds, the max
    // number of coreIds per package, and the max number of threadIds per
    // core, respectively.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);

    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned pkgCt = 1;
    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    // Intra-package consistency check values.
    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            pkgCt++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            //
            // This is a different package; reset the consistency check
            // values and go on to the next iteration.
            //
            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
            continue;
        }

        if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check that the maxCoresPerPkg and maxThreadsPerPkg values agree
        // between all the threads bound to a given package.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    nPackages = pkgCt;
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages.
    //
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }

    //
    // Now that we've determined the number of packages, the number of cores
    // per package, and the number of threads per core, we can construct the
    // data structure that is to be returned.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled in
        // the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}
//
// Intel(R) microarchitecture code name Nehalem, Dunnington and later
// architectures support a newer interface for specifying the x2APIC Ids,
// based on cpuid leaf 11.
//
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at
    // it, get the default values for __kmp_nThreadsPerCore, nCoresPerPkg,
    // and nPackages.
    //
    int depth = 0;
    int level;
    int threadLevel = -1;
    int coreLevel = -1;
    int pkgLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    for (level = 0;; level++) {
        if (level > 31) {
            //
            // If level is this big then something went wrong with the
            // cpuid info - give up.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            if (pkgLevel < 0) {
                //
                // Will infer nPackages from __kmp_xproc.
                //
                pkgLevel = level;
                level++;
            }
            break;
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level.
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // Core level.
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    depth = level;

    //
    // In the above loop, "level" ran from the finest level (usually thread)
    // to the coarsest.  The caller expects the labels in the inverse order,
    // so invert the vars saying which level means what.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
    //
    // If we aren't capable of binding threads, use the defaults computed
    // above from issuing cpuid on the current thread only.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }
    //
    // From here on, it is safe to call __kmp_get_system_affinity() and
    // __kmp_set_system_affinity().  Save the affinity mask for the current
    // thread.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);

    //
    // Run through each of the available contexts, binding the current thread
    // to it, and obtaining the pertinent information using the cpuid instr.
    //
    unsigned int proc;
    int nApics = 0;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the Apic ID.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }
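    //
    // Illustrative note (not in the original source): on a part reporting
    // an SMT field 1 bit wide (leaf 11 level 0, eax & 0x1f == 1) and a core
    // field 5 bits wide (level 1, shift == 5), x2APIC id 38 (0b100110)
    // decodes to thread = 38 & 0x1 = 0, core = (38 & 0x1f) >> 1 = 3, and
    // package = 38 >> 5 = 1.
    //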
    //
    // We've collected all the info we need; restore the old affinity mask
    // for this thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    //
    // If there's only one thread context to bind to, return now.
    //
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }
    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        maxCt[level] = 1;
        counts[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the finest
    // level to the coarsest, i.e. we iterate forward through
    // (*address2os)[].first.labels[].
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                //
                // Two procs with identical labels - the ids are not unique.
                //
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }

    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }
    //
    // Find any levels with radix 1, and remove them from the map
    // (except for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If we are removing any levels, allocate a new vector to return,
    // and copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1) && (level != pkgLevel)) {
                //
                // Remove this level.  Never remove the package level.
                //
                if (level == threadLevel) {
                    threadLevel = -1;
                }
                else if ((threadLevel >= 0) && (level < threadLevel)) {
                    threadLevel--;
                }
                if (level == coreLevel) {
                    coreLevel = -1;
                }
                else if ((coreLevel >= 0) && (level < coreLevel)) {
                    coreLevel--;
                }
                if (level < pkgLevel) {
                    pkgLevel--;
                }
                continue;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
//
// Field indices for the /proc/cpuinfo parsing below.
//
#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}
//
// Parse /proc/cpuinfo (or an alternate file in the same format) to obtain
// the affinity map.
//
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
  kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Scan the file once: count the number of "processor" (osId) fields,
    // and find the highest value of <n> for any "node_<n>" field.
    //
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read errors presumably because of EOF.
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        unsigned level;
        if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
        }
    }
    //
    // Check for an empty file / no valid processor records, or too many.
    // The number of records can't exceed the number of valid bits in the
    // affinity mask.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records to store the proc info in.  The dummy
    // element at the end makes the logic in filling them out easier to code.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
      * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
          * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) {        \
        __kmp_free(threadInfo[i]);              \
    }                                           \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that we didn't find the field.
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) {  \
        (p)[__index] = UINT_MAX;                         \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }
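    //
    // Illustrative note (not in the original source): a typical record in
    // /proc/cpuinfo looks like
    //
    //     processor   : 5
    //     physical id : 1
    //     core id     : 2
    //
    // and fills a threadInfo row with osIdIndex -> 5, pkgIdIndex -> 1,
    // coreIdIndex -> 2, leaving unseen fields at UINT_MAX.
    //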
    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at
        // the end of the loop appear in an outer scoping level.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read errors presumably because of EOF.  If there is valid
                // data in threadInfo[num_avail], then fake a blank line to
                // ensure that the last record gets parsed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            }
            else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer.  Set a flag and don't
                // emit an error if we were going to ignore the line anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) {                                  \
        CLEANUP_THREAD_INFO;                          \
        *msg_id = kmp_i18n_str_LongLineCpuinfo;       \
        return -1;                                    \
    }
            }
            (*line)++;
            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                char path[256];
                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                KMP_SNPRINTF(path, sizeof(path),
                    "/sys/devices/system/cpu/cpu%u/topology/core_id",
                    threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
                continue;
#else
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
            char s4[] = "thread id";
            if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][threadIdIndex] = val;
                continue;
            }
            unsigned level;
            if (KMP_SSCANF(buf, "node_%d id", &level) == 1) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (KMP_SSCANF(p + 1, "%u\n", &val) != 1)) goto no_val;
                KMP_ASSERT(nodeIdIndex + level <= maxIndex);
                if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][nodeIdIndex + level] = val;
                continue;
            }
            //
            // We didn't recognize the leading token on the line.  There are
            // lots of leading tokens that we don't recognize - if the line
            // isn't empty, go on to the next line.
            //
            if ((*buf != 0) && (*buf != '\n')) {
                //
                // If the line is longer than the buffer, read characters
                // until we find a newline.
                //
                if (long_line) {
                    int ch;
                    while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
                }
                continue;
            }

            //
            // A newline has signalled the end of the processor record.
            // Check that there aren't too many procs specified.
            //
            if ((int)num_avail == __kmp_xproc) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_TooManyEntries;
                return -1;
            }

            //
            // Check for missing fields.  The osId field must be there.  We
            // currently require that the physical id field be specified,
            // also.
            //
            if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingProcField;
                return -1;
            }
            if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingPhysicalIDField;
                return -1;
            }

            //
            // Skip this proc if it is not included in the machine model.
            //
            if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
                INIT_PROC_INFO(threadInfo[num_avail]);
                continue;
            }

            //
            // We have a successful parse of this proc's info; prepare for
            // the next proc.
            //
            num_avail++;
            KMP_ASSERT(num_avail <= num_records);
            INIT_PROC_INFO(threadInfo[num_avail]);
        }
        continue;
        no_val:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingValCpuinfo;
        return -1;

        dup_field:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
        return -1;
    }
    *line = 0;

# if KMP_MIC && REDUCE_TEAM_SIZE
    unsigned teamSize = 0;
# endif // KMP_MIC && REDUCE_TEAM_SIZE
    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    KMP_ASSERT(num_avail > 0);
    KMP_ASSERT(num_avail <= num_records);
    if (num_avail == 1) {
        __kmp_ncores = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
        if (__kmp_affinity_verbose) {
            if (! KMP_AFFINITY_CAPABLE()) {
                KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                char buf[KMP_AFFIN_MASK_PRINT_LEN];
                __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                  fullMask);
                KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
                if (__kmp_affinity_respect_mask) {
                    KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
                }
                else {
                    KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
                }
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            int index;
            kmp_str_buf_t buf;
            __kmp_str_buf_init(&buf);
            __kmp_str_buf_print(&buf, "1");
            for (index = maxIndex - 1; index > pkgIdIndex; index--) {
                __kmp_str_buf_print(&buf, " x 1");
            }
            KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
            __kmp_str_buf_free(&buf);
        }

        if (__kmp_affinity_type == affinity_none) {
            CLEANUP_THREAD_INFO;
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0][pkgIdIndex];
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        CLEANUP_THREAD_INFO;
        return 1;
    }
    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, num_avail, sizeof(*threadInfo),
      __kmp_affinity_cmp_ProcCpuInfo_phys_id);

    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  nPackages, nCoresPerPkg,
    // __kmp_nThreadsPerCore, & __kmp_ncores are set to the number of unique
    // pkgIds, the max number of coreIds per package, and the max number of
    // threadIds per core, respectively.
    //
    unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));

    bool assign_thread_ids = false;
    unsigned threadIdCt;
    unsigned index;

    restart_radix_check:
    threadIdCt = 0;

    //
    // Initialize the counter arrays with data from threadInfo[0].
    //
    if (assign_thread_ids) {
        if (threadInfo[0][threadIdIndex] == UINT_MAX) {
            threadInfo[0][threadIdIndex] = threadIdCt++;
        }
        else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
            threadIdCt = threadInfo[0][threadIdIndex] + 1;
        }
    }
    for (index = 0; index <= maxIndex; index++) {
        counts[index] = 1;
        maxCt[index] = 1;
        totals[index] = 1;
        lastId[index] = threadInfo[0][index];
    }
    //
    // Run through the rest of the records in the table, updating the
    // counters as we go.
    //
    for (i = 1; i < num_avail; i++) {
        //
        // Find the most significant index whose id differs from the id
        // for the previous record.
        //
        for (index = maxIndex; index >= threadIdIndex; index--) {
            if (assign_thread_ids && (index == threadIdIndex)) {
                //
                // Auto-assign the thread id field if it wasn't specified.
                //
                if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                    threadInfo[i][threadIdIndex] = threadIdCt++;
                }
                //
                // Apparently the thread id field was specified for some
                // records and not others.  Start the thread id counter off
                // at the next higher thread id.
                //
                else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                    threadIdCt = threadInfo[i][threadIdIndex] + 1;
                }
            }
            if (threadInfo[i][index] != lastId[index]) {
                //
                // Run through all indices which are less significant and
                // reset the counts to 1.  At all levels up to and including
                // index, increment the totals and record the last id.
                //
                unsigned index2;
                for (index2 = threadIdIndex; index2 < index; index2++) {
                    totals[index2]++;
                    if (counts[index2] > maxCt[index2]) {
                        maxCt[index2] = counts[index2];
                    }
                    counts[index2] = 1;
                    lastId[index2] = threadInfo[i][index2];
                }
                counts[index]++;
                totals[index]++;
                lastId[index] = threadInfo[i][index];

                if (assign_thread_ids && (index > threadIdIndex)) {

# if KMP_MIC && REDUCE_TEAM_SIZE
                    //
                    // The default team size is the total #threads in the
                    // machine minus 1 thread for every core that has 3 or
                    // more threads.
                    //
                    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

                    //
                    // Restart the thread counter, as we are on a new core.
                    //
                    threadIdCt = 0;

                    //
                    // Auto-assign the thread id field if it wasn't specified.
                    //
                    if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                        threadInfo[i][threadIdIndex] = threadIdCt++;
                    }
                    //
                    // As above, start the thread id counter off at the next
                    // higher thread id if some records specified it.
                    //
                    else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                        threadIdCt = threadInfo[i][threadIdIndex] + 1;
                    }
                }
                break;
            }
        }
        if (index < threadIdIndex) {
            //
            // If thread ids were specified, it is an error if they are not
            // unique.  Also check that we haven't already restarted the
            // loop (to be safe - shouldn't need to).
            //
            if ((threadInfo[i][threadIdIndex] != UINT_MAX)
              || assign_thread_ids) {
                __kmp_free(lastId);
                __kmp_free(totals);
                __kmp_free(maxCt);
                __kmp_free(counts);
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
                return -1;
            }

            //
            // The thread ids were not specified and we see duplicate
            // entries; start the loop over and assign thread ids manually.
            //
            assign_thread_ids = true;
            goto restart_radix_check;
        }
    }
# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // The default team size is the total #threads in the machine
    // minus 1 thread for every core that has 3 or more threads.
    //
    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (counts[index] > maxCt[index]) {
            maxCt[index] = counts[index];
        }
    }

    __kmp_nThreadsPerCore = maxCt[threadIdIndex];
    nCoresPerPkg = maxCt[coreIdIndex];
    nPackages = totals[pkgIdIndex];

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = totals[maxIndex];
    for (index = threadIdIndex; index < maxIndex; index++) {
        prod *= maxCt[index];
    }
    bool uniform = (prod == totals[threadIdIndex]);
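    //
    // Illustrative note (not in the original source): for 2 packages with
    // 4 cores each and 2 threads per core, fully populated,
    // totals[maxIndex] == 2, maxCt[coreIdIndex] == 4 and
    // maxCt[threadIdIndex] == 2, so prod == 2*4*2 == 16 ==
    // totals[threadIdIndex] and the topology is uniform; with one core
    // offline, totals[threadIdIndex] would be 14 and the test would fail.
    //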
    //
    // When affinity is off, this routine will still be called to set
    // __kmp_ncores, as well as __kmp_nThreadsPerCore, nCoresPerPkg, &
    // nPackages.
    //
    __kmp_ncores = totals[coreIdIndex];

    if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
        for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
          maxCt[threadIdIndex], __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Set the default team size.
    //
    if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
        __kmp_dflt_team_nth = teamSize;
        KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
    }
# endif // KMP_MIC && REDUCE_TEAM_SIZE
    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        return 0;
    }

    //
    // Count the number of levels which have more nodes at that level than
    // at the parent's level (with there being an implicit root node of the
    // top level).  This is equivalent to saying that there is at least one
    // node at this level which has a sibling.  These levels are in the map,
    // and the package level is always in the map.
    //
    bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
    for (index = threadIdIndex; index < maxIndex; index++) {
        KMP_ASSERT(totals[index] >= totals[index + 1]);
        inMap[index] = (totals[index] > totals[index + 1]);
    }
    inMap[maxIndex] = (totals[maxIndex] > 1);
    inMap[pkgIdIndex] = true;

    int depth = 0;
    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (inMap[index]) {
            depth++;
        }
    }
    KMP_ASSERT(depth > 0);
    //
    // Construct the data structure that is to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;

    for (i = 0; i < num_avail; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i][osIdIndex];
        int src_index;
        int dst_index = 0;

        for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
            if (! inMap[src_index]) {
                continue;
            }
            addr.labels[dst_index] = threadInfo[i][src_index];
            if (src_index == pkgIdIndex) {
                pkgLevel = dst_index;
            }
            else if (src_index == coreIdIndex) {
                coreLevel = dst_index;
            }
            else if (src_index == threadIdIndex) {
                threadLevel = dst_index;
            }
            dst_index++;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled
        // in the machine topology map.
        //
        unsigned src_index;
        __kmp_affinity_gran_levels = 0;
        for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
            if (! inMap[src_index]) {
                continue;
            }
            switch (src_index) {
                case threadIdIndex:
                if (__kmp_affinity_gran > affinity_gran_thread) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case coreIdIndex:
                if (__kmp_affinity_gran > affinity_gran_core) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case pkgIdIndex:
                if (__kmp_affinity_gran > affinity_gran_package) {
                    __kmp_affinity_gran_levels++;
                }
                break;
            }
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(inMap);
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return depth;
}
//
// Create and return a table of affinity masks, indexed by OS thread ID.
// This routine handles OR'ing together all the affinity masks of threads
// that are sufficiently close, if granularity > fine.
//
static kmp_affin_mask_t *
__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
  AddrUnsPair *address2os, unsigned numAddrs)
{
    //
    // First form a table of affinity masks in order of OS thread id.
    //
    unsigned depth;
    unsigned maxOsId;
    unsigned i;

    KMP_ASSERT(numAddrs > 0);
    depth = address2os[0].first.depth;

    maxOsId = 0;
    for (i = 0; i < numAddrs; i++) {
        unsigned osId = address2os[i].second;
        if (osId > maxOsId) {
            maxOsId = osId;
        }
    }
    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
      (maxOsId + 1) * __kmp_affin_mask_size);

    //
    // Sort the address2os table according to physical order.  Doing so
    // will put all threads on the same core/package/node in consecutive
    // locations.
    //
    qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);

    KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
    if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
        KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
    }
    if (__kmp_affinity_gran_levels >= (int)depth) {
        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffThreadsMayMigrate);
        }
    }

    //
    // Run through the table, forming the masks for all threads on each
    // core.  Threads on the same core will have identical Address objects,
    // not considering the last level, which must be the thread id.  All
    // threads on a core will appear consecutively.
    //
    unsigned unique = 0;
    unsigned j = 0;                             // index of 1st thread on core
    unsigned leader = 0;
    Address *leaderAddr = &(address2os[0].first);
    kmp_affin_mask_t *sum
      = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[0].second, sum);
    for (i = 1; i < numAddrs; i++) {
        //
        // If this thread is sufficiently close to the leader (within the
        // granularity setting), then set the bit for this os thread in the
        // target mask.  Otherwise, it is a different core, so go on to the
        // next core.
        //
        if (leaderAddr->isClose(address2os[i].first,
          __kmp_affinity_gran_levels)) {
            KMP_CPU_SET(address2os[i].second, sum);
        }
        else {
            //
            // For every thread in this core, copy the mask to the thread's
            // entry in the osId2Mask table.  Mark the first address as a
            // leader.
            //
            for (; j < i; j++) {
                unsigned osId = address2os[j].second;
                KMP_DEBUG_ASSERT(osId <= maxOsId);
                kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
                KMP_CPU_COPY(mask, sum);
                address2os[j].first.leader = (j == leader);
            }
            unique++;

            //
            // Start a new mask.
            //
            leader = i;
            leaderAddr = &(address2os[i].first);
            KMP_CPU_ZERO(sum);
            KMP_CPU_SET(address2os[i].second, sum);
        }
    }

    //
    // For every thread in the last core, copy the mask to the thread's
    // entry in the osId2Mask table.
    //
    for (; j < i; j++) {
        unsigned osId = address2os[j].second;
        KMP_DEBUG_ASSERT(osId <= maxOsId);
        kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
        KMP_CPU_COPY(mask, sum);
        address2os[j].first.leader = (j == leader);
    }
    unique++;

    *maxIndex = maxOsId;
    *numUnique = unique;
    return osId2Mask;
}
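#if 0
//
// Illustrative usage sketch (not part of the original source).  With
// granularity set to "core" on a machine with 2 hardware threads per core,
// each pair of sibling OS procs ends up sharing one two-bit mask in the
// returned table, and numUnique equals the number of cores.
//
unsigned maxOsId, numUnique;
kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxOsId, &numUnique,
  address2os, __kmp_avail_proc);
#endif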
//
// Stuff for the affinity proclist parsers.  newMasks holds the list of
// affinity masks being built; it grows by doubling.
//
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;

#define ADD_MASK(_mask) \
    {                                                                   \
        if (nextNewMask >= numNewMasks) {                               \
            numNewMasks *= 2;                                           \
            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
              numNewMasks * __kmp_affin_mask_size);                     \
        }                                                               \
        KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
        nextNewMask++;                                                  \
    }

#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
    {                                                                   \
        if (((_osId) > _maxOsId) ||                                     \
          (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings      \
              && (__kmp_affinity_type != affinity_none))) {             \
                KMP_WARNING(AffIgnoreInvalidProcID, _osId);             \
            }                                                           \
        }                                                               \
        else {                                                          \
            ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));               \
        }                                                               \
    }
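//
// Illustrative note (not in the original source): ADD_MASK_OSID(5, ...)
// appends a copy of OS proc 5's mask to newMasks if proc 5 is present in
// the machine model; otherwise it emits the AffIgnoreInvalidProcID warning
// (when warnings are enabled) and adds nothing.
//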
//
// Re-parse the proclist (for the explicit affinity type), and form the list
// of affinity newMasks indexed by gtid.
//
static void
__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *proclist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = proclist;
    const char *next = proclist;

    // We use malloc() for the temporary mask vector, so that we can use
    // realloc() to extend it.
    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;
    kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    int setSize = 0;

    for (;;) {
        int start, end, stride;

        SKIP_WS(scan);
        next = scan;
        if (*next == '\0') {
            break;
        }

        if (*next == '{') {
            int num;
            setSize = 0;
            next++;     // skip '{'
            SKIP_WS(next);
            scan = next;

            // Read the first integer in the set.
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad proclist");
            SKIP_DIGITS(next);
            num = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(num >= 0, "bad explicit proc list");

            // Copy the mask for that osId to the sum (union) mask.
            if ((num > maxOsId) ||
              (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, num);
                }
                KMP_CPU_ZERO(sumMask);
            }
            else {
                KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                setSize = 1;
            }

            for (;;) {
                // Check for end of set.
                SKIP_WS(next);
                if (*next == '}') {
                    next++;     // skip '}'
                    break;
                }

                // Skip optional comma.
                KMP_ASSERT2(*next == ',', "bad explicit proc list");
                next++;
                SKIP_WS(next);

                // Read the next integer in the set.
                scan = next;
                KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");
                SKIP_DIGITS(next);
                num = __kmp_str_to_int(scan, *next);
                KMP_ASSERT2(num >= 0, "bad explicit proc list");

                // Add the mask for that osId to the sum mask.
                if ((num > maxOsId) ||
                  (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, num);
                    }
                }
                else {
                    KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                    setSize++;
                }
            }
            if (setSize > 0) {
                ADD_MASK(sumMask);
            }

            SKIP_WS(next);
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        // Read the first integer.
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
          "bad explicit proc list");
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(start >= 0, "bad explicit proc list");
        SKIP_WS(next);

        // If this isn't a range, then add a mask to the list and go on.
        if (*next != '-') {
            ADD_MASK_OSID(start, osId2Mask, maxOsId);
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        // This is a range.  Skip over the '-' and read in the 2nd int.
        next++;         // skip '-'
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'),
          "bad explicit proc list");
        SKIP_DIGITS(next);
        end = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(end >= 0, "bad explicit proc list");

        // Check for a stride parameter.
        stride = 1;
        SKIP_WS(next);
        if (*next == ':') {
            // A stride is specified.  Skip over the ':' and read the 3rd int.
            int sign = +1;
            next++;             // skip ':'
            SKIP_WS(next);
            scan = next;
            if (*next == '-') {
                sign = -1;
                next++;
                SKIP_WS(next);
                scan = next;
            }
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(stride >= 0, "bad explicit proc list");
            stride *= sign;
        }

        // Do some range checks.
        KMP_ASSERT2(stride != 0, "bad explicit proc list");
        if (stride > 0) {
            KMP_ASSERT2(start <= end, "bad explicit proc list");
        }
        else {
            KMP_ASSERT2(start >= end, "bad explicit proc list");
        }
        KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

        // Add the mask for each OS proc # to the list.
        if (stride > 0) {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            } while (start <= end);
        }
        else {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            } while (start >= end);
        }

        // Skip optional comma.
        SKIP_WS(next);
        if (*next == ',') {
            next++;
        }
        scan = next;
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(sumMask);
    KMP_INTERNAL_FREE(newMasks);
}
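//
// Illustrative note (not in the original source): the proclist
// "0,{1,2},4-7:2" yields four masks: {0}, {1,2}, {4}, and {6} -- a bare
// number adds a singleton mask, a braced set is OR'd into one mask, and
// start-end:stride enumerates 4 and 6.
//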
//
// Parse one subplace list of the form "<proc>", "<proc>:<count>", or
// "<proc>:<count>:<stride>", OR'ing the selected OS procs into tempMask.
//
static void
__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    for (;;) {
        int start, count, stride, i;

        // Read in the starting proc id.
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(start >= 0);
        *scan = next;

        // Valid follow sets are ',' ':' and '}'.
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            if ((start > maxOsId) ||
              (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, start);
                }
            }
            else {
                KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                (*setSize)++;
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;      // skip ':'

        // Read the count parameter.
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(count >= 0);
        *scan = next;

        // Valid follow sets are ',' ':' and '}'.
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;  // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start++;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;      // skip ':'

        // Read the stride parameter.
        int sign = +1;
        for (;;) {
            SKIP_WS(*scan);
            if (**scan == '+') {
                (*scan)++; // skip '+'
                continue;
            }
            if (**scan == '-') {
                sign *= -1;
                (*scan)++; // skip '-'
                continue;
            }
            break;
        }
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        stride = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(stride >= 0);
        *scan = next;
        stride *= sign;

        // Valid follow sets are ',' and '}'.
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;  // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start += stride;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    // Valid follow sets are '{' '!' and num.
    SKIP_WS(*scan);
    if (**scan == '{') {
        (*scan)++;      // skip '{'
        __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
          setSize);
        KMP_ASSERT2(**scan == '}', "bad explicit places list");
        (*scan)++;      // skip '}'
    }
    else if (**scan == '!') {
        (*scan)++;      // skip '!'
        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
        KMP_CPU_COMPLEMENT(tempMask);
    }
    else if ((**scan >= '0') && (**scan <= '9')) {
        next = *scan;
        SKIP_DIGITS(next);
        int num = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(num >= 0);
        if ((num > maxOsId) ||
          (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffIgnoreInvalidProcID, num);
            }
        }
        else {
            KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
            (*setSize)++;
        }
        *scan = next;   // skip num
    }
    else {
        KMP_ASSERT2(0, "bad explicit places list");
    }
}
void
__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *placelist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    int i, j, count, stride, sign;
    const char *scan = placelist;
    const char *next = placelist;

    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;

    kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    KMP_CPU_ZERO(tempMask);
    int setSize = 0;

    for (;;) {
        __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

        // Valid follow sets are ',' ':' and EOL.
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            if (setSize > 0) {
                ADD_MASK(tempMask);
            }
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            if (*scan == '\0') {
                break;
            }
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(*scan == ':', "bad explicit places list");
        scan++;         // skip ':'

        // Read the count parameter.
        SKIP_WS(scan);
        KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
          "bad explicit places list");
        next = scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(scan, *next);
        KMP_ASSERT(count >= 0);
        scan = next;

        // Valid follow sets are ',' ':' and EOL.
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            stride = +1;
        }
        else {
            KMP_ASSERT2(*scan == ':', "bad explicit places list");
            scan++;     // skip ':'

            // Read the stride parameter.
            sign = +1;
            for (;;) {
                SKIP_WS(scan);
                if (*scan == '+') {
                    scan++; // skip '+'
                    continue;
                }
                if (*scan == '-') {
                    sign *= -1;
                    scan++; // skip '-'
                    continue;
                }
                break;
            }
            SKIP_WS(scan);
            KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
              "bad explicit places list");
            next = scan;
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_DEBUG_ASSERT(stride >= 0);
            scan = next;
            stride *= sign;
        }

        if (stride > 0) {
            for (i = 0; i < count; i++) {
                if (setSize == 0) {
                    break;
                }
                ADD_MASK(tempMask);
                setSize = 0;
                // Shift the mask up by the stride, dropping procs that are
                // invalid or outside the machine model.
                for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j >= 0; j--) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        else {
            for (i = 0; i < count; i++) {
                if (setSize == 0) {
                    break;
                }
                ADD_MASK(tempMask);
                setSize = 0;
                // Shift the mask down by the stride (stride < 0 here).
                for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
                  j++) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if ((__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) && i < count - 1) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        KMP_CPU_ZERO(tempMask);
        setSize = 0;

        // Valid follow sets are ',' and EOL.
        SKIP_WS(scan);
        if (*scan == '\0') {
            break;
        }
        if (*scan == ',') {
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    KMP_MEMCPY(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(tempMask);
    KMP_INTERNAL_FREE(newMasks);
}
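//
// Illustrative note (not in the original source): the placelist "{0,1}:4:2"
// produces the four places {0,1}, {2,3}, {4,5}, {6,7} -- each iteration of
// the loop above adds the current mask and then shifts every set bit up by
// the stride.
//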
#undef ADD_MASK
#undef ADD_MASK_OSID

//
// Trim the topology table according to the KMP_PLACE_THREADS settings
// (__kmp_place_num_cores, __kmp_place_core_offset,
// __kmp_place_num_threads_per_core).
//
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
    if ( __kmp_place_num_cores == 0 ) {
        if ( __kmp_place_num_threads_per_core == 0 ) {
            return;   // no core-limiting actions requested, exit
        }
        __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
    }
    if ( !__kmp_affinity_uniform_topology() ) {
        KMP_WARNING( AffThrPlaceNonUniform );
        return; // don't support non-uniform topology
    }
    if ( depth != 3 ) {
        KMP_WARNING( AffThrPlaceNonThreeLevel );
        return; // don't support non-3-level topology
    }
    if ( __kmp_place_num_threads_per_core == 0 ) {
        __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore;  // use all HW contexts
    }
    if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
        KMP_WARNING( AffThrPlaceManyCores );
        return;
    }

    AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
        nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
    int i, j, k, n_old = 0, n_new = 0;
    for ( i = 0; i < nPackages; ++i ) {
        for ( j = 0; j < nCoresPerPkg; ++j ) {
            if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
                n_old += __kmp_nThreadsPerCore;   // skip not-requested core
            }
            else {
                for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
                    if ( k < __kmp_place_num_threads_per_core ) {
                        newAddr[n_new] = (*pAddr)[n_old];   // keep requested context
                        n_new++;
                    }
                    n_old++;
                }
            }
        }
    }
    nCoresPerPkg = __kmp_place_num_cores;
    __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core;
    __kmp_avail_proc = n_new;
    __kmp_ncores = nPackages * __kmp_place_num_cores;

    __kmp_free( *pAddr );
    *pAddr = newAddr;   // replace the old topology table with the new one
}
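//
// Illustrative note (not in the original source): with 2 packages x 4 cores
// x 4 threads, __kmp_place_num_cores == 2, __kmp_place_core_offset == 1 and
// __kmp_place_num_threads_per_core == 2, the routine above keeps cores 1-2
// of each package and the first 2 contexts of each kept core, leaving
// __kmp_avail_proc == 2*2*2 == 8.
//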
static AddrUnsPair *address2os = NULL;
static int *procarr = NULL;
static int __kmp_aff_depth = 0;

static void
__kmp_aux_affinity_initialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_ASSERT(fullMask != NULL);
        return;
    }

    //
    // Create the "full" mask - this defines all of the processors that we
    // consider to be in the machine model.  If respect is set, then it is
    // the initialization thread's affinity mask.  Otherwise, it is all
    // processors that we know about on the machine.
    //
    if (fullMask == NULL) {
        fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
    }
    if (KMP_AFFINITY_CAPABLE()) {
        if (__kmp_affinity_respect_mask) {
            __kmp_get_system_affinity(fullMask, TRUE);

            //
            // Count the number of available processors.
            //
            unsigned i;
            __kmp_avail_proc = 0;
            for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
                if (! KMP_CPU_ISSET(i, fullMask)) {
                    continue;
                }
                __kmp_avail_proc++;
            }
            if (__kmp_avail_proc > __kmp_xproc) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(ErrorInitializeAffinity);
                }
                __kmp_affinity_type = affinity_none;
                KMP_AFFINITY_DISABLE();
                return;
            }
        }
        else {
            __kmp_affinity_entire_machine_mask(fullMask);
            __kmp_avail_proc = __kmp_xproc;
        }
    }
3530 kmp_i18n_id_t msg_id = kmp_i18n_null;
3536 if ((__kmp_cpuinfo_file != NULL) &&
3537 (__kmp_affinity_top_method == affinity_top_method_all)) {
3538 __kmp_affinity_top_method = affinity_top_method_cpuinfo;
    if (__kmp_affinity_top_method == affinity_top_method_all) {
        // In the default code path, errors are not fatal - we just try the
        // next discovery method.  We only emit a warning message if affinity
        // is on, or the verbose flag is set, and the nowarnings flag was not
        // set.
        const char *file_name = NULL;
        int line = 0;

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
        }

        file_name = NULL;
        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                      KMP_I18N_STR(DecodingLegacyAPIC));
                }
                else {
                    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
                }
            }

            file_name = NULL;
            depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
        }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY",
                      __kmp_i18n_catgets(msg_id), "/proc/cpuinfo");
                }
                else {
                    KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
                }
            }

            FILE *f = fopen("/proc/cpuinfo", "r");
            if (f == NULL) {
                msg_id = kmp_i18n_str_CantOpenCpuinfo;
            }
            else {
                file_name = "/proc/cpuinfo";
                depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
                fclose(f);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                }
            }
        }

# if KMP_GROUP_AFFINITY

        if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
            }

            depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
            KMP_ASSERT(depth != 0);
        }

# endif /* KMP_GROUP_AFFINITY */

        if (depth < 0) {
            if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
                if (file_name == NULL) {
                    KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
                }
                else if (line == 0) {
                    KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
                }
                else {
                    KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
                }
            }

            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }
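
    /*
     * Summary of the default (affinity_top_method_all) discovery order
     * implemented above: x2APIC ids, then legacy APIC ids (x86 only),
     * then /proc/cpuinfo parsing, then Windows processor groups (when
     * more than one group exists), and finally the flat OS-proc-id map.
     * Each fallback runs only if the previous method returned depth < 0;
     * depth == 0 means the method decided affinity should stay off
     * (affinity_none), so the routine returns immediately.
     */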
# if KMP_ARCH_X86 || KMP_ARCH_X86_64

    // If the user specified a particular topology discovery method, then we
    // abort if that method fails.
    else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(Decodingx2APIC));
        }

        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
    else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY",
              KMP_I18N_STR(DecodingLegacyAPIC));
        }

        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
    else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
        const char *filename;
        if (__kmp_cpuinfo_file != NULL) {
            filename = __kmp_cpuinfo_file;
        }
        else {
            filename = "/proc/cpuinfo";
        }

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
        }

        FILE *f = fopen(filename, "r");
        if (f == NULL) {
            int code = errno;
            if (__kmp_cpuinfo_file != NULL) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE), __kmp_msg_null);
            }
            else {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), __kmp_msg_null);
            }
        }
        int line = 0;
        depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            if (line > 0) {
                KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
            }
            else {
                KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
            }
        }
        if (__kmp_affinity_type == affinity_none) {
            KMP_ASSERT(depth == 0);
            KMP_ASSERT(address2os == NULL);
            return;
        }
    }
# if KMP_GROUP_AFFINITY

    else if (__kmp_affinity_top_method == affinity_top_method_group) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
        KMP_ASSERT(depth != 0);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_GROUP_AFFINITY */
    else if (__kmp_affinity_top_method == affinity_top_method_flat) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        // The flat map should not fail.
        KMP_ASSERT(depth > 0);
        KMP_ASSERT(address2os != NULL);
    }
    if (address2os == NULL) {
        if (KMP_AFFINITY_CAPABLE()
          && (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none)))) {
            KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        KMP_AFFINITY_DISABLE();
        return;
    }

    __kmp_apply_thread_places(&address2os, depth);

    // Create the table of masks, indexed by OS proc id.
    unsigned maxIndex;
    unsigned numUnique;
    kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
      address2os, __kmp_avail_proc);
    if (__kmp_affinity_gran_levels == 0) {
        KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
    }

    // Set the childNums vector in all Address objects.  This must be done
    // before we can sort using __kmp_affinity_cmp_Address_child_num(),
    // which takes into account the setting of __kmp_affinity_compact.
    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
    switch (__kmp_affinity_type) {

        case affinity_explicit:
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;
        // The remaining affinity types rely on sorting the Addresses by some
        // permutation of the topology tree: set __kmp_affinity_compact and
        // __kmp_affinity_offset, then jump to the common sort code.
        case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

        case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        }
        else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

        case affinity_scatter:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;

        case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;
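
        /*
         * The compact/scatter values above follow the documented
         * KMP_AFFINITY semantics: "compact" keeps consecutive places as
         * close together in the topology tree as possible, while "scatter"
         * mirrors the permutation (depth - 1 - __kmp_affinity_compact) so
         * that consecutive places are spread as far apart as possible.
         * E.g. with an assumed depth-3 (package, core, thread) topology,
         * compact fills both HW contexts of a core before moving on;
         * scatter round-robins over packages first.
         */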
        case affinity_balanced:
        // Balanced affinity works only for the case of a single package.
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        else if( __kmp_affinity_uniform_topology() ) {
            break;
        }
        else { // Non-uniform topology

            // Save the depth for further usage.
            __kmp_aff_depth = depth;

            // Number of hyper threads per core in an HT machine.
            int nth_per_core = __kmp_nThreadsPerCore;

            int core_level;
            if( nth_per_core > 1 ) { // HT machine
                core_level = depth - 2;
            }
            else { // no HT
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // If depth == 3 the levels are (package, core, thread);
                // if depth == 2 they are (package, core).
                int level = depth - 1;

                // Assume one thread per core unless a thread level exists.
                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];
                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                procarr[ core * nth_per_core + thread ] = proc;
            }

            break;
        }
        sortAddresses:
        // Allocate the gtid->affinity mask table.
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( ( unsigned ) __kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }

        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
          __kmp_affinity_num_masks * __kmp_affin_mask_size);

        // Sort the address2os table according to the current setting of
        // __kmp_affinity_compact, then fill out __kmp_affinity_masks.
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            int i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest
                  = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

        default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    __kmp_free(osId2Mask);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}
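
/*
 * The two routines below are the runtime-facing wrappers around
 * __kmp_aux_affinity_initialize(): __kmp_affinity_initialize() is expected
 * to run during library bootstrap and __kmp_affinity_uninitialize() during
 * shutdown (based on the surrounding runtime's usage, which this file does
 * not itself enforce).
 */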
void
__kmp_affinity_initialize(void)
{
    // If affinity is disabled, temporarily slam __kmp_affinity_type to
    // affinity_none for the duration of the worker routine, which contains
    // many checks against affinity_none, then restore it.
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}
void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        __kmp_free(__kmp_affinity_masks);
        __kmp_affinity_masks = NULL;
    }
    if (fullMask != NULL) {
        KMP_CPU_FREE(fullMask);
        fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
    __kmp_affinity_num_places = 0;
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
}
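
/*
 * __kmp_affinity_set_init_mask() below chooses the initial place of a
 * thread.  When a specific place is assigned, the index is
 *
 *     i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
 *
 * so with illustrative numbers (4 masks, offset 1) gtids 0..3 land on
 * places 1, 2, 3, 0.
 */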
void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    } else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    // Copy the thread mask to the kmp_info_t structure.  If
    // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set.
    kmp_affin_mask_t *mask;
    int i;

# if OMP_40_ENABLED
    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel)
# endif
    {
        if ((__kmp_affinity_type == affinity_none)
          || (__kmp_affinity_type == affinity_balanced)) {
# if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        } else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# if OMP_40_ENABLED
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
#  if KMP_GROUP_AFFINITY
            if (__kmp_num_proc_groups > 1) {
                return;
            }
#  endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        } else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
# endif

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n", gtid));
    } else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n", gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n", gtid));
    } else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n", gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid, buf);
    }

# if KMP_OS_WINDOWS
    // On Windows* OS, the process affinity mask might have changed.  If the
    // user didn't request affinity and this call fails, just continue
    // silently.
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
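
/*
 * __kmp_affinity_set_place() rebinds a thread after some other part of
 * the runtime has updated th_new_place (e.g. OMP 4.0 place handling).
 * Note the wrap-around partition check: when th_first_place >
 * th_last_place the thread's allowed interval wraps around the end of the
 * place list, so the assertion is inverted.
 */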
void
__kmp_affinity_set_place(int gtid)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    // Check that the new place is within this thread's partition.
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }

    // Copy the thread mask to the kmp_info_t structure, and set this
    // thread's affinity.
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
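
/*
 * The __kmp_aux_{set,get}_affinity* routines below back the public
 * kmp_set_affinity()/kmp_get_affinity() API family.  Minimal usage sketch
 * from user code, assuming the published prototypes in kmp.h (the sketch
 * itself is illustrative and not part of this file):
 *
 *     kmp_affinity_mask_t m;
 *     kmp_create_affinity_mask(&m);
 *     kmp_set_affinity_mask_proc(0, &m);      // allow OS proc 0
 *     if (kmp_set_affinity(&m) != 0) {
 *         // rejected, e.g. proc 0 is outside the machine model
 *     }
 *     kmp_destroy_affinity_mask(&m);
 */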
int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{   // debug-only trace block
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
                if (! KMP_CPU_ISSET(proc, fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    break;
                }
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_GROUP_AFFINITY
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_GROUP_AFFINITY */
        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    // Turn off 4.0 affinity for the current thread at this parallel level.
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

    return retval;
}
int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{   // debug-only trace block
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n", gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS

    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n", gtid, buf);
    });
    return retval;

# else

    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif /* !KMP_OS_WINDOWS */
}
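
/*
 * The three *_affinity_mask_proc routines below edit or query a single OS
 * proc id in a user-supplied mask.  Return conventions as implemented:
 * -1 when affinity is not capable or proc is out of range; the set/unset
 * variants return -2 when proc is outside the machine model and 0 on
 * success; the get variant returns the bit value (0 when proc is outside
 * the machine model).
 */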
int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{   // debug-only trace block
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}
int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{   // debug-only trace block
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}
int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{   // debug-only trace block
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
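
/*
 * __kmp_balanced_affinity() spreads nthreads threads evenly over the
 * cores.  For the uniform-topology case the split is purely arithmetic.
 * Worked example with illustrative numbers: nthreads = 10 on ncores = 4
 * gives chunk = 2 and big_cores = 10 % 4 = 2, hence big_nth = 3 * 2 = 6:
 * tids 0..5 go three-per-core onto the two "big" cores, tids 6..9 go
 * two-per-core onto the remaining two cores.
 */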
void __kmp_balanced_affinity( int tid, int nthreads )
{
    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of hyper threads per core in an HT machine.
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
        // Number of cores.
        int ncores = __kmp_ncores;
        // How many threads will be bound to each core.
        int chunk = nthreads / ncores;
        // How many cores get an additional thread bound to them - "big cores".
        int big_cores = nthreads % ncores;
        // Number of threads on the big cores.
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        } else { // tid >= big_nth
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
            // Granularity == thread
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        } else if( __kmp_affinity_gran == affinity_gran_core ) {
            // Granularity == core
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID;
                osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    } else { // Non-uniform topology
        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)KMP_ALLOCA(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Number of hyper threads per core in an HT machine.
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        } else {
            core_level = __kmp_aff_depth - 1;
        }

        // Number of cores (maximum core label + 1).
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        // For performance, handle the special case nthreads == __kmp_avail_proc.
        if( nthreads == __kmp_avail_proc ) {
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            } else if( __kmp_affinity_gran == affinity_gran_core ) {
                // Granularity == core: collect the (at most nth_per_core)
                // OS ids of the thread's core; address2os is sorted, so we
                // can stop early.
                int coreID = address2os[ tid ].first.labels[ core_level ];
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask);
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        } else if( nthreads <= __kmp_ncores ) {
            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check whether this core has any available processor.
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != - 1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For fine granularity the first available
                                // osID of this core is enough.
                                if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                                    break;
                                }
                            }
                        }
                        break;
                    } else {
                        core++;
                    }
                }
            }
        } else { // nthreads > __kmp_ncores
            // Number of processors at each core.
            int nproc_at_core[ ncores ];
            // Number of cores with exactly "x" available processors.
            int ncores_with_x_procs[ nth_per_core + 1 ];
            // Number of cores with at least "x" available processors.
            int ncores_with_x_to_max_procs[ nth_per_core + 1 ];

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }

            // Max number of processors.
            int nproc = nth_per_core * ncores;
            // An array to keep the number of threads per each context.
            int * newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with no processors.
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                } else {
                                    if( flag != 0 ) {
                                        newarr[ i * nth_per_core + k ] ++;
                                        cnt--;
                                        nth--;
                                        break;
                                    }
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }

            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    } else if( __kmp_affinity_gran == affinity_gran_core ) {
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }

        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
}
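
/*
 * Fallback machine hierarchy for the hierarchical barrier when no topology
 * map is available: shape the tree from the processor count alone, with a
 * leaf branching factor of at most 4 and upper levels repeatedly
 * halved/doubled toward the target branching factor.  Illustrative
 * example: nprocs = 16 starts as numPerLevel = {4, 4}, which already
 * satisfies the constraints, so skipPerLevel becomes {1, 4, 16, ...} and
 * noaff_leaf_kids = 3.
 */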
static const kmp_uint32 noaff_maxLevels=7;
kmp_uint32 noaff_skipPerLevel[noaff_maxLevels];
kmp_uint32 noaff_depth;
kmp_uint8 noaff_leaf_kids;
kmp_int8 noaff_uninitialized=1;

void noaff_init(int nprocs)
{
    // Only one thread performs the initialization; latecomers spin-wait.
    kmp_int8 result = KMP_COMPARE_AND_STORE_ACQ8(&noaff_uninitialized, 1, 2);
    if (result == 0) return;       // already initialized
    else if (result == 2) {        // someone else is initializing
        while (TCR_1(noaff_uninitialized) != 0) KMP_CPU_PAUSE();
        return;
    }
    KMP_DEBUG_ASSERT(result==1);

    kmp_uint32 numPerLevel[noaff_maxLevels];
    noaff_depth = 1;
    for (kmp_uint32 i=0; i<noaff_maxLevels; ++i) { // init to 1 item per level
        numPerLevel[i] = 1;
        noaff_skipPerLevel[i] = 1;
    }

    numPerLevel[0] = 4;
    numPerLevel[1] = nprocs/4;
    if (nprocs%4) numPerLevel[1]++;

    for (int i=noaff_maxLevels-1; i>=0; --i)        // count non-empty levels to get depth
        if (numPerLevel[i] != 1 || noaff_depth > 1) // only count one top-level '1'
            noaff_depth++;

    kmp_uint32 branch = 4;
    if (numPerLevel[0] == 1) branch = nprocs/4;
    if (branch<4) branch=4;
    for (kmp_uint32 d=0; d<noaff_depth-1; ++d) { // optimize hierarchy width
        while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
            if (numPerLevel[d] & 1) numPerLevel[d]++;
            numPerLevel[d] = numPerLevel[d] >> 1;
            if (numPerLevel[d+1] == 1) noaff_depth++;
            numPerLevel[d+1] = numPerLevel[d+1] << 1;
        }
        if(numPerLevel[0] == 1) {
            branch = branch >> 1;
            if (branch<4) branch = 4;
        }
    }

    for (kmp_uint32 i=1; i<noaff_depth; ++i)
        noaff_skipPerLevel[i] = numPerLevel[i-1] * noaff_skipPerLevel[i-1];
    // Fill in the hierarchy in the case of oversubscription.
    for (kmp_uint32 i=noaff_depth; i<noaff_maxLevels; ++i)
        noaff_skipPerLevel[i] = 2*noaff_skipPerLevel[i-1];
    noaff_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
    noaff_uninitialized = 0;   // single writer releases the spin-waiters
}
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (noaff_uninitialized)
        noaff_init(nproc);

    thr_bar->depth = noaff_depth;
    thr_bar->base_leaf_kids = noaff_leaf_kids;
    thr_bar->skip_per_level = noaff_skipPerLevel;
}

#endif // KMP_AFFINITY_SUPPORTED