#include "kmp_wrapper_getpid.h"

#if KMP_AFFINITY_SUPPORTED
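//
// Print an affinity mask into the supplied character buffer in a readable
// form, e.g. "{0,1,2,3}".  If the buffer is too small to list every OS proc
// id in the set, the output is truncated with ",...}".
//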
char *
__kmp_affinity_print_mask(char *buf, int buf_len, kmp_affin_mask_t *mask)
{
    KMP_ASSERT(buf_len >= 40);
    char *scan = buf;
    char *end = buf + buf_len - 1;

    //
    // Find first element / check for empty set.
    //
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; i++) {
        if (KMP_CPU_ISSET(i, mask)) {
            break;
        }
    }
    if (i == KMP_CPU_SETSIZE) {
        sprintf(scan, "{<empty>}");
        while (*scan != '\0') scan++;
        KMP_ASSERT(scan <= end);
        return buf;
    }

    sprintf(scan, "{%ld", (long)i);
    while (*scan != '\0') scan++;
    i++;
    for (; i < KMP_CPU_SETSIZE; i++) {
        if (! KMP_CPU_ISSET(i, mask)) {
            continue;
        }

        //
        // Check for buffer overflow.  A string of the form ",<n>" will have
        // at most 10 characters, plus we want to leave room to print ",...}"
        // if the set is too large to print, for a total of 15 characters.
        //
        if (end - scan < 15) {
            break;
        }
        sprintf(scan, ",%-ld", (long)i);
        while (*scan != '\0') scan++;
    }
    if (i < KMP_CPU_SETSIZE) {
        sprintf(scan, ",...");
        while (*scan != '\0') scan++;
    }
    sprintf(scan, "}");
    while (*scan != '\0') scan++;
    KMP_ASSERT(scan <= end);
    return buf;
}
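//
// Build a mask with a bit set for every OS proc on the machine.  On 64-bit
// Windows with multiple processor groups, procs are numbered
// group * (CHAR_BIT * sizeof(DWORD_PTR)) + proc-within-group; elsewhere,
// procs are simply 0 .. __kmp_xproc - 1.
//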
static void
__kmp_affinity_entire_machine_mask(kmp_affin_mask_t *mask)
{
    KMP_CPU_ZERO(mask);

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

    if (__kmp_num_proc_groups > 1) {
        int group;
        KMP_DEBUG_ASSERT(__kmp_GetActiveProcessorCount != NULL);
        for (group = 0; group < __kmp_num_proc_groups; group++) {
            int i;
            int num = __kmp_GetActiveProcessorCount(group);
            for (i = 0; i < num; i++) {
                KMP_CPU_SET(i + group * (CHAR_BIT * sizeof(DWORD_PTR)), mask);
            }
        }
    }
    else

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */

    {
        int proc;
        for (proc = 0; proc < __kmp_xproc; proc++) {
            KMP_CPU_SET(proc, mask);
        }
    }
}
# if !defined(KMP_DEBUG) && !defined(COVER)
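//
// An Address is the topology label vector for one OS proc: labels[] holds
// the proc's coordinates from the coarsest level (package) down to the
// finest (hardware thread), and childNums[] holds the sibling index at each
// level.  For example, on a 2-package, 2-core, 2-thread machine, the proc
// with labels {1,0,1} is thread 1 of core 0 of package 1.  The methods are
// defined inline in optimized builds and out-of-line in debug/coverage
// builds (the #else branch below).
//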
class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth)
      : depth(_depth), leader(FALSE) {
    }
    Address &operator=(const Address &b) {
        depth = b.depth;
        for (unsigned i = 0; i < depth; i++) {
            labels[i] = b.labels[i];
            childNums[i] = b.childNums[i];
        }
        leader = FALSE;
        return *this;
    }
    bool operator==(const Address &b) const {
        if (depth != b.depth)
            return false;
        for (unsigned i = 0; i < depth; i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool isClose(const Address &b, int level) const {
        if (depth != b.depth)
            return false;
        if ((unsigned)level >= depth)
            return true;
        for (unsigned i = 0; i < (depth - level); i++)
            if (labels[i] != b.labels[i])
                return false;
        return true;
    }
    bool operator!=(const Address &b) const {
        return !operator==(b);
    }
};

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second)
      : first(_first), second(_second) {
    }
    AddrUnsPair &operator=(const AddrUnsPair &b) {
        first = b.first;
        second = b.second;
        return *this;
    }
};
#else

class Address {
public:
    static const unsigned maxDepth = 32;
    unsigned labels[maxDepth];
    unsigned childNums[maxDepth];
    unsigned depth;
    unsigned leader;
    Address(unsigned _depth);
    Address &operator=(const Address &b);
    bool operator==(const Address &b) const;
    bool isClose(const Address &b, int level) const;
    bool operator!=(const Address &b) const;
};

Address::Address(unsigned _depth)
  : depth(_depth), leader(FALSE) {
}

Address &Address::operator=(const Address &b) {
    depth = b.depth;
    for (unsigned i = 0; i < depth; i++) {
        labels[i] = b.labels[i];
        childNums[i] = b.childNums[i];
    }
    leader = FALSE;
    return *this;
}

bool Address::operator==(const Address &b) const {
    if (depth != b.depth)
        return false;
    for (unsigned i = 0; i < depth; i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::isClose(const Address &b, int level) const {
    if (depth != b.depth)
        return false;
    if ((unsigned)level >= depth)
        return true;
    for (unsigned i = 0; i < (depth - level); i++)
        if (labels[i] != b.labels[i])
            return false;
    return true;
}

bool Address::operator!=(const Address &b) const {
    return !operator==(b);
}

class AddrUnsPair {
public:
    Address first;
    unsigned second;
    AddrUnsPair(Address _first, unsigned _second);
    AddrUnsPair &operator=(const AddrUnsPair &b);
};

AddrUnsPair::AddrUnsPair(Address _first, unsigned _second)
  : first(_first), second(_second)
{
}

AddrUnsPair &AddrUnsPair::operator=(const AddrUnsPair &b) {
    first = b.first;
    second = b.second;
    return *this;
}

#endif /* !defined(KMP_DEBUG) && !defined(COVER) */
static int
__kmp_affinity_cmp_Address_labels(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    for (i = 0; i < depth; i++) {
        if (aa->labels[i] < bb->labels[i]) return -1;
        if (aa->labels[i] > bb->labels[i]) return 1;
    }
    return 0;
}
static int
__kmp_affinity_cmp_Address_child_num(const void *a, const void *b)
{
    const Address *aa = (const Address *)&(((AddrUnsPair *)a)->first);
    const Address *bb = (const Address *)&(((AddrUnsPair *)b)->first);
    unsigned depth = aa->depth;
    unsigned i;
    KMP_DEBUG_ASSERT(depth == bb->depth);
    KMP_DEBUG_ASSERT((unsigned)__kmp_affinity_compact <= depth);
    KMP_DEBUG_ASSERT(__kmp_affinity_compact >= 0);
    for (i = 0; i < (unsigned)__kmp_affinity_compact; i++) {
        int j = depth - i - 1;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    for (; i < depth; i++) {
        int j = i - __kmp_affinity_compact;
        if (aa->childNums[j] < bb->childNums[j]) return -1;
        if (aa->childNums[j] > bb->childNums[j]) return 1;
    }
    return 0;
}
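//
// hierarchy_info models the machine as a tree for hierarchical barriers:
// numPerLevel[i] is the branching factor at level i (leaves at level 0),
// and skipPerLevel[i] is the number of leaves spanned by one node at level
// i, i.e. skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1].  For
// example, numPerLevel = {4, 2} describes 2 nodes of 4 leaves each, giving
// skipPerLevel = {1, 4}.
//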
class hierarchy_info {
public:
    static const kmp_uint32 maxLevels = 7;
    kmp_uint32 depth;
    bool uninitialized;
    kmp_uint32 base_depth;
    kmp_uint32 base_num_threads;
    kmp_uint32 numPerLevel[maxLevels];
    kmp_uint32 skipPerLevel[maxLevels];

    void deriveLevels(AddrUnsPair *adr2os, int num_addrs) {
        int hier_depth = adr2os[0].first.depth;
        int level = 0;
        for (int i = hier_depth - 1; i >= 0; --i) {
            int max = -1;
            for (int j = 0; j < num_addrs; ++j) {
                int next = adr2os[j].first.childNums[i];
                if (next > max) max = next;
            }
            numPerLevel[level] = max + 1;
            ++level;
        }
    }

    hierarchy_info() : depth(1), uninitialized(true) {}

    void init(AddrUnsPair *adr2os, int num_addrs)
    {
        uninitialized = false;
        for (kmp_uint32 i = 0; i < maxLevels; ++i) { // init numPerLevel[*] to 1 item per level
            numPerLevel[i] = 1;
            skipPerLevel[i] = 1;
        }

        // Sort table by physical ID
        if (adr2os) {
            qsort(adr2os, num_addrs, sizeof(*adr2os), __kmp_affinity_cmp_Address_labels);
            deriveLevels(adr2os, num_addrs);
        }
        else {
            numPerLevel[0] = 4;
            numPerLevel[1] = num_addrs/4;
            if (num_addrs%4) numPerLevel[1]++;
        }

        base_num_threads = num_addrs;
        for (int i = maxLevels - 1; i >= 0; --i) // count non-empty levels to get depth
            if (numPerLevel[i] != 1 || depth > 1) // only count one top-level '1'
                depth++;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = num_addrs/4;
        if (branch < 4) branch = 4;
        for (kmp_uint32 d = 0; d < depth - 1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d == 0 && numPerLevel[d] > 4)) {
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if (numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch < 4) branch = 4;
            }
        }

        for (kmp_uint32 i = 1; i < depth; ++i)
            skipPerLevel[i] = numPerLevel[i-1] * skipPerLevel[i-1];

        base_depth = depth;
    }
};
static hierarchy_info machine_hierarchy;
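//
// Fill in a thread's barrier state from the machine hierarchy, extending the
// hierarchy by doubling the span of the top level for as long as nproc
// exceeds the number of leaves the current depth covers.
//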
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    if (machine_hierarchy.uninitialized)
        machine_hierarchy.init(NULL, nproc);

    if (nproc <= machine_hierarchy.base_num_threads)
        machine_hierarchy.depth = machine_hierarchy.base_depth;
    KMP_DEBUG_ASSERT(machine_hierarchy.depth > 0);
    while (nproc > machine_hierarchy.skipPerLevel[machine_hierarchy.depth - 1]) {
        machine_hierarchy.depth++;
        machine_hierarchy.skipPerLevel[machine_hierarchy.depth - 1] =
            2 * machine_hierarchy.skipPerLevel[machine_hierarchy.depth - 2];
    }
    thr_bar->depth = machine_hierarchy.depth;
    thr_bar->base_leaf_kids = (kmp_uint8)machine_hierarchy.numPerLevel[0] - 1;
    thr_bar->skip_per_level = machine_hierarchy.skipPerLevel;
}
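//
// When the address2os table is sorted by labels, procs sharing an ancestor
// at some level are adjacent, so childNums can be assigned in a single pass:
// the count at a level resets whenever a more significant label changes.
// E.g. for package labels 0,0,1,1 the package childNums are 0,0,1,1 and the
// within-package counts restart at each package boundary.
//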
static void
__kmp_affinity_assign_child_nums(AddrUnsPair *address2os, int numAddrs)
{
    KMP_DEBUG_ASSERT(numAddrs > 0);
    int depth = address2os->first.depth;
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *lastLabel = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    int labCt;
    for (labCt = 0; labCt < depth; labCt++) {
        address2os[0].first.childNums[labCt] = counts[labCt] = 0;
        lastLabel[labCt] = address2os[0].first.labels[labCt];
    }
    int i;
    for (i = 1; i < numAddrs; i++) {
        for (labCt = 0; labCt < depth; labCt++) {
            if (address2os[i].first.labels[labCt] != lastLabel[labCt]) {
                int labCt2;
                for (labCt2 = labCt + 1; labCt2 < depth; labCt2++) {
                    counts[labCt2] = 0;
                    lastLabel[labCt2] = address2os[i].first.labels[labCt2];
                }
                counts[labCt]++;
                lastLabel[labCt] = address2os[i].first.labels[labCt];
                break;
            }
        }
        for (labCt = 0; labCt < depth; labCt++) {
            address2os[i].first.childNums[labCt] = counts[labCt];
        }
        for (; labCt < (int)Address::maxDepth; labCt++) {
            address2os[i].first.childNums[labCt] = 0;
        }
    }
    __kmp_free(lastLabel);
    __kmp_free(counts);
}
static kmp_affin_mask_t *fullMask = NULL;

kmp_affin_mask_t *
__kmp_affinity_get_fullMask() { return fullMask; }

static int nCoresPerPkg, nPackages;
int __kmp_nThreadsPerCore;
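//
// A topology is "uniform" when every package has the same number of cores
// and every core the same number of hardware threads, in which case
// avail_proc equals threadsPerCore * coresPerPkg * packages exactly
// (e.g. 2 packages x 4 cores x 2 threads = 16 procs).
//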
static int
__kmp_affinity_uniform_topology()
{
    return __kmp_avail_proc == (__kmp_nThreadsPerCore * nCoresPerPkg * nPackages);
}
//
// Print out the detailed machine topology map, i.e. the physical locations
// of each OS proc.
//
static void
__kmp_affinity_print_topology(AddrUnsPair *address2os, int len, int depth,
  int pkgLevel, int coreLevel, int threadLevel)
{
    int proc;
    KMP_INFORM(OSProcToPhysicalThreadMap, "KMP_AFFINITY");
    for (proc = 0; proc < len; proc++) {
        int level;
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);
        for (level = 0; level < depth; level++) {
            if (level == threadLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Thread));
            }
            else if (level == coreLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Core));
            }
            else if (level == pkgLevel) {
                __kmp_str_buf_print(&buf, "%s ", KMP_I18N_STR(Package));
            }
            else if (level > pkgLevel) {
                __kmp_str_buf_print(&buf, "%s_%d ", KMP_I18N_STR(Node),
                  level - pkgLevel - 1);
            }
            else {
                __kmp_str_buf_print(&buf, "L%d ", level);
            }
            __kmp_str_buf_print(&buf, "%d ",
              address2os[proc].first.labels[level]);
        }
        KMP_INFORM(OSProcMapToPack, "KMP_AFFINITY", address2os[proc].second,
          buf.str);
        __kmp_str_buf_free(&buf);
    }
}
//
// Create a flat, depth-1 topology map: one entry per available OS proc.
//
static int
__kmp_affinity_create_flat_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Even if affinity is not enabled, this routine is still called to set
    // __kmp_ncores, __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);
        __kmp_ncores = nPackages = __kmp_xproc;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffFlatTopology, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    __kmp_ncores = nPackages = __kmp_avail_proc;
    __kmp_nThreadsPerCore = nCoresPerPkg = 1;
    __kmp_ht_enabled = FALSE;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);

        KMP_INFORM(AffCapableUseFlat, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        KMP_INFORM(Uniform, "KMP_AFFINITY");
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }
    if (__kmp_affinity_type == affinity_none) {
        return 0;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        Address addr(1);
        addr.labels[0] = i;
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);
    }
    if (__kmp_affinity_verbose) {
        KMP_INFORM(OSProcToPackage, "KMP_AFFINITY");
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Only the package level is modeled in the machine topology map,
        // so the #levels of granularity is 0 or 1.
        //
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels = 1;
        }
        else {
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 1;
}
# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

//
// Create a 2-level topology map based on Windows processor groups:
// level 0 is the group, level 1 is the proc within the group.
//
static int
__kmp_affinity_create_proc_group_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // If the mask fits within a single processor group, there is nothing
    // to do here.
    //
    if ((! KMP_AFFINITY_CAPABLE()) || (__kmp_get_proc_group(fullMask) >= 0)) {
        return -1;
    }

    //
    // Construct the data structure to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(**address2os) * __kmp_avail_proc);
    int avail_ct = 0;
    int i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        Address addr(2);
        addr.labels[0] = i / (CHAR_BIT * sizeof(DWORD_PTR));
        addr.labels[1] = i % (CHAR_BIT * sizeof(DWORD_PTR));
        (*address2os)[avail_ct++] = AddrUnsPair(addr,i);

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffOSProcToGroup, "KMP_AFFINITY", i, addr.labels[0],
              addr.labels[1]);
        }
    }

    if (__kmp_affinity_gran_levels < 0) {
        if (__kmp_affinity_gran == affinity_gran_group) {
            __kmp_affinity_gran_levels = 1;
        }
        else if ((__kmp_affinity_gran == affinity_gran_fine)
          || (__kmp_affinity_gran == affinity_gran_thread)) {
            __kmp_affinity_gran_levels = 0;
        }
        else {
            const char *gran_str = NULL;
            if (__kmp_affinity_gran == affinity_gran_core) {
                gran_str = "core";
            }
            else if (__kmp_affinity_gran == affinity_gran_package) {
                gran_str = "package";
            }
            else if (__kmp_affinity_gran == affinity_gran_node) {
                gran_str = "node";
            }
            else {
                KMP_ASSERT(0);
            }

            // Granularities other than thread/group are not supported with
            // the group topology method; fall back to fine granularity.
            __kmp_affinity_gran_levels = 0;
        }
    }
    return 2;
}
# if KMP_ARCH_X86 || KMP_ARCH_X86_64
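//
// Width (in bits) of the field needed to hold the values 0..count-1, i.e.
// ceil(log2(count)): e.g. count = 6 gives 3, since 1<<3 = 8 >= 6 > 4 = 1<<2.
//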
static int
__kmp_cpuid_mask_width(int count) {
    int r = 0;

    while((1<<r) < count)
        ++r;
    return r;
}
class apicThreadInfo {
public:
    unsigned osId;              // param to __kmp_affinity_bind_thread
    unsigned apicId;            // from cpuid after binding
    unsigned maxCoresPerPkg;    //      ""
    unsigned maxThreadsPerPkg;  //      ""
    unsigned pkgId;             // inferred from the values above
    unsigned coreId;            //      ""
    unsigned threadId;          //      ""
};

static int
__kmp_affinity_cmp_apicThreadInfo_os_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->osId < bb->osId) return -1;
    if (aa->osId > bb->osId) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_apicThreadInfo_phys_id(const void *a, const void *b)
{
    const apicThreadInfo *aa = (const apicThreadInfo *)a;
    const apicThreadInfo *bb = (const apicThreadInfo *)b;
    if (aa->pkgId < bb->pkgId) return -1;
    if (aa->pkgId > bb->pkgId) return 1;
    if (aa->coreId < bb->coreId) return -1;
    if (aa->coreId > bb->coreId) return 1;
    if (aa->threadId < bb->threadId) return -1;
    if (aa->threadId > bb->threadId) return 1;
    return 0;
}
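//
// Legacy APIC method (cpuid leaves 1 and 4): bind the current thread to each
// proc in turn and decompose its 8-bit APIC id into pkgId|coreId|threadId
// bit fields.  E.g. with maxThreadsPerPkg = 8 (widthCT = 3) and
// maxCoresPerPkg = 4 (widthC = 2, so widthT = 1), APIC id 0b01101 decodes to
// pkg 1, core 2, thread 1.
//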
static int
__kmp_affinity_create_apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check if cpuid leaf 4 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 4) {
        *msg_id = kmp_i18n_str_NoLeaf4Support;
        return -1;
    }

    //
    // Even if affinity is not capable, infer what we can from cpuid on the
    // current thread and __kmp_xproc, then return.
    //
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        //
        // Get the maximum number of thread contexts per package from leaf 1.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        int maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (maxThreadsPerPkg == 0) {
            maxThreadsPerPkg = 1;
        }

        //
        // Get the maximum number of cores per package from leaf 4, if
        // available.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            nCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            nCoresPerPkg = 1;
        }

        __kmp_ncores = __kmp_xproc;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_nThreadsPerCore = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuid, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }
    //
    // Save the current affinity mask so it can be restored at the end.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    KMP_ASSERT(oldMask != NULL);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Bind to each available proc in turn and collect its cpuid info.
    //
    apicThreadInfo *threadInfo = (apicThreadInfo *)__kmp_allocate(
      __kmp_avail_proc * sizeof(apicThreadInfo));
    unsigned nApics = 0;
    unsigned i;
    for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(i, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT((int)nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(i);
        threadInfo[nApics].osId = i;

        //
        // The APIC id and max threads per package come from cpuid leaf 1.
        //
        __kmp_x86_cpuid(1, 0, &buf);
        if (!((buf.edx >> 9) & 1)) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_ApicNotPresent;
            return -1;
        }
        threadInfo[nApics].apicId = (buf.ebx >> 24) & 0xff;
        threadInfo[nApics].maxThreadsPerPkg = (buf.ebx >> 16) & 0xff;
        if (threadInfo[nApics].maxThreadsPerPkg == 0) {
            threadInfo[nApics].maxThreadsPerPkg = 1;
        }

        //
        // Max cores per package comes from cpuid leaf 4, if available.
        //
        __kmp_x86_cpuid(0, 0, &buf);
        if (buf.eax >= 4) {
            __kmp_x86_cpuid(4, 0, &buf);
            threadInfo[nApics].maxCoresPerPkg = ((buf.eax >> 26) & 0x3f) + 1;
        }
        else {
            threadInfo[nApics].maxCoresPerPkg = 1;
        }

        //
        // Infer the pkgId / coreId / threadId using only the info obtained
        // locally.
        //
        int widthCT = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxThreadsPerPkg);
        threadInfo[nApics].pkgId = threadInfo[nApics].apicId >> widthCT;

        int widthC = __kmp_cpuid_mask_width(
          threadInfo[nApics].maxCoresPerPkg);
        int widthT = widthCT - widthC;
        if (widthT < 0) {
            __kmp_set_system_affinity(oldMask, TRUE);
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }

        int maskC = (1 << widthC) - 1;
        threadInfo[nApics].coreId = (threadInfo[nApics].apicId >> widthT)
          & maskC;

        int maskT = (1 << widthT) - 1;
        threadInfo[nApics].threadId = threadInfo[nApics].apicId & maskT;

        nApics++;
    }

    //
    // All done; restore the affinity mask for the current thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);
    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0].pkgId;
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0].osId);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, nApics, sizeof(*threadInfo),
      __kmp_affinity_cmp_apicThreadInfo_phys_id);
    //
    // Count the packages, cores, and threads, checking that the cpuid info
    // is consistent from proc to proc.
    //
    nPackages = 1;
    nCoresPerPkg = 1;
    __kmp_nThreadsPerCore = 1;
    unsigned nCores = 1;

    unsigned lastPkgId = threadInfo[0].pkgId;
    unsigned coreCt = 1;
    unsigned lastCoreId = threadInfo[0].coreId;
    unsigned threadCt = 1;
    unsigned lastThreadId = threadInfo[0].threadId;

    unsigned prevMaxCoresPerPkg = threadInfo[0].maxCoresPerPkg;
    unsigned prevMaxThreadsPerPkg = threadInfo[0].maxThreadsPerPkg;

    for (i = 1; i < nApics; i++) {
        if (threadInfo[i].pkgId != lastPkgId) {
            nCores++;
            nPackages++;
            lastPkgId = threadInfo[i].pkgId;
            if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
            coreCt = 1;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;

            prevMaxCoresPerPkg = threadInfo[i].maxCoresPerPkg;
            prevMaxThreadsPerPkg = threadInfo[i].maxThreadsPerPkg;
        }
        else if (threadInfo[i].coreId != lastCoreId) {
            nCores++;
            coreCt++;
            lastCoreId = threadInfo[i].coreId;
            if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
            threadCt = 1;
            lastThreadId = threadInfo[i].threadId;
        }
        else if (threadInfo[i].threadId != lastThreadId) {
            threadCt++;
            lastThreadId = threadInfo[i].threadId;
        }
        else {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_LegacyApicIDsNotUnique;
            return -1;
        }

        //
        // Check for consistent cpuid info from proc to proc.
        //
        if ((prevMaxCoresPerPkg != threadInfo[i].maxCoresPerPkg)
          || (prevMaxThreadsPerPkg != threadInfo[i].maxThreadsPerPkg)) {
            __kmp_free(threadInfo);
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }
    }
    if ((int)coreCt > nCoresPerPkg) nCoresPerPkg = coreCt;
    if ((int)threadCt > __kmp_nThreadsPerCore) __kmp_nThreadsPerCore = threadCt;
    __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
    __kmp_ncores = nCores;
    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuid, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (__kmp_affinity_uniform_topology()) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }
        KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(threadInfo);
        KMP_CPU_FREE(oldMask);
        return 0;
    }
    //
    // Form an Address object which only includes the levels that have more
    // than one entry.
    //
    int pkgLevel = 0;
    int coreLevel = (nCoresPerPkg <= 1) ? -1 : 1;
    int threadLevel = (__kmp_nThreadsPerCore <= 1) ? -1 : ((coreLevel >= 0) ? 2 : 1);
    unsigned depth = (pkgLevel >= 0) + (coreLevel >= 0) + (threadLevel >= 0);

    KMP_ASSERT(depth > 0);
    *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair) * nApics);

    for (i = 0; i < nApics; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i].osId;
        int d = 0;

        if (pkgLevel >= 0) {
            addr.labels[d++] = threadInfo[i].pkgId;
        }
        if (coreLevel >= 0) {
            addr.labels[d++] = threadInfo[i].coreId;
        }
        if (threadLevel >= 0) {
            addr.labels[d++] = threadInfo[i].threadId;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0)
          && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if ((pkgLevel >= 0) && (__kmp_affinity_gran > affinity_gran_package)) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(threadInfo);
    KMP_CPU_FREE(oldMask);
    return depth;
}
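//
// x2APIC method (cpuid leaf 11): the topology is reported as a series of
// levels, each giving a shift width.  A proc's label at a level is the slice
// of its x2APIC id between that level's shift and the previous one; e.g.
// shifts {1, 4} split id 0b101101 into thread = bit 0, core = bits 1..3,
// and package = bits 4 and up.
//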
static int
__kmp_affinity_create_x2apicid_map(AddrUnsPair **address2os,
  kmp_i18n_id_t *const msg_id)
{
    kmp_cpuid buf;
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Check to see if cpuid leaf 11 is supported.
    //
    __kmp_x86_cpuid(0, 0, &buf);
    if (buf.eax < 11) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }
    __kmp_x86_cpuid(11, 0, &buf);
    if (buf.ebx == 0) {
        *msg_id = kmp_i18n_str_NoLeaf11Support;
        return -1;
    }

    //
    // Find the number of levels in the machine topology.  While we're at it,
    // get the default values for __kmp_nThreadsPerCore and nCoresPerPkg.
    //
    int depth = 0;
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;
    __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;

    int level;
    for (level = 0;; level++) {
        if (level > 31) {
            //
            // If the loop runs this long, the x2APIC settings are broken;
            // reject this case based on the loop trip count.
            //
            *msg_id = kmp_i18n_str_InvalidCpuidInfo;
            return -1;
        }
        __kmp_x86_cpuid(11, level, &buf);
        if (buf.ebx == 0) {
            break;      // no more levels reported
        }
        int kind = (buf.ecx >> 8) & 0xff;
        if (kind == 1) {
            //
            // SMT level
            //
            threadLevel = level;
            coreLevel = -1;
            pkgLevel = -1;
            __kmp_nThreadsPerCore = buf.ebx & 0xff;
            if (__kmp_nThreadsPerCore == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else if (kind == 2) {
            //
            // core level
            //
            coreLevel = level;
            pkgLevel = -1;
            nCoresPerPkg = buf.ebx & 0xff;
            if (nCoresPerPkg == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
        else {
            if (level <= 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
            if (pkgLevel >= 0) {
                continue;
            }
            pkgLevel = level;
            nPackages = buf.ebx & 0xff;
            if (nPackages == 0) {
                *msg_id = kmp_i18n_str_InvalidCpuidInfo;
                return -1;
            }
        }
    }
    depth = level;

    //
    // In the above loop, "level" ran from finest to coarsest; convert the
    // level indices so that they index the (coarsest-first) labels array.
    //
    if (threadLevel >= 0) {
        threadLevel = depth - threadLevel - 1;
    }
    if (coreLevel >= 0) {
        coreLevel = depth - coreLevel - 1;
    }
    KMP_DEBUG_ASSERT(pkgLevel >= 0);
    pkgLevel = depth - pkgLevel - 1;
    //
    // If affinity is not capable, infer the machine topology using only the
    // data available from cpuid on the current thread and __kmp_xproc.
    //
    if (! KMP_AFFINITY_CAPABLE())
    {
        KMP_ASSERT(__kmp_affinity_type == affinity_none);

        __kmp_ncores = __kmp_xproc / __kmp_nThreadsPerCore;
        nPackages = (__kmp_xproc + nCoresPerPkg - 1) / nCoresPerPkg;
        __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffNotCapableUseLocCpuidL11, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (__kmp_affinity_uniform_topology()) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }
        return 0;
    }

    //
    // Save the current affinity mask so it can be restored at the end.
    //
    kmp_affin_mask_t *oldMask;
    KMP_CPU_ALLOC(oldMask);
    __kmp_get_system_affinity(oldMask, TRUE);

    //
    // Allocate the data structure to be returned.
    //
    AddrUnsPair *retval = (AddrUnsPair *)
      __kmp_allocate(sizeof(AddrUnsPair) * __kmp_avail_proc);
    //
    // Bind to each available proc in turn and extract its topology labels
    // from the x2APIC id.
    //
    unsigned nApics = 0;
    unsigned proc;
    for (proc = 0; proc < KMP_CPU_SETSIZE; ++proc) {
        //
        // Skip this proc if it is not included in the machine model.
        //
        if (! KMP_CPU_ISSET(proc, fullMask)) {
            continue;
        }
        KMP_DEBUG_ASSERT(nApics < __kmp_avail_proc);

        __kmp_affinity_bind_thread(proc);

        //
        // Extract the labels for each level in the machine topology map
        // from the APIC id.
        //
        Address addr(depth);
        int prev_shift = 0;

        for (level = 0; level < depth; level++) {
            __kmp_x86_cpuid(11, level, &buf);
            unsigned apicId = buf.edx;
            if (buf.ebx == 0) {
                if (level != depth - 1) {
                    KMP_CPU_FREE(oldMask);
                    *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
                    return -1;
                }
                addr.labels[depth - level - 1] = apicId >> prev_shift;
                level++;
                break;
            }
            int shift = buf.eax & 0x1f;
            int mask = (1 << shift) - 1;
            addr.labels[depth - level - 1] = (apicId & mask) >> prev_shift;
            prev_shift = shift;
        }
        if (level != depth) {
            KMP_CPU_FREE(oldMask);
            *msg_id = kmp_i18n_str_InconsistentCpuidInfo;
            return -1;
        }

        retval[nApics] = AddrUnsPair(addr, proc);
        nApics++;
    }

    //
    // All done; restore the affinity mask for the current thread.
    //
    __kmp_set_system_affinity(oldMask, TRUE);

    KMP_ASSERT(nApics > 0);
    if (nApics == 1) {
        __kmp_ncores = nPackages = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

            KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            KMP_INFORM(Uniform, "KMP_AFFINITY");
            KMP_INFORM(Topology, "KMP_AFFINITY", nPackages, nCoresPerPkg,
              __kmp_nThreadsPerCore, __kmp_ncores);
        }

        if (__kmp_affinity_type == affinity_none) {
            __kmp_free(retval);
            KMP_CPU_FREE(oldMask);
            return 0;
        }

        //
        // Form an Address object which only includes the package level.
        //
        Address addr(1);
        addr.labels[0] = retval[0].first.labels[pkgLevel];
        retval[0].first = addr;

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(retval, 1, 1, 0, -1, -1);
        }

        *address2os = retval;
        KMP_CPU_FREE(oldMask);
        return 1;
    }
    //
    // Sort the table by physical Id.
    //
    qsort(retval, nApics, sizeof(*retval), __kmp_affinity_cmp_Address_labels);

    //
    // Find the radix at each of the levels.
    //
    unsigned *totals = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *counts = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    unsigned *last = (unsigned *)__kmp_allocate(depth * sizeof(unsigned));
    for (level = 0; level < depth; level++) {
        totals[level] = 1;
        counts[level] = 1;
        maxCt[level] = 1;
        last[level] = retval[0].first.labels[level];
    }

    //
    // From here on, the iteration variable "level" runs from the coarsest
    // level (level 0 of the labels array) to the finest.
    //
    for (proc = 1; (int)proc < nApics; proc++) {
        for (level = 0; level < depth; level++) {
            if (retval[proc].first.labels[level] != last[level]) {
                int j;
                for (j = level + 1; j < depth; j++) {
                    totals[j]++;
                    counts[j] = 1;
                    last[j] = retval[proc].first.labels[j];
                }
                totals[level]++;
                counts[level]++;
                if (counts[level] > maxCt[level]) {
                    maxCt[level] = counts[level];
                }
                last[level] = retval[proc].first.labels[level];
                break;
            }
            else if (level == depth - 1) {
                //
                // Two procs with identical labels - the x2APIC ids are not
                // unique.
                //
                __kmp_free(last);
                __kmp_free(maxCt);
                __kmp_free(counts);
                __kmp_free(totals);
                __kmp_free(retval);
                KMP_CPU_FREE(oldMask);
                *msg_id = kmp_i18n_str_x2ApicIDsNotUnique;
                return -1;
            }
        }
    }
    //
    // Even if affinity ends up disabled, set __kmp_ht_enabled, __kmp_ncores,
    // __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages correctly.
    //
    if (threadLevel >= 0) {
        __kmp_nThreadsPerCore = maxCt[threadLevel];
    }
    else {
        __kmp_nThreadsPerCore = 1;
    }
    __kmp_ht_enabled = (__kmp_nThreadsPerCore > 1);

    nPackages = totals[pkgLevel];

    if (coreLevel >= 0) {
        __kmp_ncores = totals[coreLevel];
        nCoresPerPkg = maxCt[coreLevel];
    }
    else {
        __kmp_ncores = nPackages;
        nCoresPerPkg = 1;
    }

    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = maxCt[0];
    for (level = 1; level < depth; level++) {
        prod *= maxCt[level];
    }
    bool uniform = (prod == totals[level - 1]);
    //
    // Print the machine topology summary.
    //
    if (__kmp_affinity_verbose) {
        char mask[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(mask, KMP_AFFIN_MASK_PRINT_LEN, oldMask);

        KMP_INFORM(AffUseGlobCpuidL11, "KMP_AFFINITY");
        if (__kmp_affinity_respect_mask) {
            KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", mask);
        }
        else {
            KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", mask);
        }
        KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
        if (uniform) {
            KMP_INFORM(Uniform, "KMP_AFFINITY");
        }
        else {
            KMP_INFORM(NonUniform, "KMP_AFFINITY");
        }

        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[0]);
        for (level = 1; level <= pkgLevel; level++) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[level]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, nCoresPerPkg,
          __kmp_nThreadsPerCore, __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(last);
        __kmp_free(maxCt);
        __kmp_free(counts);
        __kmp_free(totals);
        __kmp_free(retval);
        KMP_CPU_FREE(oldMask);
        return 0;
    }
    //
    // Find any levels with radix 1, and remove them from the map (except
    // for the package level).
    //
    int new_depth = 0;
    for (level = 0; level < depth; level++) {
        if ((maxCt[level] == 1) && (level != pkgLevel)) {
            continue;
        }
        new_depth++;
    }

    //
    // If any levels are being removed, allocate a new vector to return and
    // copy the relevant information to it.
    //
    if (new_depth != depth) {
        AddrUnsPair *new_retval = (AddrUnsPair *)__kmp_allocate(
          sizeof(AddrUnsPair) * nApics);
        for (proc = 0; (int)proc < nApics; proc++) {
            Address addr(new_depth);
            new_retval[proc] = AddrUnsPair(addr, retval[proc].second);
        }
        int new_level = 0;
        for (level = 0; level < depth; level++) {
            if ((maxCt[level] == 1) && (level != pkgLevel)) {
                //
                // Remove this level, adjusting the saved level indices.
                //
                if (level == threadLevel) {
                    threadLevel = -1;
                }
                else if ((threadLevel >= 0) && (level < threadLevel)) {
                    threadLevel--;
                }
                if (level == coreLevel) {
                    coreLevel = -1;
                }
                else if ((coreLevel >= 0) && (level < coreLevel)) {
                    coreLevel--;
                }
                if (level < pkgLevel) {
                    pkgLevel--;
                }
                continue;
            }
            for (proc = 0; (int)proc < nApics; proc++) {
                new_retval[proc].first.labels[new_level]
                  = retval[proc].first.labels[level];
            }
            new_level++;
        }

        __kmp_free(retval);
        retval = new_retval;
        depth = new_depth;
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        //
        __kmp_affinity_gran_levels = 0;
        if ((threadLevel >= 0) && (__kmp_affinity_gran > affinity_gran_thread)) {
            __kmp_affinity_gran_levels++;
        }
        if ((coreLevel >= 0) && (__kmp_affinity_gran > affinity_gran_core)) {
            __kmp_affinity_gran_levels++;
        }
        if (__kmp_affinity_gran > affinity_gran_package) {
            __kmp_affinity_gran_levels++;
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(retval, nApics, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(last);
    __kmp_free(maxCt);
    __kmp_free(counts);
    __kmp_free(totals);
    KMP_CPU_FREE(oldMask);
    *address2os = retval;
    return depth;
}

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
#define osIdIndex       0
#define threadIdIndex   1
#define coreIdIndex     2
#define pkgIdIndex      3
#define nodeIdIndex     4

typedef unsigned *ProcCpuInfo;
static unsigned maxIndex = pkgIdIndex;

static int
__kmp_affinity_cmp_ProcCpuInfo_os_id(const void *a, const void *b)
{
    const unsigned *aa = (const unsigned *)a;
    const unsigned *bb = (const unsigned *)b;
    if (aa[osIdIndex] < bb[osIdIndex]) return -1;
    if (aa[osIdIndex] > bb[osIdIndex]) return 1;
    return 0;
}

static int
__kmp_affinity_cmp_ProcCpuInfo_phys_id(const void *a, const void *b)
{
    unsigned i;
    const unsigned *aa = *((const unsigned **)a);
    const unsigned *bb = *((const unsigned **)b);
    for (i = maxIndex; ; i--) {
        if (aa[i] < bb[i]) return -1;
        if (aa[i] > bb[i]) return 1;
        if (i == osIdIndex) break;
    }
    return 0;
}
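//
// Parse /proc/cpuinfo (or the file named by KMP_CPUINFO_FILE) to form the
// topology map.  Each record is a block of "<field> : <value>" lines ending
// in a blank line; the fields of interest are "processor", "physical id",
// "core id", "thread id", and optional "node_<n> id" lines.
//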
static int
__kmp_affinity_create_cpuinfo_map(AddrUnsPair **address2os, int *line,
  kmp_i18n_id_t *const msg_id, FILE *f)
{
    *address2os = NULL;
    *msg_id = kmp_i18n_null;

    //
    // Scan the file once, counting the number of "processor" (osId) fields
    // and finding the highest level of <n> specified.
    //
    char buf[256];
    unsigned num_records = 0;
    while (! feof(f)) {
        buf[sizeof(buf) - 1] = 1;
        if (! fgets(buf, sizeof(buf), f)) {
            //
            // Read errors presumably because of EOF.
            //
            break;
        }

        char s1[] = "processor";
        if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
            num_records++;
            continue;
        }

        unsigned level;
        if (sscanf(buf, "node_%d id", &level) == 1) {
            if (nodeIdIndex + level >= maxIndex) {
                maxIndex = nodeIdIndex + level;
            }
        }
    }

    //
    // Check for empty file / no valid processor records, or too many.
    //
    if (num_records == 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_NoProcRecords;
        return -1;
    }
    if (num_records > (unsigned)__kmp_xproc) {
        *line = 0;
        *msg_id = kmp_i18n_str_TooManyProcRecords;
        return -1;
    }

    //
    // Set the file pointer back to the beginning, so that we can scan the
    // file again, this time performing a full parse of the data.
    //
    if (fseek(f, 0, SEEK_SET) != 0) {
        *line = 0;
        *msg_id = kmp_i18n_str_CantRewindCpuinfo;
        return -1;
    }

    //
    // Allocate the array of records to store the proc info in.  The dummy
    // element at the end makes the logic in filling them out easier to code.
    //
    unsigned **threadInfo = (unsigned **)__kmp_allocate((num_records + 1)
      * sizeof(unsigned *));
    unsigned i;
    for (i = 0; i <= num_records; i++) {
        threadInfo[i] = (unsigned *)__kmp_allocate((maxIndex + 1)
          * sizeof(unsigned));
    }

#define CLEANUP_THREAD_INFO \
    for (i = 0; i <= num_records; i++) {        \
        __kmp_free(threadInfo[i]);              \
    }                                           \
    __kmp_free(threadInfo);

    //
    // A value of UINT_MAX means that we didn't find the field.
    //
    unsigned __index;

#define INIT_PROC_INFO(p) \
    for (__index = 0; __index <= maxIndex; __index++) { \
        (p)[__index] = UINT_MAX;                        \
    }

    for (i = 0; i <= num_records; i++) {
        INIT_PROC_INFO(threadInfo[i]);
    }

    unsigned num_avail = 0;
    *line = 0;
    while (! feof(f)) {
        //
        // Create an inner scoping level, so that all the goto targets at the
        // end of the loop appear in an outer scoping level.
        //
        {
            buf[sizeof(buf) - 1] = 1;
            bool long_line = false;
            if (! fgets(buf, sizeof(buf), f)) {
                //
                // Read error, presumably EOF.  If there is valid data in
                // threadInfo[num_avail], fake a blank line so the last
                // record gets parsed.
                //
                bool valid = false;
                for (i = 0; i <= maxIndex; i++) {
                    if (threadInfo[num_avail][i] != UINT_MAX) {
                        valid = true;
                    }
                }
                if (! valid) {
                    break;
                }
                buf[0] = 0;
            }
            else if (!buf[sizeof(buf) - 1]) {
                //
                // The line is longer than the buffer.  Set a flag and don't
                // emit an error if we were going to ignore it anyway.
                //
                long_line = true;

#define CHECK_LINE \
    if (long_line) {                                                \
        CLEANUP_THREAD_INFO;                                        \
        *msg_id = kmp_i18n_str_LongLineCpuinfo;                     \
        return -1;                                                  \
    }
            }
            (*line)++;

            char s1[] = "processor";
            if (strncmp(buf, s1, sizeof(s1) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s1) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][osIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][osIdIndex] = val;
#if KMP_OS_LINUX && USE_SYSFS_INFO
                //
                // With sysfs info available, read the package and core ids
                // directly rather than parsing them from /proc/cpuinfo.
                //
                char path[256];
                snprintf(path, sizeof(path),
                  "/sys/devices/system/cpu/cpu%u/topology/physical_package_id",
                  threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][pkgIdIndex]);

                snprintf(path, sizeof(path),
                  "/sys/devices/system/cpu/cpu%u/topology/core_id",
                  threadInfo[num_avail][osIdIndex]);
                __kmp_read_from_file(path, "%u", &threadInfo[num_avail][coreIdIndex]);
#else
                continue;
            }
            char s2[] = "physical id";
            if (strncmp(buf, s2, sizeof(s2) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s2) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][pkgIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][pkgIdIndex] = val;
                continue;
            }
            char s3[] = "core id";
            if (strncmp(buf, s3, sizeof(s3) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s3) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][coreIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][coreIdIndex] = val;
                continue;
#endif // KMP_OS_LINUX && USE_SYSFS_INFO
            }
            char s4[] = "thread id";
            if (strncmp(buf, s4, sizeof(s4) - 1) == 0) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                if (threadInfo[num_avail][threadIdIndex] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][threadIdIndex] = val;
                continue;
            }
            unsigned level;
            if (sscanf(buf, "node_%d id", &level) == 1) {
                CHECK_LINE;
                char *p = strchr(buf + sizeof(s4) - 1, ':');
                unsigned val;
                if ((p == NULL) || (sscanf(p + 1, "%u\n", &val) != 1)) goto no_val;
                KMP_ASSERT(nodeIdIndex + level <= maxIndex);
                if (threadInfo[num_avail][nodeIdIndex + level] != UINT_MAX) goto dup_field;
                threadInfo[num_avail][nodeIdIndex + level] = val;
                continue;
            }

            //
            // The leading token on the line was not recognized.  If the line
            // isn't empty, go on to the next line.
            //
            if ((*buf != 0) && (*buf != '\n')) {
                //
                // If the line is longer than the buffer, read characters
                // until we find a newline.
                //
                if (long_line) {
                    int ch;
                    while (((ch = fgetc(f)) != EOF) && (ch != '\n'));
                }
                continue;
            }

            //
            // A newline has signalled the end of the processor record.
            // Check that there aren't too many procs specified.
            //
            if ((int)num_avail == __kmp_xproc) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_TooManyEntries;
                return -1;
            }

            //
            // Check for missing fields.  The osId field must be there.  We
            // currently require that the physical id field be specified too.
            //
            if (threadInfo[num_avail][osIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingProcField;
                return -1;
            }
            if (threadInfo[0][pkgIdIndex] == UINT_MAX) {
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_MissingPhysicalIDField;
                return -1;
            }

            //
            // Skip this proc if it is not included in the machine model.
            //
            if (! KMP_CPU_ISSET(threadInfo[num_avail][osIdIndex], fullMask)) {
                INIT_PROC_INFO(threadInfo[num_avail]);
                continue;
            }

            //
            // We have a successful parse of this proc's info.  Increment the
            // counter, and prepare for the next proc.
            //
            num_avail++;
            KMP_ASSERT(num_avail <= num_records);
            INIT_PROC_INFO(threadInfo[num_avail]);
        }
        continue;

        no_val:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_MissingValCpuinfo;
        return -1;

        dup_field:
        CLEANUP_THREAD_INFO;
        *msg_id = kmp_i18n_str_DuplicateFieldCpuinfo;
        return -1;
    }
    *line = 0;
# if KMP_MIC && REDUCE_TEAM_SIZE
    unsigned teamSize = 0;
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    //
    // If there's only one thread context to bind to, form an Address object
    // with depth 1 and return immediately (or, if affinity is off, set
    // address2os to NULL and return).
    //
    KMP_ASSERT(num_avail > 0);
    KMP_ASSERT(num_avail <= num_records);
    if (num_avail == 1) {
        __kmp_ncores = 1;
        __kmp_nThreadsPerCore = nCoresPerPkg = nPackages = 1;
        __kmp_ht_enabled = FALSE;
        if (__kmp_affinity_verbose) {
            if (! KMP_AFFINITY_CAPABLE()) {
                KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                char buf[KMP_AFFIN_MASK_PRINT_LEN];
                __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
                  fullMask);
                KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
                if (__kmp_affinity_respect_mask) {
                    KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
                }
                else {
                    KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
                }
                KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            int index;
            kmp_str_buf_t buf;
            __kmp_str_buf_init(&buf);
            __kmp_str_buf_print(&buf, "1");
            for (index = maxIndex - 1; index > pkgIdIndex; index--) {
                __kmp_str_buf_print(&buf, " x 1");
            }
            KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, 1, 1, 1);
            __kmp_str_buf_free(&buf);
        }

        if (__kmp_affinity_type == affinity_none) {
            CLEANUP_THREAD_INFO;
            return 0;
        }

        *address2os = (AddrUnsPair*)__kmp_allocate(sizeof(AddrUnsPair));
        Address addr(1);
        addr.labels[0] = threadInfo[0][pkgIdIndex];
        (*address2os)[0] = AddrUnsPair(addr, threadInfo[0][osIdIndex]);

        if (__kmp_affinity_gran_levels < 0) {
            __kmp_affinity_gran_levels = 0;
        }

        if (__kmp_affinity_verbose) {
            __kmp_affinity_print_topology(*address2os, 1, 1, 0, -1, -1);
        }

        CLEANUP_THREAD_INFO;
        return 1;
    }

    //
    // Sort the threadInfo table by physical Id.
    //
    qsort(threadInfo, num_avail, sizeof(*threadInfo),
      __kmp_affinity_cmp_ProcCpuInfo_phys_id);
    //
    // The table is now sorted by pkgId / coreId / threadId, but we really
    // don't know the radix of any of the fields.  Count the packages, cores,
    // and threads, and determine the radix at each level; if thread ids
    // were not specified, assign them.
    //
    unsigned *counts = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *maxCt = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *totals = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));
    unsigned *lastId = (unsigned *)__kmp_allocate((maxIndex + 1)
      * sizeof(unsigned));

    bool assign_thread_ids = false;
    unsigned threadIdCt;
    unsigned index;

    restart_radix_check:
    threadIdCt = 0;

    //
    // Initialize the counter arrays with data from threadInfo[0].
    //
    if (assign_thread_ids) {
        if (threadInfo[0][threadIdIndex] == UINT_MAX) {
            threadInfo[0][threadIdIndex] = threadIdCt++;
        }
        else if (threadIdCt <= threadInfo[0][threadIdIndex]) {
            threadIdCt = threadInfo[0][threadIdIndex] + 1;
        }
    }
    for (index = 0; index <= maxIndex; index++) {
        counts[index] = 1;
        maxCt[index] = 1;
        totals[index] = 1;
        lastId[index] = threadInfo[0][index];
    }

    //
    // Run through the rest of the records in the table, comparing each
    // thread's ids to the previous thread's.
    //
    for (i = 1; i < num_avail; i++) {
        //
        // Find the most significant index whose id differs from the id for
        // the previous thread.
        //
        for (index = maxIndex; index >= threadIdIndex; index--) {
            if (assign_thread_ids && (index == threadIdIndex)) {
                //
                // Auto-assign the thread id field if it wasn't specified.
                //
                if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                    threadInfo[i][threadIdIndex] = threadIdCt++;
                }
                //
                // If the thread id was specified for some entries and not
                // others, start the counter at the next higher thread id.
                //
                else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                    threadIdCt = threadInfo[i][threadIdIndex] + 1;
                }
            }
            if (threadInfo[i][index] != lastId[index]) {
                //
                // Reset the counts at all less significant levels, and
                // update the totals and last ids.
                //
                unsigned index2;
                for (index2 = threadIdIndex; index2 < index; index2++) {
                    totals[index2]++;
                    if (counts[index2] > maxCt[index2]) {
                        maxCt[index2] = counts[index2];
                    }
                    counts[index2] = 1;
                    lastId[index2] = threadInfo[i][index2];
                }
                counts[index]++;
                totals[index]++;
                lastId[index] = threadInfo[i][index];

                if (assign_thread_ids && (index > threadIdIndex)) {

# if KMP_MIC && REDUCE_TEAM_SIZE
                    //
                    // The default team size is the total #threads in the
                    // machine minus 1 thread for every core that has 3 or
                    // more threads.
                    //
                    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

                    //
                    // Restart the thread counter; we are on a new core.
                    //
                    threadIdCt = 0;

                    if (threadInfo[i][threadIdIndex] == UINT_MAX) {
                        threadInfo[i][threadIdIndex] = threadIdCt++;
                    }
                    else if (threadIdCt <= threadInfo[i][threadIdIndex]) {
                        threadIdCt = threadInfo[i][threadIdIndex] + 1;
                    }
                }
                break;
            }
        }
        if (index < threadIdIndex) {
            //
            // If thread ids were specified, it is an error if they are not
            // unique.  Also check that we haven't already restarted the
            // loop (to be safe; shouldn't need to).
            //
            if ((threadInfo[i][threadIdIndex] != UINT_MAX)
              || assign_thread_ids) {
                __kmp_free(lastId);
                __kmp_free(totals);
                __kmp_free(maxCt);
                __kmp_free(counts);
                CLEANUP_THREAD_INFO;
                *msg_id = kmp_i18n_str_PhysicalIDsNotUnique;
                return -1;
            }

            //
            // The thread ids were not specified and we see duplicate
            // entries; start the loop over and assign them ourselves.
            //
            assign_thread_ids = true;
            goto restart_radix_check;
        }
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // The default team size is the total #threads in the machine minus 1
    // thread for every core that has 3 or more threads.
    //
    teamSize += ( threadIdCt <= 2 ) ? ( threadIdCt ) : ( threadIdCt - 1 );
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (counts[index] > maxCt[index]) {
            maxCt[index] = counts[index];
        }
    }

    __kmp_nThreadsPerCore = maxCt[threadIdIndex];
    nCoresPerPkg = maxCt[coreIdIndex];
    nPackages = totals[pkgIdIndex];
    //
    // Check to see if the machine topology is uniform.
    //
    unsigned prod = totals[maxIndex];
    for (index = threadIdIndex; index < maxIndex; index++) {
        prod *= maxCt[index];
    }
    bool uniform = (prod == totals[threadIdIndex]);

    //
    // Even if affinity ends up disabled, set __kmp_ht_enabled, __kmp_ncores,
    // __kmp_nThreadsPerCore, nCoresPerPkg, and nPackages correctly.
    //
    __kmp_ht_enabled = (maxCt[threadIdIndex] > 1);
    __kmp_ncores = totals[coreIdIndex];

    if (__kmp_affinity_verbose) {
        if (! KMP_AFFINITY_CAPABLE()) {
            KMP_INFORM(AffNotCapableUseCpuinfo, "KMP_AFFINITY");
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        else {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, fullMask);
            KMP_INFORM(AffCapableUseCpuinfo, "KMP_AFFINITY");
            if (__kmp_affinity_respect_mask) {
                KMP_INFORM(InitOSProcSetRespect, "KMP_AFFINITY", buf);
            }
            else {
                KMP_INFORM(InitOSProcSetNotRespect, "KMP_AFFINITY", buf);
            }
            KMP_INFORM(AvailableOSProc, "KMP_AFFINITY", __kmp_avail_proc);
            if (uniform) {
                KMP_INFORM(Uniform, "KMP_AFFINITY");
            }
            else {
                KMP_INFORM(NonUniform, "KMP_AFFINITY");
            }
        }
        kmp_str_buf_t buf;
        __kmp_str_buf_init(&buf);

        __kmp_str_buf_print(&buf, "%d", totals[maxIndex]);
        for (index = maxIndex - 1; index >= pkgIdIndex; index--) {
            __kmp_str_buf_print(&buf, " x %d", maxCt[index]);
        }
        KMP_INFORM(TopologyExtra, "KMP_AFFINITY", buf.str, maxCt[coreIdIndex],
          maxCt[threadIdIndex], __kmp_ncores);

        __kmp_str_buf_free(&buf);
    }

# if KMP_MIC && REDUCE_TEAM_SIZE
    //
    // Set the default team size.
    //
    if ((__kmp_dflt_team_nth == 0) && (teamSize > 0)) {
        __kmp_dflt_team_nth = teamSize;
        KA_TRACE(20, ("__kmp_affinity_create_cpuinfo_map: setting __kmp_dflt_team_nth = %d\n",
          __kmp_dflt_team_nth));
    }
# endif // KMP_MIC && REDUCE_TEAM_SIZE

    if (__kmp_affinity_type == affinity_none) {
        __kmp_free(lastId);
        __kmp_free(totals);
        __kmp_free(maxCt);
        __kmp_free(counts);
        CLEANUP_THREAD_INFO;
        return 0;
    }
    //
    // Count the number of levels which have more nodes at that level than
    // at the parent's level (with there being an implicit root node of the
    // top level).  This is equivalent to saying that there is at least one
    // node at this level which has a sibling.  These levels are in the map,
    // and the package level is always in the map.
    //
    bool *inMap = (bool *)__kmp_allocate((maxIndex + 1) * sizeof(bool));
    for (index = threadIdIndex; index < maxIndex; index++) {
        KMP_ASSERT(totals[index] >= totals[index + 1]);
        inMap[index] = (totals[index] > totals[index + 1]);
    }
    inMap[maxIndex] = (totals[maxIndex] > 1);
    inMap[pkgIdIndex] = true;

    int depth = 0;
    for (index = threadIdIndex; index <= maxIndex; index++) {
        if (inMap[index]) {
            depth++;
        }
    }
    KMP_ASSERT(depth > 0);

    //
    // Construct the data structure that is to be returned.
    //
    *address2os = (AddrUnsPair*)
      __kmp_allocate(sizeof(AddrUnsPair) * num_avail);
    int pkgLevel = -1;
    int coreLevel = -1;
    int threadLevel = -1;

    for (i = 0; i < num_avail; ++i) {
        Address addr(depth);
        unsigned os = threadInfo[i][osIdIndex];
        int src_index;
        int dst_index = 0;

        for (src_index = maxIndex; src_index >= threadIdIndex; src_index--) {
            if (! inMap[src_index]) {
                continue;
            }
            addr.labels[dst_index] = threadInfo[i][src_index];
            if (src_index == pkgIdIndex) {
                pkgLevel = dst_index;
            }
            else if (src_index == coreIdIndex) {
                coreLevel = dst_index;
            }
            else if (src_index == threadIdIndex) {
                threadLevel = dst_index;
            }
            dst_index++;
        }
        (*address2os)[i] = AddrUnsPair(addr, os);
    }

    if (__kmp_affinity_gran_levels < 0) {
        //
        // Set the granularity level based on what levels are modeled in the
        // machine topology map.
        //
        unsigned src_index;
        __kmp_affinity_gran_levels = 0;
        for (src_index = threadIdIndex; src_index <= maxIndex; src_index++) {
            if (! inMap[src_index]) {
                continue;
            }
            switch (src_index) {
                case threadIdIndex:
                if (__kmp_affinity_gran > affinity_gran_thread) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case coreIdIndex:
                if (__kmp_affinity_gran > affinity_gran_core) {
                    __kmp_affinity_gran_levels++;
                }
                break;

                case pkgIdIndex:
                if (__kmp_affinity_gran > affinity_gran_package) {
                    __kmp_affinity_gran_levels++;
                }
                break;
            }
        }
    }

    if (__kmp_affinity_verbose) {
        __kmp_affinity_print_topology(*address2os, num_avail, depth, pkgLevel,
          coreLevel, threadLevel);
    }

    __kmp_free(inMap);
    __kmp_free(lastId);
    __kmp_free(totals);
    __kmp_free(maxCt);
    __kmp_free(counts);
    CLEANUP_THREAD_INFO;
    return depth;
}
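//
// Form a table of affinity masks indexed by OS proc id.  Procs whose
// Address labels agree above the granularity level share one mask: with
// gran_levels = 1 (core granularity), the per-thread level is ignored, so
// both hardware threads of a core get a mask with both of their bits set.
//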
static kmp_affin_mask_t *
__kmp_create_masks(unsigned *maxIndex, unsigned *numUnique,
  AddrUnsPair *address2os, unsigned numAddrs)
{
    //
    // First form a table of affinity masks in order of OS thread id.
    //
    unsigned depth;
    unsigned maxOsId;
    unsigned i;

    KMP_ASSERT(numAddrs > 0);
    depth = address2os[0].first.depth;

    maxOsId = 0;
    for (i = 0; i < numAddrs; i++) {
        unsigned osId = address2os[i].second;
        if (osId > maxOsId) {
            maxOsId = osId;
        }
    }
    kmp_affin_mask_t *osId2Mask = (kmp_affin_mask_t *)__kmp_allocate(
      (maxOsId + 1) * __kmp_affin_mask_size);

    //
    // Sort the address2os table according to physical order.  Doing so
    // puts all threads on the same core/package/node in consecutive
    // locations.
    //
    qsort(address2os, numAddrs, sizeof(*address2os),
      __kmp_affinity_cmp_Address_labels);

    KMP_ASSERT(__kmp_affinity_gran_levels >= 0);
    if (__kmp_affinity_verbose && (__kmp_affinity_gran_levels > 0)) {
        KMP_INFORM(ThreadsMigrate, "KMP_AFFINITY", __kmp_affinity_gran_levels);
    }
    if (__kmp_affinity_gran_levels >= (int)depth) {
        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none))) {
            KMP_WARNING(AffThreadsMayMigrate);
        }
    }

    //
    // Run through the table, forming the masks for all threads on each
    // cluster of sufficiently close threads.  Threads in the same cluster
    // appear consecutively after the sort.
    //
    unsigned unique = 0;
    unsigned j = 0;
    unsigned leader = 0;
    Address *leaderAddr = &(address2os[0].first);
    kmp_affin_mask_t *sum
      = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
    KMP_CPU_ZERO(sum);
    KMP_CPU_SET(address2os[0].second, sum);
    for (i = 1; i < numAddrs; i++) {
        //
        // If this thread is sufficiently close to the leader (within the
        // granularity setting), set its bit in the current mask.
        // Otherwise, it is time to move on to the next cluster.
        //
        if (leaderAddr->isClose(address2os[i].first,
          __kmp_affinity_gran_levels)) {
            KMP_CPU_SET(address2os[i].second, sum);
        }
        else {
            //
            // For every thread in this cluster, copy the mask to the
            // thread's entry in the osId2Mask table.  Mark the first
            // address as a leader.
            //
            for (; j < i; j++) {
                unsigned osId = address2os[j].second;
                KMP_DEBUG_ASSERT(osId <= maxOsId);
                kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
                KMP_CPU_COPY(mask, sum);
                address2os[j].first.leader = (j == leader);
            }
            unique++;

            //
            // Start a new mask.
            //
            leader = i;
            leaderAddr = &(address2os[i].first);
            KMP_CPU_ZERO(sum);
            KMP_CPU_SET(address2os[i].second, sum);
        }
    }

    //
    // For every thread in the last cluster, copy the mask to the thread's
    // entry in the osId2Mask table.
    //
    for (; j < i; j++) {
        unsigned osId = address2os[j].second;
        KMP_DEBUG_ASSERT(osId <= maxOsId);
        kmp_affin_mask_t *mask = KMP_CPU_INDEX(osId2Mask, osId);
        KMP_CPU_COPY(mask, sum);
        address2os[j].first.leader = (j == leader);
    }
    unique++;

    *maxIndex = maxOsId;
    *numUnique = unique;
    return osId2Mask;
}
static kmp_affin_mask_t *newMasks;
static int numNewMasks;
static int nextNewMask;
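//
// newMasks is managed with KMP_INTERNAL_MALLOC/REALLOC so that ADD_MASK can
// double numNewMasks whenever the vector fills up.
//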
#define ADD_MASK(_mask) \
    {                                                                   \
        if (nextNewMask >= numNewMasks) {                               \
            numNewMasks *= 2;                                           \
            newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_REALLOC(newMasks, \
              numNewMasks * __kmp_affin_mask_size);                     \
        }                                                               \
        KMP_CPU_COPY(KMP_CPU_INDEX(newMasks, nextNewMask), (_mask));    \
        nextNewMask++;                                                  \
    }

#define ADD_MASK_OSID(_osId,_osId2Mask,_maxOsId) \
    {                                                                   \
        if (((_osId) > _maxOsId) ||                                     \
          (! KMP_CPU_ISSET((_osId), KMP_CPU_INDEX((_osId2Mask), (_osId))))) { \
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings      \
              && (__kmp_affinity_type != affinity_none))) {             \
                KMP_WARNING(AffIgnoreInvalidProcID, _osId);             \
            }                                                           \
        }                                                               \
        else {                                                          \
            ADD_MASK(KMP_CPU_INDEX(_osId2Mask, (_osId)));               \
        }                                                               \
    }
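//
// Re-parse the explicit proc list, building one affinity mask per entry.
// The grammar accepts single ids, ranges with an optional stride, and
// brace-enclosed sets; e.g. "0,4-7,8-15:2,{2,3}" yields the masks
// {0} {4} {5} {6} {7} {8} {10} {12} {14} {2,3}.
//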
static void
__kmp_affinity_process_proclist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *proclist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = proclist;
    const char *next = proclist;

    //
    // We use malloc() for the temporary mask vector, so that we can use
    // realloc() to extend it.
    //
    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;
    kmp_affin_mask_t *sumMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    int setSize = 0;

    for (;;) {
        int start, end, stride;

        SKIP_WS(scan);
        next = scan;
        if (*next == '\0') {
            break;
        }

        if (*next == '{') {
            int num;
            setSize = 0;

            //
            // Read the first integer in the set.
            //
            next++;     // skip '{'
            SKIP_WS(next);
            scan = next;
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad proclist");
            SKIP_DIGITS(next);
            num = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(num >= 0, "bad explicit proc list");

            //
            // Copy the mask for that osId to the sum (union) mask.
            //
            if ((num > maxOsId) ||
              (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, num);
                }
                KMP_CPU_ZERO(sumMask);
            }
            else {
                KMP_CPU_COPY(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                setSize = 1;
            }

            for (;;) {
                //
                // Check for end of set.
                //
                SKIP_WS(next);
                if (*next == '}') {
                    next++;     // skip '}'
                    break;
                }

                //
                // Skip optional comma.
                //
                if (*next == ',') {
                    next++;
                }
                SKIP_WS(next);

                //
                // Read the next integer in the set.
                //
                scan = next;
                KMP_ASSERT2((*next >= '0') && (*next <= '9'),
                  "bad explicit proc list");
                SKIP_DIGITS(next);
                num = __kmp_str_to_int(scan, *next);
                KMP_ASSERT2(num >= 0, "bad explicit proc list");

                //
                // Add the mask for that osId to the sum mask.
                //
                if ((num > maxOsId) ||
                  (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, num);
                    }
                }
                else {
                    KMP_CPU_UNION(sumMask, KMP_CPU_INDEX(osId2Mask, num));
                    setSize++;
                }
            }
            if (setSize > 0) {
                ADD_MASK(sumMask);
            }

            SKIP_WS(next);
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        //
        // Read the first integer.
        //
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(start >= 0, "bad explicit proc list");
        SKIP_WS(next);

        //
        // If this isn't a range, then add a mask to the list and go on.
        //
        if (*next != '-') {
            ADD_MASK_OSID(start, osId2Mask, maxOsId);

            //
            // Skip optional comma.
            //
            if (*next == ',') {
                next++;
            }
            scan = next;
            continue;
        }

        //
        // This is a range.  Skip over the '-' and read in the 2nd int.
        //
        next++;         // skip '-'
        SKIP_WS(next);
        scan = next;
        KMP_ASSERT2((*next >= '0') && (*next <= '9'), "bad explicit proc list");
        SKIP_DIGITS(next);
        end = __kmp_str_to_int(scan, *next);
        KMP_ASSERT2(end >= 0, "bad explicit proc list");

        //
        // Check for a stride parameter.
        //
        stride = 1;
        SKIP_WS(next);
        if (*next == ':') {
            //
            // A stride is specified.  Skip over the ':' and read the 3rd int.
            //
            int sign = +1;
            next++;     // skip ':'
            SKIP_WS(next);
            scan = next;
            if (*next == '-') {
                sign = -1;
                next++;
                SKIP_WS(next);
                scan = next;
            }
            KMP_ASSERT2((*next >= '0') && (*next <= '9'),
              "bad explicit proc list");
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_ASSERT2(stride >= 0, "bad explicit proc list");
            stride *= sign;
        }

        //
        // Do some range checks.
        //
        KMP_ASSERT2(stride != 0, "bad explicit proc list");
        if (stride > 0) {
            KMP_ASSERT2(start <= end, "bad explicit proc list");
        }
        else {
            KMP_ASSERT2(start >= end, "bad explicit proc list");
        }
        KMP_ASSERT2((end - start) / stride <= 65536, "bad explicit proc list");

        //
        // Add the mask for each OS proc # to the list.
        //
        if (stride > 0) {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            } while (start <= end);
        }
        else {
            do {
                ADD_MASK_OSID(start, osId2Mask, maxOsId);
                start += stride;
            } while (start >= end);
        }

        //
        // Skip optional comma.
        //
        SKIP_WS(next);
        if (*next == ',') {
            next++;
        }
        scan = next;
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(sumMask);
    KMP_INTERNAL_FREE(newMasks);
}
static void
__kmp_process_subplace_list(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    for (;;) {
        int start, count, stride, i;

        //
        // Read in the starting proc id.
        //
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        start = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(start >= 0);
        *scan = next;

        //
        // Valid follow sets are ',' ':' and '}'.
        //
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            if ((start > maxOsId) ||
              (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(AffIgnoreInvalidProcID, start);
                }
            }
            else {
                KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                (*setSize)++;
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;      // skip ':'

        //
        // Read count parameter.
        //
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        count = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(count >= 0);
        *scan = next;

        //
        // Valid follow sets are ',' ':' and '}'.
        //
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;  // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start++;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }
        KMP_ASSERT2(**scan == ':', "bad explicit places list");
        (*scan)++;      // skip ':'

        //
        // Read stride parameter.
        //
        int sign = +1;
        for (;;) {
            SKIP_WS(*scan);
            if (**scan == '+') {
                (*scan)++; // skip '+'
                continue;
            }
            if (**scan == '-') {
                sign *= -1;
                (*scan)++; // skip '-'
                continue;
            }
            break;
        }
        SKIP_WS(*scan);
        KMP_ASSERT2((**scan >= '0') && (**scan <= '9'),
          "bad explicit places list");
        next = *scan;
        SKIP_DIGITS(next);
        stride = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(stride >= 0);
        *scan = next;
        stride *= sign;

        //
        // Valid follow sets are ',' and '}'.
        //
        SKIP_WS(*scan);
        if (**scan == '}' || **scan == ',') {
            for (i = 0; i < count; i++) {
                if ((start > maxOsId) ||
                  (! KMP_CPU_ISSET(start, KMP_CPU_INDEX(osId2Mask, start)))) {
                    if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                      && (__kmp_affinity_type != affinity_none))) {
                        KMP_WARNING(AffIgnoreInvalidProcID, start);
                    }
                    break;  // don't proliferate warnings for large count
                }
                else {
                    KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, start));
                    start += stride;
                    (*setSize)++;
                }
            }
            if (**scan == '}') {
                break;
            }
            (*scan)++;  // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_process_place(const char **scan, kmp_affin_mask_t *osId2Mask,
  int maxOsId, kmp_affin_mask_t *tempMask, int *setSize)
{
    const char *next;

    //
    // Valid follow sets are '{' '!' and num.
    //
    SKIP_WS(*scan);
    if (**scan == '{') {
        (*scan)++;      // skip '{'
        __kmp_process_subplace_list(scan, osId2Mask, maxOsId , tempMask,
          setSize);
        KMP_ASSERT2(**scan == '}', "bad explicit places list");
        (*scan)++;      // skip '}'
    }
    else if (**scan == '!') {
        (*scan)++;      // skip '!' - must happen before the recursive call
        __kmp_process_place(scan, osId2Mask, maxOsId, tempMask, setSize);
        KMP_CPU_COMPLEMENT(tempMask);
    }
    else if ((**scan >= '0') && (**scan <= '9')) {
        next = *scan;
        SKIP_DIGITS(next);
        int num = __kmp_str_to_int(*scan, *next);
        KMP_ASSERT(num >= 0);
        if ((num > maxOsId) ||
          (! KMP_CPU_ISSET(num, KMP_CPU_INDEX(osId2Mask, num)))) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffIgnoreInvalidProcID, num);
            }
        }
        else {
            KMP_CPU_UNION(tempMask, KMP_CPU_INDEX(osId2Mask, num));
            (*setSize)++;
        }
        *scan = next;   // skip num
    }
    else {
        KMP_ASSERT2(0, "bad explicit places list");
    }
}
static void
__kmp_affinity_process_placelist(kmp_affin_mask_t **out_masks,
  unsigned int *out_numMasks, const char *placelist,
  kmp_affin_mask_t *osId2Mask, int maxOsId)
{
    const char *scan = placelist;
    const char *next = placelist;

    numNewMasks = 2;
    newMasks = (kmp_affin_mask_t *)KMP_INTERNAL_MALLOC(numNewMasks
      * __kmp_affin_mask_size);
    nextNewMask = 0;

    kmp_affin_mask_t *tempMask = (kmp_affin_mask_t *)__kmp_allocate(
      __kmp_affin_mask_size);
    KMP_CPU_ZERO(tempMask);
    int setSize = 0;

    for (;;) {
        __kmp_process_place(&scan, osId2Mask, maxOsId, tempMask, &setSize);

        //
        // Valid follow sets are ',' ':' and EOL.
        //
        SKIP_WS(scan);
        if (*scan == '\0' || *scan == ',') {
            if (setSize > 0) {
                ADD_MASK(tempMask);
            }
            KMP_CPU_ZERO(tempMask);
            setSize = 0;
            if (*scan == '\0') {
                break;
            }
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(*scan == ':', "bad explicit places list");
        scan++;         // skip ':'

        //
        // Read count parameter.
        //
        SKIP_WS(scan);
        KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
          "bad explicit places list");
        next = scan;
        SKIP_DIGITS(next);
        int count = __kmp_str_to_int(scan, *next);
        KMP_ASSERT(count >= 0);
        scan = next;

        //
        // Valid follow sets are ',' ':' and EOL.
        //
        SKIP_WS(scan);
        int stride;
        if (*scan == '\0' || *scan == ',') {
            stride = +1;
        }
        else {
            KMP_ASSERT2(*scan == ':', "bad explicit places list");
            scan++;     // skip ':'

            //
            // Read stride parameter.
            //
            int sign = +1;
            for (;;) {
                SKIP_WS(scan);
                if (*scan == '+') {
                    scan++; // skip '+'
                    continue;
                }
                if (*scan == '-') {
                    sign *= -1;
                    scan++; // skip '-'
                    continue;
                }
                break;
            }
            SKIP_WS(scan);
            KMP_ASSERT2((*scan >= '0') && (*scan <= '9'),
              "bad explicit places list");
            next = scan;
            SKIP_DIGITS(next);
            stride = __kmp_str_to_int(scan, *next);
            KMP_DEBUG_ASSERT(stride >= 0);
            scan = next;
            stride *= sign;
        }

        int i, j;
        if (stride > 0) {
            for (i = 0; i < count; i++) {
                if (setSize == 0) {
                    break;
                }
                ADD_MASK(tempMask);
                setSize = 0;

                //
                // Shift the mask up by the stride.
                //
                for (j = __kmp_affin_mask_size * CHAR_BIT - 1; j >= stride; j--) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j >= 0; j--) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        else {
            for (i = 0; i < count; i++) {
                if (setSize == 0) {
                    break;
                }
                ADD_MASK(tempMask);
                setSize = 0;

                //
                // Shift the mask down by the (negative) stride.
                //
                for (j = 0; j < ((int)__kmp_affin_mask_size * CHAR_BIT) + stride;
                  j++) {
                    if (! KMP_CPU_ISSET(j - stride, tempMask)) {
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else if ((j > maxOsId) ||
                      (! KMP_CPU_ISSET(j, KMP_CPU_INDEX(osId2Mask, j)))) {
                        if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                          && (__kmp_affinity_type != affinity_none))) {
                            KMP_WARNING(AffIgnoreInvalidProcID, j);
                        }
                        KMP_CPU_CLR(j, tempMask);
                    }
                    else {
                        KMP_CPU_SET(j, tempMask);
                        setSize++;
                    }
                }
                for (; j < (int)__kmp_affin_mask_size * CHAR_BIT; j++) {
                    KMP_CPU_CLR(j, tempMask);
                }
            }
        }
        KMP_CPU_ZERO(tempMask);
        setSize = 0;

        //
        // Valid follow sets are ',' and EOL.
        //
        SKIP_WS(scan);
        if (*scan == '\0') {
            break;
        }
        if (*scan == ',') {
            scan++;     // skip ','
            continue;
        }

        KMP_ASSERT2(0, "bad explicit places list");
    }

    *out_numMasks = nextNewMask;
    if (nextNewMask == 0) {
        *out_masks = NULL;
        KMP_INTERNAL_FREE(newMasks);
        return;
    }
    *out_masks
      = (kmp_affin_mask_t *)__kmp_allocate(nextNewMask * __kmp_affin_mask_size);
    memcpy(*out_masks, newMasks, nextNewMask * __kmp_affin_mask_size);
    __kmp_free(tempMask);
    KMP_INTERNAL_FREE(newMasks);
}
#undef ADD_MASK
#undef ADD_MASK_OSID
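//
// Trim the topology map according to the KMP_PLACE_THREADS settings,
// keeping only the requested number of cores per package (starting at the
// core offset) and threads per core.  E.g. requesting 2 cores and 1 thread
// per core on a 4-core, 2-thread package keeps the first thread of cores
// 0 and 1.  Only uniform, 3-level (package/core/thread) topologies are
// supported.
//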
static void
__kmp_apply_thread_places(AddrUnsPair **pAddr, int depth)
{
    if ( __kmp_place_num_cores == 0 ) {
        if ( __kmp_place_num_threads_per_core == 0 ) {
            return;     // no cores limiting actions requested, exit
        }
        __kmp_place_num_cores = nCoresPerPkg;   // use all available cores
    }
    if ( !__kmp_affinity_uniform_topology() ) {
        KMP_WARNING( AffThrPlaceNonUniform );
        return;         // don't support non-uniform topology
    }
    if ( depth != 3 ) {
        KMP_WARNING( AffThrPlaceNonThreeLevel );
        return;         // don't support non-3-level topology
    }
    if ( __kmp_place_num_threads_per_core == 0 ) {
        __kmp_place_num_threads_per_core = __kmp_nThreadsPerCore; // use all HW contexts
    }
    if ( __kmp_place_core_offset + __kmp_place_num_cores > nCoresPerPkg ) {
        KMP_WARNING( AffThrPlaceManyCores );
        return;
    }

    AddrUnsPair *newAddr = (AddrUnsPair *)__kmp_allocate( sizeof(AddrUnsPair) *
      nPackages * __kmp_place_num_cores * __kmp_place_num_threads_per_core);
    int i, j, k, n_old = 0, n_new = 0;
    for ( i = 0; i < nPackages; ++i ) {
        for ( j = 0; j < nCoresPerPkg; ++j ) {
            if ( j < __kmp_place_core_offset || j >= __kmp_place_core_offset + __kmp_place_num_cores ) {
                n_old += __kmp_nThreadsPerCore;  // skip not-requested core
            }
            else {
                for ( k = 0; k < __kmp_nThreadsPerCore; ++k ) {
                    if ( k < __kmp_place_num_threads_per_core ) {
                        newAddr[n_new] = (*pAddr)[n_old];  // collect requested thread's data
                        n_new++;
                    }
                    n_old++;
                }
            }
        }
    }
    nCoresPerPkg = __kmp_place_num_cores;                      // correct nCoresPerPkg
    __kmp_nThreadsPerCore = __kmp_place_num_threads_per_core;  // correct nThreadsPerCore
    __kmp_avail_proc = n_new;                                  // correct avail_proc
    __kmp_ncores = nPackages * __kmp_place_num_cores;          // correct ncores

    __kmp_free( *pAddr );
    *pAddr = newAddr;   // replace the old topology with the new one
}
static AddrUnsPair *address2os = NULL;
static int * procarr = NULL;
static int __kmp_aff_depth = 0;
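//
// Top-level topology discovery.  With the default ("all") method, the
// discovery functions are tried in order from most to least precise -
// x2APIC ids, legacy APIC ids, /proc/cpuinfo, Windows processor groups,
// and finally the flat OS-proc map - accepting the first that succeeds.
// If the user named a specific method, a failure of that method is fatal.
//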
static void
__kmp_aux_affinity_initialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        KMP_ASSERT(fullMask != NULL);
        return;
    }

    //
    // Create the "full" mask - this defines all of the processors that we
    // consider to be in the machine model.  If respect is set, it is the
    // initialization thread's affinity mask; otherwise, it is all processors
    // that we know about on the machine.
    //
    if (fullMask == NULL) {
        fullMask = (kmp_affin_mask_t *)__kmp_allocate(__kmp_affin_mask_size);
    }
    if (KMP_AFFINITY_CAPABLE()) {
        if (__kmp_affinity_respect_mask) {
            __kmp_get_system_affinity(fullMask, TRUE);

            //
            // Count the number of available processors.
            //
            unsigned i;
            __kmp_avail_proc = 0;
            for (i = 0; i < KMP_CPU_SETSIZE; ++i) {
                if (! KMP_CPU_ISSET(i, fullMask)) {
                    continue;
                }
                __kmp_avail_proc++;
            }
            if (__kmp_avail_proc > __kmp_xproc) {
                if (__kmp_affinity_verbose || (__kmp_affinity_warnings
                  && (__kmp_affinity_type != affinity_none))) {
                    KMP_WARNING(ErrorInitializeAffinity);
                }
                __kmp_affinity_type = affinity_none;
                __kmp_affin_mask_size = 0;
                return;
            }
        }
        else {
            __kmp_affinity_entire_machine_mask(fullMask);
            __kmp_avail_proc = __kmp_xproc;
        }
    }

    int depth = -1;
    kmp_i18n_id_t msg_id = kmp_i18n_null;

    //
    // For backward compatibility, setting KMP_CPUINFO_FILE implies
    // KMP_TOPOLOGY_METHOD=cpuinfo.
    //
    if ((__kmp_cpuinfo_file != NULL) &&
      (__kmp_affinity_top_method == affinity_top_method_all)) {
        __kmp_affinity_top_method = affinity_top_method_cpuinfo;
    }

    if (__kmp_affinity_top_method == affinity_top_method_all) {
        //
        // In the default code path, errors are not fatal - we just try
        // another method.  We only emit a warning message if affinity is
        // on, or the verbose flag is set, and the nowarnings flag was not.
        //
        const char *file_name = NULL;
        int line = 0;

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
        }

        file_name = NULL;
        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffInfoStrStr, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                      KMP_I18N_STR(DecodingLegacyAPIC));
                }
                else {
                    KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
                }
            }

            file_name = NULL;
            depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
        }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */

# if KMP_OS_LINUX

        if (depth < 0) {
            if (__kmp_affinity_verbose) {
                if (msg_id != kmp_i18n_null) {
                    KMP_INFORM(AffStrParseFilename, "KMP_AFFINITY", __kmp_i18n_catgets(msg_id),
                      "/proc/cpuinfo");
                }
                else {
                    KMP_INFORM(AffParseFilename, "KMP_AFFINITY", "/proc/cpuinfo");
                }
            }

            FILE *f = fopen("/proc/cpuinfo", "r");
            if (f == NULL) {
                msg_id = kmp_i18n_str_CantOpenCpuinfo;
            }
            else {
                file_name = "/proc/cpuinfo";
                depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
                fclose(f);
                if (depth == 0) {
                    KMP_ASSERT(__kmp_affinity_type == affinity_none);
                    KMP_ASSERT(address2os == NULL);
                    return;
                }
            }
        }

# endif /* KMP_OS_LINUX */

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

        if ((depth < 0) && (__kmp_num_proc_groups > 1)) {
            if (__kmp_affinity_verbose) {
                KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
            }

            depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
            KMP_ASSERT(depth != 0);
        }

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */

        if (depth < 0) {
            if (__kmp_affinity_verbose && (msg_id != kmp_i18n_null)) {
                if (file_name == NULL) {
                    KMP_INFORM(UsingFlatOS, __kmp_i18n_catgets(msg_id));
                }
                else if (line == 0) {
                    KMP_INFORM(UsingFlatOSFile, file_name, __kmp_i18n_catgets(msg_id));
                }
                else {
                    KMP_INFORM(UsingFlatOSFileLine, file_name, line, __kmp_i18n_catgets(msg_id));
                }
            }

            file_name = "";
            depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
            if (depth == 0) {
                KMP_ASSERT(__kmp_affinity_type == affinity_none);
                KMP_ASSERT(address2os == NULL);
                return;
            }
            KMP_ASSERT(depth > 0);
            KMP_ASSERT(address2os != NULL);
        }
    }
    //
    // If the user specified a particular topology discovery method, its
    // failure is fatal.
    //

# if KMP_ARCH_X86 || KMP_ARCH_X86_64

    else if (__kmp_affinity_top_method == affinity_top_method_x2apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(Decodingx2APIC));
        }

        depth = __kmp_affinity_create_x2apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }
    else if (__kmp_affinity_top_method == affinity_top_method_apicid) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffInfoStr, "KMP_AFFINITY", KMP_I18N_STR(DecodingLegacyAPIC));
        }

        depth = __kmp_affinity_create_apicid_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_ARCH_X86 || KMP_ARCH_X86_64 */
    else if (__kmp_affinity_top_method == affinity_top_method_cpuinfo) {
        const char *filename;
        if (__kmp_cpuinfo_file != NULL) {
            filename = __kmp_cpuinfo_file;
        }
        else {
            filename = "/proc/cpuinfo";
        }

        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffParseFilename, "KMP_AFFINITY", filename);
        }

        FILE *f = fopen(filename, "r");
        if (f == NULL) {
            int code = errno;
            if (__kmp_cpuinfo_file != NULL) {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), KMP_HNT(NameComesFrom_CPUINFO_FILE),
                  __kmp_msg_null);
            }
            else {
                __kmp_msg(kmp_ms_fatal, KMP_MSG(CantOpenFileForReading, filename),
                  KMP_ERR(code), __kmp_msg_null);
            }
        }
        int line = 0;
        depth = __kmp_affinity_create_cpuinfo_map(&address2os, &line, &msg_id, f);
        fclose(f);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            if (line > 0) {
                KMP_FATAL(FileLineMsgExiting, filename, line, __kmp_i18n_catgets(msg_id));
            }
            else {
                KMP_FATAL(FileMsgExiting, filename, __kmp_i18n_catgets(msg_id));
            }
        }
        if (__kmp_affinity_type == affinity_none) {
            KMP_ASSERT(depth == 0);
            KMP_ASSERT(address2os == NULL);
            return;
        }
    }
# if KMP_OS_WINDOWS && KMP_ARCH_X86_64

    else if (__kmp_affinity_top_method == affinity_top_method_group) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffWindowsProcGroupMap, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_proc_group_map(&address2os, &msg_id);
        KMP_ASSERT(depth != 0);
        if (depth < 0) {
            KMP_ASSERT(msg_id != kmp_i18n_null);
            KMP_FATAL(MsgExiting, __kmp_i18n_catgets(msg_id));
        }
    }

# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
    else if (__kmp_affinity_top_method == affinity_top_method_flat) {
        if (__kmp_affinity_verbose) {
            KMP_INFORM(AffUsingFlatOS, "KMP_AFFINITY");
        }

        depth = __kmp_affinity_create_flat_map(&address2os, &msg_id);
        if (depth == 0) {
            KMP_ASSERT(__kmp_affinity_type == affinity_none);
            KMP_ASSERT(address2os == NULL);
            return;
        }
        // Should not fail.
        KMP_ASSERT(depth > 0);
        KMP_ASSERT(address2os != NULL);
    }
    if (address2os == NULL) {
        if (KMP_AFFINITY_CAPABLE()
          && (__kmp_affinity_verbose || (__kmp_affinity_warnings
          && (__kmp_affinity_type != affinity_none)))) {
            KMP_WARNING(ErrorInitializeAffinity);
        }
        __kmp_affinity_type = affinity_none;
        __kmp_affin_mask_size = 0;
        return;
    }

    __kmp_apply_thread_places(&address2os, depth);
    //
    // Create the table of masks, indexed by OS thread id.
    //
    unsigned maxIndex;
    unsigned numUnique;
    kmp_affin_mask_t *osId2Mask = __kmp_create_masks(&maxIndex, &numUnique,
      address2os, __kmp_avail_proc);
    if (__kmp_affinity_gran_levels == 0) {
        KMP_DEBUG_ASSERT((int)numUnique == __kmp_avail_proc);
    }

    //
    // Set the childNums vector in all Address objects.  This must be done
    // before the sort by __kmp_affinity_cmp_Address_child_num(), which
    // takes __kmp_affinity_compact into account.
    //
    __kmp_affinity_assign_child_nums(address2os, __kmp_avail_proc);
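
    //
    // Each affinity type below either consumes the explicit proc/place list
    // (affinity_explicit), or selects a permutation of the topology tree
    // (logical, physical, scatter, compact) and jumps to a common
    // sort-and-build-masks fragment at the sortAddresses label; the
    // balanced type builds procarr for non-uniform topologies instead.
    //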
    switch (__kmp_affinity_type) {

        case affinity_explicit:
        KMP_DEBUG_ASSERT(__kmp_affinity_proclist != NULL);
        if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
            __kmp_affinity_process_proclist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
        else {
            __kmp_affinity_process_placelist(&__kmp_affinity_masks,
              &__kmp_affinity_num_masks, __kmp_affinity_proclist, osId2Mask,
              maxIndex);
        }
        if (__kmp_affinity_num_masks == 0) {
            if (__kmp_affinity_verbose || (__kmp_affinity_warnings
              && (__kmp_affinity_type != affinity_none))) {
                KMP_WARNING(AffNoValidProcID);
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        break;
        case affinity_logical:
        __kmp_affinity_compact = 0;
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

        case affinity_physical:
        if (__kmp_nThreadsPerCore > 1) {
            __kmp_affinity_compact = 1;
            if (__kmp_affinity_compact >= depth) {
                __kmp_affinity_compact = 0;
            }
        }
        else {
            __kmp_affinity_compact = 0;
        }
        if (__kmp_affinity_offset) {
            __kmp_affinity_offset = __kmp_nThreadsPerCore * __kmp_affinity_offset
              % __kmp_avail_proc;
        }
        goto sortAddresses;

        case affinity_scatter:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = 0;
        }
        else {
            __kmp_affinity_compact = depth - 1 - __kmp_affinity_compact;
        }
        goto sortAddresses;

        case affinity_compact:
        if (__kmp_affinity_compact >= depth) {
            __kmp_affinity_compact = depth - 1;
        }
        goto sortAddresses;
        case affinity_balanced:
        // Balanced affinity works only for a single package.
        if( nPackages > 1 ) {
            if( __kmp_affinity_verbose || __kmp_affinity_warnings ) {
                KMP_WARNING( AffBalancedNotAvail, "KMP_AFFINITY" );
            }
            __kmp_affinity_type = affinity_none;
            return;
        }
        else if( __kmp_affinity_uniform_topology() ) {
            break;
        }
        else { // Non-uniform topology

            // Save the depth for later use.
            __kmp_aff_depth = depth;

            // Number of HW thread contexts per core.
            int nth_per_core = __kmp_nThreadsPerCore;

            int core_level;
            if( nth_per_core > 1 ) {   // HT machine
                core_level = depth - 2;
            }
            else {                     // no HT
                core_level = depth - 1;
            }
            int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;
            int nproc = nth_per_core * ncores;

            procarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                procarr[ i ] = -1;
            }

            for( int i = 0; i < __kmp_avail_proc; i++ ) {
                int proc = address2os[ i ].second;
                // The innermost label is the thread id if a thread level
                // exists, otherwise it is the core id.
                int level = depth - 1;

                int thread = 0;
                int core = address2os[ i ].first.labels[ level ];
                if( nth_per_core > 1 ) {
                    thread = address2os[ i ].first.labels[ level ] % nth_per_core;
                    core = address2os[ i ].first.labels[ level - 1 ];
                }
                procarr[ core * nth_per_core + thread ] = proc;
            }

            break;
        }
        sortAddresses:
        //
        // Allocate the gtid->affinity mask table.
        //
        if (__kmp_affinity_dups) {
            __kmp_affinity_num_masks = __kmp_avail_proc;
        }
        else {
            __kmp_affinity_num_masks = numUnique;
        }

        if ( ( __kmp_nested_proc_bind.bind_types[0] != proc_bind_intel )
          && ( __kmp_affinity_num_places > 0 )
          && ( (unsigned)__kmp_affinity_num_places < __kmp_affinity_num_masks ) ) {
            __kmp_affinity_num_masks = __kmp_affinity_num_places;
        }

        __kmp_affinity_masks = (kmp_affin_mask_t*)__kmp_allocate(
          __kmp_affinity_num_masks * __kmp_affin_mask_size);

        //
        // Sort address2os by the current setting of __kmp_affinity_compact,
        // then fill out __kmp_affinity_masks.
        //
        qsort(address2os, __kmp_avail_proc, sizeof(*address2os),
          __kmp_affinity_cmp_Address_child_num);
        {
            int i;
            unsigned j;
            for (i = 0, j = 0; i < __kmp_avail_proc; i++) {
                if ((! __kmp_affinity_dups) && (! address2os[i].first.leader)) {
                    continue;
                }
                unsigned osId = address2os[i].second;
                kmp_affin_mask_t *src = KMP_CPU_INDEX(osId2Mask, osId);
                kmp_affin_mask_t *dest
                  = KMP_CPU_INDEX(__kmp_affinity_masks, j);
                KMP_ASSERT(KMP_CPU_ISSET(osId, src));
                KMP_CPU_COPY(dest, src);
                if (++j >= __kmp_affinity_num_masks) {
                    break;
                }
            }
            KMP_DEBUG_ASSERT(j == __kmp_affinity_num_masks);
        }
        break;

        default:
        KMP_ASSERT2(0, "Unexpected affinity setting");
    }

    __kmp_free(osId2Mask);
    machine_hierarchy.init(address2os, __kmp_avail_proc);
}
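
//
// Much of the code above assumes that if a machine is not affinity capable,
// then __kmp_affinity_type == affinity_none.  An incapable machine is
// represented by affinity_disabled, so the wrapper below temporarily slams
// the type to affinity_none, runs the real initialization, and restores
// affinity_disabled afterwards.
//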
void
__kmp_affinity_initialize(void)
{
    int disabled = (__kmp_affinity_type == affinity_disabled);
    if (! KMP_AFFINITY_CAPABLE()) {
        KMP_ASSERT(disabled);
    }
    if (disabled) {
        __kmp_affinity_type = affinity_none;
    }
    __kmp_aux_affinity_initialize();
    if (disabled) {
        __kmp_affinity_type = affinity_disabled;
    }
}
void
__kmp_affinity_uninitialize(void)
{
    if (__kmp_affinity_masks != NULL) {
        __kmp_free(__kmp_affinity_masks);
        __kmp_affinity_masks = NULL;
    }
    if (fullMask != NULL) {
        KMP_CPU_FREE(fullMask);
        fullMask = NULL;
    }
    __kmp_affinity_num_masks = 0;
    __kmp_affinity_num_places = 0;
    if (__kmp_affinity_proclist != NULL) {
        __kmp_free(__kmp_affinity_proclist);
        __kmp_affinity_proclist = NULL;
    }
    if( address2os != NULL ) {
        __kmp_free( address2os );
        address2os = NULL;
    }
    if( procarr != NULL ) {
        __kmp_free( procarr );
        procarr = NULL;
    }
}
void
__kmp_affinity_set_init_mask(int gtid, int isa_root)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);
    if (th->th.th_affin_mask == NULL) {
        KMP_CPU_ALLOC(th->th.th_affin_mask);
    }
    else {
        KMP_CPU_ZERO(th->th.th_affin_mask);
    }

    //
    // Copy the thread mask to the kmp_info_t structure.  If
    // __kmp_affinity_type == affinity_none, copy the "full" mask, i.e. one
    // that has all of the OS proc ids set, or if __kmp_affinity_respect_mask
    // is set, then that of the initialization thread.
    //
    kmp_affin_mask_t *mask;
    int i;

    if (__kmp_nested_proc_bind.bind_types[0] == proc_bind_intel) {
        if ((__kmp_affinity_type == affinity_none)
          || (__kmp_affinity_type == affinity_balanced)) {
# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }
    else {
        if ((! isa_root)
          || (__kmp_nested_proc_bind.bind_types[0] == proc_bind_false)) {
# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
            if (__kmp_num_proc_groups > 1) {
                return;
            }
# endif
            KMP_ASSERT(fullMask != NULL);
            i = KMP_PLACE_ALL;
            mask = fullMask;
        }
        else {
            KMP_DEBUG_ASSERT( __kmp_affinity_num_masks > 0 );
            i = (gtid + __kmp_affinity_offset) % __kmp_affinity_num_masks;
            mask = KMP_CPU_INDEX(__kmp_affinity_masks, i);
        }
    }

# if OMP_40_ENABLED
    th->th.th_current_place = i;
    if (isa_root) {
        th->th.th_new_place = i;
        th->th.th_first_place = 0;
        th->th.th_last_place = __kmp_affinity_num_masks - 1;
    }

    if (i == KMP_PLACE_ALL) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to all places\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to place %d\n",
          gtid, i));
    }
# else
    if (i == -1) {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to fullMask\n",
          gtid));
    }
    else {
        KA_TRACE(100, ("__kmp_affinity_set_init_mask: binding T#%d to mask %d\n",
          gtid, i));
    }
# endif /* OMP_40_ENABLED */

    KMP_CPU_COPY(th->th.th_affin_mask, mask);

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(), gtid,
          buf);
    }

# if KMP_OS_WINDOWS
    //
    // On Windows* OS, the process affinity mask might have changed.  If the
    // user didn't request affinity and this call fails, just continue
    // silently.  See CQ171393.
    //
    if ( __kmp_affinity_type == affinity_none ) {
        __kmp_set_system_affinity(th->th.th_affin_mask, FALSE);
    }
    else
# endif
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
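
//
// Example (hypothetical numbers): with __kmp_affinity_num_masks == 4 and
// __kmp_affinity_offset == 1, threads T#0..T#3 are assigned places
// 1, 2, 3, 0 respectively, since place index = (gtid + offset) % num_masks.
//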
void
__kmp_affinity_set_place(int gtid)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return;
    }

    kmp_info_t *th = (kmp_info_t *)TCR_SYNC_PTR(__kmp_threads[gtid]);

    KA_TRACE(100, ("__kmp_affinity_set_place: binding T#%d to place %d (current place = %d)\n",
      gtid, th->th.th_new_place, th->th.th_current_place));

    //
    // Check that the new place is within this thread's partition.
    //
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    KMP_ASSERT(th->th.th_new_place >= 0);
    KMP_ASSERT((unsigned)th->th.th_new_place <= __kmp_affinity_num_masks);
    if (th->th.th_first_place <= th->th.th_last_place) {
        KMP_ASSERT((th->th.th_new_place >= th->th.th_first_place)
          && (th->th.th_new_place <= th->th.th_last_place));
    }
    else {
        // The partition wraps around the end of the place list.
        KMP_ASSERT((th->th.th_new_place <= th->th.th_first_place)
          || (th->th.th_new_place >= th->th.th_last_place));
    }

    //
    // Copy the thread mask to the kmp_info_t structure, and set this
    // thread's affinity.
    //
    kmp_affin_mask_t *mask = KMP_CPU_INDEX(__kmp_affinity_masks,
      th->th.th_new_place);
    KMP_CPU_COPY(th->th.th_affin_mask, mask);
    th->th.th_current_place = th->th.th_new_place;

    if (__kmp_affinity_verbose) {
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        KMP_INFORM(BoundToOSProcSet, "OMP_PROC_BIND", (kmp_int32)getpid(),
          gtid, buf);
    }
    __kmp_set_system_affinity(th->th.th_affin_mask, TRUE);
}
int
__kmp_aux_set_affinity(void **mask)
{
    int gtid;
    kmp_info_t *th;
    int retval;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity: setting affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
        }
        else {
            unsigned proc;
            int num_procs = 0;

            for (proc = 0; proc < KMP_CPU_SETSIZE; proc++) {
                if (! KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask))) {
                    continue;
                }
                num_procs++;
                if (! KMP_CPU_ISSET(proc, fullMask)) {
                    KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
                    break;
                }
            }
            if (num_procs == 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }

# if KMP_OS_WINDOWS && KMP_ARCH_X86_64
            if (__kmp_get_proc_group((kmp_affin_mask_t *)(*mask)) < 0) {
                KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity");
            }
# endif /* KMP_OS_WINDOWS && KMP_ARCH_X86_64 */
        }
    }

    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);
    retval = __kmp_set_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    if (retval == 0) {
        KMP_CPU_COPY(th->th.th_affin_mask, (kmp_affin_mask_t *)(*mask));
    }

    th->th.th_current_place = KMP_PLACE_UNDEFINED;
    th->th.th_new_place = KMP_PLACE_UNDEFINED;
    th->th.th_first_place = 0;
    th->th.th_last_place = __kmp_affinity_num_masks - 1;

    //
    // Turn off 4.0 affinity for the current thread at this parallel level.
    //
    th->th.th_current_task->td_icvs.proc_bind = proc_bind_false;

    return retval;
}
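
//
// User-level sketch of the entry point this routine backs (hedged: assumes
// the kmp_* low-level affinity API declared in this runtime's omp.h):
//
//     kmp_affinity_mask_t m;
//     kmp_create_affinity_mask(&m);
//     kmp_set_affinity_mask_proc(0, &m);   // add OS proc 0 to the mask
//     if (kmp_set_affinity(&m) != 0) {
//         // mask rejected, e.g. it contains procs outside fullMask
//     }
//     kmp_destroy_affinity_mask(&m);
//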
int
__kmp_aux_get_affinity(void **mask)
{
    int gtid;
    int retval;
    kmp_info_t *th;

    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    gtid = __kmp_entry_gtid();
    th = __kmp_threads[gtid];
    KMP_DEBUG_ASSERT(th->th.th_affin_mask != NULL);

    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          th->th.th_affin_mask);
        __kmp_printf("kmp_get_affinity: stored affinity mask for thread %d = %s\n",
          gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity");
        }
    }

# if !KMP_OS_WINDOWS

    retval = __kmp_get_system_affinity((kmp_affin_mask_t *)(*mask), FALSE);
    KA_TRACE(1000, ;{
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_printf("kmp_get_affinity: system affinity mask for thread %d = %s\n",
          gtid, buf);
    });
    return retval;

# else

    KMP_CPU_COPY((kmp_affin_mask_t *)(*mask), th->th.th_affin_mask);
    return 0;

# endif /* !KMP_OS_WINDOWS */
}
int
__kmp_aux_set_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_set_affinity_mask_proc: setting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_set_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_SET(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}
int
__kmp_aux_unset_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_unset_affinity_mask_proc: unsetting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_unset_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return -2;
    }

    KMP_CPU_CLR(proc, (kmp_affin_mask_t *)(*mask));
    return 0;
}
int
__kmp_aux_get_affinity_mask_proc(int proc, void **mask)
{
    if (! KMP_AFFINITY_CAPABLE()) {
        return -1;
    }

    KA_TRACE(1000, ;{
        int gtid = __kmp_entry_gtid();
        char buf[KMP_AFFIN_MASK_PRINT_LEN];
        __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN,
          (kmp_affin_mask_t *)(*mask));
        __kmp_debug_printf("kmp_get_affinity_mask_proc: getting proc %d in affinity mask for thread %d = %s\n",
          proc, gtid, buf);
    });

    if (__kmp_env_consistency_check) {
        if ((mask == NULL) || (*mask == NULL)) {
            KMP_FATAL(AffinityInvalidMask, "kmp_get_affinity_mask_proc");
        }
    }

    if ((proc < 0) || ((unsigned)proc >= KMP_CPU_SETSIZE)) {
        return -1;
    }
    if (! KMP_CPU_ISSET(proc, fullMask)) {
        return 0;
    }

    return KMP_CPU_ISSET(proc, (kmp_affin_mask_t *)(*mask));
}
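
//
// Round-trip sketch for the three helpers above (hedged: user-level kmp_*
// wrappers assumed from this runtime's omp.h):
//
//     kmp_affinity_mask_t m;
//     kmp_create_affinity_mask(&m);
//     kmp_set_affinity_mask_proc(3, &m);    // returns 0 on success
//     kmp_get_affinity_mask_proc(3, &m);    // nonzero: proc 3 is set
//     kmp_unset_affinity_mask_proc(3, &m);
//     kmp_get_affinity_mask_proc(3, &m);    // 0 again
//     kmp_destroy_affinity_mask(&m);
//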
void __kmp_balanced_affinity( int tid, int nthreads )
{
    if( __kmp_affinity_uniform_topology() ) {
        int coreID;
        int threadID;
        // Number of HW thread contexts per core.
        int __kmp_nth_per_core = __kmp_avail_proc / __kmp_ncores;
        // Number of cores.
        int ncores = __kmp_ncores;
        // How many threads are bound to each core.
        int chunk = nthreads / ncores;
        // How many cores get one additional thread ("big cores").
        int big_cores = nthreads % ncores;
        // Number of threads on the big cores.
        int big_nth = ( chunk + 1 ) * big_cores;
        if( tid < big_nth ) {
            coreID = tid / (chunk + 1 );
            threadID = ( tid % (chunk + 1 ) ) % __kmp_nth_per_core ;
        }
        else { // tid >= big_nth
            coreID = ( tid - big_cores ) / chunk;
            threadID = ( ( tid - big_cores ) % chunk ) % __kmp_nth_per_core ;
        }

        KMP_DEBUG_ASSERT2(KMP_AFFINITY_CAPABLE(),
          "Illegal set affinity operation when not capable");

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Granularity == thread
        if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread ) {
            int osID = address2os[ coreID * __kmp_nth_per_core + threadID ].second;
            KMP_CPU_SET( osID, mask);
        }
        else if( __kmp_affinity_gran == affinity_gran_core ) { // Granularity == core
            for( int i = 0; i < __kmp_nth_per_core; i++ ) {
                int osID = address2os[ coreID * __kmp_nth_per_core + i ].second;
                KMP_CPU_SET( osID, mask);
            }
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
    else { // Non-uniform topology

        kmp_affin_mask_t *mask = (kmp_affin_mask_t *)alloca(__kmp_affin_mask_size);
        KMP_CPU_ZERO(mask);

        // Number of HW thread contexts per core.
        int nth_per_core = __kmp_nThreadsPerCore;
        int core_level;
        if( nth_per_core > 1 ) {
            core_level = __kmp_aff_depth - 2;
        }
        else {
            core_level = __kmp_aff_depth - 1;
        }

        // Number of cores (maximum core id + 1).
        int ncores = address2os[ __kmp_avail_proc - 1 ].first.labels[ core_level ] + 1;

        // Fast path: one thread per available processor.
        if( nthreads == __kmp_avail_proc ) {
            if( __kmp_affinity_gran == affinity_gran_fine || __kmp_affinity_gran == affinity_gran_thread ) {
                int osID = address2os[ tid ].second;
                KMP_CPU_SET( osID, mask);
            }
            else if( __kmp_affinity_gran == affinity_gran_core ) {
                int coreID = address2os[ tid ].first.labels[ core_level ];
                // address2os is sorted, so stop once all nth_per_core
                // contexts of this core have been collected.
                int cnt = 0;
                for( int i = 0; i < __kmp_avail_proc; i++ ) {
                    int osID = address2os[ i ].second;
                    int core = address2os[ i ].first.labels[ core_level ];
                    if( core == coreID ) {
                        KMP_CPU_SET( osID, mask);
                        cnt++;
                        if( cnt == nth_per_core ) {
                            break;
                        }
                    }
                }
            }
        }
        else if( nthreads <= __kmp_ncores ) {
            // One core per thread: the tid-th non-empty core gets this thread.
            int core = 0;
            for( int i = 0; i < ncores; i++ ) {
                // Check whether this core has any available processors.
                int in_mask = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        in_mask = 1;
                        break;
                    }
                }
                if( in_mask ) {
                    if( tid == core ) {
                        for( int j = 0; j < nth_per_core; j++ ) {
                            int osID = procarr[ i * nth_per_core + j ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask );
                                // For granularity=thread the first available
                                // context of this core is enough.
                                if( __kmp_affinity_gran == affinity_gran_fine
                                  || __kmp_affinity_gran == affinity_gran_thread ) {
                                    break;
                                }
                            }
                        }
                        break;
                    }
                    else {
                        core++;
                    }
                }
            }
        }
        else { // nthreads > __kmp_ncores
            // Number of available processors at each core.
            int nproc_at_core[ ncores ];
            // Number of cores with exactly "x" available processors.
            int ncores_with_x_procs[ nth_per_core + 1 ];
            // Number of cores with between "x" and nth_per_core processors.
            int ncores_with_x_to_max_procs[ nth_per_core + 1 ];

            for( int i = 0; i <= nth_per_core; i++ ) {
                ncores_with_x_procs[ i ] = 0;
                ncores_with_x_to_max_procs[ i ] = 0;
            }

            for( int i = 0; i < ncores; i++ ) {
                int cnt = 0;
                for( int j = 0; j < nth_per_core; j++ ) {
                    if( procarr[ i * nth_per_core + j ] != -1 ) {
                        cnt++;
                    }
                }
                nproc_at_core[ i ] = cnt;
                ncores_with_x_procs[ cnt ]++;
            }

            for( int i = 0; i <= nth_per_core; i++ ) {
                for( int j = i; j <= nth_per_core; j++ ) {
                    ncores_with_x_to_max_procs[ i ] += ncores_with_x_procs[ j ];
                }
            }

            // Maximum number of processors.
            int nproc = nth_per_core * ncores;
            // Number of threads assigned to each context.
            int *newarr = ( int * )__kmp_allocate( sizeof( int ) * nproc );
            for( int i = 0; i < nproc; i++ ) {
                newarr[ i ] = 0;
            }

            // Distribute the threads over the available contexts, filling
            // emptier cores first; on later passes (flag != 0) a context may
            // receive more than one thread.
            int nth = nthreads;
            int flag = 0;
            while( nth > 0 ) {
                for( int j = 1; j <= nth_per_core; j++ ) {
                    int cnt = ncores_with_x_to_max_procs[ j ];
                    for( int i = 0; i < ncores; i++ ) {
                        // Skip cores with no available processors.
                        if( nproc_at_core[ i ] == 0 ) {
                            continue;
                        }
                        for( int k = 0; k < nth_per_core; k++ ) {
                            if( procarr[ i * nth_per_core + k ] != -1 ) {
                                if( newarr[ i * nth_per_core + k ] == 0 ) {
                                    newarr[ i * nth_per_core + k ] = 1;
                                    cnt--;
                                    nth--;
                                    break;
                                }
                                else if( flag != 0 ) {
                                    newarr[ i * nth_per_core + k ]++;
                                    cnt--;
                                    nth--;
                                    break;
                                }
                            }
                        }
                        if( cnt == 0 || nth == 0 ) {
                            break;
                        }
                    }
                    if( nth == 0 ) {
                        break;
                    }
                }
                flag = 1;
            }

            // Pick the contexts assigned to this tid and fill the mask.
            int sum = 0;
            for( int i = 0; i < nproc; i++ ) {
                sum += newarr[ i ];
                if( sum > tid ) {
                    // Granularity == thread
                    if( __kmp_affinity_gran == affinity_gran_fine
                      || __kmp_affinity_gran == affinity_gran_thread ) {
                        int osID = procarr[ i ];
                        KMP_CPU_SET( osID, mask);
                    }
                    else if( __kmp_affinity_gran == affinity_gran_core ) {
                        int coreID = i / nth_per_core;
                        for( int ii = 0; ii < nth_per_core; ii++ ) {
                            int osID = procarr[ coreID * nth_per_core + ii ];
                            if( osID != -1 ) {
                                KMP_CPU_SET( osID, mask);
                            }
                        }
                    }
                    break;
                }
            }
            __kmp_free( newarr );
        }
        if (__kmp_affinity_verbose) {
            char buf[KMP_AFFIN_MASK_PRINT_LEN];
            __kmp_affinity_print_mask(buf, KMP_AFFIN_MASK_PRINT_LEN, mask);
            KMP_INFORM(BoundToOSProcSet, "KMP_AFFINITY", (kmp_int32)getpid(),
              tid, buf);
        }
        __kmp_set_system_affinity( mask, TRUE );
    }
}
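
//
// Worked example for the uniform case (hypothetical numbers): nthreads = 10
// on ncores = 4 gives chunk = 2, big_cores = 2, big_nth = 6; threads 0-5
// land on cores 0-1 (three threads each), threads 6-9 on cores 2-3 (two
// threads each).
//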
#else
    // Fallback hierarchy machinery used when KMP_AFFINITY_SUPPORTED is not
    // defined.

kmp_uint32 mac_skipPerLevel[7];
kmp_uint32 mac_depth;
kmp_uint8 mac_leaf_kids;
void __kmp_get_hierarchy(kmp_uint32 nproc, kmp_bstate_t *thr_bar) {
    static int first = 1;
    if (first) {
        const kmp_uint32 maxLevels = 7;
        kmp_uint32 numPerLevel[maxLevels];

        // Start with one item per level; level 0 holds all the threads.
        for (kmp_uint32 i=0; i<maxLevels; ++i) {
            numPerLevel[i] = 1;
            mac_skipPerLevel[i] = 1;
        }

        mac_depth = 2;
        numPerLevel[0] = nproc;

        kmp_uint32 branch = 4;
        if (numPerLevel[0] == 1) branch = nproc/4;
        if (branch<4) branch=4;
        for (kmp_uint32 d=0; d<mac_depth-1; ++d) { // optimize hierarchy width
            while (numPerLevel[d] > branch || (d==0 && numPerLevel[d]>4)) { // max 4 on level 0!
                if (numPerLevel[d] & 1) numPerLevel[d]++;
                numPerLevel[d] = numPerLevel[d] >> 1;
                if (numPerLevel[d+1] == 1) mac_depth++;
                numPerLevel[d+1] = numPerLevel[d+1] << 1;
            }
            if(numPerLevel[0] == 1) {
                branch = branch >> 1;
                if (branch<4) branch = 4;
            }
        }

        for (kmp_uint32 i=1; i<mac_depth; ++i)
            mac_skipPerLevel[i] = numPerLevel[i-1] * mac_skipPerLevel[i-1];
        mac_leaf_kids = (kmp_uint8)numPerLevel[0]-1;
        first = 0;
    }
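
    // Worked example (hypothetical numbers): for nproc = 24 the loop above
    // settles at numPerLevel = {3, 4, 2} with mac_depth = 4, giving
    // mac_skipPerLevel = {1, 3, 12, 24} and mac_leaf_kids = 2.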
    thr_bar->depth = mac_depth;
    thr_bar->base_leaf_kids = mac_leaf_kids;
    thr_bar->skip_per_level = mac_skipPerLevel;
}

#endif // KMP_AFFINITY_SUPPORTED