18 #include "kmp_stats.h" 27 #define STRINGIZE2(x) #x 28 #define STRINGIZE(x) STRINGIZE2(x) 30 #define expandName(name, flags, ignore) {STRINGIZE(name), flags}, 31 statInfo timeStat::timerInfo[] = {
32 KMP_FOREACH_TIMER(expandName, 0){
"TIMER_LAST", 0}};
33 const statInfo counter::counterInfo[] = {
37 #define expandName(ignore1, ignore2, ignore3) {0.0, 0.0, 0.0}, 38 kmp_stats_output_module::rgb_color kmp_stats_output_module::timerColorInfo[] = {
39 KMP_FOREACH_TIMER(expandName, 0){0.0, 0.0, 0.0}};
42 const kmp_stats_output_module::rgb_color
43 kmp_stats_output_module::globalColorArray[] = {
67 static uint32_t statsPrinted = 0;
70 static kmp_stats_output_module *__kmp_stats_global_output = NULL;
74 void statistic::addSample(
double sample) {
75 double delta = sample - meanVal;
77 sampleCount = sampleCount + 1;
78 meanVal = meanVal + delta / sampleCount;
79 m2 = m2 + delta * (sample - meanVal);
81 minVal = std::min(minVal, sample);
82 maxVal = std::max(maxVal, sample);
85 statistic &statistic::operator+=(
const statistic &other) {
86 if (sampleCount == 0) {
91 uint64_t newSampleCount = sampleCount + other.sampleCount;
92 double dnsc = double(newSampleCount);
93 double dsc = double(sampleCount);
94 double dscBydnsc = dsc / dnsc;
95 double dosc = double(other.sampleCount);
96 double delta = other.meanVal - meanVal;
104 meanVal = meanVal * dscBydnsc + other.meanVal * (1 - dscBydnsc);
105 m2 = m2 + other.m2 + dscBydnsc * dosc * delta * delta;
106 minVal = std::min(minVal, other.minVal);
107 maxVal = std::max(maxVal, other.maxVal);
108 sampleCount = newSampleCount;
113 void statistic::scale(
double factor) {
114 minVal = minVal * factor;
115 maxVal = maxVal * factor;
116 meanVal = meanVal * factor;
117 m2 = m2 * factor * factor;
121 std::string statistic::format(
char unit,
bool total)
const {
122 std::string result = formatSI(sampleCount, 9,
' ');
124 if (sampleCount == 0) {
125 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
126 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
127 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
129 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
130 result = result + std::string(
", ") + formatSI(0.0, 9, unit);
132 result = result + std::string(
", ") + formatSI(minVal, 9, unit);
133 result = result + std::string(
", ") + formatSI(meanVal, 9, unit);
134 result = result + std::string(
", ") + formatSI(maxVal, 9, unit);
137 result + std::string(
", ") + formatSI(meanVal * sampleCount, 9, unit);
138 result = result + std::string(
", ") + formatSI(getSD(), 9, unit);
145 void explicitTimer::start(timer_e timerEnumValue) {
146 startTime = tsc_tick_count::now();
149 __kmp_stats_thread_ptr->incrementNestValue();
154 void explicitTimer::stop(timer_e timerEnumValue,
155 kmp_stats_list *stats_ptr ) {
156 if (startTime.getValue() == 0)
159 tsc_tick_count finishTime = tsc_tick_count::now();
162 stat->addSample(((finishTime - startTime) - totalPauseTime).ticks());
166 stats_ptr = __kmp_stats_thread_ptr;
167 stats_ptr->push_event(
168 startTime.getValue() - __kmp_stats_start_time.getValue(),
169 finishTime.getValue() - __kmp_stats_start_time.getValue(),
170 __kmp_stats_thread_ptr->getNestValue(), timerEnumValue);
171 stats_ptr->decrementNestValue();
181 partitionedTimers::partitionedTimers() { timer_stack.reserve(8); }
184 void partitionedTimers::add_timer(explicit_timer_e timer_index,
185 explicitTimer *timer_pointer) {
186 KMP_DEBUG_ASSERT((
int)timer_index < (
int)EXPLICIT_TIMER_LAST + 1);
187 timers[timer_index] = timer_pointer;
191 void partitionedTimers::init(timerPair init_timer_pair) {
192 KMP_DEBUG_ASSERT(this->timer_stack.size() == 0);
193 timer_stack.push_back(init_timer_pair);
194 timers[init_timer_pair.get_index()]->start(init_timer_pair.get_timer());
201 void partitionedTimers::push(timerPair timer_pair) {
206 KMP_DEBUG_ASSERT(this->timer_stack.size() > 0);
207 timerPair current_timer = timer_stack.back();
208 timer_stack.push_back(timer_pair);
209 if (current_timer != timer_pair) {
210 timers[current_timer.get_index()]->pause();
211 timers[timer_pair.get_index()]->start(timer_pair.get_timer());
216 void partitionedTimers::pop() {
221 KMP_DEBUG_ASSERT(this->timer_stack.size() > 1);
222 timerPair current_timer = timer_stack.back();
223 timer_stack.pop_back();
224 timerPair new_timer = timer_stack.back();
225 if (current_timer != new_timer) {
226 timers[current_timer.get_index()]->stop(current_timer.get_timer());
227 timers[new_timer.get_index()]->resume();
235 void partitionedTimers::windup() {
236 while (timer_stack.size() > 1) {
239 if (timer_stack.size() > 0) {
240 timerPair last_timer = timer_stack.back();
241 timer_stack.pop_back();
242 timers[last_timer.get_index()]->stop(last_timer.get_timer());
248 void kmp_stats_event_vector::deallocate() {
259 int compare_two_events(
const void *event1,
const void *event2) {
260 kmp_stats_event *ev1 = (kmp_stats_event *)event1;
261 kmp_stats_event *ev2 = (kmp_stats_event *)event2;
263 if (ev1->getStart() < ev2->getStart())
265 else if (ev1->getStart() > ev2->getStart())
271 void kmp_stats_event_vector::sort() {
272 qsort(events, internal_size,
sizeof(kmp_stats_event), compare_two_events);
278 kmp_stats_list *kmp_stats_list::push_back(
int gtid) {
279 kmp_stats_list *newnode =
280 (kmp_stats_list *)__kmp_allocate(
sizeof(kmp_stats_list));
283 new (newnode) kmp_stats_list();
284 newnode->setGtid(gtid);
285 newnode->prev = this->prev;
286 newnode->next =
this;
287 newnode->prev->next = newnode;
288 newnode->next->prev = newnode;
291 void kmp_stats_list::deallocate() {
292 kmp_stats_list *ptr = this->next;
293 kmp_stats_list *delptr = this->next;
294 while (ptr !=
this) {
298 delptr->_event_vector.deallocate();
299 delptr->~kmp_stats_list();
303 kmp_stats_list::iterator kmp_stats_list::begin() {
304 kmp_stats_list::iterator it;
308 kmp_stats_list::iterator kmp_stats_list::end() {
309 kmp_stats_list::iterator it;
313 int kmp_stats_list::size() {
315 kmp_stats_list::iterator it;
316 for (retval = 0, it = begin(); it != end(); it++, retval++) {
323 kmp_stats_list::iterator::iterator() : ptr(NULL) {}
324 kmp_stats_list::iterator::~iterator() {}
325 kmp_stats_list::iterator kmp_stats_list::iterator::operator++() {
326 this->ptr = this->ptr->next;
329 kmp_stats_list::iterator kmp_stats_list::iterator::operator++(
int dummy) {
330 this->ptr = this->ptr->next;
333 kmp_stats_list::iterator kmp_stats_list::iterator::operator--() {
334 this->ptr = this->ptr->prev;
337 kmp_stats_list::iterator kmp_stats_list::iterator::operator--(
int dummy) {
338 this->ptr = this->ptr->prev;
341 bool kmp_stats_list::iterator::operator!=(
const kmp_stats_list::iterator &rhs) {
342 return this->ptr != rhs.ptr;
344 bool kmp_stats_list::iterator::operator==(
const kmp_stats_list::iterator &rhs) {
345 return this->ptr == rhs.ptr;
347 kmp_stats_list *kmp_stats_list::iterator::operator*()
const {
353 const char *kmp_stats_output_module::eventsFileName = NULL;
354 const char *kmp_stats_output_module::plotFileName = NULL;
355 int kmp_stats_output_module::printPerThreadFlag = 0;
356 int kmp_stats_output_module::printPerThreadEventsFlag = 0;
360 void kmp_stats_output_module::init() {
361 char *statsFileName = getenv(
"KMP_STATS_FILE");
362 eventsFileName = getenv(
"KMP_STATS_EVENTS_FILE");
363 plotFileName = getenv(
"KMP_STATS_PLOT_FILE");
364 char *threadStats = getenv(
"KMP_STATS_THREADS");
365 char *threadEvents = getenv(
"KMP_STATS_EVENTS");
372 std::string baseFileName, pid, suffix;
373 std::stringstream ss;
374 outputFileName = std::string(statsFileName);
375 index = outputFileName.find_last_of(
'.');
376 if (index == std::string::npos) {
377 baseFileName = outputFileName;
379 baseFileName = outputFileName.substr(0, index);
380 suffix = outputFileName.substr(index);
384 outputFileName = baseFileName +
"-" + pid + suffix;
386 eventsFileName = eventsFileName ? eventsFileName :
"events.dat";
387 plotFileName = plotFileName ? plotFileName :
"events.plt";
391 printPerThreadFlag = __kmp_str_match_true(threadStats);
392 printPerThreadEventsFlag = __kmp_str_match_true(threadEvents);
394 if (printPerThreadEventsFlag) {
399 timeStat::clearEventFlags();
405 void kmp_stats_output_module::setupEventColors() {
407 int globalColorIndex = 0;
408 int numGlobalColors =
sizeof(globalColorArray) /
sizeof(rgb_color);
409 for (i = 0; i < TIMER_LAST; i++) {
411 timerColorInfo[i] = globalColorArray[globalColorIndex];
412 globalColorIndex = (globalColorIndex + 1) % numGlobalColors;
418 void kmp_stats_output_module::printTimerStats(FILE *statsOut,
419 statistic
const *theStats,
420 statistic
const *totalStats) {
421 fprintf(statsOut,
"Timer, SampleCount, Min, " 422 "Mean, Max, Total, SD\n");
423 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
424 statistic
const *stat = &theStats[s];
427 fprintf(statsOut,
"%-28s, %s\n", timeStat::name(s),
428 stat->format(tag,
true).c_str());
431 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
434 fprintf(statsOut,
"Total_%-22s, %s\n", timeStat::name(s),
435 totalStats[s].format(tag,
true).c_str());
439 void kmp_stats_output_module::printCounterStats(FILE *statsOut,
440 statistic
const *theStats) {
441 fprintf(statsOut,
"Counter, ThreadCount, Min, Mean, " 442 " Max, Total, SD\n");
443 for (
int s = 0; s < COUNTER_LAST; s++) {
444 statistic
const *stat = &theStats[s];
445 fprintf(statsOut,
"%-25s, %s\n", counter::name(counter_e(s)),
446 stat->format(
' ',
true).c_str());
450 void kmp_stats_output_module::printCounters(FILE *statsOut,
451 counter
const *theCounters) {
454 fprintf(statsOut,
"\nCounter, Count\n");
455 for (
int c = 0; c < COUNTER_LAST; c++) {
456 counter
const *stat = &theCounters[c];
457 fprintf(statsOut,
"%-25s, %s\n", counter::name(counter_e(c)),
458 formatSI(stat->getValue(), 9,
' ').c_str());
462 void kmp_stats_output_module::printEvents(FILE *eventsOut,
463 kmp_stats_event_vector *theEvents,
467 for (
int i = 0; i < theEvents->size(); i++) {
468 kmp_stats_event ev = theEvents->at(i);
469 rgb_color color = getEventColor(ev.getTimerName());
470 fprintf(eventsOut,
"%d %lu %lu %1.1f rgb(%1.1f,%1.1f,%1.1f) %s\n", gtid,
471 ev.getStart(), ev.getStop(), 1.2 - (ev.getNestLevel() * 0.2),
472 color.r, color.g, color.b, timeStat::name(ev.getTimerName()));
477 void kmp_stats_output_module::windupExplicitTimers() {
481 kmp_stats_list::iterator it;
482 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
483 kmp_stats_list *ptr = *it;
484 ptr->getPartitionedTimers()->windup();
485 for (
int timer = 0; timer < EXPLICIT_TIMER_LAST; timer++) {
486 ptr->getExplicitTimer(explicit_timer_e(timer))->stop((timer_e)timer, ptr);
491 void kmp_stats_output_module::printPloticusFile() {
493 int size = __kmp_stats_list->size();
494 FILE *plotOut = fopen(plotFileName,
"w+");
496 fprintf(plotOut,
"#proc page\n" 500 fprintf(plotOut,
"#proc getdata\n" 504 fprintf(plotOut,
"#proc areadef\n" 505 " title: OpenMP Sampling Timeline\n" 506 " titledetails: align=center size=16\n" 507 " rectangle: 1 1 13 9\n" 508 " xautorange: datafield=2,3\n" 509 " yautorange: -1 %d\n\n",
512 fprintf(plotOut,
"#proc xaxis\n" 514 " stubdetails: size=12\n" 515 " label: Time (ticks)\n" 516 " labeldetails: size=14\n\n");
518 fprintf(plotOut,
"#proc yaxis\n" 521 " stubdetails: size=12\n" 523 " labeldetails: size=14\n\n",
526 fprintf(plotOut,
"#proc bars\n" 527 " exactcolorfield: 5\n" 530 " segmentfields: 2 3\n" 531 " barwidthfield: 4\n\n");
534 for (i = 0; i < TIMER_LAST; i++) {
536 rgb_color c = getEventColor((timer_e)i);
537 fprintf(plotOut,
"#proc legendentry\n" 538 " sampletype: color\n" 540 " details: rgb(%1.1f,%1.1f,%1.1f)\n\n",
541 timeStat::name((timer_e)i), c.r, c.g, c.b);
545 fprintf(plotOut,
"#proc legend\n" 547 " location: max max\n\n");
557 void kmp_stats_output_module::printHeaderInfo(FILE *statsOut) {
558 std::time_t now = std::time(0);
562 std::strftime(&buffer[0],
sizeof(buffer),
"%c", std::localtime(&now));
563 fprintf(statsOut,
"# Time of run: %s\n", &buffer[0]);
564 if (gethostname(&hostName[0],
sizeof(hostName)) == 0)
565 fprintf(statsOut,
"# Hostname: %s\n", &hostName[0]);
566 #if KMP_ARCH_X86 || KMP_ARCH_X86_64 567 fprintf(statsOut,
"# CPU: %s\n", &__kmp_cpuinfo.name[0]);
568 fprintf(statsOut,
"# Family: %d, Model: %d, Stepping: %d\n",
569 __kmp_cpuinfo.family, __kmp_cpuinfo.model, __kmp_cpuinfo.stepping);
570 if (__kmp_cpuinfo.frequency == 0)
571 fprintf(statsOut,
"# Nominal frequency: Unknown\n");
573 fprintf(statsOut,
"# Nominal frequency: %sz\n",
574 formatSI(
double(__kmp_cpuinfo.frequency), 9,
'H').c_str());
578 void kmp_stats_output_module::outputStats(
const char *heading) {
582 windupExplicitTimers();
584 statistic allStats[TIMER_LAST];
585 statistic totalStats[TIMER_LAST];
587 statistic allCounters[COUNTER_LAST];
590 !outputFileName.empty() ? fopen(outputFileName.c_str(),
"a+") : stderr;
595 if (eventPrintingEnabled()) {
596 eventsOut = fopen(eventsFileName,
"w+");
599 printHeaderInfo(statsOut);
600 fprintf(statsOut,
"%s\n", heading);
602 kmp_stats_list::iterator it;
603 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
604 int t = (*it)->getGtid();
606 if (printPerThreadFlag) {
607 fprintf(statsOut,
"Thread %d\n", t);
608 printTimerStats(statsOut, (*it)->getTimers(), 0);
609 printCounters(statsOut, (*it)->getCounters());
610 fprintf(statsOut,
"\n");
613 if (eventPrintingEnabled()) {
614 kmp_stats_event_vector events = (*it)->getEventVector();
615 printEvents(eventsOut, &events, t);
619 for (timer_e s = timer_e(0); s < TIMER_LAST; s = timer_e(s + 1)) {
621 if ((timeStat::masterOnly(s) && (t != 0)) ||
623 (timeStat::workerOnly(s) && (t == 0))
629 statistic *threadStat = (*it)->getTimer(s);
630 allStats[s] += *threadStat;
634 totalStats[s].addSample(threadStat->getTotal());
638 for (counter_e c = counter_e(0); c < COUNTER_LAST; c = counter_e(c + 1)) {
639 if (counter::masterOnly(c) && t != 0)
641 allCounters[c].addSample((*it)->getCounter(c)->getValue());
645 if (eventPrintingEnabled()) {
650 fprintf(statsOut,
"Aggregate for all threads\n");
651 printTimerStats(statsOut, &allStats[0], &totalStats[0]);
652 fprintf(statsOut,
"\n");
653 printCounterStats(statsOut, &allCounters[0]);
655 if (statsOut != stderr)
665 void __kmp_reset_stats() {
666 kmp_stats_list::iterator it;
667 for (it = __kmp_stats_list->begin(); it != __kmp_stats_list->end(); it++) {
668 timeStat *timers = (*it)->getTimers();
669 counter *counters = (*it)->getCounters();
670 explicitTimer *eTimers = (*it)->getExplicitTimers();
672 for (
int t = 0; t < TIMER_LAST; t++)
675 for (
int c = 0; c < COUNTER_LAST; c++)
678 for (
int t = 0; t < EXPLICIT_TIMER_LAST; t++)
682 (*it)->resetEventVector();
688 void __kmp_output_stats(
const char *heading) {
689 __kmp_stats_global_output->outputStats(heading);
693 void __kmp_accumulate_stats_at_exit(
void) {
695 if (KMP_XCHG_FIXED32(&statsPrinted, 1) != 0)
698 __kmp_output_stats(
"Statistics on exit");
701 void __kmp_stats_init(
void) {
702 __kmp_init_tas_lock(&__kmp_stats_lock);
703 __kmp_stats_start_time = tsc_tick_count::now();
704 __kmp_stats_global_output =
new kmp_stats_output_module();
705 __kmp_stats_list =
new kmp_stats_list();
708 void __kmp_stats_fini(
void) {
709 __kmp_accumulate_stats_at_exit();
710 __kmp_stats_list->deallocate();
711 delete __kmp_stats_global_output;
712 delete __kmp_stats_list;
do not show a TOTAL_aggregation for this statistic
statistic doesn't need units printed next to it in output
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.