LLVM OpenMP* Runtime Library
kmp_stats.h
1 #ifndef KMP_STATS_H
2 #define KMP_STATS_H
3 
9 //===----------------------------------------------------------------------===//
10 //
11 // The LLVM Compiler Infrastructure
12 //
13 // This file is dual licensed under the MIT and the University of Illinois Open
14 // Source Licenses. See LICENSE.txt for details.
15 //
16 //===----------------------------------------------------------------------===//
17 
18 
19 #include "kmp_config.h"
20 
21 #if KMP_STATS_ENABLED
22 /* Statistics accumulator.
23  Accumulates number of samples and computes min, max, mean, standard deviation
24  on the fly.
25 
26  Online variance calculation algorithm from
27  http://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#On-line_algorithm
28  */
29 
30 #include "kmp_stats_timing.h"
31 #include <limits>
32 #include <math.h>
33 #include <new> // placement new
34 #include <stdint.h>
35 #include <string>
36 #include <vector>
37 
38 /* Enable developer statistics here if you want them. They are more detailed
39  than is useful for application characterisation and are intended for the
40  runtime library developer. */
41 // #define KMP_DEVELOPER_STATS 1
42 
// Flags to describe the statistic (timer or counter). Each flag is a single
// bit so that several can be OR-ed together in statInfo::flags.
enum stats_flags_e {
  noTotal = 1 << 0,      // do not show a TOTAL_aggregation for this statistic
  onlyInMaster = 1 << 1, // statistic is valid only for master
  noUnits = 1 << 2,      // statistic doesn't need units printed next to it
  notInMaster = 1 << 3,  // statistic is valid only for non-master threads
  logEvent = 1 << 4      // NOTE(review): presumably "log start/stop events for
                         // this timer" (see timeStat::logEvent) -- confirm
};
57 
// The states which a thread can be in. Enumerator values are the implicit
// ones (IDLE == 0, counting up in declaration order).
enum stats_state_e {
  IDLE,
  SERIAL_REGION,
  FORK_JOIN_BARRIER,
  PLAIN_BARRIER,
  TASKWAIT,
  TASKYIELD,
  TASKGROUP,
  IMPLICIT_TASK,
  EXPLICIT_TASK
};
74 
93 // clang-format off
// Invokes macro(name, flags, arg) once per counter. The order of the entries
// determines the values of the generated COUNTER_* enumerators, so append new
// counters rather than reordering existing ones.
#define KMP_FOREACH_COUNTER(macro, arg)                                        \
  macro(OMP_PARALLEL, stats_flags_e::onlyInMaster | stats_flags_e::noTotal,    \
        arg)                                                                   \
  macro(OMP_NESTED_PARALLEL, 0, arg)                                           \
  macro(OMP_FOR_static, 0, arg)                                                \
  macro(OMP_FOR_static_steal, 0, arg)                                          \
  macro(OMP_FOR_dynamic, 0, arg)                                               \
  macro(OMP_DISTRIBUTE, 0, arg)                                                \
  macro(OMP_BARRIER, 0, arg)                                                   \
  macro(OMP_CRITICAL, 0, arg)                                                  \
  macro(OMP_SINGLE, 0, arg)                                                    \
  macro(OMP_MASTER, 0, arg)                                                    \
  macro(OMP_TEAMS, 0, arg)                                                     \
  macro(OMP_set_lock, 0, arg)                                                  \
  macro(OMP_test_lock, 0, arg)                                                 \
  macro(REDUCE_wait, 0, arg)                                                   \
  macro(REDUCE_nowait, 0, arg)                                                 \
  macro(OMP_TASKYIELD, 0, arg)                                                 \
  macro(OMP_TASKLOOP, 0, arg)                                                  \
  macro(TASK_executed, 0, arg)                                                 \
  macro(TASK_cancelled, 0, arg)                                                \
  macro(TASK_stolen, 0, arg)
109 // clang-format on
110 
129 // clang-format off
// Invokes macro(name, flags, arg) once per timer, then once per developer
// timer via KMP_FOREACH_DEVELOPER_TIMER. Entry order fixes the values of the
// generated TIMER_* / EXPLICIT_TIMER_* enumerators. Entries flagged noUnits
// are sampled through KMP_COUNT_VALUE-style statistics rather than timed.
#define KMP_FOREACH_TIMER(macro, arg)                                          \
  macro(OMP_worker_thread_life, stats_flags_e::logEvent, arg)                  \
  macro(FOR_static_scheduling, 0, arg)                                         \
  macro(FOR_dynamic_scheduling, 0, arg)                                        \
  macro(OMP_critical, 0, arg)                                                  \
  macro(OMP_critical_wait, 0, arg)                                             \
  macro(OMP_single, 0, arg)                                                    \
  macro(OMP_master, 0, arg)                                                    \
  macro(OMP_idle, stats_flags_e::logEvent, arg)                                \
  macro(OMP_plain_barrier, stats_flags_e::logEvent, arg)                       \
  macro(OMP_fork_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_join_barrier, stats_flags_e::logEvent, arg)                        \
  macro(OMP_parallel, stats_flags_e::logEvent, arg)                            \
  macro(OMP_task_immediate, 0, arg)                                            \
  macro(OMP_task_taskwait, 0, arg)                                             \
  macro(OMP_task_taskyield, 0, arg)                                            \
  macro(OMP_task_taskgroup, 0, arg)                                            \
  macro(OMP_task_join_bar, 0, arg)                                             \
  macro(OMP_task_plain_bar, 0, arg)                                            \
  macro(OMP_serial, stats_flags_e::logEvent, arg)                              \
  macro(OMP_taskloop_scheduling, 0, arg)                                       \
  macro(OMP_set_numthreads,                                                    \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(OMP_PARALLEL_args,                                                     \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_iterations,                                                 \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_dynamic_iterations,                                                \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_steal_stolen,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  macro(FOR_static_steal_chunks,                                               \
        stats_flags_e::noUnits | stats_flags_e::noTotal, arg)                  \
  KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
164 // clang-format on
165 
166 // OMP_start_end -- Time from when OpenMP is initialized until the
167 // stats are printed at exit
168 // OMP_serial -- Thread zero time executing serial code
169 // OMP_work -- Elapsed time in code dispatched by a fork (measured
170 // in the thread)
171 // OMP_barrier -- Time at "real" barriers (includes task time)
172 // FOR_static_scheduling -- Time spent doing scheduling for a static "for"
173 // FOR_dynamic_scheduling -- Time spent doing scheduling for a dynamic "for"
174 // OMP_idle -- Worker threads time spent waiting for inclusion in
175 // a parallel region
176 // OMP_plain_barrier -- Time spent in a barrier construct
177 // OMP_fork_join_barrier -- Time spent in the fork-join barrier surrounding a
178 // parallel region
179 // OMP_parallel -- Time spent inside a parallel construct
180 // OMP_task_immediate -- Time spent executing non-deferred tasks
181 // OMP_task_taskwait -- Time spent executing tasks inside a taskwait
182 // construct
183 // OMP_task_taskyield -- Time spent executing tasks inside a taskyield
184 // construct
185 // OMP_task_taskgroup -- Time spent executing tasks inside a taskgroup
186 // construct
187 // OMP_task_join_bar -- Time spent executing tasks inside a join barrier
188 // OMP_task_plain_bar -- Time spent executing tasks inside a barrier
189 // construct
190 // OMP_single -- Time spent executing a "single" region
191 // OMP_master -- Time spent executing a "master" region
192 // OMP_set_numthreads -- Values passed to omp_set_num_threads
193 // OMP_PARALLEL_args -- Number of arguments passed to a parallel region
194 // FOR_static_iterations -- Number of available parallel chunks of work in a
195 // static for
196 // FOR_dynamic_iterations -- Number of available parallel chunks of work in a
197 // dynamic for
198 // Both adjust for any chunking, so if there were an
199 // iteration count of 20 but a chunk size of 10, we'd
200 // record 2.
201 
202 #if (KMP_DEVELOPER_STATS)
203 // Timers which are of interest to runtime library developers, not end users.
204 // These have to be explicitly enabled in addition to the other stats.
205 
206 // KMP_fork_barrier -- time in __kmp_fork_barrier
207 // KMP_join_barrier -- time in __kmp_join_barrier
208 // KMP_barrier -- time in __kmp_barrier
209 // KMP_end_split_barrier -- time in __kmp_end_split_barrier
210 // KMP_setup_icv_copy -- time in __kmp_setup_icv_copy
211 // KMP_icv_copy -- start/stop timer for any ICV copying
212 // KMP_linear_gather -- time in __kmp_linear_barrier_gather
213 // KMP_linear_release -- time in __kmp_linear_barrier_release
214 // KMP_tree_gather -- time in __kmp_tree_barrier_gather
215 // KMP_tree_release -- time in __kmp_tree_barrier_release
216 // KMP_hyper_gather -- time in __kmp_hyper_barrier_gather
217 // KMP_hyper_release -- time in __kmp_hyper_barrier_release
// Invokes macro(name, 0, arg) once per developer timer. None of these timers
// carries any stats_flags_e flag.
#define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)                                \
  macro(KMP_fork_call, 0, arg)                                                 \
  macro(KMP_join_call, 0, arg)                                                 \
  macro(KMP_end_split_barrier, 0, arg)                                         \
  macro(KMP_hier_gather, 0, arg)                                               \
  macro(KMP_hier_release, 0, arg)                                              \
  macro(KMP_hyper_gather, 0, arg)                                              \
  macro(KMP_hyper_release, 0, arg)                                             \
  macro(KMP_linear_gather, 0, arg)                                             \
  macro(KMP_linear_release, 0, arg)                                            \
  macro(KMP_tree_gather, 0, arg)                                               \
  macro(KMP_tree_release, 0, arg)                                              \
  macro(USER_resume, 0, arg)                                                   \
  macro(USER_suspend, 0, arg)                                                  \
  macro(KMP_allocate_team, 0, arg)                                             \
  macro(KMP_setup_icv_copy, 0, arg)                                            \
  macro(USER_icv_copy, 0, arg)
229 #else
230 #define KMP_FOREACH_DEVELOPER_TIMER(macro, arg)
231 #endif
232 
252 #define KMP_FOREACH_EXPLICIT_TIMER(macro, arg) KMP_FOREACH_TIMER(macro, arg)
253 
254 #define ENUMERATE(name, ignore, prefix) prefix##name,
255 enum timer_e { KMP_FOREACH_TIMER(ENUMERATE, TIMER_) TIMER_LAST };
256 
257 enum explicit_timer_e {
258  KMP_FOREACH_EXPLICIT_TIMER(ENUMERATE, EXPLICIT_TIMER_) EXPLICIT_TIMER_LAST
259 };
260 
261 enum counter_e { KMP_FOREACH_COUNTER(ENUMERATE, COUNTER_) COUNTER_LAST };
262 #undef ENUMERATE
263 
264 class timerPair {
265  explicit_timer_e timer_index;
266  timer_e timer;
267 
268 public:
269  timerPair(explicit_timer_e ti, timer_e t) : timer_index(ti), timer(t) {}
270  inline explicit_timer_e get_index() const { return timer_index; }
271  inline timer_e get_timer() const { return timer; }
272  bool operator==(const timerPair &rhs) {
273  return this->get_index() == rhs.get_index();
274  }
275  bool operator!=(const timerPair &rhs) { return !(*this == rhs); }
276 };
277 
// Accumulator for a stream of samples: tracks the sample count, minimum,
// maximum, mean and the m2 term from which the standard deviation is derived
// (see getSD). addSample(), scale(), operator+= and format() are defined out
// of line.
class statistic {
  double minVal;
  double maxVal;
  double meanVal;
  double m2; // getSD() is sqrt(m2 / sampleCount)
  uint64_t sampleCount;

public:
  statistic() { reset(); }
  statistic(statistic const &o)
      : minVal(o.minVal), maxVal(o.maxVal), meanVal(o.meanVal), m2(o.m2),
        sampleCount(o.sampleCount) {}

  double getMin() const { return minVal; }
  double getMean() const { return meanVal; }
  double getMax() const { return maxVal; }
  uint64_t getCount() const { return sampleCount; }
  // Standard deviation of the samples seen so far. With no samples the
  // quotient would be 0.0 / 0 (NaN), so an empty statistic reports 0.0.
  double getSD() const {
    return sampleCount == 0 ? 0.0 : sqrt(m2 / sampleCount);
  }
  double getTotal() const { return sampleCount * meanVal; }

  // Returns the accumulator to its empty state. min/max are set to the
  // extreme finite doubles (largest for min, most negative for max).
  void reset() {
    minVal = std::numeric_limits<double>::max();
    maxVal = -std::numeric_limits<double>::max();
    meanVal = 0.0;
    m2 = 0.0;
    sampleCount = 0;
  }
  void addSample(double sample);
  void scale(double factor);
  // Divides by f, implemented as scale(1 / f).
  void scaleDown(double f) { scale(1. / f); }
  statistic &operator+=(statistic const &other);

  std::string format(char unit, bool total = false) const;
};
312 
// Static description of one statistic (one entry of timeStat::timerInfo or
// counter::counterInfo).
struct statInfo {
  const char *name; // name reported by timeStat::name() / counter::name()
  uint32_t flags;   // tested against stats_flags_e bits
};
317 
318 class timeStat : public statistic {
319  static statInfo timerInfo[];
320 
321 public:
322  timeStat() : statistic() {}
323  static const char *name(timer_e e) { return timerInfo[e].name; }
324  static bool noTotal(timer_e e) {
325  return timerInfo[e].flags & stats_flags_e::noTotal;
326  }
327  static bool masterOnly(timer_e e) {
328  return timerInfo[e].flags & stats_flags_e::onlyInMaster;
329  }
330  static bool workerOnly(timer_e e) {
331  return timerInfo[e].flags & stats_flags_e::notInMaster;
332  }
333  static bool noUnits(timer_e e) {
334  return timerInfo[e].flags & stats_flags_e::noUnits;
335  }
336  static bool logEvent(timer_e e) {
337  return timerInfo[e].flags & stats_flags_e::logEvent;
338  }
339  static void clearEventFlags() {
340  for (int i = 0; i < TIMER_LAST; i++) {
341  timerInfo[i].flags &= (~(stats_flags_e::logEvent));
342  }
343  }
344 };
345 
346 // Where we need explicitly to start and end the timer, this version can be used
347 // Since these timers normally aren't nicely scoped, so don't have a good place
348 // to live on the stack of the thread, they're more work to use.
349 class explicitTimer {
350  timeStat *stat;
351  tsc_tick_count startTime;
352  tsc_tick_count pauseStartTime;
353  tsc_tick_count::tsc_interval_t totalPauseTime;
354 
355 public:
356  explicitTimer()
357  : stat(0), startTime(0), pauseStartTime(0), totalPauseTime() {}
358  explicitTimer(timeStat *s)
359  : stat(s), startTime(), pauseStartTime(0), totalPauseTime() {}
360 
361  void setStat(timeStat *s) { stat = s; }
362  void start(timer_e timerEnumValue);
363  void pause() { pauseStartTime = tsc_tick_count::now(); }
364  void resume() { totalPauseTime += (tsc_tick_count::now() - pauseStartTime); }
365  void stop(timer_e timerEnumValue, kmp_stats_list *stats_ptr = nullptr);
366  void reset() {
367  startTime = 0;
368  pauseStartTime = 0;
369  totalPauseTime = 0;
370  }
371 };
372 
373 // Where all you need is to time a block, this is enough.
374 // (It avoids the need to have an explicit end, leaving the scope suffices.)
375 class blockTimer : public explicitTimer {
376  timer_e timerEnumValue;
377 
378 public:
379  blockTimer(timeStat *s, timer_e newTimerEnumValue)
380  : timerEnumValue(newTimerEnumValue), explicitTimer(s) {
381  start(timerEnumValue);
382  }
383  ~blockTimer() { stop(timerEnumValue); }
384 };
385 
386 // Where you need to partition a threads clock ticks into separate states
387 // e.g., a partitionedTimers class with two timers of EXECUTING_TASK, and
388 // DOING_NOTHING would render these conditions:
389 // time(EXECUTING_TASK) + time(DOING_NOTHING) = total time thread is alive
390 // No clock tick in the EXECUTING_TASK is a member of DOING_NOTHING and vice
391 // versa
392 class partitionedTimers {
393 private:
394  explicitTimer *timers[EXPLICIT_TIMER_LAST + 1];
395  std::vector<timerPair> timer_stack;
396 
397 public:
398  partitionedTimers();
399  void add_timer(explicit_timer_e timer_index, explicitTimer *timer_pointer);
400  void init(timerPair timer_index);
401  void push(timerPair timer_index);
402  void pop();
403  void windup();
404 };
405 
406 // Special wrapper around the partioned timers to aid timing code blocks
407 // It avoids the need to have an explicit end, leaving the scope suffices.
408 class blockPartitionedTimer {
409  partitionedTimers *part_timers;
410  timerPair timer_pair;
411 
412 public:
413  blockPartitionedTimer(partitionedTimers *pt, timerPair tp)
414  : part_timers(pt), timer_pair(tp) {
415  part_timers->push(timer_pair);
416  }
417  ~blockPartitionedTimer() { part_timers->pop(); }
418 };
419 
420 // Special wrapper around the thread state to aid in keeping state in code
421 // blocks It avoids the need to have an explicit end, leaving the scope
422 // suffices.
423 class blockThreadState {
424  stats_state_e *state_pointer;
425  stats_state_e old_state;
426 
427 public:
428  blockThreadState(stats_state_e *thread_state_pointer, stats_state_e new_state)
429  : state_pointer(thread_state_pointer), old_state(*thread_state_pointer) {
430  *state_pointer = new_state;
431  }
432  ~blockThreadState() { *state_pointer = old_state; }
433 };
434 
435 // If all you want is a count, then you can use this...
436 // The individual per-thread counts will be aggregated into a statistic at
437 // program exit.
438 class counter {
439  uint64_t value;
440  static const statInfo counterInfo[];
441 
442 public:
443  counter() : value(0) {}
444  void increment() { value++; }
445  uint64_t getValue() const { return value; }
446  void reset() { value = 0; }
447  static const char *name(counter_e e) { return counterInfo[e].name; }
448  static bool masterOnly(counter_e e) {
449  return counterInfo[e].flags & stats_flags_e::onlyInMaster;
450  }
451 };
452 
453 /* ****************************************************************
454  Class to implement an event
455 
456  There are four components to an event: start time, stop time
457  nest_level, and timer_name.
458  The start and stop time should be obvious (recorded in clock ticks).
459  The nest_level relates to the bar width in the timeline graph.
460  The timer_name is used to determine which timer event triggered this event.
461 
462  the interface to this class is through four read-only operations:
463  1) getStart() -- returns the start time as 64 bit integer
464  2) getStop() -- returns the stop time as 64 bit integer
465  3) getNestLevel() -- returns the nest level of the event
466  4) getTimerName() -- returns the timer name that triggered event
467 
468  *MORE ON NEST_LEVEL*
469  The nest level is used in the bar graph that represents the timeline.
470  Its main purpose is for showing how events are nested inside each other.
471  For example, say events, A, B, and C are recorded. If the timeline
472  looks like this:
473 
474 Begin -------------------------------------------------------------> Time
475  | | | | | |
476  A B C C B A
477  start start start end end end
478 
479  Then A, B, C will have a nest level of 1, 2, 3 respectively.
480  These values are then used to calculate the barwidth so you can
481  see that inside A, B has occurred, and inside B, C has occurred.
482  Currently, this is shown with A's bar width being larger than B's
483  bar width, and B's bar width being larger than C's bar width.
484 
485 **************************************************************** */
486 class kmp_stats_event {
487  uint64_t start;
488  uint64_t stop;
489  int nest_level;
490  timer_e timer_name;
491 
492 public:
493  kmp_stats_event()
494  : start(0), stop(0), nest_level(0), timer_name(TIMER_LAST) {}
495  kmp_stats_event(uint64_t strt, uint64_t stp, int nst, timer_e nme)
496  : start(strt), stop(stp), nest_level(nst), timer_name(nme) {}
497  inline uint64_t getStart() const { return start; }
498  inline uint64_t getStop() const { return stop; }
499  inline int getNestLevel() const { return nest_level; }
500  inline timer_e getTimerName() const { return timer_name; }
501 };
502 
503 /* ****************************************************************
504  Class to implement a dynamically expandable array of events
505 
506  ---------------------------------------------------------
507  | event 1 | event 2 | event 3 | event 4 | ... | event N |
508  ---------------------------------------------------------
509 
510  An event is pushed onto the back of this array at every
511  explicitTimer->stop() call. The event records the thread #,
512  start time, stop time, and nest level related to the bar width.
513 
514  The event vector starts at size INIT_SIZE and grows (doubles in size)
515  if needed. An implication of this behavior is that log(N)
516  reallocations are needed (where N is number of events). If you want
517  to avoid reallocations, then set INIT_SIZE to a large value.
518 
519  the interface to this class is through six operations:
520  1) reset() -- sets the internal_size back to 0 but does not deallocate any
521  memory
522  2) size() -- returns the number of valid elements in the vector
523  3) push_back(start, stop, nest, timer_name) -- pushes an event onto
524  the back of the array
525  4) deallocate() -- frees all memory associated with the vector
526  5) sort() -- sorts the vector by start time
527  6) operator[index] or at(index) -- returns event reference at that index
528 **************************************************************** */
529 class kmp_stats_event_vector {
530  kmp_stats_event *events;
531  int internal_size;
532  int allocated_size;
533  static const int INIT_SIZE = 1024;
534 
535 public:
536  kmp_stats_event_vector() {
537  events =
538  (kmp_stats_event *)__kmp_allocate(sizeof(kmp_stats_event) * INIT_SIZE);
539  internal_size = 0;
540  allocated_size = INIT_SIZE;
541  }
542  ~kmp_stats_event_vector() {}
543  inline void reset() { internal_size = 0; }
544  inline int size() const { return internal_size; }
545  void push_back(uint64_t start_time, uint64_t stop_time, int nest_level,
546  timer_e name) {
547  int i;
548  if (internal_size == allocated_size) {
549  kmp_stats_event *tmp = (kmp_stats_event *)__kmp_allocate(
550  sizeof(kmp_stats_event) * allocated_size * 2);
551  for (i = 0; i < internal_size; i++)
552  tmp[i] = events[i];
553  __kmp_free(events);
554  events = tmp;
555  allocated_size *= 2;
556  }
557  events[internal_size] =
558  kmp_stats_event(start_time, stop_time, nest_level, name);
559  internal_size++;
560  return;
561  }
562  void deallocate();
563  void sort();
564  const kmp_stats_event &operator[](int index) const { return events[index]; }
565  kmp_stats_event &operator[](int index) { return events[index]; }
566  const kmp_stats_event &at(int index) const { return events[index]; }
567  kmp_stats_event &at(int index) { return events[index]; }
568 };
569 
570 /* ****************************************************************
571  Class to implement a doubly-linked, circular, statistics list
572 
573  |---| ---> |---| ---> |---| ---> |---| ---> ... next
574  | | | | | | | |
575  |---| <--- |---| <--- |---| <--- |---| <--- ... prev
576  Sentinel first second third
577  Node node node node
578 
579  The Sentinel Node is the user handle on the list.
580  The first node corresponds to thread 0's statistics.
581  The second node corresponds to thread 1's statistics and so on...
582 
583  Each node has a _timers, _counters, and _explicitTimers array to hold that
584  thread's statistics. The _explicitTimers point to the correct _timer and
585  update its statistics at every stop() call. The explicitTimers' pointers are
586  set up in the constructor. Each node also has an event vector to hold that
587  thread's timing events. The event vector expands as necessary and records
588  the start-stop times for each timer.
589 
590  The nestLevel variable is for plotting events and is related
591  to the bar width in the timeline graph.
592 
593  Every thread will have a __thread local pointer to its node in
594  the list. The sentinel node is used by the master thread to
595  store "dummy" statistics before __kmp_create_worker() is called.
596 **************************************************************** */
597 class kmp_stats_list {
598  int gtid;
599  timeStat _timers[TIMER_LAST + 1];
600  counter _counters[COUNTER_LAST + 1];
601  explicitTimer _explicitTimers[EXPLICIT_TIMER_LAST + 1];
602  partitionedTimers _partitionedTimers;
603  int _nestLevel; // one per thread
604  kmp_stats_event_vector _event_vector;
605  kmp_stats_list *next;
606  kmp_stats_list *prev;
607  stats_state_e state;
608  int thread_is_idle_flag;
609 
610 public:
611  kmp_stats_list()
612  : _nestLevel(0), _event_vector(), next(this), prev(this), state(IDLE),
613  thread_is_idle_flag(0) {
614 #define doInit(name, ignore1, ignore2) \
615  getExplicitTimer(EXPLICIT_TIMER_##name)->setStat(getTimer(TIMER_##name)); \
616  _partitionedTimers.add_timer(EXPLICIT_TIMER_##name, \
617  getExplicitTimer(EXPLICIT_TIMER_##name));
618  KMP_FOREACH_EXPLICIT_TIMER(doInit, 0);
619 #undef doInit
620  }
621  ~kmp_stats_list() {}
622  inline timeStat *getTimer(timer_e idx) { return &_timers[idx]; }
623  inline counter *getCounter(counter_e idx) { return &_counters[idx]; }
624  inline explicitTimer *getExplicitTimer(explicit_timer_e idx) {
625  return &_explicitTimers[idx];
626  }
627  inline partitionedTimers *getPartitionedTimers() {
628  return &_partitionedTimers;
629  }
630  inline timeStat *getTimers() { return _timers; }
631  inline counter *getCounters() { return _counters; }
632  inline explicitTimer *getExplicitTimers() { return _explicitTimers; }
633  inline kmp_stats_event_vector &getEventVector() { return _event_vector; }
634  inline void resetEventVector() { _event_vector.reset(); }
635  inline void incrementNestValue() { _nestLevel++; }
636  inline int getNestValue() { return _nestLevel; }
637  inline void decrementNestValue() { _nestLevel--; }
638  inline int getGtid() const { return gtid; }
639  inline void setGtid(int newgtid) { gtid = newgtid; }
640  inline void setState(stats_state_e newstate) { state = newstate; }
641  inline stats_state_e getState() const { return state; }
642  inline stats_state_e *getStatePointer() { return &state; }
643  inline bool isIdle() { return thread_is_idle_flag == 1; }
644  inline void setIdleFlag() { thread_is_idle_flag = 1; }
645  inline void resetIdleFlag() { thread_is_idle_flag = 0; }
646  kmp_stats_list *push_back(int gtid); // returns newly created list node
647  inline void push_event(uint64_t start_time, uint64_t stop_time,
648  int nest_level, timer_e name) {
649  _event_vector.push_back(start_time, stop_time, nest_level, name);
650  }
651  void deallocate();
652  class iterator;
653  kmp_stats_list::iterator begin();
654  kmp_stats_list::iterator end();
655  int size();
656  class iterator {
657  kmp_stats_list *ptr;
658  friend kmp_stats_list::iterator kmp_stats_list::begin();
659  friend kmp_stats_list::iterator kmp_stats_list::end();
660 
661  public:
662  iterator();
663  ~iterator();
664  iterator operator++();
665  iterator operator++(int dummy);
666  iterator operator--();
667  iterator operator--(int dummy);
668  bool operator!=(const iterator &rhs);
669  bool operator==(const iterator &rhs);
670  kmp_stats_list *operator*() const; // dereference operator
671  };
672 };
673 
674 /* ****************************************************************
675  Class to encapsulate all output functions and the environment variables
676 
677  This module holds filenames for various outputs (normal stats, events, plot
678  file), as well as coloring information for the plot file.
679 
680  The filenames and flags variables are read from environment variables.
681  These are read once by the constructor of the global variable
682  __kmp_stats_output which calls init().
683 
684  During this init() call, event flags for the timeStat::timerInfo[] global
685  array are cleared if KMP_STATS_EVENTS is not true (on, 1, yes).
686 
687  The only interface function that is public is outputStats(heading). This
688  function should print out everything it needs to, either to files or stderr,
689  depending on the environment variables described below
690 
691  ENVIRONMENT VARIABLES:
692  KMP_STATS_FILE -- if set, all statistics (not events) will be printed to this
693  file, otherwise, print to stderr
694  KMP_STATS_THREADS -- if set to "on", then will print per thread statistics to
695  either KMP_STATS_FILE or stderr
696  KMP_STATS_PLOT_FILE -- if set, print the ploticus plot file to this filename,
697  otherwise, the plot file is sent to "events.plt"
698  KMP_STATS_EVENTS -- if set to "on", then log events, otherwise, don't log
699  events
700  KMP_STATS_EVENTS_FILE -- if set, all events are outputted to this file,
701  otherwise, output is sent to "events.dat"
702 **************************************************************** */
703 class kmp_stats_output_module {
704 
705 public:
706  struct rgb_color {
707  float r;
708  float g;
709  float b;
710  };
711 
712 private:
713  std::string outputFileName;
714  static const char *eventsFileName;
715  static const char *plotFileName;
716  static int printPerThreadFlag;
717  static int printPerThreadEventsFlag;
718  static const rgb_color globalColorArray[];
719  static rgb_color timerColorInfo[];
720 
721  void init();
722  static void setupEventColors();
723  static void printPloticusFile();
724  static void printHeaderInfo(FILE *statsOut);
725  static void printTimerStats(FILE *statsOut, statistic const *theStats,
726  statistic const *totalStats);
727  static void printCounterStats(FILE *statsOut, statistic const *theStats);
728  static void printCounters(FILE *statsOut, counter const *theCounters);
729  static void printEvents(FILE *eventsOut, kmp_stats_event_vector *theEvents,
730  int gtid);
731  static rgb_color getEventColor(timer_e e) { return timerColorInfo[e]; }
732  static void windupExplicitTimers();
733  bool eventPrintingEnabled() const { return printPerThreadEventsFlag; }
734 
735 public:
736  kmp_stats_output_module() { init(); }
737  void outputStats(const char *heading);
738 };
739 
740 #ifdef __cplusplus
741 extern "C" {
742 #endif
743 void __kmp_stats_init();
744 void __kmp_stats_fini();
745 void __kmp_reset_stats();
746 void __kmp_output_stats(const char *);
747 void __kmp_accumulate_stats_at_exit(void);
748 // thread local pointer to stats node within list
749 extern __thread kmp_stats_list *__kmp_stats_thread_ptr;
750 // head to stats list.
751 extern kmp_stats_list *__kmp_stats_list;
752 // lock for __kmp_stats_list
753 extern kmp_tas_lock_t __kmp_stats_lock;
754 // reference start time
755 extern tsc_tick_count __kmp_stats_start_time;
756 // interface to output
757 extern kmp_stats_output_module __kmp_stats_output;
758 
759 #ifdef __cplusplus
760 }
761 #endif
762 
763 // Simple, standard interfaces that drop out completely if stats aren't enabled
764 
// Declares a blockTimer for timer `name` on the calling thread's stats node;
// the timer runs until the enclosing scope is left.
#define KMP_TIME_BLOCK(name)                                                   \
  blockTimer __BLOCKTIME__(                                                    \
      __kmp_stats_thread_ptr->getTimer(TIMER_##name), TIMER_##name)

// Adds `value` as a sample to the statistic of timer `name`.
#define KMP_COUNT_VALUE(name, value)                                           \
  __kmp_stats_thread_ptr->getTimer(TIMER_##name)->addSample(value)

// Increments counter `name` by one.
#define KMP_COUNT_BLOCK(name)                                                  \
  __kmp_stats_thread_ptr->getCounter(COUNTER_##name)->increment()

// Starts explicit timer `name`.
#define KMP_START_EXPLICIT_TIMER(name)                                         \
  __kmp_stats_thread_ptr                                                       \
      ->getExplicitTimer(EXPLICIT_TIMER_##name)                                \
      ->start(TIMER_##name)

// Stops explicit timer `name`.
#define KMP_STOP_EXPLICIT_TIMER(name)                                          \
  __kmp_stats_thread_ptr                                                       \
      ->getExplicitTimer(EXPLICIT_TIMER_##name)                                \
      ->stop(TIMER_##name)

// Forwards to __kmp_output_stats() with the given heading.
#define KMP_OUTPUT_STATS(heading_string) __kmp_output_stats(heading_string)

// Calls init() on the thread's partitioned timers with timer `name`.
#define KMP_INIT_PARTITIONED_TIMERS(name)                                      \
  __kmp_stats_thread_ptr->getPartitionedTimers()                               \
      ->init(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Declares a blockPartitionedTimer: pushes timer `name` now and pops it when
// the enclosing scope is left.
#define KMP_TIME_PARTITIONED_BLOCK(name)                                       \
  blockPartitionedTimer __PBLOCKTIME__(                                        \
      __kmp_stats_thread_ptr->getPartitionedTimers(),                          \
      timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pushes timer `name` onto the thread's partitioned timers.
#define KMP_PUSH_PARTITIONED_TIMER(name)                                       \
  __kmp_stats_thread_ptr->getPartitionedTimers()                               \
      ->push(timerPair(EXPLICIT_TIMER_##name, TIMER_##name))

// Pops the thread's partitioned timers.
#define KMP_POP_PARTITIONED_TIMER()                                            \
  __kmp_stats_thread_ptr->getPartitionedTimers()->pop()

// Sets / reads the thread state stored in the thread's stats node.
#define KMP_SET_THREAD_STATE(state_name)                                       \
  __kmp_stats_thread_ptr->setState(state_name)

#define KMP_GET_THREAD_STATE() __kmp_stats_thread_ptr->getState()

// Declares a blockThreadState: installs `state_name` now and restores the
// previous state when the enclosing scope is left.
#define KMP_SET_THREAD_STATE_BLOCK(state_name)                                 \
  blockThreadState __BTHREADSTATE__(                                           \
      __kmp_stats_thread_ptr->getStatePointer(), state_name)

// Forwards to __kmp_reset_stats().
#define KMP_RESET_STATS() __kmp_reset_stats()
905 
// Developer variants of the stats macros: they forward to the regular macros
// when KMP_DEVELOPER_STATS is enabled and expand to a no-op otherwise.
#if KMP_DEVELOPER_STATS
#define KMP_TIME_DEVELOPER_BLOCK(n) KMP_TIME_BLOCK(n)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) KMP_TIME_PARTITIONED_BLOCK(n)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) KMP_COUNT_VALUE(n, v)
#define KMP_COUNT_DEVELOPER_BLOCK(n) KMP_COUNT_BLOCK(n)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) KMP_START_EXPLICIT_TIMER(n)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) KMP_STOP_EXPLICIT_TIMER(n)
#else
// Null definitions
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#endif
922 
923 #else // KMP_STATS_ENABLED
924 
// Null definitions: with statistics disabled every stats macro expands to the
// no-op expression ((void)0).

// Timers, counters and explicit timers
#define KMP_TIME_BLOCK(n) ((void)0)
#define KMP_COUNT_VALUE(n, v) ((void)0)
#define KMP_COUNT_BLOCK(n) ((void)0)
#define KMP_START_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_EXPLICIT_TIMER(n) ((void)0)

// Output and reset
#define KMP_OUTPUT_STATS(heading_string) ((void)0)
#define KMP_RESET_STATS() ((void)0)

// Partitioned timers
#define KMP_INIT_PARTITIONED_TIMERS(name) ((void)0)
#define KMP_TIME_PARTITIONED_BLOCK(name) ((void)0)
#define KMP_PUSH_PARTITIONED_TIMER(name) ((void)0)
#define KMP_POP_PARTITIONED_TIMER() ((void)0)

// Thread state
#define KMP_SET_THREAD_STATE(state_name) ((void)0)
#define KMP_GET_THREAD_STATE() ((void)0)
#define KMP_SET_THREAD_STATE_BLOCK(state_name) ((void)0)

// Developer variants
#define KMP_TIME_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_TIME_DEVELOPER_PARTITIONED_BLOCK(n) ((void)0)
#define KMP_COUNT_DEVELOPER_VALUE(n, v) ((void)0)
#define KMP_COUNT_DEVELOPER_BLOCK(n) ((void)0)
#define KMP_START_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
#define KMP_STOP_DEVELOPER_EXPLICIT_TIMER(n) ((void)0)
948 #endif // KMP_STATS_ENABLED
949 
950 #endif // KMP_STATS_H
statistic is valid only for master
Definition: kmp_stats.h:50
statistic is valid only for non-master threads
Definition: kmp_stats.h:53
do not show a TOTAL_aggregation for this statistic
Definition: kmp_stats.h:49
#define KMP_FOREACH_EXPLICIT_TIMER(macro, arg)
Add new explicit timers under KMP_FOREACH_EXPLICIT_TIMER() macro.
Definition: kmp_stats.h:252
statistic doesn't need units printed next to it in output
Definition: kmp_stats.h:51
stats_flags_e
flags to describe the statistic (timer or counter)
Definition: kmp_stats.h:48
#define KMP_FOREACH_COUNTER(macro, arg)
Add new counters under KMP_FOREACH_COUNTER() macro in kmp_stats.h.
Definition: kmp_stats.h:94
stats_state_e
the states which a thread can be in
Definition: kmp_stats.h:63