Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_TimeMonitor.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_TimeMonitor.hpp"
43 #include "Teuchos_CommHelpers.hpp"
44 #include "Teuchos_DefaultComm.hpp"
45 #include "Teuchos_TableColumn.hpp"
46 #include "Teuchos_TableFormat.hpp"
47 #include "Teuchos_StandardParameterEntryValidators.hpp"
48 #include "Teuchos_ScalarTraits.hpp"
49 #include <functional>
50 
51 
52 namespace Teuchos {
105  template<class Ordinal, class ScalarType, class IndexType>
106  class MaxLoc :
107  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
108  public:
109  void
110  reduce (const Ordinal count,
111  const std::pair<ScalarType, IndexType> inBuffer[],
112  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
113  };
114 
115  template<class Ordinal>
116  class MaxLoc<Ordinal, double, int> :
117  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
118  public:
119  void
120  reduce (const Ordinal count,
121  const std::pair<double, int> inBuffer[],
122  std::pair<double, int> inoutBuffer[]) const
123  {
124  for (Ordinal ind = 0; ind < count; ++ind) {
125  const std::pair<double, int>& in = inBuffer[ind];
126  std::pair<double, int>& inout = inoutBuffer[ind];
127 
128  if (in.first > inout.first) {
129  inout.first = in.first;
130  inout.second = in.second;
131  } else if (in.first < inout.first) {
132  // Don't need to do anything; inout has the values.
133  } else { // equal, or at least one is NaN.
134  inout.first = in.first;
135  inout.second = std::min (in.second, inout.second);
136  }
137  }
138  }
139  };
140 
167  template<class Ordinal, class ScalarType, class IndexType>
168  class MinLoc :
169  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
170  public:
171  void
172  reduce (const Ordinal count,
173  const std::pair<ScalarType, IndexType> inBuffer[],
174  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
175  };
176 
177  template<class Ordinal>
178  class MinLoc<Ordinal, double, int> :
179  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
180  public:
181  void
182  reduce (const Ordinal count,
183  const std::pair<double, int> inBuffer[],
184  std::pair<double, int> inoutBuffer[]) const
185  {
186  for (Ordinal ind = 0; ind < count; ++ind) {
187  const std::pair<double, int>& in = inBuffer[ind];
188  std::pair<double, int>& inout = inoutBuffer[ind];
189 
190  if (in.first < inout.first) {
191  inout.first = in.first;
192  inout.second = in.second;
193  } else if (in.first > inout.first) {
194  // Don't need to do anything; inout has the values.
195  } else { // equal, or at least one is NaN.
196  inout.first = in.first;
197  inout.second = std::min (in.second, inout.second);
198  }
199  }
200  }
201  };
202 
206  template<class Ordinal, class ScalarType, class IndexType>
208  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
209  public:
210  void
211  reduce (const Ordinal count,
212  const std::pair<ScalarType, IndexType> inBuffer[],
213  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
214  };
215 
216  template<class Ordinal>
217  class MinLocNonzero<Ordinal, double, int> :
218  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
219  public:
220  void
221  reduce (const Ordinal count,
222  const std::pair<double, int> inBuffer[],
223  std::pair<double, int> inoutBuffer[]) const
224  {
225  for (Ordinal ind = 0; ind < count; ++ind) {
226  const std::pair<double, int>& in = inBuffer[ind];
227  std::pair<double, int>& inout = inoutBuffer[ind];
228 
229  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
230  inout.first = in.first;
231  inout.second = in.second;
232  } else if (in.first > inout.first) {
233  // Don't need to do anything; inout has the values.
234  } else { // equal, or at least one is NaN.
235  inout.first = in.first;
236  inout.second = std::min (in.second, inout.second);
237  }
238  }
239  }
240  };
241 
242  // Typedef used internally by TimeMonitor::summarize() and its
243  // helper functions. The map is keyed on timer label (a string).
244  // Each value is a pair: (total number of seconds over all calls to
245  // that timer, total number of calls to that timer).
// Since this is a std::map, iterating over it visits the timer
// labels in sorted order.
246  typedef std::map<std::string, std::pair<double, int> > timer_map_t;
247 
248  TimeMonitor::TimeMonitor (Time& timer, bool reset)
249  : PerformanceMonitorBase<Time>(timer, reset)
250  {
251  if (!isRecursiveCall()) counter().start(reset);
252  }
253 
255  if (!isRecursiveCall()) counter().stop();
256  }
257 
258  void
259  TimeMonitor::disableTimer (const std::string& name)
260  {
261  RCP<Time> timer = lookupCounter (name);
263  timer == null, std::invalid_argument,
264  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
265  timer->disable ();
266  }
267 
268  void
269  TimeMonitor::enableTimer (const std::string& name)
270  {
271  RCP<Time> timer = lookupCounter (name);
273  timer == null, std::invalid_argument,
274  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
275  timer->enable ();
276  }
277 
278  void
280  {
281  typedef std::map<std::string, RCP<Time> > map_type;
282  typedef map_type::iterator iter_type;
283  map_type& ctrs = counters ();
284 
285  // In debug mode, loop first to check whether any of the timers
286  // are running, before resetting them. This ensures that this
287  // method satisfies the strong exception guarantee (either it
288  // completes normally, or there are no side effects).
289 #ifdef TEUCHOS_DEBUG
290  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
291  // We throw a runtime_error rather than a logic_error, because
292  // logic_error suggests a bug in the implementation of
293  // TimeMonitor. Calling zeroOutTimers() when a timer is running
294  // is not TimeMonitor's fault.
296  it->second->isRunning (), std::runtime_error,
297  "Timer \"" << it->second->name () << "\" is currently running. "
298  "You are not allowed to reset running timers.");
299  }
300 #endif // TEUCHOS_DEBUG
301 
302  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
303  it->second->reset ();
304  }
305  }
306 
307  // An anonymous namespace is the standard way of limiting linkage of
308  // its contained routines to file scope.
309  namespace {
// \brief Build a placeholder timer entry for the given name.
//
// The returned pair is (name, (elapsed time, call count)) with an
// elapsed time of 0.0 and a call count of 0.  No timer object is
// created or looked up.
//
// \param name The timer's name.
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroTimeAndCalls (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroTimeAndCalls);
}
321 
322  // \fn collectLocalTimerData
323  // \brief Collect and sort local timer data by timer names.
324  //
325  // \param localData [out] Map whose keys are the timer names, and
326  // whose value for each key is the total elapsed time (in
327  // seconds) and the call count for the timer with that name.
328  //
329  // \param localCounters [in] Timers from which to extract data.
330  //
331  // \param filter [in] Filter for timer labels. If filter is not
332  // empty, this method will only collect data for local timers
333  // whose labels begin with this string.
334  //
335  // Extract the total elapsed time and call count from each timer
336  // in the given array. Merge results for timers with duplicate
337  // labels, by summing their total elapsed times and call counts
338  // pairwise.
339  void
340  collectLocalTimerData (timer_map_t& localData,
341  const std::map<std::string, RCP<Time> >& localCounters,
342  const std::string& filter="")
343  {
344  using std::make_pair;
345  typedef timer_map_t::iterator iter_t;
346 
347  timer_map_t theLocalData;
348  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
349  it != localCounters.end(); ++it) {
350  const std::string& name = it->second->name ();
351 
352  // Filter current timer name, if provided filter is nonempty.
353  // Filter string must _start_ the timer label, not just be in it.
354  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
355  if (! skipThisOne) {
356  const double timing = it->second->totalElapsedTime ();
357  const int numCalls = it->second->numCalls ();
358 
359  // Merge timers with duplicate labels, by summing their
360  // total elapsed times and call counts.
361  iter_t loc = theLocalData.find (name);
362  if (loc == theLocalData.end()) {
363  // Use loc as an insertion location hint.
364  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
365  }
366  else {
367  loc->second.first += timing;
368  loc->second.second += numCalls;
369  }
370  }
371  }
372  // This avoids copying the map, and also makes this method
373  // satisfy the strong exception guarantee.
374  localData.swap (theLocalData);
375  }
376 
377  // \brief Locally filter out timer data with zero call counts.
378  //
379  // \param timerData [in/out]
380  void
381  filterZeroData (timer_map_t& timerData)
382  {
383  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
384  // iterator hint, instead of rebuilding the map completely.
385  timer_map_t newTimerData;
386  for (timer_map_t::const_iterator it = timerData.begin();
387  it != timerData.end(); ++it) {
388  if (it->second.second > 0) {
389  newTimerData[it->first] = it->second;
390  }
391  }
392  timerData.swap (newTimerData);
393  }
394 
416  void
417  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
418  Array<std::string>& localTimerNames,
419  const std::map<std::string, RCP<Time> >& localTimers,
420  const bool writeZeroTimers,
421  const std::string& filter="")
422  {
423  // Collect and sort local timer data by timer names.
424  collectLocalTimerData (localTimerData, localTimers, filter);
425 
426  // Filter out zero data locally first. This ensures that if we
427  // are writing global stats, and if a timer name exists in the
428  // set of global names, then that timer has a nonzero call count
429  // on at least one MPI process.
430  if (! writeZeroTimers) {
431  filterZeroData (localTimerData);
432  }
433 
434  // Extract the set of local timer names. The std::map keeps
435  // them sorted alphabetically.
436  localTimerNames.reserve (localTimerData.size());
437  for (timer_map_t::const_iterator it = localTimerData.begin();
438  it != localTimerData.end(); ++it) {
439  localTimerNames.push_back (it->first);
440  }
441  }
442 
477  void
478  collectGlobalTimerData (timer_map_t& globalTimerData,
479  Array<std::string>& globalTimerNames,
480  timer_map_t& localTimerData,
481  Array<std::string>& localTimerNames,
482  Ptr<const Comm<int> > comm,
483  const bool alwaysWriteLocal,
484  const ECounterSetOp setOp)
485  {
486  // There may be some global timers that are not local timers on
487  // the calling MPI process(es). In that case, if
488  // alwaysWriteLocal is true, then we need to fill in the
489  // "missing" local timers. That will ensure that both global
490  // and local timer columns in the output table have the same
491  // number of rows. The collectLocalTimerDataAndNames() method
492  // may have already filtered out local timers with zero call
493  // counts (if its writeZeroTimers argument was false), but we
494  // won't be filtering again. Thus, any local timer data we
495  // insert here won't get filtered out.
496  //
497  // Note that calling summarize() with writeZeroTimers == false
498  // will still do what it says, even if we insert local timers
499  // with zero call counts here.
500 
501  // This does the correct and inexpensive thing (just copies the
502  // timer data) if numProcs == 1. Otherwise, it initiates a
503  // communication with \f$O(\log P)\f$ messages along the
504  // critical path, where \f$P\f$ is the number of participating
505  // processes.
506  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
507 
508 #ifdef TEUCHOS_DEBUG
509  {
510  // Sanity check that all processes have the name number of
511  // global timer names.
512  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
513  timer_map_t::size_type minNumGlobalNames = 0;
514  timer_map_t::size_type maxNumGlobalNames = 0;
515  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
516  outArg (minNumGlobalNames));
517  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
518  outArg (maxNumGlobalNames));
519  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
520  std::logic_error, "Min # global timer names = " << minNumGlobalNames
521  << " != max # global timer names = " << maxNumGlobalNames
522  << ". Please report this bug to the Teuchos developers.");
523  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
524  std::logic_error, "My # global timer names = " << myNumGlobalNames
525  << " != min # global timer names = " << minNumGlobalNames
526  << ". Please report this bug to the Teuchos developers.");
527  }
528 #endif // TEUCHOS_DEBUG
529 
530  // mergeCounterNames() just merges the counters' names, not
531  // their actual data. Now we need to fill globalTimerData with
532  // this process' timer data for the timers in globalTimerNames.
533  //
534  // All processes need the full list of global timers, since
535  // there may be some global timers that are not local timers.
536  // That's why mergeCounterNames() has to be an all-reduce, not
537  // just a reduction to Proc 0.
538  //
539  // Insertion optimization: if the iterator given to map::insert
540  // points right before where we want to insert, insertion is
541  // O(1). globalTimerNames is sorted, so feeding the iterator
542  // output of map::insert into the next invocation's input should
543  // make the whole insertion O(N) where N is the number of
544  // entries in globalTimerNames.
545  timer_map_t::iterator globalMapIter = globalTimerData.begin();
546  timer_map_t::iterator localMapIter;
547  for (Array<string>::const_iterator it = globalTimerNames.begin();
548  it != globalTimerNames.end(); ++it) {
549  const std::string& globalName = *it;
550  localMapIter = localTimerData.find (globalName);
551 
552  if (localMapIter == localTimerData.end()) {
553  if (alwaysWriteLocal) {
554  // If there are some global timers that are not local
555  // timers, and if we want to print local timers, we insert
556  // a local timer datum with zero elapsed time and zero
557  // call count into localTimerData as well. This will
558  // ensure that both global and local timer columns in the
559  // output table have the same number of rows.
560  //
561  // We really only need to do this on Proc 0, which is the
562  // only process that currently may print local timers.
563  // However, we do it on all processes, just in case
564  // someone later wants to modify this function to print
565  // out local timer data for some process other than Proc
566  // 0. This extra computation won't affect the cost along
567  // the critical path, for future computations in which
568  // Proc 0 participates.
569  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
570 
571  // Make sure the missing global name gets added to the
572  // list of local names. We'll re-sort the list of local
573  // names below.
574  localTimerNames.push_back (globalName);
575  }
576  // There's a global timer that's not a local timer. Add it
577  // to our pre-merge version of the global timer data so that
578  // we can safely merge the global timer data later.
579  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
580  }
581  else {
582  // We have this global timer name in our local timer list.
583  // Fill in our pre-merge version of the global timer data
584  // with our local data.
585  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
586  }
587  }
588 
589  if (alwaysWriteLocal) {
590  // Re-sort the list of local timer names, since we may have
591  // inserted "missing" names above.
592  std::sort (localTimerNames.begin(), localTimerNames.end());
593  }
594 
595 #ifdef TEUCHOS_DEBUG
596  {
597  // Sanity check that all processes have the name number of
598  // global timers.
599  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
600  timer_map_t::size_type minNumGlobalTimers = 0;
601  timer_map_t::size_type maxNumGlobalTimers = 0;
602  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
603  outArg (minNumGlobalTimers));
604  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
605  outArg (maxNumGlobalTimers));
606  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
607  std::logic_error, "Min # global timers = " << minNumGlobalTimers
608  << " != max # global timers = " << maxNumGlobalTimers
609  << ". Please report this bug to the Teuchos developers.");
610  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
611  std::logic_error, "My # global timers = " << myNumGlobalTimers
612  << " != min # global timers = " << minNumGlobalTimers
613  << ". Please report this bug to the Teuchos developers.");
614  }
615 #endif // TEUCHOS_DEBUG
616  }
617 
664  void
665  computeGlobalTimerStats (stat_map_type& statData,
666  std::vector<std::string>& statNames,
667  Ptr<const Comm<int> > comm,
668  const timer_map_t& globalTimerData,
669  const bool ignoreZeroTimers)
670  {
671  using Teuchos::ScalarTraits;
672 
673  const int numTimers = static_cast<int> (globalTimerData.size());
674  const int numProcs = comm->getSize();
675 
676  // Extract pre-reduction timings and call counts into a
677  // sequential array. This array will be in the same order as
678  // the global timer names are in the map.
679  Array<std::pair<double, int> > timingsAndCallCounts;
680  timingsAndCallCounts.reserve (numTimers);
681  for (timer_map_t::const_iterator it = globalTimerData.begin();
682  it != globalTimerData.end(); ++it) {
683  timingsAndCallCounts.push_back (it->second);
684  }
685 
686  // For each timer name, compute the min timing and its
687  // corresponding call count. If two processes have the same
688  // timing but different call counts, the minimum call count will
689  // be used.
690  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
691  if (numTimers > 0) {
692  if (ignoreZeroTimers)
693  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
694  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
695  else
696  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
697  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
698  }
699 
700  // For each timer name, compute the max timing and its
701  // corresponding call count. If two processes have the same
702  // timing but different call counts, the minimum call count will
703  // be used.
704  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
705  if (numTimers > 0) {
706  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
707  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
708  }
709 
710  // For each timer name, compute the mean-over-processes timing,
711  // the mean call count, and the mean-over-call-counts timing.
712  // The mean call count is reported as a double to allow a
713  // fractional value.
714  //
715  // Each local timing is really the total timing over all local
716  // invocations. The number of local invocations is the call
717  // count. Thus, the mean-over-call-counts timing is the sum of
718  // all the timings (over all processes), divided by the sum of
719  // all the call counts (over all processes). We compute it in a
720  // different way to over unnecessary overflow.
721  Array<double> meanOverCallCountsTimings (numTimers);
722  Array<double> meanOverProcsTimings (numTimers);
723  Array<double> meanCallCounts (numTimers);
724  Array<int> ICallThisTimer (numTimers);
725  Array<int> numProcsCallingEachTimer (numTimers);
726  {
727  // Figure out how many processors actually call each timer.
728  if (ignoreZeroTimers) {
729  for (int k = 0; k < numTimers; ++k) {
730  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
731  if (callCount > 0) ICallThisTimer[k] = 1;
732  else ICallThisTimer[k] = 0;
733  }
734  if (numTimers > 0) {
735  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
736  &numProcsCallingEachTimer[0]);
737  }
738  }
739 
740  // When summing, first scale by the number of processes. This
741  // avoids unnecessary overflow, and also gives us the mean
742  // call count automatically.
743  Array<double> scaledTimings (numTimers);
744  Array<double> scaledCallCounts (numTimers);
745  const double P = static_cast<double> (numProcs);
746 
747  if (ignoreZeroTimers) {
748  for (int k = 0; k < numTimers; ++k) {
749  const double timing = timingsAndCallCounts[k].first;
750  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
751 
752  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
753  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
754  }
755  }
756  else {
757  for (int k = 0; k < numTimers; ++k) {
758  const double timing = timingsAndCallCounts[k].first;
759  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
760 
761  scaledTimings[k] = timing / P;
762  scaledCallCounts[k] = callCount / P;
763  }
764  }
765 
766  if (numTimers > 0) {
767  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
768  &meanOverProcsTimings[0]);
769  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
770  &meanCallCounts[0]);
771  }
772  // We don't have to undo the scaling for the mean timings;
773  // just divide by the scaled call count.
774  for (int k = 0; k < numTimers; ++k) {
775  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
776  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
777  }
778  else {
779  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
780  }
781  }
782  }
783 
784  // Reformat the data into the map of statistics. Be sure that
785  // each value (the std::vector of (timing, call count) pairs,
786  // each entry of which is a different statistic) preserves the
787  // order of statNames.
788  statNames.resize (4);
789  statNames[0] = "MinOverProcs";
790  statNames[1] = "MeanOverProcs";
791  statNames[2] = "MaxOverProcs";
792  statNames[3] = "MeanOverCallCounts";
793 
794  stat_map_type::iterator statIter = statData.end();
795  timer_map_t::const_iterator it = globalTimerData.begin();
796  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
797  std::vector<std::pair<double, double> > curData (4);
798  curData[0] = minTimingsAndCallCounts[k];
799  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
800  curData[2] = maxTimingsAndCallCounts[k];
801  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
802 
803  // statIter gives an insertion location hint that makes each
804  // insertion O(1), since we remember the location of the last
805  // insertion.
806  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
807  }
808  }
809 
810 
828  getDefaultComm ()
829  {
830  // The default communicator. If Trilinos was built with MPI
831  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
832  // been initialized, it's not valid to use the communicator!)
833  // Otherwise, this should be a "serial" (no MPI, one "process")
834  // communicator.
836 
837 #ifdef HAVE_MPI
838  {
839  int mpiHasBeenStarted = 0;
840  MPI_Initialized (&mpiHasBeenStarted);
841  if (! mpiHasBeenStarted) {
842  // Make pComm a new "serial communicator."
843  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
844  }
845  }
846 #endif // HAVE_MPI
847  return comm;
848  }
849 
850  } // namespace (anonymous)
851 
852 
853  void
855  std::vector<std::string>& statNames,
856  Ptr<const Comm<int> > comm,
857  const ECounterSetOp setOp,
858  const std::string& filter)
859  {
860  // Collect local timer data and names. Filter out timers with
861  // zero call counts if writeZeroTimers is false. Also, apply the
862  // timer label filter at this point, so we don't have to compute
863  // statistics on timers we don't want to display anyway.
864  timer_map_t localTimerData;
865  Array<std::string> localTimerNames;
866  const bool writeZeroTimers = false;
867  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
868  counters(), writeZeroTimers, filter);
869  // Merge the local timer data and names into global timer data and
870  // names.
871  timer_map_t globalTimerData;
872  Array<std::string> globalTimerNames;
873  const bool alwaysWriteLocal = false;
874  collectGlobalTimerData (globalTimerData, globalTimerNames,
875  localTimerData, localTimerNames,
876  comm, alwaysWriteLocal, setOp);
877  // Compute statistics on the data.
878  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
879  }
880 
881 
882  void
884  std::ostream& out,
885  const bool alwaysWriteLocal,
886  const bool writeGlobalStats,
887  const bool writeZeroTimers,
888  const ECounterSetOp setOp,
889  const std::string& filter,
890  const bool ignoreZeroTimers)
891  {
892  //
893  // We can't just call computeGlobalTimerStatistics(), since
894  // summarize() has different options that affect whether global
895  // statistics are computed and printed.
896  //
897  const int numProcs = comm->getSize();
898  const int myRank = comm->getRank();
899 
900  // Collect local timer data and names. Filter out timers with
901  // zero call counts if writeZeroTimers is false. Also, apply the
902  // timer label filter at this point, so we don't have to compute
903  // statistics on timers we don't want to display anyway.
904  timer_map_t localTimerData;
905  Array<std::string> localTimerNames;
906  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
907  counters(), writeZeroTimers, filter);
908 
909  // If we're computing global statistics, merge the local timer
910  // data and names into global timer data and names, and compute
911  // global timer statistics. Otherwise, leave the global data
912  // empty.
913  timer_map_t globalTimerData;
914  Array<std::string> globalTimerNames;
915  stat_map_type statData;
916  std::vector<std::string> statNames;
917  if (writeGlobalStats) {
918  collectGlobalTimerData (globalTimerData, globalTimerNames,
919  localTimerData, localTimerNames,
920  comm, alwaysWriteLocal, setOp);
921  // Compute statistics on the data, but only if the communicator
922  // contains more than one process. Otherwise, statistics don't
923  // make sense and we don't print them (see below).
924  if (numProcs > 1) {
925  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
926  }
927  }
928 
929  // Precision of floating-point numbers in the table.
930  const int precision = format().precision();
931 
932  // All columns of the table, in order.
933  Array<TableColumn> tableColumns;
934 
935  // Labels of all the columns of the table.
936  // We will append to this when we add each column.
937  Array<std::string> titles;
938 
939  // Widths (in number of characters) of each column.
940  // We will append to this when we add each column.
941  Array<int> columnWidths;
942 
943  // Table column containing all timer names. If writeGlobalStats
944  // is true, we use the global timer names, otherwise we use the
945  // local timer names. We build the table on all processes
946  // redundantly, but only print on Rank 0.
947  {
948  titles.append ("Timer Name");
949 
950  // The column labels depend on whether we are computing global statistics.
951  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
952  tableColumns.append (nameCol);
953 
954  // Each column is as wide as it needs to be to hold both its
955  // title and all of the column data. This column's title is the
956  // current last entry of the titles array.
957  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
958  }
959 
960  // Table column containing local timer stats, if applicable. We
961  // only write local stats if asked, only on MPI Proc 0, and only
962  // if there is more than one MPI process in the communicator
963  // (otherwise local stats == global stats, so we just print the
964  // global stats). In this case, we've padded the local data on
965  // Proc 0 if necessary to match the global timer list, so that the
966  // columns have the same number of rows.
967  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
968  titles.append ("Local time (num calls)");
969 
970  // Copy local timer data out of the array-of-structs into
971  // separate arrays, for display in the table.
972  Array<double> localTimings;
973  Array<double> localNumCalls;
974  for (timer_map_t::const_iterator it = localTimerData.begin();
975  it != localTimerData.end(); ++it) {
976  localTimings.push_back (it->second.first);
977  localNumCalls.push_back (static_cast<double> (it->second.second));
978  }
979  TableColumn timeAndCalls (localTimings, localNumCalls, precision, true);
980  tableColumns.append (timeAndCalls);
981  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
982  }
983 
984  if (writeGlobalStats) {
985  // If there's only 1 process in the communicator, don't display
986  // statistics; statistics don't make sense in that case. Just
987  // display the timings and call counts. If there's more than 1
988  // process, do display statistics.
989  if (numProcs == 1) {
990  // Extract timings and the call counts from globalTimerData.
991  Array<double> globalTimings;
992  Array<double> globalNumCalls;
993  for (timer_map_t::const_iterator it = globalTimerData.begin();
994  it != globalTimerData.end(); ++it) {
995  globalTimings.push_back (it->second.first);
996  globalNumCalls.push_back (static_cast<double> (it->second.second));
997  }
998  // Print the table column.
999  titles.append ("Global time (num calls)");
1000  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, true);
1001  tableColumns.append (timeAndCalls);
1002  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1003  }
1004  else { // numProcs > 1
1005  // Print a table column for each statistic. statNames and
1006  // each value in statData use the same ordering, so we can
1007  // iterate over valid indices of statNames to display the
1008  // statistics in the right order.
1009  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1010  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1011  // Extract lists of timings and their call counts for the
1012  // current statistic.
1013  Array<double> statTimings (numGlobalTimers);
1014  Array<double> statCallCounts (numGlobalTimers);
1015  stat_map_type::const_iterator it = statData.begin();
1016  for (int k = 0; it != statData.end(); ++it, ++k) {
1017  statTimings[k] = (it->second[statInd]).first;
1018  statCallCounts[k] = (it->second[statInd]).second;
1019  }
1020  // Print the table column.
1021  const std::string& statisticName = statNames[statInd];
1022  const std::string titleString = statisticName;
1023  titles.append (titleString);
1024  TableColumn timeAndCalls (statTimings, statCallCounts, precision, true);
1025  tableColumns.append (timeAndCalls);
1026  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1027  }
1028  }
1029  }
1030 
1031  // Print the whole table to the given output stream on MPI Rank 0.
1032  format().setColumnWidths (columnWidths);
1033  if (myRank == 0) {
1034  std::ostringstream theTitle;
1035  theTitle << "TimeMonitor results over " << numProcs << " processor"
1036  << (numProcs > 1 ? "s" : "");
1037  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1038  }
1039  }
1040 
1041  void
1042  TimeMonitor::summarize (std::ostream &out,
1043  const bool alwaysWriteLocal,
1044  const bool writeGlobalStats,
1045  const bool writeZeroTimers,
1046  const ECounterSetOp setOp,
1047  const std::string& filter,
1048  const bool ignoreZeroTimers)
1049  {
1050  // The default communicator. If Trilinos was built with MPI
1051  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1052  // be a "serial" (no MPI, one "process") communicator.
1053  RCP<const Comm<int> > comm = getDefaultComm();
1054 
1055  summarize (comm.ptr(), out, alwaysWriteLocal,
1056  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1057  }
1058 
1059  void
1061  std::vector<std::string>& statNames,
1062  const ECounterSetOp setOp,
1063  const std::string& filter)
1064  {
1065  // The default communicator. If Trilinos was built with MPI
1066  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1067  // be a "serial" (no MPI, one "process") communicator.
1068  RCP<const Comm<int> > comm = getDefaultComm();
1069 
1070  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1071  }
1072 
1073 
namespace {
  // Return the given timer / statistic label in a form that is safe
  // to print as a YAML scalar (e.g., as a key in a "key: value" pair
  // or as an entry of a flow sequence).
  //
  // Rules:
  //   - An empty label is returned unchanged (YAML permits empty keys).
  //   - A label that already starts and ends with a double quote keeps
  //     exactly one pair of outer quotes; only its interior is escaped.
  //   - Every double quote or backslash in the (interior of the) label
  //     is escaped with a backslash.
  //   - The result is wrapped in double quotes if the label was already
  //     quoted, if anything had to be escaped, or if the label contains
  //     text that a YAML parser would not read back as part of a plain
  //     scalar: a colon, a '#', one of the flow indicators , [ ] { },
  //     a leading indicator character, or leading / trailing spaces.
  //   - Otherwise the label is returned as is.
  //
  // Quoting more often than strictly required is harmless: a
  // double-quoted scalar denotes the same string as the plain one.
  std::string
  quoteLabelForYaml (const std::string& label)
  {
    // YAML allows empty keys in key: value pairs.  See Section 7.2
    // of the YAML 1.2 spec.  We thus let an empty label pass
    // through without quoting or other special treatment.
    if (label.empty ()) {
      return label;
    }

    // Check whether the label is already quoted.  If so, we don't
    // need to quote it again.  However, we do need to escape any
    // quote symbols in the string inside the outer quotes.
    const bool alreadyQuoted = label.size () >= 2 &&
      label[0] == '"' && label[label.size () - 1] == '"';

    // Characters that force quoting wherever they occur.  The colon
    // would otherwise be read as a key/value separator, '#' may start
    // a comment, and the rest would terminate or open a flow
    // collection in compact output.
    const std::string anywhereIndicators (":#,[]{}");
    // Characters that only matter as the first character of a scalar.
    const std::string leadingIndicators ("-?&*!|>'%@`");

    bool needToQuote = alreadyQuoted;
    if (! alreadyQuoted) {
      const char first = label[0];
      const char last = label[label.size () - 1];
      // A parser strips surrounding spaces from a plain scalar, so a
      // label that begins or ends with a space must be quoted to
      // survive a round trip.
      if (first == ' ' || last == ' ' ||
          leadingIndicators.find (first) != std::string::npos) {
        needToQuote = true;
      }
    }

    std::string out; // To fill with the return value
    out.reserve (label.size () + 2);

    // Skip the outer quotes of an already quoted label; they are
    // added back below.
    const size_t startPos = alreadyQuoted ? 1 : 0;
    const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();
    for (size_t i = startPos; i < endPos; ++i) {
      const char c = label[i];
      if (c == '"' || c == '\\') {
        out.push_back ('\\'); // Escape the quote or backslash.
        needToQuote = true;
      }
      else if (anywhereIndicators.find (c) != std::string::npos) {
        needToQuote = true;
      }
      out.push_back (c);
    }

    if (needToQuote) {
      return "\"" + out + "\"";
    }
    return out;
  }

} // namespace (anonymous)
1147 
1148 
1149  void TimeMonitor::
1150  summarizeToYaml (Ptr<const Comm<int> > comm,
1151  std::ostream &out,
1152  const ETimeMonitorYamlFormat yamlStyle,
1153  const std::string& filter)
1154  {
1155  using Teuchos::FancyOStream;
1156  using Teuchos::fancyOStream;
1157  using Teuchos::getFancyOStream;
1158  using Teuchos::OSTab;
1159  using Teuchos::RCP;
1160  using Teuchos::rcpFromRef;
1161  using std::endl;
1162  typedef std::vector<std::string>::size_type size_type;
1163 
1164  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1165 
1166  // const bool writeGlobalStats = true;
1167  // const bool writeZeroTimers = true;
1168  // const bool alwaysWriteLocal = false;
1169  const ECounterSetOp setOp = Intersection;
1170 
1171  stat_map_type statData;
1172  std::vector<std::string> statNames;
1173  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1174 
1175  const int numProcs = comm->getSize();
1176 
1177  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1178  // " as the line prefix does not work, else I would prefer that
1179  // method for printing each line of a YAML block sequence (see
1180  // Section 8.2.1 of the YAML 1.2 spec).
1181  //
1182  // Also, I have to set the tab indent string here, rather than in
1183  // OSTab's constructor. This is because line prefix (which for
1184  // some reason is what OSTab's constructor takes, rather than tab
1185  // indent string) means something different from tab indent
1186  // string, and turning on the line prefix prints all sorts of
1187  // things including "|" for some reason.
1188  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1189  pfout->setTabIndentStr (" ");
1190  FancyOStream& fout = *pfout;
1191 
1192  fout << "# Teuchos::TimeMonitor report" << endl
1193  << "---" << endl;
1194 
1195  // mfh 19 Aug 2012: An important goal of our chosen output format
1196  // was to minimize the nesting depth. We have managed to keep the
1197  // nesting depth to 3, which is the limit that the current version
1198  // of PylotDB imposes for its YAML input.
1199 
1200  // Outermost level is a dictionary. (Individual entries of a
1201  // dictionary do _not_ begin with "- ".) We always print the
1202  // outermost level in standard style, not flow style, for better
1203  // readability. We begin the outermost level with metadata.
1204  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1205  << "Number of processes: " << numProcs << endl
1206  << "Time unit: s" << endl;
1207  // For a key: value pair where the value is a sequence or
1208  // dictionary on the following line, YAML requires a space after
1209  // the colon.
1210  fout << "Statistics collected: ";
1211  // Print list of the names of all the statistics we collected.
1212  if (compact) {
1213  fout << " [";
1214  for (size_type i = 0; i < statNames.size (); ++i) {
1215  fout << quoteLabelForYaml (statNames[i]);
1216  if (i + 1 < statNames.size ()) {
1217  fout << ", ";
1218  }
1219  }
1220  fout << "]" << endl;
1221  }
1222  else {
1223  fout << endl;
1224  OSTab tab1 (pfout);
1225  for (size_type i = 0; i < statNames.size (); ++i) {
1226  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1227  }
1228  }
1229 
1230  // Print the list of timer names.
1231  //
1232  // It might be nicer instead to print a map from timer name to all
1233  // of its data, but keeping the maximum nesting depth small
1234  // ensures better compatibility with different parsing tools.
1235  fout << "Timer names: ";
1236  if (compact) {
1237  fout << " [";
1238  size_type ind = 0;
1239  for (stat_map_type::const_iterator it = statData.begin();
1240  it != statData.end(); ++it, ++ind) {
1241  fout << quoteLabelForYaml (it->first);
1242  if (ind + 1 < statData.size ()) {
1243  fout << ", ";
1244  }
1245  }
1246  fout << "]" << endl;
1247  }
1248  else {
1249  fout << endl;
1250  OSTab tab1 (pfout);
1251  for (stat_map_type::const_iterator it = statData.begin();
1252  it != statData.end(); ++it) {
1253  fout << "- " << quoteLabelForYaml (it->first) << endl;
1254  }
1255  }
1256 
1257  // Print times for each timer, as a map from statistic name to its time.
1258  fout << "Total times: ";
1259  if (compact) {
1260  fout << " {";
1261  size_type outerInd = 0;
1262  for (stat_map_type::const_iterator outerIter = statData.begin();
1263  outerIter != statData.end(); ++outerIter, ++outerInd) {
1264  // Print timer name.
1265  fout << quoteLabelForYaml (outerIter->first) << ": ";
1266  // Print that timer's data.
1267  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1268  fout << "{";
1269  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1270  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1271  << curData[innerInd].first;
1272  if (innerInd + 1 < curData.size ()) {
1273  fout << ", ";
1274  }
1275  }
1276  fout << "}";
1277  if (outerInd + 1 < statData.size ()) {
1278  fout << ", ";
1279  }
1280  }
1281  fout << "}" << endl;
1282  }
1283  else {
1284  fout << endl;
1285  OSTab tab1 (pfout);
1286  size_type outerInd = 0;
1287  for (stat_map_type::const_iterator outerIter = statData.begin();
1288  outerIter != statData.end(); ++outerIter, ++outerInd) {
1289  // Print timer name.
1290  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1291  // Print that timer's data.
1292  OSTab tab2 (pfout);
1293  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1294  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1295  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1296  << curData[innerInd].first << endl;
1297  }
1298  }
1299  }
1300 
1301  // Print call counts for each timer, for each statistic name.
1302  fout << "Call counts:";
1303  if (compact) {
1304  fout << " {";
1305  size_type outerInd = 0;
1306  for (stat_map_type::const_iterator outerIter = statData.begin();
1307  outerIter != statData.end(); ++outerIter, ++outerInd) {
1308  // Print timer name.
1309  fout << quoteLabelForYaml (outerIter->first) << ": ";
1310  // Print that timer's data.
1311  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1312  fout << "{";
1313  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1314  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1315  << curData[innerInd].second;
1316  if (innerInd + 1 < curData.size ()) {
1317  fout << ", ";
1318  }
1319  }
1320  fout << "}";
1321  if (outerInd + 1 < statData.size ()) {
1322  fout << ", ";
1323  }
1324  }
1325  fout << "}" << endl;
1326  }
1327  else {
1328  fout << endl;
1329  OSTab tab1 (pfout);
1330  size_type outerInd = 0;
1331  for (stat_map_type::const_iterator outerIter = statData.begin();
1332  outerIter != statData.end(); ++outerIter, ++outerInd) {
1333  // Print timer name.
1334  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1335  // Print that timer's data.
1336  OSTab tab2 (pfout);
1337  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1338  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1339  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1340  << curData[innerInd].second << endl;
1341  }
1342  }
1343  }
1344  }
1345 
1346  void TimeMonitor::
1347  summarizeToYaml (std::ostream &out,
1348  const ETimeMonitorYamlFormat yamlStyle,
1349  const std::string& filter)
1350  {
1351  // The default communicator. If Trilinos was built with MPI
1352  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1353  // be a "serial" (no MPI, one "process") communicator.
1354  RCP<const Comm<int> > comm = getDefaultComm ();
1355 
1356  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1357  }
1358 
1359  // Default value is false. We'll set to true once
1360  // setReportParameters() completes successfully.
1361  bool TimeMonitor::setParams_ = false;
1362 
1363  // We have to declare all of these here in order to avoid linker errors.
1364  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1365  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1366  ECounterSetOp TimeMonitor::setOp_ = Intersection;
1367  bool TimeMonitor::alwaysWriteLocal_ = false;
1368  bool TimeMonitor::writeGlobalStats_ = true;
1369  bool TimeMonitor::writeZeroTimers_ = true;
1370 
1371  void
1372  TimeMonitor::setReportFormatParameter (ParameterList& plist)
1373  {
1374  const std::string name ("Report format");
1375  const std::string defaultValue ("Table");
1376  const std::string docString ("Output format for report of timer statistics");
1377  Array<std::string> strings;
1378  Array<std::string> docs;
1380 
1381  strings.push_back ("YAML");
1382  docs.push_back ("YAML (see yaml.org) format");
1383  values.push_back (REPORT_FORMAT_YAML);
1384  strings.push_back ("Table");
1385  docs.push_back ("Tabular format via Teuchos::TableFormat");
1386  values.push_back (REPORT_FORMAT_TABLE);
1387 
1388  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1389  docString,
1390  strings (), docs (),
1391  values (), &plist);
1392  }
1393 
1394  void
1395  TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1396  {
1397  const std::string name ("YAML style");
1398  const std::string defaultValue ("spacious");
1399  const std::string docString ("YAML-specific output format");
1400  Array<std::string> strings;
1401  Array<std::string> docs;
1403 
1404  strings.push_back ("compact");
1405  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1406  "yaml.org) for most sequences except the outermost sequence");
1407  values.push_back (YAML_FORMAT_COMPACT);
1408 
1409  strings.push_back ("spacious");
1410  docs.push_back ("Spacious format: avoid flow style");
1411  values.push_back (YAML_FORMAT_SPACIOUS);
1412 
1413  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1414  docString,
1415  strings (), docs (),
1416  values (), &plist);
1417  }
1418 
1419  void
1420  TimeMonitor::setSetOpParameter (ParameterList& plist)
1421  {
1422  const std::string name ("How to merge timer sets");
1423  const std::string defaultValue ("Intersection");
1424  const std::string docString ("How to merge differing sets of timers "
1425  "across processes");
1426  Array<std::string> strings;
1427  Array<std::string> docs;
1428  Array<ECounterSetOp> values;
1429 
1430  strings.push_back ("Intersection");
1431  docs.push_back ("Compute intersection of timer sets over processes");
1432  values.push_back (Intersection);
1433  strings.push_back ("Union");
1434  docs.push_back ("Compute union of timer sets over processes");
1435  values.push_back (Union);
1436 
1437  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1438  strings (), docs (), values (),
1439  &plist);
1440  }
1441 
1444  {
1445  // Our implementation favors recomputation over persistent
1446  // storage. That is, we simply recreate the list every time we
1447  // need it.
1448  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1449 
1450  const bool alwaysWriteLocal = false;
1451  const bool writeGlobalStats = true;
1452  const bool writeZeroTimers = true;
1453 
1454  setReportFormatParameter (*plist);
1455  setYamlFormatParameter (*plist);
1456  setSetOpParameter (*plist);
1457  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1458  "Always output local timers' values on Proc 0");
1459  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1460  "statistics, even if there is only one process in the "
1461  "communicator");
1462  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1463  "timers that have never been called");
1464 
1465  return rcp_const_cast<const ParameterList> (plist);
1466  }
1467 
1468  void
1469  TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1470  {
1471  ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1472  ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1473  ECounterSetOp setOp = Intersection;
1474  bool alwaysWriteLocal = false;
1475  bool writeGlobalStats = true;
1476  bool writeZeroTimers = true;
1477 
1478  if (params.is_null ()) {
1479  // If we've set parameters before, leave their current values.
1480  // Otherwise, set defaults (below).
1481  if (setParams_) {
1482  return;
1483  }
1484  }
1485  else { // params is nonnull. Let's read it!
1486  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1487 
1488  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1489  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1490  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1491  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1492  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1493  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1494  }
1495  // Defer setting state until here, to ensure the strong exception
1496  // guarantee for this method (either it throws with no externally
1497  // visible state changes, or it returns normally).
1498  reportFormat_ = reportFormat;
1499  yamlStyle_ = yamlStyle;
1500  setOp_ = setOp;
1501  alwaysWriteLocal_ = alwaysWriteLocal;
1502  writeGlobalStats_ = writeGlobalStats;
1503  writeZeroTimers_ = writeZeroTimers;
1504 
1505  setParams_ = true; // Yay, we successfully set parameters!
1506  }
1507 
1508  void
1510  std::ostream& out,
1511  const std::string& filter,
1512  const RCP<ParameterList>& params)
1513  {
1514  setReportParameters (params);
1515 
1516  if (reportFormat_ == REPORT_FORMAT_YAML) {
1517  summarizeToYaml (comm, out, yamlStyle_, filter);
1518  }
1519  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1520  summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1521  writeZeroTimers_, setOp_, filter);
1522  }
1523  else {
1524  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1525  "Invalid report format. This should never happen; ParameterList "
1526  "validation should have caught this. Please report this bug to the "
1527  "Teuchos developers.");
1528  }
1529  }
1530 
1531  void
1533  std::ostream& out,
1534  const RCP<ParameterList>& params)
1535  {
1536  report (comm, out, "", params);
1537  }
1538 
1539  void
1540  TimeMonitor::report (std::ostream& out,
1541  const std::string& filter,
1542  const RCP<ParameterList>& params)
1543  {
1544  RCP<const Comm<int> > comm = getDefaultComm ();
1545  report (comm.ptr (), out, filter, params);
1546  }
1547 
1548  void
1549  TimeMonitor::report (std::ostream& out,
1550  const RCP<ParameterList>& params)
1551  {
1552  RCP<const Comm<int> > comm = getDefaultComm ();
1553  report (comm.ptr (), out, "", params);
1554  }
1555 
1556 } // namespace Teuchos
void reserve(size_type n)
Array< T > & append(const T &x)
Add a new entry at the end of the array.
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
basic_OSTab< char > OSTab
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
const Time & counter() const
Constant access to the instance's counter reference.
TimeMonitor(Time &timer, bool reset=false)
Constructor: starts the timer.
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
~TimeMonitor()
Destructor: stops the timer.
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
basic_FancyOStream< char > FancyOStream
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
static void disableTimer(const std::string &name)
Disable the timer with the given name.
Teuchos version of MPI_MINLOC.
T * get() const
Get the raw C++ pointer to the underlying object.
Concrete serial communicator subclass.
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
Tabbing class for helping to create formatted, indented output for a basic_FancyOStream object...
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
This structure defines some basic traits for a scalar field type.
Teuchos version of MPI_MAXLOC.
Base interface class for user-defined reduction operations for objects that use value semantics...
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
A column of TableEntry objects.
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
double stop()
Stop the timer, if the timer is enabled (see disable()).
std::ostream subclass that performs the magic of indenting data sent to an std::ostream object among ...
Wall-clock timer.
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
int precision() const
Get the precision for writing doubles. Default is 4.
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
Provides utilities for formatting tabular output.
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
Deprecated.
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
A list of parameters of arbitrary type.
std::vector< T >::const_iterator const_iterator
The type of a const forward iterator.
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process...
reference back()
Abstract interface for distributed-memory communication.
void push_back(const value_type &x)
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos, as well as a number of utility routines.
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
size_type size() const
Defines basic traits for the scalar field type.
static T zero()
Returns representation of zero for this scalar type.
Scope protection wrapper for Teuchos::Time, with timer reporting functionality.
Smart reference counting pointer class for automatic garbage collection.
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Common capabilities for collecting and reporting performance data across processors.
iterator begin()
Same as MinLoc, but don't allow zero.
Simple wrapper class for raw pointers to single objects where no persisting relationship exists...
static void enableTimer(const std::string &name)
Enable the timer with the given name.
static TableFormat & format()
Table format that will be used to print a summary of timer results.
static void zeroOutTimers()
Reset all global timers to zero.
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
bool is_null() const
Returns true if the underlying pointer is null.