Teuchos - Trilinos Tools Package  Version of the Day
Teuchos_TimeMonitor.cpp
1 // @HEADER
2 // ***********************************************************************
3 //
4 // Teuchos: Common Tools Package
5 // Copyright (2004) Sandia Corporation
6 //
7 // Under terms of Contract DE-AC04-94AL85000, there is a non-exclusive
8 // license for use of this work by or on behalf of the U.S. Government.
9 //
10 // Redistribution and use in source and binary forms, with or without
11 // modification, are permitted provided that the following conditions are
12 // met:
13 //
14 // 1. Redistributions of source code must retain the above copyright
15 // notice, this list of conditions and the following disclaimer.
16 //
17 // 2. Redistributions in binary form must reproduce the above copyright
18 // notice, this list of conditions and the following disclaimer in the
19 // documentation and/or other materials provided with the distribution.
20 //
21 // 3. Neither the name of the Corporation nor the names of the
22 // contributors may be used to endorse or promote products derived from
23 // this software without specific prior written permission.
24 //
25 // THIS SOFTWARE IS PROVIDED BY SANDIA CORPORATION "AS IS" AND ANY
26 // EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
27 // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
28 // PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL SANDIA CORPORATION OR THE
29 // CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
30 // EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
31 // PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
32 // PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
33 // LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
34 // NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
35 // SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36 //
37 // Questions? Contact Michael A. Heroux (maherou@sandia.gov)
38 //
39 // ***********************************************************************
40 // @HEADER
41 
42 #include "Teuchos_TimeMonitor.hpp"
43 #include "Teuchos_CommHelpers.hpp"
44 #include "Teuchos_DefaultComm.hpp"
45 #include "Teuchos_TableColumn.hpp"
46 #include "Teuchos_TableFormat.hpp"
47 #include "Teuchos_StandardParameterEntryValidators.hpp"
48 #include "Teuchos_ScalarTraits.hpp"
49 #include "Teuchos_StackedTimer.hpp"
50 
51 #include <functional>
52 #include <iomanip>
53 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
54 #include <sstream>
55 #endif
56 
57 namespace Teuchos {
110  template<class Ordinal, class ScalarType, class IndexType>
111  class MaxLoc :
112  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
113  public:
114  void
115  reduce (const Ordinal count,
116  const std::pair<ScalarType, IndexType> inBuffer[],
117  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
118  };
119 
120  template<class Ordinal>
121  class MaxLoc<Ordinal, double, int> :
122  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
123  public:
124  void
125  reduce (const Ordinal count,
126  const std::pair<double, int> inBuffer[],
127  std::pair<double, int> inoutBuffer[]) const
128  {
129  for (Ordinal ind = 0; ind < count; ++ind) {
130  const std::pair<double, int>& in = inBuffer[ind];
131  std::pair<double, int>& inout = inoutBuffer[ind];
132 
133  if (in.first > inout.first) {
134  inout.first = in.first;
135  inout.second = in.second;
136  } else if (in.first < inout.first) {
137  // Don't need to do anything; inout has the values.
138  } else { // equal, or at least one is NaN.
139  inout.first = in.first;
140  inout.second = std::min (in.second, inout.second);
141  }
142  }
143  }
144  };
145 
172  template<class Ordinal, class ScalarType, class IndexType>
173  class MinLoc :
174  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
175  public:
176  void
177  reduce (const Ordinal count,
178  const std::pair<ScalarType, IndexType> inBuffer[],
179  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
180  };
181 
182  template<class Ordinal>
183  class MinLoc<Ordinal, double, int> :
184  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
185  public:
186  void
187  reduce (const Ordinal count,
188  const std::pair<double, int> inBuffer[],
189  std::pair<double, int> inoutBuffer[]) const
190  {
191  for (Ordinal ind = 0; ind < count; ++ind) {
192  const std::pair<double, int>& in = inBuffer[ind];
193  std::pair<double, int>& inout = inoutBuffer[ind];
194 
195  if (in.first < inout.first) {
196  inout.first = in.first;
197  inout.second = in.second;
198  } else if (in.first > inout.first) {
199  // Don't need to do anything; inout has the values.
200  } else { // equal, or at least one is NaN.
201  inout.first = in.first;
202  inout.second = std::min (in.second, inout.second);
203  }
204  }
205  }
206  };
207 
211  template<class Ordinal, class ScalarType, class IndexType>
213  public ValueTypeReductionOp<Ordinal, std::pair<ScalarType, IndexType> > {
214  public:
215  void
216  reduce (const Ordinal count,
217  const std::pair<ScalarType, IndexType> inBuffer[],
218  std::pair<ScalarType, IndexType> inoutBuffer[]) const;
219  };
220 
221  template<class Ordinal>
222  class MinLocNonzero<Ordinal, double, int> :
223  public ValueTypeReductionOp<Ordinal, std::pair<double, int> > {
224  public:
225  void
226  reduce (const Ordinal count,
227  const std::pair<double, int> inBuffer[],
228  std::pair<double, int> inoutBuffer[]) const
229  {
230  for (Ordinal ind = 0; ind < count; ++ind) {
231  const std::pair<double, int>& in = inBuffer[ind];
232  std::pair<double, int>& inout = inoutBuffer[ind];
233 
234  if ( (in.first < inout.first && in.first != 0) || (inout.first == 0 && in.first != 0) ) {
235  inout.first = in.first;
236  inout.second = in.second;
237  } else if (in.first > inout.first) {
238  // Don't need to do anything; inout has the values.
239  } else { // equal, or at least one is NaN.
240  inout.first = in.first;
241  inout.second = std::min (in.second, inout.second);
242  }
243  }
244  }
245  };
246 
// Map type shared by TimeMonitor::summarize() and the file-local
// helper functions below.
//
//   key   - the timer's label (a string)
//   value - (total elapsed seconds summed over all calls to that
//            timer, total number of calls to that timer)
typedef std::map<std::string, std::pair<double, int> > timer_map_t;
252 
253  // static initialization
254  Teuchos::RCP<Teuchos::StackedTimer> TimeMonitor::stackedTimer_ = Teuchos::rcp(new Teuchos::StackedTimer("Teuchos::StackedTimer"));
255 
256  TimeMonitor::TimeMonitor (Time& timer, bool reset)
257  : PerformanceMonitorBase<Time>(timer, reset)
258  {
259  if (!isRecursiveCall()) {
260  counter().start(reset);
261 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
262  if (nonnull(stackedTimer_))
263  stackedTimer_->start(counter().name());
264 #endif
265  }
266  }
267 
269  if (!isRecursiveCall()) {
270  counter().stop();
271 #ifdef HAVE_TEUCHOS_ADD_TIME_MONITOR_TO_STACKED_TIMER
272  try {
273  if (nonnull(stackedTimer_))
274  stackedTimer_->stop(counter().name());
275  }
276  catch (std::runtime_error&) {
277  std::ostringstream warning;
278  warning <<
279  "\n*********************************************************************\n"
280  "WARNING: Overlapping timers detected!\n"
281  "A TimeMonitor timer was stopped before a nested subtimer was\n"
282  "stopped. This is not allowed by the StackedTimer. This corner case\n"
283  "typically occurs if the TimeMonitor is stored in an RCP and the RCP is\n"
284  "assigned to a new timer. To disable this warning, either fix the\n"
285  "ordering of timer creation and destuction or disable the StackedTimer\n"
286  "support in the TimeMonitor by setting the StackedTimer to null\n"
287  "with:\n"
288  "Teuchos::TimeMonitor::setStackedTimer(Teuchos::null)\n"
289  "*********************************************************************\n";
290  std::cout << warning.str() << std::endl;
292  }
293 #endif
294  }
295  }
296 
297  void
298  TimeMonitor::disableTimer (const std::string& name)
299  {
300  RCP<Time> timer = lookupCounter (name);
302  timer == null, std::invalid_argument,
303  "TimeMonitor::disableTimer: Invalid timer \"" << name << "\"");
304  timer->disable ();
305  }
306 
307  void
308  TimeMonitor::enableTimer (const std::string& name)
309  {
310  RCP<Time> timer = lookupCounter (name);
312  timer == null, std::invalid_argument,
313  "TimeMonitor::enableTimer: Invalid timer \"" << name << "\"");
314  timer->enable ();
315  }
316 
317  void
319  {
320  typedef std::map<std::string, RCP<Time> > map_type;
321  typedef map_type::iterator iter_type;
322  map_type& ctrs = counters ();
323 
324  // In debug mode, loop first to check whether any of the timers
325  // are running, before resetting them. This ensures that this
326  // method satisfies the strong exception guarantee (either it
327  // completes normally, or there are no side effects).
328 #ifdef TEUCHOS_DEBUG
329  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
330  // We throw a runtime_error rather than a logic_error, because
331  // logic_error suggests a bug in the implementation of
332  // TimeMonitor. Calling zeroOutTimers() when a timer is running
333  // is not TimeMonitor's fault.
335  it->second->isRunning (), std::runtime_error,
336  "Timer \"" << it->second->name () << "\" is currently running. "
337  "You are not allowed to reset running timers.");
338  }
339 #endif // TEUCHOS_DEBUG
340 
341  for (iter_type it = ctrs.begin(); it != ctrs.end(); ++it) {
342  it->second->reset ();
343  }
344  }
345 
346  // An anonymous namespace is the standard way of limiting linkage of
347  // its contained routines to file scope.
348  namespace {
// \brief Build a placeholder (name, (time, calls)) entry.
//
// The returned entry pairs the given label with an elapsed time of
// zero and a call count of zero.  No timer object is created.
//
// \param name The label to store in the entry.
std::pair<std::string, std::pair<double, int> >
makeEmptyTimerDatum (const std::string& name)
{
  const std::pair<double, int> zeroTimeAndCalls (0.0, 0);
  return std::pair<std::string, std::pair<double, int> > (name, zeroTimeAndCalls);
}
360 
361  // \fn collectLocalTimerData
362  // \brief Collect and sort local timer data by timer names.
363  //
364  // \param localData [out] Map whose keys are the timer names, and
365  // whose value for each key is the total elapsed time (in
366  // seconds) and the call count for the timer with that name.
367  //
368  // \param localCounters [in] Timers from which to extract data.
369  //
370  // \param filter [in] Filter for timer labels. If filter is not
371  // empty, this method will only collect data for local timers
372  // whose labels begin with this string.
373  //
374  // Extract the total elapsed time and call count from each timer
375  // in the given array. Merge results for timers with duplicate
376  // labels, by summing their total elapsed times and call counts
377  // pairwise.
378  void
379  collectLocalTimerData (timer_map_t& localData,
380  const std::map<std::string, RCP<Time> >& localCounters,
381  const std::string& filter="")
382  {
383  using std::make_pair;
384  typedef timer_map_t::iterator iter_t;
385 
386  timer_map_t theLocalData;
387  for (std::map<std::string, RCP<Time> >::const_iterator it = localCounters.begin();
388  it != localCounters.end(); ++it) {
389  const std::string& name = it->second->name ();
390 
391  // Filter current timer name, if provided filter is nonempty.
392  // Filter string must _start_ the timer label, not just be in it.
393  const bool skipThisOne = (filter != "" && name.find (filter) != 0);
394  if (! skipThisOne) {
395  const double timing = it->second->totalElapsedTime ();
396  const int numCalls = it->second->numCalls ();
397 
398  // Merge timers with duplicate labels, by summing their
399  // total elapsed times and call counts.
400  iter_t loc = theLocalData.find (name);
401  if (loc == theLocalData.end()) {
402  // Use loc as an insertion location hint.
403  theLocalData.insert (loc, make_pair (name, make_pair (timing, numCalls)));
404  }
405  else {
406  loc->second.first += timing;
407  loc->second.second += numCalls;
408  }
409  }
410  }
411  // This avoids copying the map, and also makes this method
412  // satisfy the strong exception guarantee.
413  localData.swap (theLocalData);
414  }
415 
416  // \brief Locally filter out timer data with zero call counts.
417  //
418  // \param timerData [in/out]
419  void
420  filterZeroData (timer_map_t& timerData)
421  {
422  // FIXME (mfh 15 Mar 2013) Should use std::map::erase with
423  // iterator hint, instead of rebuilding the map completely.
424  timer_map_t newTimerData;
425  for (timer_map_t::const_iterator it = timerData.begin();
426  it != timerData.end(); ++it) {
427  if (it->second.second > 0) {
428  newTimerData[it->first] = it->second;
429  }
430  }
431  timerData.swap (newTimerData);
432  }
433 
455  void
456  collectLocalTimerDataAndNames (timer_map_t& localTimerData,
457  Array<std::string>& localTimerNames,
458  const std::map<std::string, RCP<Time> >& localTimers,
459  const bool writeZeroTimers,
460  const std::string& filter="")
461  {
462  // Collect and sort local timer data by timer names.
463  collectLocalTimerData (localTimerData, localTimers, filter);
464 
465  // Filter out zero data locally first. This ensures that if we
466  // are writing global stats, and if a timer name exists in the
467  // set of global names, then that timer has a nonzero call count
468  // on at least one MPI process.
469  if (! writeZeroTimers) {
470  filterZeroData (localTimerData);
471  }
472 
473  // Extract the set of local timer names. The std::map keeps
474  // them sorted alphabetically.
475  localTimerNames.reserve (localTimerData.size());
476  for (timer_map_t::const_iterator it = localTimerData.begin();
477  it != localTimerData.end(); ++it) {
478  localTimerNames.push_back (it->first);
479  }
480  }
481 
516  void
517  collectGlobalTimerData (timer_map_t& globalTimerData,
518  Array<std::string>& globalTimerNames,
519  timer_map_t& localTimerData,
520  Array<std::string>& localTimerNames,
521  Ptr<const Comm<int> > comm,
522  const bool alwaysWriteLocal,
523  const ECounterSetOp setOp)
524  {
525  // There may be some global timers that are not local timers on
526  // the calling MPI process(es). In that case, if
527  // alwaysWriteLocal is true, then we need to fill in the
528  // "missing" local timers. That will ensure that both global
529  // and local timer columns in the output table have the same
530  // number of rows. The collectLocalTimerDataAndNames() method
531  // may have already filtered out local timers with zero call
532  // counts (if its writeZeroTimers argument was false), but we
533  // won't be filtering again. Thus, any local timer data we
534  // insert here won't get filtered out.
535  //
536  // Note that calling summarize() with writeZeroTimers == false
537  // will still do what it says, even if we insert local timers
538  // with zero call counts here.
539 
540  // This does the correct and inexpensive thing (just copies the
541  // timer data) if numProcs == 1. Otherwise, it initiates a
542  // communication with \f$O(\log P)\f$ messages along the
543  // critical path, where \f$P\f$ is the number of participating
544  // processes.
545  mergeCounterNames (*comm, localTimerNames, globalTimerNames, setOp);
546 
547 #ifdef TEUCHOS_DEBUG
548  {
549  // Sanity check that all processes have the name number of
550  // global timer names.
551  const timer_map_t::size_type myNumGlobalNames = globalTimerNames.size();
552  timer_map_t::size_type minNumGlobalNames = 0;
553  timer_map_t::size_type maxNumGlobalNames = 0;
554  reduceAll (*comm, REDUCE_MIN, myNumGlobalNames,
555  outArg (minNumGlobalNames));
556  reduceAll (*comm, REDUCE_MAX, myNumGlobalNames,
557  outArg (maxNumGlobalNames));
558  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalNames != maxNumGlobalNames,
559  std::logic_error, "Min # global timer names = " << minNumGlobalNames
560  << " != max # global timer names = " << maxNumGlobalNames
561  << ". Please report this bug to the Teuchos developers.");
562  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalNames != minNumGlobalNames,
563  std::logic_error, "My # global timer names = " << myNumGlobalNames
564  << " != min # global timer names = " << minNumGlobalNames
565  << ". Please report this bug to the Teuchos developers.");
566  }
567 #endif // TEUCHOS_DEBUG
568 
569  // mergeCounterNames() just merges the counters' names, not
570  // their actual data. Now we need to fill globalTimerData with
571  // this process' timer data for the timers in globalTimerNames.
572  //
573  // All processes need the full list of global timers, since
574  // there may be some global timers that are not local timers.
575  // That's why mergeCounterNames() has to be an all-reduce, not
576  // just a reduction to Proc 0.
577  //
578  // Insertion optimization: if the iterator given to map::insert
579  // points right before where we want to insert, insertion is
580  // O(1). globalTimerNames is sorted, so feeding the iterator
581  // output of map::insert into the next invocation's input should
582  // make the whole insertion O(N) where N is the number of
583  // entries in globalTimerNames.
584  timer_map_t::iterator globalMapIter = globalTimerData.begin();
585  timer_map_t::iterator localMapIter;
586  for (Array<string>::const_iterator it = globalTimerNames.begin();
587  it != globalTimerNames.end(); ++it) {
588  const std::string& globalName = *it;
589  localMapIter = localTimerData.find (globalName);
590 
591  if (localMapIter == localTimerData.end()) {
592  if (alwaysWriteLocal) {
593  // If there are some global timers that are not local
594  // timers, and if we want to print local timers, we insert
595  // a local timer datum with zero elapsed time and zero
596  // call count into localTimerData as well. This will
597  // ensure that both global and local timer columns in the
598  // output table have the same number of rows.
599  //
600  // We really only need to do this on Proc 0, which is the
601  // only process that currently may print local timers.
602  // However, we do it on all processes, just in case
603  // someone later wants to modify this function to print
604  // out local timer data for some process other than Proc
605  // 0. This extra computation won't affect the cost along
606  // the critical path, for future computations in which
607  // Proc 0 participates.
608  localMapIter = localTimerData.insert (localMapIter, makeEmptyTimerDatum (globalName));
609 
610  // Make sure the missing global name gets added to the
611  // list of local names. We'll re-sort the list of local
612  // names below.
613  localTimerNames.push_back (globalName);
614  }
615  // There's a global timer that's not a local timer. Add it
616  // to our pre-merge version of the global timer data so that
617  // we can safely merge the global timer data later.
618  globalMapIter = globalTimerData.insert (globalMapIter, makeEmptyTimerDatum (globalName));
619  }
620  else {
621  // We have this global timer name in our local timer list.
622  // Fill in our pre-merge version of the global timer data
623  // with our local data.
624  globalMapIter = globalTimerData.insert (globalMapIter, std::make_pair (globalName, localMapIter->second));
625  }
626  }
627 
628  if (alwaysWriteLocal) {
629  // Re-sort the list of local timer names, since we may have
630  // inserted "missing" names above.
631  std::sort (localTimerNames.begin(), localTimerNames.end());
632  }
633 
634 #ifdef TEUCHOS_DEBUG
635  {
636  // Sanity check that all processes have the name number of
637  // global timers.
638  const timer_map_t::size_type myNumGlobalTimers = globalTimerData.size();
639  timer_map_t::size_type minNumGlobalTimers = 0;
640  timer_map_t::size_type maxNumGlobalTimers = 0;
641  reduceAll (*comm, REDUCE_MIN, myNumGlobalTimers,
642  outArg (minNumGlobalTimers));
643  reduceAll (*comm, REDUCE_MAX, myNumGlobalTimers,
644  outArg (maxNumGlobalTimers));
645  TEUCHOS_TEST_FOR_EXCEPTION(minNumGlobalTimers != maxNumGlobalTimers,
646  std::logic_error, "Min # global timers = " << minNumGlobalTimers
647  << " != max # global timers = " << maxNumGlobalTimers
648  << ". Please report this bug to the Teuchos developers.");
649  TEUCHOS_TEST_FOR_EXCEPTION(myNumGlobalTimers != minNumGlobalTimers,
650  std::logic_error, "My # global timers = " << myNumGlobalTimers
651  << " != min # global timers = " << minNumGlobalTimers
652  << ". Please report this bug to the Teuchos developers.");
653  }
654 #endif // TEUCHOS_DEBUG
655  }
656 
703  void
704  computeGlobalTimerStats (stat_map_type& statData,
705  std::vector<std::string>& statNames,
706  Ptr<const Comm<int> > comm,
707  const timer_map_t& globalTimerData,
708  const bool ignoreZeroTimers)
709  {
710  using Teuchos::ScalarTraits;
711 
712  const int numTimers = static_cast<int> (globalTimerData.size());
713  const int numProcs = comm->getSize();
714 
715  // Extract pre-reduction timings and call counts into a
716  // sequential array. This array will be in the same order as
717  // the global timer names are in the map.
718  Array<std::pair<double, int> > timingsAndCallCounts;
719  timingsAndCallCounts.reserve (numTimers);
720  for (timer_map_t::const_iterator it = globalTimerData.begin();
721  it != globalTimerData.end(); ++it) {
722  timingsAndCallCounts.push_back (it->second);
723  }
724 
725  // For each timer name, compute the min timing and its
726  // corresponding call count. If two processes have the same
727  // timing but different call counts, the minimum call count will
728  // be used.
729  Array<std::pair<double, int> > minTimingsAndCallCounts (numTimers);
730  if (numTimers > 0) {
731  if (ignoreZeroTimers)
732  reduceAll (*comm, MinLocNonzero<int, double, int>(), numTimers,
733  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
734  else
735  reduceAll (*comm, MinLoc<int, double, int>(), numTimers,
736  &timingsAndCallCounts[0], &minTimingsAndCallCounts[0]);
737  }
738 
739  // For each timer name, compute the max timing and its
740  // corresponding call count. If two processes have the same
741  // timing but different call counts, the minimum call count will
742  // be used.
743  Array<std::pair<double, int> > maxTimingsAndCallCounts (numTimers);
744  if (numTimers > 0) {
745  reduceAll (*comm, MaxLoc<int, double, int>(), numTimers,
746  &timingsAndCallCounts[0], &maxTimingsAndCallCounts[0]);
747  }
748 
749  // For each timer name, compute the mean-over-processes timing,
750  // the mean call count, and the mean-over-call-counts timing.
751  // The mean call count is reported as a double to allow a
752  // fractional value.
753  //
754  // Each local timing is really the total timing over all local
755  // invocations. The number of local invocations is the call
756  // count. Thus, the mean-over-call-counts timing is the sum of
757  // all the timings (over all processes), divided by the sum of
758  // all the call counts (over all processes). We compute it in a
759  // different way to over unnecessary overflow.
760  Array<double> meanOverCallCountsTimings (numTimers);
761  Array<double> meanOverProcsTimings (numTimers);
762  Array<double> meanCallCounts (numTimers);
763  Array<int> ICallThisTimer (numTimers);
764  Array<int> numProcsCallingEachTimer (numTimers);
765  {
766  // Figure out how many processors actually call each timer.
767  if (ignoreZeroTimers) {
768  for (int k = 0; k < numTimers; ++k) {
769  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
770  if (callCount > 0) ICallThisTimer[k] = 1;
771  else ICallThisTimer[k] = 0;
772  }
773  if (numTimers > 0) {
774  reduceAll (*comm, REDUCE_SUM, numTimers, &ICallThisTimer[0],
775  &numProcsCallingEachTimer[0]);
776  }
777  }
778 
779  // When summing, first scale by the number of processes. This
780  // avoids unnecessary overflow, and also gives us the mean
781  // call count automatically.
782  Array<double> scaledTimings (numTimers);
783  Array<double> scaledCallCounts (numTimers);
784  const double P = static_cast<double> (numProcs);
785 
786  if (ignoreZeroTimers) {
787  for (int k = 0; k < numTimers; ++k) {
788  const double timing = timingsAndCallCounts[k].first;
789  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
790 
791  scaledTimings[k] = timing / numProcsCallingEachTimer[k];
792  scaledCallCounts[k] = callCount / numProcsCallingEachTimer[k];
793  }
794  }
795  else {
796  for (int k = 0; k < numTimers; ++k) {
797  const double timing = timingsAndCallCounts[k].first;
798  const double callCount = static_cast<double> (timingsAndCallCounts[k].second);
799 
800  scaledTimings[k] = timing / P;
801  scaledCallCounts[k] = callCount / P;
802  }
803  }
804 
805  if (numTimers > 0) {
806  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledTimings[0],
807  &meanOverProcsTimings[0]);
808  reduceAll (*comm, REDUCE_SUM, numTimers, &scaledCallCounts[0],
809  &meanCallCounts[0]);
810  }
811  // We don't have to undo the scaling for the mean timings;
812  // just divide by the scaled call count.
813  for (int k = 0; k < numTimers; ++k) {
814  if (meanCallCounts[k] > ScalarTraits<double>::zero ()) {
815  meanOverCallCountsTimings[k] = meanOverProcsTimings[k] / meanCallCounts[k];
816  }
817  else {
818  meanOverCallCountsTimings[k] = ScalarTraits<double>::zero ();
819  }
820  }
821  }
822 
823  // Reformat the data into the map of statistics. Be sure that
824  // each value (the std::vector of (timing, call count) pairs,
825  // each entry of which is a different statistic) preserves the
826  // order of statNames.
827  statNames.resize (4);
828  statNames[0] = "MinOverProcs";
829  statNames[1] = "MeanOverProcs";
830  statNames[2] = "MaxOverProcs";
831  statNames[3] = "MeanOverCallCounts";
832 
833  stat_map_type::iterator statIter = statData.end();
834  timer_map_t::const_iterator it = globalTimerData.begin();
835  for (int k = 0; it != globalTimerData.end(); ++k, ++it) {
836  std::vector<std::pair<double, double> > curData (4);
837  curData[0] = minTimingsAndCallCounts[k];
838  curData[1] = std::make_pair (meanOverProcsTimings[k], meanCallCounts[k]);
839  curData[2] = maxTimingsAndCallCounts[k];
840  curData[3] = std::make_pair (meanOverCallCountsTimings[k], meanCallCounts[k]);
841 
842  // statIter gives an insertion location hint that makes each
843  // insertion O(1), since we remember the location of the last
844  // insertion.
845  statIter = statData.insert (statIter, std::make_pair (it->first, curData));
846  }
847  }
848 
849 
866  RCP<const Comm<int> >
867  getDefaultComm ()
868  {
869  // The default communicator. If Trilinos was built with MPI
870  // enabled, this should be MPI_COMM_WORLD. (If MPI has not yet
871  // been initialized, it's not valid to use the communicator!)
872  // Otherwise, this should be a "serial" (no MPI, one "process")
873  // communicator.
874  RCP<const Comm<int> > comm = DefaultComm<int>::getComm ();
875 
876 #ifdef HAVE_MPI
877  {
878  int mpiHasBeenStarted = 0;
879  MPI_Initialized (&mpiHasBeenStarted);
880  if (! mpiHasBeenStarted) {
881  // Make pComm a new "serial communicator."
882  comm = rcp_implicit_cast<const Comm<int> > (rcp (new SerialComm<int> ()));
883  }
884  }
885 #endif // HAVE_MPI
886  return comm;
887  }
888 
889  } // namespace (anonymous)
890 
891 
892  void
894  std::vector<std::string>& statNames,
895  Ptr<const Comm<int> > comm,
896  const ECounterSetOp setOp,
897  const std::string& filter)
898  {
899  // Collect local timer data and names. Filter out timers with
900  // zero call counts if writeZeroTimers is false. Also, apply the
901  // timer label filter at this point, so we don't have to compute
902  // statistics on timers we don't want to display anyway.
903  timer_map_t localTimerData;
904  Array<std::string> localTimerNames;
905  const bool writeZeroTimers = false;
906  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
907  counters(), writeZeroTimers, filter);
908  // Merge the local timer data and names into global timer data and
909  // names.
910  timer_map_t globalTimerData;
911  Array<std::string> globalTimerNames;
912  const bool alwaysWriteLocal = false;
913  collectGlobalTimerData (globalTimerData, globalTimerNames,
914  localTimerData, localTimerNames,
915  comm, alwaysWriteLocal, setOp);
916  // Compute statistics on the data.
917  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, false);
918  }
919 
920 
921  void
923  std::ostream& out,
924  const bool alwaysWriteLocal,
925  const bool writeGlobalStats,
926  const bool writeZeroTimers,
927  const ECounterSetOp setOp,
928  const std::string& filter,
929  const bool ignoreZeroTimers)
930  {
931  //
932  // We can't just call computeGlobalTimerStatistics(), since
933  // summarize() has different options that affect whether global
934  // statistics are computed and printed.
935  //
936  const int numProcs = comm->getSize();
937  const int myRank = comm->getRank();
938 
939  // Collect local timer data and names. Filter out timers with
940  // zero call counts if writeZeroTimers is false. Also, apply the
941  // timer label filter at this point, so we don't have to compute
942  // statistics on timers we don't want to display anyway.
943  timer_map_t localTimerData;
944  Array<std::string> localTimerNames;
945  collectLocalTimerDataAndNames (localTimerData, localTimerNames,
946  counters(), writeZeroTimers, filter);
947 
948  // If we're computing global statistics, merge the local timer
949  // data and names into global timer data and names, and compute
950  // global timer statistics. Otherwise, leave the global data
951  // empty.
952  timer_map_t globalTimerData;
953  Array<std::string> globalTimerNames;
954  stat_map_type statData;
955  std::vector<std::string> statNames;
956  if (writeGlobalStats) {
957  collectGlobalTimerData (globalTimerData, globalTimerNames,
958  localTimerData, localTimerNames,
959  comm, alwaysWriteLocal, setOp);
960  // Compute statistics on the data, but only if the communicator
961  // contains more than one process. Otherwise, statistics don't
962  // make sense and we don't print them (see below).
963  if (numProcs > 1) {
964  computeGlobalTimerStats (statData, statNames, comm, globalTimerData, ignoreZeroTimers);
965  }
966  }
967 
968  // Precision of floating-point numbers in the table.
969  const int precision = format().precision();
970  const std::ios_base::fmtflags& flags = out.flags();
971 
972  // All columns of the table, in order.
973  Array<TableColumn> tableColumns;
974 
975  // Labels of all the columns of the table.
976  // We will append to this when we add each column.
977  Array<std::string> titles;
978 
979  // Widths (in number of characters) of each column.
980  // We will append to this when we add each column.
981  Array<int> columnWidths;
982 
983  // Table column containing all timer names. If writeGlobalStats
984  // is true, we use the global timer names, otherwise we use the
985  // local timer names. We build the table on all processes
986  // redundantly, but only print on Rank 0.
987  {
988  titles.append ("Timer Name");
989 
990  // The column labels depend on whether we are computing global statistics.
991  TableColumn nameCol (writeGlobalStats ? globalTimerNames : localTimerNames);
992  tableColumns.append (nameCol);
993 
994  // Each column is as wide as it needs to be to hold both its
995  // title and all of the column data. This column's title is the
996  // current last entry of the titles array.
997  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), nameCol));
998  }
999 
1000  // Table column containing local timer stats, if applicable. We
1001  // only write local stats if asked, only on MPI Proc 0, and only
1002  // if there is more than one MPI process in the communicator
1003  // (otherwise local stats == global stats, so we just print the
1004  // global stats). In this case, we've padded the local data on
1005  // Proc 0 if necessary to match the global timer list, so that the
1006  // columns have the same number of rows.
1007  if (alwaysWriteLocal && numProcs > 1 && myRank == 0) {
1008  titles.append ("Local time (num calls)");
1009 
1010  // Copy local timer data out of the array-of-structs into
1011  // separate arrays, for display in the table.
1012  Array<double> localTimings;
1013  Array<double> localNumCalls;
1014  for (timer_map_t::const_iterator it = localTimerData.begin();
1015  it != localTimerData.end(); ++it) {
1016  localTimings.push_back (it->second.first);
1017  localNumCalls.push_back (static_cast<double> (it->second.second));
1018  }
1019  TableColumn timeAndCalls (localTimings, localNumCalls, precision, flags, true);
1020  tableColumns.append (timeAndCalls);
1021  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1022  }
1023 
1024  if (writeGlobalStats) {
1025  // If there's only 1 process in the communicator, don't display
1026  // statistics; statistics don't make sense in that case. Just
1027  // display the timings and call counts. If there's more than 1
1028  // process, do display statistics.
1029  if (numProcs == 1) {
1030  // Extract timings and the call counts from globalTimerData.
1031  Array<double> globalTimings;
1032  Array<double> globalNumCalls;
1033  for (timer_map_t::const_iterator it = globalTimerData.begin();
1034  it != globalTimerData.end(); ++it) {
1035  globalTimings.push_back (it->second.first);
1036  globalNumCalls.push_back (static_cast<double> (it->second.second));
1037  }
1038  // Print the table column.
1039  titles.append ("Global time (num calls)");
1040  TableColumn timeAndCalls (globalTimings, globalNumCalls, precision, flags, true);
1041  tableColumns.append (timeAndCalls);
1042  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1043  }
1044  else { // numProcs > 1
1045  // Print a table column for each statistic. statNames and
1046  // each value in statData use the same ordering, so we can
1047  // iterate over valid indices of statNames to display the
1048  // statistics in the right order.
1049  const timer_map_t::size_type numGlobalTimers = globalTimerData.size();
1050  for (std::vector<std::string>::size_type statInd = 0; statInd < statNames.size(); ++statInd) {
1051  // Extract lists of timings and their call counts for the
1052  // current statistic.
1053  Array<double> statTimings (numGlobalTimers);
1054  Array<double> statCallCounts (numGlobalTimers);
1055  stat_map_type::const_iterator it = statData.begin();
1056  for (int k = 0; it != statData.end(); ++it, ++k) {
1057  statTimings[k] = (it->second[statInd]).first;
1058  statCallCounts[k] = (it->second[statInd]).second;
1059  }
1060  // Print the table column.
1061  const std::string& statisticName = statNames[statInd];
1062  const std::string titleString = statisticName;
1063  titles.append (titleString);
1064  TableColumn timeAndCalls (statTimings, statCallCounts, precision, flags, true);
1065  tableColumns.append (timeAndCalls);
1066  columnWidths.append (format().computeRequiredColumnWidth (titles.back(), timeAndCalls));
1067  }
1068  }
1069  }
1070 
1071  // Print the whole table to the given output stream on MPI Rank 0.
1072  format().setColumnWidths (columnWidths);
1073  if (myRank == 0) {
1074  std::ostringstream theTitle;
1075  theTitle << "TimeMonitor results over " << numProcs << " processor"
1076  << (numProcs > 1 ? "s" : "");
1077  format().writeWholeTable (out, theTitle.str(), titles, tableColumns);
1078  }
1079  }
1080 
1081  void
1082  TimeMonitor::summarize (std::ostream &out,
1083  const bool alwaysWriteLocal,
1084  const bool writeGlobalStats,
1085  const bool writeZeroTimers,
1086  const ECounterSetOp setOp,
1087  const std::string& filter,
1088  const bool ignoreZeroTimers)
1089  {
1090  // The default communicator. If Trilinos was built with MPI
1091  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1092  // be a "serial" (no MPI, one "process") communicator.
1093  RCP<const Comm<int> > comm = getDefaultComm();
1094 
1095  summarize (comm.ptr(), out, alwaysWriteLocal,
1096  writeGlobalStats, writeZeroTimers, setOp, filter, ignoreZeroTimers);
1097  }
1098 
1099  void
1101  std::vector<std::string>& statNames,
1102  const ECounterSetOp setOp,
1103  const std::string& filter)
1104  {
1105  // The default communicator. If Trilinos was built with MPI
1106  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1107  // be a "serial" (no MPI, one "process") communicator.
1108  RCP<const Comm<int> > comm = getDefaultComm();
1109 
1110  computeGlobalTimerStatistics (statData, statNames, comm.ptr(), setOp, filter);
1111  }
1112 
1113 
1114  namespace {
// Return a copy of a timer or statistic label that is safe to print
// as a YAML scalar (dictionary key or sequence entry), in both block
// style and flow ("compact") style.
//
// Rules:
//  - An empty label is returned unchanged.  YAML allows empty keys
//    in key: value pairs (Section 7.2 of the YAML 1.2 spec).
//  - A label that already begins and ends with a double quote keeps
//    exactly one pair of outer quotes; its interior is treated as
//    raw text and escaped like any other label.
//  - Double quotes and backslashes are backslash-escaped; newline,
//    carriage return and tab are written as \n, \r and \t.
//  - The result is wrapped in double quotes if anything was escaped,
//    or if the label contains a character that a plain (unquoted)
//    scalar cannot safely carry: ':' or '#', the flow indicators
//    ',', '[', ']', '{', '}' (which would otherwise split or
//    terminate the flow sequences and mappings printed in compact
//    mode), a leading YAML indicator character, or a leading or
//    trailing space.
//  - Any other label is returned verbatim.
std::string
quoteLabelForYaml (const std::string& label)
{
  if (label.empty ()) {
    return label;
  }

  const bool alreadyQuoted = label.size () >= 2 &&
    label[0] == '"' && label[label.size () - 1] == '"';

  // If the input was already quoted, skip its outer quotes here; we
  // add a fresh pair back at the end.
  const size_t startPos = alreadyQuoted ? 1 : 0;
  const size_t endPos = alreadyQuoted ? label.size () - 1 : label.size ();

  bool needToQuote = alreadyQuoted;

  if (! alreadyQuoted) {
    // A plain scalar may not start with most indicator characters,
    // and YAML parsers strip leading and trailing spaces from it.
    const char first = label[0];
    const char last = label[label.size () - 1];
    if (first == ' ' || last == ' ' ||
        std::string ("-?&*!|>'%@`").find (first) != std::string::npos) {
      needToQuote = true;
    }
  }

  std::string out; // To fill with the (unquoted) return value
  out.reserve (label.size () + 2);

  for (size_t i = startPos; i < endPos; ++i) {
    const char c = label[i];
    switch (c) {
    case '"':
    case '\\':
      out.push_back ('\\'); // Escape the quote or backslash.
      out.push_back (c);
      needToQuote = true;
      break;
    case '\n':
      out += "\\n";
      needToQuote = true;
      break;
    case '\r':
      out += "\\r";
      needToQuote = true;
      break;
    case '\t':
      out += "\\t";
      needToQuote = true;
      break;
    case ':':
    case '#':
    case ',':
    case '[':
    case ']':
    case '{':
    case '}':
      // Legal inside double quotes, but not safe in a plain scalar.
      out.push_back (c);
      needToQuote = true;
      break;
    default:
      out.push_back (c);
      break;
    }
  }

  if (needToQuote) {
    return "\"" + out + "\"";
  }
  return out;
}
1185 
1186  } // namespace (anonymous)
1187 
1188 
1189  void TimeMonitor::
1190  summarizeToYaml (Ptr<const Comm<int> > comm,
1191  std::ostream &out,
1192  const ETimeMonitorYamlFormat yamlStyle,
1193  const std::string& filter)
1194  {
1195  using Teuchos::FancyOStream;
1196  using Teuchos::fancyOStream;
1197  using Teuchos::getFancyOStream;
1198  using Teuchos::OSTab;
1199  using Teuchos::RCP;
1200  using Teuchos::rcpFromRef;
1201  using std::endl;
1202  typedef std::vector<std::string>::size_type size_type;
1203 
1204  const bool compact = (yamlStyle == YAML_FORMAT_COMPACT);
1205 
1206  // const bool writeGlobalStats = true;
1207  // const bool writeZeroTimers = true;
1208  // const bool alwaysWriteLocal = false;
1209  const ECounterSetOp setOp = Intersection;
1210 
1211  stat_map_type statData;
1212  std::vector<std::string> statNames;
1213  computeGlobalTimerStatistics (statData, statNames, comm, setOp, filter);
1214 
1215  const int numProcs = comm->getSize();
1216 
1217  // HACK (mfh 20 Aug 2012) For some reason, creating OSTab with "-
1218  // " as the line prefix does not work, else I would prefer that
1219  // method for printing each line of a YAML block sequence (see
1220  // Section 8.2.1 of the YAML 1.2 spec).
1221  //
1222  // Also, I have to set the tab indent string here, rather than in
1223  // OSTab's constructor. This is because line prefix (which for
1224  // some reason is what OSTab's constructor takes, rather than tab
1225  // indent string) means something different from tab indent
1226  // string, and turning on the line prefix prints all sorts of
1227  // things including "|" for some reason.
1228  RCP<FancyOStream> pfout = getFancyOStream (rcpFromRef (out));
1229  pfout->setTabIndentStr (" ");
1230  FancyOStream& fout = *pfout;
1231 
1232  fout << "# Teuchos::TimeMonitor report" << endl
1233  << "---" << endl;
1234 
1235  // mfh 19 Aug 2012: An important goal of our chosen output format
1236  // was to minimize the nesting depth. We have managed to keep the
1237  // nesting depth to 3, which is the limit that the current version
1238  // of PylotDB imposes for its YAML input.
1239 
1240  // Outermost level is a dictionary. (Individual entries of a
1241  // dictionary do _not_ begin with "- ".) We always print the
1242  // outermost level in standard style, not flow style, for better
1243  // readability. We begin the outermost level with metadata.
1244  fout << "Output mode: " << (compact ? "compact" : "spacious") << endl
1245  << "Number of processes: " << numProcs << endl
1246  << "Time unit: s" << endl;
1247  // For a key: value pair where the value is a sequence or
1248  // dictionary on the following line, YAML requires a space after
1249  // the colon.
1250  fout << "Statistics collected: ";
1251  // Print list of the names of all the statistics we collected.
1252  if (compact) {
1253  fout << " [";
1254  for (size_type i = 0; i < statNames.size (); ++i) {
1255  fout << quoteLabelForYaml (statNames[i]);
1256  if (i + 1 < statNames.size ()) {
1257  fout << ", ";
1258  }
1259  }
1260  fout << "]" << endl;
1261  }
1262  else {
1263  fout << endl;
1264  OSTab tab1 (pfout);
1265  for (size_type i = 0; i < statNames.size (); ++i) {
1266  fout << "- " << quoteLabelForYaml (statNames[i]) << endl;
1267  }
1268  }
1269 
1270  // Print the list of timer names.
1271  //
1272  // It might be nicer instead to print a map from timer name to all
1273  // of its data, but keeping the maximum nesting depth small
1274  // ensures better compatibility with different parsing tools.
1275  fout << "Timer names: ";
1276  if (compact) {
1277  fout << " [";
1278  size_type ind = 0;
1279  for (stat_map_type::const_iterator it = statData.begin();
1280  it != statData.end(); ++it, ++ind) {
1281  fout << quoteLabelForYaml (it->first);
1282  if (ind + 1 < statData.size ()) {
1283  fout << ", ";
1284  }
1285  }
1286  fout << "]" << endl;
1287  }
1288  else {
1289  fout << endl;
1290  OSTab tab1 (pfout);
1291  for (stat_map_type::const_iterator it = statData.begin();
1292  it != statData.end(); ++it) {
1293  fout << "- " << quoteLabelForYaml (it->first) << endl;
1294  }
1295  }
1296 
1297  // Print times for each timer, as a map from statistic name to its time.
1298  fout << "Total times: ";
1299  if (compact) {
1300  fout << " {";
1301  size_type outerInd = 0;
1302  for (stat_map_type::const_iterator outerIter = statData.begin();
1303  outerIter != statData.end(); ++outerIter, ++outerInd) {
1304  // Print timer name.
1305  fout << quoteLabelForYaml (outerIter->first) << ": ";
1306  // Print that timer's data.
1307  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1308  fout << "{";
1309  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1310  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1311  << curData[innerInd].first;
1312  if (innerInd + 1 < curData.size ()) {
1313  fout << ", ";
1314  }
1315  }
1316  fout << "}";
1317  if (outerInd + 1 < statData.size ()) {
1318  fout << ", ";
1319  }
1320  }
1321  fout << "}" << endl;
1322  }
1323  else {
1324  fout << endl;
1325  OSTab tab1 (pfout);
1326  size_type outerInd = 0;
1327  for (stat_map_type::const_iterator outerIter = statData.begin();
1328  outerIter != statData.end(); ++outerIter, ++outerInd) {
1329  // Print timer name.
1330  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1331  // Print that timer's data.
1332  OSTab tab2 (pfout);
1333  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1334  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1335  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1336  << curData[innerInd].first << endl;
1337  }
1338  }
1339  }
1340 
1341  // Print call counts for each timer, for each statistic name.
1342  fout << "Call counts:";
1343  if (compact) {
1344  fout << " {";
1345  size_type outerInd = 0;
1346  for (stat_map_type::const_iterator outerIter = statData.begin();
1347  outerIter != statData.end(); ++outerIter, ++outerInd) {
1348  // Print timer name.
1349  fout << quoteLabelForYaml (outerIter->first) << ": ";
1350  // Print that timer's data.
1351  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1352  fout << "{";
1353  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1354  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1355  << curData[innerInd].second;
1356  if (innerInd + 1 < curData.size ()) {
1357  fout << ", ";
1358  }
1359  }
1360  fout << "}";
1361  if (outerInd + 1 < statData.size ()) {
1362  fout << ", ";
1363  }
1364  }
1365  fout << "}" << endl;
1366  }
1367  else {
1368  fout << endl;
1369  OSTab tab1 (pfout);
1370  size_type outerInd = 0;
1371  for (stat_map_type::const_iterator outerIter = statData.begin();
1372  outerIter != statData.end(); ++outerIter, ++outerInd) {
1373  // Print timer name.
1374  fout << quoteLabelForYaml (outerIter->first) << ": " << endl;
1375  // Print that timer's data.
1376  OSTab tab2 (pfout);
1377  const std::vector<std::pair<double, double> >& curData = outerIter->second;
1378  for (size_type innerInd = 0; innerInd < curData.size (); ++innerInd) {
1379  fout << quoteLabelForYaml (statNames[innerInd]) << ": "
1380  << curData[innerInd].second << endl;
1381  }
1382  }
1383  }
1384  }
1385 
1386  void TimeMonitor::
1387  summarizeToYaml (std::ostream &out,
1388  const ETimeMonitorYamlFormat yamlStyle,
1389  const std::string& filter)
1390  {
1391  // The default communicator. If Trilinos was built with MPI
1392  // enabled, this should be MPI_COMM_WORLD. Otherwise, this should
1393  // be a "serial" (no MPI, one "process") communicator.
1394  RCP<const Comm<int> > comm = getDefaultComm ();
1395 
1396  summarizeToYaml (comm.ptr (), out, yamlStyle, filter);
1397  }
1398 
1399  // Default value is false. We'll set to true once
1400  // setReportParameters() completes successfully.
1401  bool TimeMonitor::setParams_ = false;
1402 
1403  // We have to declare all of these here in order to avoid linker errors.
1404  TimeMonitor::ETimeMonitorReportFormat TimeMonitor::reportFormat_ = TimeMonitor::REPORT_FORMAT_TABLE;
1405  TimeMonitor::ETimeMonitorYamlFormat TimeMonitor::yamlStyle_ = TimeMonitor::YAML_FORMAT_SPACIOUS;
1406  ECounterSetOp TimeMonitor::setOp_ = Intersection;
1407  bool TimeMonitor::alwaysWriteLocal_ = false;
1408  bool TimeMonitor::writeGlobalStats_ = true;
1409  bool TimeMonitor::writeZeroTimers_ = true;
1410 
1411  void
1412  TimeMonitor::setReportFormatParameter (ParameterList& plist)
1413  {
1414  const std::string name ("Report format");
1415  const std::string defaultValue ("Table");
1416  const std::string docString ("Output format for report of timer statistics");
1417  Array<std::string> strings;
1418  Array<std::string> docs;
1419  Array<ETimeMonitorReportFormat> values;
1420 
1421  strings.push_back ("YAML");
1422  docs.push_back ("YAML (see yaml.org) format");
1423  values.push_back (REPORT_FORMAT_YAML);
1424  strings.push_back ("Table");
1425  docs.push_back ("Tabular format via Teuchos::TableFormat");
1426  values.push_back (REPORT_FORMAT_TABLE);
1427 
1428  setStringToIntegralParameter<ETimeMonitorReportFormat> (name, defaultValue,
1429  docString,
1430  strings (), docs (),
1431  values (), &plist);
1432  }
1433 
1434  void
1435  TimeMonitor::setYamlFormatParameter (ParameterList& plist)
1436  {
1437  const std::string name ("YAML style");
1438  const std::string defaultValue ("spacious");
1439  const std::string docString ("YAML-specific output format");
1440  Array<std::string> strings;
1441  Array<std::string> docs;
1442  Array<ETimeMonitorYamlFormat> values;
1443 
1444  strings.push_back ("compact");
1445  docs.push_back ("Compact format: use \"flow style\" (see YAML 1.2 spec at "
1446  "yaml.org) for most sequences except the outermost sequence");
1447  values.push_back (YAML_FORMAT_COMPACT);
1448 
1449  strings.push_back ("spacious");
1450  docs.push_back ("Spacious format: avoid flow style");
1451  values.push_back (YAML_FORMAT_SPACIOUS);
1452 
1453  setStringToIntegralParameter<ETimeMonitorYamlFormat> (name, defaultValue,
1454  docString,
1455  strings (), docs (),
1456  values (), &plist);
1457  }
1458 
1459  void
1460  TimeMonitor::setSetOpParameter (ParameterList& plist)
1461  {
1462  const std::string name ("How to merge timer sets");
1463  const std::string defaultValue ("Intersection");
1464  const std::string docString ("How to merge differing sets of timers "
1465  "across processes");
1466  Array<std::string> strings;
1467  Array<std::string> docs;
1468  Array<ECounterSetOp> values;
1469 
1470  strings.push_back ("Intersection");
1471  docs.push_back ("Compute intersection of timer sets over processes");
1472  values.push_back (Intersection);
1473  strings.push_back ("Union");
1474  docs.push_back ("Compute union of timer sets over processes");
1475  values.push_back (Union);
1476 
1477  setStringToIntegralParameter<ECounterSetOp> (name, defaultValue, docString,
1478  strings (), docs (), values (),
1479  &plist);
1480  }
1481 
1482  void
1484  {
1485  stackedTimer_ = t;
1486  }
1487 
1490  {
1491  return stackedTimer_;
1492  }
1493 
1496  {
1497  // Our implementation favors recomputation over persistent
1498  // storage. That is, we simply recreate the list every time we
1499  // need it.
1500  RCP<ParameterList> plist = parameterList ("TimeMonitor::report");
1501 
1502  const bool alwaysWriteLocal = false;
1503  const bool writeGlobalStats = true;
1504  const bool writeZeroTimers = true;
1505 
1506  setReportFormatParameter (*plist);
1507  setYamlFormatParameter (*plist);
1508  setSetOpParameter (*plist);
1509  plist->set ("alwaysWriteLocal", alwaysWriteLocal,
1510  "Always output local timers' values on Proc 0");
1511  plist->set ("writeGlobalStats", writeGlobalStats, "Always output global "
1512  "statistics, even if there is only one process in the "
1513  "communicator");
1514  plist->set ("writeZeroTimers", writeZeroTimers, "Generate output for "
1515  "timers that have never been called");
1516 
1517  return rcp_const_cast<const ParameterList> (plist);
1518  }
1519 
1520  void
1521  TimeMonitor::setReportParameters (const RCP<ParameterList>& params)
1522  {
1523  ETimeMonitorReportFormat reportFormat = REPORT_FORMAT_TABLE;
1524  ETimeMonitorYamlFormat yamlStyle = YAML_FORMAT_SPACIOUS;
1525  ECounterSetOp setOp = Intersection;
1526  bool alwaysWriteLocal = false;
1527  bool writeGlobalStats = true;
1528  bool writeZeroTimers = true;
1529 
1530  if (params.is_null ()) {
1531  // If we've set parameters before, leave their current values.
1532  // Otherwise, set defaults (below).
1533  if (setParams_) {
1534  return;
1535  }
1536  }
1537  else { // params is nonnull. Let's read it!
1538  params->validateParametersAndSetDefaults (*getValidReportParameters ());
1539 
1540  reportFormat = getIntegralValue<ETimeMonitorReportFormat> (*params, "Report format");
1541  yamlStyle = getIntegralValue<ETimeMonitorYamlFormat> (*params, "YAML style");
1542  setOp = getIntegralValue<ECounterSetOp> (*params, "How to merge timer sets");
1543  alwaysWriteLocal = params->get<bool> ("alwaysWriteLocal");
1544  writeGlobalStats = params->get<bool> ("writeGlobalStats");
1545  writeZeroTimers = params->get<bool> ("writeZeroTimers");
1546  }
1547  // Defer setting state until here, to ensure the strong exception
1548  // guarantee for this method (either it throws with no externally
1549  // visible state changes, or it returns normally).
1550  reportFormat_ = reportFormat;
1551  yamlStyle_ = yamlStyle;
1552  setOp_ = setOp;
1553  alwaysWriteLocal_ = alwaysWriteLocal;
1554  writeGlobalStats_ = writeGlobalStats;
1555  writeZeroTimers_ = writeZeroTimers;
1556 
1557  setParams_ = true; // Yay, we successfully set parameters!
1558  }
1559 
1560  void
1562  std::ostream& out,
1563  const std::string& filter,
1564  const RCP<ParameterList>& params)
1565  {
1566  setReportParameters (params);
1567 
1568  if (reportFormat_ == REPORT_FORMAT_YAML) {
1569  summarizeToYaml (comm, out, yamlStyle_, filter);
1570  }
1571  else if (reportFormat_ == REPORT_FORMAT_TABLE) {
1572  summarize (comm, out, alwaysWriteLocal_, writeGlobalStats_,
1573  writeZeroTimers_, setOp_, filter);
1574  }
1575  else {
1576  TEUCHOS_TEST_FOR_EXCEPTION(true, std::logic_error, "TimeMonitor::report: "
1577  "Invalid report format. This should never happen; ParameterList "
1578  "validation should have caught this. Please report this bug to the "
1579  "Teuchos developers.");
1580  }
1581  }
1582 
1583  void
1585  std::ostream& out,
1586  const RCP<ParameterList>& params)
1587  {
1588  report (comm, out, "", params);
1589  }
1590 
1591  void
1592  TimeMonitor::report (std::ostream& out,
1593  const std::string& filter,
1594  const RCP<ParameterList>& params)
1595  {
1596  RCP<const Comm<int> > comm = getDefaultComm ();
1597  report (comm.ptr (), out, filter, params);
1598  }
1599 
1600  void
1601  TimeMonitor::report (std::ostream& out,
1602  const RCP<ParameterList>& params)
1603  {
1604  RCP<const Comm<int> > comm = getDefaultComm ();
1605  report (comm.ptr (), out, "", params);
1606  }
1607 
1608 } // namespace Teuchos
Teuchos::PerformanceMonitorBase< Time >::format
static TableFormat & format()
Table format that will be used to print a summary of timer results.
Definition: Teuchos_PerformanceMonitorBase.hpp:232
Teuchos::TimeMonitor::setStackedTimer
static void setStackedTimer(const Teuchos::RCP< Teuchos::StackedTimer > &t)
Sets the StackedTimer that the TimeMonitor will use to insert timings into.
Definition: Teuchos_TimeMonitor.cpp:1483
Teuchos::TimeMonitor::summarize
static void summarize(Ptr< const Comm< int > > comm, std::ostream &out=std::cout, const bool alwaysWriteLocal=false, const bool writeGlobalStats=true, const bool writeZeroTimers=true, const ECounterSetOp setOp=Intersection, const std::string &filter="", const bool ignoreZeroTimers=false)
Print summary statistics for all timers on the given communicator.
Definition: Teuchos_TimeMonitor.cpp:922
Teuchos::ECounterSetOp
ECounterSetOp
Set operation type for mergeCounterNames() to perform.
Definition: Teuchos_PerformanceMonitorBase.hpp:66
Teuchos::FancyOStream
basic_FancyOStream< char > FancyOStream
Definition: Teuchos_FancyOStream.hpp:845
Teuchos::MinLoc::reduce
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Teuchos::PerformanceMonitorBase< Time >::counters
static std::map< std::string, RCP< Time > > & counters()
Array of all counters that were created with getNewCounter() on the calling (MPI) process.
Definition: Teuchos_PerformanceMonitorBase.hpp:315
Teuchos::Array::size
size_type size() const
Definition: Teuchos_Array.hpp:1017
Teuchos::Array::const_iterator
std::vector< T >::const_iterator const_iterator
The type of a const forward iterator.
Definition: Teuchos_Array.hpp:266
Teuchos::Time::stop
double stop()
Stop the timer, if the timer is enabled (see disable()).
Definition: Teuchos_Time.cpp:134
Teuchos::Array::back
reference back()
Definition: Teuchos_Array.hpp:1135
Teuchos::TimeMonitor::computeGlobalTimerStatistics
static void computeGlobalTimerStatistics(stat_map_type &statData, std::vector< std::string > &statNames, Ptr< const Comm< int > > comm, const ECounterSetOp setOp=Intersection, const std::string &filter="")
Compute global timer statistics for all timers on the given communicator.
Definition: Teuchos_TimeMonitor.cpp:893
Teuchos::ScalarTraits::zero
static T zero()
Returns representation of zero for this scalar type.
Definition: Teuchos_ScalarTraitsDecl.hpp:132
Teuchos::TableFormat::precision
int precision() const
Get the precision for writing doubles. Default is 4.
Definition: Teuchos_TableFormat.hpp:83
Teuchos::TimeMonitor::getStackedTimer
static const Teuchos::RCP< Teuchos::StackedTimer > & getStackedTimer()
Returns the StackedTimer used by the TimeMonitor.
Definition: Teuchos_TimeMonitor.cpp:1489
Teuchos::MaxLoc
Teuchos version of MPI_MAXLOC.
Definition: Teuchos_TimeMonitor.cpp:111
Teuchos::Array::push_back
void push_back(const value_type &x)
Definition: Teuchos_Array.hpp:1156
Teuchos::REDUCE_MAX
Max.
Definition: Teuchos_EReductionType.hpp:74
Teuchos::rcp
TEUCHOS_DEPRECATED RCP< T > rcp(T *p, Dealloc_T dealloc, bool owns_mem)
Deprecated.
Definition: Teuchos_RCPDecl.hpp:1224
Teuchos_TableFormat.hpp
Provides utilities for formatting tabular output.
Teuchos::TimeMonitor::enableTimer
static void enableTimer(const std::string &name)
Enable the timer with the given name.
Definition: Teuchos_TimeMonitor.cpp:308
Teuchos::StackedTimer::start
void start()
Definition: Teuchos_StackedTimer.hpp:413
Teuchos::Time
Wall-clock timer.
Definition: Teuchos_Time.hpp:85
Teuchos::PerformanceMonitorBase< Time >::isRecursiveCall
bool isRecursiveCall() const
Whether we are currently in a recursive call of the counter.
Definition: Teuchos_PerformanceMonitorBase.hpp:309
Teuchos::Time::start
void start(bool reset=false)
Start the timer, if the timer is enabled (see disable()).
Definition: Teuchos_Time.cpp:113
Teuchos_TimeMonitor.hpp
Scope protection wrapper for Teuchos::Time, with timer reporting functionality.
Teuchos::RCP< Teuchos::StackedTimer >
Teuchos::Ptr
Simple wrapper class for raw pointers to single objects where no persisting relationship exists.
Definition: Teuchos_PtrDecl.hpp:104
Teuchos::Array< std::string >
Teuchos_TableColumn.hpp
A column of TableEntry objects.
Teuchos::REDUCE_SUM
Sum.
Definition: Teuchos_EReductionType.hpp:72
Teuchos::TableFormat::setColumnWidths
void setColumnWidths(const Array< int > &colWidths)
Set the column widths to be used for subsequent rows.
Definition: Teuchos_TableFormat.hpp:134
Teuchos::TimeMonitor::TimeMonitor
TimeMonitor(Time &timer, bool reset=false)
Constructor: starts the timer.
Definition: Teuchos_TimeMonitor.cpp:256
Teuchos::TimeMonitor::zeroOutTimers
static void zeroOutTimers()
Reset all global timers to zero.
Definition: Teuchos_TimeMonitor.cpp:318
Teuchos::StackedTimer::stop
void stop(const std::string &name="RootTimer")
Definition: Teuchos_StackedTimer.hpp:436
Teuchos::ScalarTraits
This structure defines some basic traits for a scalar field type.
Definition: Teuchos_ScalarTraitsDecl.hpp:90
Teuchos::TimeMonitor::disableTimer
static void disableTimer(const std::string &name)
Disable the timer with the given name.
Definition: Teuchos_TimeMonitor.cpp:298
Teuchos::Array::append
Array< T > & append(const T &x)
Add a new entry at the end of the array.
Definition: Teuchos_Array.hpp:1321
Teuchos::TimeMonitor::report
static void report(Ptr< const Comm< int > > comm, std::ostream &out, const std::string &filter, const RCP< ParameterList > &params=null)
Report timer statistics to the given output stream.
Definition: Teuchos_TimeMonitor.cpp:1561
Teuchos::reduceAll
TEUCHOS_DEPRECATED void reduceAll(const Comm< Ordinal > &comm, const EReductionType reductType, const Packet &send, Packet *globalReduct)
Deprecated.
Definition: Teuchos_CommHelpers.hpp:377
Teuchos::DefaultComm::getComm
static Teuchos::RCP< const Comm< OrdinalType > > getComm()
Return the default global communicator.
Definition: Teuchos_DefaultComm.hpp:212
Teuchos::RCP::is_null
bool is_null() const
Returns true if the underlying pointer is null.
Definition: Teuchos_RCP.hpp:336
Teuchos::MinLocNonzero
same as MinLoc, but don't allow zero
Definition: Teuchos_TimeMonitor.cpp:212
Teuchos::TableColumn
Definition: Teuchos_TableColumn.hpp:61
Teuchos::StackedTimer
This class allows one to push and pop timers on and off a stack.
Definition: Teuchos_StackedTimer.hpp:169
Teuchos::mergeCounterNames
void mergeCounterNames(const Comm< int > &comm, const Array< std::string > &localNames, Array< std::string > &globalNames, const ECounterSetOp setOp)
Merge counter names over all processors.
Definition: Teuchos_PerformanceMonitorBase.cpp:515
Teuchos::MaxLoc::reduce
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
Teuchos::PerformanceMonitorBase< Time >::lookupCounter
static RCP< Time > lookupCounter(const std::string &name)
Return the first counter with the given name, or null if none.
Definition: Teuchos_PerformanceMonitorBase.hpp:406
Teuchos::REDUCE_MIN
Min.
Definition: Teuchos_EReductionType.hpp:73
Teuchos::RCP::get
T * get() const
Get the raw C++ pointer to the underlying object.
Definition: Teuchos_RCP.hpp:363
Teuchos::TimeMonitor::~TimeMonitor
~TimeMonitor()
Destructor: stops the timer.
Definition: Teuchos_TimeMonitor.cpp:268
Teuchos::PerformanceMonitorBase
Common capabilities for collecting and reporting performance data across processors.
Definition: Teuchos_PerformanceMonitorBase.hpp:157
Teuchos::nonnull
bool nonnull(const std::shared_ptr< T > &p)
Returns true if p.get()!=NULL.
Definition: Teuchos_RCPStdSharedPtrConversionsDecl.hpp:159
Teuchos::Comm
Abstract interface for distributed-memory communication.
Definition: Teuchos_Comm.hpp:85
Teuchos_ScalarTraits.hpp
Defines basic traits for the scalar field type.
Teuchos::RCP::ptr
Ptr< T > ptr() const
Get a safer wrapper raw C++ pointer to the underlying object.
Definition: Teuchos_RCP.hpp:380
Teuchos
The Teuchos namespace contains all of the classes, structs and enums used by Teuchos,...
Teuchos::PerformanceMonitorBase< Time >::counter
const Time & counter() const
Constant access to the instance's counter reference.
Definition: Teuchos_PerformanceMonitorBase.hpp:298
Teuchos::ValueTypeReductionOp
Base interface class for user-defined reduction operations for objects that use value semantics.
Definition: Teuchos_ReductionOp.hpp:60
Teuchos::MinLocNonzero::reduce
void reduce(const Ordinal count, const std::pair< ScalarType, IndexType > inBuffer[], std::pair< ScalarType, IndexType > inoutBuffer[]) const
TEUCHOS_TEST_FOR_EXCEPTION
#define TEUCHOS_TEST_FOR_EXCEPTION(throw_exception_test, Exception, msg)
Macro for throwing an exception with breakpointing to ease debugging.
Definition: Teuchos_TestForException.hpp:170
Teuchos::OSTab
basic_OSTab< char > OSTab
Definition: Teuchos_FancyOStream.hpp:851
Teuchos::MinLoc
Teuchos version of MPI_MINLOC.
Definition: Teuchos_TimeMonitor.cpp:173
Teuchos::stat_map_type
std::map< std::string, std::vector< std::pair< double, double > > > stat_map_type
Global statistics collected from timer data.
Definition: Teuchos_TimeMonitor.hpp:144
Teuchos::TimeMonitor::getValidReportParameters
static RCP< const ParameterList > getValidReportParameters()
Default parameters (with validators) for report().
Definition: Teuchos_TimeMonitor.cpp:1495
Teuchos::TableFormat::writeWholeTable
void writeWholeTable(std::ostream &out, const std::string &tableTitle, const Array< std::string > &columnNames, const Array< TableColumn > &columns) const
Definition: Teuchos_TableFormat.cpp:139