MPQC  3.0.0-alpha
thread_timer.h
1 //
2 // thread_timer.h
3 //
4 // Copyright (C) 2014 David Hollman
5 //
6 // Author: David Hollman
7 // Maintainer: DSH
8 // Created: Feb 7, 2014
9 //
10 // This file is part of the SC Toolkit.
11 //
12 // The SC Toolkit is free software; you can redistribute it and/or modify
13 // it under the terms of the GNU Library General Public License as published by
14 // the Free Software Foundation; either version 2, or (at your option)
15 // any later version.
16 //
17 // The SC Toolkit is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 // GNU Library General Public License for more details.
21 //
22 // You should have received a copy of the GNU Library General Public License
23 // along with the SC Toolkit; see the file COPYING.LIB. If not, write to
24 // the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
25 //
26 // The U.S. Government is granted a limited license as per AL 91-7.
27 //
28 
29 #ifndef _util_misc_thread_timer_h
30 #define _util_misc_thread_timer_h
31 
// standard library includes
#include <algorithm>
#include <atomic>
#include <cassert>
#include <chrono>
#include <cstddef>
#include <functional>
#include <iomanip>
#include <limits>
#include <map>
#include <ostream>
#include <string>
#include <thread>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <utility>
#include <vector>

// boost includes
#include <boost/thread/thread.hpp>

// madness includes
#include <madness/world/worldhashmap.h>

// MPQC includes
#include <util/misc/formio.h>
#include <util/misc/regtime.h>
50 
51 namespace sc {
52 
/**
 * Scope-bound timer.
 *
 * Samples ClockType::now() when constructed and, when destroyed, adds the
 * elapsed time -- converted to a count of DurationType ticks -- to the
 * accumulator it was given.
 *
 * @tparam DurationType      unit in which elapsed time is counted
 * @tparam ClockType         clock providing now() and time_point
 * @tparam AccumulateToType  type of the accumulator; must support
 *                           `+=` with an integer tick count
 *
 * Plain `new` is deleted, so instances cannot be created with a
 * non-placement new-expression.
 */
template<
    typename DurationType = std::chrono::nanoseconds,
    typename ClockType = std::chrono::high_resolution_clock,
    typename AccumulateToType = std::atomic_uint_fast64_t
>
class auto_time_accumulator {

  public:

    typedef DurationType duration_type;
    typedef ClockType clock_type;
    typedef AccumulateToType accumulate_to_type;
    typedef auto_time_accumulator<
        duration_type, clock_type, accumulate_to_type
    > self_type;

    auto_time_accumulator() = delete;
    // Needed so that factories can return a timer by value before C++17
    // (the copy constructor is deleted).  A move that is NOT elided leaves a
    // moved-from timer whose destructor also accumulates.
    // NOTE(review): this declaration was missing from the listing this file
    // was recovered from -- confirm against the upstream header.
    auto_time_accumulator(self_type&&) = default;
    auto_time_accumulator(const self_type&) = delete;
    void* operator new(size_t) = delete;

    /// Starts timing immediately; `dest` must outlive this object.
    auto_time_accumulator(accumulate_to_type& dest)
      : start_(clock_type::now()),
        dest_(dest)
    { }

    /// Adds the time elapsed since construction to the accumulator.
    ~auto_time_accumulator()
    {
      const auto end = clock_type::now();
      dest_ += std::chrono::duration_cast<DurationType>(end - start_).count();
    }

  private:
    typename clock_type::time_point start_;
    accumulate_to_type& dest_;
};
91 
92 
93 template<
94  typename AccumulateToType = std::atomic_uint_fast64_t
95 >
97  std::chrono::nanoseconds,
98  std::chrono::high_resolution_clock,
99  AccumulateToType
100 >
101 make_auto_timer(AccumulateToType& dest) {
102  return auto_time_accumulator<
103  std::chrono::nanoseconds,
104  std::chrono::high_resolution_clock,
105  AccumulateToType
106  >(dest);
107 }
108 
110 
111 template<
112  typename DurationType = std::chrono::nanoseconds,
113  typename ClockType = std::chrono::high_resolution_clock,
114  typename AccumulateToType = std::atomic_uint_fast64_t
115 >
117 
118  public:
119 
120  typedef DurationType duration_type;
121  typedef ClockType clock_type;
122  typedef AccumulateToType accumulate_to_type;
123  typedef time_accumulator_factory<
124  duration_type, clock_type, accumulate_to_type
125  > self_type;
126  typedef auto_time_accumulator<
127  duration_type, clock_type, accumulate_to_type
128  > generated_type;
129  typedef decltype(accumulate_to_type().load()) accumulated_value_type;
130 
131  time_accumulator_factory() = delete;
132 
133  explicit time_accumulator_factory(accumulate_to_type& dest)
134  : dest_(dest)
135  { }
136 
137  generated_type create() const {
138  return generated_type(dest_);
139  }
140 
141  accumulated_value_type total_time() const {
142  return dest_.load();
143  }
144 
145  template <typename ToDuration>
146  double total_time_in()
147  {
148  return std::chrono::duration<double, typename ToDuration::period>(
149  std::chrono::duration<accumulated_value_type, typename DurationType::period>(
150  dest_.load()
151  )
152  ).count();
153  }
154 
155  protected:
156 
157  accumulate_to_type& dest_;
158 
159 };
160 
162 
/**
 * A bool whose default-constructed value is the template argument, for use
 * as the mapped type of a container that default-constructs missing entries.
 *
 * Declared directly in the enclosing namespace rather than in an unnamed
 * namespace: an unnamed namespace in a header yields a different type in
 * every translation unit that includes it.
 */
template <bool default_val>
struct default_value_bool {
  bool value = default_val;
  /// Reads the wrapped flag; usable on const objects.
  inline operator bool() const { return value; }
};
170 
171 template<
172  typename DurationType = std::chrono::nanoseconds,
173  typename ClockType = std::chrono::high_resolution_clock,
174  typename AccumulateToType = std::atomic_uint_fast64_t
175 >
177  public:
178 
179  typedef DurationType duration_type;
180  typedef ClockType clock_type;
181  typedef AccumulateToType accumulate_to_type;
182 
184  DurationType, ClockType, AccumulateToType
185  > self_type;
186 
187  typedef madness::ConcurrentHashMap<
188  std::thread::id, default_value_bool<false>, std::hash<unsigned long>
189  > timer_active_flag_map;
190 
194  void* operator new(size_t) = delete;
196 
198  accumulate_to_type& dest,
199  timer_active_flag_map& flag_map
200  ) : is_outer_(flag_map[std::this_thread::get_id()]),
201  thread_has_timer_map_(flag_map),
202  dest_(dest)
203  {
204  start_ = clock_type::now();
205  }
206 
208  {
209  if(is_outer_) {
210  auto end = clock_type::now();
211  dest_ += std::chrono::duration_cast<DurationType>(end - start_).count();
212  thread_has_timer_map_.erase(std::this_thread::get_id());
213  }
214  }
215 
216  private:
217 
218  bool is_outer_;
219  timer_active_flag_map& thread_has_timer_map_;
220  typename clock_type::time_point start_;
221  accumulate_to_type& dest_;
222 
223 };
224 
225 
226 template<
227  typename DurationType = std::chrono::nanoseconds,
228  typename ClockType = std::chrono::high_resolution_clock,
229  typename AccumulateToType = std::atomic_uint_fast64_t,
230  // Used to estimate the optimal number of bins for the madness hash map to use
231  int max_n_thread_estimate = 40
232 >
234  : public time_accumulator_factory<
235  DurationType,
236  ClockType,
237  AccumulateToType
238  >
239 {
240 
241  public:
242 
244  DurationType, ClockType, AccumulateToType
245  > self_type;
246 
247  typedef time_accumulator_factory<
248  DurationType, ClockType, AccumulateToType
249  > super_t;
250 
252  DurationType, ClockType, AccumulateToType
253  > generated_type;
254 
255  typedef madness::ConcurrentHashMap<
256  std::thread::id, default_value_bool<false>, std::hash<unsigned long>
257  > timer_active_flag_map;
258 
259 
260  using super_t::time_accumulator_factory;
261 
262  generated_type create() const {
263  return generated_type(
264  super_t::dest_,
265  thread_has_timer_
266  );
267  }
268 
269  private:
270 
271  timer_active_flag_map thread_has_timer_{ max_n_thread_estimate };
272 
273 };
274 
276 
277 class MultiThreadTimer;
278 class ThreadTimer;
279 class TimedRegion;
280 
281 class ThreadTimer {
282 
283  public:
284 
285  typedef std::map<std::string, ThreadTimer> section_map;
286  typedef typename time_accumulator_factory<>::clock_type clock_type;
287  typedef std::chrono::time_point<clock_type> time_type;
288  typedef std::chrono::nanoseconds duration_type;
289  typedef std::chrono::duration<double> fp_seconds;
290 
291  private:
292 
293  time_type begin_time_;
294  duration_type accum_time_{ 0 };
295 
296  std::vector<std::string> section_names_;
297  section_map subtimers_;
298 
299  ThreadTimer* active_subsection_;
300  std::string active_subname_ = "";
301  bool stopped_{ true };
302 
303  int depth_;
304 
305  void start() {
306  assert(stopped_);
307  stopped_ = false;
308  begin_time_ = clock_type::now();
309  }
310 
311  void stop() {
312  assert(!stopped_);
313  accum_time_ += std::chrono::duration_cast<duration_type>(
314  clock_type::now() - begin_time_
315  );
316  stopped_ = true;
317  }
318 
319  struct Holdable {
320  ThreadTimer* to_hold;
321  ThreadTimer* parent;
322  Holdable(ThreadTimer* to_hold, ThreadTimer* parent)
323  : to_hold(to_hold), parent(parent)
324  { }
325  };
326 
327  public:
328 
329  //ThreadTimer() = delete;
330 
331  explicit ThreadTimer(int depth, bool start=true)
332  : section_names_(0),
333  subtimers_(),
334  active_subsection_(0),
335  depth_(depth)
336  {
337  if(start) this->start();
338  }
339 
340  Holdable get_subtimer(const std::string& subname, bool start=false);
341 
342  void enter(const std::string& subname) {
343  get_subtimer(subname, true);
344  }
345 
346  void exit() {
347  // TODO This should throw exceptions on failure rather than just asserting
348  if(active_subsection_) {
349  active_subsection_->exit();
350  if(active_subsection_->stopped_) {
351  active_subsection_ = 0;
352  }
353  }
354  else{
355  this->stop();
356  }
357  }
358 
359  void change(const std::string& newsub) {
360  this->exit();
361  enter(newsub);
362  }
363 
364  bool is_stopped() const { return stopped_; }
365 
366  double read_seconds() const {
367  assert(stopped_);
368  return fp_seconds(accum_time_).count();
369  }
370 
371  friend class MultiThreadTimer;
372  friend class TimerHolder;
373 
374 };
375 
376 inline
377 ThreadTimer::Holdable ThreadTimer::get_subtimer(const std::string& subname, bool start) {
378  if(active_subsection_) {
379  return active_subsection_->get_subtimer(subname, start);
380  }
381  else {
382  ThreadTimer* rv_ptr;
383  active_subname_ = subname;
384  auto subspot = subtimers_.find(subname);
385  if(subspot != subtimers_.end()) {
386  rv_ptr = &(subspot->second);
387  if(start) {
388  active_subsection_ = rv_ptr;
389  rv_ptr->start();
390  }
391  }
392  else {
393  auto insertion_pair = subtimers_.emplace(
394  std::piecewise_construct,
395  std::forward_as_tuple(subname),
396  std::forward_as_tuple(depth_+1, start)
397  );
398  section_names_.push_back(subname);
399  assert(insertion_pair.second);
400  rv_ptr = &(insertion_pair.first->second);
401  if(start) {
402  active_subsection_ = rv_ptr;
403  }
404  }
405  return ThreadTimer::Holdable(rv_ptr, this);
406  }
407 }
408 
409 class TimerHolder {
410 
411  ThreadTimer* held;
412  ThreadTimer* parent;
413 
414  public:
415 
416  explicit TimerHolder(const ThreadTimer::Holdable& to_hold)
417  : held(to_hold.to_hold), parent(to_hold.parent) {
418  held->start();
419  parent->active_subsection_ = held;
420  }
421 
422  ~TimerHolder()
423  {
424  held->stop();
425  parent->active_subsection_ = 0;
426  }
427 
428  void change(ThreadTimer::Holdable& other) {
429  assert(other.parent == parent);
430  held->stop();
431  held = other.to_hold;
432  parent->active_subsection_ = held;
433  held->start();
434  }
435 
436 };
437 
439 
440  std::vector<ThreadTimer> thread_timers_;
441  int nthreads_;
442  std::string name_;
443 
445  typedef typename accum_factory::clock_type clock_type;
446  typedef typename accum_factory::accumulate_to_type accumulate_to_type;
447 
448  accumulate_to_type overhead_nanos_{ 0 };
449  accum_factory overhead_factory_{ overhead_nanos_ };
450 
451  typename clock_type::time_point wall_start;
452  typename clock_type::duration wall_time;
453 
454 
455  boost::thread::id creator_id_;
456 
457  TimedRegion* collect_regions_recursive(
458  const std::vector<const ThreadTimer*>& subtimers,
459  const std::string& curr_name,
460  TimedRegion* parent
461  ) const;
462 
463  void print_sub(
464  std::ostream& out,
465  int indent_size,
466  const std::vector<const ThreadTimer*>& subtimers,
467  const std::string& name,
468  int label_width
469  ) {
470  double sum = 0.0;
471  double min = std::numeric_limits<double>::infinity();
472  double max = 0.0;
473  for(auto timer : subtimers) {
474  // TODO throw exception rather than just asserting
475  assert(timer->is_stopped());
476  const double time = timer->read_seconds();
477  sum += time;
478  if(time < min) min = time;
479  if(time > max) max = time;
480  }
481  const double avg = sum / (double)subtimers.size();
482  const std::string indent(indent_size, ' ');
483  out << std::setw(label_width) << std::left << (indent + name + ":")
484  << scprintf("%7.2f %7.2f %7.2f",
485  avg, min, max
486  )
487  << std::endl;
488  //----------------------------------------//
489  std::vector<std::vector<bool>> dones;
490  for(auto st : subtimers) { dones.emplace_back(st->section_names_.size(), false); }
491  auto all = [](const std::vector<bool> v) -> bool {
492  for(const auto& i : v){
493  if(!i) return false;
494  }
495  return true;
496  };
497  auto first_false_index = [](const std::vector<bool> v) -> int {
498  int idx = 0;
499  for(const auto& i : v){
500  if(!i) return idx;
501  else ++idx;
502  }
503  return -1;
504  };
505  while(true){
506  std::vector<const ThreadTimer*> next_subs;
507  std::string curr_name;
508  bool name_found = false;
509  for(int i = 0; i < subtimers.size(); ++i) {
510  const ThreadTimer* sub = subtimers[i];
511  if(not all(dones[i])){
512  if(not name_found){
513  const int idx = first_false_index(dones[i]);
514  curr_name = sub->section_names_[idx];
515  dones[i][idx] = true;
516  name_found = true;
517  next_subs.push_back(&(sub->subtimers_.at(curr_name)));
518  }
519  else {
520  if(sub->subtimers_.find(curr_name) != sub->subtimers_.end()) {
521  int iname = 0;
522  for(const auto& subname : sub->section_names_) {
523  if(subname == curr_name) {
524  dones[i][iname] = true;
525  next_subs.push_back(&(sub->subtimers_.at(curr_name)));
526  break;
527  }
528  else ++iname;
529  }
530  }
531  }
532  }
533  } // end loop over subtimers
534  if(name_found){
535  print_sub(out, indent_size+2, next_subs, curr_name, label_width);
536  }
537  else{
538  break;
539  }
540  } // end while all_done
541 
542  }
543 
544  public:
545 
546  MultiThreadTimer(const std::string& name, int nthreads)
547  : name_(name),
548  nthreads_(nthreads),
549  creator_id_(boost::this_thread::get_id()),
550  overhead_nanos_(0),
551  overhead_factory_(overhead_nanos_)
552  {
553  wall_start = clock_type::now();
554  for(int i = 0; i < nthreads_; ++i) {
555  thread_timers_.emplace_back(0);
556  }
557  }
558 
559  void enter(const std::string& subname, int ithr) {
560  auto overtime = overhead_factory_.create();
561  thread_timers_[ithr].enter(subname);
562  }
563 
564  void exit(int ithr) {
565  auto overtime = overhead_factory_.create();
566  thread_timers_[ithr].exit();
567  }
568 
569  ThreadTimer::Holdable
570  get_subtimer(const std::string& subname, int ithr) {
571  auto overtime = overhead_factory_.create();
572  return thread_timers_[ithr].get_subtimer(subname);
573  }
574 
575  void exit() {
576  auto overtime = overhead_factory_.create();
577  const boost::thread::id& my_id = boost::this_thread::get_id();
578  assert(my_id == creator_id_);
579  for(auto& tim : thread_timers_) tim.exit();
580  auto wall_stop = clock_type::now();
581  wall_time = wall_stop - wall_start;
582  }
583 
584  void change(const std::string& subname, int ithr) {
585  auto overtime = overhead_factory_.create();
586  thread_timers_[ithr].change(subname);
587  }
588 
589  TimedRegion* make_timed_region() const;
590 
591  void print(
592  std::ostream& out=ExEnv::out0(),
593  int indent_size = 0,
594  int label_width=50,
595  const std::string& title = ""
596  ) {
597 
598  std::vector<const ThreadTimer*> tim_ptrs;
599  for(const auto& tim : thread_timers_) tim_ptrs.push_back(&tim);
600  const std::string indent = std::string(indent_size, ' ');
601  out << std::setw(label_width) << std::left << (indent + title)
602  << std::setw(8) << std::internal << "avg"
603  << std::setw(8) << std::internal << "min"
604  << std::setw(8) << std::internal << "max"
605  << std::endl;
606  print_sub(out, indent_size, tim_ptrs, name_, label_width);
607  out << indent << "Timer overhead: " << std::setprecision(3)
608  << (double)((unsigned long long)overhead_nanos_)/1.e9
609  << std::endl;
610  // TODO walltime/thread time ratio and efficiency
611  }
612 
613 };
614 
615 //############################################################################//
616 
617 } // end namespace sc
618 
619 #endif /* _util_misc_thread_timer_h */
sc::time_accumulator_factory
Definition: thread_timer.h:116
sc::auto_time_accumulator
Definition: thread_timer.h:58
sc::reentrant_auto_time_accumulator
Definition: thread_timer.h:176
sc::reentrant_time_accumulator_factory
Definition: thread_timer.h:233
sc::MultiThreadTimer
Definition: thread_timer.h:438
sc::ThreadTimer
Definition: thread_timer.h:281
sc::TimerHolder
Definition: thread_timer.h:409
sc::TimedRegion
TimedRegion is a helper class for RegionTimer.
Definition: regtime.h:47
sc::other
SpinCase1 other(SpinCase1 S)
given 1-spin return the other 1-spin
sc::ExEnv::out0
static std::ostream & out0()
Return an ostream that writes from node 0.
sc::scprintf
This class allows printf-like output to be sent to an ostream.
Definition: formio.h:97
sc
Contains all MPQC code up to version 3.
Definition: mpqcin.h:14

Generated at Sun Jan 26 2020 23:24:02 for MPQC 3.0.0-alpha using the documentation package Doxygen 1.8.16.