29 #ifndef _util_misc_thread_timer_h
30 #define _util_misc_thread_timer_h
35 #include <type_traits>
38 #include <unordered_map>
42 #include <boost/thread/thread.hpp>
45 #include <madness/world/worldhashmap.h>
48 #include <util/misc/formio.h>
49 #include <util/misc/regtime.h>
54 typename DurationType = std::chrono::nanoseconds,
55 typename ClockType = std::chrono::high_resolution_clock,
56 typename AccumulateToType = std::atomic_uint_fast64_t
62 typedef DurationType duration_type;
63 typedef ClockType clock_type;
64 typedef AccumulateToType accumulate_to_type;
66 duration_type, clock_type, accumulate_to_type
72 void*
operator new(size_t) =
delete;
78 start_ = clock_type::now();
83 auto end = clock_type::now();
84 dest_ += std::chrono::duration_cast<DurationType>(end - start_).count();
88 typename clock_type::time_point start_;
89 accumulate_to_type& dest_;
94 typename AccumulateToType = std::atomic_uint_fast64_t
97 std::chrono::nanoseconds,
98 std::chrono::high_resolution_clock,
101 make_auto_timer(AccumulateToType& dest) {
103 std::chrono::nanoseconds,
104 std::chrono::high_resolution_clock,
112 typename DurationType = std::chrono::nanoseconds,
113 typename ClockType = std::chrono::high_resolution_clock,
114 typename AccumulateToType = std::atomic_uint_fast64_t
120 typedef DurationType duration_type;
121 typedef ClockType clock_type;
122 typedef AccumulateToType accumulate_to_type;
124 duration_type, clock_type, accumulate_to_type
127 duration_type, clock_type, accumulate_to_type
129 typedef decltype(accumulate_to_type().load()) accumulated_value_type;
141 accumulated_value_type total_time()
const {
145 template <
typename ToDuration>
146 double total_time_in()
148 return std::chrono::duration<double, typename ToDuration::period>(
149 std::chrono::duration<accumulated_value_type, typename DurationType::period>(
157 accumulate_to_type& dest_;
164 template <
bool default_val>
165 struct default_value_bool {
166 bool value = default_val;
167 inline operator bool(){
return value; }
172 typename DurationType = std::chrono::nanoseconds,
173 typename ClockType = std::chrono::high_resolution_clock,
174 typename AccumulateToType = std::atomic_uint_fast64_t
179 typedef DurationType duration_type;
180 typedef ClockType clock_type;
181 typedef AccumulateToType accumulate_to_type;
184 DurationType, ClockType, AccumulateToType
187 typedef madness::ConcurrentHashMap<
188 std::thread::id, default_value_bool<false>, std::hash<unsigned long>
189 > timer_active_flag_map;
194 void*
operator new(size_t) =
delete;
198 accumulate_to_type& dest,
199 timer_active_flag_map& flag_map
200 ) : is_outer_(flag_map[std::this_thread::get_id()]),
201 thread_has_timer_map_(flag_map),
204 start_ = clock_type::now();
210 auto end = clock_type::now();
211 dest_ += std::chrono::duration_cast<DurationType>(end - start_).count();
212 thread_has_timer_map_.erase(std::this_thread::get_id());
219 timer_active_flag_map& thread_has_timer_map_;
220 typename clock_type::time_point start_;
221 accumulate_to_type& dest_;
227 typename DurationType = std::chrono::nanoseconds,
228 typename ClockType = std::chrono::high_resolution_clock,
229 typename AccumulateToType = std::atomic_uint_fast64_t,
231 int max_n_thread_estimate = 40
244 DurationType, ClockType, AccumulateToType
248 DurationType, ClockType, AccumulateToType
252 DurationType, ClockType, AccumulateToType
255 typedef madness::ConcurrentHashMap<
256 std::thread::id, default_value_bool<false>, std::hash<unsigned long>
257 > timer_active_flag_map;
260 using super_t::time_accumulator_factory;
271 timer_active_flag_map thread_has_timer_{ max_n_thread_estimate };
285 typedef std::map<std::string, ThreadTimer> section_map;
286 typedef typename time_accumulator_factory<>::clock_type clock_type;
287 typedef std::chrono::time_point<clock_type> time_type;
288 typedef std::chrono::nanoseconds duration_type;
289 typedef std::chrono::duration<double> fp_seconds;
293 time_type begin_time_;
294 duration_type accum_time_{ 0 };
296 std::vector<std::string> section_names_;
297 section_map subtimers_;
300 std::string active_subname_ =
"";
301 bool stopped_{
true };
308 begin_time_ = clock_type::now();
313 accum_time_ += std::chrono::duration_cast<duration_type>(
314 clock_type::now() - begin_time_
323 : to_hold(to_hold), parent(parent)
334 active_subsection_(0),
337 if(start) this->start();
340 Holdable get_subtimer(
const std::string& subname,
bool start=
false);
342 void enter(
const std::string& subname) {
343 get_subtimer(subname,
true);
348 if(active_subsection_) {
349 active_subsection_->exit();
350 if(active_subsection_->stopped_) {
351 active_subsection_ = 0;
359 void change(
const std::string& newsub) {
364 bool is_stopped()
const {
return stopped_; }
366 double read_seconds()
const {
368 return fp_seconds(accum_time_).count();
377 ThreadTimer::Holdable ThreadTimer::get_subtimer(
const std::string& subname,
bool start) {
378 if(active_subsection_) {
379 return active_subsection_->get_subtimer(subname, start);
383 active_subname_ = subname;
384 auto subspot = subtimers_.find(subname);
385 if(subspot != subtimers_.end()) {
386 rv_ptr = &(subspot->second);
388 active_subsection_ = rv_ptr;
393 auto insertion_pair = subtimers_.emplace(
394 std::piecewise_construct,
395 std::forward_as_tuple(subname),
396 std::forward_as_tuple(depth_+1, start)
398 section_names_.push_back(subname);
399 assert(insertion_pair.second);
400 rv_ptr = &(insertion_pair.first->second);
402 active_subsection_ = rv_ptr;
405 return ThreadTimer::Holdable(rv_ptr,
this);
416 explicit TimerHolder(
const ThreadTimer::Holdable& to_hold)
417 : held(to_hold.to_hold), parent(to_hold.parent) {
419 parent->active_subsection_ = held;
425 parent->active_subsection_ = 0;
428 void change(ThreadTimer::Holdable&
other) {
429 assert(
other.parent == parent);
431 held =
other.to_hold;
432 parent->active_subsection_ = held;
440 std::vector<ThreadTimer> thread_timers_;
445 typedef typename accum_factory::clock_type clock_type;
446 typedef typename accum_factory::accumulate_to_type accumulate_to_type;
448 accumulate_to_type overhead_nanos_{ 0 };
451 typename clock_type::time_point wall_start;
452 typename clock_type::duration wall_time;
455 boost::thread::id creator_id_;
458 const std::vector<const ThreadTimer*>& subtimers,
459 const std::string& curr_name,
466 const std::vector<const ThreadTimer*>& subtimers,
467 const std::string& name,
471 double min = std::numeric_limits<double>::infinity();
473 for(
auto timer : subtimers) {
475 assert(timer->is_stopped());
476 const double time = timer->read_seconds();
478 if(time < min) min = time;
479 if(time > max) max = time;
481 const double avg = sum / (double)subtimers.size();
482 const std::string indent(indent_size,
' ');
483 out << std::setw(label_width) << std::left << (indent + name +
":")
489 std::vector<std::vector<bool>> dones;
490 for(
auto st : subtimers) { dones.emplace_back(st->section_names_.size(),
false); }
491 auto all = [](
const std::vector<bool> v) ->
bool {
492 for(
const auto& i : v){
497 auto first_false_index = [](
const std::vector<bool> v) ->
int {
499 for(
const auto& i : v){
506 std::vector<const ThreadTimer*> next_subs;
507 std::string curr_name;
508 bool name_found =
false;
509 for(
int i = 0; i < subtimers.size(); ++i) {
511 if(not all(dones[i])){
513 const int idx = first_false_index(dones[i]);
514 curr_name = sub->section_names_[idx];
515 dones[i][idx] =
true;
517 next_subs.push_back(&(sub->subtimers_.at(curr_name)));
520 if(sub->subtimers_.find(curr_name) != sub->subtimers_.end()) {
522 for(
const auto& subname : sub->section_names_) {
523 if(subname == curr_name) {
524 dones[i][iname] =
true;
525 next_subs.push_back(&(sub->subtimers_.at(curr_name)));
535 print_sub(out, indent_size+2, next_subs, curr_name, label_width);
549 creator_id_(boost::this_thread::get_id()),
551 overhead_factory_(overhead_nanos_)
553 wall_start = clock_type::now();
554 for(
int i = 0; i < nthreads_; ++i) {
555 thread_timers_.emplace_back(0);
559 void enter(
const std::string& subname,
int ithr) {
560 auto overtime = overhead_factory_.create();
561 thread_timers_[ithr].enter(subname);
564 void exit(
int ithr) {
565 auto overtime = overhead_factory_.create();
566 thread_timers_[ithr].exit();
569 ThreadTimer::Holdable
570 get_subtimer(
const std::string& subname,
int ithr) {
571 auto overtime = overhead_factory_.create();
572 return thread_timers_[ithr].get_subtimer(subname);
576 auto overtime = overhead_factory_.create();
577 const boost::thread::id& my_id = boost::this_thread::get_id();
578 assert(my_id == creator_id_);
579 for(
auto& tim : thread_timers_) tim.exit();
580 auto wall_stop = clock_type::now();
581 wall_time = wall_stop - wall_start;
584 void change(
const std::string& subname,
int ithr) {
585 auto overtime = overhead_factory_.create();
586 thread_timers_[ithr].change(subname);
595 const std::string& title =
""
598 std::vector<const ThreadTimer*> tim_ptrs;
599 for(
const auto& tim : thread_timers_) tim_ptrs.push_back(&tim);
600 const std::string indent = std::string(indent_size,
' ');
601 out << std::setw(label_width) << std::left << (indent + title)
602 << std::setw(8) << std::internal <<
"avg"
603 << std::setw(8) << std::internal <<
"min"
604 << std::setw(8) << std::internal <<
"max"
606 print_sub(out, indent_size, tim_ptrs, name_, label_width);
607 out << indent <<
"Timer overhead: " << std::setprecision(3)
608 << (double)((
unsigned long long)overhead_nanos_)/1.e9