MPQC 3.0.0-alpha
Loading...
Searching...
No Matches
thread_timer.h
1//
2// thread_timer.h
3//
4// Copyright (C) 2014 David Hollman
5//
6// Author: David Hollman
7// Maintainer: DSH
8// Created: Feb 7, 2014
9//
10// This file is part of the SC Toolkit.
11//
12// The SC Toolkit is free software; you can redistribute it and/or modify
13// it under the terms of the GNU Library General Public License as published by
14// the Free Software Foundation; either version 2, or (at your option)
15// any later version.
16//
17// The SC Toolkit is distributed in the hope that it will be useful,
18// but WITHOUT ANY WARRANTY; without even the implied warranty of
19// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20// GNU Library General Public License for more details.
21//
22// You should have received a copy of the GNU Library General Public License
23// along with the SC Toolkit; see the file COPYING.LIB. If not, write to
24// the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA.
25//
26// The U.S. Government is granted a limited license as per AL 91-7.
27//
28
29#ifndef _util_misc_thread_timer_h
30#define _util_misc_thread_timer_h
31
32// standard library includes
33#include <atomic>
34#include <chrono>
35#include <type_traits>
36#include <thread>
37#include <functional>
38#include <unordered_map>
39#include <vector>
40
41// boost includes
42#include <boost/thread/thread.hpp>
43
44// madness includes
45#include <madness/world/worldhashmap.h>
46
47// MPQC includes
48#include <util/misc/formio.h>
49#include <util/misc/regtime.h>
50
51namespace sc {
52
53template<
54 typename DurationType = std::chrono::nanoseconds,
55 typename ClockType = std::chrono::high_resolution_clock,
56 typename AccumulateToType = std::atomic_uint_fast64_t
57>
59
60 public:
61
62 typedef DurationType duration_type;
63 typedef ClockType clock_type;
64 typedef AccumulateToType accumulate_to_type;
66 duration_type, clock_type, accumulate_to_type
67 > self_type;
68
69 auto_time_accumulator() = delete;
71 auto_time_accumulator(const self_type&) = delete;
72 void* operator new(size_t) = delete;
74
75 auto_time_accumulator(accumulate_to_type& dest)
76 : dest_(dest)
77 {
78 start_ = clock_type::now();
79 }
80
82 {
83 auto end = clock_type::now();
84 dest_ += std::chrono::duration_cast<DurationType>(end - start_).count();
85 }
86
87 private:
88 typename clock_type::time_point start_;
89 accumulate_to_type& dest_;
90};
91
92
93template<
94 typename AccumulateToType = std::atomic_uint_fast64_t
95>
97 std::chrono::nanoseconds,
98 std::chrono::high_resolution_clock,
99 AccumulateToType
100>
101make_auto_timer(AccumulateToType& dest) {
103 std::chrono::nanoseconds,
104 std::chrono::high_resolution_clock,
105 AccumulateToType
106 >(dest);
107}
108
110
111template<
112 typename DurationType = std::chrono::nanoseconds,
113 typename ClockType = std::chrono::high_resolution_clock,
114 typename AccumulateToType = std::atomic_uint_fast64_t
115>
117
118 public:
119
120 typedef DurationType duration_type;
121 typedef ClockType clock_type;
122 typedef AccumulateToType accumulate_to_type;
124 duration_type, clock_type, accumulate_to_type
125 > self_type;
126 typedef auto_time_accumulator<
127 duration_type, clock_type, accumulate_to_type
129 typedef decltype(accumulate_to_type().load()) accumulated_value_type;
130
131 time_accumulator_factory() = delete;
132
133 explicit time_accumulator_factory(accumulate_to_type& dest)
134 : dest_(dest)
135 { }
136
137 generated_type create() const {
138 return generated_type(dest_);
139 }
140
141 accumulated_value_type total_time() const {
142 return dest_.load();
143 }
144
145 template <typename ToDuration>
146 double total_time_in()
147 {
148 return std::chrono::duration<double, typename ToDuration::period>(
149 std::chrono::duration<accumulated_value_type, typename DurationType::period>(
150 dest_.load()
151 )
152 ).count();
153 }
154
155 protected:
156
157 accumulate_to_type& dest_;
158
159};
160
162
// NOTE(review): an unnamed namespace in a header gives every translation
// unit its own distinct default_value_bool type; consider moving this to a
// named detail namespace once all users of the name are known.
namespace {
  // A bool whose default-constructed value is `default_val`.
  // Used in this file as the mapped type of the per-thread flag maps.
  template <bool default_val>
  struct default_value_bool {
    bool value = default_val;
    // Implicit read access to the flag. Const-qualified so the flag can
    // also be read from const objects and through const references.
    operator bool() const { return value; }
  };
}
170
171template<
172 typename DurationType = std::chrono::nanoseconds,
173 typename ClockType = std::chrono::high_resolution_clock,
174 typename AccumulateToType = std::atomic_uint_fast64_t
175>
177 public:
178
179 typedef DurationType duration_type;
180 typedef ClockType clock_type;
181 typedef AccumulateToType accumulate_to_type;
182
184 DurationType, ClockType, AccumulateToType
185 > self_type;
186
187 typedef madness::ConcurrentHashMap<
188 std::thread::id, default_value_bool<false>, std::hash<unsigned long>
189 > timer_active_flag_map;
190
194 void* operator new(size_t) = delete;
196
198 accumulate_to_type& dest,
199 timer_active_flag_map& flag_map
200 ) : is_outer_(flag_map[std::this_thread::get_id()]),
201 thread_has_timer_map_(flag_map),
202 dest_(dest)
203 {
204 start_ = clock_type::now();
205 }
206
208 {
209 if(is_outer_) {
210 auto end = clock_type::now();
211 dest_ += std::chrono::duration_cast<DurationType>(end - start_).count();
212 thread_has_timer_map_.erase(std::this_thread::get_id());
213 }
214 }
215
216 private:
217
218 bool is_outer_;
219 timer_active_flag_map& thread_has_timer_map_;
220 typename clock_type::time_point start_;
221 accumulate_to_type& dest_;
222
223};
224
225
226template<
227 typename DurationType = std::chrono::nanoseconds,
228 typename ClockType = std::chrono::high_resolution_clock,
229 typename AccumulateToType = std::atomic_uint_fast64_t,
230 // Used to estimate the optimal number of bins for the madness hash map to use
231 int max_n_thread_estimate = 40
232>
235 DurationType,
236 ClockType,
237 AccumulateToType
238 >
239{
240
241 public:
242
244 DurationType, ClockType, AccumulateToType
245 > self_type;
246
248 DurationType, ClockType, AccumulateToType
249 > super_t;
250
252 DurationType, ClockType, AccumulateToType
254
255 typedef madness::ConcurrentHashMap<
256 std::thread::id, default_value_bool<false>, std::hash<unsigned long>
257 > timer_active_flag_map;
258
259
260 using super_t::time_accumulator_factory;
261
262 generated_type create() const {
263 return generated_type(
264 super_t::dest_,
265 thread_has_timer_
266 );
267 }
268
269 private:
270
271 timer_active_flag_map thread_has_timer_{ max_n_thread_estimate };
272
273};
274
276
277class MultiThreadTimer;
278class ThreadTimer;
279class TimedRegion;
280
282
283 public:
284
285 typedef std::map<std::string, ThreadTimer> section_map;
286 typedef typename time_accumulator_factory<>::clock_type clock_type;
287 typedef std::chrono::time_point<clock_type> time_type;
288 typedef std::chrono::nanoseconds duration_type;
289 typedef std::chrono::duration<double> fp_seconds;
290
291 private:
292
293 time_type begin_time_;
294 duration_type accum_time_{ 0 };
295
296 std::vector<std::string> section_names_;
297 section_map subtimers_;
298
299 ThreadTimer* active_subsection_;
300 std::string active_subname_ = "";
301 bool stopped_{ true };
302
303 int depth_;
304
305 void start() {
306 assert(stopped_);
307 stopped_ = false;
308 begin_time_ = clock_type::now();
309 }
310
311 void stop() {
312 assert(!stopped_);
313 accum_time_ += std::chrono::duration_cast<duration_type>(
314 clock_type::now() - begin_time_
315 );
316 stopped_ = true;
317 }
318
319 struct Holdable {
320 ThreadTimer* to_hold;
321 ThreadTimer* parent;
322 Holdable(ThreadTimer* to_hold, ThreadTimer* parent)
323 : to_hold(to_hold), parent(parent)
324 { }
325 };
326
327 public:
328
329 //ThreadTimer() = delete;
330
331 explicit ThreadTimer(int depth, bool start=true)
332 : section_names_(0),
333 subtimers_(),
334 active_subsection_(0),
335 depth_(depth)
336 {
337 if(start) this->start();
338 }
339
340 Holdable get_subtimer(const std::string& subname, bool start=false);
341
342 void enter(const std::string& subname) {
343 get_subtimer(subname, true);
344 }
345
346 void exit() {
347 // TODO This should throw exceptions on failure rather than just asserting
348 if(active_subsection_) {
349 active_subsection_->exit();
350 if(active_subsection_->stopped_) {
351 active_subsection_ = 0;
352 }
353 }
354 else{
355 this->stop();
356 }
357 }
358
359 void change(const std::string& newsub) {
360 this->exit();
361 enter(newsub);
362 }
363
364 bool is_stopped() const { return stopped_; }
365
366 double read_seconds() const {
367 assert(stopped_);
368 return fp_seconds(accum_time_).count();
369 }
370
371 friend class MultiThreadTimer;
372 friend class TimerHolder;
373
374};
375
376inline
377ThreadTimer::Holdable ThreadTimer::get_subtimer(const std::string& subname, bool start) {
378 if(active_subsection_) {
379 return active_subsection_->get_subtimer(subname, start);
380 }
381 else {
382 ThreadTimer* rv_ptr;
383 active_subname_ = subname;
384 auto subspot = subtimers_.find(subname);
385 if(subspot != subtimers_.end()) {
386 rv_ptr = &(subspot->second);
387 if(start) {
388 active_subsection_ = rv_ptr;
389 rv_ptr->start();
390 }
391 }
392 else {
393 auto insertion_pair = subtimers_.emplace(
394 std::piecewise_construct,
395 std::forward_as_tuple(subname),
396 std::forward_as_tuple(depth_+1, start)
397 );
398 section_names_.push_back(subname);
399 assert(insertion_pair.second);
400 rv_ptr = &(insertion_pair.first->second);
401 if(start) {
402 active_subsection_ = rv_ptr;
403 }
404 }
405 return ThreadTimer::Holdable(rv_ptr, this);
406 }
407}
408
410
411 ThreadTimer* held;
412 ThreadTimer* parent;
413
414 public:
415
416 explicit TimerHolder(const ThreadTimer::Holdable& to_hold)
417 : held(to_hold.to_hold), parent(to_hold.parent) {
418 held->start();
419 parent->active_subsection_ = held;
420 }
421
423 {
424 held->stop();
425 parent->active_subsection_ = 0;
426 }
427
428 void change(ThreadTimer::Holdable& other) {
429 assert(other.parent == parent);
430 held->stop();
431 held = other.to_hold;
432 parent->active_subsection_ = held;
433 held->start();
434 }
435
436};
437
439
440 std::vector<ThreadTimer> thread_timers_;
441 int nthreads_;
442 std::string name_;
443
445 typedef typename accum_factory::clock_type clock_type;
446 typedef typename accum_factory::accumulate_to_type accumulate_to_type;
447
448 accumulate_to_type overhead_nanos_{ 0 };
449 accum_factory overhead_factory_{ overhead_nanos_ };
450
451 typename clock_type::time_point wall_start;
452 typename clock_type::duration wall_time;
453
454
455 boost::thread::id creator_id_;
456
457 TimedRegion* collect_regions_recursive(
458 const std::vector<const ThreadTimer*>& subtimers,
459 const std::string& curr_name,
460 TimedRegion* parent
461 ) const;
462
463 void print_sub(
464 std::ostream& out,
465 int indent_size,
466 const std::vector<const ThreadTimer*>& subtimers,
467 const std::string& name,
468 int label_width
469 ) {
470 double sum = 0.0;
471 double min = std::numeric_limits<double>::infinity();
472 double max = 0.0;
473 for(auto timer : subtimers) {
474 // TODO throw exception rather than just asserting
475 assert(timer->is_stopped());
476 const double time = timer->read_seconds();
477 sum += time;
478 if(time < min) min = time;
479 if(time > max) max = time;
480 }
481 const double avg = sum / (double)subtimers.size();
482 const std::string indent(indent_size, ' ');
483 out << std::setw(label_width) << std::left << (indent + name + ":")
484 << scprintf("%7.2f %7.2f %7.2f",
485 avg, min, max
486 )
487 << std::endl;
488 //----------------------------------------//
489 std::vector<std::vector<bool>> dones;
490 for(auto st : subtimers) { dones.emplace_back(st->section_names_.size(), false); }
491 auto all = [](const std::vector<bool> v) -> bool {
492 for(const auto& i : v){
493 if(!i) return false;
494 }
495 return true;
496 };
497 auto first_false_index = [](const std::vector<bool> v) -> int {
498 int idx = 0;
499 for(const auto& i : v){
500 if(!i) return idx;
501 else ++idx;
502 }
503 return -1;
504 };
505 while(true){
506 std::vector<const ThreadTimer*> next_subs;
507 std::string curr_name;
508 bool name_found = false;
509 for(int i = 0; i < subtimers.size(); ++i) {
510 const ThreadTimer* sub = subtimers[i];
511 if(not all(dones[i])){
512 if(not name_found){
513 const int idx = first_false_index(dones[i]);
514 curr_name = sub->section_names_[idx];
515 dones[i][idx] = true;
516 name_found = true;
517 next_subs.push_back(&(sub->subtimers_.at(curr_name)));
518 }
519 else {
520 if(sub->subtimers_.find(curr_name) != sub->subtimers_.end()) {
521 int iname = 0;
522 for(const auto& subname : sub->section_names_) {
523 if(subname == curr_name) {
524 dones[i][iname] = true;
525 next_subs.push_back(&(sub->subtimers_.at(curr_name)));
526 break;
527 }
528 else ++iname;
529 }
530 }
531 }
532 }
533 } // end loop over subtimers
534 if(name_found){
535 print_sub(out, indent_size+2, next_subs, curr_name, label_width);
536 }
537 else{
538 break;
539 }
540 } // end while all_done
541
542 }
543
544 public:
545
546 MultiThreadTimer(const std::string& name, int nthreads)
547 : name_(name),
548 nthreads_(nthreads),
549 creator_id_(boost::this_thread::get_id()),
550 overhead_nanos_(0),
551 overhead_factory_(overhead_nanos_)
552 {
553 wall_start = clock_type::now();
554 for(int i = 0; i < nthreads_; ++i) {
555 thread_timers_.emplace_back(0);
556 }
557 }
558
559 void enter(const std::string& subname, int ithr) {
560 auto overtime = overhead_factory_.create();
561 thread_timers_[ithr].enter(subname);
562 }
563
564 void exit(int ithr) {
565 auto overtime = overhead_factory_.create();
566 thread_timers_[ithr].exit();
567 }
568
569 ThreadTimer::Holdable
570 get_subtimer(const std::string& subname, int ithr) {
571 auto overtime = overhead_factory_.create();
572 return thread_timers_[ithr].get_subtimer(subname);
573 }
574
575 void exit() {
576 auto overtime = overhead_factory_.create();
577 const boost::thread::id& my_id = boost::this_thread::get_id();
578 assert(my_id == creator_id_);
579 for(auto& tim : thread_timers_) tim.exit();
580 auto wall_stop = clock_type::now();
581 wall_time = wall_stop - wall_start;
582 }
583
584 void change(const std::string& subname, int ithr) {
585 auto overtime = overhead_factory_.create();
586 thread_timers_[ithr].change(subname);
587 }
588
589 TimedRegion* make_timed_region() const;
590
591 void print(
592 std::ostream& out=ExEnv::out0(),
593 int indent_size = 0,
594 int label_width=50,
595 const std::string& title = ""
596 ) {
597
598 std::vector<const ThreadTimer*> tim_ptrs;
599 for(const auto& tim : thread_timers_) tim_ptrs.push_back(&tim);
600 const std::string indent = std::string(indent_size, ' ');
601 out << std::setw(label_width) << std::left << (indent + title)
602 << std::setw(8) << std::internal << "avg"
603 << std::setw(8) << std::internal << "min"
604 << std::setw(8) << std::internal << "max"
605 << std::endl;
606 print_sub(out, indent_size, tim_ptrs, name_, label_width);
607 out << indent << "Timer overhead: " << std::setprecision(3)
608 << (double)((unsigned long long)overhead_nanos_)/1.e9
609 << std::endl;
610 // TODO walltime/thread time ratio and efficiency
611 }
612
613};
614
615//############################################################################//
616
617} // end namespace sc
618
619#endif /* _util_misc_thread_timer_h */
static std::ostream & out0()
Return an ostream that writes from node 0.
Definition thread_timer.h:438
Definition thread_timer.h:281
TimedRegion is a helper class for RegionTimer.
Definition regtime.h:47
Definition thread_timer.h:409
Definition thread_timer.h:58
Definition thread_timer.h:176
Definition thread_timer.h:239
This class allows printf-like output to be sent to an ostream.
Definition formio.h:97
Definition thread_timer.h:116
SpinCase1 other(SpinCase1 S)
given 1-spin return the other 1-spin
Contains all MPQC code up to version 3.
Definition mpqcin.h:14

Generated at Wed Sep 25 2024 02:45:31 for MPQC 3.0.0-alpha using the documentation package Doxygen 1.12.0.