Line data Source code
1 : // Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2 : // SPDX-License-Identifier: Apache-2.0
3 :
4 : #ifndef PALACE_UTILS_TIMER_HPP
5 : #define PALACE_UTILS_TIMER_HPP
6 :
7 : #include <chrono>
8 : #include <stack>
9 : #include <string>
10 : #include <vector>
11 : #include "utils/communication.hpp"
12 :
13 : namespace palace
14 : {
15 :
16 : //
17 : // Timer classes for profiling.
18 : //
19 :
class Timer
{
public:
  // Monotonic clock used for all measurements; durations are stored as double-precision
  // seconds.
  using Clock = std::chrono::steady_clock;
  using Duration = std::chrono::duration<double>;
  using TimePoint = Clock::time_point;

  // Timing categories. Each enumerator indexes one accumulator slot; NUM_TIMINGS is the
  // number of slots and is not itself a valid category.
  enum Index
  {
    INIT = 0,
    MESH_PREPROCESS,       // Preprocessing mesh
    CONSTRUCT,             // Space and operator construction
    WAVE_PORT,             // Wave port solver
    KSP,                   // Linear solver
    KSP_SETUP,             // Linear solver setup
    KSP_PRECONDITIONER,    // Linear solver preconditioner
    KSP_COARSE_SOLVE,      // Linear solver coarse-level solve
    TS,                    // Time integrator
    EPS,                   // Eigenvalue problem solver
    DIV_FREE,              // Divergence-free projection
    CONSTRUCT_PROM,        // Adaptive frequency sweep offline
    SOLVE_PROM,            // Adaptive frequency sweep online
    ESTIMATION,            // Error estimation
    CONSTRUCT_ESTIMATOR,   // Construction of estimator
    SOLVE_ESTIMATOR,       // Evaluation of estimator
    ADAPTATION,            // Adaptation
    REBALANCE,             // Rebalancing
    POSTPRO,               // Solution postprocessing
    POSTPRO_FARFIELD,      // Computing far-fields
    POSTPRO_PARAVIEW,      // Paraview calculations and I/O
    POSTPRO_GRIDFUNCTION,  // MFEM gridfunction calculations and I/O
    IO,                    // Disk I/O
    TOTAL,
    NUM_TIMINGS
  };

  // Report labels, one per category from INIT through TOTAL, listed in enum order.
  // clang-format off
  inline static const std::vector<std::string> descriptions{
    "Initialization",
    " Mesh Preprocessing",
    "Operator Construction",
    " Wave Ports",
    "Linear Solve",
    " Setup",
    " Preconditioner",
    " Coarse Solve",
    "Time Stepping",
    "Eigenvalue Solve",
    "Div.-Free Projection",
    "PROM Construction",
    "PROM Solve",
    "Estimation",
    " Construction",
    " Solve",
    "Adaptation",
    " Rebalancing",
    "Postprocessing",
    " Far Fields",
    " Paraview",
    " Grid function",
    "Disk IO",
    "Total"};
  // clang-format on

private:
  // Declaration order matters: last_lap_time is initialized from start_time.
  const TimePoint start_time;
  TimePoint last_lap_time;
  std::vector<Duration> data;  // Accumulated time per category
  std::vector<int> counts;     // Number of counted recordings per category

public:
  // Start the timer: the creation time doubles as the first lap reference, and every
  // category starts with zero elapsed time and a zero count.
  Timer()
    : start_time(Now()), last_lap_time(start_time), data(NUM_TIMINGS, Duration::zero()),
      counts(NUM_TIMINGS, 0)
  {
  }

  // Sample the clock.
  static TimePoint Now() { return Clock::now(); }

  // Stopwatch split: return the time elapsed since the previous lap (or since construction
  // for the first call) and make the current instant the new lap reference.
  Duration Lap()
  {
    const TimePoint now = Now();
    const Duration split = now - last_lap_time;
    last_lap_time = now;
    return split;
  }

  // Time elapsed since this timer was constructed.
  Duration TimeFromStart() const
  {
    const Duration elapsed = Now() - start_time;
    return elapsed;
  }

  // Take a lap and record the split under idx. Returns the updated stored value for idx.
  Duration MarkTime(Index idx, bool count_it = true)
  {
    const Duration split = Lap();
    return MarkTime(idx, split, count_it);
  }

  // Record a caller-supplied duration under idx without touching the lap reference. TOTAL
  // is overwritten with the given duration, while every other category accumulates. The
  // call count for idx is incremented only when count_it is true. Returns the updated
  // stored value for idx.
  Duration MarkTime(Index idx, Duration time, bool count_it = true)
  {
    Duration &slot = data[idx];
    slot = (idx == Timer::TOTAL) ? time : slot + time;
    if (count_it)
    {
      ++counts[idx];
    }
    return slot;
  }

  // Read-only access to the stored time for idx, in seconds.
  double Data(Index idx) const
  {
    const Duration &slot = data[idx];
    return slot.count();
  }

  // Number of counted recordings for idx, i.e. how many times timer.MarkTime(idx) or
  // BlockTimer b(idx) was invoked with counting enabled.
  int Counts(Index idx) const { return counts[idx]; }
};
137 :
138 : class BlockTimer
139 : {
140 : using Index = Timer::Index;
141 :
142 : private:
143 : inline static Timer timer;
144 : inline static std::stack<Index> stack;
145 : bool count;
146 :
147 : // Reduce timing information across MPI ranks.
148 0 : static void Reduce(MPI_Comm comm, std::vector<double> &data_min,
149 : std::vector<double> &data_max, std::vector<double> &data_avg)
150 : {
151 0 : data_min.resize(Timer::NUM_TIMINGS);
152 0 : data_max.resize(Timer::NUM_TIMINGS);
153 0 : data_avg.resize(Timer::NUM_TIMINGS);
154 0 : for (int i = Timer::INIT; i < Timer::NUM_TIMINGS; i++)
155 : {
156 0 : data_min[i] = data_max[i] = data_avg[i] = timer.Data((Timer::Index)i);
157 : }
158 :
159 : Mpi::GlobalMin(Timer::NUM_TIMINGS, data_min.data(), comm);
160 : Mpi::GlobalMax(Timer::NUM_TIMINGS, data_max.data(), comm);
161 : Mpi::GlobalSum(Timer::NUM_TIMINGS, data_avg.data(), comm);
162 :
163 : const int np = Mpi::Size(comm);
164 0 : for (int i = Timer::INIT; i < Timer::NUM_TIMINGS; i++)
165 : {
166 0 : data_avg[i] /= np;
167 : }
168 0 : }
169 :
170 : public:
171 71 : BlockTimer(Index i, bool count = true) : count(count)
172 : {
173 : // Start timing when entering the block, interrupting whatever we were timing before.
174 : // Take note of what we are now timing.
175 71 : if (count)
176 : {
177 117 : stack.empty() ? timer.Lap() : timer.MarkTime(stack.top(), false);
178 : stack.push(i);
179 : }
180 71 : }
181 :
182 71 : ~BlockTimer()
183 : {
184 : // When a BlockTimer is no longer in scope, record the time (check whether stack is
185 : // empty in case the timer has already been finalized).
186 71 : if (count && !stack.empty())
187 : {
188 71 : timer.MarkTime(stack.top());
189 : stack.pop();
190 : }
191 71 : }
192 :
193 : // Read-only access the static Timer object.
194 : static const Timer &GlobalTimer() { return timer; }
195 :
196 : // Print timing information after reducing the data across all processes.
197 0 : static void Print(MPI_Comm comm)
198 : {
199 0 : while (!stack.empty())
200 : {
201 0 : timer.MarkTime(stack.top());
202 : stack.pop();
203 : }
204 : timer.MarkTime(Timer::TOTAL, timer.TimeFromStart());
205 :
206 : // Reduce timing data.
207 : std::vector<double> data_min, data_max, data_avg;
208 0 : Reduce(comm, data_min, data_max, data_avg);
209 :
210 : // Print a nice table of the timing data.
211 0 : constexpr int p = 3; // Floating point precision
212 0 : constexpr int w = 12; // Data column width
213 0 : constexpr int h = 26; // Left-hand side width
214 : // clang-format off
215 0 : Mpi::Print(comm, "\n{:<{}s}{:>{}s}{:>{}s}{:>{}s}\n",
216 : "Elapsed Time Report (s)", h, "Min.", w, "Max.", w, "Avg.", w);
217 : // clang-format on
218 0 : Mpi::Print(comm, "{}\n", std::string(h + 3 * w, '='));
219 0 : for (int i = Timer::INIT; i < Timer::NUM_TIMINGS; i++)
220 : {
221 0 : if (timer.Counts((Timer::Index)i) > 0)
222 : {
223 0 : if (i == Timer::TOTAL)
224 : {
225 0 : Mpi::Print(comm, "{}\n", std::string(h + 3 * w, '-'));
226 : }
227 : // clang-format off
228 0 : Mpi::Print(comm, "{:<{}s}{:{}.{}f}{:{}.{}f}{:{}.{}f}\n",
229 : timer.descriptions[i], h,
230 0 : data_min[i], w, p, data_max[i], w, p, data_avg[i], w, p);
231 : // clang-format on
232 : }
233 : }
234 0 : }
235 : };
236 :
237 : } // namespace palace
238 :
239 : #endif // PALACE_UTILS_TIMER_HPP
|