Line data Source code
1 : ! Copyright 2014 Lorenz Hüdepohl
2 : !
3 : ! This file is part of ftimings.
4 : !
5 : ! ftimings is free software: you can redistribute it and/or modify
6 : ! it under the terms of the GNU Lesser General Public License as published by
7 : ! the Free Software Foundation, either version 3 of the License, or
8 : ! (at your option) any later version.
9 : !
10 : ! ftimings is distributed in the hope that it will be useful,
11 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
12 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 : ! GNU Lesser General Public License for more details.
14 : !
15 : ! You should have received a copy of the GNU Lesser General Public License
16 : ! along with ftimings. If not, see <http://www.gnu.org/licenses/>.
17 :
18 : #ifdef HAVE_CONFIG_H
19 : #include "config-f90.h"
20 : #endif
21 :
22 : !> \mainpage Ftimings
23 : !>
24 : !> An almost pure-fortran attempt to play with tree structures, which evolved
25 : !> into the timing library used e.g. by the VERTEX supernova code.
26 : !>
27 : !> All you need to know is contained in the \ref ftimings::timer_t derived type.
28 : module ftimings
29 : use ftimings_type
30 : use ftimings_value
31 : use, intrinsic :: iso_fortran_env, only : error_unit, output_unit
32 : implicit none
33 : save
34 :
35 : private
36 :
37 : ! this is mainly needed for Doxygen, they are
38 : ! by implicitly reachable as type-bound procedures
39 : ! of timer_t, however Doxygen does not document them
40 : ! if they are not also public
41 : public timer_start, timer_stop, timer_free, timer_print, &
42 : timer_enable, timer_disable, timer_is_enabled, &
43 : timer_in_entries, timer_get, timer_since, timer_sort, &
44 : timer_set_print_options, &
45 : timer_measure_flops, &
46 : timer_measure_allocated_memory, &
47 : timer_measure_virtual_memory, &
48 : timer_measure_max_allocated_memory, &
49 : timer_measure_memory_bandwidth
50 :
51 : character(len=name_length), private, parameter :: own = "(own)"
52 : character(len=name_length), private, parameter :: below = "(below threshold)"
53 :
54 : !> Type for a timer instance.
55 : !>
56 : !> Typical usage:
57 : !> \code{.f90}
58 : !> type(timer_t) :: timer
59 : !>
60 : !> call timer%enable()
61 : !>
62 : !> call timer%start("section")
63 : !> ...
64 : !> call timer%start("subsection")
65 : !> ...
66 : !> call timer%stop("subsection")
67 : !> ...
68 : !> call timer%stop("section")
69 : !>
70 : !> call timer%print()
71 : !> \endcode
72 : !>
73 : !> Every first call to timer%start() at a certain point in the graph
74 : !> allocates a small amount of memory. If the timer is no longer needed,
75 : !> all that memory can be freed again with
76 : !>
77 : !> \code{.f90}
78 : !> call timer%free()
79 : !> \endcode
80 : type, public :: timer_t
81 : logical, private :: active = .false. !< If set to .false., most operations return immediately without any action
82 : logical, private :: record_allocated_memory = .false. !< IF set to .true., record also the current resident set size
83 : logical, private :: record_virtual_memory = .false. !< IF set to .true., record also the virtual memory
84 : logical, private :: record_max_allocated_memory = .false. !< IF set to .true., record also the max resident set size ("high water mark")
85 : logical, private :: record_flop_counts = .false. !< If set to .true., record also FLOP counts via PAPI calls
86 : logical, private :: record_memory_bandwidth = .false. !< If set to .true., record also FLOP counts via PAPI calls
87 :
88 : logical, private :: print_allocated_memory = .false.
89 : logical, private :: print_max_allocated_memory = .false.
90 : logical, private :: print_virtual_memory = .false.
91 : logical, private :: print_flop_count = .false.
92 : logical, private :: print_flop_rate = .false.
93 : logical, private :: print_ldst = .false.
94 : logical, private :: print_memory_bandwidth = .false.
95 : logical, private :: print_ai = .false.
96 : integer, private :: bytes_per_ldst = 8
97 :
98 : type(node_t), private, pointer :: root => NULL() !< Start of graph
99 : type(node_t), private, pointer :: current_node => NULL() !< Current position in the graph
100 : contains
101 : procedure, pass :: start => timer_start
102 : procedure, pass :: stop => timer_stop
103 : procedure, pass :: free => timer_free
104 : procedure, pass :: print => timer_print
105 : procedure, pass :: enable => timer_enable
106 : procedure, pass :: disable => timer_disable
107 : procedure, pass :: is_enabled => timer_is_enabled
108 : procedure, pass :: measure_flops => timer_measure_flops
109 : procedure, pass :: measure_allocated_memory => timer_measure_allocated_memory
110 : procedure, pass :: measure_virtual_memory => timer_measure_virtual_memory
111 : procedure, pass :: measure_max_allocated_memory => timer_measure_max_allocated_memory
112 : procedure, pass :: measure_memory_bandwidth => timer_measure_memory_bandwidth
113 : procedure, pass :: set_print_options => timer_set_print_options
114 : procedure, pass :: in_entries => timer_in_entries
115 : procedure, pass :: get => timer_get
116 : procedure, pass :: since => timer_since
117 : procedure, pass :: sort => timer_sort
118 : end type
119 :
120 : ! Private type node_t, representing a graph node
121 : !
122 : type :: node_t
123 : character(len=name_length) :: name ! Descriptive name, used when printing the timings
124 : integer :: count = 0 ! Number of node_stop calls
125 : type(value_t) :: value ! The actual counter data, see ftimings_values.F90
126 : logical :: is_running = .false. ! .true. if still running
127 : type(node_t), pointer :: firstChild => NULL()
128 : type(node_t), pointer :: lastChild => NULL()
129 : type(node_t), pointer :: parent => NULL()
130 : type(node_t), pointer :: nextSibling => NULL()
131 : class(timer_t), pointer :: timer
132 : contains
133 : procedure, pass :: now => node_now
134 : procedure, pass :: start => node_start
135 : procedure, pass :: stop => node_stop
136 : procedure, pass :: get_value => node_get_value
137 : procedure, pass :: new_child => node_new_child
138 : procedure, pass :: get_child => node_get_child
139 : procedure, pass :: sum_of_children => node_sum_of_children
140 : procedure, pass :: sum_of_children_with_name => node_sum_of_children_with_name
141 : procedure, pass :: sum_of_children_below => node_sum_of_children_below
142 : procedure, pass :: print => node_print
143 : procedure, pass :: print_graph => node_print_graph
144 : procedure, pass :: sort_children => node_sort_children
145 : end type
146 :
147 : interface
148 : function microseconds_since_epoch() result(us) bind(C, name="ftimings_microseconds_since_epoch")
149 : use, intrinsic :: iso_c_binding
150 : implicit none
151 : integer(kind=C_INT64_T) :: us
152 : end function
153 : end interface
154 :
155 : #ifdef HAVE_LIBPAPI
156 : interface
157 : function flop_init() result(ret) bind(C, name="ftimings_flop_init")
158 : use, intrinsic :: iso_c_binding
159 : implicit none
160 : integer(kind=C_INT) :: ret
161 : end function
162 : end interface
163 :
164 : interface
165 : function loads_stores_init() result(ret) bind(C, name="ftimings_loads_stores_init")
166 : use, intrinsic :: iso_c_binding
167 : implicit none
168 : integer(kind=C_INT) :: ret
169 : end function
170 : end interface
171 :
172 : interface
173 : subroutine papi_counters(flops, ldst) bind(C, name="ftimings_papi_counters")
174 : use, intrinsic :: iso_c_binding
175 : implicit none
176 : integer(kind=C_LONG_LONG), intent(out) :: flops, ldst
177 : end subroutine
178 : end interface
179 : #endif
180 :
181 : interface
182 : function resident_set_size() result(rsssize) bind(C, name="ftimings_resident_set_size")
183 : use, intrinsic :: iso_c_binding
184 : implicit none
185 : integer(kind=C_LONG) :: rsssize
186 : end function
187 : end interface
188 :
189 : interface
190 : function virtual_memory() result(virtualmem) bind(C, name="ftimings_virtual_memory")
191 : use, intrinsic :: iso_c_binding
192 : implicit none
193 : integer(kind=C_LONG) :: virtualmem
194 : end function
195 : end interface
196 :
197 : interface
198 : function max_resident_set_size() result(maxrsssize) bind(C, name="ftimings_highwater_mark")
199 : use, intrinsic :: iso_c_binding
200 : implicit none
201 : integer(kind=C_LONG) :: maxrsssize
202 : end function
203 : end interface
204 :
205 : contains
206 :
207 : !> Activate the timer, without this, most methods are non-ops.
208 : !>
209 51840 : subroutine timer_enable(self)
210 : class(timer_t), intent(inout), target :: self
211 :
212 51840 : self%active = .true.
213 51840 : end subroutine
214 :
215 : !> Call with enabled = .true. to also record amount of newly allocated memory.
216 : !> By default, memory usage is not recored. Call with .false. to deactivate again.
217 : !>
218 : !> This opens /proc/self/statm, parses it, and closes it agagain and is thus
219 : !> quite costly, use when appropriate.
220 : !>
221 0 : subroutine timer_measure_allocated_memory(self, enabled)
222 : class(timer_t), intent(inout) :: self
223 : logical, intent(in) :: enabled
224 :
225 0 : self%record_allocated_memory = enabled
226 0 : end subroutine
227 :
228 : !> Call with enabled = .true. to also record amount of newly created virtual memory.
229 : !> By default, memory usage is not recored. Call with .false. to deactivate again.
230 : !>
231 : !> This opens /proc/self/statm, parses it, and closes it agagain and is thus
232 : !> quite costly, use when appropriate.
233 : !>
234 0 : subroutine timer_measure_virtual_memory(self, enabled)
235 : class(timer_t), intent(inout) :: self
236 : logical, intent(in) :: enabled
237 :
238 0 : self%record_virtual_memory = enabled
239 0 : end subroutine
240 :
241 : !> Call with enabled = .true. to also record amount of newly increase of max.
242 : !> resident memory
243 : !> By default, memory usage is not recored. Call with .false. to deactivate again.
244 : !>
245 : !> This opens /proc/self/status, parses it, and closes it agagain and is thus
246 : !> quite costly, use when appropriate.
247 : !>
248 0 : subroutine timer_measure_max_allocated_memory(self, enabled)
249 : class(timer_t), intent(inout) :: self
250 : logical, intent(in) :: enabled
251 :
252 0 : self%record_max_allocated_memory = enabled
253 0 : end subroutine
254 :
255 : !> Call with enabled = .true. to also record the memory bandwidth with PAPI
256 : !> By default, this is not recorded. Call with .false. to deactivate again.
257 : !>
258 0 : subroutine timer_measure_memory_bandwidth(self, enabled)
259 : class(timer_t), intent(inout) :: self
260 : logical, intent(in) :: enabled
261 :
262 0 : if (enabled) then
263 : #ifdef HAVE_LIBPAPI
264 : if (loads_stores_init() == 1) then
265 : self%record_memory_bandwidth = .true.
266 : else
267 : write(0,'(a)') "ftimings: Could not initialize PAPI, disabling memory bandwidth counter"
268 : self%record_memory_bandwidth = .false.
269 : endif
270 : #else
271 0 : write(0,'(a)') "ftimings: not compiled with PAPI support, disabling memory bandwidth counter"
272 0 : self%record_memory_bandwidth = .false.
273 : #endif
274 : else
275 : ! explicitly set to .false. by caller
276 0 : self%record_memory_bandwidth = .false.
277 : endif
278 0 : end subroutine
279 :
280 : !> Call with enabled = .true. to also record FLOP counts via PAPI calls.
281 : !> By default no FLOPS are recored. Call with .false. to deactivate again.
282 : !>
283 0 : subroutine timer_measure_flops(self, enabled)
284 : class(timer_t), intent(inout) :: self
285 : logical, intent(in) :: enabled
286 :
287 0 : if (enabled) then
288 : #ifdef HAVE_LIBPAPI
289 : if (flop_init() == 1) then
290 : self%record_flop_counts = .true.
291 : else
292 : write(0,'(a)') "ftimings: Could not initialize PAPI, disabling FLOP counter"
293 : self%record_flop_counts = .false.
294 : endif
295 : #else
296 0 : write(0,'(a)') "ftimings: not compiled with PAPI support, disabling FLOP counter"
297 0 : self%record_flop_counts = .false.
298 : #endif
299 : else
300 : ! Explicitly set to .false. by caller
301 0 : self%record_flop_counts = .false.
302 : endif
303 0 : end subroutine
304 :
305 : !> Deactivate the timer
306 : !>
307 0 : subroutine timer_disable(self)
308 : class(timer_t), intent(inout), target :: self
309 0 : self%active = .false.
310 0 : end subroutine
311 :
312 : !> Return whether the timer is currently running
313 : !>
314 0 : function timer_is_enabled(self) result(is)
315 : class(timer_t), intent(inout), target :: self
316 : logical :: is
317 0 : is = self%active
318 0 : end function
319 :
320 : !> Control what to print on following %print calls
321 : !>
322 : !> \param print_allocated_memory Amount of newly allocated,
323 : !> resident memory
324 : !> \param print_virtual_memory Amount of newly created virtual
325 : !> memory
326 : !> \param print_max_allocated_memory Amount of new increase of max.
327 : !> resident memory ("high water mark")
328 : !> \param print_flop_count Number of floating point operations
329 : !> \param print_flop_rate Rate of floating point operations per second
330 : !> \param print_ldst Number of loads+stores
331 : !> \param print_memory_bandwidth Rate of loads+stores per second
332 : !> \param print_ai Arithmetic intensity, that is number of
333 : !> floating point operations per
334 : !> number of load and store
335 : !> operations (currently untested)
336 : !> \param bytes_per_ldst For calculating the AI, assume this number
337 : !> of bytes per load or store (default: 8)
338 0 : subroutine timer_set_print_options(self, &
339 : print_allocated_memory, &
340 : print_virtual_memory, &
341 : print_max_allocated_memory, &
342 : print_flop_count, &
343 : print_flop_rate, &
344 : print_ldst, &
345 : print_memory_bandwidth, &
346 : print_ai, &
347 : bytes_per_ldst)
348 : class(timer_t), intent(inout) :: self
349 : logical, intent(in), optional :: &
350 : print_allocated_memory, &
351 : print_virtual_memory, &
352 : print_max_allocated_memory, &
353 : print_flop_count, &
354 : print_flop_rate, &
355 : print_ldst, &
356 : print_memory_bandwidth, &
357 : print_ai
358 : integer, intent(in), optional :: bytes_per_ldst
359 :
360 0 : if (present(print_allocated_memory)) then
361 0 : self%print_allocated_memory = print_allocated_memory
362 0 : if ((.not. self%record_allocated_memory) .and. self%print_allocated_memory) then
363 0 : write(0,'(a)') "ftimings: Warning: RSS size recording was disabled, expect zeros!"
364 : endif
365 : endif
366 :
367 0 : if (present(print_virtual_memory)) then
368 0 : self%print_virtual_memory = print_virtual_memory
369 0 : if ((.not. self%record_virtual_memory) .and. self%print_virtual_memory) then
370 0 : write(0,'(a)') "ftimings: Warning: Virtual memory recording was disabled, expect zeros!"
371 : endif
372 : endif
373 :
374 0 : if (present(print_max_allocated_memory)) then
375 0 : self%print_max_allocated_memory = print_max_allocated_memory
376 0 : if ((.not. self%record_max_allocated_memory) .and. self%print_max_allocated_memory) then
377 0 : write(0,'(a)') "ftimings: Warning: HWM recording was disabled, expect zeros!"
378 : endif
379 : endif
380 :
381 0 : if (present(print_flop_count)) then
382 0 : self%print_flop_count = print_flop_count
383 0 : if ((.not. self%record_flop_counts) .and. self%print_flop_count) then
384 0 : write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!"
385 : endif
386 : endif
387 :
388 0 : if (present(print_flop_rate)) then
389 0 : self%print_flop_rate = print_flop_rate
390 0 : if ((.not. self%record_flop_counts) .and. self%print_flop_rate) then
391 0 : write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!"
392 : endif
393 : endif
394 :
395 0 : if (present(print_ldst)) then
396 0 : self%print_ldst = print_ldst
397 0 : if ((.not. self%record_memory_bandwidth) .and. self%print_ldst) then
398 0 : write(0,'(a)') "ftimings: Warning: Load+Store counters were disabled, expect zeros!"
399 : endif
400 : endif
401 0 : if (present(print_memory_bandwidth)) then
402 0 : self%print_memory_bandwidth = print_memory_bandwidth
403 0 : if ((.not. self%record_memory_bandwidth) .and. self%print_memory_bandwidth) then
404 0 : write(0,'(a)') "ftimings: Warning: Load+Store counters were disabled, expect zeros for memory bandwidth!"
405 : endif
406 : endif
407 :
408 0 : if (present(print_ai)) then
409 0 : self%print_ai = print_ai
410 0 : if (.not. (self%record_memory_bandwidth .and. self%record_flop_counts)) then
411 0 : write(0,'(a)') "ftimings: Warning: Memory bandwidth or FLOP counters were disabled, expect invalid values for AI"
412 : endif
413 : endif
414 :
415 0 : if (present(bytes_per_ldst)) then
416 0 : self%bytes_per_ldst = bytes_per_ldst
417 : endif
418 0 : end subroutine
419 :
420 : !> Start a timing section
421 : !>
422 : !> \param name A descriptive name
423 : !> \param replace If .true. (default .false.), replace any entries at the
424 : !> current position with the same name. If .false., add the
425 : !> time to a possibly existing entry
426 : !>
427 : !> Care must be taken to balance any invocations of %start() and %stop(), e.g.
428 : !> the following is valid
429 : !>
430 : !> \code{.f90}
431 : !> call timer%start("A")
432 : !> call timer%start("B")
433 : !> call timer%stop("B")
434 : !> call timer%stop("A")
435 : !> \endcode
436 : !>
437 : !> while the following is not
438 : !>
439 : !> \code{.f90}
440 : !> call timer%start("A")
441 : !> call timer%start("B")
442 : !> call timer%stop("A")
443 : !> call timer%stop("B")
444 : !> \endcode
445 : !>
446 474215784 : subroutine timer_start(self, name, replace)
447 : class(timer_t), intent(inout), target :: self
448 : character(len=*), intent(in) :: name
449 : logical, intent(in), optional :: replace
450 : type(node_t), pointer :: node
451 : !$ integer :: omp_get_thread_num, omp_get_num_threads, omp_get_level, omp_get_ancestor_thread_num
452 : !$ integer :: i
453 :
454 474215784 : if (.not. self%active) then
455 6108204 : return
456 : endif
457 :
458 : ! Deal with nested parallelization
459 484211220 : !$ do i = 0, omp_get_level()
460 246119760 : !$ if (omp_get_ancestor_thread_num(i) > 0) then
461 0 : !$ return
462 : !$ endif
463 : !$ end do
464 :
465 238091460 : !$omp master
466 :
467 470262096 : if (.not. associated(self%current_node)) then
468 : ! First call to timer_start()
469 51840 : allocate(self%root)
470 51840 : self%root%name = "[Root]"
471 51840 : self%root%timer => self
472 51840 : call self%root%start()
473 51840 : nullify(self%root%firstChild)
474 51840 : nullify(self%root%lastChild)
475 51840 : nullify(self%root%parent)
476 51840 : nullify(self%root%nextSibling)
477 51840 : self%current_node => self%root
478 : endif
479 :
480 470262096 : if (string_eq(self%current_node%name, name)) then
481 0 : !$omp critical
482 0 : write(error_unit,*) "Recursion error! Printing tree so far.."
483 0 : write(error_unit,*) "Got %start(""" // trim(name) // """), while %start(""" // trim(name) // """) was still active"
484 0 : !$ write(*,*) "omp_get_thread_num() = ", omp_get_thread_num()
485 0 : !$ write(*,*) "omp_get_num_threads() = ", omp_get_num_threads()
486 0 : !$ write(*,*) "omp_get_level() = ", omp_get_level()
487 0 : !$ do i = 0, omp_get_level()
488 0 : !$ write(*,*) "omp_get_ancestor_thread_num(", i, ") = ", omp_get_ancestor_thread_num(i)
489 : !$ end do
490 0 : call self%root%print_graph(0)
491 : !$omp end critical
492 0 : stop "timer_start() while same timer was active"
493 : endif
494 470262096 : node => self%current_node%get_child(name)
495 470262096 : if (.not. associated(node)) then
496 2052228 : node => self%current_node%new_child(name)
497 468209868 : else if (present(replace)) then
498 0 : if (replace) then
499 0 : node%value = null_value
500 0 : node%count = 0
501 0 : if (associated(node%firstChild)) then
502 0 : call deallocate_node(node%firstChild)
503 0 : nullify(node%firstChild)
504 0 : nullify(node%lastChild)
505 : endif
506 : endif
507 : endif
508 :
509 470262096 : call node%start()
510 :
511 470262096 : self%current_node => node
512 :
513 : !$omp end master
514 :
515 474215784 : end subroutine
516 :
517 : !> End a timing segment, \sa timer_start
518 : !>
519 : !> \param name The exact same name as was used for %start().
520 : !> If not provided, close the currently active region.
521 : !> If given, warns if it does not match the last %start()
522 : !> call on stderr and disables the current timer instance.
523 : !>
524 474215784 : subroutine timer_stop(self, name)
525 : class(timer_t), intent(inout), target :: self
526 : character(len=*), intent(in), optional :: name
527 : logical :: error
528 : !$ integer :: omp_get_level, omp_get_ancestor_thread_num
529 : !$ integer :: i
530 :
531 474215784 : if (.not. self%active) then
532 6108204 : return
533 : endif
534 :
535 : ! Deal with nested parallelization
536 484211220 : !$ do i = 0, omp_get_level()
537 246119760 : !$ if (omp_get_ancestor_thread_num(i) > 0) then
538 0 : !$ return
539 : !$ endif
540 : !$ end do
541 :
542 238091460 : !$omp master
543 470262096 : error = .false.
544 :
545 470262096 : if (.not. associated(self%current_node)) then
546 0 : write(error_unit,'(a)') "Called timer_stop() without first calling any timer_start(), disabling timings"
547 0 : call self%free()
548 0 : self%active = .false.
549 0 : error = .true.
550 470262096 : else if (present(name)) then
551 470262096 : if (.not. string_eq(self%current_node%name, name)) then
552 : write(error_unit,'(a)') "Expected %stop(""" // trim(self%current_node%name) // """),&
553 0 : & but got %stop(""" // trim(name) // """), disabling timings"
554 0 : call self%free()
555 0 : self%active = .false.
556 0 : error = .true.
557 : endif
558 : endif
559 :
560 470262096 : if (.not. error) then
561 470262096 : call self%current_node%stop()
562 :
563 : ! climb up to parent
564 470262096 : if (.not. associated(self%current_node%parent)) then
565 0 : write(error_unit,'(a)') "Error: No valid parent node found for node '" // trim(self%current_node%name) // "'"
566 0 : call self%free()
567 0 : self%active = .false.
568 : endif
569 470262096 : self%current_node => self%current_node%parent
570 :
571 : endif
572 : !$omp end master
573 :
574 474215784 : end subroutine
575 :
576 : !> Deallocate all objects associated with (but not including) self
577 : !>
578 92016 : subroutine timer_free(self)
579 : class(timer_t), intent(inout), target :: self
580 92016 : if (associated(self%root)) then
581 51840 : call deallocate_node(self%root)
582 : endif
583 92016 : nullify(self%root)
584 92016 : nullify(self%current_node)
585 92016 : end subroutine
586 :
587 : !> Print a timing graph
588 : !>
589 : !> \param name1 If given, first descend one level to the node with name name1
590 : !> \param name2 If given, also descend another level to the node with name2 there
591 : !> \param name3 etc.
592 : !> \param name4 etc.
593 : !> \param threshold If given, subsume any entries with a value of threshold
594 : !> seconds in a single node "(below threshold)"
595 : !> \param is_sorted Assume a sorted graph for inserting "(own)" and "(below threshold)"
596 : !> \param unit The unit number on which to print, default stdout
597 : !>
598 48624 : subroutine timer_print(self, name1, name2, name3, name4, threshold, is_sorted, unit)
599 : class(timer_t), intent(in), target :: self
600 : character(len=*), intent(in), optional :: name1, name2, name3, name4
601 : real(kind=rk), intent(in), optional :: threshold
602 : logical, intent(in), optional :: is_sorted
603 : integer, intent(in), optional :: unit
604 :
605 : integer :: unit_act
606 :
607 : type(node_t), pointer :: node
608 : character(len=64) :: format_spec
609 :
610 : ! I hate fortran's string handling
611 : character(len=name_length), parameter :: group = "Group"
612 : character(len=12), parameter :: seconds = " [s]"
613 : character(len=12), parameter :: fract = " fraction"
614 : character(len=12), parameter :: ram = " alloc. RAM"
615 : character(len=12), parameter :: vmem = " alloc. VM"
616 : character(len=12), parameter :: hwm = " alloc. HWM"
617 : character(len=12), parameter :: flop_rate = " Mflop/s"
618 : character(len=12), parameter :: flop_count = " Mflop"
619 : character(len=12), parameter :: ldst = "loads+stores"
620 : character(len=12), parameter :: bandwidth = " mem bandw."
621 : character(len=12), parameter :: ai = "arithm. Int."
622 : character(len=12), parameter :: dash = "============"
623 :
624 48624 : if (.not. self%active) then
625 0 : return
626 : endif
627 :
628 48624 : if (present(unit)) then
629 0 : unit_act = unit
630 : else
631 48624 : unit_act = output_unit
632 : endif
633 :
634 48624 : node => self%root
635 48624 : if (present(name1)) then
636 48624 : node => node%get_child(name1)
637 48624 : if (.not. associated(node)) then
638 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name1) // """"
639 0 : return
640 : endif
641 : end if
642 48624 : if (present(name2)) then
643 0 : node => node%get_child(name2)
644 0 : if (.not. associated(node)) then
645 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name2) // """"
646 0 : return
647 : endif
648 : end if
649 48624 : if (present(name3)) then
650 0 : node => node%get_child(name3)
651 0 : if (.not. associated(node)) then
652 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name3) // """"
653 0 : return
654 : endif
655 : end if
656 48624 : if (present(name4)) then
657 0 : node => node%get_child(name4)
658 0 : if (.not. associated(node)) then
659 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name4) // """"
660 0 : return
661 : endif
662 : end if
663 :
664 : ! I really do hate it ..
665 48624 : write(format_spec,'("("" /= "",a",i0,",2x,a12,1x,a12)")') name_length
666 48624 : write(unit_act, format_spec, advance='no') adjustl(group), seconds, fract
667 :
668 48624 : if (self%print_allocated_memory) then
669 0 : write(unit_act,'(1x,a12)',advance='no') ram
670 : endif
671 :
672 48624 : if (self%print_virtual_memory) then
673 0 : write(unit_act,'(1x,a12)',advance='no') vmem
674 : endif
675 :
676 48624 : if (self%print_max_allocated_memory) then
677 0 : write(unit_act,'(1x,a12)',advance='no') hwm
678 : endif
679 :
680 48624 : if (self%print_flop_count) then
681 0 : write(unit_act,'(1x,a12)',advance='no') flop_count
682 : endif
683 48624 : if (self%print_flop_rate) then
684 0 : write(unit_act,'(1x,a12)',advance='no') flop_rate
685 : endif
686 48624 : if (self%print_ldst) then
687 0 : write(unit_act,'(1x,a12)',advance='no') ldst
688 : endif
689 48624 : if (self%print_memory_bandwidth) then
690 0 : write(unit_act,'(1x,a12)',advance='no') bandwidth
691 : endif
692 48624 : if (self%print_ai) then
693 0 : write(unit_act,'(1x,a12)',advance='no') ai
694 : endif
695 :
696 48624 : write(unit_act,'(a)') ""
697 :
698 48624 : write(format_spec,'("("" | "",a",i0,",1x,2(1x,a12))")') name_length
699 48624 : write(unit_act, format_spec, advance='no') "", dash, dash
700 :
701 48624 : if (self%print_allocated_memory) then
702 0 : write(unit_act,'(1x,a12)',advance='no') dash
703 : endif
704 :
705 48624 : if (self%print_virtual_memory) then
706 0 : write(unit_act,'(1x,a12)',advance='no') dash
707 : endif
708 :
709 48624 : if (self%print_max_allocated_memory) then
710 0 : write(unit_act,'(1x,a12)',advance='no') dash
711 : endif
712 :
713 48624 : if (self%print_flop_count) then
714 0 : write(unit_act,'(1x,a12)',advance='no') dash
715 : endif
716 48624 : if (self%print_flop_rate) then
717 0 : write(unit_act,'(1x,a12)',advance='no') dash
718 : endif
719 48624 : if (self%print_ldst) then
720 0 : write(unit_act,'(1x,a12)',advance='no') dash
721 : endif
722 48624 : if (self%print_memory_bandwidth) then
723 0 : write(unit_act,'(1x,a12)',advance='no') dash
724 : endif
725 48624 : if (self%print_ai) then
726 0 : write(unit_act,'(1x,a12)',advance='no') dash
727 : endif
728 :
729 48624 : write(unit_act,'(a)') ""
730 :
731 48624 : call node%print_graph(0, threshold, is_sorted, unit=unit)
732 :
733 48624 : end subroutine
734 :
735 : !> Return the sum of all entries with a certain name below
736 : !> a given node. Specify the name with the last argument, the
737 : !> path to the starting point with the first few parameters
738 : !>
739 : !> \param name1, .., namei-1 The path to the starting node
740 : !> \param namei The name of all sub-entries below this
741 : !> node which should be summed together
742 : !>
743 : !> For example timer%in_entries("foo", "bar", "parallel") returns
744 : !> the sum of all entries named "parallel" below the foo->bar node
745 : !>
746 0 : function timer_in_entries(self, name1, name2, name3, name4) result(s)
747 : use, intrinsic :: iso_fortran_env, only : error_unit
748 : class(timer_t), intent(in), target :: self
749 : character(len=*), intent(in) :: name1
750 : character(len=*), intent(in), optional :: name2, name3, name4
751 : real(kind=rk) :: s
752 : type(node_t), pointer :: node ! the starting node
753 : type(value_t) :: val
754 : character(len=name_length) :: name ! the name of the sections
755 :
756 0 : s = 0._rk
757 :
758 0 : if (.not. self%active) then
759 0 : return
760 : endif
761 :
762 0 : node => self%root
763 0 : name = name1
764 :
765 0 : if (present(name2)) then
766 0 : node => node%get_child(name1)
767 0 : if (.not. associated(node)) then
768 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name1) // """"
769 0 : return
770 : endif
771 0 : name = name2
772 : end if
773 0 : if (present(name3)) then
774 0 : node => node%get_child(name2)
775 0 : if (.not. associated(node)) then
776 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name2) // """"
777 0 : return
778 : endif
779 0 : name = name3
780 : end if
781 0 : if (present(name4)) then
782 0 : node => node%get_child(name3)
783 0 : if (.not. associated(node)) then
784 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name3) // """"
785 0 : return
786 : endif
787 0 : name = name4
788 : end if
789 :
790 0 : val = node%sum_of_children_with_name(name)
791 0 : s = real(val%micros, kind=rk) * 1e-6_rk
792 0 : end function
793 :
794 : !> Access a specific, already stopped entry of the graph by specifying the
795 : !> names of the nodes along the graph from the root node
796 : !>
797 : !> The result is only meaningfull if the entry was never appended by
798 : !> additional %start() calls.
799 : !>
800 48384 : function timer_get(self, name1, name2, name3, name4, name5, name6) result(s)
801 : class(timer_t), intent(in), target :: self
802 : ! this is clunky, but what can you do..
803 : character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6
804 : real(kind=rk) :: s
805 : type(node_t), pointer :: node
806 :
807 48384 : s = 0._rk
808 :
809 48384 : if (.not. self%active) then
810 0 : return
811 : endif
812 :
813 48384 : node => self%root
814 48384 : if (present(name1)) then
815 48384 : node => node%get_child(name1)
816 48384 : if (.not. associated(node)) then
817 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name1) // """"
818 0 : return
819 : endif
820 : end if
821 48384 : if (present(name2)) then
822 48384 : node => node%get_child(name2)
823 48384 : if (.not. associated(node)) then
824 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name2) // """"
825 0 : return
826 : endif
827 : end if
828 48384 : if (present(name3)) then
829 0 : node => node%get_child(name3)
830 0 : if (.not. associated(node)) then
831 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name3) // """"
832 0 : return
833 : endif
834 : end if
835 48384 : if (present(name4)) then
836 0 : node => node%get_child(name4)
837 0 : if (.not. associated(node)) then
838 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name4) // """"
839 0 : return
840 : endif
841 : end if
842 48384 : if (present(name5)) then
843 0 : node => node%get_child(name5)
844 0 : if (.not. associated(node)) then
845 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name5) // """"
846 0 : return
847 : endif
848 : end if
849 48384 : if (present(name6)) then
850 0 : node => node%get_child(name6)
851 0 : if (.not. associated(node)) then
852 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name6) // """"
853 0 : return
854 : endif
855 : end if
856 48384 : if (node%is_running) then
857 0 : write(error_unit,'(a)') "Timer """ // trim(node%name) // """ not yet stopped"
858 0 : return
859 : endif
860 48384 : s = real(node%value%micros, kind=rk) * 1e-6_rk
861 96768 : end function
862 :
863 : !> Access a specific, not yet stopped entry of the graph by specifying the
864 : !> names of the nodes along the graph from the root node and return the
865 : !> seconds that have passed since the entry was created.
866 : !>
867 : !> The result is only meaningfull if the entry was never appended by
868 : !> additional %start() calls.
869 : !>
870 0 : function timer_since(self, name1, name2, name3, name4) result(s)
871 : class(timer_t), intent(in), target :: self
872 : character(len=*), intent(in), optional :: name1, name2, name3, name4
873 : real(kind=rk) :: s
874 : type(value_t) :: val
875 : type(node_t), pointer :: node
876 :
877 0 : s = 0._rk
878 :
879 0 : node => self%root
880 0 : if (present(name1)) then
881 0 : node => node%get_child(name1)
882 0 : if (.not. associated(node)) then
883 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name1) // """"
884 0 : return
885 : endif
886 : end if
887 0 : if (present(name2)) then
888 0 : node => node%get_child(name2)
889 0 : if (.not. associated(node)) then
890 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name2) // """"
891 0 : return
892 : endif
893 : end if
894 0 : if (present(name3)) then
895 0 : node => node%get_child(name3)
896 0 : if (.not. associated(node)) then
897 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name3) // """"
898 0 : return
899 : endif
900 : end if
901 0 : if (present(name4)) then
902 0 : node => node%get_child(name4)
903 0 : if (.not. associated(node)) then
904 0 : write(error_unit,'(a)') "Could not descend to """ // trim(name4) // """"
905 0 : return
906 : endif
907 : end if
908 0 : if (node%is_running .neqv. .true.) then
909 0 : write(error_unit,'(a)') "Timer """ // trim(node%name) // """ already stopped"
910 0 : return
911 : endif
912 0 : val = node%value + node%now()
913 0 : s = real(val%micros, kind=rk) * 1e-6_rk
914 0 : end function
915 :
916 : !> Sort the graph on each level.
917 : !> Warning: This irrevocable destroys the old ordering.
918 : !>
919 0 : subroutine timer_sort(self)
920 : class(timer_t), intent(inout), target :: self
921 : type(node_t), pointer :: node
922 :
923 0 : call sort_nodes(self%root, node)
924 :
925 0 : node => self%root
926 0 : do while (associated(node))
927 0 : call node%sort_children()
928 0 : node => node%nextSibling
929 : enddo
930 0 : end subroutine
931 :
932 :
933 :
934 : ! Now methods of node_t:
935 :
936 :
937 : ! This is the function that actually returns the current timestamp and all other counters
938 940576032 : function node_now(self) result(val)
939 : use, intrinsic :: iso_c_binding
940 : class(node_t), intent(in) :: self
941 : type(value_t) :: val
942 :
943 : ! current time
944 940576032 : val%micros = microseconds_since_epoch()
945 :
946 940576032 : if (self%timer%record_allocated_memory) then
947 0 : val%rsssize = resident_set_size()
948 : endif
949 :
950 940576032 : if (self%timer%record_virtual_memory) then
951 0 : val%virtualmem = virtual_memory()
952 : endif
953 :
954 940576032 : if (self%timer%record_max_allocated_memory) then
955 0 : val%maxrsssize = max_resident_set_size()
956 : endif
957 :
958 : #ifdef HAVE_LIBPAPI
959 : if (self%timer%record_flop_counts .or. self%timer%record_memory_bandwidth) then
960 : call papi_counters(val%flop_count, val%ldst)
961 : endif
962 : #endif
963 1881152064 : end function
964 :
965 :
966 470313936 : subroutine node_start(self)
967 : class(node_t), intent(inout) :: self
968 :
969 : ! take the time
970 470313936 : self%value = self%value - self%now()
971 470313936 : self%is_running = .true.
972 470313936 : end subroutine
973 :
974 470262096 : subroutine node_stop(self)
975 : class(node_t), intent(inout) :: self
976 :
977 470262096 : self%count = self%count + 1
978 :
979 : ! take the time
980 470262096 : self%value = self%value + self%now()
981 470262096 : self%is_running = .false.
982 470262096 : end subroutine
983 :
984 3266808 : function node_get_value(self) result(val)
985 : class(node_t), intent(in) :: self
986 : type(value_t) :: val
987 3266808 : val = self%value
988 3266808 : if (self%is_running) then
989 : ! we have not finished, give time up to NOW
990 0 : val = val + self%now()
991 : endif
992 6533616 : end function
993 :
994 2052228 : function node_new_child(self, name) result(new)
995 : class(node_t), intent(inout), target :: self
996 : character(len=*), intent(in) :: name
997 : type(node_t), pointer :: new
998 :
999 2052228 : if (.not. associated(self%lastChild)) then
1000 1105236 : allocate(self%lastChild)
1001 1105236 : new => self%lastChild
1002 1105236 : self%firstChild => new
1003 : else
1004 946992 : allocate(self%lastChild%nextSibling)
1005 946992 : new => self%lastChild%nextSibling
1006 946992 : self%lastChild => new
1007 : endif
1008 :
1009 : select type (self)
1010 : type is (node_t)
1011 2052228 : new%parent => self
1012 : class default
1013 2052228 : stop "node_new_child(): This should not happen"
1014 : end select
1015 :
1016 2052228 : new%name = name
1017 2052228 : new%count = 0
1018 2052228 : new%timer => self%timer
1019 :
1020 2052228 : nullify(new%firstChild)
1021 2052228 : nullify(new%lastChild)
1022 2052228 : nullify(new%nextSibling)
1023 4104456 : end function
1024 :
1025 :
1026 1809672036 : function string_eq(str1, str2) result(eq)
1027 : character(len=name_length), intent(in) :: str1
1028 : character(len=*), intent(in) :: str2
1029 : logical :: eq
1030 1809672036 : eq = trim(str1) .eq. str2(1:min(len(trim(str2)), name_length))
1031 3619344072 : end function
1032 :
1033 470407488 : function node_get_child(self, name) result(child)
1034 : class(node_t), intent(in) :: self
1035 : character(len=*), intent(in) :: name
1036 : type(node_t), pointer :: child
1037 :
1038 470407488 : child => self%firstChild
1039 1271992656 : do while (associated(child))
1040 869147844 : if (string_eq(child%name, name)) then
1041 468355260 : return
1042 : endif
1043 400792584 : child => child%nextSibling
1044 : enddo
1045 2052228 : nullify(child)
1046 472459716 : end function
1047 :
1048 2104068 : recursive subroutine deallocate_node(entry)
1049 : type(node_t), intent(inout), pointer :: entry
1050 : type(node_t), pointer :: nextSibling
1051 :
1052 2104068 : if (associated(entry%firstChild)) then
1053 1105236 : call deallocate_node(entry%firstChild)
1054 : endif
1055 2104068 : nextSibling => entry%nextSibling
1056 2104068 : deallocate(entry)
1057 2104068 : nullify(entry)
1058 2104068 : if (associated(nextSibling)) then
1059 946992 : call deallocate_node(nextSibling)
1060 : endif
1061 2104068 : end subroutine
1062 :
1063 1121352 : function node_sum_of_children(self) result(sum_time)
1064 : class(node_t), intent(in) :: self
1065 : type(node_t), pointer :: cur_entry
1066 : type(value_t) :: sum_time
1067 :
1068 1121352 : cur_entry => self%firstChild
1069 3266808 : do while (associated(cur_entry))
1070 1072728 : sum_time = sum_time + cur_entry%get_value()
1071 1072728 : cur_entry => cur_entry%nextSibling
1072 : enddo
1073 2242704 : end function
1074 :
1075 0 : recursive function node_sum_of_children_with_name(self, name) result(sum_time)
1076 : class(node_t), intent(in) :: self
1077 : character(len=*), intent(in) :: name
1078 : type(node_t), pointer :: cur_entry
1079 : type(value_t) :: sum_time
1080 :
1081 0 : cur_entry => self%firstChild
1082 0 : do while (associated(cur_entry))
1083 0 : if (string_eq(cur_entry%name, name)) then
1084 0 : sum_time = sum_time + cur_entry%value
1085 : else
1086 0 : sum_time = sum_time + cur_entry%sum_of_children_with_name(name)
1087 : endif
1088 0 : cur_entry => cur_entry%nextSibling
1089 : enddo
1090 0 : end function
1091 :
1092 1121352 : function node_sum_of_children_below(self, threshold) result(sum_time)
1093 : class(node_t), intent(in) :: self
1094 : real(kind=rk), intent(in), optional :: threshold
1095 : type(node_t), pointer :: cur_entry
1096 : type(value_t) :: sum_time, cur_value
1097 :
1098 1121352 : if (.not. present(threshold)) then
1099 1121352 : return
1100 : endif
1101 :
1102 0 : cur_entry => self%firstChild
1103 :
1104 0 : do while (associated(cur_entry))
1105 0 : cur_value = cur_entry%get_value()
1106 0 : if (cur_value%micros * 1e-6_rk < threshold) then
1107 0 : sum_time = sum_time + cur_value
1108 : endif
1109 0 : cur_entry => cur_entry%nextSibling
1110 : enddo
1111 1121352 : end function
1112 :
1113 0 : subroutine insert_into_sorted_list(head, node)
1114 : type(node_t), pointer, intent(inout) :: head
1115 : type(node_t), target, intent(inout) :: node
1116 : type(node_t), pointer :: cur
1117 :
1118 0 : if (node%value%micros >= head%value%micros) then
1119 0 : node%nextSibling => head
1120 0 : head => node
1121 0 : return
1122 : endif
1123 :
1124 0 : cur => head
1125 0 : do while (associated(cur%nextSibling))
1126 0 : if (cur%value%micros > node%value%micros .and. node%value%micros >= cur%nextSibling%value%micros) then
1127 0 : node%nextSibling => cur%nextSibling
1128 0 : cur%nextSibling => node
1129 0 : return
1130 : endif
1131 0 : cur => cur%nextSibling
1132 : end do
1133 :
1134 : ! node has to be appended at the end
1135 0 : cur%nextSibling => node
1136 0 : node%nextSibling => NULL()
1137 : end subroutine
1138 :
1139 0 : subroutine remove_from_list(head, node)
1140 : type(node_t), pointer, intent(inout) :: head
1141 : type(node_t), pointer, intent(in) :: node
1142 : type(node_t), pointer :: cur
1143 :
1144 0 : if (associated(head,node)) then
1145 0 : head => head%nextSibling
1146 0 : return
1147 : endif
1148 :
1149 0 : cur => head
1150 0 : do while (associated(cur%nextSibling))
1151 0 : if (associated(cur%nextSibling,node)) then
1152 0 : cur%nextSibling => cur%nextSibling%nextSibling
1153 0 : return
1154 : endif
1155 0 : cur => cur%nextSibling
1156 : end do
1157 : end subroutine
1158 :
1159 1121352 : subroutine node_print(self, indent_level, total, unit)
1160 : class(node_t), intent(inout) :: self
1161 : integer, intent(in) :: indent_level
1162 : type(value_t), intent(in) :: total
1163 : type(value_t) :: val
1164 : integer, intent(in) :: unit
1165 : character(len=name_length) :: name, suffix
1166 :
1167 1121352 : if (self%is_running) then
1168 0 : name = trim(self%name) // " (running)"
1169 : else
1170 1121352 : name = self%name
1171 : endif
1172 :
1173 1121352 : if (self%count > 1) then
1174 442776 : write(suffix, '(" (",i0,"x)")') self%count
1175 442776 : name = trim(name) // " " // trim(suffix)
1176 : endif
1177 :
1178 1121352 : if (self%is_running) then
1179 0 : val = self%value + self%now()
1180 : else
1181 1121352 : val = self%value
1182 : endif
1183 1121352 : call print_value(val, self%timer, indent_level, name, total, unit)
1184 2242704 : end subroutine
1185 :
1186 1121352 : recursive subroutine node_print_graph(self, indent_level, threshold, is_sorted, total, unit)
1187 : use, intrinsic :: iso_fortran_env, only : output_unit
1188 : class(node_t), intent(inout) :: self
1189 : integer, intent(in) :: indent_level
1190 : real(kind=rk), intent(in), optional :: threshold
1191 : logical, intent(in), optional :: is_sorted
1192 : type(value_t), intent(in), optional :: total
1193 : integer, intent(in), optional :: unit
1194 :
1195 : type(node_t), pointer :: node
1196 : integer :: i
1197 : type(value_t) :: cur_value, node_value, own_value, below_threshold_value, total_act
1198 : type(node_t), pointer :: own_node, threshold_node
1199 : real(kind=rk) :: threshold_act
1200 : logical :: is_sorted_act, print_own, print_threshold
1201 : integer :: unit_act
1202 :
1203 1121352 : nullify(own_node)
1204 1121352 : nullify(threshold_node)
1205 :
1206 1121352 : if (present(threshold)) then
1207 0 : threshold_act = threshold
1208 : else
1209 1121352 : threshold_act = 0
1210 : endif
1211 :
1212 1121352 : if (present(is_sorted)) then
1213 0 : is_sorted_act = is_sorted
1214 : else
1215 1121352 : is_sorted_act = .false.
1216 : endif
1217 :
1218 1121352 : cur_value = self%get_value()
1219 :
1220 1121352 : if (present(total)) then
1221 1072728 : total_act = total
1222 : else
1223 48624 : total_act = cur_value
1224 : endif
1225 :
1226 1121352 : if (present(unit)) then
1227 1072728 : unit_act = unit
1228 : else
1229 48624 : unit_act = output_unit
1230 : endif
1231 :
1232 1121352 : call self%print(indent_level, total_act, unit_act)
1233 :
1234 1121352 : own_value = cur_value - self%sum_of_children()
1235 1121352 : below_threshold_value = self%sum_of_children_below(threshold)
1236 :
1237 1121352 : print_own = associated(self%firstChild)
1238 1121352 : print_threshold = below_threshold_value%micros > 0
1239 :
1240 : ! Deal with "(own)" and "(below threshold)" entries
1241 1121352 : if (is_sorted_act) then
1242 : ! sort them in
1243 0 : if (print_own) then
1244 : ! insert an "(own)" node
1245 0 : allocate(own_node)
1246 0 : own_node%value = own_value
1247 0 : own_node%name = own
1248 0 : own_node%timer => self%timer
1249 0 : call insert_into_sorted_list(self%firstChild, own_node)
1250 : endif
1251 :
1252 0 : if (print_threshold) then
1253 : ! insert a "(below threshold)" node
1254 0 : allocate(threshold_node)
1255 0 : threshold_node%value = below_threshold_value
1256 0 : threshold_node%name = below
1257 0 : threshold_node%timer => self%timer
1258 0 : call insert_into_sorted_list(self%firstChild, threshold_node)
1259 : endif
1260 :
1261 : else
1262 : ! print them first
1263 1121352 : if (print_own) then
1264 577428 : call print_value(own_value, self%timer, indent_level + 1, own, cur_value, unit_act)
1265 : endif
1266 1121352 : if (print_threshold) then
1267 0 : call print_value(below_threshold_value, self%timer, indent_level + 1, below, cur_value, unit_act)
1268 : endif
1269 : endif
1270 :
1271 : ! print children
1272 1121352 : node => self%firstChild
1273 3266808 : do while (associated(node))
1274 1072728 : node_value = node%get_value()
1275 : if (node_value%micros * 1e-6_rk >= threshold_act &
1276 : .or. associated(node, threshold_node) &
1277 1072728 : .or. associated(node, own_node)) then
1278 1072728 : call node%print_graph(indent_level + 1, threshold, is_sorted, cur_value, unit_act)
1279 : endif
1280 1072728 : node => node%nextSibling
1281 : end do
1282 :
1283 1121352 : if (is_sorted_act) then
1284 : ! remove inserted dummy nodes again
1285 0 : if (print_own) then
1286 0 : call remove_from_list(self%firstChild, own_node)
1287 0 : deallocate(own_node)
1288 : endif
1289 0 : if (print_threshold) then
1290 0 : call remove_from_list(self%firstChild, threshold_node)
1291 0 : deallocate(threshold_node)
1292 : endif
1293 : endif
1294 :
1295 2242704 : end subroutine
1296 :
1297 : ! In-place sort a node_t linked list and return the first and last element,
1298 0 : subroutine sort_nodes(head, tail)
1299 : type(node_t), pointer, intent(inout) :: head, tail
1300 :
1301 : type(node_t), pointer :: p, q, e
1302 : type(value_t) :: p_val, q_val
1303 : integer :: insize, nmerges, psize, qsize, i
1304 :
1305 0 : if (.not. associated(head)) then
1306 0 : nullify(tail)
1307 0 : return
1308 : endif
1309 :
1310 0 : insize = 1
1311 :
1312 0 : do while (.true.)
1313 0 : p => head
1314 0 : nullify(head)
1315 0 : nullify(tail)
1316 0 : nmerges = 0
1317 :
1318 0 : do while(associated(p))
1319 0 : nmerges = nmerges + 1
1320 0 : q => p
1321 0 : psize = 0
1322 0 : do i = 1, insize
1323 0 : psize = psize + 1
1324 0 : q => q%nextSibling
1325 0 : if (.not. associated(q)) then
1326 0 : exit
1327 : endif
1328 : end do
1329 :
1330 0 : qsize = insize
1331 :
1332 0 : do while (psize > 0 .or. (qsize > 0 .and. associated(q)))
1333 0 : if (psize == 0) then
1334 0 : e => q
1335 0 : q => q%nextSibling
1336 0 : qsize = qsize - 1
1337 :
1338 0 : else if (qsize == 0 .or. (.not. associated(q))) then
1339 0 : e => p;
1340 0 : p => p%nextSibling
1341 0 : psize = psize - 1
1342 : else
1343 0 : p_val = p%get_value()
1344 0 : q_val = q%get_value()
1345 0 : if (p_val%micros >= q_val%micros) then
1346 0 : e => p
1347 0 : p => p%nextSibling
1348 0 : psize = psize - 1
1349 :
1350 : else
1351 0 : e => q
1352 0 : q => q%nextSibling
1353 0 : qsize = qsize - 1
1354 :
1355 : end if
1356 : end if
1357 :
1358 0 : if (associated(tail)) then
1359 0 : tail%nextSibling => e
1360 : else
1361 0 : head => e
1362 : endif
1363 0 : tail => e
1364 :
1365 : end do
1366 :
1367 0 : p => q
1368 :
1369 : end do
1370 :
1371 0 : nullify(tail%nextSibling)
1372 :
1373 0 : if (nmerges <= 1) then
1374 0 : return
1375 : endif
1376 :
1377 0 : insize = insize * 2
1378 :
1379 : end do
1380 0 : end subroutine
1381 :
1382 :
1383 0 : recursive subroutine node_sort_children(self)
1384 : class(node_t), intent(inout) :: self
1385 : type(node_t), pointer :: node
1386 :
1387 0 : call sort_nodes(self%firstChild, self%lastChild)
1388 :
1389 0 : node => self%firstChild
1390 0 : do while (associated(node))
1391 0 : call node%sort_children()
1392 0 : node => node%nextSibling
1393 : enddo
1394 0 : end subroutine
1395 :
1396 1698780 : subroutine print_value(value, timer, indent_level, label, total, unit)
1397 : type(value_t), intent(in) :: value
1398 : type(timer_t), intent(in) :: timer
1399 : integer, intent(in) :: indent_level
1400 : character(len=name_length), intent(in) :: label
1401 : type(value_t), intent(in) :: total
1402 : integer, intent(in) :: unit
1403 :
1404 : character(len=64) :: format_spec
1405 :
1406 1698780 : write(format_spec,'("(",i0,"x,""|_ "",a",i0,",2x,f12.6,1x,f12.3)")') indent_level * 2 + 1, name_length
1407 : write(unit,format_spec,advance='no') &
1408 1698780 : label, &
1409 1698780 : real(value%micros, kind=rk) * 1e-6_rk, &
1410 3397560 : real(value%micros, kind=rk) / real(total%micros, kind=rk)
1411 :
1412 1698780 : if (timer%print_allocated_memory) then
1413 : write(unit,'(1x,a12)',advance='no') &
1414 0 : nice_format(real(value%rsssize, kind=C_DOUBLE))
1415 : endif
1416 :
1417 1698780 : if (timer%print_virtual_memory) then
1418 : write(unit,'(1x,a12)',advance='no') &
1419 0 : nice_format(real(value%virtualmem, kind=C_DOUBLE))
1420 : endif
1421 :
1422 1698780 : if (timer%print_max_allocated_memory) then
1423 : write(unit,'(1x,a12)',advance='no') &
1424 0 : nice_format(real(value%maxrsssize, kind=C_DOUBLE))
1425 : endif
1426 :
1427 1698780 : if (timer%print_flop_count) then
1428 0 : write(unit,'(1x,f12.2)',advance='no') real(value%flop_count, kind=rk) / 1e6_rk
1429 : endif
1430 1698780 : if (timer%print_flop_rate) then
1431 0 : write(unit,'(1x,f12.2)',advance='no') real(value%flop_count, kind=rk) / value%micros
1432 : endif
1433 1698780 : if (timer%print_ldst) then
1434 0 : write(unit,'(1x,a12)',advance='no') nice_format(real(value%ldst, kind=rk))
1435 : endif
1436 1698780 : if (timer%print_memory_bandwidth) then
1437 0 : write(unit,'(1x,a12)',advance='no') nice_format(real(value%ldst*timer%bytes_per_ldst, kind=rk) / (value%micros * 1e-6_rk))
1438 : endif
1439 1698780 : if (timer%print_ai) then
1440 0 : write(unit,'(1x,f12.4)',advance='no') real(value%flop_count, kind=rk) / value%ldst / timer%bytes_per_ldst
1441 : endif
1442 :
1443 1698780 : write(unit,'(a)') ""
1444 1698780 : end subroutine
1445 :
1446 0 : pure elemental function nice_format(number) result(string)
1447 : real(kind=C_DOUBLE), intent(in) :: number
1448 : character(len=12) :: string
1449 : real(kind=C_DOUBLE), parameter :: &
1450 : kibi = 2.0_C_DOUBLE**10, &
1451 : mebi = 2.0_C_DOUBLE**20, &
1452 : gibi = 2.0_C_DOUBLE**30, &
1453 : tebi = 2.0_C_DOUBLE**40, &
1454 : pebi = 2.0_C_DOUBLE**50
1455 :
1456 0 : if (abs(number) >= pebi) then
1457 0 : write(string,'(es12.2)') number
1458 0 : else if (abs(number) >= tebi) then
1459 0 : write(string,'(f9.2,'' Ti'')') number / tebi
1460 0 : else if (abs(number) >= gibi) then
1461 0 : write(string,'(f9.2,'' Gi'')') number / gibi
1462 0 : else if (abs(number) >= mebi) then
1463 0 : write(string,'(f9.2,'' Mi'')') number / mebi
1464 0 : else if (abs(number) >= kibi) then
1465 0 : write(string,'(f9.2,'' ki'')') number / kibi
1466 : else
1467 0 : write(string,'(f12.2)') number
1468 : endif
1469 0 : end function
1470 :
1471 :
1472 4104456 : end module
|