LCOV - code coverage report
Current view: top level - src/ftimings - ftimings.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 223 575 38.8 %
Date: 2018-01-10 09:29:53 Functions: 19 38 50.0 %

          Line data    Source code
       1             : ! Copyright 2014 Lorenz Hüdepohl
       2             : !
       3             : ! This file is part of ftimings.
       4             : !
       5             : ! ftimings is free software: you can redistribute it and/or modify
       6             : ! it under the terms of the GNU Lesser General Public License as published by
       7             : ! the Free Software Foundation, either version 3 of the License, or
       8             : ! (at your option) any later version.
       9             : !
      10             : ! ftimings is distributed in the hope that it will be useful,
      11             : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
      12             : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      13             : ! GNU Lesser General Public License for more details.
      14             : !
      15             : ! You should have received a copy of the GNU Lesser General Public License
      16             : ! along with ftimings.  If not, see <http://www.gnu.org/licenses/>.
      17             : 
      18             : #ifdef HAVE_CONFIG_H
      19             : #include "config-f90.h"
      20             : #endif
      21             : 
      22             : !> \mainpage Ftimings
      23             : !>
      24             : !> An almost pure-fortran attempt to play with tree structures, which evolved
      25             : !> into the timing library used e.g. by the VERTEX supernova code.
      26             : !>
      27             : !> All you need to know is contained in the \ref ftimings::timer_t derived type.
      28             : module ftimings
      29             :   use ftimings_type
      30             :   use ftimings_value
      31             :   use, intrinsic :: iso_fortran_env, only : error_unit, output_unit
      32             :   implicit none
      33             :   save
      34             : 
      35             :   private
      36             : 
      37             :   ! this is mainly needed for Doxygen, they are
      38             :   ! by implicitly reachable as type-bound procedures
      39             :   ! of timer_t, however Doxygen does not document them
      40             :   ! if they are not also public
      41             :   public timer_start, timer_stop, timer_free, timer_print, &
      42             :          timer_enable, timer_disable, timer_is_enabled, &
      43             :          timer_in_entries, timer_get, timer_since, timer_sort, &
      44             :          timer_set_print_options, &
      45             :          timer_measure_flops, &
      46             :          timer_measure_allocated_memory, &
      47             :          timer_measure_virtual_memory,   &
      48             :          timer_measure_max_allocated_memory,   &
      49             :          timer_measure_memory_bandwidth
      50             : 
      51             :   character(len=name_length), private, parameter :: own = "(own)"
      52             :   character(len=name_length), private, parameter :: below = "(below threshold)"
      53             : 
      54             :   !> Type for a timer instance.
      55             :   !>
      56             :   !> Typical usage:
      57             :   !> \code{.f90}
      58             :   !>   type(timer_t) :: timer
      59             :   !>
      60             :   !>   call timer%enable()
      61             :   !>
      62             :   !>   call timer%start("section")
      63             :   !>     ...
      64             :   !>   call timer%start("subsection")
      65             :   !>     ...
      66             :   !>   call timer%stop("subsection")
      67             :   !>     ...
      68             :   !>   call timer%stop("section")
      69             :   !>
      70             :   !>   call timer%print()
      71             :   !> \endcode
      72             :   !>
      73             :   !> Every first call to timer%start() at a certain point in the graph
      74             :   !> allocates a small amount of memory. If the timer is no longer needed,
      75             :   !> all that memory can be freed again with
      76             :   !>
      77             :   !> \code{.f90}
      78             :   !>   call timer%free()
      79             :   !> \endcode
      80             :   type, public :: timer_t
      81             :     logical, private :: active = .false.                         !< If set to .false., most operations return immediately without any action
      82             :     logical, private :: record_allocated_memory = .false.        !< IF set to .true., record also the current resident set size
      83             :     logical, private :: record_virtual_memory = .false.          !< IF set to .true., record also the virtual memory
      84             :     logical, private :: record_max_allocated_memory = .false.    !< IF set to .true., record also the max resident set size ("high water mark")
      85             :     logical, private :: record_flop_counts = .false.             !< If set to .true., record also FLOP counts via PAPI calls
      86             :     logical, private :: record_memory_bandwidth = .false.        !< If set to .true., record also FLOP counts via PAPI calls
      87             : 
      88             :     logical, private :: print_allocated_memory = .false.
      89             :     logical, private :: print_max_allocated_memory = .false.
      90             :     logical, private :: print_virtual_memory = .false.
      91             :     logical, private :: print_flop_count = .false.
      92             :     logical, private :: print_flop_rate = .false.
      93             :     logical, private :: print_ldst = .false.
      94             :     logical, private :: print_memory_bandwidth = .false.
      95             :     logical, private :: print_ai = .false.
      96             :     integer, private :: bytes_per_ldst = 8
      97             : 
      98             :     type(node_t), private, pointer :: root => NULL()             !< Start of graph
      99             :     type(node_t), private, pointer :: current_node => NULL()     !< Current position in the graph
     100             :     contains
     101             :       procedure, pass :: start => timer_start
     102             :       procedure, pass :: stop => timer_stop
     103             :       procedure, pass :: free => timer_free
     104             :       procedure, pass :: print => timer_print
     105             :       procedure, pass :: enable => timer_enable
     106             :       procedure, pass :: disable => timer_disable
     107             :       procedure, pass :: is_enabled => timer_is_enabled
     108             :       procedure, pass :: measure_flops => timer_measure_flops
     109             :       procedure, pass :: measure_allocated_memory => timer_measure_allocated_memory
     110             :       procedure, pass :: measure_virtual_memory => timer_measure_virtual_memory
     111             :       procedure, pass :: measure_max_allocated_memory => timer_measure_max_allocated_memory
     112             :       procedure, pass :: measure_memory_bandwidth => timer_measure_memory_bandwidth
     113             :       procedure, pass :: set_print_options => timer_set_print_options
     114             :       procedure, pass :: in_entries => timer_in_entries
     115             :       procedure, pass :: get => timer_get
     116             :       procedure, pass :: since => timer_since
     117             :       procedure, pass :: sort => timer_sort
     118             :   end type
     119             : 
     120             :   ! Private type node_t, representing a graph node
     121             :   !
     122             :   type :: node_t
     123             :     character(len=name_length) :: name             ! Descriptive name, used when printing the timings
     124             :     integer :: count = 0                           ! Number of node_stop calls
     125             :     type(value_t) :: value                         ! The actual counter data, see ftimings_values.F90
     126             :     logical :: is_running = .false.                ! .true. if still running
     127             :     type(node_t), pointer :: firstChild => NULL()
     128             :     type(node_t), pointer :: lastChild => NULL()
     129             :     type(node_t), pointer :: parent => NULL()
     130             :     type(node_t), pointer :: nextSibling => NULL()
     131             :     class(timer_t), pointer :: timer
     132             :     contains
     133             :       procedure, pass :: now => node_now
     134             :       procedure, pass :: start => node_start
     135             :       procedure, pass :: stop => node_stop
     136             :       procedure, pass :: get_value => node_get_value
     137             :       procedure, pass :: new_child => node_new_child
     138             :       procedure, pass :: get_child => node_get_child
     139             :       procedure, pass :: sum_of_children => node_sum_of_children
     140             :       procedure, pass :: sum_of_children_with_name => node_sum_of_children_with_name
     141             :       procedure, pass :: sum_of_children_below => node_sum_of_children_below
     142             :       procedure, pass :: print => node_print
     143             :       procedure, pass :: print_graph => node_print_graph
     144             :       procedure, pass :: sort_children => node_sort_children
     145             :   end type
     146             : 
     147             :   interface
     148             :     function microseconds_since_epoch() result(us) bind(C, name="ftimings_microseconds_since_epoch")
     149             :       use, intrinsic :: iso_c_binding
     150             :       implicit none
     151             :       integer(kind=C_INT64_T) :: us
     152             :     end function
     153             :   end interface
     154             : 
     155             : #ifdef HAVE_LIBPAPI
     156             :   interface
     157             :     function flop_init() result(ret) bind(C, name="ftimings_flop_init")
     158             :       use, intrinsic :: iso_c_binding
     159             :       implicit none
     160             :       integer(kind=C_INT) :: ret
     161             :     end function
     162             :   end interface
     163             : 
     164             :   interface
     165             :     function loads_stores_init() result(ret) bind(C, name="ftimings_loads_stores_init")
     166             :       use, intrinsic :: iso_c_binding
     167             :       implicit none
     168             :       integer(kind=C_INT) :: ret
     169             :     end function
     170             :   end interface
     171             : 
     172             :   interface
     173             :     subroutine papi_counters(flops, ldst) bind(C, name="ftimings_papi_counters")
     174             :       use, intrinsic :: iso_c_binding
     175             :       implicit none
     176             :       integer(kind=C_LONG_LONG), intent(out) :: flops, ldst
     177             :     end subroutine
     178             :   end interface
     179             : #endif
     180             : 
     181             :   interface
     182             :     function resident_set_size() result(rsssize) bind(C, name="ftimings_resident_set_size")
     183             :       use, intrinsic :: iso_c_binding
     184             :       implicit none
     185             :       integer(kind=C_LONG) :: rsssize
     186             :     end function
     187             :   end interface
     188             : 
     189             :   interface
     190             :     function virtual_memory() result(virtualmem) bind(C, name="ftimings_virtual_memory")
     191             :       use, intrinsic :: iso_c_binding
     192             :       implicit none
     193             :       integer(kind=C_LONG) :: virtualmem
     194             :     end function
     195             :   end interface
     196             : 
     197             :   interface
     198             :     function max_resident_set_size() result(maxrsssize) bind(C, name="ftimings_highwater_mark")
     199             :       use, intrinsic :: iso_c_binding
     200             :       implicit none
     201             :       integer(kind=C_LONG) :: maxrsssize
     202             :     end function
     203             :   end interface
     204             : 
     205             :   contains
     206             : 
     207             :   !> Activate the timer, without this, most methods are non-ops.
     208             :   !>
     209       51840 :   subroutine timer_enable(self)
     210             :     class(timer_t), intent(inout), target :: self
     211             : 
     212       51840 :     self%active = .true.
     213       51840 :   end subroutine
     214             : 
     215             :   !> Call with enabled = .true. to also record amount of newly allocated memory.
     216             :   !> By default, memory usage is not recored. Call with .false. to deactivate again.
     217             :   !>
     218             :   !> This opens /proc/self/statm, parses it, and closes it agagain and is thus
     219             :   !> quite costly, use when appropriate.
     220             :   !>
     221           0 :   subroutine timer_measure_allocated_memory(self, enabled)
     222             :     class(timer_t), intent(inout) :: self
     223             :     logical, intent(in) :: enabled
     224             : 
     225           0 :     self%record_allocated_memory = enabled
     226           0 :   end subroutine
     227             : 
     228             :   !> Call with enabled = .true. to also record amount of newly created virtual memory.
     229             :   !> By default, memory usage is not recored. Call with .false. to deactivate again.
     230             :   !>
     231             :   !> This opens /proc/self/statm, parses it, and closes it agagain and is thus
     232             :   !> quite costly, use when appropriate.
     233             :   !>
     234           0 :   subroutine timer_measure_virtual_memory(self, enabled)
     235             :     class(timer_t), intent(inout) :: self
     236             :     logical, intent(in) :: enabled
     237             : 
     238           0 :     self%record_virtual_memory = enabled
     239           0 :   end subroutine
     240             : 
     241             :   !> Call with enabled = .true. to also record amount of newly increase of max.
     242             :   !> resident memory
     243             :   !> By default, memory usage is not recored. Call with .false. to deactivate again.
     244             :   !>
     245             :   !> This opens /proc/self/status, parses it, and closes it agagain and is thus
     246             :   !> quite costly, use when appropriate.
     247             :   !>
     248           0 :   subroutine timer_measure_max_allocated_memory(self, enabled)
     249             :     class(timer_t), intent(inout) :: self
     250             :     logical, intent(in) :: enabled
     251             : 
     252           0 :     self%record_max_allocated_memory = enabled
     253           0 :   end subroutine
     254             : 
     255             :   !> Call with enabled = .true. to also record the memory bandwidth with PAPI
     256             :   !> By default, this is not recorded. Call with .false. to deactivate again.
     257             :   !>
     258           0 :   subroutine timer_measure_memory_bandwidth(self, enabled)
     259             :     class(timer_t), intent(inout) :: self
     260             :     logical, intent(in) :: enabled
     261             : 
     262           0 :     if (enabled) then
     263             : #ifdef HAVE_LIBPAPI
     264             :       if (loads_stores_init() == 1) then
     265             :         self%record_memory_bandwidth = .true.
     266             :       else
     267             :         write(0,'(a)') "ftimings: Could not initialize PAPI, disabling memory bandwidth counter"
     268             :         self%record_memory_bandwidth = .false.
     269             :       endif
     270             : #else
     271           0 :       write(0,'(a)') "ftimings: not compiled with PAPI support, disabling memory bandwidth counter"
     272           0 :       self%record_memory_bandwidth = .false.
     273             : #endif
     274             :     else
     275             :       ! explicitly set to .false. by caller
     276           0 :       self%record_memory_bandwidth = .false.
     277             :     endif
     278           0 :   end subroutine
     279             : 
     280             :   !> Call with enabled = .true. to also record FLOP counts via PAPI calls.
     281             :   !> By default no FLOPS are recored. Call with .false. to deactivate again.
     282             :   !>
     283           0 :   subroutine timer_measure_flops(self, enabled)
     284             :     class(timer_t), intent(inout) :: self
     285             :     logical, intent(in) :: enabled
     286             : 
     287           0 :     if (enabled) then
     288             : #ifdef HAVE_LIBPAPI
     289             :       if (flop_init() == 1) then
     290             :         self%record_flop_counts = .true.
     291             :       else
     292             :         write(0,'(a)') "ftimings: Could not initialize PAPI, disabling FLOP counter"
     293             :         self%record_flop_counts = .false.
     294             :       endif
     295             : #else
     296           0 :       write(0,'(a)') "ftimings: not compiled with PAPI support, disabling FLOP counter"
     297           0 :       self%record_flop_counts = .false.
     298             : #endif
     299             :     else
     300             :       ! Explicitly set to .false. by caller
     301           0 :       self%record_flop_counts = .false.
     302             :     endif
     303           0 :   end subroutine
     304             : 
     305             :   !> Deactivate the timer
     306             :   !>
     307           0 :   subroutine timer_disable(self)
     308             :     class(timer_t), intent(inout), target :: self
     309           0 :     self%active = .false.
     310           0 :   end subroutine
     311             : 
     312             :   !> Return whether the timer is currently running
     313             :   !>
     314           0 :   function timer_is_enabled(self) result(is)
     315             :     class(timer_t), intent(inout), target :: self
     316             :     logical :: is
     317           0 :     is = self%active
     318           0 :   end function
     319             : 
     320             :   !> Control what to print on following %print calls
     321             :   !>
     322             :   !> \param     print_allocated_memory       Amount of newly allocated,
     323             :   !>                                         resident memory
     324             :   !> \param     print_virtual_memory         Amount of newly created virtual
     325             :   !>                                         memory
     326             :   !> \param     print_max_allocated_memory   Amount of new increase of max.
     327             :   !>                                         resident memory ("high water mark")
     328             :   !> \param     print_flop_count             Number of floating point operations
     329             :   !> \param     print_flop_rate              Rate of floating point operations per second
     330             :   !> \param     print_ldst                   Number of loads+stores
     331             :   !> \param     print_memory_bandwidth       Rate of loads+stores per second
     332             :   !> \param     print_ai                     Arithmetic intensity, that is number of
     333             :   !>                                         floating point operations per
     334             :   !>                                         number of load and store
     335             :   !>                                         operations (currently untested)
     336             :   !> \param     bytes_per_ldst               For calculating the AI, assume this number
     337             :   !>                                         of bytes per load or store (default: 8)
     338           0 :   subroutine timer_set_print_options(self, &
     339             :         print_allocated_memory, &
     340             :         print_virtual_memory, &
     341             :         print_max_allocated_memory, &
     342             :         print_flop_count, &
     343             :         print_flop_rate, &
     344             :         print_ldst, &
     345             :         print_memory_bandwidth, &
     346             :         print_ai, &
     347             :         bytes_per_ldst)
     348             :     class(timer_t), intent(inout) :: self
     349             :     logical, intent(in), optional :: &
     350             :         print_allocated_memory, &
     351             :         print_virtual_memory, &
     352             :         print_max_allocated_memory, &
     353             :         print_flop_count, &
     354             :         print_flop_rate, &
     355             :         print_ldst, &
     356             :         print_memory_bandwidth, &
     357             :         print_ai
     358             :     integer, intent(in), optional :: bytes_per_ldst
     359             : 
     360           0 :     if (present(print_allocated_memory)) then
     361           0 :       self%print_allocated_memory = print_allocated_memory
     362           0 :       if ((.not. self%record_allocated_memory) .and. self%print_allocated_memory) then
     363           0 :          write(0,'(a)') "ftimings: Warning: RSS size recording was disabled, expect zeros!"
     364             :       endif
     365             :     endif
     366             : 
     367           0 :     if (present(print_virtual_memory)) then
     368           0 :       self%print_virtual_memory = print_virtual_memory
     369           0 :       if ((.not. self%record_virtual_memory) .and. self%print_virtual_memory) then
     370           0 :          write(0,'(a)') "ftimings: Warning: Virtual memory recording was disabled, expect zeros!"
     371             :       endif
     372             :     endif
     373             : 
     374           0 :     if (present(print_max_allocated_memory)) then
     375           0 :       self%print_max_allocated_memory = print_max_allocated_memory
     376           0 :       if ((.not. self%record_max_allocated_memory) .and. self%print_max_allocated_memory) then
     377           0 :          write(0,'(a)') "ftimings: Warning: HWM recording was disabled, expect zeros!"
     378             :       endif
     379             :     endif
     380             : 
     381           0 :     if (present(print_flop_count)) then
     382           0 :       self%print_flop_count = print_flop_count
     383           0 :       if ((.not. self%record_flop_counts) .and. self%print_flop_count) then
     384           0 :          write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!"
     385             :       endif
     386             :     endif
     387             : 
     388           0 :     if (present(print_flop_rate)) then
     389           0 :       self%print_flop_rate = print_flop_rate
     390           0 :       if ((.not. self%record_flop_counts) .and. self%print_flop_rate) then
     391           0 :          write(0,'(a)') "ftimings: Warning: FLOP counter was disabled, expect zeros!"
     392             :       endif
     393             :     endif
     394             : 
     395           0 :     if (present(print_ldst)) then
     396           0 :       self%print_ldst = print_ldst
     397           0 :       if ((.not. self%record_memory_bandwidth) .and. self%print_ldst) then
     398           0 :          write(0,'(a)') "ftimings: Warning: Load+Store counters were disabled, expect zeros!"
     399             :       endif
     400             :     endif
     401           0 :     if (present(print_memory_bandwidth)) then
     402           0 :       self%print_memory_bandwidth = print_memory_bandwidth
     403           0 :       if ((.not. self%record_memory_bandwidth) .and. self%print_memory_bandwidth) then
     404           0 :          write(0,'(a)') "ftimings: Warning: Load+Store counters were disabled, expect zeros for memory bandwidth!"
     405             :       endif
     406             :     endif
     407             : 
     408           0 :     if (present(print_ai)) then
     409           0 :       self%print_ai = print_ai
     410           0 :       if (.not. (self%record_memory_bandwidth .and. self%record_flop_counts)) then
     411           0 :          write(0,'(a)') "ftimings: Warning: Memory bandwidth or FLOP counters were disabled, expect invalid values for AI"
     412             :       endif
     413             :     endif
     414             : 
     415           0 :     if (present(bytes_per_ldst)) then
     416           0 :       self%bytes_per_ldst = bytes_per_ldst
     417             :     endif
     418           0 :   end subroutine
     419             : 
     420             :   !> Start a timing section
     421             :   !>
     422             :   !> \param name        A descriptive name
     423             :   !> \param replace     If .true. (default .false.), replace any entries at the
     424             :   !>                    current position with the same name. If .false., add the
     425             :   !>                    time to a possibly existing entry
     426             :   !>
     427             :   !> Care must be taken to balance any invocations of %start() and %stop(), e.g.
     428             :   !> the following is valid
     429             :   !>
     430             :   !> \code{.f90}
     431             :   !>   call timer%start("A")
     432             :   !>   call timer%start("B")
     433             :   !>   call timer%stop("B")
     434             :   !>   call timer%stop("A")
     435             :   !> \endcode
     436             :   !>
     437             :   !> while the following is not
     438             :   !>
     439             :   !> \code{.f90}
     440             :   !>   call timer%start("A")
     441             :   !>   call timer%start("B")
     442             :   !>   call timer%stop("A")
     443             :   !>   call timer%stop("B")
     444             :   !> \endcode
     445             :   !>
     446   474215784 :   subroutine timer_start(self, name, replace)
     447             :     class(timer_t), intent(inout), target :: self
     448             :     character(len=*), intent(in)  :: name
     449             :     logical, intent(in), optional  :: replace
     450             :     type(node_t), pointer :: node
     451             :     !$ integer :: omp_get_thread_num, omp_get_num_threads, omp_get_level, omp_get_ancestor_thread_num
     452             :     !$ integer :: i
     453             : 
     454   474215784 :     if (.not. self%active) then
     455     6108204 :       return
     456             :     endif
     457             : 
     458             :     ! Deal with nested parallelization
     459   484211220 :     !$ do i = 0, omp_get_level()
     460   246119760 :     !$   if (omp_get_ancestor_thread_num(i) > 0) then
     461           0 :     !$     return
     462             :     !$   endif
     463             :     !$ end do
     464             : 
     465   238091460 :     !$omp master
     466             : 
     467   470262096 :     if (.not. associated(self%current_node)) then
     468             :       ! First call to timer_start()
     469       51840 :       allocate(self%root)
     470       51840 :       self%root%name = "[Root]"
     471       51840 :       self%root%timer => self
     472       51840 :       call self%root%start()
     473       51840 :       nullify(self%root%firstChild)
     474       51840 :       nullify(self%root%lastChild)
     475       51840 :       nullify(self%root%parent)
     476       51840 :       nullify(self%root%nextSibling)
     477       51840 :       self%current_node => self%root
     478             :     endif
     479             : 
     480   470262096 :     if (string_eq(self%current_node%name, name)) then
     481           0 :       !$omp critical
     482           0 :       write(error_unit,*) "Recursion error! Printing tree so far.."
     483           0 :       write(error_unit,*) "Got %start(""" // trim(name) // """), while %start(""" // trim(name) // """) was still active"
     484           0 :       !$ write(*,*) "omp_get_thread_num() = ", omp_get_thread_num()
     485           0 :       !$ write(*,*) "omp_get_num_threads() = ", omp_get_num_threads()
     486           0 :       !$ write(*,*) "omp_get_level() = ", omp_get_level()
     487           0 :       !$ do i = 0, omp_get_level()
     488           0 :       !$   write(*,*) "omp_get_ancestor_thread_num(", i, ") = ", omp_get_ancestor_thread_num(i)
     489             :       !$ end do
     490           0 :       call self%root%print_graph(0)
     491             :       !$omp end critical
     492           0 :       stop "timer_start() while same timer was active"
     493             :     endif
     494   470262096 :     node => self%current_node%get_child(name)
     495   470262096 :     if (.not. associated(node)) then
     496     2052228 :       node => self%current_node%new_child(name)
     497   468209868 :     else if (present(replace)) then
     498           0 :       if (replace) then
     499           0 :         node%value = null_value
     500           0 :         node%count = 0
     501           0 :         if (associated(node%firstChild)) then
     502           0 :           call deallocate_node(node%firstChild)
     503           0 :           nullify(node%firstChild)
     504           0 :           nullify(node%lastChild)
     505             :         endif
     506             :       endif
     507             :     endif
     508             : 
     509   470262096 :     call node%start()
     510             : 
     511   470262096 :     self%current_node => node
     512             : 
     513             :     !$omp end master
     514             : 
     515   474215784 :   end subroutine
     516             : 
     517             :   !> End a timing segment, \sa timer_start
     518             :   !>
     519             :   !> \param name        The exact same name as was used for %start().
     520             :   !>                    If not provided, close the currently active region.
     521             :   !>                    If given, warns if it does not match the last %start()
     522             :   !>                    call on stderr and disables the current timer instance.
     523             :   !>
     524   474215784 :   subroutine timer_stop(self, name)
     525             :     class(timer_t), intent(inout), target :: self
     526             :     character(len=*), intent(in), optional :: name
     527             :     logical :: error
     528             :     !$ integer :: omp_get_level, omp_get_ancestor_thread_num
     529             :     !$ integer :: i
     530             : 
     531   474215784 :     if (.not. self%active) then
     532     6108204 :       return
     533             :     endif
     534             : 
     535             :     ! Deal with nested parallelization
     536   484211220 :     !$ do i = 0, omp_get_level()
     537   246119760 :     !$   if (omp_get_ancestor_thread_num(i) > 0) then
     538           0 :     !$     return
     539             :     !$   endif
     540             :     !$ end do
     541             : 
     542   238091460 :     !$omp master
     543   470262096 :     error = .false.
     544             : 
     545   470262096 :     if (.not. associated(self%current_node)) then
     546           0 :       write(error_unit,'(a)') "Called timer_stop() without first calling any timer_start(), disabling timings"
     547           0 :       call self%free()
     548           0 :       self%active = .false.
     549           0 :       error = .true.
     550   470262096 :     else if (present(name)) then
     551   470262096 :       if (.not. string_eq(self%current_node%name, name)) then
     552             :         write(error_unit,'(a)') "Expected %stop(""" // trim(self%current_node%name)  // """),&
     553           0 :                  & but got %stop(""" // trim(name) //  """), disabling timings"
     554           0 :         call self%free()
     555           0 :         self%active = .false.
     556           0 :         error = .true.
     557             :       endif
     558             :     endif
     559             : 
     560   470262096 :     if (.not. error) then
     561   470262096 :       call self%current_node%stop()
     562             : 
     563             :       ! climb up to parent
     564   470262096 :       if (.not. associated(self%current_node%parent)) then
     565           0 :         write(error_unit,'(a)') "Error: No valid parent node found for node '" // trim(self%current_node%name) // "'"
     566           0 :         call self%free()
     567           0 :         self%active = .false.
     568             :       endif
     569   470262096 :       self%current_node => self%current_node%parent
     570             : 
     571             :     endif
     572             :     !$omp end master
     573             : 
     574   474215784 :   end subroutine
     575             : 
     576             :   !> Deallocate all objects associated with (but not including) self
     577             :   !>
     578       92016 :   subroutine timer_free(self)
     579             :     class(timer_t), intent(inout), target :: self
     580       92016 :     if (associated(self%root)) then
     581       51840 :       call deallocate_node(self%root)
     582             :     endif
     583       92016 :     nullify(self%root)
     584       92016 :     nullify(self%current_node)
     585       92016 :   end subroutine
     586             : 
     587             :   !> Print a timing graph
     588             :   !>
     589             :   !> \param name1       If given, first descend one level to the node with name name1
     590             :   !> \param name2       If given, also descend another level to the node with name2 there
     591             :   !> \param name3       etc.
     592             :   !> \param name4       etc.
     593             :   !> \param threshold   If given, subsume any entries with a value of threshold
     594             :   !>                    seconds in a single node "(below threshold)"
     595             :   !> \param is_sorted   Assume a sorted graph for inserting "(own)" and "(below threshold)"
     596             :   !> \param unit        The unit number on which to print, default stdout
     597             :   !>
     598       48624 :   subroutine timer_print(self, name1, name2, name3, name4, threshold, is_sorted, unit)
     599             :     class(timer_t), intent(in), target :: self
     600             :     character(len=*), intent(in), optional :: name1, name2, name3, name4
     601             :     real(kind=rk), intent(in), optional :: threshold
     602             :     logical, intent(in), optional :: is_sorted
     603             :     integer, intent(in), optional :: unit
     604             : 
     605             :     integer :: unit_act
     606             : 
     607             :     type(node_t), pointer :: node
     608             :     character(len=64) :: format_spec
     609             : 
     610             :     ! I hate fortran's string handling
     611             :     character(len=name_length), parameter :: group = "Group"
     612             :     character(len=12), parameter :: seconds    = "         [s]"
     613             :     character(len=12), parameter :: fract      = "    fraction"
     614             :     character(len=12), parameter :: ram        = "  alloc. RAM"
     615             :     character(len=12), parameter :: vmem       = "   alloc. VM"
     616             :     character(len=12), parameter :: hwm        = "  alloc. HWM"
     617             :     character(len=12), parameter :: flop_rate  = "     Mflop/s"
     618             :     character(len=12), parameter :: flop_count = "       Mflop"
     619             :     character(len=12), parameter :: ldst       = "loads+stores"
     620             :     character(len=12), parameter :: bandwidth  = "  mem bandw."
     621             :     character(len=12), parameter :: ai         = "arithm. Int."
     622             :     character(len=12), parameter :: dash       = "============"
     623             : 
     624       48624 :     if (.not. self%active) then
     625           0 :       return
     626             :     endif
     627             : 
     628       48624 :     if (present(unit)) then
     629           0 :       unit_act = unit
     630             :     else
     631       48624 :       unit_act = output_unit
     632             :     endif
     633             : 
     634       48624 :     node => self%root
     635       48624 :     if (present(name1)) then
     636       48624 :       node => node%get_child(name1)
     637       48624 :       if (.not. associated(node)) then
     638           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name1)  // """"
     639           0 :         return
     640             :       endif
     641             :     end if
     642       48624 :     if (present(name2)) then
     643           0 :       node => node%get_child(name2)
     644           0 :       if (.not. associated(node)) then
     645           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name2)  // """"
     646           0 :         return
     647             :       endif
     648             :     end if
     649       48624 :     if (present(name3)) then
     650           0 :       node => node%get_child(name3)
     651           0 :       if (.not. associated(node)) then
     652           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name3)  // """"
     653           0 :         return
     654             :       endif
     655             :     end if
     656       48624 :     if (present(name4)) then
     657           0 :       node => node%get_child(name4)
     658           0 :       if (.not. associated(node)) then
     659           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name4)  // """"
     660           0 :         return
     661             :       endif
     662             :     end if
     663             : 
     664             :     ! I really do hate it ..
     665       48624 :     write(format_spec,'("("" /= "",a",i0,",2x,a12,1x,a12)")') name_length
     666       48624 :     write(unit_act, format_spec, advance='no') adjustl(group), seconds, fract
     667             : 
     668       48624 :     if (self%print_allocated_memory) then
     669           0 :       write(unit_act,'(1x,a12)',advance='no') ram
     670             :     endif
     671             : 
     672       48624 :     if (self%print_virtual_memory) then
     673           0 :       write(unit_act,'(1x,a12)',advance='no') vmem
     674             :     endif
     675             : 
     676       48624 :     if (self%print_max_allocated_memory) then
     677           0 :       write(unit_act,'(1x,a12)',advance='no') hwm
     678             :     endif
     679             : 
     680       48624 :     if (self%print_flop_count) then
     681           0 :       write(unit_act,'(1x,a12)',advance='no') flop_count
     682             :     endif
     683       48624 :     if (self%print_flop_rate) then
     684           0 :       write(unit_act,'(1x,a12)',advance='no') flop_rate
     685             :     endif
     686       48624 :     if (self%print_ldst) then
     687           0 :       write(unit_act,'(1x,a12)',advance='no') ldst
     688             :     endif
     689       48624 :     if (self%print_memory_bandwidth) then
     690           0 :       write(unit_act,'(1x,a12)',advance='no') bandwidth
     691             :     endif
     692       48624 :     if (self%print_ai) then
     693           0 :       write(unit_act,'(1x,a12)',advance='no') ai
     694             :     endif
     695             : 
     696       48624 :     write(unit_act,'(a)') ""
     697             : 
     698       48624 :     write(format_spec,'("("" |  "",a",i0,",1x,2(1x,a12))")') name_length
     699       48624 :     write(unit_act, format_spec, advance='no') "", dash, dash
     700             : 
     701       48624 :     if (self%print_allocated_memory) then
     702           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     703             :     endif
     704             : 
     705       48624 :     if (self%print_virtual_memory) then
     706           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     707             :     endif
     708             : 
     709       48624 :     if (self%print_max_allocated_memory) then
     710           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     711             :     endif
     712             : 
     713       48624 :     if (self%print_flop_count) then
     714           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     715             :     endif
     716       48624 :     if (self%print_flop_rate) then
     717           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     718             :     endif
     719       48624 :     if (self%print_ldst) then
     720           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     721             :     endif
     722       48624 :     if (self%print_memory_bandwidth) then
     723           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     724             :     endif
     725       48624 :     if (self%print_ai) then
     726           0 :       write(unit_act,'(1x,a12)',advance='no') dash
     727             :     endif
     728             : 
     729       48624 :     write(unit_act,'(a)') ""
     730             : 
     731       48624 :     call node%print_graph(0, threshold, is_sorted, unit=unit)
     732             : 
     733       48624 :   end subroutine
     734             : 
     735             :   !> Return the sum of all entries with a certain name below
     736             :   !> a given node. Specify the name with the last argument, the
     737             :   !> path to the starting point with the first few parameters
     738             :   !>
     739             :   !> \param name1, .., namei-1  The path to the starting node
     740             :   !> \param namei               The name of all sub-entries below this
     741             :   !>                            node which should be summed together
     742             :   !>
     743             :   !> For example timer%in_entries("foo", "bar", "parallel") returns
     744             :   !> the sum of all entries named "parallel" below the foo->bar node
     745             :   !>
     746           0 :   function timer_in_entries(self, name1, name2, name3, name4) result(s)
     747             :     use, intrinsic :: iso_fortran_env, only : error_unit
     748             :     class(timer_t), intent(in), target :: self
     749             :     character(len=*), intent(in) :: name1
     750             :     character(len=*), intent(in), optional :: name2, name3, name4
     751             :     real(kind=rk) :: s
     752             :     type(node_t), pointer :: node ! the starting node
     753             :     type(value_t) :: val
     754             :     character(len=name_length) :: name ! the name of the sections
     755             : 
     756           0 :     s = 0._rk
     757             : 
     758           0 :     if (.not. self%active) then
     759           0 :       return
     760             :     endif
     761             : 
     762           0 :     node => self%root
     763           0 :     name = name1
     764             : 
     765           0 :     if (present(name2)) then
     766           0 :       node => node%get_child(name1)
     767           0 :       if (.not. associated(node)) then
     768           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name1)  // """"
     769           0 :         return
     770             :       endif
     771           0 :       name = name2
     772             :     end if
     773           0 :     if (present(name3)) then
     774           0 :       node => node%get_child(name2)
     775           0 :       if (.not. associated(node)) then
     776           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name2)  // """"
     777           0 :         return
     778             :       endif
     779           0 :       name = name3
     780             :     end if
     781           0 :     if (present(name4)) then
     782           0 :       node => node%get_child(name3)
     783           0 :       if (.not. associated(node)) then
     784           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name3)  // """"
     785           0 :         return
     786             :       endif
     787           0 :       name = name4
     788             :     end if
     789             : 
     790           0 :     val = node%sum_of_children_with_name(name)
     791           0 :     s = real(val%micros, kind=rk) * 1e-6_rk
     792           0 :   end function
     793             : 
     794             :   !> Access a specific, already stopped entry of the graph by specifying the
     795             :   !> names of the nodes along the graph from the root node
     796             :   !>
     797             :   !> The result is only meaningfull if the entry was never appended by
     798             :   !> additional %start() calls.
     799             :   !>
     800       48384 :   function timer_get(self, name1, name2, name3, name4, name5, name6) result(s)
     801             :     class(timer_t), intent(in), target :: self
     802             :     ! this is clunky, but what can you do..
     803             :     character(len=*), intent(in), optional :: name1, name2, name3, name4, name5, name6
     804             :     real(kind=rk) :: s
     805             :     type(node_t), pointer :: node
     806             : 
     807       48384 :     s = 0._rk
     808             : 
     809       48384 :     if (.not. self%active) then
     810           0 :       return
     811             :     endif
     812             : 
     813       48384 :     node => self%root
     814       48384 :     if (present(name1)) then
     815       48384 :       node => node%get_child(name1)
     816       48384 :       if (.not. associated(node)) then
     817           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name1)  // """"
     818           0 :         return
     819             :       endif
     820             :     end if
     821       48384 :     if (present(name2)) then
     822       48384 :       node => node%get_child(name2)
     823       48384 :       if (.not. associated(node)) then
     824           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name2)  // """"
     825           0 :         return
     826             :       endif
     827             :     end if
     828       48384 :     if (present(name3)) then
     829           0 :       node => node%get_child(name3)
     830           0 :       if (.not. associated(node)) then
     831           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name3)  // """"
     832           0 :         return
     833             :       endif
     834             :     end if
     835       48384 :     if (present(name4)) then
     836           0 :       node => node%get_child(name4)
     837           0 :       if (.not. associated(node)) then
     838           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name4)  // """"
     839           0 :         return
     840             :       endif
     841             :     end if
     842       48384 :     if (present(name5)) then
     843           0 :       node => node%get_child(name5)
     844           0 :       if (.not. associated(node)) then
     845           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name5)  // """"
     846           0 :         return
     847             :       endif
     848             :     end if
     849       48384 :     if (present(name6)) then
     850           0 :       node => node%get_child(name6)
     851           0 :       if (.not. associated(node)) then
     852           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name6)  // """"
     853           0 :         return
     854             :       endif
     855             :     end if
     856       48384 :     if (node%is_running) then
     857           0 :       write(error_unit,'(a)') "Timer """ // trim(node%name) // """ not yet stopped"
     858           0 :       return
     859             :     endif
     860       48384 :     s = real(node%value%micros, kind=rk) * 1e-6_rk
     861       96768 :   end function
     862             : 
     863             :   !> Access a specific, not yet stopped entry of the graph by specifying the
     864             :   !> names of the nodes along the graph from the root node and return the
     865             :   !> seconds that have passed since the entry was created.
     866             :   !>
     867             :   !> The result is only meaningfull if the entry was never appended by
     868             :   !> additional %start() calls.
     869             :   !>
     870           0 :   function timer_since(self, name1, name2, name3, name4) result(s)
     871             :     class(timer_t), intent(in), target :: self
     872             :     character(len=*), intent(in), optional :: name1, name2, name3, name4
     873             :     real(kind=rk) :: s
     874             :     type(value_t) :: val
     875             :     type(node_t), pointer :: node
     876             : 
     877           0 :     s = 0._rk
     878             : 
     879           0 :     node => self%root
     880           0 :     if (present(name1)) then
     881           0 :       node => node%get_child(name1)
     882           0 :       if (.not. associated(node)) then
     883           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name1)  // """"
     884           0 :         return
     885             :       endif
     886             :     end if
     887           0 :     if (present(name2)) then
     888           0 :       node => node%get_child(name2)
     889           0 :       if (.not. associated(node)) then
     890           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name2)  // """"
     891           0 :         return
     892             :       endif
     893             :     end if
     894           0 :     if (present(name3)) then
     895           0 :       node => node%get_child(name3)
     896           0 :       if (.not. associated(node)) then
     897           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name3)  // """"
     898           0 :         return
     899             :       endif
     900             :     end if
     901           0 :     if (present(name4)) then
     902           0 :       node => node%get_child(name4)
     903           0 :       if (.not. associated(node)) then
     904           0 :         write(error_unit,'(a)') "Could not descend to """ // trim(name4)  // """"
     905           0 :         return
     906             :       endif
     907             :     end if
     908           0 :     if (node%is_running .neqv. .true.) then
     909           0 :       write(error_unit,'(a)') "Timer """ // trim(node%name) // """ already stopped"
     910           0 :       return
     911             :     endif
     912           0 :     val = node%value + node%now()
     913           0 :     s = real(val%micros, kind=rk) * 1e-6_rk
     914           0 :   end function
     915             : 
     916             :   !> Sort the graph on each level.
     917             :   !> Warning: This irrevocable destroys the old ordering.
     918             :   !>
     919           0 :   subroutine timer_sort(self)
     920             :     class(timer_t), intent(inout), target :: self
     921             :     type(node_t), pointer :: node
     922             : 
     923           0 :     call sort_nodes(self%root, node)
     924             : 
     925           0 :     node => self%root
     926           0 :     do while (associated(node))
     927           0 :       call node%sort_children()
     928           0 :       node => node%nextSibling
     929             :     enddo
     930           0 :   end subroutine
     931             : 
     932             : 
     933             : 
     934             :   ! Now methods of node_t:
     935             : 
     936             : 
     937             :   ! This is the function that actually returns the current timestamp and all other counters
     938   940576032 :   function node_now(self) result(val)
     939             :     use, intrinsic :: iso_c_binding
     940             :     class(node_t), intent(in) :: self
     941             :     type(value_t) :: val
     942             : 
     943             :     ! current time
     944   940576032 :     val%micros = microseconds_since_epoch()
     945             : 
     946   940576032 :     if (self%timer%record_allocated_memory) then
     947           0 :       val%rsssize = resident_set_size()
     948             :     endif
     949             : 
     950   940576032 :     if (self%timer%record_virtual_memory) then
     951           0 :       val%virtualmem = virtual_memory()
     952             :     endif
     953             : 
     954   940576032 :     if (self%timer%record_max_allocated_memory) then
     955           0 :       val%maxrsssize = max_resident_set_size()
     956             :     endif
     957             : 
     958             : #ifdef HAVE_LIBPAPI
     959             :     if (self%timer%record_flop_counts .or. self%timer%record_memory_bandwidth) then
     960             :       call papi_counters(val%flop_count, val%ldst)
     961             :     endif
     962             : #endif
     963  1881152064 :   end function
     964             : 
     965             : 
     966   470313936 :   subroutine node_start(self)
     967             :     class(node_t), intent(inout) :: self
     968             : 
     969             :     ! take the time
     970   470313936 :     self%value = self%value - self%now()
     971   470313936 :     self%is_running = .true.
     972   470313936 :   end subroutine
     973             : 
     974   470262096 :   subroutine node_stop(self)
     975             :     class(node_t), intent(inout) :: self
     976             : 
     977   470262096 :     self%count = self%count + 1
     978             : 
     979             :     ! take the time
     980   470262096 :     self%value = self%value + self%now()
     981   470262096 :     self%is_running = .false.
     982   470262096 :   end subroutine
     983             : 
     984     3266808 :   function node_get_value(self) result(val)
     985             :     class(node_t), intent(in) :: self
     986             :     type(value_t) :: val
     987     3266808 :     val = self%value
     988     3266808 :     if (self%is_running) then
     989             :       ! we have not finished, give time up to NOW
     990           0 :       val = val + self%now()
     991             :     endif
     992     6533616 :   end function
     993             : 
     994     2052228 :   function node_new_child(self, name) result(new)
     995             :     class(node_t), intent(inout), target :: self
     996             :     character(len=*), intent(in) :: name
     997             :     type(node_t), pointer :: new
     998             : 
     999     2052228 :     if (.not. associated(self%lastChild)) then
    1000     1105236 :       allocate(self%lastChild)
    1001     1105236 :       new => self%lastChild
    1002     1105236 :       self%firstChild => new
    1003             :     else
    1004      946992 :       allocate(self%lastChild%nextSibling)
    1005      946992 :       new => self%lastChild%nextSibling
    1006      946992 :       self%lastChild => new
    1007             :     endif
    1008             : 
    1009             :     select type (self)
    1010             :       type is (node_t)
    1011     2052228 :         new%parent => self
    1012             :       class default
    1013     2052228 :         stop "node_new_child(): This should not happen"
    1014             :     end select
    1015             : 
    1016     2052228 :     new%name = name
    1017     2052228 :     new%count = 0
    1018     2052228 :     new%timer => self%timer
    1019             : 
    1020     2052228 :     nullify(new%firstChild)
    1021     2052228 :     nullify(new%lastChild)
    1022     2052228 :     nullify(new%nextSibling)
    1023     4104456 :   end function
    1024             : 
    1025             : 
    1026  1809672036 :   function string_eq(str1, str2) result(eq)
    1027             :     character(len=name_length), intent(in) :: str1
    1028             :     character(len=*), intent(in) :: str2
    1029             :     logical :: eq
    1030  1809672036 :     eq = trim(str1) .eq. str2(1:min(len(trim(str2)), name_length))
    1031  3619344072 :   end function
    1032             : 
    1033   470407488 :   function node_get_child(self, name) result(child)
    1034             :     class(node_t), intent(in) :: self
    1035             :     character(len=*), intent(in) :: name
    1036             :     type(node_t), pointer :: child
    1037             : 
    1038   470407488 :     child => self%firstChild
    1039  1271992656 :     do while (associated(child))
    1040   869147844 :       if (string_eq(child%name, name)) then
    1041   468355260 :         return
    1042             :       endif
    1043   400792584 :       child => child%nextSibling
    1044             :     enddo
    1045     2052228 :     nullify(child)
    1046   472459716 :   end function
    1047             : 
    1048     2104068 :   recursive subroutine deallocate_node(entry)
    1049             :     type(node_t), intent(inout), pointer :: entry
    1050             :     type(node_t), pointer :: nextSibling
    1051             : 
    1052     2104068 :     if (associated(entry%firstChild)) then
    1053     1105236 :       call deallocate_node(entry%firstChild)
    1054             :     endif
    1055     2104068 :     nextSibling => entry%nextSibling
    1056     2104068 :     deallocate(entry)
    1057     2104068 :     nullify(entry)
    1058     2104068 :     if (associated(nextSibling)) then
    1059      946992 :       call deallocate_node(nextSibling)
    1060             :     endif
    1061     2104068 :   end subroutine
    1062             : 
    1063     1121352 :   function node_sum_of_children(self) result(sum_time)
    1064             :     class(node_t), intent(in) :: self
    1065             :     type(node_t), pointer :: cur_entry
    1066             :     type(value_t) :: sum_time
    1067             : 
    1068     1121352 :     cur_entry => self%firstChild
    1069     3266808 :     do while (associated(cur_entry))
    1070     1072728 :       sum_time = sum_time + cur_entry%get_value()
    1071     1072728 :       cur_entry => cur_entry%nextSibling
    1072             :     enddo
    1073     2242704 :   end function
    1074             : 
    1075           0 :   recursive function node_sum_of_children_with_name(self, name) result(sum_time)
    1076             :     class(node_t), intent(in) :: self
    1077             :     character(len=*), intent(in) :: name
    1078             :     type(node_t), pointer :: cur_entry
    1079             :     type(value_t) :: sum_time
    1080             : 
    1081           0 :     cur_entry => self%firstChild
    1082           0 :     do while (associated(cur_entry))
    1083           0 :       if (string_eq(cur_entry%name, name)) then
    1084           0 :         sum_time = sum_time + cur_entry%value
    1085             :       else
    1086           0 :         sum_time = sum_time + cur_entry%sum_of_children_with_name(name)
    1087             :       endif
    1088           0 :       cur_entry => cur_entry%nextSibling
    1089             :     enddo
    1090           0 :   end function
    1091             : 
    1092     1121352 :   function node_sum_of_children_below(self, threshold) result(sum_time)
    1093             :     class(node_t), intent(in) :: self
    1094             :     real(kind=rk), intent(in), optional :: threshold
    1095             :     type(node_t), pointer :: cur_entry
    1096             :     type(value_t) :: sum_time, cur_value
    1097             : 
    1098     1121352 :     if (.not. present(threshold)) then
    1099     1121352 :       return
    1100             :     endif
    1101             : 
    1102           0 :     cur_entry => self%firstChild
    1103             : 
    1104           0 :     do while (associated(cur_entry))
    1105           0 :       cur_value = cur_entry%get_value()
    1106           0 :       if (cur_value%micros * 1e-6_rk < threshold) then
    1107           0 :         sum_time = sum_time + cur_value
    1108             :       endif
    1109           0 :       cur_entry => cur_entry%nextSibling
    1110             :     enddo
    1111     1121352 :   end function
    1112             : 
    1113           0 :   subroutine insert_into_sorted_list(head, node)
    1114             :     type(node_t), pointer, intent(inout) :: head
    1115             :     type(node_t), target, intent(inout)    :: node
    1116             :     type(node_t), pointer :: cur
    1117             : 
    1118           0 :     if (node%value%micros >= head%value%micros) then
    1119           0 :       node%nextSibling => head
    1120           0 :       head => node
    1121           0 :       return
    1122             :     endif
    1123             : 
    1124           0 :     cur => head
    1125           0 :     do while (associated(cur%nextSibling))
    1126           0 :       if (cur%value%micros > node%value%micros .and. node%value%micros >= cur%nextSibling%value%micros) then
    1127           0 :         node%nextSibling => cur%nextSibling
    1128           0 :         cur%nextSibling => node
    1129           0 :         return
    1130             :       endif
    1131           0 :       cur => cur%nextSibling
    1132             :     end do
    1133             : 
    1134             :     ! node has to be appended at the end
    1135           0 :     cur%nextSibling => node
    1136           0 :     node%nextSibling => NULL()
    1137             :   end subroutine
    1138             : 
    1139           0 :   subroutine remove_from_list(head, node)
    1140             :     type(node_t), pointer, intent(inout) :: head
    1141             :     type(node_t), pointer, intent(in)    :: node
    1142             :     type(node_t), pointer :: cur
    1143             : 
    1144           0 :     if (associated(head,node)) then
    1145           0 :       head => head%nextSibling
    1146           0 :       return
    1147             :     endif
    1148             : 
    1149           0 :     cur => head
    1150           0 :     do while (associated(cur%nextSibling))
    1151           0 :       if (associated(cur%nextSibling,node)) then
    1152           0 :         cur%nextSibling => cur%nextSibling%nextSibling
    1153           0 :         return
    1154             :       endif
    1155           0 :       cur => cur%nextSibling
    1156             :     end do
    1157             :   end subroutine
    1158             : 
    1159     1121352 :   subroutine node_print(self, indent_level, total, unit)
    1160             :     class(node_t), intent(inout) :: self
    1161             :     integer, intent(in) :: indent_level
    1162             :     type(value_t), intent(in) :: total
    1163             :     type(value_t) :: val
    1164             :     integer, intent(in) :: unit
    1165             :     character(len=name_length) :: name, suffix
    1166             : 
    1167     1121352 :     if (self%is_running) then
    1168           0 :       name = trim(self%name) // " (running)"
    1169             :     else
    1170     1121352 :       name = self%name
    1171             :     endif
    1172             : 
    1173     1121352 :     if (self%count > 1) then
    1174      442776 :       write(suffix, '(" (",i0,"x)")') self%count
    1175      442776 :       name = trim(name) // " " // trim(suffix)
    1176             :     endif
    1177             : 
    1178     1121352 :     if (self%is_running) then
    1179           0 :       val = self%value + self%now()
    1180             :     else
    1181     1121352 :       val = self%value
    1182             :     endif
    1183     1121352 :     call print_value(val, self%timer, indent_level, name, total, unit)
    1184     2242704 :   end subroutine
    1185             : 
    1186     1121352 :   recursive subroutine node_print_graph(self, indent_level, threshold, is_sorted, total, unit)
    1187             :     use, intrinsic :: iso_fortran_env, only : output_unit
    1188             :     class(node_t), intent(inout) :: self
    1189             :     integer, intent(in) :: indent_level
    1190             :     real(kind=rk), intent(in), optional :: threshold
    1191             :     logical, intent(in), optional :: is_sorted
    1192             :     type(value_t), intent(in), optional :: total
    1193             :     integer, intent(in), optional :: unit
    1194             : 
    1195             :     type(node_t), pointer :: node
    1196             :     integer :: i
    1197             :     type(value_t) :: cur_value, node_value, own_value, below_threshold_value, total_act
    1198             :     type(node_t), pointer :: own_node, threshold_node
    1199             :     real(kind=rk) :: threshold_act
    1200             :     logical :: is_sorted_act, print_own, print_threshold
    1201             :     integer :: unit_act
    1202             : 
    1203     1121352 :     nullify(own_node)
    1204     1121352 :     nullify(threshold_node)
    1205             : 
    1206     1121352 :     if (present(threshold)) then
    1207           0 :       threshold_act = threshold
    1208             :     else
    1209     1121352 :       threshold_act = 0
    1210             :     endif
    1211             : 
    1212     1121352 :     if (present(is_sorted)) then
    1213           0 :       is_sorted_act = is_sorted
    1214             :     else
    1215     1121352 :       is_sorted_act = .false.
    1216             :     endif
    1217             : 
    1218     1121352 :     cur_value = self%get_value()
    1219             : 
    1220     1121352 :     if (present(total)) then
    1221     1072728 :       total_act = total
    1222             :     else
    1223       48624 :       total_act = cur_value
    1224             :     endif
    1225             : 
    1226     1121352 :     if (present(unit)) then
    1227     1072728 :       unit_act = unit
    1228             :     else
    1229       48624 :       unit_act = output_unit
    1230             :     endif
    1231             : 
    1232     1121352 :     call self%print(indent_level, total_act, unit_act)
    1233             : 
    1234     1121352 :     own_value = cur_value - self%sum_of_children()
    1235     1121352 :     below_threshold_value = self%sum_of_children_below(threshold)
    1236             : 
    1237     1121352 :     print_own = associated(self%firstChild)
    1238     1121352 :     print_threshold = below_threshold_value%micros > 0
    1239             : 
    1240             :     ! Deal with "(own)" and "(below threshold)" entries
    1241     1121352 :     if (is_sorted_act) then
    1242             :       ! sort them in
    1243           0 :       if (print_own) then
    1244             :         ! insert an "(own)" node
    1245           0 :         allocate(own_node)
    1246           0 :         own_node%value = own_value
    1247           0 :         own_node%name = own
    1248           0 :         own_node%timer => self%timer
    1249           0 :         call insert_into_sorted_list(self%firstChild, own_node)
    1250             :       endif
    1251             : 
    1252           0 :       if (print_threshold) then
    1253             :         ! insert a "(below threshold)" node
    1254           0 :         allocate(threshold_node)
    1255           0 :         threshold_node%value = below_threshold_value
    1256           0 :         threshold_node%name = below
    1257           0 :         threshold_node%timer => self%timer
    1258           0 :         call insert_into_sorted_list(self%firstChild, threshold_node)
    1259             :       endif
    1260             : 
    1261             :     else
    1262             :       ! print them first
    1263     1121352 :       if (print_own) then
    1264      577428 :         call print_value(own_value, self%timer, indent_level + 1, own, cur_value, unit_act)
    1265             :       endif
    1266     1121352 :       if (print_threshold) then
    1267           0 :         call print_value(below_threshold_value, self%timer, indent_level + 1, below, cur_value, unit_act)
    1268             :       endif
    1269             :     endif
    1270             : 
    1271             :     ! print children
    1272     1121352 :     node => self%firstChild
    1273     3266808 :     do while (associated(node))
    1274     1072728 :       node_value = node%get_value()
    1275             :       if (node_value%micros * 1e-6_rk >= threshold_act &
    1276             :                 .or. associated(node, threshold_node) &
    1277     1072728 :                 .or. associated(node, own_node)) then
    1278     1072728 :         call node%print_graph(indent_level + 1, threshold, is_sorted, cur_value, unit_act)
    1279             :       endif
    1280     1072728 :       node => node%nextSibling
    1281             :     end do
    1282             : 
    1283     1121352 :     if (is_sorted_act) then
    1284             :       ! remove inserted dummy nodes again
    1285           0 :       if (print_own) then
    1286           0 :         call remove_from_list(self%firstChild, own_node)
    1287           0 :         deallocate(own_node)
    1288             :       endif
    1289           0 :       if (print_threshold) then
    1290           0 :         call remove_from_list(self%firstChild, threshold_node)
    1291           0 :         deallocate(threshold_node)
    1292             :       endif
    1293             :     endif
    1294             : 
    1295     2242704 :   end subroutine
    1296             : 
    1297             :   ! In-place sort a node_t linked list and return the first and last element,
    1298           0 :   subroutine sort_nodes(head, tail)
    1299             :     type(node_t), pointer, intent(inout) :: head, tail
    1300             : 
    1301             :     type(node_t), pointer :: p, q, e
    1302             :     type(value_t) :: p_val, q_val
    1303             :     integer :: insize, nmerges, psize, qsize, i
    1304             : 
    1305           0 :     if (.not. associated(head)) then
    1306           0 :       nullify(tail)
    1307           0 :       return
    1308             :     endif
    1309             : 
    1310           0 :     insize = 1
    1311             : 
    1312           0 :     do while (.true.)
    1313           0 :       p => head
    1314           0 :       nullify(head)
    1315           0 :       nullify(tail)
    1316           0 :       nmerges = 0
    1317             : 
    1318           0 :       do while(associated(p))
    1319           0 :         nmerges = nmerges + 1
    1320           0 :         q => p
    1321           0 :         psize = 0
    1322           0 :         do i = 1, insize
    1323           0 :           psize = psize + 1
    1324           0 :           q => q%nextSibling
    1325           0 :           if (.not. associated(q)) then
    1326           0 :             exit
    1327             :           endif
    1328             :         end do
    1329             : 
    1330           0 :         qsize = insize
    1331             : 
    1332           0 :         do while (psize > 0 .or. (qsize > 0 .and. associated(q)))
    1333           0 :           if (psize == 0) then
    1334           0 :             e => q
    1335           0 :             q => q%nextSibling
    1336           0 :             qsize = qsize - 1
    1337             : 
    1338           0 :           else if (qsize == 0 .or. (.not. associated(q))) then
    1339           0 :             e => p;
    1340           0 :             p => p%nextSibling
    1341           0 :             psize = psize - 1
    1342             :           else
    1343           0 :             p_val = p%get_value()
    1344           0 :             q_val = q%get_value()
    1345           0 :             if (p_val%micros >= q_val%micros) then
    1346           0 :               e => p
    1347           0 :               p => p%nextSibling
    1348           0 :               psize = psize - 1
    1349             : 
    1350             :             else
    1351           0 :               e => q
    1352           0 :               q => q%nextSibling
    1353           0 :               qsize = qsize - 1
    1354             : 
    1355             :             end if
    1356             :           end if
    1357             : 
    1358           0 :           if (associated(tail)) then
    1359           0 :             tail%nextSibling => e
    1360             :           else
    1361           0 :             head => e
    1362             :           endif
    1363           0 :           tail => e
    1364             : 
    1365             :         end do
    1366             : 
    1367           0 :         p => q
    1368             : 
    1369             :       end do
    1370             : 
    1371           0 :       nullify(tail%nextSibling)
    1372             : 
    1373           0 :       if (nmerges <= 1) then
    1374           0 :         return
    1375             :       endif
    1376             : 
    1377           0 :       insize = insize * 2
    1378             : 
    1379             :     end do
    1380           0 :   end subroutine
    1381             : 
    1382             : 
    1383           0 :   recursive subroutine node_sort_children(self)
    1384             :     class(node_t), intent(inout) :: self
    1385             :     type(node_t), pointer :: node
    1386             : 
    1387           0 :     call sort_nodes(self%firstChild, self%lastChild)
    1388             : 
    1389           0 :     node => self%firstChild
    1390           0 :     do while (associated(node))
    1391           0 :       call node%sort_children()
    1392           0 :       node => node%nextSibling
    1393             :     enddo
    1394           0 :   end subroutine
    1395             : 
    1396     1698780 :   subroutine print_value(value, timer, indent_level, label, total, unit)
    1397             :     type(value_t), intent(in) :: value
    1398             :     type(timer_t), intent(in) :: timer
    1399             :     integer, intent(in) :: indent_level
    1400             :     character(len=name_length), intent(in) :: label
    1401             :     type(value_t), intent(in) :: total
    1402             :     integer, intent(in) :: unit
    1403             : 
    1404             :     character(len=64) :: format_spec
    1405             : 
    1406     1698780 :     write(format_spec,'("(",i0,"x,""|_ "",a",i0,",2x,f12.6,1x,f12.3)")') indent_level * 2 + 1, name_length
    1407             :     write(unit,format_spec,advance='no') &
    1408     1698780 :       label, &
    1409     1698780 :       real(value%micros, kind=rk) * 1e-6_rk, &
    1410     3397560 :       real(value%micros, kind=rk) / real(total%micros, kind=rk)
    1411             : 
    1412     1698780 :     if (timer%print_allocated_memory) then
    1413             :       write(unit,'(1x,a12)',advance='no') &
    1414           0 :         nice_format(real(value%rsssize, kind=C_DOUBLE))
    1415             :     endif
    1416             : 
    1417     1698780 :     if (timer%print_virtual_memory) then
    1418             :       write(unit,'(1x,a12)',advance='no') &
    1419           0 :         nice_format(real(value%virtualmem, kind=C_DOUBLE))
    1420             :     endif
    1421             : 
    1422     1698780 :     if (timer%print_max_allocated_memory) then
    1423             :       write(unit,'(1x,a12)',advance='no') &
    1424           0 :         nice_format(real(value%maxrsssize, kind=C_DOUBLE))
    1425             :     endif
    1426             : 
    1427     1698780 :     if (timer%print_flop_count) then
    1428           0 :       write(unit,'(1x,f12.2)',advance='no') real(value%flop_count, kind=rk) / 1e6_rk
    1429             :     endif
    1430     1698780 :     if (timer%print_flop_rate) then
    1431           0 :       write(unit,'(1x,f12.2)',advance='no') real(value%flop_count, kind=rk) / value%micros
    1432             :     endif
    1433     1698780 :     if (timer%print_ldst) then
    1434           0 :       write(unit,'(1x,a12)',advance='no') nice_format(real(value%ldst, kind=rk))
    1435             :     endif
    1436     1698780 :     if (timer%print_memory_bandwidth) then
    1437           0 :       write(unit,'(1x,a12)',advance='no') nice_format(real(value%ldst*timer%bytes_per_ldst, kind=rk) / (value%micros * 1e-6_rk))
    1438             :     endif
    1439     1698780 :     if (timer%print_ai) then
    1440           0 :       write(unit,'(1x,f12.4)',advance='no') real(value%flop_count, kind=rk) / value%ldst / timer%bytes_per_ldst
    1441             :     endif
    1442             : 
    1443     1698780 :     write(unit,'(a)') ""
    1444     1698780 :   end subroutine
    1445             : 
    1446           0 :   pure elemental function nice_format(number) result(string)
    1447             :     real(kind=C_DOUBLE), intent(in) :: number
    1448             :     character(len=12) :: string
    1449             :     real(kind=C_DOUBLE), parameter :: &
    1450             :         kibi = 2.0_C_DOUBLE**10, &
    1451             :         mebi = 2.0_C_DOUBLE**20, &
    1452             :         gibi = 2.0_C_DOUBLE**30, &
    1453             :         tebi = 2.0_C_DOUBLE**40, &
    1454             :         pebi = 2.0_C_DOUBLE**50
    1455             : 
    1456           0 :     if (abs(number) >= pebi) then
    1457           0 :       write(string,'(es12.2)') number
    1458           0 :     else if (abs(number) >= tebi) then
    1459           0 :       write(string,'(f9.2,'' Ti'')') number / tebi
    1460           0 :     else if (abs(number) >= gibi) then
    1461           0 :       write(string,'(f9.2,'' Gi'')') number / gibi
    1462           0 :     else if (abs(number) >= mebi) then
    1463           0 :       write(string,'(f9.2,'' Mi'')') number / mebi
    1464           0 :     else if (abs(number) >= kibi) then
    1465           0 :       write(string,'(f9.2,'' ki'')') number / kibi
    1466             :     else
    1467           0 :       write(string,'(f12.2)') number
    1468             :     endif
    1469           0 :   end function
    1470             : 
    1471             : 
    1472     4104456 : end module

Generated by: LCOV version 1.12