LCOV - code coverage report
Current view: top level - src/elpa2 - elpa2_determine_workload.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 24 26 92.3 %
Date: 2018-01-10 09:29:53 Functions: 2 2 100.0 %

          Line data    Source code
       1             : !   This file is part of ELPA.
       2             : !
       3             : !    The ELPA library was originally created by the ELPA consortium,
       4             : !    consisting of the following organizations:
       5             : !
       6             : !    - Max Planck Computing and Data Facility (MPCDF), formerly known as
       7             : !      Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       8             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
       9             : !      Informatik,
      10             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      11             : !      Schwerpunkt Wissenschaftliches Rechnen,
      12             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      13             : !    - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
      14             : !      Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
      15             : !      and
      16             : !    - IBM Deutschland GmbH
      17             : !
      18             : !    This particular source code file contains additions, changes and
      19             : !    enhancements authored by Intel Corporation which is not part of
      20             : !    the ELPA consortium.
      21             : !
      22             : !    More information can be found here:
      23             : !    http://elpa.mpcdf.mpg.de/
      24             : !
      25             : !    ELPA is free software: you can redistribute it and/or modify
      26             : !    it under the terms of the version 3 of the license of the
      27             : !    GNU Lesser General Public License as published by the Free
      28             : !    Software Foundation.
      29             : !
      30             : !    ELPA is distributed in the hope that it will be useful,
      31             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      32             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      33             : !    GNU Lesser General Public License for more details.
      34             : !
      35             : !    You should have received a copy of the GNU Lesser General Public License
      36             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      37             : !
      38             : !    ELPA reflects a substantial effort on the part of the original
      39             : !    ELPA consortium, and we ask you to respect the spirit of the
      40             : !    license that we chose: i.e., please contribute any changes you
      41             : !    may have back to the original ELPA library distribution, and keep
      42             : !    any derivatives of ELPA under the same license that we chose for
      43             : !    the original distribution, the GNU Lesser General Public License.
      44             : !
      45             : !
      46             : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
      47             : !
      48             : ! Copyright of the original code rests with the authors inside the ELPA
      49             : ! consortium. The copyright of any additional modifications shall rest
      50             : ! with their original authors, but shall adhere to the licensing terms
      51             : ! distributed along with the original code in the file "COPYING".
      52             : 
      53             : 
      54             : 
      55             : ! ELPA2 -- 2-stage solver for ELPA
      56             : !
      57             : ! Copyright of the original code rests with the authors inside the ELPA
      58             : ! consortium. The copyright of any additional modifications shall rest
      59             : ! with their original authors, but shall adhere to the licensing terms
      60             : ! distributed along with the original code in the file "COPYING".
      61             : 
      62             : #include "config-f90.h"
      63             : 
      64             : module elpa2_workload
      65             : 
      66             :   implicit none
      67             :   private
      68             : 
      69             :   public :: determine_workload
      70             :   public :: divide_band
      71             : 
      72             :   contains
      73     1164672 :     subroutine determine_workload(obj, na, nb, nprocs, limits)
      74             :       use elpa_abstract_impl
      75             :       use precision
      76             :       implicit none
      77             : 
      78             :       class(elpa_abstract_impl_t), intent(inout) :: obj
      79             :       integer(kind=ik), intent(in)  :: na, nb, nprocs
      80             :       integer(kind=ik), intent(out) :: limits(0:nprocs)
      81             : 
      82             :       integer(kind=ik)              :: i
      83             : 
      84     1164672 :       call obj%timer%start("determine_workload")
      85             : 
      86     1164672 :       if (na <= 0) then
      87       46296 :         limits(:) = 0
      88             : 
      89       46296 :         call obj%timer%stop("determine_workload")
      90       46296 :         return
      91             :       endif
      92             : 
      93     1118376 :       if (nb*nprocs > na) then
      94             :           ! there is not enough work for all
      95     1361400 :         do i = 0, nprocs
      96     1001472 :           limits(i) = min(na, i*nb)
      97             :         enddo
      98             :       else
      99     2713608 :          do i = 0, nprocs
     100     1955160 :            limits(i) = (i*na)/nprocs
     101             :          enddo
     102             :       endif
     103             : 
     104     1118376 :       call obj%timer%stop("determine_workload")
     105             :     end subroutine
     106             :     !---------------------------------------------------------------------------------------------------
     107             :     ! divide_band: sets the work distribution in band
     108             :     ! Proc n works on blocks block_limits(n)+1 .. block_limits(n+1)
     109             : 
     110      118620 :     subroutine divide_band(obj, nblocks_total, n_pes, block_limits)
     111             :       use precision
     112             :       use elpa_abstract_impl
     113             :       implicit none
     114             :       class(elpa_abstract_impl_t), intent(inout) :: obj
     115             :       integer(kind=ik), intent(in)  :: nblocks_total ! total number of blocks in band
     116             :       integer(kind=ik), intent(in)  :: n_pes         ! number of PEs for division
     117             :       integer(kind=ik), intent(out) :: block_limits(0:n_pes)
     118             : 
     119             :       integer(kind=ik)              :: n, nblocks, nblocks_left
     120             : 
     121      118620 :       call obj%timer%start("divide_band")
     122             : 
     123      118620 :       block_limits(0) = 0
     124      118620 :       if (nblocks_total < n_pes) then
     125             :         ! Not enough work for all: The first tasks get exactly 1 block
     126           0 :         do n=1,n_pes
     127           0 :           block_limits(n) = min(nblocks_total,n)
     128             :         enddo
     129             :       else
     130             :         ! Enough work for all. If there is no exact loadbalance,
     131             :         ! the LAST tasks get more work since they are finishing earlier!
     132      118620 :         nblocks = nblocks_total/n_pes
     133      118620 :         nblocks_left = nblocks_total - n_pes*nblocks
     134      300504 :         do n=1,n_pes
     135      181884 :           if (n<=n_pes-nblocks_left) then
     136      122844 :             block_limits(n) = block_limits(n-1) + nblocks
     137             :           else
     138       59040 :             block_limits(n) = block_limits(n-1) + nblocks + 1
     139             :           endif
     140             :         enddo
     141             :       endif
     142             : 
     143      118620 :       call obj%timer%stop("divide_band")
     144             : 
     145      118620 :     end subroutine
     146             : end module elpa2_workload

Generated by: LCOV version 1.12