Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
7 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9 : ! Informatik,
10 : ! - Technische Universität München, Lehrstuhl für Informatik mit
11 : ! Schwerpunkt Wissenschaftliches Rechnen ,
12 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13 : ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
14 : ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
15 : ! and
16 : ! - IBM Deutschland GmbH
17 : !
18 : ! This particular source code file contains additions, changes and
19 : ! enhancements authored by Intel Corporation which is not part of
20 : ! the ELPA consortium.
21 : !
22 : ! More information can be found here:
23 : ! http://elpa.mpcdf.mpg.de/
24 : !
25 : ! ELPA is free software: you can redistribute it and/or modify
26 : ! it under the terms of the version 3 of the license of the
27 : ! GNU Lesser General Public License as published by the Free
28 : ! Software Foundation.
29 : !
30 : ! ELPA is distributed in the hope that it will be useful,
31 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
32 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 : ! GNU Lesser General Public License for more details.
34 : !
35 : ! You should have received a copy of the GNU Lesser General Public License
36 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
37 : !
38 : ! ELPA reflects a substantial effort on the part of the original
39 : ! ELPA consortium, and we ask you to respect the spirit of the
40 : ! license that we chose: i.e., please contribute any changes you
41 : ! may have back to the original ELPA library distribution, and keep
42 : ! any derivatives of ELPA under the same license that we chose for
43 : ! the original distribution, the GNU Lesser General Public License.
44 : !
45 : !
46 : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
47 : !
48 : ! Copyright of the original code rests with the authors inside the ELPA
49 : ! consortium. The copyright of any additional modifications shall rest
50 : ! with their original authors, but shall adhere to the licensing terms
51 : ! distributed along with the original code in the file "COPYING".
52 :
53 :
54 :
55 : ! ELPA2 -- 2-stage solver for ELPA
56 : !
57 : ! Copyright of the original code rests with the authors inside the ELPA
58 : ! consortium. The copyright of any additional modifications shall rest
59 : ! with their original authors, but shall adhere to the licensing terms
60 : ! distributed along with the original code in the file "COPYING".
61 :
62 : #include "config-f90.h"
63 :
!> Work-distribution helpers for the 2-stage solver.
!>
!> Both public routines fill an array of cumulative limits, indexed from 0,
!> that splits a total amount of work among a given number of processes.
module elpa2_workload

  implicit none
  private

  public :: determine_workload
  public :: divide_band

  ! Integer kind with at least 18 decimal digits. It is used for intermediate
  ! products that may not fit into the (possibly 32-bit) kind "ik".
  integer, parameter :: wide_ik = selected_int_kind(18)

contains

  !> Compute the cumulative limits that split "na" items among "nprocs" processes.
  !>
  !> @param obj     object providing the timer used to instrument this routine
  !> @param na      total number of items to distribute
  !> @param nb      chunk size used when there is not enough work for everybody
  !> @param nprocs  number of processes
  !> @param limits  result, indexed 0..nprocs:
  !>                - na <= 0:         every entry is 0
  !>                - nb*nprocs > na:  limits(i) = min(na, i*nb)
  !>                - otherwise:       limits(i) = (i*na)/nprocs (integer division)
  !>
  !> The products nb*nprocs, i*nb and i*na are evaluated in a wide integer kind
  !> so that they cannot overflow the kind "ik"; every stored value is bounded
  !> by "na" in magnitude for the usual non-negative inputs.
  subroutine determine_workload(obj, na, nb, nprocs, limits)
    use elpa_abstract_impl
    use precision
    implicit none

    class(elpa_abstract_impl_t), intent(inout) :: obj
    integer(kind=ik), intent(in)               :: na, nb, nprocs
    integer(kind=ik), intent(out)              :: limits(0:nprocs)

    integer(kind=ik)      :: i
    integer(kind=wide_ik) :: na_w, nb_w, nprocs_w, i_w

    call obj%timer%start("determine_workload")

    if (na <= 0) then
      ! Nothing to distribute
      limits(:) = 0
    else
      na_w     = int(na, kind=wide_ik)
      nb_w     = int(nb, kind=wide_ik)
      nprocs_w = int(nprocs, kind=wide_ik)

      if (nb_w*nprocs_w > na_w) then
        ! There is not enough work for all: hand out chunks of nb items
        ! until na is reached, the remaining limits are capped at na.
        do i = 0, nprocs
          i_w = int(i, kind=wide_ik)
          limits(i) = int(min(na_w, i_w*nb_w), kind=ik)
        enddo
      else
        ! Enough work for all: spread na evenly over the processes.
        do i = 0, nprocs
          i_w = int(i, kind=wide_ik)
          limits(i) = int((i_w*na_w)/nprocs_w, kind=ik)
        enddo
      endif
    endif

    call obj%timer%stop("determine_workload")
  end subroutine determine_workload

  !---------------------------------------------------------------------------------------------------
  !> divide_band: sets the work distribution in band.
  !> Proc n works on blocks block_limits(n)+1 .. block_limits(n+1)
  !>
  !> @param obj            object providing the timer used to instrument this routine
  !> @param nblocks_total  total number of blocks in band
  !> @param n_pes          number of PEs for division
  !> @param block_limits   result, indexed 0..n_pes; block_limits(0) is always 0
  subroutine divide_band(obj, nblocks_total, n_pes, block_limits)
    use precision
    use elpa_abstract_impl
    implicit none

    class(elpa_abstract_impl_t), intent(inout) :: obj
    integer(kind=ik), intent(in)               :: nblocks_total
    integer(kind=ik), intent(in)               :: n_pes
    integer(kind=ik), intent(out)              :: block_limits(0:n_pes)

    integer(kind=ik) :: n, base_blocks, extra_blocks

    call obj%timer%start("divide_band")

    block_limits(0) = 0

    if (nblocks_total < n_pes) then
      ! Not enough work for all: the first tasks get exactly 1 block,
      ! the limits of the remaining tasks stay at nblocks_total.
      do n = 1, n_pes
        block_limits(n) = min(nblocks_total, n)
      enddo
    else
      ! Enough work for all. If there is no exact load balance, the LAST
      ! extra_blocks tasks get one additional block each, since they are
      ! finishing earlier.
      base_blocks  = nblocks_total/n_pes
      extra_blocks = mod(nblocks_total, n_pes)
      do n = 1, n_pes
        block_limits(n) = block_limits(n-1) + base_blocks &
                        + merge(1_ik, 0_ik, n > n_pes - extra_blocks)
      enddo
    endif

    call obj%timer%stop("divide_band")

  end subroutine divide_band

end module elpa2_workload
|