Line data Source code
1 : #if 0
2 : ! This file is part of ELPA.
3 : !
4 : ! The ELPA library was originally created by the ELPA consortium,
5 : ! consisting of the following organizations:
6 : !
7 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
8 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
9 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
10 : ! Informatik,
11 : ! - Technische Universität München, Lehrstuhl für Informatik mit
12 : ! Schwerpunkt Wissenschaftliches Rechnen ,
13 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14 : ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
15 : ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
16 : ! and
17 : ! - IBM Deutschland GmbH
18 : !
19 : !
20 : ! More information can be found here:
21 : ! http://elpa.mpcdf.mpg.de/
22 : !
23 : ! ELPA is free software: you can redistribute it and/or modify
24 : ! it under the terms of the version 3 of the license of the
25 : ! GNU Lesser General Public License as published by the Free
26 : ! Software Foundation.
27 : !
28 : ! ELPA is distributed in the hope that it will be useful,
29 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
30 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 : ! GNU Lesser General Public License for more details.
32 : !
33 : ! You should have received a copy of the GNU Lesser General Public License
34 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
35 : !
36 : ! ELPA reflects a substantial effort on the part of the original
37 : ! ELPA consortium, and we ask you to respect the spirit of the
38 : ! license that we chose: i.e., please contribute any changes you
39 : ! may have back to the original ELPA library distribution, and keep
40 : ! any derivatives of ELPA under the same license that we chose for
41 : ! the original distribution, the GNU Lesser General Public License.
42 : !
43 : ! Copyright of the original code rests with the authors inside the ELPA
44 : ! consortium. The copyright of any additional modifications shall rest
45 : ! with their original authors, but shall adhere to the licensing terms
46 : ! distributed along with the original code in the file "COPYING".
47 : ! Author: Andreas Marek, MPCDF
48 : #endif
49 :
50 : #include "config-f90.h"
51 : #include "../general/sanity.F90"
52 :
53 : subroutine elpa_transpose_vectors_&
54 : &MATH_DATATYPE&
55 : &_&
56 5551488 : &PRECISION &
57 5551488 : (obj, vmat_s, ld_s, comm_s, vmat_t, ld_t, comm_t, nvs, nvr, nvc, nblk)
58 :
59 : !-------------------------------------------------------------------------------
60 : ! This routine transposes an array of vectors which are distributed in
61 : ! communicator comm_s into its transposed form distributed in communicator comm_t.
62 : ! There must be an identical copy of vmat_s in every communicator comm_s.
63 : ! After this routine, there is an identical copy of vmat_t in every communicator comm_t.
64 : !
65 : ! vmat_s original array of vectors
66 : ! ld_s leading dimension of vmat_s
67 : ! comm_s communicator over which vmat_s is distributed
68 : ! vmat_t array of vectors in transposed form
69 : ! ld_t leading dimension of vmat_t
70 : ! comm_t communicator over which vmat_t is distributed
71 : ! nvs global index where to start in vmat_s/vmat_t
72 : ! Please note: this is kind of a hint, some values before nvs will be
73 : ! accessed in vmat_s/put into vmat_t
74 : ! nvr global length of vmat_s/vmat_t
75 : ! nvc number of columns in vmat_s/vmat_t
76 : ! nblk block size of block cyclic distribution
77 : !
78 : !-------------------------------------------------------------------------------
79 : use precision
80 : use elpa_abstract_impl
81 : #ifdef WITH_OPENMP
82 : use omp_lib
83 : #endif
84 : use elpa_mpi
85 :
86 : implicit none
87 : class(elpa_abstract_impl_t), intent(inout) :: obj
88 : integer(kind=ik), intent(in) :: ld_s, comm_s, ld_t, comm_t, nvs, nvr, nvc, nblk
89 : MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(in) :: vmat_s(ld_s,nvc)
90 : MATH_DATATYPE(kind=C_DATATYPE_KIND), intent(inout):: vmat_t(ld_t,nvc)
91 :
92 5551488 : MATH_DATATYPE(kind=C_DATATYPE_KIND), allocatable :: aux(:)
93 : integer(kind=ik) :: myps, mypt, nps, npt
94 : integer(kind=ik) :: n, lc, k, i, ips, ipt, ns, nl, mpierr
95 : integer(kind=ik) :: lcm_s_t, nblks_tot, nblks_comm, nblks_skip
96 : integer(kind=ik) :: auxstride
97 :
98 : call obj%timer%start("elpa_transpose_vectors_&
99 : &MATH_DATATYPE&
100 : &" // &
101 : &PRECISION_SUFFIX &
102 5551488 : )
103 :
104 5551488 : call obj%timer%start("mpi_communication")
105 5551488 : call mpi_comm_rank(comm_s,myps,mpierr)
106 5551488 : call mpi_comm_size(comm_s,nps ,mpierr)
107 5551488 : call mpi_comm_rank(comm_t,mypt,mpierr)
108 5551488 : call mpi_comm_size(comm_t,npt ,mpierr)
109 :
110 5551488 : call obj%timer%stop("mpi_communication")
111 : ! The basic idea of this routine is that for every block (in the block cyclic
112 : ! distribution), the processor within comm_t which owns the diagonal
113 : ! broadcasts its values of vmat_s to all processors within comm_t.
114 : ! Of course this does not have to be done for every block separately, since
115 : ! the communication pattern repeats in the global matrix after
116 : ! the least common multiple of (nps,npt) blocks
117 :
118 5551488 : lcm_s_t = least_common_multiple(nps,npt) ! least common multiple of nps, npt
119 :
120 5551488 : nblks_tot = (nvr+nblk-1)/nblk ! number of blocks corresponding to nvr
121 :
122 : ! Get the number of blocks to be skipped at the begin.
123 : ! This must be a multiple of lcm_s_t (else it is getting complicated),
124 : ! thus some elements before nvs will be accessed/set.
125 :
126 5551488 : nblks_skip = ((nvs-1)/(nblk*lcm_s_t))*lcm_s_t
127 :
! aux is the pack/broadcast buffer: it is sized for the worst case over all
! sweeps n below, i.e. ceil((nblks_tot-nblks_skip)/lcm_s_t) blocks of nblk
! rows for each of the nvc columns.
128 5551488 : allocate(aux( ((nblks_tot-nblks_skip+lcm_s_t-1)/lcm_s_t) * nblk * nvc ))
129 : #ifdef WITH_OPENMP
130 5551488 : !$omp parallel private(lc, i, k, ns, nl, nblks_comm, auxstride, ips, ipt, n)
131 : #endif
! n sweeps over one period of the cyclic communication pattern; within a
! sweep, ips/ipt are the ranks in comm_s/comm_t that own global block n.
132 14803968 : do n = 0, lcm_s_t-1
133 :
134 9252480 : ips = mod(n,nps)
135 9252480 : ipt = mod(n,npt)
136 :
137 9252480 : if(mypt == ipt) then
138 :
! nblks_comm: number of global blocks handled in this sweep; auxstride is
! the per-column extent inside aux (see the k index formula below).
139 7394208 : nblks_comm = (nblks_tot-nblks_skip-n+lcm_s_t-1)/lcm_s_t
140 7394208 : auxstride = nblk * nblks_comm
141 : ! if(nblks_comm==0) cycle
! Skip the pack/broadcast/unpack entirely when this sweep has no blocks.
142 7394208 : if (nblks_comm .ne. 0) then
! Pack: the owning rank copies its locally stored blocks of vmat_s into aux.
143 7255968 : if(myps == ips) then
144 : ! k = 0
145 : #ifdef WITH_OPENMP
146 2729664 : !$omp do
147 : #endif
148 12012624 : do lc=1,nvc
149 218112816 : do i = nblks_skip+n, nblks_tot-1, lcm_s_t
150 190263936 : k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
151 190263936 : ns = (i/nps)*nblk ! local start of block i
152 279176544 : nl = min(nvr-i*nblk,nblk) ! length
153 1698064704 : aux(k+1:k+nl) = vmat_s(ns+1:ns+nl,lc)
154 : ! k = k+nblk
155 : enddo
156 : enddo
157 : endif
158 :
159 : #ifdef WITH_OPENMP
! The barrier ensures packing is complete before the broadcast; only the
! master thread performs the MPI call.
160 3627984 : !$omp barrier
161 2702736 : !$omp master
162 : #endif
163 :
! Broadcast: rank ips distributes the packed blocks within comm_s, so every
! rank there can fill its local part of vmat_t afterwards.
164 : #ifdef WITH_MPI
165 5405472 : call obj%timer%start("mpi_communication")
166 :
167 : call MPI_Bcast(aux, nblks_comm*nblk*nvc, &
168 : #if REALCASE == 1
169 : MPI_REAL_PRECISION, &
170 : #endif
171 : #if COMPLEXCASE == 1
172 : MPI_COMPLEX_PRECISION, &
173 : #endif
174 5405472 : ips, comm_s, mpierr)
175 :
176 :
177 5405472 : call obj%timer%stop("mpi_communication")
178 : #endif /* WITH_MPI */
179 :
180 : #ifdef WITH_OPENMP
181 : !$omp end master
! Second barrier: no thread may start unpacking before the broadcast is done.
182 3627984 : !$omp barrier
183 :
184 3627984 : !$omp do
185 : #endif
186 : ! k = 0
! Unpack: copy the broadcast data into the local rows of vmat_t; note that
! ns is now the local start of global block i with respect to comm_t (npt),
! mirroring the nps-based indexing used during packing.
187 15935376 : do lc=1,nvc
188 274365024 : do i = nblks_skip+n, nblks_tot-1, lcm_s_t
189 237442848 : k = (i - nblks_skip - n)/lcm_s_t * nblk + (lc - 1) * auxstride
190 237442848 : ns = (i/npt)*nblk ! local start of block i
191 348421176 : nl = min(nvr-i*nblk,nblk) ! length
192 2119408272 : vmat_t(ns+1:ns+nl,lc) = aux(k+1:k+nl)
193 : ! k = k+nblk
194 : enddo
195 : enddo
196 : endif
197 : endif
198 :
199 : enddo
200 : #ifdef WITH_OPENMP
201 : !$omp end parallel
202 : #endif
203 5551488 : deallocate(aux)
204 :
205 : call obj%timer%stop("elpa_transpose_vectors_&
206 : &MATH_DATATYPE&
207 : &" // &
208 : &PRECISION_SUFFIX &
209 5551488 : )
210 :
211 5551488 : end subroutine
212 :
|