Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
7 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
8 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
9 : ! Informatik,
10 : ! - Technische Universität München, Lehrstuhl für Informatik mit
11 : ! Schwerpunkt Wissenschaftliches Rechnen ,
12 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
13 : ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
14 : ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
15 : ! and
16 : ! - IBM Deutschland GmbH
17 : !
18 : ! This particular source code file contains additions, changes and
19 : ! enhancements authored by Intel Corporation which is not part of
20 : ! the ELPA consortium.
21 : !
22 : ! More information can be found here:
23 : ! http://elpa.mpcdf.mpg.de/
24 : !
25 : ! ELPA is free software: you can redistribute it and/or modify
26 : ! it under the terms of the version 3 of the license of the
27 : ! GNU Lesser General Public License as published by the Free
28 : ! Software Foundation.
29 : !
30 : ! ELPA is distributed in the hope that it will be useful,
31 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
32 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
33 : ! GNU Lesser General Public License for more details.
34 : !
35 : ! You should have received a copy of the GNU Lesser General Public License
36 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
37 : !
38 : ! ELPA reflects a substantial effort on the part of the original
39 : ! ELPA consortium, and we ask you to respect the spirit of the
40 : ! license that we chose: i.e., please contribute any changes you
41 : ! may have back to the original ELPA library distribution, and keep
42 : ! any derivatives of ELPA under the same license that we chose for
43 : ! the original distribution, the GNU Lesser General Public License.
44 : !
45 : !
46 : ! ELPA1 -- Faster replacements for ScaLAPACK symmetric eigenvalue routines
47 : !
48 : ! Copyright of the original code rests with the authors inside the ELPA
49 : ! consortium. The copyright of any additional modifications shall rest
50 : ! with their original authors, but shall adhere to the licensing terms
51 : ! distributed along with the original code in the file "COPYING".
52 :
53 : #include "../general/sanity.F90"
54 :
55 : use precision
56 : use elpa1_compute
57 : use elpa_utilities
58 : use elpa_mpi
59 : use elpa_abstract_impl
60 : implicit none
61 : #include "../general/precision_kinds.F90"
62 : class(elpa_abstract_impl_t), intent(inout) :: obj
63 : integer(kind=ik) :: na, lda, nblk, matrixCols, mpi_comm_rows, mpi_comm_cols
64 : #ifdef USE_ASSUMED_SIZE
65 : MATH_DATATYPE(kind=rck) :: a(obj%local_nrows,*)
66 : #else
67 : MATH_DATATYPE(kind=rck) :: a(obj%local_nrows,obj%local_ncols)
68 : #endif
69 :
70 : integer(kind=ik) :: my_prow, my_pcol, np_rows, np_cols, mpierr
71 : integer(kind=ik) :: l_cols, l_rows, l_col1, l_row1, l_colx, l_rowx
72 : integer(kind=ik) :: n, nc, i, info, ns, nb
73 864 : MATH_DATATYPE(kind=rck), allocatable :: tmp1(:), tmp2(:,:), tmat1(:,:), tmat2(:,:)
74 : logical :: wantDebug
75 : logical :: success
76 : integer(kind=ik) :: istat, debug, error
77 : character(200) :: errorMessage
78 :
79 : call obj%timer%start("elpa_invert_trm_&
80 : &MATH_DATATYPE&
81 : &_&
82 : &PRECISION&
83 864 : &")
84 :
85 864 : na = obj%na
86 864 : lda = obj%local_nrows
87 864 : nblk = obj%nblk
88 864 : matrixCols = obj%local_ncols
89 :
90 864 : call obj%get("mpi_comm_rows",mpi_comm_rows,error)
91 864 : if (error .ne. ELPA_OK) then
92 0 : print *,"Error getting option. Aborting..."
93 0 : stop
94 : endif
95 864 : call obj%get("mpi_comm_cols",mpi_comm_cols,error)
96 864 : if (error .ne. ELPA_OK) then
97 0 : print *,"Error getting option. Aborting..."
98 0 : stop
99 : endif
100 :
101 864 : call obj%get("debug", debug,error)
102 864 : if (error .ne. ELPA_OK) then
103 0 : print *,"Error getting option. Aborting..."
104 0 : stop
105 : endif
106 864 : if (debug == 1) then
107 864 : wantDebug = .true.
108 : else
109 0 : wantDebug = .true.
110 : endif
111 864 : call obj%timer%start("mpi_communication")
112 864 : call mpi_comm_rank(mpi_comm_rows,my_prow,mpierr)
113 864 : call mpi_comm_size(mpi_comm_rows,np_rows,mpierr)
114 864 : call mpi_comm_rank(mpi_comm_cols,my_pcol,mpierr)
115 864 : call mpi_comm_size(mpi_comm_cols,np_cols,mpierr)
116 864 : call obj%timer%stop("mpi_communication")
117 864 : success = .true.
118 :
119 864 : l_rows = local_index(na, my_prow, np_rows, nblk, -1) ! Local rows of a
120 864 : l_cols = local_index(na, my_pcol, np_cols, nblk, -1) ! Local cols of a
121 :
122 864 : allocate(tmp1(nblk*nblk), stat=istat, errmsg=errorMessage)
123 864 : if (istat .ne. 0) then
124 : print *,"elpa_invert_trm_&
125 : &MATH_DATATYPE&
126 0 : &: error when allocating tmp1 "//errorMessage
127 0 : stop 1
128 : endif
129 :
130 864 : allocate(tmp2(nblk,nblk), stat=istat, errmsg=errorMessage)
131 864 : if (istat .ne. 0) then
132 : print *,"elpa_invert_trm_&
133 : &MATH_DATATYPE&
134 0 : &: error when allocating tmp2 "//errorMessage
135 0 : stop 1
136 : endif
137 :
138 864 : tmp1 = 0
139 864 : tmp2 = 0
140 :
141 864 : allocate(tmat1(l_rows,nblk), stat=istat, errmsg=errorMessage)
142 864 : if (istat .ne. 0) then
143 : print *,"elpa_invert_trm_&
144 : &MATH_DATATYPE&
145 0 : &: error when allocating tmat1 "//errorMessage
146 0 : stop 1
147 : endif
148 :
149 864 : allocate(tmat2(nblk,l_cols), stat=istat, errmsg=errorMessage)
150 864 : if (istat .ne. 0) then
151 : print *,"elpa_invert_trm_&
152 : &MATH_DATATYPE&
153 0 : &: error when allocating tmat2 "//errorMessage
154 0 : stop 1
155 : endif
156 :
157 864 : tmat1 = 0
158 864 : tmat2 = 0
159 :
160 :
161 864 : ns = ((na-1)/nblk)*nblk + 1
162 :
163 9504 : do n = ns,1,-nblk
164 :
165 8640 : l_row1 = local_index(n, my_prow, np_rows, nblk, +1)
166 8640 : l_col1 = local_index(n, my_pcol, np_cols, nblk, +1)
167 :
168 8640 : nb = nblk
169 8640 : if (na-n+1 < nblk) nb = na-n+1
170 :
171 8640 : l_rowx = local_index(n+nb, my_prow, np_rows, nblk, +1)
172 8640 : l_colx = local_index(n+nb, my_pcol, np_cols, nblk, +1)
173 :
174 8640 : if (my_prow==prow(n, nblk, np_rows)) then
175 :
176 5760 : if (my_pcol==pcol(n, nblk, np_cols)) then
177 5760 : call obj%timer%start("blas")
178 :
179 5760 : call PRECISION_TRTRI('U', 'N', nb, a(l_row1,l_col1), lda, info)
180 :
181 5760 : call obj%timer%stop("blas")
182 :
183 5760 : if (info/=0) then
184 0 : if (wantDebug) write(error_unit,*) "elpa_invert_trm_&
185 : &MATH_DATATYPE&
186 :
187 : #if REALCASE == 1
188 0 : &: Error in DTRTRI"
189 : #endif
190 : #if COMPLEXCASE == 1
191 0 : &: Error in ZTRTRI"
192 : #endif
193 :
194 0 : success = .false.
195 : call obj%timer%stop("elpa_invert_trm_&
196 : &MATH_DATATYPE&
197 : &_&
198 : &PRECISION&
199 0 : &")
200 0 : return
201 : endif
202 :
203 5760 : nc = 0
204 92160 : do i=1,nb
205 86400 : tmp1(nc+1:nc+i) = a(l_row1:l_row1+i-1,l_col1+i-1)
206 86400 : nc = nc+i
207 : enddo
208 : endif
209 : #ifdef WITH_MPI
210 2880 : call obj%timer%start("mpi_communication")
211 : call MPI_Bcast(tmp1, nb*(nb+1)/2, MPI_MATH_DATATYPE_PRECISION, &
212 2880 : pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
213 2880 : call obj%timer%stop("mpi_communication")
214 : #endif /* WITH_MPI */
215 5760 : nc = 0
216 92160 : do i=1,nb
217 86400 : tmp2(1:i,i) = tmp1(nc+1:nc+i)
218 86400 : nc = nc+i
219 : enddo
220 :
221 5760 : call obj%timer%start("blas")
222 5760 : if (l_cols-l_colx+1>0) &
223 : call PRECISION_TRMM('L', 'U', 'N', 'N', nb, l_cols-l_colx+1, ONE, &
224 5184 : tmp2, ubound(tmp2,dim=1), a(l_row1,l_colx), lda)
225 5760 : call obj%timer%stop("blas")
226 5760 : if (l_colx<=l_cols) tmat2(1:nb,l_colx:l_cols) = a(l_row1:l_row1+nb-1,l_colx:l_cols)
227 5760 : if (my_pcol==pcol(n, nblk, np_cols)) tmat2(1:nb,l_col1:l_col1+nb-1) = tmp2(1:nb,1:nb) ! tmp2 has the lower left triangle 0
228 :
229 : endif
230 :
231 8640 : if (l_row1>1) then
232 7488 : if (my_pcol==pcol(n, nblk, np_cols)) then
233 7488 : tmat1(1:l_row1-1,1:nb) = a(1:l_row1-1,l_col1:l_col1+nb-1)
234 7488 : a(1:l_row1-1,l_col1:l_col1+nb-1) = 0
235 : endif
236 :
237 80064 : do i=1,nb
238 : #ifdef WITH_MPI
239 72576 : call obj%timer%start("mpi_communication")
240 : call MPI_Bcast(tmat1(1,i), l_row1-1, MPI_MATH_DATATYPE_PRECISION, &
241 72576 : pcol(n, nblk, np_cols), mpi_comm_cols, mpierr)
242 :
243 72576 : call obj%timer%stop("mpi_communication")
244 : #endif /* WITH_MPI */
245 : enddo
246 : endif
247 : #ifdef WITH_MPI
248 5760 : call obj%timer%start("mpi_communication")
249 5760 : if (l_cols-l_col1+1>0) &
250 : call MPI_Bcast(tmat2(1,l_col1), (l_cols-l_col1+1)*nblk, MPI_MATH_DATATYPE_PRECISION, &
251 5760 : prow(n, nblk, np_rows), mpi_comm_rows, mpierr)
252 :
253 5760 : call obj%timer%stop("mpi_communication")
254 : #endif /* WITH_MPI */
255 :
256 8640 : call obj%timer%start("blas")
257 8640 : if (l_row1>1 .and. l_cols-l_col1+1>0) &
258 : call PRECISION_GEMM('N', 'N', l_row1-1, l_cols-l_col1+1, nb, -ONE, &
259 : tmat1, ubound(tmat1,dim=1), tmat2(1,l_col1), ubound(tmat2,dim=1), ONE, &
260 7488 : a(1,l_col1), lda)
261 :
262 8640 : call obj%timer%stop("blas")
263 :
264 : enddo
265 :
266 864 : deallocate(tmp1, tmp2, tmat1, tmat2, stat=istat, errmsg=errorMessage)
267 864 : if (istat .ne. 0) then
268 : print *,"elpa_invert_trm_&
269 : &MATH_DATATYPE&
270 0 : &: error when deallocating tmp1 "//errorMessage
271 0 : stop 1
272 : endif
273 :
274 : call obj%timer%stop("elpa_invert_trm_&
275 : &MATH_DATATYPE&
276 : &_&
277 : &PRECISION&
278 864 : &")
|