Line data Source code
1 : #if 0
2 : ! This file is part of ELPA.
3 : !
4 : ! The ELPA library was originally created by the ELPA consortium,
5 : ! consisting of the following organizations:
6 : !
7 : ! - Max Planck Computing and Data Facility (MPCDF), formerly known as
8 : ! Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
9 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
10 : ! Informatik,
11 : ! - Technische Universität München, Lehrstuhl für Informatik mit
12 : ! Schwerpunkt Wissenschaftliches Rechnen ,
13 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
14 : ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
15 : ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
16 : ! and
17 : ! - IBM Deutschland GmbH
18 : !
19 : !
20 : ! More information can be found here:
21 : ! http://elpa.mpcdf.mpg.de/
22 : !
23 : ! ELPA is free software: you can redistribute it and/or modify
24 : ! it under the terms of the version 3 of the license of the
25 : ! GNU Lesser General Public License as published by the Free
26 : ! Software Foundation.
27 : !
28 : ! ELPA is distributed in the hope that it will be useful,
29 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
30 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
31 : ! GNU Lesser General Public License for more details.
32 : !
33 : ! You should have received a copy of the GNU Lesser General Public License
34 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
35 : !
36 : ! ELPA reflects a substantial effort on the part of the original
37 : ! ELPA consortium, and we ask you to respect the spirit of the
38 : ! license that we chose: i.e., please contribute any changes you
39 : ! may have back to the original ELPA library distribution, and keep
40 : ! any derivatives of ELPA under the same license that we chose for
41 : ! the original distribution, the GNU Lesser General Public License.
42 : !
43 : ! This file was written by A. Marek, MPCDF
44 : #endif
45 :
46 : subroutine pack_row_&
47 : &MATH_DATATYPE&
48 : #ifdef WITH_OPENMP
49 : &_cpu_openmp_&
50 : #else
51 : &_cpu_&
52 : #endif
53 6081840 : &PRECISION &
54 6081840 : (obj, a, row, n, stripe_width, &
55 : #ifdef WITH_OPENMP
56 : stripe_count, max_threads, thread_width, l_nev)
57 : #else
58 : last_stripe_width, stripe_count)
59 : #endif
60 : use elpa_abstract_impl
61 : use precision
62 : implicit none
63 : class(elpa_abstract_impl_t), intent(inout) :: obj
64 :
65 : integer(kind=ik), intent(in) :: n, stripe_count, stripe_width
66 : #ifdef WITH_OPENMP
67 : integer(kind=ik), intent(in) :: max_threads, thread_width, l_nev
68 :
69 : #if REALCASE == 1
70 : real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:)
71 : #endif
72 : #if COMPLEXCASE == 1
73 : complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:,:)
74 : #endif
75 :
76 : #else /* WITH_OPENMP */
77 : integer(kind=ik), intent(in) :: last_stripe_width
78 : #if REALCASE == 1
79 : real(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:)
80 : #endif
81 : #if COMPLEXCASE == 1
82 : complex(kind=C_DATATYPE_KIND), intent(in) :: a(:,:,:)
83 : #endif
84 :
85 : #endif /* WITH_OPENMP */
86 :
87 : #if REALCASE == 1
88 : real(kind=C_DATATYPE_KIND) :: row(:)
89 : #endif
90 : #if COMPLEXCASE == 1
91 : complex(kind=C_DATATYPE_KIND) :: row(:)
92 : #endif
93 :
94 : integer(kind=ik) :: i, noff, nl
95 : #ifdef WITH_OPENMP
96 : integer(kind=ik) :: nt
97 : #endif
98 :
99 : call obj%timer%start("pack_row_&
100 : &MATH_DATATYPE&
101 : #ifdef WITH_OPENMP
102 : &_cpu_openmp" // &
103 : #else
104 : &_cpu" // &
105 : #endif
106 : &PRECISION_SUFFIX &
107 6081840 : )
108 :
109 : #ifdef WITH_OPENMP
110 6081840 : do nt = 1, max_threads
111 19334280 : do i = 1, stripe_count
112 16293360 : noff = (nt-1)*thread_width + (i-1)*stripe_width
113 16293360 : nl = min(stripe_width, nt*thread_width-noff, l_nev-noff)
114 16293360 : if (nl<=0) exit
115 16293360 : row(noff+1:noff+nl) = a(1:nl,n,i,nt)
116 : enddo
117 : enddo
118 : #else
119 19334280 : do i=1,stripe_count
120 16293360 : nl = merge(stripe_width, last_stripe_width, i<stripe_count)
121 16293360 : noff = (i-1)*stripe_width
122 16293360 : row(noff+1:noff+nl) = a(1:nl,n,i)
123 : enddo
124 : #endif
125 :
126 : call obj%timer%stop("pack_row_&
127 : &MATH_DATATYPE&
128 : #ifdef WITH_OPENMP
129 : &_cpu_openmp" // &
130 : #else
131 : &_cpu" // &
132 : #endif
133 : &PRECISION_SUFFIX &
134 6081840 : )
135 :
136 6081840 : end subroutine
137 :
138 : subroutine unpack_row_&
139 : &MATH_DATATYPE&
140 : #ifdef WITH_OPENMP
141 : &_cpu_openmp_&
142 : #else
143 : &_cpu_&
144 : #endif
145 5935200 : &PRECISION &
146 5935200 : (obj, a, row, n, &
147 : #ifdef WITH_OPENMP
148 : my_thread, &
149 : #endif
150 : stripe_count, &
151 : #ifdef WITH_OPENMP
152 : thread_width, &
153 : #endif
154 : stripe_width, &
155 : #ifdef WITH_OPENMP
156 : l_nev)
157 : #else
158 : last_stripe_width)
159 : #endif
160 : use elpa_abstract_impl
161 : use precision
162 : implicit none
163 : class(elpa_abstract_impl_t), intent(inout) :: obj
164 : integer(kind=ik), intent(in) :: n, stripe_count, stripe_width
165 :
166 : #ifdef WITH_OPENMP
167 : ! Private variables in OMP regions (my_thread) should better be in the argument list!
168 : integer(kind=ik), intent(in) :: thread_width, l_nev, my_thread
169 : #if REALCASE == 1
170 : real(kind=C_DATATYPE_KIND) :: a(:,:,:,:)
171 : #endif
172 : #if COMPLEXCASE == 1
173 : complex(kind=C_DATATYPE_KIND) :: a(:,:,:,:)
174 :
175 : #endif
176 : #else /* WITH_OPENMP */
177 : integer(kind=ik), intent(in) :: last_stripe_width
178 : #if REALCASE == 1
179 : real(kind=C_DATATYPE_KIND) :: a(:,:,:)
180 : #endif
181 : #if COMPLEXCASE == 1
182 : complex(kind=C_DATATYPE_KIND) :: a(:,:,:)
183 : #endif
184 :
185 : #endif /* WITH_OPENMP */
186 :
187 : #if REALCASE == 1
188 : real(kind=C_DATATYPE_KIND), intent(in) :: row(:)
189 : #endif
190 : #if COMPLEXCASE == 1
191 : complex(kind=C_DATATYPE_KIND), intent(in) :: row(:)
192 : #endif
193 : integer(kind=ik) :: i, noff, nl
194 :
195 : call obj%timer%start("unpack_row_&
196 : &MATH_DATATYPE&
197 : #ifdef WITH_OPENMP
198 : &_cpu_opemp" // &
199 : #else
200 : &_cpu" // &
201 : #endif
202 : &PRECISION_SUFFIX &
203 5935200 : )
204 :
205 37984800 : do i=1,stripe_count
206 : #ifdef WITH_OPENMP
207 16024800 : noff = (my_thread-1)*thread_width + (i-1)*stripe_width
208 16024800 : nl = min(stripe_width, my_thread*thread_width-noff, l_nev-noff)
209 16024800 : if( nl<= 0) exit
210 16024800 : a(1:nl,n,i,my_thread) = row(noff+1:noff+nl)
211 : #else
212 16024800 : nl = merge(stripe_width, last_stripe_width, i<stripe_count)
213 16024800 : noff = (i-1)*stripe_width
214 16024800 : a(1:nl,n,i) = row(noff+1:noff+nl)
215 : #endif
216 :
217 : enddo
218 :
219 : call obj%timer%stop("unpack_row_&
220 : &MATH_DATATYPE&
221 : #ifdef WITH_OPENMP
222 : &_cpu_opemp" // &
223 : #else
224 : &_cpu" // &
225 : #endif
226 : &PRECISION_SUFFIX &
227 5935200 : )
228 :
229 5935200 : end subroutine
230 :
|