Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
7 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
8 : ! Informatik,
9 : ! - Technische Universität München, Lehrstuhl für Informatik mit
10 : ! Schwerpunkt Wissenschaftliches Rechnen ,
11 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12 : ! - Max-Planck-Institut für Mathematik in den Naturwissenschaften,
13 : ! Leipzig, Abt. Komplexe Strukturen in Biologie und Kognition,
14 : ! and
15 : ! - IBM Deutschland GmbH
16 : !
17 : !
18 : ! More information can be found here:
19 : ! http://elpa.rzg.mpg.de/
20 : !
21 : ! ELPA is free software: you can redistribute it and/or modify
22 : ! it under the terms of the version 3 of the license of the
23 : ! GNU Lesser General Public License as published by the Free
24 : ! Software Foundation.
25 : !
26 : ! ELPA is distributed in the hope that it will be useful,
27 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
28 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 : ! GNU Lesser General Public License for more details.
30 : !
31 : ! You should have received a copy of the GNU Lesser General Public License
32 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
33 : !
34 : ! ELPA reflects a substantial effort on the part of the original
35 : ! ELPA consortium, and we ask you to respect the spirit of the
36 : ! license that we chose: i.e., please contribute any changes you
37 : ! may have back to the original ELPA library distribution, and keep
38 : ! any derivatives of ELPA under the same license that we chose for
39 : ! the original distribution, the GNU Lesser General Public License.
40 :
41 : ! This module contains all CUDA C calls.
42 : ! It was provided by NVIDIA with their ELPA GPU port and
43 : ! adapted for an ELPA release by A. Marek, RZG.
44 :
45 : #include "config-f90.h"
46 :
47 : module cuda_c_kernel
48 : implicit none
49 :
50 : #if 0 /* not used anywhere */
! Explicit interfaces to the C routines "launch_dot_product_kernel_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_dot_product_kernel_complex_single".
! Every argument is passed by value; the *_dev arguments are address-sized
! integers (c_intptr_t).
interface
  subroutine launch_dot_product_kernel_c_complex_double(hs_dev, hv_new_dev, tau_new, x_dev, h_dev, hv_dev, nr) &
      bind(c, name="launch_dot_product_kernel_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    use precision
    implicit none
    integer(kind=c_intptr_t), value :: hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
    complex(kind=ck8), value :: tau_new
    integer(kind=c_int), value :: nr
  end subroutine launch_dot_product_kernel_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX
interface
  subroutine launch_dot_product_kernel_c_complex_single(hs_dev, hv_new_dev, tau_new, x_dev, h_dev, hv_dev, nr) &
      bind(c, name="launch_dot_product_kernel_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    use precision
    implicit none
    integer(kind=c_intptr_t), value :: hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
    complex(kind=ck4), value :: tau_new
    integer(kind=c_int), value :: nr
  end subroutine launch_dot_product_kernel_c_complex_single
end interface

#endif
81 :
82 : #endif /* not used anywhere */
83 :
84 : #if 0 /* not used anywhere */
85 :
! Explicit interfaces to the C routines "launch_dot_product_kernel_1_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_dot_product_kernel_1_complex_single".
! Six address-sized integers followed by three C ints, all passed by value.
interface
  subroutine launch_dot_product_kernel_1_c_complex_double(ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev, &
                                                          nb, nr, ns) &
      bind(c, name="launch_dot_product_kernel_1_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
    integer(kind=c_int), value :: nb, nr, ns
  end subroutine launch_dot_product_kernel_1_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX

interface
  subroutine launch_dot_product_kernel_1_c_complex_single(ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev, &
                                                          nb, nr, ns) &
      bind(c, name="launch_dot_product_kernel_1_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
    integer(kind=c_int), value :: nb, nr, ns
  end subroutine launch_dot_product_kernel_1_c_complex_single
end interface

#endif
115 : #endif /* not used anywhere */
116 :
117 : #if 0 /* not used anywhere */
118 :
! Explicit interfaces to the C routines "launch_dot_product_kernel_2_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_dot_product_kernel_2_complex_single".
! Four address-sized integers followed by three C ints, all passed by value.
interface
  subroutine launch_dot_product_kernel_2_c_complex_double(ab_dev, hs_dev, hv_dev, hd_dev, nb, nr, ne) &
      bind(c, name="launch_dot_product_kernel_2_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hs_dev, hv_dev, hd_dev
    integer(kind=c_int), value :: nb, nr, ne
  end subroutine launch_dot_product_kernel_2_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX

interface
  subroutine launch_dot_product_kernel_2_c_complex_single(ab_dev, hs_dev, hv_dev, hd_dev, nb, nr, ne) &
      bind(c, name="launch_dot_product_kernel_2_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hs_dev, hv_dev, hd_dev
    integer(kind=c_int), value :: nb, nr, ne
  end subroutine launch_dot_product_kernel_2_c_complex_single
end interface

#endif
148 : #endif /* not used anywhere */
149 :
150 : #if 0 /* not used anywhere */
! Explicit interfaces to the C routines "launch_double_hh_transform_1_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_double_hh_transform_1_complex_single".
! Three address-sized integers followed by two C ints, all passed by value.
interface
  subroutine launch_double_hh_transform_1_c_complex_double(ab_dev, hs_dev, hv_dev, nb, ns) &
      bind(c, name="launch_double_hh_transform_1_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hs_dev, hv_dev
    integer(kind=c_int), value :: nb, ns
  end subroutine launch_double_hh_transform_1_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX

interface
  subroutine launch_double_hh_transform_1_c_complex_single(ab_dev, hs_dev, hv_dev, nb, ns) &
      bind(c, name="launch_double_hh_transform_1_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hs_dev, hv_dev
    integer(kind=c_int), value :: nb, ns
  end subroutine launch_double_hh_transform_1_c_complex_single
end interface

#endif
180 : #endif /* not used anywhere */
181 :
182 : #if 0 /* not used anywhere */
! Explicit interfaces to the C routines "launch_double_hh_transform_2_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_double_hh_transform_2_complex_single".
! Three address-sized integers followed by three C ints, all passed by value.
interface
  subroutine launch_double_hh_transform_2_c_complex_double(ab_dev, hd_dev, hv_dev, nc, ns, nb) &
      bind(c, name="launch_double_hh_transform_2_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hd_dev, hv_dev
    integer(kind=c_int), value :: nc, ns, nb
  end subroutine launch_double_hh_transform_2_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX

interface
  subroutine launch_double_hh_transform_2_c_complex_single(ab_dev, hd_dev, hv_dev, nc, ns, nb) &
      bind(c, name="launch_double_hh_transform_2_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: ab_dev, hd_dev, hv_dev
    integer(kind=c_int), value :: nc, ns, nb
  end subroutine launch_double_hh_transform_2_c_complex_single
end interface

#endif
210 : #endif /* not used anywhere */
211 :
212 : #if 0 /* not used anywhere */
! Explicit interfaces to the C routines "launch_compute_kernel_reduce_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_compute_kernel_reduce_complex_single".
! a_dev and h1_dev are address-sized integers; lda, n and nbw are C ints.
! All arguments are passed by value.
interface
  subroutine launch_compute_kernel_reduce_c_complex_double(a_dev, lda, n, nbw, h1_dev) &
      bind(c, name="launch_compute_kernel_reduce_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: a_dev, h1_dev
    integer(kind=c_int), value :: lda, n, nbw
  end subroutine launch_compute_kernel_reduce_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX

interface
  subroutine launch_compute_kernel_reduce_c_complex_single(a_dev, lda, n, nbw, h1_dev) &
      bind(c, name="launch_compute_kernel_reduce_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: a_dev, h1_dev
    integer(kind=c_int), value :: lda, n, nbw
  end subroutine launch_compute_kernel_reduce_c_complex_single
end interface
#endif
239 : #endif /* not used anywhere */
240 :
241 : #if 0 /* not used anywhere */
! Explicit interfaces to the C routines "launch_compute_kernel_reduce_1_complex_double"
! and, only with WANT_SINGLE_PRECISION_COMPLEX,
! "launch_compute_kernel_reduce_1_complex_single".
! a_dev and h1_dev are address-sized integers; lda and n are C ints.
! All arguments are passed by value.
interface
  subroutine launch_compute_kernel_reduce_1_c_complex_double(a_dev, lda, n, h1_dev) &
      bind(c, name="launch_compute_kernel_reduce_1_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: a_dev, h1_dev
    integer(kind=c_int), value :: lda, n
  end subroutine launch_compute_kernel_reduce_1_c_complex_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX
interface
  subroutine launch_compute_kernel_reduce_1_c_complex_single(a_dev, lda, n, h1_dev) &
      bind(c, name="launch_compute_kernel_reduce_1_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: a_dev, h1_dev
    integer(kind=c_int), value :: lda, n
  end subroutine launch_compute_kernel_reduce_1_c_complex_single
end interface
#endif
269 : #endif /* not used anywhere */
270 :
! Explicit interface to the C routine "launch_compute_hh_trafo_c_kernel_real_double".
! q, hh, hh_dot and hh_tau are address-sized integers (c_intptr_t); the
! remaining five arguments are C ints. Everything is passed by value.
interface
  subroutine launch_compute_hh_trafo_c_kernel_real_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols) &
      bind(c, name="launch_compute_hh_trafo_c_kernel_real_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: q, hh, hh_dot, hh_tau
    integer(kind=c_int), value :: nev, nb, ldq, off, ncols
  end subroutine launch_compute_hh_trafo_c_kernel_real_double
end interface
284 :
285 : #ifdef WANT_SINGLE_PRECISION_REAL
! Explicit interface to the C routine "launch_compute_hh_trafo_c_kernel_real_single".
! q, hh, hh_dot and hh_tau are address-sized integers (c_intptr_t); the
! remaining five arguments are C ints. Everything is passed by value.
interface
  subroutine launch_compute_hh_trafo_c_kernel_real_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols) &
      bind(c, name="launch_compute_hh_trafo_c_kernel_real_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: q, hh, hh_dot, hh_tau
    integer(kind=c_int), value :: nev, nb, ldq, off, ncols
  end subroutine launch_compute_hh_trafo_c_kernel_real_single
end interface
299 :
300 : #endif
301 :
! Explicit interface to the C routine "launch_compute_hh_trafo_c_kernel_complex_double".
! q, hh and hh_tau are address-sized integers (c_intptr_t); the remaining
! five arguments are C ints. Everything is passed by value.
interface
  subroutine launch_compute_hh_trafo_c_kernel_complex_double(q, hh, hh_tau, nev, nb, ldq, off, ncols) &
      bind(c, name="launch_compute_hh_trafo_c_kernel_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: q, hh, hh_tau
    integer(kind=c_int), value :: nev, nb, ldq, off, ncols
  end subroutine launch_compute_hh_trafo_c_kernel_complex_double
end interface
314 :
315 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
316 :
! Explicit interface to the C routine "launch_compute_hh_trafo_c_kernel_complex_single".
! q, hh and hh_tau are address-sized integers (c_intptr_t); the remaining
! five arguments are C ints. Everything is passed by value.
interface
  subroutine launch_compute_hh_trafo_c_kernel_complex_single(q, hh, hh_tau, nev, nb, ldq, off, ncols) &
      bind(c, name="launch_compute_hh_trafo_c_kernel_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: q, hh, hh_tau
    integer(kind=c_int), value :: nev, nb, ldq, off, ncols
  end subroutine launch_compute_hh_trafo_c_kernel_complex_single
end interface
329 :
330 : #endif
331 :
332 : #if 0
! Explicit interfaces to the C routines
! "launch_compute_hh_trafo_c_kernel_complex_1_double" and, only with
! WANT_SINGLE_PRECISION_COMPLEX, "launch_compute_hh_trafo_c_kernel_complex_1_single".
! Four address-sized integers followed by five C ints, all passed by value.
interface
  subroutine launch_compute_hh_trafo_c_kernel_complex_1_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols) &
      bind(c, name="launch_compute_hh_trafo_c_kernel_complex_1_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: q, hh, hh_dot, hh_tau
    integer(kind=c_int), value :: nev, nb, ldq, off, ncols
  end subroutine launch_compute_hh_trafo_c_kernel_complex_1_double
end interface

#ifdef WANT_SINGLE_PRECISION_COMPLEX
interface
  subroutine launch_compute_hh_trafo_c_kernel_complex_1_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols) &
      bind(c, name="launch_compute_hh_trafo_c_kernel_complex_1_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: q, hh, hh_dot, hh_tau
    integer(kind=c_int), value :: nev, nb, ldq, off, ncols
  end subroutine launch_compute_hh_trafo_c_kernel_complex_1_single
end interface

#endif
363 :
364 : #endif
365 :
! Explicit interface to the C routine "launch_my_unpack_c_kernel_real_double".
! Seven C ints followed by two address-sized integers (row_group_dev, a_dev),
! all passed by value.
interface
  subroutine launch_my_unpack_c_kernel_real_double(row_count, n_offset, max_idx, stripe_width, &
                                                   a_dim2, stripe_count, l_nev, &
                                                   row_group_dev, a_dev) &
      bind(c, name="launch_my_unpack_c_kernel_real_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: row_group_dev, a_dev
  end subroutine launch_my_unpack_c_kernel_real_double
end interface
379 :
380 : #ifdef WANT_SINGLE_PRECISION_REAL
! Explicit interface to the C routine "launch_my_unpack_c_kernel_real_single".
! Seven C ints followed by two address-sized integers (row_group_dev, a_dev),
! all passed by value.
interface
  subroutine launch_my_unpack_c_kernel_real_single(row_count, n_offset, max_idx, stripe_width, &
                                                   a_dim2, stripe_count, l_nev, &
                                                   row_group_dev, a_dev) &
      bind(c, name="launch_my_unpack_c_kernel_real_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: row_group_dev, a_dev
  end subroutine launch_my_unpack_c_kernel_real_single
end interface
394 :
395 : #endif
396 :
! Explicit interface to the C routine "launch_my_pack_c_kernel_real_double".
! Seven C ints followed by two address-sized integers (a_dev, row_group_dev),
! all passed by value. Note the order of the last two arguments is the
! reverse of the corresponding unpack routine.
interface
  subroutine launch_my_pack_c_kernel_real_double(row_count, n_offset, max_idx, stripe_width, &
                                                 a_dim2, stripe_count, l_nev, &
                                                 a_dev, row_group_dev) &
      bind(c, name="launch_my_pack_c_kernel_real_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: a_dev, row_group_dev
  end subroutine launch_my_pack_c_kernel_real_double
end interface
411 :
412 : #ifdef WANT_SINGLE_PRECISION_REAL
! Explicit interface to the C routine "launch_my_pack_c_kernel_real_single".
! Seven C ints followed by two address-sized integers (a_dev, row_group_dev),
! all passed by value. Note the order of the last two arguments is the
! reverse of the corresponding unpack routine.
interface
  subroutine launch_my_pack_c_kernel_real_single(row_count, n_offset, max_idx, stripe_width, &
                                                 a_dim2, stripe_count, l_nev, &
                                                 a_dev, row_group_dev) &
      bind(c, name="launch_my_pack_c_kernel_real_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: a_dev, row_group_dev
  end subroutine launch_my_pack_c_kernel_real_single
end interface
427 :
428 : #endif
429 :
! Explicit interface to the C routine "launch_compute_hh_dotp_c_kernel_real_double".
! Two address-sized integers followed by two C ints, all passed by value.
interface
  subroutine launch_compute_hh_dotp_c_kernel_real_double(bcast_buffer_dev, hh_dot_dev, nbw, n) &
      bind(c, name="launch_compute_hh_dotp_c_kernel_real_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: bcast_buffer_dev, hh_dot_dev
    integer(kind=c_int), value :: nbw, n
  end subroutine launch_compute_hh_dotp_c_kernel_real_double
end interface
443 :
444 : #ifdef WANT_SINGLE_PRECISION_REAL
! Explicit interface to the C routine "launch_compute_hh_dotp_c_kernel_real_single".
! Two address-sized integers followed by two C ints, all passed by value.
interface
  subroutine launch_compute_hh_dotp_c_kernel_real_single(bcast_buffer_dev, hh_dot_dev, nbw, n) &
      bind(c, name="launch_compute_hh_dotp_c_kernel_real_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: bcast_buffer_dev, hh_dot_dev
    integer(kind=c_int), value :: nbw, n
  end subroutine launch_compute_hh_dotp_c_kernel_real_single
end interface
458 :
459 : #endif
460 :
! Explicit interface to the C routine "launch_extract_hh_tau_c_kernel_real_double".
! hh and hh_tau are address-sized integers; nb, n and is_zero are C ints.
! All arguments are passed by value.
interface
  subroutine launch_extract_hh_tau_c_kernel_real_double(hh, hh_tau, nb, n, is_zero) &
      bind(c, name="launch_extract_hh_tau_c_kernel_real_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: hh, hh_tau
    integer(kind=c_int), value :: nb, n, is_zero
  end subroutine launch_extract_hh_tau_c_kernel_real_double
end interface
475 : #ifdef WANT_SINGLE_PRECISION_REAL
! Explicit interface to the C routine "launch_extract_hh_tau_c_kernel_real_single".
! hh and hh_tau are address-sized integers; nb, n and is_zero are C ints.
! All arguments are passed by value.
interface
  subroutine launch_extract_hh_tau_c_kernel_real_single(hh, hh_tau, nb, n, is_zero) &
      bind(c, name="launch_extract_hh_tau_c_kernel_real_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: hh, hh_tau
    integer(kind=c_int), value :: nb, n, is_zero
  end subroutine launch_extract_hh_tau_c_kernel_real_single
end interface
490 : #endif
491 :
! Explicit interface to the C routine "launch_my_unpack_c_kernel_complex_double".
! Seven C ints followed by two address-sized integers (row_group_dev, a_dev),
! all passed by value.
interface
  subroutine launch_my_unpack_c_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, &
                                                      a_dim2, stripe_count, l_nev, &
                                                      row_group_dev, a_dev) &
      bind(c, name="launch_my_unpack_c_kernel_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: row_group_dev, a_dev
  end subroutine launch_my_unpack_c_kernel_complex_double
end interface
507 :
508 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Explicit interface to the C routine "launch_my_unpack_c_kernel_complex_single".
! Seven C ints followed by two address-sized integers (row_group_dev, a_dev),
! all passed by value.
interface
  subroutine launch_my_unpack_c_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, &
                                                      a_dim2, stripe_count, l_nev, &
                                                      row_group_dev, a_dev) &
      bind(c, name="launch_my_unpack_c_kernel_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: row_group_dev, a_dev
  end subroutine launch_my_unpack_c_kernel_complex_single
end interface
523 :
524 : #endif
525 :
! Explicit interface to the C routine "launch_my_pack_c_kernel_complex_double".
! Seven C ints followed by two address-sized integers (a_dev, row_group_dev),
! all passed by value. Note the order of the last two arguments is the
! reverse of the corresponding unpack routine.
interface
  subroutine launch_my_pack_c_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, &
                                                    a_dim2, stripe_count, l_nev, &
                                                    a_dev, row_group_dev) &
      bind(c, name="launch_my_pack_c_kernel_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: a_dev, row_group_dev
  end subroutine launch_my_pack_c_kernel_complex_double
end interface
540 :
541 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Explicit interface to the C routine "launch_my_pack_c_kernel_complex_single".
! Seven C ints followed by two address-sized integers (a_dev, row_group_dev),
! all passed by value. Note the order of the last two arguments is the
! reverse of the corresponding unpack routine.
interface
  subroutine launch_my_pack_c_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, &
                                                    a_dim2, stripe_count, l_nev, &
                                                    a_dev, row_group_dev) &
      bind(c, name="launch_my_pack_c_kernel_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_int), value :: row_count, n_offset, max_idx, stripe_width
    integer(kind=c_int), value :: a_dim2, stripe_count, l_nev
    integer(kind=c_intptr_t), value :: a_dev, row_group_dev
  end subroutine launch_my_pack_c_kernel_complex_single
end interface
556 :
557 : #endif
558 :
! Explicit interface to the C routine "launch_compute_hh_dotp_c_kernel_complex_double".
! Two address-sized integers followed by two C ints, all passed by value.
interface
  subroutine launch_compute_hh_dotp_c_kernel_complex_double(bcast_buffer_dev, hh_dot_dev, nbw, n) &
      bind(c, name="launch_compute_hh_dotp_c_kernel_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: bcast_buffer_dev, hh_dot_dev
    integer(kind=c_int), value :: nbw, n
  end subroutine launch_compute_hh_dotp_c_kernel_complex_double
end interface
571 :
572 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Explicit interface to the C routine "launch_compute_hh_dotp_c_kernel_complex_single".
! Two address-sized integers followed by two C ints, all passed by value.
interface
  subroutine launch_compute_hh_dotp_c_kernel_complex_single(bcast_buffer_dev, hh_dot_dev, nbw, n) &
      bind(c, name="launch_compute_hh_dotp_c_kernel_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: bcast_buffer_dev, hh_dot_dev
    integer(kind=c_int), value :: nbw, n
  end subroutine launch_compute_hh_dotp_c_kernel_complex_single
end interface
585 :
586 : #endif
587 :
! Explicit interface to the C routine "launch_extract_hh_tau_c_kernel_complex_double".
! hh and hh_tau are address-sized integers; nb, n and is_zero are C ints.
! All arguments are passed by value.
interface
  subroutine launch_extract_hh_tau_c_kernel_complex_double(hh, hh_tau, nb, n, is_zero) &
      bind(c, name="launch_extract_hh_tau_c_kernel_complex_double")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: hh, hh_tau
    integer(kind=c_int), value :: nb, n, is_zero
  end subroutine launch_extract_hh_tau_c_kernel_complex_double
end interface
602 :
603 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
604 :
! Explicit interface to the C routine "launch_extract_hh_tau_c_kernel_complex_single".
! hh and hh_tau are address-sized integers; nb, n and is_zero are C ints.
! All arguments are passed by value.
interface
  subroutine launch_extract_hh_tau_c_kernel_complex_single(hh, hh_tau, nb, n, is_zero) &
      bind(c, name="launch_extract_hh_tau_c_kernel_complex_single")
    use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
    implicit none
    integer(kind=c_intptr_t), value :: hh, hh_tau
    integer(kind=c_int), value :: nb, n, is_zero
  end subroutine launch_extract_hh_tau_c_kernel_complex_single
end interface
619 :
620 : #endif
621 :
622 : contains
623 :
624 : #if 0 /* not used anywhere */
! Wrappers around launch_dot_product_kernel_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_dot_product_kernel_complex_double(hs_dev, hv_new_dev, tau_new, x_dev, h_dev, hv_dev, nr)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  use precision
  implicit none
  integer(kind=c_intptr_t), intent(in) :: hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
  complex(kind=ck8), intent(in) :: tau_new
  integer(kind=c_int), intent(in) :: nr
#ifdef WITH_GPU_VERSION
  call launch_dot_product_kernel_c_complex_double(hs_dev, hv_new_dev, tau_new, x_dev, h_dev, hv_dev, nr)
#endif
end subroutine launch_dot_product_kernel_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX
subroutine launch_dot_product_kernel_complex_single(hs_dev, hv_new_dev, tau_new, x_dev, h_dev, hv_dev, nr)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  use precision
  implicit none
  integer(kind=c_intptr_t), intent(in) :: hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
  complex(kind=ck4), intent(in) :: tau_new
  integer(kind=c_int), intent(in) :: nr
#ifdef WITH_GPU_VERSION
  call launch_dot_product_kernel_c_complex_single(hs_dev, hv_new_dev, tau_new, x_dev, h_dev, hv_dev, nr)
#endif
end subroutine launch_dot_product_kernel_complex_single
#endif
652 :
653 : #endif /* not used anywhere */
654 :
655 : #if 0 /* not used anywhere */
656 :
! Wrappers around launch_dot_product_kernel_1_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_dot_product_kernel_1_complex_double(ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev, nb, nr, ns)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
  integer(kind=c_int), intent(in) :: nb, nr, ns
#ifdef WITH_GPU_VERSION
  call launch_dot_product_kernel_1_c_complex_double(ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev, nb, nr, ns)
#endif
end subroutine launch_dot_product_kernel_1_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX
subroutine launch_dot_product_kernel_1_complex_single(ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev, nb, nr, ns)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev
  integer(kind=c_int), intent(in) :: nb, nr, ns
#ifdef WITH_GPU_VERSION
  call launch_dot_product_kernel_1_c_complex_single(ab_dev, hs_dev, hv_new_dev, x_dev, h_dev, hv_dev, nb, nr, ns)
#endif
end subroutine launch_dot_product_kernel_1_complex_single

#endif
683 :
684 : #endif /* not used anywhere */
685 :
686 : #if 0 /* not used anywhere */
! Wrappers around launch_dot_product_kernel_2_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_dot_product_kernel_2_complex_double(ab_dev, hs_dev, hv_dev, hd_dev, nb, nr, ne)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hs_dev, hv_dev, hd_dev
  integer(kind=c_int), intent(in) :: nb, nr, ne
#ifdef WITH_GPU_VERSION
  call launch_dot_product_kernel_2_c_complex_double(ab_dev, hs_dev, hv_dev, hd_dev, nb, nr, ne)
#endif
end subroutine launch_dot_product_kernel_2_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX

subroutine launch_dot_product_kernel_2_complex_single(ab_dev, hs_dev, hv_dev, hd_dev, nb, nr, ne)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hs_dev, hv_dev, hd_dev
  integer(kind=c_int), intent(in) :: nb, nr, ne
#ifdef WITH_GPU_VERSION
  call launch_dot_product_kernel_2_c_complex_single(ab_dev, hs_dev, hv_dev, hd_dev, nb, nr, ne)
#endif
end subroutine launch_dot_product_kernel_2_complex_single
#endif
713 :
714 : #endif /* not used anywhere */
715 :
716 : #if 0 /* not used anywhere */
! Wrappers around launch_double_hh_transform_1_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_double_hh_transform_1_complex_double(ab_dev, hs_dev, hv_dev, nb, ns)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hs_dev, hv_dev
  integer(kind=c_int), intent(in) :: nb, ns
#ifdef WITH_GPU_VERSION
  call launch_double_hh_transform_1_c_complex_double(ab_dev, hs_dev, hv_dev, nb, ns)
#endif
end subroutine launch_double_hh_transform_1_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX
subroutine launch_double_hh_transform_1_complex_single(ab_dev, hs_dev, hv_dev, nb, ns)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hs_dev, hv_dev
  integer(kind=c_int), intent(in) :: nb, ns
#ifdef WITH_GPU_VERSION
  call launch_double_hh_transform_1_c_complex_single(ab_dev, hs_dev, hv_dev, nb, ns)
#endif
end subroutine launch_double_hh_transform_1_complex_single

#endif
743 : #endif /* not used anywhere */
744 :
745 : #if 0 /* not used anywhere */
! Wrappers around launch_double_hh_transform_2_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_double_hh_transform_2_complex_double(ab_dev, hd_dev, hv_dev, nc, ns, nb)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hd_dev, hv_dev
  integer(kind=c_int), intent(in) :: nc, ns, nb
#ifdef WITH_GPU_VERSION
  call launch_double_hh_transform_2_c_complex_double(ab_dev, hd_dev, hv_dev, nc, ns, nb)
#endif
end subroutine launch_double_hh_transform_2_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX

subroutine launch_double_hh_transform_2_complex_single(ab_dev, hd_dev, hv_dev, nc, ns, nb)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: ab_dev, hd_dev, hv_dev
  integer(kind=c_int), intent(in) :: nc, ns, nb
#ifdef WITH_GPU_VERSION
  call launch_double_hh_transform_2_c_complex_single(ab_dev, hd_dev, hv_dev, nc, ns, nb)
#endif
end subroutine launch_double_hh_transform_2_complex_single

#endif
773 : #endif /* not used anywhere */
774 :
775 : #if 0 /* not used anywhere */
! Wrappers around launch_compute_kernel_reduce_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_compute_kernel_reduce_complex_double(a_dev, lda, n, nbw, h1_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: a_dev, h1_dev
  integer(kind=c_int), intent(in) :: lda, n, nbw
#ifdef WITH_GPU_VERSION
  call launch_compute_kernel_reduce_c_complex_double(a_dev, lda, n, nbw, h1_dev)
#endif
end subroutine launch_compute_kernel_reduce_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX
subroutine launch_compute_kernel_reduce_complex_single(a_dev, lda, n, nbw, h1_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: a_dev, h1_dev
  integer(kind=c_int), intent(in) :: lda, n, nbw
#ifdef WITH_GPU_VERSION
  call launch_compute_kernel_reduce_c_complex_single(a_dev, lda, n, nbw, h1_dev)
#endif
end subroutine launch_compute_kernel_reduce_complex_single

#endif
802 : #endif /* not used anywhere */
803 :
804 : #if 0 /* not used anywhere */
805 :
! Wrappers around launch_compute_kernel_reduce_1_c_complex_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_compute_kernel_reduce_1_complex_double(a_dev, lda, n, h1_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: a_dev, h1_dev
  integer(kind=c_int), intent(in) :: lda, n
#ifdef WITH_GPU_VERSION
  call launch_compute_kernel_reduce_1_c_complex_double(a_dev, lda, n, h1_dev)
#endif
end subroutine launch_compute_kernel_reduce_1_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX
subroutine launch_compute_kernel_reduce_1_complex_single(a_dev, lda, n, h1_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: a_dev, h1_dev
  integer(kind=c_int), intent(in) :: lda, n
#ifdef WITH_GPU_VERSION
  call launch_compute_kernel_reduce_1_c_complex_single(a_dev, lda, n, h1_dev)
#endif
end subroutine launch_compute_kernel_reduce_1_complex_single

#endif
832 : #endif /* not used anywhere */
833 :
! Fortran entry point for the real, double-precision hh_trafo kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_compute_hh_trafo_c_kernel_real_double; in any other build this
! routine does nothing.
!
!   q, hh, hh_dot, hh_tau : address-sized integers (c_intptr_t), forwarded by value
!   nev, nb, ldq, off, ncols : C ints, forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_compute_hh_trafo_gpu_kernel_real_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: q, hh, hh_dot, hh_tau
  integer(kind=c_int), intent(in) :: nev, nb, ldq, off, ncols
#ifdef WITH_GPU_VERSION
  call launch_compute_hh_trafo_c_kernel_real_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
end subroutine launch_compute_hh_trafo_gpu_kernel_real_double
847 :
848 : #ifdef WANT_SINGLE_PRECISION_REAL
! Fortran entry point for the real, single-precision hh_trafo kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_compute_hh_trafo_c_kernel_real_single; in any other build this
! routine does nothing.
!
!   q, hh, hh_dot, hh_tau : address-sized integers (c_intptr_t), forwarded by value
!   nev, nb, ldq, off, ncols : C ints, forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_compute_hh_trafo_gpu_kernel_real_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: q, hh, hh_dot, hh_tau
  integer(kind=c_int), intent(in) :: nev, nb, ldq, off, ncols
#ifdef WITH_GPU_VERSION
  call launch_compute_hh_trafo_c_kernel_real_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
end subroutine launch_compute_hh_trafo_gpu_kernel_real_single
862 :
863 : #endif
864 :
! Fortran entry point for the complex, double-precision hh_trafo kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_compute_hh_trafo_c_kernel_complex_double; in any other build this
! routine does nothing.
!
!   q, hh, hh_tau : address-sized integers (c_intptr_t), forwarded by value
!   nev, nb, ldq, off, ncols : C ints, forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_compute_hh_trafo_gpu_kernel_complex_double(q, hh, hh_tau, nev, nb, ldq, off, ncols)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: q, hh, hh_tau
  integer(kind=c_int), intent(in) :: nev, nb, ldq, off, ncols
#ifdef WITH_GPU_VERSION
  call launch_compute_hh_trafo_c_kernel_complex_double(q, hh, hh_tau, nev, nb, ldq, off, ncols)
#endif
end subroutine launch_compute_hh_trafo_gpu_kernel_complex_double
877 :
878 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Fortran entry point for the complex, single-precision hh_trafo kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_compute_hh_trafo_c_kernel_complex_single; in any other build this
! routine does nothing.
!
!   q, hh, hh_tau : address-sized integers (c_intptr_t), forwarded by value
!   nev, nb, ldq, off, ncols : C ints, forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_compute_hh_trafo_gpu_kernel_complex_single(q, hh, hh_tau, nev, nb, ldq, off, ncols)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: q, hh, hh_tau
  integer(kind=c_int), intent(in) :: nev, nb, ldq, off, ncols
#ifdef WITH_GPU_VERSION
  call launch_compute_hh_trafo_c_kernel_complex_single(q, hh, hh_tau, nev, nb, ldq, off, ncols)
#endif
end subroutine launch_compute_hh_trafo_gpu_kernel_complex_single
891 :
892 : #endif
893 :
894 : #if 0
! Wrappers around launch_compute_hh_trafo_c_kernel_complex_1_{double,single}.
! Each forwards its arguments unchanged when WITH_GPU_VERSION is defined and
! is a no-op otherwise. The arguments are only read, hence intent(in).
subroutine launch_compute_hh_trafo_gpu_kernel_1_complex_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: q, hh, hh_dot, hh_tau
  integer(kind=c_int), intent(in) :: nev, nb, ldq, off, ncols
#ifdef WITH_GPU_VERSION
  call launch_compute_hh_trafo_c_kernel_complex_1_double(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
end subroutine launch_compute_hh_trafo_gpu_kernel_1_complex_double

#ifdef WANT_SINGLE_PRECISION_COMPLEX
subroutine launch_compute_hh_trafo_gpu_kernel_1_complex_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_intptr_t), intent(in) :: q, hh, hh_dot, hh_tau
  integer(kind=c_int), intent(in) :: nev, nb, ldq, off, ncols
#ifdef WITH_GPU_VERSION
  call launch_compute_hh_trafo_c_kernel_complex_1_single(q, hh, hh_dot, hh_tau, nev, nb, ldq, off, ncols)
#endif
end subroutine launch_compute_hh_trafo_gpu_kernel_1_complex_single
#endif
922 :
923 : #endif
924 :
! Fortran entry point for the real, double-precision unpack kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_my_unpack_c_kernel_real_double; in any other build this routine does
! nothing.
!
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev :
!       C ints, forwarded by value
!   row_group_dev, a_dev : address-sized integers (c_intptr_t), forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_my_unpack_gpu_kernel_real_double(row_count, n_offset, max_idx, stripe_width, &
                                                   a_dim2, stripe_count, l_nev, &
                                                   row_group_dev, a_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_int), intent(in) :: row_count, n_offset, max_idx, stripe_width
  integer(kind=c_int), intent(in) :: a_dim2, stripe_count, l_nev
  integer(kind=c_intptr_t), intent(in) :: row_group_dev, a_dev
#ifdef WITH_GPU_VERSION
  call launch_my_unpack_c_kernel_real_double(row_count, n_offset, max_idx, stripe_width, &
                                             a_dim2, stripe_count, l_nev, &
                                             row_group_dev, a_dev)
#endif
end subroutine launch_my_unpack_gpu_kernel_real_double
940 :
941 : #ifdef WANT_SINGLE_PRECISION_REAL
! Fortran entry point for the real, single-precision unpack kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_my_unpack_c_kernel_real_single; in any other build this routine does
! nothing.
!
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev :
!       C ints, forwarded by value
!   row_group_dev, a_dev : address-sized integers (c_intptr_t), forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_my_unpack_gpu_kernel_real_single(row_count, n_offset, max_idx, stripe_width, &
                                                   a_dim2, stripe_count, l_nev, &
                                                   row_group_dev, a_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_int), intent(in) :: row_count, n_offset, max_idx, stripe_width
  integer(kind=c_int), intent(in) :: a_dim2, stripe_count, l_nev
  integer(kind=c_intptr_t), intent(in) :: row_group_dev, a_dev
#ifdef WITH_GPU_VERSION
  call launch_my_unpack_c_kernel_real_single(row_count, n_offset, max_idx, stripe_width, &
                                             a_dim2, stripe_count, l_nev, &
                                             row_group_dev, a_dev)
#endif
end subroutine launch_my_unpack_gpu_kernel_real_single
957 :
958 : #endif
959 :
! Fortran entry point for the real, double-precision pack kernel launcher.
!
! When compiled with WITH_GPU_VERSION the arguments are forwarded unchanged to
! launch_my_pack_c_kernel_real_double; in any other build this routine does
! nothing.
!
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev :
!       C ints, forwarded by value
!   a_dev, row_group_dev : address-sized integers (c_intptr_t), forwarded by value
!
! No argument is modified here, so all dummies are declared intent(in).
subroutine launch_my_pack_gpu_kernel_real_double(row_count, n_offset, max_idx, stripe_width, &
                                                 a_dim2, stripe_count, l_nev, &
                                                 a_dev, row_group_dev)
  use, intrinsic :: iso_c_binding, only: c_int, c_intptr_t
  implicit none
  integer(kind=c_int), intent(in) :: row_count, n_offset, max_idx, stripe_width
  integer(kind=c_int), intent(in) :: a_dim2, stripe_count, l_nev
  integer(kind=c_intptr_t), intent(in) :: a_dev, row_group_dev
#ifdef WITH_GPU_VERSION
  call launch_my_pack_c_kernel_real_double(row_count, n_offset, max_idx, stripe_width, &
                                           a_dim2, stripe_count, l_nev, &
                                           a_dev, row_group_dev)
#endif
end subroutine launch_my_pack_gpu_kernel_real_double
975 :
976 : #ifdef WANT_SINGLE_PRECISION_REAL
! Wrapper that forwards to the C launcher launch_my_pack_c_kernel_real_single.
! The routine sits inside a WANT_SINGLE_PRECISION_REAL preprocessor guard.
! All arguments are passed on unchanged and in the same order.
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev : c_int integers
!   a_dev, row_group_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
! Handle order in the argument list is a_dev first, then row_group_dev.
977 0 : subroutine launch_my_pack_gpu_kernel_real_single(row_count, n_offset, max_idx,stripe_width, &
978 : a_dim2, stripe_count, l_nev, a_dev, &
979 : row_group_dev)
980 :
981 : use iso_c_binding
982 :
983 : implicit none
984 : integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev
985 : integer(kind=c_intptr_t) :: a_dev
986 : integer(kind=c_intptr_t) :: row_group_dev
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
987 : #ifdef WITH_GPU_VERSION
988 : call launch_my_pack_c_kernel_real_single(row_count, n_offset, max_idx,stripe_width, a_dim2, stripe_count, l_nev, a_dev, &
989 : row_group_dev)
990 : #endif
991 :
992 0 : end subroutine
993 :
994 : #endif
995 :
! Wrapper that forwards to the C launcher launch_compute_hh_dotp_c_kernel_real_double.
! All four arguments are passed on unchanged and in the same order.
!   bcast_buffer_dev, hh_dot_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nbw, n                       : c_int integers
996 0 : subroutine launch_compute_hh_dotp_gpu_kernel_real_double(bcast_buffer_dev, hh_dot_dev, nbw, n)
997 :
998 : use iso_c_binding
999 :
1000 : implicit none
1001 : integer(kind=c_intptr_t) :: bcast_buffer_dev
1002 : integer(kind=c_intptr_t) :: hh_dot_dev
1003 : integer(kind=c_int) :: nbw, n
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1004 : #ifdef WITH_GPU_VERSION
1005 : call launch_compute_hh_dotp_c_kernel_real_double(bcast_buffer_dev, hh_dot_dev, nbw, n)
1006 : #endif
1007 0 : end subroutine
1008 :
1009 : #ifdef WANT_SINGLE_PRECISION_REAL
1010 :
! Wrapper that forwards to the C launcher launch_compute_hh_dotp_c_kernel_real_single.
! The routine sits inside a WANT_SINGLE_PRECISION_REAL preprocessor guard.
! All four arguments are passed on unchanged and in the same order.
!   bcast_buffer_dev, hh_dot_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nbw, n                       : c_int integers
1011 0 : subroutine launch_compute_hh_dotp_gpu_kernel_real_single(bcast_buffer_dev, hh_dot_dev, nbw, n)
1012 :
1013 : use iso_c_binding
1014 :
1015 : implicit none
1016 : integer(kind=c_intptr_t) :: bcast_buffer_dev
1017 : integer(kind=c_intptr_t) :: hh_dot_dev
1018 : integer(kind=c_int) :: nbw, n
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1019 : #ifdef WITH_GPU_VERSION
1020 : call launch_compute_hh_dotp_c_kernel_real_single(bcast_buffer_dev, hh_dot_dev, nbw, n)
1021 : #endif
1022 0 : end subroutine
1023 :
1024 : #endif
1025 :
! Wrapper that forwards to the C launcher launch_extract_hh_tau_c_kernel_real_double.
! All five arguments are passed on unchanged and in the same order.
!   hh, hh_tau : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nb, n      : c_int integers
!   is_zero    : c_int integer; its meaning is defined by the C routine and is not
!                visible here (presumably a 0/1 flag - TODO confirm)
1026 0 : subroutine launch_extract_hh_tau_gpu_kernel_real_double(hh, hh_tau, nb, n, is_zero)
1027 :
1028 : use iso_c_binding
1029 :
1030 : implicit none
1031 : integer(kind=c_intptr_t) :: hh
1032 : integer(kind=c_intptr_t) :: hh_tau
1033 : integer(kind=c_int) :: nb, n
1034 : integer(kind=c_int) :: is_zero
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1035 : #ifdef WITH_GPU_VERSION
1036 : call launch_extract_hh_tau_c_kernel_real_double(hh, hh_tau, nb, n, is_zero)
1037 : #endif
1038 0 : end subroutine
1039 :
1040 : #ifdef WANT_SINGLE_PRECISION_REAL
! Wrapper that forwards to the C launcher launch_extract_hh_tau_c_kernel_real_single.
! The routine sits inside a WANT_SINGLE_PRECISION_REAL preprocessor guard.
! All five arguments are passed on unchanged and in the same order.
!   hh, hh_tau : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nb, n      : c_int integers
!   is_zero    : c_int integer; its meaning is defined by the C routine and is not
!                visible here (presumably a 0/1 flag - TODO confirm)
1041 0 : subroutine launch_extract_hh_tau_gpu_kernel_real_single(hh, hh_tau, nb, n, is_zero)
1042 :
1043 : use iso_c_binding
1044 :
1045 : implicit none
1046 : integer(kind=c_intptr_t) :: hh
1047 : integer(kind=c_intptr_t) :: hh_tau
1048 : integer(kind=c_int) :: nb, n
1049 : integer(kind=c_int) :: is_zero
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1050 : #ifdef WITH_GPU_VERSION
1051 : call launch_extract_hh_tau_c_kernel_real_single(hh, hh_tau, nb, n, is_zero)
1052 : #endif
1053 0 : end subroutine
1054 :
1055 : #endif
1056 :
! Wrapper that forwards to the C launcher launch_my_unpack_c_kernel_complex_double.
! All arguments are passed on unchanged and in the same order.
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev : c_int integers
!   row_group_dev, a_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
! Handle order in the argument list is row_group_dev first, then a_dev.
1057 0 : subroutine launch_my_unpack_gpu_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, &
1058 : a_dim2, stripe_count, l_nev, row_group_dev, a_dev)
1059 :
1060 : use iso_c_binding
1061 :
1062 : implicit none
1063 :
1064 : integer(kind=c_int) :: row_count
1065 : integer(kind=c_int) :: n_offset, max_idx,stripe_width, a_dim2, stripe_count,l_nev
1066 : integer(kind=c_intptr_t) :: a_dev, row_group_dev
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1067 : #ifdef WITH_GPU_VERSION
1068 : call launch_my_unpack_c_kernel_complex_double(row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev, &
1069 : row_group_dev, a_dev)
1070 : #endif
1071 0 : end subroutine
1072 :
1073 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Wrapper that forwards to the C launcher launch_my_unpack_c_kernel_complex_single.
! The routine sits inside a WANT_SINGLE_PRECISION_COMPLEX preprocessor guard.
! All arguments are passed on unchanged and in the same order.
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev : c_int integers
!   row_group_dev, a_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
! Handle order in the argument list is row_group_dev first, then a_dev.
1074 0 : subroutine launch_my_unpack_gpu_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, &
1075 : a_dim2, stripe_count, l_nev, row_group_dev, a_dev)
1076 :
1077 : use iso_c_binding
1078 :
1079 : implicit none
1080 :
1081 : integer(kind=c_int) :: row_count
1082 : integer(kind=c_int) :: n_offset, max_idx,stripe_width, a_dim2, stripe_count,l_nev
1083 : integer(kind=c_intptr_t) :: a_dev, row_group_dev
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1084 : #ifdef WITH_GPU_VERSION
1085 : call launch_my_unpack_c_kernel_complex_single(row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev, &
1086 : row_group_dev, a_dev)
1087 : #endif
1088 0 : end subroutine
1089 :
1090 : #endif
1091 :
! Wrapper that forwards to the C launcher launch_my_pack_c_kernel_complex_double.
! All arguments are passed on unchanged and in the same order.
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev : c_int integers
!   a_dev, row_group_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
! Handle order in the argument list is a_dev first, then row_group_dev.
1092 0 : subroutine launch_my_pack_gpu_kernel_complex_double(row_count, n_offset, max_idx,stripe_width,a_dim2, &
1093 : stripe_count, l_nev, a_dev, &
1094 : row_group_dev)
1095 :
1096 : use iso_c_binding
1097 :
1098 : implicit none
1099 : integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2,stripe_count, l_nev
1100 : integer(kind=c_intptr_t) :: a_dev
1101 : integer(kind=c_intptr_t) :: row_group_dev
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1102 : #ifdef WITH_GPU_VERSION
1103 : call launch_my_pack_c_kernel_complex_double(row_count, n_offset, max_idx,stripe_width,a_dim2, stripe_count, l_nev, a_dev, &
1104 : row_group_dev)
1105 : #endif
1106 0 : end subroutine
1107 :
1108 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
1109 :
! Wrapper that forwards to the C launcher launch_my_pack_c_kernel_complex_single.
! The routine sits inside a WANT_SINGLE_PRECISION_COMPLEX preprocessor guard.
! All arguments are passed on unchanged and in the same order.
!   row_count, n_offset, max_idx, stripe_width, a_dim2, stripe_count, l_nev : c_int integers
!   a_dev, row_group_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
! Handle order in the argument list is a_dev first, then row_group_dev.
1110 0 : subroutine launch_my_pack_gpu_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, &
1111 : stripe_count, l_nev, a_dev, &
1112 : row_group_dev)
1113 :
1114 : use iso_c_binding
1115 :
1116 : implicit none
1117 : integer(kind=c_int) :: row_count, n_offset, max_idx, stripe_width, a_dim2,stripe_count, l_nev
1118 : integer(kind=c_intptr_t) :: a_dev
1119 : integer(kind=c_intptr_t) :: row_group_dev
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1120 : #ifdef WITH_GPU_VERSION
1121 : call launch_my_pack_c_kernel_complex_single(row_count, n_offset, max_idx,stripe_width,a_dim2, stripe_count, l_nev, a_dev, &
1122 : row_group_dev)
1123 : #endif
1124 0 : end subroutine
1125 :
1126 : #endif
1127 :
! Wrapper that forwards to the C launcher launch_compute_hh_dotp_c_kernel_complex_double.
! All four arguments are passed on unchanged and in the same order.
!   bcast_buffer_dev, hh_dot_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nbw, n                       : c_int integers
1128 0 : subroutine launch_compute_hh_dotp_gpu_kernel_complex_double(bcast_buffer_dev, hh_dot_dev, nbw,n)
1129 :
1130 : use iso_c_binding
1131 :
1132 : implicit none
1133 : integer(kind=c_intptr_t) :: bcast_buffer_dev
1134 : integer(kind=c_intptr_t) :: hh_dot_dev
1135 : integer(kind=c_int) :: nbw, n
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1136 : #ifdef WITH_GPU_VERSION
1137 : call launch_compute_hh_dotp_c_kernel_complex_double(bcast_buffer_dev, hh_dot_dev, nbw,n)
1138 : #endif
1139 0 : end subroutine
1140 :
1141 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Wrapper that forwards to the C launcher launch_compute_hh_dotp_c_kernel_complex_single.
! The routine sits inside a WANT_SINGLE_PRECISION_COMPLEX preprocessor guard.
! All four arguments are passed on unchanged and in the same order.
!   bcast_buffer_dev, hh_dot_dev : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nbw, n                       : c_int integers
1142 0 : subroutine launch_compute_hh_dotp_gpu_kernel_complex_single(bcast_buffer_dev, hh_dot_dev, nbw,n)
1143 :
1144 : use iso_c_binding
1145 :
1146 : implicit none
1147 : integer(kind=c_intptr_t) :: bcast_buffer_dev
1148 : integer(kind=c_intptr_t) :: hh_dot_dev
1149 : integer(kind=c_int) :: nbw, n
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1150 : #ifdef WITH_GPU_VERSION
1151 : call launch_compute_hh_dotp_c_kernel_complex_single(bcast_buffer_dev, hh_dot_dev, nbw,n)
1152 : #endif
1153 0 : end subroutine
1154 : #endif
1155 :
! Wrapper that forwards to the C launcher launch_extract_hh_tau_c_kernel_complex_double.
! All five arguments are passed on unchanged and in the same order.
!   hh, hh_tau : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nb, n      : c_int integers
!   is_zero    : c_int integer; its meaning is defined by the C routine and is not
!                visible here (presumably a 0/1 flag - TODO confirm)
1156 0 : subroutine launch_extract_hh_tau_gpu_kernel_complex_double(hh, hh_tau, nb, n, is_zero)
1157 :
1158 : use iso_c_binding
1159 :
1160 : implicit none
1161 : integer(kind=c_intptr_t) :: hh
1162 : integer(kind=c_intptr_t) :: hh_tau
1163 : integer(kind=c_int) :: nb, n
1164 : integer(kind=c_int) :: is_zero
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1165 : #ifdef WITH_GPU_VERSION
1166 : call launch_extract_hh_tau_c_kernel_complex_double(hh, hh_tau, nb, n, is_zero)
1167 : #endif
1168 0 : end subroutine
1169 :
1170 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
! Wrapper that forwards to the C launcher launch_extract_hh_tau_c_kernel_complex_single.
! The routine sits inside a WANT_SINGLE_PRECISION_COMPLEX preprocessor guard.
! All five arguments are passed on unchanged and in the same order.
!   hh, hh_tau : c_intptr_t integers (presumably device addresses - TODO confirm)
!   nb, n      : c_int integers
!   is_zero    : c_int integer; its meaning is defined by the C routine and is not
!                visible here (presumably a 0/1 flag - TODO confirm)
1171 0 : subroutine launch_extract_hh_tau_gpu_kernel_complex_single(hh, hh_tau, nb, n, is_zero)
1172 :
1173 : use iso_c_binding
1174 :
1175 : implicit none
1176 : integer(kind=c_intptr_t) :: hh
1177 : integer(kind=c_intptr_t) :: hh_tau
1178 : integer(kind=c_int) :: nb, n
1179 : integer(kind=c_int) :: is_zero
! Without WITH_GPU_VERSION the call is compiled out and this routine has an empty body.
1180 : #ifdef WITH_GPU_VERSION
1181 : call launch_extract_hh_tau_c_kernel_complex_single(hh, hh_tau, nb, n, is_zero)
1182 : #endif
1183 0 : end subroutine
1184 : #endif
1185 : end module cuda_c_kernel
1186 :
|