Line data Source code
1 : ! This file is part of ELPA.
2 : !
3 : ! The ELPA library was originally created by the ELPA consortium,
4 : ! consisting of the following organizations:
5 : !
6 : ! - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
7 : ! - Bergische Universität Wuppertal, Lehrstuhl für angewandte
8 : ! Informatik,
9 : ! - Technische Universität München, Lehrstuhl für Informatik mit
10 : ! Schwerpunkt Wissenschaftliches Rechnen ,
11 : ! - Fritz-Haber-Institut, Berlin, Abt. Theorie,
12 : ! - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
13 : ! Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
14 : ! and
15 : ! - IBM Deutschland GmbH
16 : !
17 : !
18 : ! More information can be found here:
19 : ! http://elpa.rzg.mpg.de/
20 : !
21 : ! ELPA is free software: you can redistribute it and/or modify
22 : ! it under the terms of the version 3 of the license of the
23 : ! GNU Lesser General Public License as published by the Free
24 : ! Software Foundation.
25 : !
26 : ! ELPA is distributed in the hope that it will be useful,
27 : ! but WITHOUT ANY WARRANTY; without even the implied warranty of
28 : ! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
29 : ! GNU Lesser General Public License for more details.
30 : !
31 : ! You should have received a copy of the GNU Lesser General Public License
32 : ! along with ELPA. If not, see <http://www.gnu.org/licenses/>
33 : !
34 : ! ELPA reflects a substantial effort on the part of the original
35 : ! ELPA consortium, and we ask you to respect the spirit of the
36 : ! license that we chose: i.e., please contribute any changes you
37 : ! may have back to the original ELPA library distribution, and keep
38 : ! any derivatives of ELPA under the same license that we chose for
39 : ! the original distribution, the GNU Lesser General Public License.
40 : !
41 : ! This file was written by A. Marek, MPCDF
42 :
43 :
44 : #include "config-f90.h"
45 : module cuda_functions
46 : use iso_c_binding
47 : use precision
48 : implicit none
49 :
50 : public
51 :
52 : integer(kind=ik) :: cudaMemcpyHostToDevice
53 : integer(kind=ik) :: cudaMemcpyDeviceToHost
54 : integer(kind=ik) :: cudaHostRegisterPortable
55 : integer(kind=ik) :: cudaHostRegisterMapped
56 : integer(kind=ik) :: cudaMemcpyDeviceToDevice
57 :
58 : integer(kind=C_intptr_T) :: cublasHandle
59 :
60 : integer(kind=c_intptr_t), parameter :: size_of_double_real = 8_rk8
61 : #ifdef WANT_SINGLE_PRECISION_REAL
62 : integer(kind=c_intptr_t), parameter :: size_of_single_real = 4_rk4
63 : #endif
64 :
65 : integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8
66 : #ifdef WANT_SINGLE_PRECISION_COMPLEX
67 : integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4
68 : #endif
69 :
70 : ! functions to set and query the CUDA devices
71 : interface
72 : function cublas_create_c(handle) result(istat) &
73 : bind(C, name="cublasCreateFromC")
74 : use iso_c_binding
75 : implicit none
76 : integer(kind=C_intptr_T) :: handle
77 : integer(kind=C_INT) :: istat
78 : end function cublas_create_c
79 : end interface
80 :
81 : interface
82 : function cublas_destroy_c(handle) result(istat) &
83 : bind(C, name="cublasDestroyFromC")
84 : use iso_c_binding
85 : implicit none
86 : integer(kind=C_intptr_T) :: handle
87 : integer(kind=C_INT) :: istat
88 : end function cublas_destroy_c
89 : end interface
90 :
91 : interface
92 : function cuda_threadsynchronize_c() result(istat) &
93 : bind(C,name="cudaThreadSynchronizeFromC")
94 : use iso_c_binding
95 : implicit none
96 : integer(kind=C_INT) :: istat
97 : end function cuda_threadsynchronize_c
98 : end interface
99 :
100 : interface
101 : function cuda_setdevice_c(n) result(istat) &
102 : bind(C, name="cudaSetDeviceFromC")
103 :
104 : use iso_c_binding
105 : implicit none
106 : integer(kind=C_INT), value :: n
107 : integer(kind=C_INT) :: istat
108 : end function cuda_setdevice_c
109 : end interface
110 :
111 : interface
112 : function cuda_getdevicecount_c(n) result(istat) &
113 : bind(C, name="cudaGetDeviceCountFromC")
114 : use iso_c_binding
115 : implicit none
116 : integer(kind=C_INT), intent(out) :: n
117 : integer(kind=C_INT) :: istat
118 : end function cuda_getdevicecount_c
119 : end interface
120 :
121 : interface
122 : function cuda_devicesynchronize_c()result(istat) &
123 : bind(C,name='cudaDeviceSynchronizeFromC')
124 :
125 : use iso_c_binding
126 :
127 : implicit none
128 : integer(kind=C_INT) :: istat
129 :
130 : end function cuda_devicesynchronize_c
131 : end interface
132 :
133 :
134 : ! functions to copy CUDA memory
135 : interface
136 : function cuda_memcpyDeviceToDevice_c() result(flag) &
137 : bind(C, name="cudaMemcpyDeviceToDeviceFromC")
138 : use iso_c_binding
139 : implicit none
140 : integer(kind=c_int) :: flag
141 : end function
142 : end interface
143 :
144 : interface
145 : function cuda_memcpyHostToDevice_c() result(flag) &
146 : bind(C, name="cudaMemcpyHostToDeviceFromC")
147 : use iso_c_binding
148 : implicit none
149 : integer(kind=c_int) :: flag
150 : end function
151 : end interface
152 :
153 : interface
154 : function cuda_memcpyDeviceToHost_c() result(flag) &
155 : bind(C, name="cudaMemcpyDeviceToHostFromC")
156 : use iso_c_binding
157 : implicit none
158 : integer(kind=c_int) :: flag
159 : end function
160 : end interface
161 :
162 : interface
163 : function cuda_hostRegisterPortable_c() result(flag) &
164 : bind(C, name="cudaHostRegisterPortableFromC")
165 : use iso_c_binding
166 : implicit none
167 : integer(kind=c_int) :: flag
168 : end function
169 : end interface
170 :
171 : interface
172 : function cuda_hostRegisterMapped_c() result(flag) &
173 : bind(C, name="cudaHostRegisterMappedFromC")
174 : use iso_c_binding
175 : implicit none
176 : integer(kind=c_int) :: flag
177 : end function
178 : end interface
179 :
180 : interface
181 : function cuda_memcpy_c(dst, src, size, dir) result(istat) &
182 : bind(C, name="cudaMemcpyFromC")
183 :
184 : use iso_c_binding
185 :
186 : implicit none
187 : integer(kind=C_intptr_t), value :: dst
188 : integer(kind=C_intptr_t), value :: src
189 : integer(kind=c_intptr_t), intent(in), value :: size
190 : integer(kind=C_INT), intent(in), value :: dir
191 : integer(kind=C_INT) :: istat
192 :
193 : end function cuda_memcpy_c
194 : end interface
195 :
196 : interface
197 : function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
198 : bind(C, name="cudaMemcpy2dFromC")
199 :
200 : use iso_c_binding
201 :
202 : implicit none
203 :
204 : integer(kind=C_intptr_T), value :: dst
205 : integer(kind=c_intptr_t), intent(in), value :: dpitch
206 : integer(kind=C_intptr_T), value :: src
207 : integer(kind=c_intptr_t), intent(in), value :: spitch
208 : integer(kind=c_intptr_t), intent(in), value :: width
209 : integer(kind=c_intptr_t), intent(in), value :: height
210 : integer(kind=C_INT), intent(in), value :: dir
211 : integer(kind=C_INT) :: istat
212 :
213 : end function cuda_memcpy2d_c
214 : end interface
215 :
216 : ! functions to allocate and free CUDA memory
217 :
218 : interface
219 : function cuda_free_c(a) result(istat) &
220 : bind(C, name="cudaFreeFromC")
221 :
222 : use iso_c_binding
223 :
224 : implicit none
225 : integer(kind=C_intptr_T), value :: a
226 : integer(kind=C_INT) :: istat
227 :
228 : end function cuda_free_c
229 : end interface
230 :
231 : interface
232 : function cuda_malloc_c(a, width_height) result(istat) &
233 : bind(C, name="cudaMallocFromC")
234 :
235 : use iso_c_binding
236 : implicit none
237 :
238 : integer(kind=C_intptr_T) :: a
239 : integer(kind=c_intptr_t), intent(in), value :: width_height
240 : integer(kind=C_INT) :: istat
241 :
242 : end function cuda_malloc_c
243 : end interface
244 :
245 : interface
246 : function cuda_memset_c(a, val, size) result(istat) &
247 : bind(C, name="cudaMemsetFromC")
248 :
249 : use iso_c_binding
250 :
251 : implicit none
252 :
253 : integer(kind=C_intptr_T), value :: a
254 : integer(kind=C_INT), value :: val
255 : integer(kind=c_intptr_t), intent(in), value :: size
256 : integer(kind=C_INT) :: istat
257 :
258 : end function cuda_memset_c
259 : end interface
260 :
261 : ! cuBLAS
262 : interface
263 : subroutine cublas_dgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
264 : bind(C,name='cublasDgemm_elpa_wrapper')
265 :
266 : use iso_c_binding
267 :
268 : implicit none
269 : character(1,C_CHAR),value :: cta, ctb
270 : integer(kind=C_INT),value :: m,n,k
271 : integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
272 : real(kind=C_DOUBLE),value :: alpha,beta
273 : integer(kind=C_intptr_T), value :: a, b, c
274 : integer(kind=C_intptr_T), value :: handle
275 :
276 : end subroutine cublas_dgemm_c
277 : end interface
278 :
279 : interface
280 : subroutine cublas_sgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
281 : bind(C,name='cublasSgemm_elpa_wrapper')
282 :
283 : use iso_c_binding
284 :
285 : implicit none
286 : character(1,C_CHAR),value :: cta, ctb
287 : integer(kind=C_INT),value :: m,n,k
288 : integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
289 : real(kind=C_FLOAT),value :: alpha,beta
290 : integer(kind=C_intptr_T), value :: a, b, c
291 : integer(kind=C_intptr_T), value :: handle
292 :
293 : end subroutine cublas_sgemm_c
294 : end interface
295 :
296 : interface
297 : subroutine cublas_dtrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
298 : bind(C,name='cublasDtrmm_elpa_wrapper')
299 :
300 : use iso_c_binding
301 :
302 : implicit none
303 : character(1,C_CHAR),value :: side, uplo, trans, diag
304 : integer(kind=C_INT),value :: m,n
305 : integer(kind=C_INT), intent(in), value :: lda,ldb
306 : real(kind=C_DOUBLE), value :: alpha
307 : integer(kind=C_intptr_T), value :: a, b
308 : integer(kind=C_intptr_T), value :: handle
309 :
310 : end subroutine cublas_dtrmm_c
311 : end interface
312 :
313 : interface
314 : subroutine cublas_strmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
315 : bind(C,name='cublasStrmm_elpa_wrapper')
316 :
317 : use iso_c_binding
318 :
319 : implicit none
320 : character(1,C_CHAR),value :: side, uplo, trans, diag
321 : integer(kind=C_INT),value :: m,n
322 : integer(kind=C_INT), intent(in), value :: lda,ldb
323 : real(kind=C_FLOAT), value :: alpha
324 : integer(kind=C_intptr_T), value :: a, b
325 : integer(kind=C_intptr_T), value :: handle
326 :
327 : end subroutine cublas_strmm_c
328 : end interface
329 :
330 : interface
331 : subroutine cublas_zgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
332 : bind(C,name='cublasZgemm_elpa_wrapper')
333 :
334 : use iso_c_binding
335 :
336 : implicit none
337 : character(1,C_CHAR),value :: cta, ctb
338 : integer(kind=C_INT),value :: m,n,k
339 : integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
340 : complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta
341 : integer(kind=C_intptr_T), value :: a, b, c
342 : integer(kind=C_intptr_T), value :: handle
343 :
344 : end subroutine cublas_zgemm_c
345 : end interface
346 :
347 : interface
348 : subroutine cublas_cgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
349 : bind(C,name='cublasCgemm_elpa_wrapper')
350 :
351 : use iso_c_binding
352 :
353 : implicit none
354 : character(1,C_CHAR),value :: cta, ctb
355 : integer(kind=C_INT),value :: m,n,k
356 : integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
357 : complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta
358 : integer(kind=C_intptr_T), value :: a, b, c
359 : integer(kind=C_intptr_T), value :: handle
360 :
361 : end subroutine cublas_cgemm_c
362 : end interface
363 :
364 : interface
365 : subroutine cublas_ztrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
366 : bind(C,name='cublasZtrmm_elpa_wrapper')
367 :
368 : use iso_c_binding
369 :
370 : implicit none
371 : character(1,C_CHAR),value :: side, uplo, trans, diag
372 : integer(kind=C_INT),value :: m,n
373 : integer(kind=C_INT), intent(in), value :: lda,ldb
374 : complex(kind=C_DOUBLE_COMPLEX), value :: alpha
375 : integer(kind=C_intptr_T), value :: a, b
376 : integer(kind=C_intptr_T), value :: handle
377 :
378 : end subroutine cublas_ztrmm_c
379 : end interface
380 :
381 : interface
382 : subroutine cublas_ctrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
383 : bind(C,name='cublasCtrmm_elpa_wrapper')
384 :
385 : use iso_c_binding
386 :
387 : implicit none
388 : character(1,C_CHAR),value :: side, uplo, trans, diag
389 : integer(kind=C_INT),value :: m,n
390 : integer(kind=C_INT), intent(in), value :: lda,ldb
391 : complex(kind=C_FLOAT_COMPLEX), value :: alpha
392 : integer(kind=C_intptr_T), value :: a, b
393 : integer(kind=C_intptr_T), value :: handle
394 :
395 : end subroutine cublas_ctrmm_c
396 : end interface
397 :
398 : interface
399 : subroutine cublas_dgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
400 : bind(C,name='cublasDgemv_elpa_wrapper')
401 :
402 : use iso_c_binding
403 :
404 : implicit none
405 : character(1,C_CHAR),value :: cta
406 : integer(kind=C_INT),value :: m,n
407 : integer(kind=C_INT), intent(in), value :: lda,incx,incy
408 : real(kind=C_DOUBLE),value :: alpha,beta
409 : integer(kind=C_intptr_T), value :: a, x, y
410 : integer(kind=C_intptr_T), value :: handle
411 :
412 : end subroutine cublas_dgemv_c
413 : end interface
414 :
415 : interface
416 : subroutine cublas_sgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
417 : bind(C,name='cublasSgemv_elpa_wrapper')
418 :
419 : use iso_c_binding
420 :
421 : implicit none
422 : character(1,C_CHAR),value :: cta
423 : integer(kind=C_INT),value :: m,n
424 : integer(kind=C_INT), intent(in), value :: lda,incx,incy
425 : real(kind=C_FLOAT),value :: alpha,beta
426 : integer(kind=C_intptr_T), value :: a, x, y
427 : integer(kind=C_intptr_T), value :: handle
428 :
429 : end subroutine cublas_sgemv_c
430 : end interface
431 :
432 : interface
433 : subroutine cublas_zgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
434 : bind(C,name='cublasZgemv_elpa_wrapper')
435 :
436 : use iso_c_binding
437 :
438 : implicit none
439 : character(1,C_CHAR),value :: cta
440 : integer(kind=C_INT),value :: m,n
441 : integer(kind=C_INT), intent(in), value :: lda,incx,incy
442 : complex(kind=C_DOUBLE_COMPLEX),value :: alpha,beta
443 : integer(kind=C_intptr_T), value :: a, x, y
444 : integer(kind=C_intptr_T), value :: handle
445 :
446 : end subroutine cublas_zgemv_c
447 : end interface
448 :
449 : interface
450 : subroutine cublas_cgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
451 : bind(C,name='cublasCgemv_elpa_wrapper')
452 :
453 : use iso_c_binding
454 :
455 : implicit none
456 : character(1,C_CHAR),value :: cta
457 : integer(kind=C_INT),value :: m,n
458 : integer(kind=C_INT), intent(in), value :: lda,incx,incy
459 : complex(kind=C_FLOAT_COMPLEX),value :: alpha,beta
460 : integer(kind=C_intptr_T), value :: a, x, y
461 : integer(kind=C_intptr_T), value :: handle
462 :
463 : end subroutine cublas_cgemv_c
464 : end interface
465 :
466 :
467 : contains
468 :
469 : ! functions to set and query the CUDA devices
470 :
471 0 : function cublas_create(handle) result(success)
472 : use iso_c_binding
473 : implicit none
474 :
475 : integer(kind=C_intptr_t) :: handle
476 : logical :: success
477 : #ifdef WITH_GPU_VERSION
478 : success = cublas_create_c(handle) /= 0
479 : #else
480 0 : success = .true.
481 : #endif
482 0 : end function
483 :
484 0 : function cublas_destroy(handle) result(success)
485 : use iso_c_binding
486 : implicit none
487 :
488 : integer(kind=C_intptr_t) :: handle
489 : logical :: success
490 : #ifdef WITH_GPU_VERSION
491 : success = cublas_destroy_c(handle) /= 0
492 : #else
493 0 : success = .true.
494 : #endif
495 0 : end function
496 :
497 0 : function cuda_threadsynchronize() result(success)
498 : use iso_c_binding
499 :
500 : implicit none
501 :
502 : logical :: success
503 : #ifdef WITH_GPU_VERSION
504 : success = cuda_threadsynchronize_c() /= 0
505 : #else
506 0 : success = .true.
507 : #endif
508 0 : end function cuda_threadsynchronize
509 :
510 0 : function cuda_setdevice(n) result(success)
511 : use iso_c_binding
512 :
513 : implicit none
514 :
515 : integer(kind=ik), intent(in) :: n
516 : logical :: success
517 : #ifdef WITH_GPU_VERSION
518 : success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
519 : #else
520 0 : success = .true.
521 : #endif
522 0 : end function cuda_setdevice
523 :
524 0 : function cuda_getdevicecount(n) result(success)
525 : use iso_c_binding
526 : implicit none
527 :
528 : integer(kind=ik) :: n
529 : integer(kind=c_int) :: nCasted
530 : logical :: success
531 : #ifdef WITH_GPU_VERSION
532 : success = cuda_getdevicecount_c(nCasted) /=0
533 : n = int(nCasted)
534 : #else
535 0 : success = .true.
536 0 : n = 0
537 : #endif
538 0 : end function cuda_getdevicecount
539 :
540 0 : function cuda_devicesynchronize()result(success)
541 :
542 : use iso_c_binding
543 :
544 : implicit none
545 : logical :: success
546 : #ifdef WITH_GPU_VERSION
547 : success = cuda_devicesynchronize_c() /=0
548 : #else
549 0 : success = .true.
550 : #endif
551 0 : end function cuda_devicesynchronize
552 : ! functions to allocate and free memory
553 :
554 0 : function cuda_malloc(a, width_height) result(success)
555 :
556 : use iso_c_binding
557 : implicit none
558 :
559 : integer(kind=C_intptr_t) :: a
560 : integer(kind=c_intptr_t), intent(in) :: width_height
561 : logical :: success
562 : #ifdef WITH_GPU_VERSION
563 : success = cuda_malloc_c(a, width_height) /= 0
564 : #else
565 0 : success = .true.
566 : #endif
567 0 : end function
568 :
569 0 : function cuda_free(a) result(success)
570 :
571 : use iso_c_binding
572 :
573 : implicit none
574 : integer(kind=C_intptr_T) :: a
575 : logical :: success
576 : #ifdef WITH_GPU_VERSION
577 : success = cuda_free_c(a) /= 0
578 : #else
579 0 : success = .true.
580 : #endif
581 0 : end function cuda_free
582 :
583 0 : function cuda_memset(a, val, size) result(success)
584 :
585 : use iso_c_binding
586 :
587 : implicit none
588 :
589 : integer(kind=c_intptr_t) :: a
590 : integer(kind=ik) :: val
591 : integer(kind=c_intptr_t), intent(in) :: size
592 : integer(kind=C_INT) :: istat
593 :
594 : logical :: success
595 : #ifdef WITH_GPU_VERSION
596 : success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0
597 : #else
598 0 : success = .true.
599 : #endif
600 0 : end function cuda_memset
601 :
602 : ! functions to memcopy CUDA memory
603 :
604 0 : function cuda_memcpyDeviceToDevice() result(flag)
605 : use iso_c_binding
606 : implicit none
607 : integer(kind=ik) :: flag
608 : #ifdef WITH_GPU_VERSION
609 : flag = int(cuda_memcpyDeviceToDevice_c())
610 : #else
611 0 : flag = 0
612 : #endif
613 0 : end function
614 :
615 0 : function cuda_memcpyHostToDevice() result(flag)
616 : use iso_c_binding
617 : use precision
618 : implicit none
619 : integer(kind=ik) :: flag
620 : #ifdef WITH_GPU_VERSION
621 : flag = int(cuda_memcpyHostToDevice_c())
622 : #else
623 0 : flag = 0
624 : #endif
625 0 : end function
626 :
627 0 : function cuda_memcpyDeviceToHost() result(flag)
628 : use iso_c_binding
629 : use precision
630 : implicit none
631 : integer(kind=ik) :: flag
632 : #ifdef WITH_GPU_VERSION
633 : flag = int( cuda_memcpyDeviceToHost_c())
634 : #else
635 0 : flag = 0
636 : #endif
637 0 : end function
638 :
639 0 : function cuda_hostRegisterPortable() result(flag)
640 : use iso_c_binding
641 : use precision
642 : implicit none
643 : integer(kind=ik) :: flag
644 : #ifdef WITH_GPU_VERSION
645 : flag = int(cuda_hostRegisterPortable_c())
646 : #else
647 0 : flag = 0
648 : #endif
649 0 : end function
650 :
651 0 : function cuda_hostRegisterMapped() result(flag)
652 : use iso_c_binding
653 : use precision
654 : implicit none
655 : integer(kind=ik) :: flag
656 : #ifdef WITH_GPU_VERSION
657 : flag = int(cuda_hostRegisterMapped_c())
658 : #else
659 0 : flag = 0
660 : #endif
661 0 : end function
662 :
663 0 : function cuda_memcpy(dst, src, size, dir) result(success)
664 :
665 : use iso_c_binding
666 :
667 : implicit none
668 : integer(kind=C_intptr_t) :: dst
669 : integer(kind=C_intptr_t) :: src
670 : integer(kind=c_intptr_t), intent(in) :: size
671 : integer(kind=C_INT), intent(in) :: dir
672 : logical :: success
673 :
674 : #ifdef WITH_GPU_VERSION
675 : success = cuda_memcpy_c(dst, src, size, dir) /= 0
676 : #else
677 0 : success = .true.
678 : #endif
679 0 : end function
680 :
681 0 : function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)
682 :
683 : use iso_c_binding
684 :
685 : implicit none
686 :
687 : integer(kind=C_intptr_T) :: dst
688 : integer(kind=c_intptr_t), intent(in) :: dpitch
689 : integer(kind=C_intptr_T) :: src
690 : integer(kind=c_intptr_t), intent(in) :: spitch
691 : integer(kind=c_intptr_t), intent(in) :: width
692 : integer(kind=c_intptr_t), intent(in) :: height
693 : integer(kind=C_INT), intent(in) :: dir
694 : logical :: success
695 : #ifdef WITH_GPU_VERSION
696 : success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
697 : #else
698 0 : success = .true.
699 : #endif
700 0 : end function cuda_memcpy2d
701 :
702 : ! cuBLAS
703 0 : subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
704 : use iso_c_binding
705 :
706 : implicit none
707 : character(1,C_CHAR),value :: cta, ctb
708 : integer(kind=C_INT) :: m,n,k
709 : integer(kind=C_INT), intent(in) :: lda,ldb,ldc
710 : real(kind=C_DOUBLE) :: alpha,beta
711 : integer(kind=C_intptr_T) :: a, b, c
712 : #ifdef WITH_GPU_VERSION
713 : call cublas_dgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
714 : #endif
715 0 : end subroutine cublas_dgemm
716 :
717 0 : subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
718 : use iso_c_binding
719 :
720 : implicit none
721 : character(1,C_CHAR),value :: cta, ctb
722 : integer(kind=C_INT) :: m,n,k
723 : integer(kind=C_INT), intent(in) :: lda,ldb,ldc
724 : real(kind=C_FLOAT) :: alpha,beta
725 : integer(kind=C_intptr_T) :: a, b, c
726 : #ifdef WITH_GPU_VERSION
727 : call cublas_sgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
728 : #endif
729 0 : end subroutine cublas_sgemm
730 :
731 0 : subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
732 :
733 : use iso_c_binding
734 :
735 : implicit none
736 : character(1,C_CHAR),value :: side, uplo, trans, diag
737 : integer(kind=C_INT) :: m,n
738 : integer(kind=C_INT), intent(in) :: lda,ldb
739 : real(kind=C_DOUBLE) :: alpha
740 : integer(kind=C_intptr_T) :: a, b
741 : #ifdef WITH_GPU_VERSION
742 : call cublas_dtrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
743 : #endif
744 0 : end subroutine cublas_dtrmm
745 :
746 0 : subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
747 :
748 : use iso_c_binding
749 :
750 : implicit none
751 : character(1,C_CHAR),value :: side, uplo, trans, diag
752 : integer(kind=C_INT) :: m,n
753 : integer(kind=C_INT), intent(in) :: lda,ldb
754 : real(kind=C_FLOAT) :: alpha
755 : integer(kind=C_intptr_T) :: a, b
756 : #ifdef WITH_GPU_VERSION
757 : call cublas_strmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
758 : #endif
759 0 : end subroutine cublas_strmm
760 :
761 0 : subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
762 :
763 : use iso_c_binding
764 :
765 : implicit none
766 : character(1,C_CHAR),value :: cta, ctb
767 : integer(kind=C_INT) :: m,n,k
768 : integer(kind=C_INT), intent(in) :: lda,ldb,ldc
769 : complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta
770 : integer(kind=C_intptr_T) :: a, b, c
771 : #ifdef WITH_GPU_VERSION
772 : call cublas_zgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
773 : #endif
774 0 : end subroutine cublas_zgemm
775 :
776 0 : subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
777 :
778 : use iso_c_binding
779 :
780 : implicit none
781 : character(1,C_CHAR),value :: cta, ctb
782 : integer(kind=C_INT) :: m,n,k
783 : integer(kind=C_INT), intent(in) :: lda,ldb,ldc
784 : complex(kind=C_FLOAT_COMPLEX) :: alpha,beta
785 : integer(kind=C_intptr_T) :: a, b, c
786 : #ifdef WITH_GPU_VERSION
787 : call cublas_cgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
788 : #endif
789 0 : end subroutine cublas_cgemm
790 :
791 0 : subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
792 :
793 : use iso_c_binding
794 :
795 : implicit none
796 : character(1,C_CHAR),value :: side, uplo, trans, diag
797 : integer(kind=C_INT) :: m,n
798 : integer(kind=C_INT), intent(in) :: lda,ldb
799 : complex(kind=C_DOUBLE_COMPLEX) :: alpha
800 : integer(kind=C_intptr_T) :: a, b
801 : #ifdef WITH_GPU_VERSION
802 : call cublas_ztrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
803 : #endif
804 0 : end subroutine cublas_ztrmm
805 :
806 0 : subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
807 :
808 : use iso_c_binding
809 :
810 : implicit none
811 : character(1,C_CHAR),value :: side, uplo, trans, diag
812 : integer(kind=C_INT) :: m,n
813 : integer(kind=C_INT), intent(in) :: lda,ldb
814 : complex(kind=C_FLOAT_COMPLEX) :: alpha
815 : integer(kind=C_intptr_T) :: a, b
816 : #ifdef WITH_GPU_VERSION
817 : call cublas_ctrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
818 : #endif
819 0 : end subroutine cublas_ctrmm
820 :
821 0 : subroutine cublas_dgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
822 : use iso_c_binding
823 :
824 : implicit none
825 : character(1,C_CHAR),value :: cta
826 : integer(kind=C_INT) :: m,n
827 : integer(kind=C_INT), intent(in) :: lda,incx,incy
828 : real(kind=C_DOUBLE) :: alpha,beta
829 : integer(kind=C_intptr_T) :: a, x, y
830 : #ifdef WITH_GPU_VERSION
831 : call cublas_dgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
832 : #endif
833 0 : end subroutine cublas_dgemv
834 :
835 0 : subroutine cublas_sgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
836 : use iso_c_binding
837 :
838 : implicit none
839 : character(1,C_CHAR),value :: cta
840 : integer(kind=C_INT) :: m,n
841 : integer(kind=C_INT), intent(in) :: lda,incx,incy
842 : real(kind=C_FLOAT) :: alpha,beta
843 : integer(kind=C_intptr_T) :: a, x, y
844 : #ifdef WITH_GPU_VERSION
845 : call cublas_sgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
846 : #endif
847 0 : end subroutine cublas_sgemv
848 :
849 0 : subroutine cublas_zgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
850 : use iso_c_binding
851 :
852 : implicit none
853 : character(1,C_CHAR),value :: cta
854 : integer(kind=C_INT) :: m,n
855 : integer(kind=C_INT), intent(in) :: lda,incx,incy
856 : complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta
857 : integer(kind=C_intptr_T) :: a, x, y
858 : #ifdef WITH_GPU_VERSION
859 : call cublas_zgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
860 : #endif
861 0 : end subroutine cublas_zgemv
862 :
863 0 : subroutine cublas_cgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
864 : use iso_c_binding
865 :
866 : implicit none
867 : character(1,C_CHAR),value :: cta
868 : integer(kind=C_INT) :: m,n
869 : integer(kind=C_INT), intent(in) :: lda,incx,incy
870 : complex(kind=C_FLOAT_COMPLEX) :: alpha,beta
871 : integer(kind=C_intptr_T) :: a, x, y
872 : #ifdef WITH_GPU_VERSION
873 : call cublas_cgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
874 : #endif
875 0 : end subroutine cublas_cgemv
876 :
877 :
878 : ! subroutine cublas_dsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
879 : ! use iso_c_binding
880 : !
881 : ! implicit none
882 : ! character(1,C_CHAR),value :: cta
883 : ! integer(kind=C_INT) :: n
884 : ! integer(kind=C_INT), intent(in) :: lda,incx,incy
885 : ! real(kind=C_DOUBLE) :: alpha,beta
886 : ! integer(kind=C_intptr_T) :: a, x, y
887 : ! #ifdef WITH_GPU_VERSION
888 : ! call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
889 : ! #endif
890 : ! end subroutine cublas_dsymv
891 : !
892 : ! subroutine cublas_ssymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
893 : ! use iso_c_binding
894 : !
895 : ! implicit none
896 : ! character(1,C_CHAR),value :: cta
897 : ! integer(kind=C_INT) :: n
898 : ! integer(kind=C_INT), intent(in) :: lda,incx,incy
899 : ! real(kind=C_FLOAT) :: alpha,beta
900 : ! integer(kind=C_intptr_T) :: a, x, y
901 : ! #ifdef WITH_GPU_VERSION
902 : ! call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
903 : ! #endif
904 : ! end subroutine cublas_ssymv
905 : !
906 : ! subroutine cublas_zsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
907 : ! use iso_c_binding
908 : !
909 : ! implicit none
910 : ! character(1,C_CHAR),value :: cta
911 : ! integer(kind=C_INT) :: n
912 : ! integer(kind=C_INT), intent(in) :: lda,incx,incy
913 : ! complex(kind=C_DOUBLE_COMPLEX) :: alpha,beta
914 : ! integer(kind=C_intptr_T) :: a, x, y
915 : ! #ifdef WITH_GPU_VERSION
916 : ! ! call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
917 : ! #endif
918 : ! end subroutine cublas_zsymv
919 : !
920 : ! subroutine cublas_csymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
921 : ! use iso_c_binding
922 : !
923 : ! implicit none
924 : ! character(1,C_CHAR),value :: cta
925 : ! integer(kind=C_INT) :: n
926 : ! integer(kind=C_INT), intent(in) :: lda,incx,incy
927 : ! complex(kind=C_FLOAT_COMPLEX) :: alpha,beta
928 : ! integer(kind=C_intptr_T) :: a, x, y
929 : ! #ifdef WITH_GPU_VERSION
930 : ! ! call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
931 : ! #endif
932 : ! end subroutine cublas_csymv
933 :
934 :
935 : end module cuda_functions
|