LCOV - code coverage report
Current view: top level - src/GPU - mod_cuda.F90 (source / functions) Hit Total Coverage
Test: coverage_50ab7a7628bba174fc62cee3ab72b26e81f87fe5.info Lines: 0 73 0.0 %
Date: 2018-01-10 09:29:53 Functions: 0 28 0.0 %

          Line data    Source code
       1             : !    This file is part of ELPA.
       2             : !
       3             : !    The ELPA library was originally created by the ELPA consortium,
       4             : !    consisting of the following organizations:
       5             : !
       6             : !    - Rechenzentrum Garching der Max-Planck-Gesellschaft (RZG),
       7             : !    - Bergische Universität Wuppertal, Lehrstuhl für angewandte
       8             : !      Informatik,
       9             : !    - Technische Universität München, Lehrstuhl für Informatik mit
      10             : !      Schwerpunkt Wissenschaftliches Rechnen ,
      11             : !    - Fritz-Haber-Institut, Berlin, Abt. Theorie,
      12             : !    - Max-Plack-Institut für Mathematik in den Naturwissenschaften,
      13             : !      Leipzig, Abt. Komplexe Strukutren in Biologie und Kognition,
      14             : !      and
      15             : !    - IBM Deutschland GmbH
      16             : !
      17             : !
      18             : !    More information can be found here:
      19             : !    http://elpa.rzg.mpg.de/
      20             : !
      21             : !    ELPA is free software: you can redistribute it and/or modify
      22             : !    it under the terms of the version 3 of the license of the
      23             : !    GNU Lesser General Public License as published by the Free
      24             : !    Software Foundation.
      25             : !
      26             : !    ELPA is distributed in the hope that it will be useful,
      27             : !    but WITHOUT ANY WARRANTY; without even the implied warranty of
      28             : !    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      29             : !    GNU Lesser General Public License for more details.
      30             : !
      31             : !    You should have received a copy of the GNU Lesser General Public License
      32             : !    along with ELPA.  If not, see <http://www.gnu.org/licenses/>
      33             : !
      34             : !    ELPA reflects a substantial effort on the part of the original
      35             : !    ELPA consortium, and we ask you to respect the spirit of the
      36             : !    license that we chose: i.e., please contribute any changes you
      37             : !    may have back to the original ELPA library distribution, and keep
      38             : !    any derivatives of ELPA under the same license that we chose for
      39             : !    the original distribution, the GNU Lesser General Public License.
      40             : !
      41             : ! This file was written by A. Marek, MPCDF
      42             : 
      43             : 
      44             : #include "config-f90.h"
      45             : module cuda_functions
      46             :   use iso_c_binding
      47             :   use precision
      48             :   implicit none
      49             : 
      50             :   public
      51             : 
      52             :   integer(kind=ik) :: cudaMemcpyHostToDevice
      53             :   integer(kind=ik) :: cudaMemcpyDeviceToHost
      54             :   integer(kind=ik) :: cudaHostRegisterPortable
      55             :   integer(kind=ik) :: cudaHostRegisterMapped
      56             :   integer(kind=ik) :: cudaMemcpyDeviceToDevice
      57             : 
      58             :   integer(kind=C_intptr_T) :: cublasHandle
      59             : 
      60             :   integer(kind=c_intptr_t), parameter :: size_of_double_real    = 8_rk8
      61             : #ifdef WANT_SINGLE_PRECISION_REAL
      62             :   integer(kind=c_intptr_t), parameter :: size_of_single_real    = 4_rk4
      63             : #endif
      64             : 
      65             :   integer(kind=c_intptr_t), parameter :: size_of_double_complex = 16_ck8
      66             : #ifdef WANT_SINGLE_PRECISION_COMPLEX
      67             :   integer(kind=c_intptr_t), parameter :: size_of_single_complex = 8_ck4
      68             : #endif
      69             : 
      70             :   ! functions to set and query the CUDA devices
      71             :   interface
      72             :     function cublas_create_c(handle) result(istat) &
      73             :              bind(C, name="cublasCreateFromC")
      74             :       use iso_c_binding
      75             :       implicit none
      76             :       integer(kind=C_intptr_T) :: handle
      77             :       integer(kind=C_INT)  :: istat
      78             :     end function cublas_create_c
      79             :   end interface  
      80             : 
      81             :   interface
      82             :     function cublas_destroy_c(handle) result(istat) &
      83             :              bind(C, name="cublasDestroyFromC")
      84             :       use iso_c_binding
      85             :       implicit none
      86             :       integer(kind=C_intptr_T) :: handle
      87             :       integer(kind=C_INT)  :: istat
      88             :     end function cublas_destroy_c
      89             :   end interface  
      90             : 
      91             :   interface
      92             :     function cuda_threadsynchronize_c() result(istat) &
      93             :              bind(C,name="cudaThreadSynchronizeFromC")
      94             :       use iso_c_binding
      95             :       implicit none
      96             :       integer(kind=C_INT)  :: istat
      97             :     end function cuda_threadsynchronize_c
      98             :   end interface
      99             : 
     100             :   interface
     101             :     function cuda_setdevice_c(n) result(istat) &
     102             :              bind(C, name="cudaSetDeviceFromC")
     103             : 
     104             :       use iso_c_binding
     105             :       implicit none
     106             :       integer(kind=C_INT), value    :: n
     107             :       integer(kind=C_INT)           :: istat
     108             :     end function cuda_setdevice_c
     109             :   end interface
     110             : 
     111             :   interface
     112             :     function cuda_getdevicecount_c(n) result(istat) &
     113             :              bind(C, name="cudaGetDeviceCountFromC")
     114             :       use iso_c_binding
     115             :       implicit none
     116             :       integer(kind=C_INT), intent(out) :: n
     117             :       integer(kind=C_INT)              :: istat
     118             :     end function cuda_getdevicecount_c
     119             :   end interface
     120             : 
     121             :   interface
     122             :     function cuda_devicesynchronize_c()result(istat) &
     123             :              bind(C,name='cudaDeviceSynchronizeFromC')
     124             : 
     125             :       use iso_c_binding
     126             : 
     127             :       implicit none
     128             :       integer(kind=C_INT)                       :: istat
     129             : 
     130             :     end function cuda_devicesynchronize_c
     131             :   end interface
     132             : 
     133             : 
     134             :   ! functions to copy CUDA memory
     135             :   interface
     136             :     function cuda_memcpyDeviceToDevice_c() result(flag) &
     137             :              bind(C, name="cudaMemcpyDeviceToDeviceFromC")
     138             :       use iso_c_binding
     139             :       implicit none
     140             :       integer(kind=c_int) :: flag
     141             :     end function
     142             :   end interface
     143             : 
     144             :   interface
     145             :     function cuda_memcpyHostToDevice_c() result(flag) &
     146             :              bind(C, name="cudaMemcpyHostToDeviceFromC")
     147             :       use iso_c_binding
     148             :       implicit none
     149             :       integer(kind=c_int) :: flag
     150             :     end function
     151             :   end interface
     152             : 
     153             :   interface
     154             :     function cuda_memcpyDeviceToHost_c() result(flag) &
     155             :              bind(C, name="cudaMemcpyDeviceToHostFromC")
     156             :       use iso_c_binding
     157             :       implicit none
     158             :       integer(kind=c_int) :: flag
     159             :     end function
     160             :   end interface
     161             : 
     162             :   interface
     163             :     function cuda_hostRegisterPortable_c() result(flag) &
     164             :              bind(C, name="cudaHostRegisterPortableFromC")
     165             :       use iso_c_binding
     166             :       implicit none
     167             :       integer(kind=c_int) :: flag
     168             :     end function
     169             :   end interface
     170             : 
     171             :   interface
     172             :     function cuda_hostRegisterMapped_c() result(flag) &
     173             :              bind(C, name="cudaHostRegisterMappedFromC")
     174             :       use iso_c_binding
     175             :       implicit none
     176             :       integer(kind=c_int) :: flag
     177             :     end function
     178             :   end interface
     179             : 
     180             :   interface
     181             :     function cuda_memcpy_c(dst, src, size, dir) result(istat) &
     182             :              bind(C, name="cudaMemcpyFromC")
     183             : 
     184             :       use iso_c_binding
     185             : 
     186             :       implicit none
     187             :       integer(kind=C_intptr_t), value              :: dst
     188             :       integer(kind=C_intptr_t), value              :: src
     189             :       integer(kind=c_intptr_t), intent(in), value    :: size
     190             :       integer(kind=C_INT), intent(in), value       :: dir
     191             :       integer(kind=C_INT)                          :: istat
     192             : 
     193             :     end function cuda_memcpy_c
     194             :   end interface
     195             : 
     196             :   interface
     197             :     function cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) result(istat) &
     198             :              bind(C, name="cudaMemcpy2dFromC")
     199             : 
     200             :       use iso_c_binding
     201             : 
     202             :       implicit none
     203             : 
     204             :       integer(kind=C_intptr_T), value                :: dst
     205             :       integer(kind=c_intptr_t), intent(in), value    :: dpitch
     206             :       integer(kind=C_intptr_T), value                :: src
     207             :       integer(kind=c_intptr_t), intent(in), value    :: spitch
     208             :       integer(kind=c_intptr_t), intent(in), value    :: width
     209             :       integer(kind=c_intptr_t), intent(in), value    :: height
     210             :       integer(kind=C_INT), intent(in), value         :: dir
     211             :       integer(kind=C_INT)                            :: istat
     212             : 
     213             :     end function cuda_memcpy2d_c
     214             :   end interface
     215             : 
     216             :   ! functions to allocate and free CUDA memory
     217             : 
     218             :   interface
     219             :     function cuda_free_c(a) result(istat) &
     220             :              bind(C, name="cudaFreeFromC")
     221             : 
     222             :       use iso_c_binding
     223             : 
     224             :       implicit none
     225             :       integer(kind=C_intptr_T), value  :: a
     226             :       integer(kind=C_INT)              :: istat
     227             : 
     228             :     end function cuda_free_c
     229             :   end interface
     230             : 
     231             :   interface
     232             :     function cuda_malloc_c(a, width_height) result(istat) &
     233             :              bind(C, name="cudaMallocFromC")
     234             : 
     235             :       use iso_c_binding
     236             :       implicit none
     237             : 
     238             :       integer(kind=C_intptr_T)                    :: a
     239             :       integer(kind=c_intptr_t), intent(in), value   :: width_height
     240             :       integer(kind=C_INT)                         :: istat
     241             : 
     242             :     end function cuda_malloc_c
     243             :   end interface
     244             : 
     245             :   interface
     246             :     function cuda_memset_c(a, val, size) result(istat) &
     247             :              bind(C, name="cudaMemsetFromC")
     248             : 
     249             :       use iso_c_binding
     250             : 
     251             :       implicit none
     252             : 
     253             :       integer(kind=C_intptr_T), value            :: a
     254             :       integer(kind=C_INT), value                 :: val
     255             :       integer(kind=c_intptr_t), intent(in), value  :: size
     256             :       integer(kind=C_INT)                        :: istat
     257             : 
     258             :     end function cuda_memset_c
     259             :   end interface
     260             : 
     261             :   ! cuBLAS
     262             :   interface
     263             :     subroutine cublas_dgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
     264             :                               bind(C,name='cublasDgemm_elpa_wrapper')
     265             : 
     266             :       use iso_c_binding
     267             : 
     268             :       implicit none
     269             :       character(1,C_CHAR),value               :: cta, ctb
     270             :       integer(kind=C_INT),value               :: m,n,k
     271             :       integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
     272             :       real(kind=C_DOUBLE),value               :: alpha,beta
     273             :       integer(kind=C_intptr_T), value         :: a, b, c
     274             :       integer(kind=C_intptr_T), value         :: handle
     275             : 
     276             :     end subroutine cublas_dgemm_c
     277             :   end interface
     278             : 
     279             :   interface
     280             :     subroutine cublas_sgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc) &
     281             :                               bind(C,name='cublasSgemm_elpa_wrapper')
     282             : 
     283             :       use iso_c_binding
     284             : 
     285             :       implicit none
     286             :       character(1,C_CHAR),value               :: cta, ctb
     287             :       integer(kind=C_INT),value               :: m,n,k
     288             :       integer(kind=C_INT), intent(in), value  :: lda,ldb,ldc
     289             :       real(kind=C_FLOAT),value                :: alpha,beta
     290             :       integer(kind=C_intptr_T), value         :: a, b, c
     291             :       integer(kind=C_intptr_T), value         :: handle
     292             :       
     293             :     end subroutine cublas_sgemm_c
     294             :   end interface
     295             : 
     296             :   interface
     297             :     subroutine cublas_dtrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
     298             :                               bind(C,name='cublasDtrmm_elpa_wrapper')
     299             : 
     300             :       use iso_c_binding
     301             : 
     302             :       implicit none
     303             :       character(1,C_CHAR),value               :: side, uplo, trans, diag
     304             :       integer(kind=C_INT),value               :: m,n
     305             :       integer(kind=C_INT), intent(in), value  :: lda,ldb
     306             :       real(kind=C_DOUBLE), value              :: alpha
     307             :       integer(kind=C_intptr_T), value         :: a, b
     308             :       integer(kind=C_intptr_T), value         :: handle
     309             :       
     310             :     end subroutine cublas_dtrmm_c
     311             :   end interface
     312             : 
     313             :   interface
     314             :     subroutine cublas_strmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
     315             :                               bind(C,name='cublasStrmm_elpa_wrapper')
     316             : 
     317             :       use iso_c_binding
     318             : 
     319             :       implicit none
     320             :       character(1,C_CHAR),value               :: side, uplo, trans, diag
     321             :       integer(kind=C_INT),value               :: m,n
     322             :       integer(kind=C_INT), intent(in), value  :: lda,ldb
     323             :       real(kind=C_FLOAT), value               :: alpha
     324             :       integer(kind=C_intptr_T), value         :: a, b
     325             :       integer(kind=C_intptr_T), value         :: handle
     326             : 
     327             :     end subroutine cublas_strmm_c
     328             :   end interface
     329             : 
     330             :   interface
     331             :     subroutine cublas_zgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
     332             :                               bind(C,name='cublasZgemm_elpa_wrapper')
     333             : 
     334             :       use iso_c_binding
     335             : 
     336             :       implicit none
     337             :       character(1,C_CHAR),value              :: cta, ctb
     338             :       integer(kind=C_INT),value              :: m,n,k
     339             :       integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
     340             :       complex(kind=C_DOUBLE_COMPLEX),value           :: alpha,beta
     341             :       integer(kind=C_intptr_T), value        :: a, b, c
     342             :       integer(kind=C_intptr_T), value         :: handle
     343             : 
     344             :     end subroutine cublas_zgemm_c
     345             :   end interface
     346             : 
     347             :   interface
     348             :     subroutine cublas_cgemm_c(handle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc) &
     349             :                               bind(C,name='cublasCgemm_elpa_wrapper')
     350             : 
     351             :       use iso_c_binding
     352             : 
     353             :       implicit none
     354             :       character(1,C_CHAR),value              :: cta, ctb
     355             :       integer(kind=C_INT),value              :: m,n,k
     356             :       integer(kind=C_INT), intent(in), value :: lda,ldb,ldc
     357             :       complex(kind=C_FLOAT_COMPLEX),value            :: alpha,beta
     358             :       integer(kind=C_intptr_T), value        :: a, b, c
     359             :       integer(kind=C_intptr_T), value         :: handle
     360             : 
     361             :     end subroutine cublas_cgemm_c
     362             :   end interface
     363             : 
     364             :   interface
     365             :     subroutine cublas_ztrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
     366             :                               bind(C,name='cublasZtrmm_elpa_wrapper')
     367             : 
     368             :       use iso_c_binding
     369             : 
     370             :       implicit none
     371             :       character(1,C_CHAR),value              :: side, uplo, trans, diag
     372             :       integer(kind=C_INT),value              :: m,n
     373             :       integer(kind=C_INT), intent(in), value :: lda,ldb
     374             :       complex(kind=C_DOUBLE_COMPLEX), value          :: alpha
     375             :       integer(kind=C_intptr_T), value        :: a, b
     376             :       integer(kind=C_intptr_T), value         :: handle
     377             : 
     378             :     end subroutine cublas_ztrmm_c
     379             :   end interface
     380             : 
     381             :   interface
     382             :     subroutine cublas_ctrmm_c(handle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb) &
     383             :                               bind(C,name='cublasCtrmm_elpa_wrapper')
     384             : 
     385             :       use iso_c_binding
     386             : 
     387             :       implicit none
     388             :       character(1,C_CHAR),value              :: side, uplo, trans, diag
     389             :       integer(kind=C_INT),value              :: m,n
     390             :       integer(kind=C_INT), intent(in), value :: lda,ldb
     391             :       complex(kind=C_FLOAT_COMPLEX), value           :: alpha
     392             :       integer(kind=C_intptr_T), value        :: a, b
     393             :       integer(kind=C_intptr_T), value         :: handle
     394             : 
     395             :     end subroutine cublas_ctrmm_c
     396             :   end interface
     397             : 
     398             :   interface
     399             :     subroutine cublas_dgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
     400             :                               bind(C,name='cublasDgemv_elpa_wrapper')
     401             : 
     402             :       use iso_c_binding
     403             : 
     404             :       implicit none
     405             :       character(1,C_CHAR),value               :: cta
     406             :       integer(kind=C_INT),value               :: m,n
     407             :       integer(kind=C_INT), intent(in), value  :: lda,incx,incy
     408             :       real(kind=C_DOUBLE),value               :: alpha,beta
     409             :       integer(kind=C_intptr_T), value         :: a, x, y
     410             :       integer(kind=C_intptr_T), value         :: handle
     411             : 
     412             :     end subroutine cublas_dgemv_c
     413             :   end interface
     414             : 
     415             :   interface
     416             :     subroutine cublas_sgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
     417             :                               bind(C,name='cublasSgemv_elpa_wrapper')
     418             : 
     419             :       use iso_c_binding
     420             : 
     421             :       implicit none
     422             :       character(1,C_CHAR),value               :: cta
     423             :       integer(kind=C_INT),value               :: m,n
     424             :       integer(kind=C_INT), intent(in), value  :: lda,incx,incy
     425             :       real(kind=C_FLOAT),value                :: alpha,beta
     426             :       integer(kind=C_intptr_T), value         :: a, x, y
     427             :       integer(kind=C_intptr_T), value         :: handle
     428             : 
     429             :     end subroutine cublas_sgemv_c
     430             :   end interface
     431             : 
     432             :   interface
     433             :     subroutine cublas_zgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
     434             :                               bind(C,name='cublasZgemv_elpa_wrapper')
     435             : 
     436             :       use iso_c_binding
     437             : 
     438             :       implicit none
     439             :       character(1,C_CHAR),value               :: cta
     440             :       integer(kind=C_INT),value               :: m,n
     441             :       integer(kind=C_INT), intent(in), value  :: lda,incx,incy
     442             :       complex(kind=C_DOUBLE_COMPLEX),value               :: alpha,beta
     443             :       integer(kind=C_intptr_T), value         :: a, x, y
     444             :       integer(kind=C_intptr_T), value         :: handle
     445             : 
     446             :     end subroutine cublas_zgemv_c
     447             :   end interface
     448             : 
     449             :   interface
     450             :     subroutine cublas_cgemv_c(handle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy) &
     451             :                               bind(C,name='cublasCgemv_elpa_wrapper')
     452             : 
     453             :       use iso_c_binding
     454             : 
     455             :       implicit none
     456             :       character(1,C_CHAR),value               :: cta
     457             :       integer(kind=C_INT),value               :: m,n
     458             :       integer(kind=C_INT), intent(in), value  :: lda,incx,incy
     459             :       complex(kind=C_FLOAT_COMPLEX),value                :: alpha,beta
     460             :       integer(kind=C_intptr_T), value         :: a, x, y
     461             :       integer(kind=C_intptr_T), value         :: handle
     462             : 
     463             :     end subroutine cublas_cgemv_c
     464             :   end interface
     465             : 
     466             : 
     467             :   contains
     468             : 
     469             :     ! functions to set and query the CUDA devices
     470             : 
     471           0 :    function cublas_create(handle) result(success)
     472             :      use iso_c_binding
     473             :      implicit none
     474             : 
     475             :      integer(kind=C_intptr_t)                  :: handle
     476             :      logical                                   :: success
     477             : #ifdef WITH_GPU_VERSION
     478             :      success = cublas_create_c(handle) /= 0
     479             : #else
     480           0 :      success = .true.
     481             : #endif
     482           0 :    end function
     483             : 
     484           0 :    function cublas_destroy(handle) result(success)
     485             :      use iso_c_binding
     486             :      implicit none
     487             : 
     488             :      integer(kind=C_intptr_t)                  :: handle
     489             :      logical                                   :: success
     490             : #ifdef WITH_GPU_VERSION
     491             :      success = cublas_destroy_c(handle) /= 0
     492             : #else
     493           0 :      success = .true.
     494             : #endif
     495           0 :    end function
     496             :     
     497           0 :     function cuda_threadsynchronize() result(success)
     498             :       use iso_c_binding
     499             : 
     500             :       implicit none
     501             : 
     502             :       logical :: success
     503             : #ifdef WITH_GPU_VERSION
     504             :       success = cuda_threadsynchronize_c() /= 0
     505             : #else
     506           0 :       success = .true.
     507             : #endif
     508           0 :     end function cuda_threadsynchronize
     509             : 
     510           0 :     function cuda_setdevice(n) result(success)
     511             :       use iso_c_binding
     512             : 
     513             :       implicit none
     514             : 
     515             :       integer(kind=ik), intent(in)  :: n
     516             :       logical                       :: success
     517             : #ifdef WITH_GPU_VERSION
     518             :       success = cuda_setdevice_c(int(n,kind=c_int)) /= 0
     519             : #else
     520           0 :       success = .true.
     521             : #endif
     522           0 :     end function cuda_setdevice
     523             : 
     524           0 :     function cuda_getdevicecount(n) result(success)
     525             :       use iso_c_binding
     526             :       implicit none
     527             : 
     528             :       integer(kind=ik)     :: n
     529             :       integer(kind=c_int)  :: nCasted
     530             :       logical              :: success
     531             : #ifdef WITH_GPU_VERSION
     532             :       success = cuda_getdevicecount_c(nCasted) /=0
     533             :       n = int(nCasted)
     534             : #else
     535           0 :       success = .true.
     536           0 :       n = 0
     537             : #endif
     538           0 :     end function cuda_getdevicecount
     539             : 
     540           0 :     function cuda_devicesynchronize()result(success)
     541             : 
     542             :       use iso_c_binding
     543             : 
     544             :       implicit none
     545             :       logical :: success
     546             : #ifdef WITH_GPU_VERSION
     547             :       success = cuda_devicesynchronize_c() /=0
     548             : #else
     549           0 :       success = .true.
     550             : #endif
     551           0 :     end function cuda_devicesynchronize
     552             :     ! functions to allocate and free memory
     553             : 
     554           0 :     function cuda_malloc(a, width_height) result(success)
     555             : 
     556             :      use iso_c_binding
     557             :      implicit none
     558             : 
     559             :      integer(kind=C_intptr_t)                  :: a
     560             :      integer(kind=c_intptr_t), intent(in)        :: width_height
     561             :      logical                                   :: success
     562             : #ifdef WITH_GPU_VERSION
     563             :      success = cuda_malloc_c(a, width_height) /= 0
     564             : #else
     565           0 :      success = .true.
     566             : #endif
     567           0 :    end function
     568             : 
     569           0 :    function cuda_free(a) result(success)
     570             : 
     571             :      use iso_c_binding
     572             : 
     573             :      implicit none
     574             :      integer(kind=C_intptr_T) :: a
     575             :      logical                  :: success
     576             : #ifdef WITH_GPU_VERSION
     577             :      success = cuda_free_c(a) /= 0
     578             : #else
     579           0 :      success = .true.
     580             : #endif
     581           0 :    end function cuda_free
     582             : 
     583           0 :  function cuda_memset(a, val, size) result(success)
     584             : 
     585             :    use iso_c_binding
     586             : 
     587             :    implicit none
     588             : 
     589             :    integer(kind=c_intptr_t)                :: a
     590             :    integer(kind=ik)                        :: val
     591             :    integer(kind=c_intptr_t), intent(in)      :: size
     592             :    integer(kind=C_INT)                     :: istat
     593             : 
     594             :    logical :: success
     595             : #ifdef WITH_GPU_VERSION
     596             :    success= cuda_memset_c(a, int(val,kind=c_int), int(size,kind=c_intptr_t)) /=0
     597             : #else
     598           0 :    success = .true.
     599             : #endif
     600           0 :  end function cuda_memset
     601             : 
     602             :  ! functions to memcopy CUDA memory
     603             : 
     604           0 :  function cuda_memcpyDeviceToDevice() result(flag)
     605             :    use iso_c_binding
     606             :    implicit none
     607             :    integer(kind=ik) :: flag
     608             : #ifdef WITH_GPU_VERSION
     609             :    flag = int(cuda_memcpyDeviceToDevice_c())
     610             : #else
     611           0 :    flag = 0
     612             : #endif
     613           0 :  end function
     614             : 
     615           0 :  function cuda_memcpyHostToDevice() result(flag)
     616             :    use iso_c_binding
     617             :    use precision
     618             :    implicit none
     619             :    integer(kind=ik) :: flag
     620             : #ifdef WITH_GPU_VERSION
     621             :    flag = int(cuda_memcpyHostToDevice_c())
     622             : #else
     623           0 :    flag = 0
     624             : #endif
     625           0 :  end function
     626             : 
     627           0 :  function cuda_memcpyDeviceToHost() result(flag)
     628             :    use iso_c_binding
     629             :    use precision
     630             :    implicit none
     631             :    integer(kind=ik) :: flag
     632             : #ifdef WITH_GPU_VERSION
     633             :    flag = int( cuda_memcpyDeviceToHost_c())
     634             : #else
     635           0 :    flag = 0
     636             : #endif
     637           0 :  end function
     638             : 
     639           0 :  function cuda_hostRegisterPortable() result(flag)
     640             :    use iso_c_binding
     641             :    use precision
     642             :    implicit none
     643             :    integer(kind=ik) :: flag
     644             : #ifdef WITH_GPU_VERSION
     645             :    flag = int(cuda_hostRegisterPortable_c())
     646             : #else
     647           0 :    flag = 0
     648             : #endif
     649           0 :  end function
     650             : 
     651           0 :  function cuda_hostRegisterMapped() result(flag)
     652             :    use iso_c_binding
     653             :    use precision
     654             :    implicit none
     655             :    integer(kind=ik) :: flag
     656             : #ifdef WITH_GPU_VERSION
     657             :    flag = int(cuda_hostRegisterMapped_c())
     658             : #else
     659           0 :    flag = 0
     660             : #endif
     661           0 :  end function
     662             : 
     663           0 :  function cuda_memcpy(dst, src, size, dir) result(success)
     664             : 
     665             :       use iso_c_binding
     666             : 
     667             :       implicit none
     668             :       integer(kind=C_intptr_t)              :: dst
     669             :       integer(kind=C_intptr_t)              :: src
     670             :       integer(kind=c_intptr_t), intent(in)    :: size
     671             :       integer(kind=C_INT), intent(in)       :: dir
     672             :       logical :: success
     673             : 
     674             : #ifdef WITH_GPU_VERSION
     675             :         success = cuda_memcpy_c(dst, src, size, dir) /= 0
     676             : #else
     677           0 :         success = .true.
     678             : #endif
     679           0 :     end function
     680             : 
     681           0 :     function cuda_memcpy2d(dst, dpitch, src, spitch, width, height , dir) result(success)
     682             : 
     683             :       use iso_c_binding
     684             : 
     685             :       implicit none
     686             : 
     687             :       integer(kind=C_intptr_T)           :: dst
     688             :       integer(kind=c_intptr_t), intent(in) :: dpitch
     689             :       integer(kind=C_intptr_T)           :: src
     690             :       integer(kind=c_intptr_t), intent(in) :: spitch
     691             :       integer(kind=c_intptr_t), intent(in) :: width
     692             :       integer(kind=c_intptr_t), intent(in) :: height
     693             :       integer(kind=C_INT), intent(in)    :: dir
     694             :       logical                            :: success
     695             : #ifdef WITH_GPU_VERSION
     696             :       success = cuda_memcpy2d_c(dst, dpitch, src, spitch, width, height , dir) /= 0
     697             : #else
     698           0 :       success = .true.
     699             : #endif
     700           0 :     end function cuda_memcpy2d
     701             : 
     702             :     ! cuBLAS
     703           0 :     subroutine cublas_dgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
     704             :       use iso_c_binding
     705             : 
     706             :       implicit none
     707             :       character(1,C_CHAR),value       :: cta, ctb
     708             :       integer(kind=C_INT)             :: m,n,k
     709             :       integer(kind=C_INT), intent(in) :: lda,ldb,ldc
     710             :       real(kind=C_DOUBLE)             :: alpha,beta
     711             :       integer(kind=C_intptr_T)        :: a, b, c
     712             : #ifdef WITH_GPU_VERSION
     713             :       call cublas_dgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
     714             : #endif
     715           0 :     end subroutine cublas_dgemm
     716             : 
     717           0 :     subroutine cublas_sgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
     718             :       use iso_c_binding
     719             : 
     720             :       implicit none
     721             :       character(1,C_CHAR),value       :: cta, ctb
     722             :       integer(kind=C_INT)             :: m,n,k
     723             :       integer(kind=C_INT), intent(in) :: lda,ldb,ldc
     724             :       real(kind=C_FLOAT)              :: alpha,beta
     725             :       integer(kind=C_intptr_T)        :: a, b, c
     726             : #ifdef WITH_GPU_VERSION
     727             :       call cublas_sgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc)
     728             : #endif
     729           0 :     end subroutine cublas_sgemm
     730             : 
     731           0 :     subroutine cublas_dtrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     732             : 
     733             :       use iso_c_binding
     734             : 
     735             :       implicit none
     736             :       character(1,C_CHAR),value       :: side, uplo, trans, diag
     737             :       integer(kind=C_INT)             :: m,n
     738             :       integer(kind=C_INT), intent(in) :: lda,ldb
     739             :       real(kind=C_DOUBLE)             :: alpha
     740             :       integer(kind=C_intptr_T)        :: a, b
     741             : #ifdef WITH_GPU_VERSION
     742             :       call cublas_dtrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     743             : #endif
     744           0 :     end subroutine cublas_dtrmm
     745             : 
     746           0 :     subroutine cublas_strmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     747             : 
     748             :       use iso_c_binding
     749             : 
     750             :       implicit none
     751             :       character(1,C_CHAR),value       :: side, uplo, trans, diag
     752             :       integer(kind=C_INT)             :: m,n
     753             :       integer(kind=C_INT), intent(in) :: lda,ldb
     754             :       real(kind=C_FLOAT)              :: alpha
     755             :       integer(kind=C_intptr_T)        :: a, b
     756             : #ifdef WITH_GPU_VERSION
     757             :       call cublas_strmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     758             : #endif
     759           0 :     end subroutine cublas_strmm
     760             : 
     761           0 :     subroutine cublas_zgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
     762             : 
     763             :       use iso_c_binding
     764             : 
     765             :       implicit none
     766             :       character(1,C_CHAR),value       :: cta, ctb
     767             :       integer(kind=C_INT)             :: m,n,k
     768             :       integer(kind=C_INT), intent(in) :: lda,ldb,ldc
     769             :       complex(kind=C_DOUBLE_COMPLEX)          :: alpha,beta
     770             :       integer(kind=C_intptr_T)        :: a, b, c
     771             : #ifdef WITH_GPU_VERSION
     772             :       call cublas_zgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
     773             : #endif
     774           0 :     end subroutine cublas_zgemm
     775             : 
     776           0 :     subroutine cublas_cgemm(cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
     777             : 
     778             :       use iso_c_binding
     779             : 
     780             :       implicit none
     781             :       character(1,C_CHAR),value       :: cta, ctb
     782             :       integer(kind=C_INT)             :: m,n,k
     783             :       integer(kind=C_INT), intent(in) :: lda,ldb,ldc
     784             :       complex(kind=C_FLOAT_COMPLEX)           :: alpha,beta
     785             :       integer(kind=C_intptr_T)        :: a, b, c
     786             : #ifdef WITH_GPU_VERSION
     787             :       call cublas_cgemm_c(cublasHandle, cta, ctb, m, n, k, alpha, a, lda, b, ldb, beta, c,ldc)
     788             : #endif
     789           0 :     end subroutine cublas_cgemm
     790             : 
     791           0 :     subroutine cublas_ztrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     792             : 
     793             :       use iso_c_binding
     794             : 
     795             :       implicit none
     796             :       character(1,C_CHAR),value       :: side, uplo, trans, diag
     797             :       integer(kind=C_INT)             :: m,n
     798             :       integer(kind=C_INT), intent(in) :: lda,ldb
     799             :       complex(kind=C_DOUBLE_COMPLEX)          :: alpha
     800             :       integer(kind=C_intptr_T)        :: a, b
     801             : #ifdef WITH_GPU_VERSION
     802             :       call cublas_ztrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     803             : #endif
     804           0 :     end subroutine cublas_ztrmm
     805             : 
     806           0 :     subroutine cublas_ctrmm(side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     807             : 
     808             :       use iso_c_binding
     809             : 
     810             :       implicit none
     811             :       character(1,C_CHAR),value       :: side, uplo, trans, diag
     812             :       integer(kind=C_INT)             :: m,n
     813             :       integer(kind=C_INT), intent(in) :: lda,ldb
     814             :       complex(kind=C_FLOAT_COMPLEX)           :: alpha
     815             :       integer(kind=C_intptr_T)        :: a, b
     816             : #ifdef WITH_GPU_VERSION
     817             :       call cublas_ctrmm_c(cublasHandle, side, uplo, trans, diag, m, n, alpha, a, lda, b, ldb)
     818             : #endif
     819           0 :     end subroutine cublas_ctrmm
     820             : 
     821           0 :     subroutine cublas_dgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     822             :       use iso_c_binding
     823             : 
     824             :       implicit none
     825             :       character(1,C_CHAR),value       :: cta
     826             :       integer(kind=C_INT)             :: m,n
     827             :       integer(kind=C_INT), intent(in) :: lda,incx,incy
     828             :       real(kind=C_DOUBLE)             :: alpha,beta
     829             :       integer(kind=C_intptr_T)        :: a, x, y
     830             : #ifdef WITH_GPU_VERSION
     831             :       call cublas_dgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     832             : #endif
     833           0 :     end subroutine cublas_dgemv
     834             : 
     835           0 :     subroutine cublas_sgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     836             :       use iso_c_binding
     837             : 
     838             :       implicit none
     839             :       character(1,C_CHAR),value       :: cta
     840             :       integer(kind=C_INT)             :: m,n
     841             :       integer(kind=C_INT), intent(in) :: lda,incx,incy
     842             :       real(kind=C_FLOAT)              :: alpha,beta
     843             :       integer(kind=C_intptr_T)        :: a, x, y
     844             : #ifdef WITH_GPU_VERSION
     845             :       call cublas_sgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     846             : #endif
     847           0 :     end subroutine cublas_sgemv
     848             : 
     849           0 :     subroutine cublas_zgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     850             :       use iso_c_binding
     851             : 
     852             :       implicit none
     853             :       character(1,C_CHAR),value       :: cta
     854             :       integer(kind=C_INT)             :: m,n
     855             :       integer(kind=C_INT), intent(in) :: lda,incx,incy
     856             :       complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
     857             :       integer(kind=C_intptr_T)        :: a, x, y
     858             : #ifdef WITH_GPU_VERSION
     859             :       call cublas_zgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     860             : #endif
     861           0 :     end subroutine cublas_zgemv
     862             : 
     863           0 :     subroutine cublas_cgemv(cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     864             :       use iso_c_binding
     865             : 
     866             :       implicit none
     867             :       character(1,C_CHAR),value       :: cta
     868             :       integer(kind=C_INT)             :: m,n
     869             :       integer(kind=C_INT), intent(in) :: lda,incx,incy
     870             :       complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
     871             :       integer(kind=C_intptr_T)        :: a, x, y
     872             : #ifdef WITH_GPU_VERSION
     873             :       call cublas_cgemv_c(cublasHandle, cta, m, n, alpha, a, lda, x, incx, beta, y, incy)
     874             : #endif
     875           0 :     end subroutine cublas_cgemv
     876             : 
     877             : 
     878             : !     subroutine cublas_dsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     879             : !       use iso_c_binding
     880             : ! 
     881             : !       implicit none
     882             : !       character(1,C_CHAR),value       :: cta
     883             : !       integer(kind=C_INT)             :: n
     884             : !       integer(kind=C_INT), intent(in) :: lda,incx,incy
     885             : !       real(kind=C_DOUBLE)             :: alpha,beta
     886             : !       integer(kind=C_intptr_T)        :: a, x, y
     887             : ! #ifdef WITH_GPU_VERSION
     888             : !       call cublas_dsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     889             : ! #endif
     890             : !     end subroutine cublas_dsymv
     891             : ! 
     892             : !     subroutine cublas_ssymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     893             : !       use iso_c_binding
     894             : ! 
     895             : !       implicit none
     896             : !       character(1,C_CHAR),value       :: cta
     897             : !       integer(kind=C_INT)             :: n
     898             : !       integer(kind=C_INT), intent(in) :: lda,incx,incy
     899             : !       real(kind=C_FLOAT)              :: alpha,beta
     900             : !       integer(kind=C_intptr_T)        :: a, x, y
     901             : ! #ifdef WITH_GPU_VERSION
     902             : !       call cublas_ssymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     903             : ! #endif
     904             : !     end subroutine cublas_ssymv
     905             : ! 
     906             : !     subroutine cublas_zsymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     907             : !       use iso_c_binding
     908             : ! 
     909             : !       implicit none
     910             : !       character(1,C_CHAR),value       :: cta
     911             : !       integer(kind=C_INT)             :: n
     912             : !       integer(kind=C_INT), intent(in) :: lda,incx,incy
     913             : !       complex(kind=C_DOUBLE_COMPLEX)             :: alpha,beta
     914             : !       integer(kind=C_intptr_T)        :: a, x, y
     915             : ! #ifdef WITH_GPU_VERSION
     916             : ! !       call cublas_zsymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     917             : ! #endif
     918             : !     end subroutine cublas_zsymv
     919             : ! 
     920             : !     subroutine cublas_csymv(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     921             : !       use iso_c_binding
     922             : ! 
     923             : !       implicit none
     924             : !       character(1,C_CHAR),value       :: cta
     925             : !       integer(kind=C_INT)             :: n
     926             : !       integer(kind=C_INT), intent(in) :: lda,incx,incy
     927             : !       complex(kind=C_FLOAT_COMPLEX)              :: alpha,beta
     928             : !       integer(kind=C_intptr_T)        :: a, x, y
     929             : ! #ifdef WITH_GPU_VERSION
     930             : ! !       call cublas_csymv_c(cta, n, alpha, a, lda, x, incx, beta, y, incy)
     931             : ! #endif
     932             : !     end subroutine cublas_csymv
     933             : 
     934             : 
     935             : end module cuda_functions

Generated by: LCOV version 1.12