This source file includes the following definitions.
- opal_cuda_add_initialization_function
- mca_cuda_convertor_init
- opal_cuda_check_bufs
- opal_cuda_check_one_buf
- opal_cuda_memcpy
- opal_cuda_memcpy_sync
- opal_cuda_memmove
- opal_cuda_support_init
- opal_cuda_set_copy_function_async
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 #include "opal_config.h"
  11 
  12 #include <errno.h>
  13 #include <string.h>
  14 #include <unistd.h>
  15 
  16 #include "opal/align.h"
  17 #include "opal/util/output.h"
  18 #include "opal/datatype/opal_convertor.h"
  19 #include "opal/datatype/opal_datatype_cuda.h"
  20 
  21 static bool initialized = false;
  22 int opal_cuda_verbose = 0;
  23 static int opal_cuda_enabled = 0; 
  24 static int opal_cuda_output = 0;
  25 static void opal_cuda_support_init(void);
  26 static int (*common_cuda_initialization_function)(opal_common_cuda_function_table_t *) = NULL;
  27 static opal_common_cuda_function_table_t ftable;
  28 
  29 
  30 
  31 
  32 
  33 
  34 void opal_cuda_add_initialization_function(int (*fptr)(opal_common_cuda_function_table_t *)) {
  35     common_cuda_initialization_function = fptr;
  36 }
  37 
  38 
  39 
  40 
  41 
  42 
  43 
  44 void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf)
  45 {
  46     
  47     if (!initialized) {
  48         opal_cuda_support_init();
  49     }
  50 
  51     
  52 
  53     convertor->cbmemcpy = (memcpy_fct_t)&opal_cuda_memcpy;
  54 
  55     
  56     if (!opal_cuda_enabled) {
  57         return;
  58     }
  59 
  60     if (ftable.gpu_is_gpu_buffer(pUserBuf, convertor)) {
  61         convertor->flags |= CONVERTOR_CUDA;
  62     }
  63 }
  64 
  65 
  66 
  67 
  68 
  69 
  70 bool opal_cuda_check_bufs(char *dest, char *src)
  71 {
  72     
  73     if (!initialized) {
  74         opal_cuda_support_init();
  75     }
  76 
  77     if (!opal_cuda_enabled) {
  78         return false;
  79     }
  80 
  81     if (ftable.gpu_is_gpu_buffer(dest, NULL) || ftable.gpu_is_gpu_buffer(src, NULL)) {
  82         return true;
  83     } else {
  84         return false;
  85     }
  86 }
  87 
  88 
  89 
  90 
  91 
  92 
  93 
  94 
  95 
  96 
  97 
  98 
  99 
 100 bool  opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor )
 101 {
 102     
 103     if (!initialized) {
 104         opal_cuda_support_init();
 105     }
 106 
 107     if (!opal_cuda_enabled) {
 108         return false;
 109     }
 110 
 111     return ( ftable.gpu_is_gpu_buffer(buf, convertor));
 112 }
 113 
 114 
 115 
 116 
 117 
 118 
 119 
 120 
 121 void *opal_cuda_memcpy(void *dest, const void *src, size_t size, opal_convertor_t* convertor)
 122 {
 123     int res;
 124 
 125     if (!(convertor->flags & CONVERTOR_CUDA)) {
 126         return memcpy(dest, src, size);
 127     }
 128 
 129     if (convertor->flags & CONVERTOR_CUDA_ASYNC) {
 130         res = ftable.gpu_cu_memcpy_async(dest, (void *)src, size, convertor);
 131     } else {
 132         res = ftable.gpu_cu_memcpy(dest, (void *)src, size);
 133     }
 134 
 135     if (res != 0) {
 136         opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
 137                     res, dest, src, (int)size);
 138         abort();
 139     } else {
 140         return dest;
 141     }
 142 }
 143 
 144 
 145 
 146 
 147 
 148 
 149 void *opal_cuda_memcpy_sync(void *dest, const void *src, size_t size)
 150 {
 151     int res;
 152     res = ftable.gpu_cu_memcpy(dest, src, size);
 153     if (res != 0) {
 154         opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
 155                     res, dest, src, (int)size);
 156         abort();
 157     } else {
 158         return dest;
 159     }
 160 }
 161 
 162 
 163 
 164 
 165 
 166 void *opal_cuda_memmove(void *dest, void *src, size_t size)
 167 {
 168     int res;
 169 
 170     res = ftable.gpu_memmove(dest, src, size);
 171     if(res != 0){
 172         opal_output(0, "CUDA: Error in gpu memmove: res=%d, dest=%p, src=%p, size=%d",
 173                     res, dest, src, (int)size);
 174         abort();
 175     }
 176     return dest;
 177 }
 178 
 179 
 180 
 181 
 182 
 183 static void opal_cuda_support_init(void)
 184 {
 185     if (initialized) {
 186         return;
 187     }
 188 
 189     
 190     opal_cuda_output = opal_output_open(NULL);
 191     opal_output_set_verbosity(opal_cuda_output, opal_cuda_verbose);
 192 
 193     
 194 
 195     if (NULL != common_cuda_initialization_function) {
 196         if (0 == common_cuda_initialization_function(&ftable)) {
 197             opal_cuda_enabled = 1;
 198         }
 199     }
 200 
 201     if (1 == opal_cuda_enabled) {
 202         opal_output_verbose(10, opal_cuda_output,
 203                             "CUDA: enabled successfully, CUDA device pointers will work");
 204     } else {
 205         opal_output_verbose(10, opal_cuda_output,
 206                             "CUDA: not enabled, CUDA device pointers will not work");
 207     }
 208 
 209     initialized = true;
 210 }
 211 
 212 
 213 
 214 
 215 
 216 void opal_cuda_set_copy_function_async(opal_convertor_t* convertor, void *stream)
 217 {
 218     convertor->flags |= CONVERTOR_CUDA_ASYNC;
 219     convertor->stream = stream;
 220 }