root/opal/datatype/opal_datatype_cuda.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. opal_cuda_add_initialization_function
  2. mca_cuda_convertor_init
  3. opal_cuda_check_bufs
  4. opal_cuda_check_one_buf
  5. opal_cuda_memcpy
  6. opal_cuda_memcpy_sync
  7. opal_cuda_memmove
  8. opal_cuda_support_init
  9. opal_cuda_set_copy_function_async

   1 /*
   2  * Copyright (c) 2011-2014 NVIDIA Corporation.  All rights reserved.
   3  * $COPYRIGHT$
   4  *
   5  * Additional copyrights may follow
   6  *
   7  * $HEADER$
   8  */
   9 
  10 #include "opal_config.h"
  11 
  12 #include <errno.h>
  13 #include <string.h>
  14 #include <unistd.h>
  15 
  16 #include "opal/align.h"
  17 #include "opal/util/output.h"
  18 #include "opal/datatype/opal_convertor.h"
  19 #include "opal/datatype/opal_datatype_cuda.h"
  20 
  21 static bool initialized = false;
  22 int opal_cuda_verbose = 0;
  23 static int opal_cuda_enabled = 0; /* Starts out disabled */
  24 static int opal_cuda_output = 0;
  25 static void opal_cuda_support_init(void);
  26 static int (*common_cuda_initialization_function)(opal_common_cuda_function_table_t *) = NULL;
  27 static opal_common_cuda_function_table_t ftable;
  28 
  29 /* This function allows the common cuda code to register an
  30  * initialization function that gets called the first time an attempt
  31  * is made to send or receive a GPU pointer.  This allows us to delay
  32  * some CUDA initialization until after MPI_Init().
  33  */
  34 void opal_cuda_add_initialization_function(int (*fptr)(opal_common_cuda_function_table_t *)) {
  35     common_cuda_initialization_function = fptr;
  36 }
  37 
  38 /**
  39  * This function is called when a convertor is instantiated.  It has to call
  40  * the opal_cuda_support_init() function once to figure out if CUDA support
  41  * is enabled or not.  If CUDA is not enabled, then short circuit out
  42  * for all future calls.
  43  */
  44 void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf)
  45 {
  46     /* Only do the initialization on the first GPU access */
  47     if (!initialized) {
  48         opal_cuda_support_init();
  49     }
  50 
  51     /* This is needed to handle case where convertor is not fully initialized
  52      * like when trying to do a sendi with convertor on the statck */
  53     convertor->cbmemcpy = (memcpy_fct_t)&opal_cuda_memcpy;
  54 
  55     /* If not enabled, then nothing else to do */
  56     if (!opal_cuda_enabled) {
  57         return;
  58     }
  59 
  60     if (ftable.gpu_is_gpu_buffer(pUserBuf, convertor)) {
  61         convertor->flags |= CONVERTOR_CUDA;
  62     }
  63 }
  64 
  65 /* Checks the type of pointer
  66  *
  67  * @param dest   One pointer to check
  68  * @param source Another pointer to check
  69  */
  70 bool opal_cuda_check_bufs(char *dest, char *src)
  71 {
  72     /* Only do the initialization on the first GPU access */
  73     if (!initialized) {
  74         opal_cuda_support_init();
  75     }
  76 
  77     if (!opal_cuda_enabled) {
  78         return false;
  79     }
  80 
  81     if (ftable.gpu_is_gpu_buffer(dest, NULL) || ftable.gpu_is_gpu_buffer(src, NULL)) {
  82         return true;
  83     } else {
  84         return false;
  85     }
  86 }
  87 
  88 /*
  89  * With CUDA enabled, all contiguous copies will pass through this function.
  90  * Therefore, the first check is to see if the convertor is a GPU buffer.
  91  * Note that if there is an error with any of the CUDA calls, the program
  92  * aborts as there is no recovering.
  93  */
  94 
  95 /* Checks the type of pointer
  96  *
  97  * @param buf   check one pointer providing a convertor.
  98  *  Provides aditional information, e.g. managed vs. unmanaged GPU buffer
  99  */
 100 bool  opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor )
 101 {
 102     /* Only do the initialization on the first GPU access */
 103     if (!initialized) {
 104         opal_cuda_support_init();
 105     }
 106 
 107     if (!opal_cuda_enabled) {
 108         return false;
 109     }
 110 
 111     return ( ftable.gpu_is_gpu_buffer(buf, convertor));
 112 }
 113 
 114 /*
 115  * With CUDA enabled, all contiguous copies will pass through this function.
 116  * Therefore, the first check is to see if the convertor is a GPU buffer.
 117  * Note that if there is an error with any of the CUDA calls, the program
 118  * aborts as there is no recovering.
 119  */
 120 
 121 void *opal_cuda_memcpy(void *dest, const void *src, size_t size, opal_convertor_t* convertor)
 122 {
 123     int res;
 124 
 125     if (!(convertor->flags & CONVERTOR_CUDA)) {
 126         return memcpy(dest, src, size);
 127     }
 128 
 129     if (convertor->flags & CONVERTOR_CUDA_ASYNC) {
 130         res = ftable.gpu_cu_memcpy_async(dest, (void *)src, size, convertor);
 131     } else {
 132         res = ftable.gpu_cu_memcpy(dest, (void *)src, size);
 133     }
 134 
 135     if (res != 0) {
 136         opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
 137                     res, dest, src, (int)size);
 138         abort();
 139     } else {
 140         return dest;
 141     }
 142 }
 143 
 144 /*
 145  * This function is needed in cases where we do not have contiguous
 146  * datatypes.  The current code has macros that cannot handle a convertor
 147  * argument to the memcpy call.
 148  */
 149 void *opal_cuda_memcpy_sync(void *dest, const void *src, size_t size)
 150 {
 151     int res;
 152     res = ftable.gpu_cu_memcpy(dest, src, size);
 153     if (res != 0) {
 154         opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
 155                     res, dest, src, (int)size);
 156         abort();
 157     } else {
 158         return dest;
 159     }
 160 }
 161 
 162 /*
 163  * In some cases, need an implementation of memmove.  This is not fast, but
 164  * it is not often needed.
 165  */
 166 void *opal_cuda_memmove(void *dest, void *src, size_t size)
 167 {
 168     int res;
 169 
 170     res = ftable.gpu_memmove(dest, src, size);
 171     if(res != 0){
 172         opal_output(0, "CUDA: Error in gpu memmove: res=%d, dest=%p, src=%p, size=%d",
 173                     res, dest, src, (int)size);
 174         abort();
 175     }
 176     return dest;
 177 }
 178 
 179 /**
 180  * This function gets called once to check if the program is running in a cuda
 181  * environment.
 182  */
 183 static void opal_cuda_support_init(void)
 184 {
 185     if (initialized) {
 186         return;
 187     }
 188 
 189     /* Set different levels of verbosity in the cuda related code. */
 190     opal_cuda_output = opal_output_open(NULL);
 191     opal_output_set_verbosity(opal_cuda_output, opal_cuda_verbose);
 192 
 193     /* Callback into the common cuda initialization routine. This is only
 194      * set if some work had been done already in the common cuda code.*/
 195     if (NULL != common_cuda_initialization_function) {
 196         if (0 == common_cuda_initialization_function(&ftable)) {
 197             opal_cuda_enabled = 1;
 198         }
 199     }
 200 
 201     if (1 == opal_cuda_enabled) {
 202         opal_output_verbose(10, opal_cuda_output,
 203                             "CUDA: enabled successfully, CUDA device pointers will work");
 204     } else {
 205         opal_output_verbose(10, opal_cuda_output,
 206                             "CUDA: not enabled, CUDA device pointers will not work");
 207     }
 208 
 209     initialized = true;
 210 }
 211 
 212 /**
 213  * Tell the convertor that copies will be asynchronous CUDA copies.  The
 214  * flags are cleared when the convertor is reinitialized.
 215  */
 216 void opal_cuda_set_copy_function_async(opal_convertor_t* convertor, void *stream)
 217 {
 218     convertor->flags |= CONVERTOR_CUDA_ASYNC;
 219     convertor->stream = stream;
 220 }

/* [<][>][^][v][top][bottom][index][help] */