This source file includes the following definitions.
- opal_datatype_copy_content_same_ddt
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 #include "opal_config.h"
  26 
  27 #include <stddef.h>
  28 #include <stdlib.h>
  29 
  30 #include "opal/prefetch.h"
  31 #include "opal/util/output.h"
  32 #include "opal/datatype/opal_datatype.h"
  33 #include "opal/datatype/opal_convertor.h"
  34 #include "opal/datatype/opal_datatype_internal.h"
  35 #include "opal/datatype/opal_datatype_checksum.h"
  36 
  37 
  38 #if OPAL_ENABLE_DEBUG
  39 #define DO_DEBUG(INST)  if( opal_copy_debug ) { INST }
  40 #else
  41 #define DO_DEBUG(INST)
  42 #endif  
  43 
  44 static size_t opal_datatype_memop_block_size = 128 * 1024;
  45 
  46 
  47 
  48 
  49 #undef MEM_OP_NAME
  50 #define MEM_OP_NAME  non_overlap
  51 #undef MEM_OP
  52 #define MEM_OP       MEMCPY
  53 #include "opal_datatype_copy.h"
  54 
  55 #define MEMMOVE(d, s, l)                                  \
  56     do {                                                  \
  57         if( (((d) < (s)) && (((d) + (l)) > (s))) ||       \
  58             (((s) < (d)) && (((s) + (l)) > (d))) ) {      \
  59             memmove( (d), (s), (l) );                     \
  60         } else {                                          \
  61             MEMCPY( (d), (s), (l) );                      \
  62         }                                                 \
  63     } while (0)
  64 
  65 
  66 
  67 
  68 #undef MEM_OP_NAME
  69 #define MEM_OP_NAME  overlap
  70 #undef MEM_OP
  71 #define MEM_OP       MEMMOVE
  72 #include "opal_datatype_copy.h"
  73 
  74 #if OPAL_CUDA_SUPPORT
  75 #include "opal_datatype_cuda.h"
  76 
  77 #undef MEM_OP_NAME
  78 #define MEM_OP_NAME non_overlap_cuda
  79 #undef MEM_OP
  80 #define MEM_OP opal_cuda_memcpy_sync
  81 #include "opal_datatype_copy.h"
  82 
  83 #undef MEM_OP_NAME
  84 #define MEM_OP_NAME overlap_cuda
  85 #undef MEM_OP
  86 #define MEM_OP opal_cuda_memmove
  87 #include "opal_datatype_copy.h"
  88 
  89 #define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function)     \
  90     do {                                                            \
  91         if (true == cuda_device_bufs) {                             \
  92             fct = copy_function;                                    \
  93         }                                                           \
  94     } while(0)
  95 #else
  96 #define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function)
  97 #endif
  98 
  99 int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, int32_t count,
 100                                              char* destination_base, char* source_base )
 101 {
 102     ptrdiff_t extent;
 103     int32_t (*fct)( const opal_datatype_t*, int32_t, char*, char*);
 104 
 105 #if OPAL_CUDA_SUPPORT
 106     bool cuda_device_bufs = opal_cuda_check_bufs(destination_base, source_base);
 107 #endif
 108 
 109     DO_DEBUG( opal_output( 0, "opal_datatype_copy_content_same_ddt( %p, %d, dst %p, src %p )\n",
 110                            (void*)datatype, count, (void*)destination_base, (void*)source_base ); );
 111 
 112     
 113 
 114 
 115     if( 0 == count ) return 1;
 116 
 117     
 118 
 119 
 120 
 121 
 122     extent = (datatype->true_ub - datatype->true_lb) + (count - 1) * (datatype->ub - datatype->lb);
 123 
 124     fct = non_overlap_copy_content_same_ddt;
 125     SET_CUDA_COPY_FCT(cuda_device_bufs, fct, non_overlap_cuda_copy_content_same_ddt);
 126     if( destination_base < source_base ) {
 127         if( (destination_base + extent) > source_base ) {
 128             
 129             fct = overlap_copy_content_same_ddt;
 130             SET_CUDA_COPY_FCT(cuda_device_bufs, fct, overlap_cuda_copy_content_same_ddt);
 131         }
 132     } else {
 133         if( (source_base + extent) > destination_base ) {
 134             
 135             fct = overlap_copy_content_same_ddt;
 136             SET_CUDA_COPY_FCT(cuda_device_bufs, fct, overlap_cuda_copy_content_same_ddt);
 137         }
 138     }
 139     return fct( datatype, count, destination_base, source_base );
 140 }
 141