This source file includes the following definitions.
- opal_datatype_copy_content_same_ddt
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "opal_config.h"
26
27 #include <stddef.h>
28 #include <stdlib.h>
29
30 #include "opal/prefetch.h"
31 #include "opal/util/output.h"
32 #include "opal/datatype/opal_datatype.h"
33 #include "opal/datatype/opal_convertor.h"
34 #include "opal/datatype/opal_datatype_internal.h"
35 #include "opal/datatype/opal_datatype_checksum.h"
36
37
/*
 * DO_DEBUG(INST): debug-only statement wrapper.
 *
 * When OPAL_ENABLE_DEBUG is non-zero, INST is executed only if the
 * opal_copy_debug flag (declared outside this file) evaluates to true.
 * Otherwise the macro expands to nothing, so INST is never compiled.
 *
 * NOTE(review): the debug form expands to a bare `if( ... ) { ... }` rather
 * than a do { } while (0) statement, so using it directly before an `else`
 * would capture that `else`.
 */
38 #if OPAL_ENABLE_DEBUG
39 #define DO_DEBUG(INST) if( opal_copy_debug ) { INST }
40 #else
41 #define DO_DEBUG(INST)
42 #endif
43
/*
 * File-local block size, initialised to 128 * 1024 (128 KiB).
 * It is not referenced in the visible part of this file; presumably it is
 * consumed by the "opal_datatype_copy.h" template included below -- TODO confirm.
 */
44 static size_t opal_datatype_memop_block_size = 128 * 1024;
45
46
47
48
/*
 * First inclusion of the copy template: MEM_OP_NAME is set to `non_overlap`
 * and MEM_OP to MEMCPY (defined outside this file) before including
 * "opal_datatype_copy.h". The dispatcher at the bottom of this file calls
 * non_overlap_copy_content_same_ddt(), which is presumably what this
 * inclusion generates -- confirm against the header.
 */
49 #undef MEM_OP_NAME
50 #define MEM_OP_NAME non_overlap
51 #undef MEM_OP
52 #define MEM_OP MEMCPY
53 #include "opal_datatype_copy.h"
54
/*
 * MEMMOVE(d, s, l): copy l bytes from s to d.
 *
 * If the ranges [d, d+l) and [s, s+l) overlap, memmove() is used; otherwise
 * the copy goes through MEMCPY().
 *
 * Every argument is expanded several times, so callers must not pass
 * expressions with side effects. The overlap test uses (d) + (l) and
 * (s) + (l); it assumes d and s are byte pointers -- TODO confirm at the
 * call sites inside the template.
 */
55 #define MEMMOVE(d, s, l) \
56 do { \
57 if( (((d) < (s)) && (((d) + (l)) > (s))) || \
58 (((s) < (d)) && (((s) + (l)) > (d))) ) { \
59 memmove( (d), (s), (l) ); \
60 } else { \
61 MEMCPY( (d), (s), (l) ); \
62 } \
63 } while (0)
64
65
66
67
/*
 * Second inclusion of the copy template: MEM_OP_NAME is set to `overlap`
 * and MEM_OP to the MEMMOVE macro defined in this file, so the copy stays
 * safe when source and destination ranges intersect. The dispatcher at the
 * bottom of this file calls overlap_copy_content_same_ddt(), which is
 * presumably what this inclusion generates -- confirm against the header.
 */
68 #undef MEM_OP_NAME
69 #define MEM_OP_NAME overlap
70 #undef MEM_OP
71 #define MEM_OP MEMMOVE
72 #include "opal_datatype_copy.h"
73
#if OPAL_CUDA_SUPPORT
#include "opal_datatype_cuda.h"

/*
 * CUDA flavours of the copy template. The two inclusions below mirror the
 * host-memory ones, but use opal_cuda_memcpy_sync / opal_cuda_memmove as the
 * memory operation. The dispatcher calls the resulting
 * non_overlap_cuda_copy_content_same_ddt() and
 * overlap_cuda_copy_content_same_ddt() routines.
 */
#undef MEM_OP_NAME
#define MEM_OP_NAME non_overlap_cuda
#undef MEM_OP
#define MEM_OP opal_cuda_memcpy_sync
#include "opal_datatype_copy.h"

#undef MEM_OP_NAME
#define MEM_OP_NAME overlap_cuda
#undef MEM_OP
#define MEM_OP opal_cuda_memmove
#include "opal_datatype_copy.h"

/*
 * SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function):
 * assign copy_function to fct when cuda_device_bufs compares equal to true;
 * otherwise leave fct untouched.
 *
 * Each parameter is parenthesised in the expansion so that compound
 * argument expressions (e.g. `a || b`) keep their intended grouping.
 */
#define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function)     \
    do {                                                            \
        if (true == (cuda_device_bufs)) {                           \
            (fct) = (copy_function);                                \
        }                                                           \
    } while(0)
#else
/* Without CUDA support the selection macro is a no-op. */
#define SET_CUDA_COPY_FCT(cuda_device_bufs, fct, copy_function)
#endif
98
99 int32_t opal_datatype_copy_content_same_ddt( const opal_datatype_t* datatype, int32_t count,
100 char* destination_base, char* source_base )
101 {
102 ptrdiff_t extent;
103 int32_t (*fct)( const opal_datatype_t*, int32_t, char*, char*);
104
105 #if OPAL_CUDA_SUPPORT
106 bool cuda_device_bufs = opal_cuda_check_bufs(destination_base, source_base);
107 #endif
108
109 DO_DEBUG( opal_output( 0, "opal_datatype_copy_content_same_ddt( %p, %d, dst %p, src %p )\n",
110 (void*)datatype, count, (void*)destination_base, (void*)source_base ); );
111
112
113
114
115 if( 0 == count ) return 1;
116
117
118
119
120
121
122 extent = (datatype->true_ub - datatype->true_lb) + (count - 1) * (datatype->ub - datatype->lb);
123
124 fct = non_overlap_copy_content_same_ddt;
125 SET_CUDA_COPY_FCT(cuda_device_bufs, fct, non_overlap_cuda_copy_content_same_ddt);
126 if( destination_base < source_base ) {
127 if( (destination_base + extent) > source_base ) {
128
129 fct = overlap_copy_content_same_ddt;
130 SET_CUDA_COPY_FCT(cuda_device_bufs, fct, overlap_cuda_copy_content_same_ddt);
131 }
132 } else {
133 if( (source_base + extent) > destination_base ) {
134
135 fct = overlap_copy_content_same_ddt;
136 SET_CUDA_COPY_FCT(cuda_device_bufs, fct, overlap_cuda_copy_content_same_ddt);
137 }
138 }
139 return fct( datatype, count, destination_base, source_base );
140 }
141