This source file includes the following definitions.
- opal_cuda_add_initialization_function
- mca_cuda_convertor_init
- opal_cuda_check_bufs
- opal_cuda_check_one_buf
- opal_cuda_memcpy
- opal_cuda_memcpy_sync
- opal_cuda_memmove
- opal_cuda_support_init
- opal_cuda_set_copy_function_async
1
2
3
4
5
6
7
8
9
10 #include "opal_config.h"
11
12 #include <errno.h>
13 #include <string.h>
14 #include <unistd.h>
15
16 #include "opal/align.h"
17 #include "opal/util/output.h"
18 #include "opal/datatype/opal_convertor.h"
19 #include "opal/datatype/opal_datatype_cuda.h"
20
21 static bool initialized = false;
22 int opal_cuda_verbose = 0;
23 static int opal_cuda_enabled = 0;
24 static int opal_cuda_output = 0;
25 static void opal_cuda_support_init(void);
26 static int (*common_cuda_initialization_function)(opal_common_cuda_function_table_t *) = NULL;
27 static opal_common_cuda_function_table_t ftable;
28
29
30
31
32
33
34 void opal_cuda_add_initialization_function(int (*fptr)(opal_common_cuda_function_table_t *)) {
35 common_cuda_initialization_function = fptr;
36 }
37
38
39
40
41
42
43
44 void mca_cuda_convertor_init(opal_convertor_t* convertor, const void *pUserBuf)
45 {
46
47 if (!initialized) {
48 opal_cuda_support_init();
49 }
50
51
52
53 convertor->cbmemcpy = (memcpy_fct_t)&opal_cuda_memcpy;
54
55
56 if (!opal_cuda_enabled) {
57 return;
58 }
59
60 if (ftable.gpu_is_gpu_buffer(pUserBuf, convertor)) {
61 convertor->flags |= CONVERTOR_CUDA;
62 }
63 }
64
65
66
67
68
69
70 bool opal_cuda_check_bufs(char *dest, char *src)
71 {
72
73 if (!initialized) {
74 opal_cuda_support_init();
75 }
76
77 if (!opal_cuda_enabled) {
78 return false;
79 }
80
81 if (ftable.gpu_is_gpu_buffer(dest, NULL) || ftable.gpu_is_gpu_buffer(src, NULL)) {
82 return true;
83 } else {
84 return false;
85 }
86 }
87
88
89
90
91
92
93
94
95
96
97
98
99
100 bool opal_cuda_check_one_buf(char *buf, opal_convertor_t *convertor )
101 {
102
103 if (!initialized) {
104 opal_cuda_support_init();
105 }
106
107 if (!opal_cuda_enabled) {
108 return false;
109 }
110
111 return ( ftable.gpu_is_gpu_buffer(buf, convertor));
112 }
113
114
115
116
117
118
119
120
121 void *opal_cuda_memcpy(void *dest, const void *src, size_t size, opal_convertor_t* convertor)
122 {
123 int res;
124
125 if (!(convertor->flags & CONVERTOR_CUDA)) {
126 return memcpy(dest, src, size);
127 }
128
129 if (convertor->flags & CONVERTOR_CUDA_ASYNC) {
130 res = ftable.gpu_cu_memcpy_async(dest, (void *)src, size, convertor);
131 } else {
132 res = ftable.gpu_cu_memcpy(dest, (void *)src, size);
133 }
134
135 if (res != 0) {
136 opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
137 res, dest, src, (int)size);
138 abort();
139 } else {
140 return dest;
141 }
142 }
143
144
145
146
147
148
149 void *opal_cuda_memcpy_sync(void *dest, const void *src, size_t size)
150 {
151 int res;
152 res = ftable.gpu_cu_memcpy(dest, src, size);
153 if (res != 0) {
154 opal_output(0, "CUDA: Error in cuMemcpy: res=%d, dest=%p, src=%p, size=%d",
155 res, dest, src, (int)size);
156 abort();
157 } else {
158 return dest;
159 }
160 }
161
162
163
164
165
166 void *opal_cuda_memmove(void *dest, void *src, size_t size)
167 {
168 int res;
169
170 res = ftable.gpu_memmove(dest, src, size);
171 if(res != 0){
172 opal_output(0, "CUDA: Error in gpu memmove: res=%d, dest=%p, src=%p, size=%d",
173 res, dest, src, (int)size);
174 abort();
175 }
176 return dest;
177 }
178
179
180
181
182
183 static void opal_cuda_support_init(void)
184 {
185 if (initialized) {
186 return;
187 }
188
189
190 opal_cuda_output = opal_output_open(NULL);
191 opal_output_set_verbosity(opal_cuda_output, opal_cuda_verbose);
192
193
194
195 if (NULL != common_cuda_initialization_function) {
196 if (0 == common_cuda_initialization_function(&ftable)) {
197 opal_cuda_enabled = 1;
198 }
199 }
200
201 if (1 == opal_cuda_enabled) {
202 opal_output_verbose(10, opal_cuda_output,
203 "CUDA: enabled successfully, CUDA device pointers will work");
204 } else {
205 opal_output_verbose(10, opal_cuda_output,
206 "CUDA: not enabled, CUDA device pointers will not work");
207 }
208
209 initialized = true;
210 }
211
212
213
214
215
216 void opal_cuda_set_copy_function_async(opal_convertor_t* convertor, void *stream)
217 {
218 convertor->flags |= CONVERTOR_CUDA_ASYNC;
219 convertor->stream = stream;
220 }