This source file includes the following definitions:
- sm_fifo_init
- sm_fifo_write
- sm_fifo_read
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #ifndef MCA_BTL_SMCUDA_H
28 #define MCA_BTL_SMCUDA_H
29
30 #include "opal_config.h"
31 #include <stddef.h>
32 #include <stdlib.h>
33 #include <string.h>
34 #include <stdint.h>
35 #ifdef HAVE_SCHED_H
36 #include <sched.h>
37 #endif
38
39 #include "opal/util/bit_ops.h"
40 #include "opal/class/opal_free_list.h"
41 #include "opal/mca/btl/btl.h"
42 #include "opal/mca/common/sm/common_sm.h"
43
44 BEGIN_C_DECLS
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
/* Sentinel value stored in empty FIFO slots; it can never collide with a
 * real fragment pointer, so a slot != SM_FIFO_FREE means "data present". */
#define SM_FIFO_FREE (void *) (-2)

/* Byte padding inserted between FIFO fields so that writer-owned and
 * reader-owned fields land on distinct cache lines (avoids false sharing).
 * NOTE(review): 128 presumably covers two 64-byte lines — confirm target. */
#define SM_CACHE_LINE_PAD 128
81
/**
 * Shared-memory circular FIFO used to pass fragment pointers between
 * local processes.  The sender-side fields (queue/head) and the
 * receiver-side fields (queue_recv/tail) are padded onto separate
 * cache lines to avoid false sharing.
 */
struct sm_fifo_t {
    /* Sender side: segment-relative address of the slot array; converted
       to a local pointer with RELATIVE2VIRTUAL in sm_fifo_write(). */
    volatile void **queue;
    char pad0[SM_CACHE_LINE_PAD - sizeof(void **)];

    /* Lock for the sender side (not taken by the inline fifo routines
       in this header; presumably used by callers — confirm in the .c). */
    opal_atomic_lock_t head_lock;
    char pad1[SM_CACHE_LINE_PAD - sizeof(opal_atomic_lock_t)];

    /* Next slot to write; advanced modulo the queue size via `mask`. */
    volatile int head;
    char pad2[SM_CACHE_LINE_PAD - sizeof(int)];

    /* queue-size - 1; queue size is a power of two, so (idx & mask)
       wraps the index.  Read-only after sm_fifo_init(). */
    unsigned int mask;
    char pad3[SM_CACHE_LINE_PAD - sizeof(int)];

    /* Receiver side: local-virtual address of the slot array, the tail
       index, and the lazy-free bookkeeping used by sm_fifo_read(). */
    volatile void **queue_recv;
    opal_atomic_lock_t tail_lock;
    volatile int tail;
    int num_to_clear;   /* consumed slots not yet returned to the writer */
    int lazy_free;      /* free consumed slots in batches of this many */
    char pad4[SM_CACHE_LINE_PAD - sizeof(void **) -
              sizeof(opal_atomic_lock_t) -
              sizeof(int) * 3];
};
typedef struct sm_fifo_t sm_fifo_t;
106
107
108
109
110
#if OPAL_ENABLE_PROGRESS_THREADS == 1
/* One-byte tokens written to a peer's signalling fifo; DATA is sent by
 * MCA_BTL_SMCUDA_SIGNAL_PEER to wake the progress thread. */
#define DATA (char)0
#define DONE (char)1
#endif
115
/** Per-memory-node record: the shared-memory pool serving that node. */
typedef struct mca_btl_smcuda_mem_node_t {
    mca_mpool_base_module_t* sm_mpool;  /**< shared-memory pool for this node */
} mca_btl_smcuda_mem_node_t;
119
120
121
122
/**
 * Shared-memory (CUDA-aware) BTL component: MCA parameters, shared-memory
 * segment bookkeeping, fragment free lists, and per-peer state.
 */
struct mca_btl_smcuda_component_t {
    mca_btl_base_component_2_0_0_t super;  /**< base BTL component */
    int sm_free_list_num;        /**< initial size of the fragment free lists */
    int sm_free_list_max;        /**< maximum size of the fragment free lists */
    int sm_free_list_inc;        /**< growth increment for the free lists */
    int sm_max_procs;            /**< upper bound on procs sharing the segment
                                      — TODO confirm exact semantics in the .c */
    int sm_extra_procs;          /**< extra procs to allow beyond the initial set */
    char* sm_mpool_name;         /**< name of the shared-memory pool module */
    mca_mpool_base_module_t **sm_mpools;  /**< one mpool per memory node */
    mca_mpool_base_module_t *sm_mpool;    /**< mpool for the local memory node */
    void* sm_mpool_base;         /**< base address of the shared-memory pool */
    size_t eager_limit;          /**< eager (first) fragment size limit */
    size_t max_frag_size;        /**< maximum fragment size */
    opal_mutex_t sm_lock;        /**< protects component state
                                      (exact scope: see the .c file) */
    mca_common_sm_module_t *sm_seg;  /**< descriptor of the shared-memory segment */
    volatile sm_fifo_t **shm_fifo;   /**< fifo array living in shared memory */
    char **shm_bases;            /**< per-process segment base addresses, in
                                      shared memory (used by the
                                      VIRTUAL2RELATIVE/RELATIVE2VIRTUAL macros) */
    uint16_t *shm_mem_nodes;     /**< per-process memory-node ids, in shared memory */
    sm_fifo_t **fifo;            /**< locally cached pointers to the fifos */

    uint16_t *mem_nodes;         /**< locally cached copy of shm_mem_nodes */
    unsigned int fifo_size;      /**< requested slot count for each fifo */
    unsigned int fifo_lazy_free; /**< lazy-free batch size passed to sm_fifo_init() */
    int nfifos;                  /**< number of fifos per receiving process */
    int32_t num_smp_procs;       /**< number of processes on this node */
    int32_t my_smp_rank;         /**< this process's rank among local procs */

    opal_free_list_t sm_frags_eager;  /**< eager-size fragments */
    opal_free_list_t sm_frags_max;    /**< max-size fragments */
    opal_free_list_t sm_frags_user;   /**< user (rdma) fragments */
    opal_free_list_t sm_first_frags_to_progress;

    /** local endpoints, indexed by local rank */
    struct mca_btl_base_endpoint_t **sm_peers;

    opal_free_list_t pending_send_fl;           /**< items queued when a peer fifo is full */
    opal_atomic_int32_t num_outstanding_frags;  /**< sends awaiting completion */
    opal_atomic_int32_t num_pending_sends;      /**< deferred sends awaiting fifo space */
    int mem_node;                /**< memory node of this process */
    int num_mem_nodes;           /**< number of memory nodes on this host */

#if OPAL_ENABLE_PROGRESS_THREADS == 1
    char sm_fifo_path[PATH_MAX]; /**< path of the signalling fifo */
    int sm_fifo_fd;              /**< fd of the signalling fifo */
    opal_thread_t sm_fifo_thread;/**< progress thread servicing the fifo */
#endif
    struct mca_btl_smcuda_t **sm_btls;        /**< BTL modules created by this component */
    struct mca_btl_smcuda_frag_t **table;
    size_t sm_num_btls;          /**< number of active BTL modules */
    size_t sm_max_btls;          /**< capacity of sm_btls */

    /* KNEM single-copy support (semantics per knem docs — confirm in .c) */
    int use_knem;                /**< whether to use knem */
    unsigned int knem_dma_min;   /**< minimum size to offload to knem DMA */
    int knem_max_simultaneous;   /**< max simultaneous knem requests */
    int knem_dma_flag;           /**< flag value passed to knem for DMA */

    int use_cma;                 /**< whether to use Cross-Memory Attach copies */

    /* Backing-file names for the mpool and control segments */
    char *sm_mpool_ctl_file_name;
    char *sm_mpool_rndv_file_name;
    char *sm_ctl_file_name;
    char *sm_rndv_file_name;
#if OPAL_CUDA_SUPPORT
    int cuda_ipc_verbose;        /**< verbosity of CUDA IPC output */
    int cuda_ipc_output;         /**< opal_output stream id for CUDA IPC */
    int use_cuda_ipc;            /**< enable CUDA IPC transfers */
    int use_cuda_ipc_same_gpu;   /**< enable CUDA IPC when peers share a GPU */
#endif
    unsigned long mpool_min_size;/**< minimum size of the shared-memory pool */
    char *allocator;             /**< name of the allocator component to use */
};
typedef struct mca_btl_smcuda_component_t mca_btl_smcuda_component_t;
OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_component_t mca_btl_smcuda_component;
214
215
216
217
/**
 * Shared-memory (CUDA) BTL module instance.
 */
struct mca_btl_smcuda_t {
    mca_btl_base_module_t super;   /**< base BTL module interface */
    bool btl_inited;               /**< true once the module is initialized */
    mca_btl_base_module_error_cb_fn_t error_cb;  /**< callback registered via
                                        mca_btl_smcuda_register_error_cb() */
    mca_rcache_base_module_t *rcache;  /**< registration cache */
};
typedef struct mca_btl_smcuda_t mca_btl_smcuda_t;
OPAL_MODULE_DECLSPEC extern mca_btl_smcuda_t mca_btl_smcuda;
226
/**
 * Free-list item recording a send that could not be posted immediately
 * (presumably because the peer's fifo was full — confirm in the .c file).
 */
struct btl_smcuda_pending_send_item_t
{
    opal_free_list_item_t super;  /**< free-list linkage */
    void *data;                   /**< opaque pointer to the deferred item */
};
typedef struct btl_smcuda_pending_send_item_t btl_smcuda_pending_send_item_t;
233
234
235
236
237
238
239
240
241
242
243
244
245
/* Each process maps the shared segment at a different virtual address, so
 * pointers stored in shared structures must be segment-relative offsets.
 * These macros convert between this process's virtual addresses and offsets
 * relative to this process's base mapping (shm_bases[my_smp_rank]). */
#define VIRTUAL2RELATIVE(VADDR ) ((long)(VADDR) - (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank])
#define RELATIVE2VIRTUAL(OFFSET) ((long)(OFFSET) + (long)mca_btl_smcuda_component.shm_bases[mca_btl_smcuda_component.my_smp_rank])
248
249 static inline int sm_fifo_init(int fifo_size, mca_mpool_base_module_t *mpool,
250 sm_fifo_t *fifo, int lazy_free)
251 {
252 int i, qsize;
253
254
255 qsize = opal_next_poweroftwo_inclusive (fifo_size);
256
257
258 fifo->queue_recv = (volatile void **)mpool->mpool_alloc(
259 mpool, sizeof(void *) * qsize, opal_cache_line_size, 0);
260 if(NULL == fifo->queue_recv) {
261 return OPAL_ERR_OUT_OF_RESOURCE;
262 }
263
264
265 for ( i = 0; i < qsize; i++ )
266 fifo->queue_recv[i] = SM_FIFO_FREE;
267
268
269 fifo->queue = (volatile void **) VIRTUAL2RELATIVE(fifo->queue_recv);
270
271
272 opal_atomic_lock_init(&(fifo->head_lock), OPAL_ATOMIC_LOCK_UNLOCKED);
273 opal_atomic_lock_init(&(fifo->tail_lock), OPAL_ATOMIC_LOCK_UNLOCKED);
274 opal_atomic_unlock(&(fifo->head_lock));
275 opal_atomic_unlock(&(fifo->tail_lock));
276
277
278 fifo->head = 0;
279 fifo->mask = qsize - 1;
280 fifo->tail = 0;
281 fifo->num_to_clear = 0;
282 fifo->lazy_free = lazy_free;
283
284 return OPAL_SUCCESS;
285 }
286
287
/**
 * Post a pointer into a peer's FIFO (sender side).
 *
 * @param value  pointer to enqueue (must never equal SM_FIFO_FREE)
 * @param fifo   the peer's FIFO descriptor
 * @return OPAL_SUCCESS, or OPAL_ERR_OUT_OF_RESOURCE when the queue is full
 */
static inline int sm_fifo_write(void *value, sm_fifo_t *fifo)
{
    /* Convert the segment-relative queue address into a pointer that is
       valid in this process's mapping of the shared segment. */
    volatile void **q = (volatile void **) RELATIVE2VIRTUAL(fifo->queue);

    /* If the head slot has not been freed by the reader yet, the queue
       is full; the rmb orders this load against earlier reads. */
    opal_atomic_rmb();
    if ( SM_FIFO_FREE != q[fifo->head] )
        return OPAL_ERR_OUT_OF_RESOURCE;

    /* Store the value, then the wmb makes it globally visible before the
       head index is advanced (reader must never see a stale slot). */
    q[fifo->head] = value;
    opal_atomic_wmb();
    fifo->head = (fifo->head + 1) & fifo->mask;
    return OPAL_SUCCESS;
}
303
304
/**
 * Dequeue the next pointer from this process's FIFO (receiver side).
 * Consumed slots are not handed back to the writer one at a time: they
 * accumulate until at least `lazy_free` are pending and are then freed
 * in one batch, reducing cache-line traffic with the writer.
 *
 * @param fifo  FIFO mapped in the receiver's address space
 * @return the dequeued pointer, or SM_FIFO_FREE if the FIFO is empty
 */
static inline void *sm_fifo_read(sm_fifo_t *fifo)
{
    void *value;

    /* Snapshot the tail slot; the rmb keeps this load ordered before
       the bookkeeping updates below. */
    value = (void *) fifo->queue_recv[fifo->tail];

    opal_atomic_rmb();

    /* A non-sentinel value means real data: consume the slot. */
    if ( SM_FIFO_FREE != value ) {
        /* advance tail and remember the slot as pending-free */
        fifo->tail = ( fifo->tail + 1 ) & fifo->mask;
        fifo->num_to_clear += 1;

        /* Once enough consumed slots have accumulated, return them all
           to the writer in one pass, oldest first. */
        if ( fifo->num_to_clear >= fifo->lazy_free ) {
            int i = (fifo->tail - fifo->num_to_clear ) & fifo->mask;

            while ( fifo->num_to_clear > 0 ) {
                fifo->queue_recv[i] = SM_FIFO_FREE;
                i = (i+1) & fifo->mask;
                fifo->num_to_clear -= 1;
            }
            /* make the freed slots visible to the writer */
            opal_atomic_wmb();
        }
    }

    return value;
}
335
336
337
338
/**
 * Component progress: drain incoming FIFOs and advance pending sends.
 * @return number of completions processed — TODO confirm in the .c file.
 */
extern int mca_btl_smcuda_component_progress(void);

/**
 * Register a callback to be invoked on BTL-level errors.
 *
 * @param btl     BTL module instance
 * @param cbfunc  error callback function
 * @return OPAL status code
 */
int mca_btl_smcuda_register_error_cb(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_module_error_cb_fn_t cbfunc
);

/**
 * Clean up all resources held by the BTL module.
 *
 * @param btl  BTL module instance
 * @return OPAL status code
 */
extern int mca_btl_smcuda_finalize(
    struct mca_btl_base_module_t* btl
);
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
/**
 * Notify the BTL of new processes; set reachability bits for the procs
 * this BTL can communicate with and create endpoint structures for them.
 *
 * @param btl           BTL module instance
 * @param nprocs        number of entries in procs/peers
 * @param procs         processes being added
 * @param peers         (out) endpoint structures, one per reachable proc
 * @param reachability  (out) bitmap of procs reachable through this BTL
 * @return OPAL status code
 */
extern int mca_btl_smcuda_add_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct opal_proc_t **procs,
    struct mca_btl_base_endpoint_t** peers,
    struct opal_bitmap_t* reachability
);

/**
 * Notify the BTL that the given processes are going away; release the
 * corresponding endpoint resources.
 *
 * @param btl     BTL module instance
 * @param nprocs  number of entries in procs/peers
 * @param procs   processes being removed
 * @param peers   endpoints associated with those processes
 * @return OPAL status code
 */
extern int mca_btl_smcuda_del_procs(
    struct mca_btl_base_module_t* btl,
    size_t nprocs,
    struct opal_proc_t **procs,
    struct mca_btl_base_endpoint_t **peers
);
405
406
407
408
409
410
411
412
/**
 * Allocate a send descriptor with a payload buffer of the given size.
 *
 * @param btl       BTL module instance
 * @param endpoint  destination endpoint
 * @param order     ordering requirement (MCA_BTL_NO_ORDER if none)
 * @param size      requested payload size in bytes
 * @param flags     descriptor flags
 * @return a descriptor, or NULL if resources are exhausted
 */
extern mca_btl_base_descriptor_t* mca_btl_smcuda_alloc(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    uint8_t order,
    size_t size,
    uint32_t flags
);

/**
 * Return a descriptor previously obtained from this BTL.
 *
 * @param btl      BTL module instance
 * @param segment  descriptor to release
 * @return OPAL status code
 */
extern int mca_btl_smcuda_free(
    struct mca_btl_base_module_t* btl,
    mca_btl_base_descriptor_t* segment
);
431
432
433
434
435
436
437
438
/**
 * Pack data from the convertor into a descriptor ready for send.
 *
 * @param btl        BTL module instance
 * @param endpoint   destination endpoint
 * @param convertor  convertor positioned at the payload to pack
 * @param order      ordering requirement
 * @param reserve    bytes reserved at the front for the upper-layer header
 * @param size       in: requested payload bytes; out: bytes actually packed
 * @param flags      descriptor flags
 * @return a prepared descriptor, or NULL on failure
 */
struct mca_btl_base_descriptor_t* mca_btl_smcuda_prepare_src(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct opal_convertor_t* convertor,
    uint8_t order,
    size_t reserve,
    size_t* size,
    uint32_t flags
);

/**
 * Immediate send: try to pack header+payload and deliver in one call,
 * avoiding descriptor allocation on the fast path.  On resource
 * shortage a descriptor may be returned through *descriptor for the
 * caller to retry with mca_btl_smcuda_send().
 *
 * @return OPAL status code
 */
extern int mca_btl_smcuda_sendi( struct mca_btl_base_module_t* btl,
                                 struct mca_btl_base_endpoint_t* endpoint,
                                 struct opal_convertor_t* convertor,
                                 void* header,
                                 size_t header_size,
                                 size_t payload_size,
                                 uint8_t order,
                                 uint32_t flags,
                                 mca_btl_base_tag_t tag,
                                 mca_btl_base_descriptor_t** descriptor );

/**
 * Post a prepared descriptor to the peer, tagged for the receiver's
 * active-message callback.
 *
 * @param btl         BTL module instance
 * @param endpoint    destination endpoint
 * @param descriptor  descriptor describing the data to send
 * @param tag         active-message tag delivered to the receiver
 * @return OPAL status code
 */
extern int mca_btl_smcuda_send(
    struct mca_btl_base_module_t* btl,
    struct mca_btl_base_endpoint_t* endpoint,
    struct mca_btl_base_descriptor_t* descriptor,
    mca_btl_base_tag_t tag
);
479
#if OPAL_CUDA_SUPPORT
/**
 * RDMA get of a remote GPU buffer over CUDA IPC; completion is reported
 * through cbfunc(cbcontext, cbdata).
 */
int mca_btl_smcuda_get_cuda (struct mca_btl_base_module_t *btl,
    struct mca_btl_base_endpoint_t *ep, void *local_address,
    uint64_t remote_address, struct mca_btl_base_registration_handle_t *local_handle,
    struct mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
    int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);

/** Control-message tags exchanged during the CUDA IPC handshake. */
enum ipcCtrlMsg {
    IPC_REQ = 10,   /**< request to establish an IPC connection */
    IPC_ACK,        /**< connection acknowledged */
    IPC_NOTREADY,   /**< peer not ready — presumably retried later; confirm in .c */
};

/** Header carried by CUDA IPC control messages. */
typedef struct ctrlhdr_st {
    enum ipcCtrlMsg ctag;  /**< control-message tag */
    int cudev;             /**< CUDA device index — TODO confirm whose (sender's?) */
} ctrlhdr_t;

/** Per-endpoint CUDA IPC connection state machine. */
enum ipcState {
    IPC_INIT = 1,  /**< no handshake attempted yet */
    IPC_SENT,      /**< IPC_REQ sent, awaiting reply */
    IPC_ACKING,    /**< acknowledgment in progress */
    IPC_ACKED,     /**< acknowledgment complete */
    IPC_OK,        /**< IPC path usable */
    IPC_BAD        /**< IPC path unavailable */
};

#endif
514
515
/**
 * Dump the state of the BTL (or one endpoint) for debugging.
 *
 * @param btl       BTL module instance
 * @param endpoint  endpoint to dump, or NULL for all — TODO confirm in .c
 * @param verbose   verbosity level
 */
extern void mca_btl_smcuda_dump(struct mca_btl_base_module_t* btl,
                                struct mca_btl_base_endpoint_t* endpoint,
                                int verbose);

/**
 * Fault-tolerance event hook.
 *
 * @param state  checkpoint/restart state being entered
 * @return OPAL status code
 */
int mca_btl_smcuda_ft_event(int state);

#if OPAL_ENABLE_PROGRESS_THREADS == 1
/* Entry point of the progress thread; presumably blocks on the
 * signalling fifo written by MCA_BTL_SMCUDA_SIGNAL_PEER — confirm in .c. */
void mca_btl_smcuda_component_event_thread(opal_object_t*);
#endif
530
#if OPAL_ENABLE_PROGRESS_THREADS == 1
/**
 * Wake a peer's progress thread by writing a one-byte DATA token into its
 * signalling fifo; a failed write is logged but not fatal.
 *
 * Fix: wrapped in do { ... } while (0) instead of a bare block so the
 * macro behaves as a single statement — a bare { ... } followed by the
 * caller's ';' breaks `if (cond) MCA_BTL_SMCUDA_SIGNAL_PEER(p); else ...`.
 */
#define MCA_BTL_SMCUDA_SIGNAL_PEER(peer) \
do { \
    unsigned char cmd = DATA; \
    if(write(peer->fifo_fd, &cmd, sizeof(cmd)) != sizeof(cmd)) { \
        opal_output(0, "mca_btl_smcuda_send: write fifo failed: errno=%d\n", errno); \
    } \
} while (0)
#else
#define MCA_BTL_SMCUDA_SIGNAL_PEER(peer)
#endif
542
543 END_C_DECLS
544
545 #endif
546