This source file includes following definitions.
- vader_btl_first_time_init
- init_vader_endpoint
- fini_vader_endpoint
- vader_add_procs
- vader_del_procs
- vader_finalize
- vader_register_error_cb
- mca_btl_vader_alloc
- mca_btl_vader_free
- vader_prepare_src
- vader_ft_event
- mca_btl_vader_endpoint_constructor
- mca_btl_vader_endpoint_destructor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #include "opal_config.h"
30
31 #include "btl_vader.h"
32 #include "btl_vader_endpoint.h"
33 #include "btl_vader_fifo.h"
34 #include "btl_vader_fbox.h"
35 #include "btl_vader_xpmem.h"
36
37 #include <string.h>
38
39 static int vader_del_procs (struct mca_btl_base_module_t *btl,
40 size_t nprocs, struct opal_proc_t **procs,
41 struct mca_btl_base_endpoint_t **peers);
42
43 static int vader_register_error_cb (struct mca_btl_base_module_t* btl,
44 mca_btl_base_module_error_cb_fn_t cbfunc);
45
46 static int vader_finalize (struct mca_btl_base_module_t* btl);
47
48 static struct mca_btl_base_descriptor_t *vader_prepare_src (
49 struct mca_btl_base_module_t *btl,
50 struct mca_btl_base_endpoint_t *endpoint,
51 struct opal_convertor_t *convertor,
52 uint8_t order,
53 size_t reserve,
54 size_t *size,
55 uint32_t flags
56 );
57
58 static int vader_add_procs(struct mca_btl_base_module_t* btl,
59 size_t nprocs, struct opal_proc_t **procs,
60 struct mca_btl_base_endpoint_t** peers,
61 struct opal_bitmap_t* reachability);
62
63 static int vader_ft_event (int state);
64
/* The vader BTL module instance exported to the BTL framework.
 * The first (positional) initializer is the owning component; the
 * designated initializers wire up the module callbacks implemented
 * in this file (and the send/alloc entry points declared in
 * btl_vader.h). Fields not listed remain zero/NULL, meaning the
 * corresponding optional BTL operations are unsupported. */
mca_btl_vader_t mca_btl_vader = {
    {
        &mca_btl_vader_component.super,
        .btl_add_procs = vader_add_procs,
        .btl_del_procs = vader_del_procs,
        .btl_finalize = vader_finalize,
        .btl_alloc = mca_btl_vader_alloc,
        .btl_free = mca_btl_vader_free,
        .btl_prepare_src = vader_prepare_src,
        .btl_send = mca_btl_vader_send,
        .btl_sendi = mca_btl_vader_sendi,
        .btl_dump = mca_btl_base_dump,
        .btl_register_error = vader_register_error_cb,
        .btl_ft_event = vader_ft_event
    }
};
81
82 static int vader_btl_first_time_init(mca_btl_vader_t *vader_btl, int n)
83 {
84 mca_btl_vader_component_t *component = &mca_btl_vader_component;
85 int rc;
86
87
88 component->endpoints = (struct mca_btl_base_endpoint_t *) calloc (n + 1, sizeof (struct mca_btl_base_endpoint_t));
89 if (NULL == component->endpoints) {
90 return OPAL_ERR_OUT_OF_RESOURCE;
91 }
92 component->endpoints[n].peer_smp_rank = -1;
93
94 component->fbox_in_endpoints = calloc (n + 1, sizeof (void *));
95 if (NULL == component->fbox_in_endpoints) {
96 free(component->endpoints);
97 return OPAL_ERR_OUT_OF_RESOURCE;
98 }
99
100 component->mpool = mca_mpool_basic_create ((void *) (component->my_segment + MCA_BTL_VADER_FIFO_SIZE),
101 (unsigned long) (mca_btl_vader_component.segment_size - MCA_BTL_VADER_FIFO_SIZE), 64);
102 if (NULL == component->mpool) {
103 free (component->endpoints);
104 return OPAL_ERR_OUT_OF_RESOURCE;
105 }
106
107 rc = opal_free_list_init (&component->vader_fboxes, sizeof (opal_free_list_item_t), 8,
108 OBJ_CLASS(opal_free_list_item_t), mca_btl_vader_component.fbox_size,
109 opal_cache_line_size, 0, mca_btl_vader_component.fbox_max, 4,
110 component->mpool, 0, NULL, NULL, NULL);
111 if (OPAL_SUCCESS != rc) {
112 return rc;
113 }
114
115
116
117 rc = opal_free_list_init (&component->vader_frags_user,
118 sizeof(mca_btl_vader_frag_t),
119 opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
120 mca_btl_vader_component.max_inline_send + sizeof (mca_btl_vader_frag_t),
121 opal_cache_line_size, component->vader_free_list_num,
122 component->vader_free_list_max,
123 component->vader_free_list_inc,
124 component->mpool, 0, NULL, mca_btl_vader_frag_init,
125 &component->vader_frags_user);
126 if (OPAL_SUCCESS != rc) {
127 return rc;
128 }
129
130
131 rc = opal_free_list_init (&component->vader_frags_eager,
132 sizeof (mca_btl_vader_frag_t),
133 opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
134 mca_btl_vader.super.btl_eager_limit + sizeof (mca_btl_vader_frag_t),
135 opal_cache_line_size, component->vader_free_list_num,
136 component->vader_free_list_max,
137 component->vader_free_list_inc,
138 component->mpool, 0, NULL, mca_btl_vader_frag_init,
139 &component->vader_frags_eager);
140 if (OPAL_SUCCESS != rc) {
141 return rc;
142 }
143
144 if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
145
146 rc = opal_free_list_init (&component->vader_frags_max_send,
147 sizeof (mca_btl_vader_frag_t),
148 opal_cache_line_size, OBJ_CLASS(mca_btl_vader_frag_t),
149 mca_btl_vader.super.btl_max_send_size + sizeof (mca_btl_vader_frag_t),
150 opal_cache_line_size, component->vader_free_list_num,
151 component->vader_free_list_max,
152 component->vader_free_list_inc,
153 component->mpool, 0, NULL, mca_btl_vader_frag_init,
154 &component->vader_frags_max_send);
155 if (OPAL_SUCCESS != rc) {
156 return rc;
157 }
158 }
159
160
161 vader_btl->btl_inited = true;
162
163 #if OPAL_BTL_VADER_HAVE_XPMEM
164 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
165 mca_btl_vader_component.vma_module = mca_rcache_base_vma_module_alloc ();
166 }
167 #endif
168
169 return OPAL_SUCCESS;
170 }
171
172
173 static int init_vader_endpoint (struct mca_btl_base_endpoint_t *ep, struct opal_proc_t *proc, int remote_rank) {
174 mca_btl_vader_component_t *component = &mca_btl_vader_component;
175 union vader_modex_t *modex;
176 size_t msg_size;
177 int rc;
178
179 OBJ_CONSTRUCT(ep, mca_btl_vader_endpoint_t);
180
181 ep->peer_smp_rank = remote_rank;
182
183 if (remote_rank != MCA_BTL_VADER_LOCAL_RANK) {
184 OPAL_MODEX_RECV(rc, &component->super.btl_version,
185 &proc->proc_name, (void **) &modex, &msg_size);
186 if (OPAL_SUCCESS != rc) {
187 return rc;
188 }
189
190
191 #if OPAL_BTL_VADER_HAVE_XPMEM
192 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
193
194 ep->segment_data.xpmem.apid = xpmem_get (modex->xpmem.seg_id, XPMEM_RDWR, XPMEM_PERMIT_MODE, (void *) 0666);
195 (void) vader_get_registation (ep, modex->xpmem.segment_base, mca_btl_vader_component.segment_size,
196 MCA_RCACHE_FLAGS_PERSIST, (void **) &ep->segment_base);
197 } else {
198 #endif
199
200 ep->segment_data.other.seg_ds = malloc (msg_size);
201 if (NULL == ep->segment_data.other.seg_ds) {
202 return OPAL_ERR_OUT_OF_RESOURCE;
203 }
204
205 memcpy (ep->segment_data.other.seg_ds, &modex->seg_ds, msg_size);
206
207 ep->segment_base = opal_shmem_segment_attach (ep->segment_data.other.seg_ds);
208 if (NULL == ep->segment_base) {
209 return OPAL_ERROR;
210 }
211 #if OPAL_BTL_VADER_HAVE_XPMEM
212 }
213 #endif
214 OBJ_CONSTRUCT(&ep->lock, opal_mutex_t);
215
216 free (modex);
217 } else {
218
219 ep->segment_base = component->my_segment;
220 }
221
222 ep->fifo = (struct vader_fifo_t *) ep->segment_base;
223
224 return OPAL_SUCCESS;
225 }
226
227 static int fini_vader_endpoint (struct mca_btl_base_endpoint_t *ep)
228 {
229
230 if (ep->fifo) {
231 OBJ_DESTRUCT(ep);
232 }
233
234 return OPAL_SUCCESS;
235 }
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
/**
 * Establish shared-memory connectivity to the given procs.
 *
 * Lazily performs the BTL's first-time initialization, then creates an
 * endpoint for every proc that shares this node (and jobid), marking it
 * reachable in the bitmap. Non-local procs get a NULL peer entry.
 *
 * @param btl           the vader BTL module
 * @param nprocs        number of entries in procs/peers
 * @param procs         procs to (possibly) connect to
 * @param peers         out: one endpoint pointer per proc (NULL if unreachable)
 * @param reachability  out: bitmap of reachable proc indices (may be NULL)
 *
 * @returns OPAL_SUCCESS, or the first error encountered.
 */
static int vader_add_procs (struct mca_btl_base_module_t* btl,
                            size_t nprocs, struct opal_proc_t **procs,
                            struct mca_btl_base_endpoint_t **peers,
                            opal_bitmap_t *reachability)
{
    mca_btl_vader_component_t *component = &mca_btl_vader_component;
    mca_btl_vader_t *vader_btl = (mca_btl_vader_t *) btl;
    const opal_proc_t *my_proc;
    int rc = OPAL_SUCCESS;

    /* need my own proc to compare jobids and to skip self in the
     * reachability bitmap */
    if (NULL == (my_proc = opal_proc_local_get())) {
        return OPAL_ERR_OUT_OF_RESOURCE;
    }

    /* nothing to do when this process is alone on the node */
    if (1 > MCA_BTL_VADER_NUM_LOCAL_PEERS) {
        return OPAL_SUCCESS;
    }

    /* a negative local rank means the runtime did not provide one */
    if (0 > MCA_BTL_VADER_LOCAL_RANK) {
        return OPAL_ERROR;
    }

    if (!vader_btl->btl_inited) {
        rc = vader_btl_first_time_init (vader_btl, 1 + MCA_BTL_VADER_NUM_LOCAL_PEERS);
        if (rc != OPAL_SUCCESS) {
            return rc;
        }
    }

    /* local_rank counts only on-node procs; it indexes the endpoint
     * array and doubles as the peer's local rank */
    for (int32_t proc = 0, local_rank = 0 ; proc < (int32_t) nprocs ; ++proc) {
        /* ignore procs from other jobs or other nodes */
        if (procs[proc]->proc_name.jobid != my_proc->proc_name.jobid ||
            !OPAL_PROC_ON_LOCAL_NODE(procs[proc]->proc_flags)) {
            peers[proc] = NULL;
            continue;
        }

        if (my_proc != procs[proc] && NULL != reachability) {
            /* self is intentionally not marked reachable */
            rc = opal_bitmap_set_bit (reachability, proc);
            if(OPAL_SUCCESS != rc) {
                return rc;
            }
        }

        /* endpoint slots were pre-allocated in first_time_init */
        peers[proc] = component->endpoints + local_rank;
        rc = init_vader_endpoint (peers[proc], procs[proc], local_rank++);
        if (OPAL_SUCCESS != rc) {
            break;
        }
    }

    return rc;
}
312
313
314
315
316
317
318
319
320
321
322
323 static int vader_del_procs(struct mca_btl_base_module_t *btl,
324 size_t nprocs, struct opal_proc_t **procs,
325 struct mca_btl_base_endpoint_t **peers)
326 {
327 for (size_t i = 0 ; i < nprocs ; ++i) {
328 if (peers[i]) {
329 fini_vader_endpoint (peers[i]);
330 peers[i] = NULL;
331 }
332 }
333
334 return OPAL_SUCCESS;
335 }
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
/**
 * Finalize the vader BTL: tear down every endpoint, free the endpoint
 * and fast-box arrays, and release this process's shared segment.
 *
 * @param btl  the vader BTL module
 *
 * @returns OPAL_SUCCESS (also when the BTL was never initialized).
 */
static int vader_finalize(struct mca_btl_base_module_t *btl)
{
    mca_btl_vader_component_t *component = &mca_btl_vader_component;
    mca_btl_vader_t *vader_btl = (mca_btl_vader_t *) btl;

    if (!vader_btl->btl_inited) {
        return OPAL_SUCCESS;
    }

    /* one endpoint per local process including self; fini skips slots
     * that were never initialized */
    for (int i = 0 ; i < 1 + MCA_BTL_VADER_NUM_LOCAL_PEERS ; ++i) {
        fini_vader_endpoint (component->endpoints + i);
    }

    free (component->endpoints);
    component->endpoints = NULL;

    vader_btl->btl_inited = false;

    free (component->fbox_in_endpoints);
    component->fbox_in_endpoints = NULL;

    /* XPMEM exposes the address space directly, so only non-XPMEM
     * mechanisms created an opal_shmem segment to clean up. Unlink
     * first (remove the name), then detach from the mapping. */
    if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
        opal_shmem_unlink (&mca_btl_vader_component.seg_ds);
        opal_shmem_segment_detach (&mca_btl_vader_component.seg_ds);
    }

#if OPAL_BTL_VADER_HAVE_XPMEM
    if (NULL != mca_btl_vader_component.vma_module) {
        OBJ_RELEASE(mca_btl_vader_component.vma_module);
    }
#endif

    return OPAL_SUCCESS;
}
385
386
387
388
389
390
391
392
393
394 static int vader_register_error_cb(struct mca_btl_base_module_t* btl,
395 mca_btl_base_module_error_cb_fn_t cbfunc)
396 {
397 ((mca_btl_vader_t *)btl)->error_cb = cbfunc;
398 return OPAL_SUCCESS;
399 }
400
401
402
403
404
405
406
407 mca_btl_base_descriptor_t *mca_btl_vader_alloc(struct mca_btl_base_module_t *btl,
408 struct mca_btl_base_endpoint_t *endpoint,
409 uint8_t order, size_t size, uint32_t flags)
410 {
411 mca_btl_vader_frag_t *frag = NULL;
412
413 if (size <= (size_t) mca_btl_vader_component.max_inline_send) {
414 (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);
415 } else if (size <= mca_btl_vader.super.btl_eager_limit) {
416 (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
417 } else if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism &&
418 size <= mca_btl_vader.super.btl_max_send_size) {
419 (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
420 }
421
422 if (OPAL_LIKELY(frag != NULL)) {
423 frag->segments[0].seg_len = size;
424
425 frag->base.des_flags = flags;
426 frag->base.order = order;
427 }
428
429 return (mca_btl_base_descriptor_t *) frag;
430 }
431
432
433
434
435
436
437
438 int mca_btl_vader_free (struct mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des)
439 {
440 MCA_BTL_VADER_FRAG_RETURN((mca_btl_vader_frag_t *) des);
441
442 return OPAL_SUCCESS;
443 }
444
445
446
447
448
449
/**
 * Prepare user data for sending: allocate a fragment with `reserve`
 * header bytes and either pack the data into it (non-contiguous or
 * non-XPMEM case) or, with XPMEM, reference the user buffer in place
 * for single-copy transfer.
 *
 * @param btl       the vader BTL module
 * @param endpoint  destination endpoint
 * @param convertor positioned convertor describing the user data
 * @param order     ordering tag propagated to the descriptor
 * @param reserve   bytes reserved at the front for the upper-layer header
 * @param size      in: bytes requested; out: bytes actually prepared
 * @param flags     descriptor flags propagated to the fragment
 *
 * @returns the fragment's base descriptor, or NULL on allocation failure.
 */
static struct mca_btl_base_descriptor_t *vader_prepare_src (struct mca_btl_base_module_t *btl,
                                                            struct mca_btl_base_endpoint_t *endpoint,
                                                            struct opal_convertor_t *convertor,
                                                            uint8_t order, size_t reserve, size_t *size,
                                                            uint32_t flags)
{
    const size_t total_size = reserve + *size;
    mca_btl_vader_frag_t *frag;
    void *data_ptr;
    int rc;

    opal_convertor_get_current_pointer (convertor, &data_ptr);
    assert (NULL != data_ptr);

    /* non-contiguous data must be packed through the convertor */
    if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor))) {
        uint32_t iov_count = 1;
        struct iovec iov;

        /* max-send fragments only exist for non-XPMEM mechanisms */
        if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism &&
            total_size > mca_btl_vader.super.btl_eager_limit) {
            (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
        } else
            (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);

        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

        /* pack directly into the fragment buffer, after the reserved
         * header space */
        iov.iov_len = *size;
        iov.iov_base =
            (IOVBASE_TYPE *)(((uintptr_t)(frag->segments[0].seg_addr.pval)) +
                             reserve);

        rc = opal_convertor_pack (convertor, &iov, &iov_count, size);
        if (OPAL_UNLIKELY(rc < 0)) {
            MCA_BTL_VADER_FRAG_RETURN(frag);
            return NULL;
        }

        /* opal_convertor_pack may have updated *size */
        frag->segments[0].seg_len = *size + reserve;
    } else {
        if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
            if (OPAL_LIKELY(total_size <= mca_btl_vader.super.btl_eager_limit)) {
                (void) MCA_BTL_VADER_FRAG_ALLOC_EAGER(frag, endpoint);
            } else {
                (void) MCA_BTL_VADER_FRAG_ALLOC_MAX(frag, endpoint);
            }
        } else
            /* XPMEM: an inline-sized fragment suffices since large data
             * is referenced in place below */
            (void) MCA_BTL_VADER_FRAG_ALLOC_USER(frag, endpoint);

        if (OPAL_UNLIKELY(NULL == frag)) {
            return NULL;
        }

#if OPAL_BTL_VADER_HAVE_XPMEM
        /* single-copy path: point segment 1 at the user's buffer; the
         * receiver maps it through XPMEM instead of copying */
        if (OPAL_UNLIKELY(MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
                          total_size > (size_t) mca_btl_vader_component.max_inline_send)) {

            frag->hdr->flags = MCA_BTL_VADER_FLAG_SINGLE_COPY;

            /* iovec travels in the header so the peer knows what to map */
            frag->hdr->sc_iov.iov_base = data_ptr;
            frag->hdr->sc_iov.iov_len = *size;

            frag->segments[0].seg_len = reserve;
            frag->segments[1].seg_len = *size;
            frag->segments[1].seg_addr.pval = data_ptr;
            frag->base.des_segment_count = 2;
        } else {
#endif
            /* contiguous copy-in path */
            memcpy ((void *)((uintptr_t)frag->segments[0].seg_addr.pval + reserve), data_ptr, *size);
            frag->segments[0].seg_len = total_size;
#if OPAL_BTL_VADER_HAVE_XPMEM
        }
#endif
    }

    frag->base.order = order;
    frag->base.des_flags = flags;

    return &frag->base;
}
536
537
538
539
540
541
542 static int vader_ft_event (int state)
543 {
544 return OPAL_SUCCESS;
545 }
546
547 static void mca_btl_vader_endpoint_constructor (mca_btl_vader_endpoint_t *ep)
548 {
549 OBJ_CONSTRUCT(&ep->pending_frags, opal_list_t);
550 OBJ_CONSTRUCT(&ep->pending_frags_lock, opal_mutex_t);
551 ep->fifo = NULL;
552 ep->fbox_out.fbox = NULL;
553 }
554
555 #if OPAL_BTL_VADER_HAVE_XPMEM
556 #endif
557
/**
 * Endpoint destructor: release the pending-fragment list/lock, unmap
 * the peer's segment (XPMEM cleanup or opal_shmem detach), and return
 * the outgoing fast box to its free list.
 */
static void mca_btl_vader_endpoint_destructor (mca_btl_vader_endpoint_t *ep)
{
    OBJ_DESTRUCT(&ep->pending_frags);
    OBJ_DESTRUCT(&ep->pending_frags_lock);

#if OPAL_BTL_VADER_HAVE_XPMEM
    if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
        mca_btl_vader_xpmem_cleanup_endpoint (ep);
    } else
#endif
    if (ep->segment_data.other.seg_ds) {
        opal_shmem_ds_t seg_ds;

        /* copy the descriptor to the stack before freeing the heap
         * copy, then detach using the stack copy — detaching while the
         * descriptor still lives in the (soon-to-be-unmapped) state
         * would be unsafe */
        memcpy (&seg_ds, ep->segment_data.other.seg_ds, opal_shmem_sizeof_shmem_ds (ep->segment_data.other.seg_ds));
        free (ep->segment_data.other.seg_ds);
        ep->segment_data.other.seg_ds = NULL;

        opal_shmem_segment_detach (&seg_ds);
    }
    if (ep->fbox_out.fbox) {
        opal_free_list_return (&mca_btl_vader_component.vader_fboxes, ep->fbox_out.fbox);
    }

    /* reset so a reused slot reads as uninitialized */
    ep->fbox_in.buffer = ep->fbox_out.buffer = NULL;
    ep->fbox_out.fbox = NULL;
    ep->segment_base = NULL;
    ep->fifo = NULL;
}
590
591 OBJ_CLASS_INSTANCE(mca_btl_vader_endpoint_t, opal_list_item_t, mca_btl_vader_endpoint_constructor, mca_btl_vader_endpoint_destructor);