This source file includes the following definitions.
- mca_btl_vader_component_register
- mca_btl_vader_component_open
- mca_btl_vader_component_close
- mca_btl_base_vader_modex_send
- mca_btl_vader_select_next_single_copy_mechanism
- mca_btl_vader_check_single_copy
- mca_btl_vader_component_init
- mca_btl_vader_poll_handle_frag
- mca_btl_vader_poll_fifo
- mca_btl_vader_progress_waiting
- mca_btl_vader_progress_endpoints
- mca_btl_vader_component_progress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 #include "opal_config.h"
31
32 #include "opal/util/output.h"
33 #include "opal/util/show_help.h"
34 #include "opal/util/printf.h"
35 #include "opal/threads/mutex.h"
36 #include "opal/mca/btl/base/btl_base_error.h"
37
38 #include "btl_vader.h"
39 #include "btl_vader_frag.h"
40 #include "btl_vader_fifo.h"
41 #include "btl_vader_fbox.h"
42 #include "btl_vader_xpmem.h"
43
44 #include <sys/mman.h>
45 #include <fcntl.h>
46
47 #ifdef HAVE_SYS_PRCTL_H
48 #include <sys/prctl.h>
49 #endif
50
51
52 #if !defined(MAP_ANONYMOUS)
53 #define MAP_ANONYMOUS MAP_ANON
54 #endif
55
56 static int mca_btl_vader_component_progress (void);
57 static int mca_btl_vader_component_open(void);
58 static int mca_btl_vader_component_close(void);
59 static int mca_btl_vader_component_register(void);
60 static mca_btl_base_module_t** mca_btl_vader_component_init(int *num_btls,
61 bool enable_progress_threads,
62 bool enable_mpi_threads);
63
64
65 static mca_base_var_enum_value_t single_copy_mechanisms[] = {
66 #if OPAL_BTL_VADER_HAVE_XPMEM
67 {.value = MCA_BTL_VADER_XPMEM, .string = "xpmem"},
68 #endif
69 #if OPAL_BTL_VADER_HAVE_CMA
70 {.value = MCA_BTL_VADER_CMA, .string = "cma"},
71 #endif
72 #if OPAL_BTL_VADER_HAVE_KNEM
73 {.value = MCA_BTL_VADER_KNEM, .string = "knem"},
74 #endif
75 {.value = MCA_BTL_VADER_EMUL, .string = "emulated"},
76 {.value = MCA_BTL_VADER_NONE, .string = "none"},
77 {.value = 0, .string = NULL}
78 };
79
80
81
82
83 mca_btl_vader_component_t mca_btl_vader_component = {
84 .super = {
85
86
87 .btl_version = {
88 MCA_BTL_DEFAULT_VERSION("vader"),
89 .mca_open_component = mca_btl_vader_component_open,
90 .mca_close_component = mca_btl_vader_component_close,
91 .mca_register_component_params = mca_btl_vader_component_register,
92 },
93 .btl_data = {
94
95 .param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT
96 },
97
98 .btl_init = mca_btl_vader_component_init,
99 .btl_progress = mca_btl_vader_component_progress,
100 }
101 };
102
103 static int mca_btl_vader_component_register (void)
104 {
105 mca_base_var_enum_t *new_enum;
106
107 (void) mca_base_var_group_component_register(&mca_btl_vader_component.super.btl_version,
108 "Enhanced shared memory byte transport later");
109
110
111 mca_btl_vader_component.vader_free_list_num = 8;
112 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
113 "free_list_num", "Initial number of fragments "
114 "to allocate for shared memory communication.",
115 MCA_BASE_VAR_TYPE_INT, NULL, 0,
116 MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
117 MCA_BASE_VAR_SCOPE_LOCAL,
118 &mca_btl_vader_component.vader_free_list_num);
119 mca_btl_vader_component.vader_free_list_max = 512;
120 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
121 "free_list_max", "Maximum number of fragments "
122 "to allocate for shared memory communication.",
123 MCA_BASE_VAR_TYPE_INT, NULL, 0,
124 MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
125 MCA_BASE_VAR_SCOPE_LOCAL,
126 &mca_btl_vader_component.vader_free_list_max);
127 mca_btl_vader_component.vader_free_list_inc = 64;
128 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
129 "free_list_inc", "Number of fragments to create "
130 "on each allocation.", MCA_BASE_VAR_TYPE_INT, NULL, 0,
131 MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
132 MCA_BASE_VAR_SCOPE_LOCAL,
133 &mca_btl_vader_component.vader_free_list_inc);
134
135 mca_btl_vader_component.memcpy_limit = 524288;
136 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
137 "memcpy_limit", "Message size to switch from using "
138 "memove to memcpy. The relative speed of these two "
139 "routines can vary by size.", MCA_BASE_VAR_TYPE_INT,
140 NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
141 MCA_BASE_VAR_SCOPE_LOCAL,
142 &mca_btl_vader_component.memcpy_limit);
143 #if OPAL_BTL_VADER_HAVE_XPMEM
144 mca_btl_vader_component.log_attach_align = 21;
145 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
146 "log_align", "Log base 2 of the alignment to use for xpmem "
147 "segments (default: 21, minimum: 12, maximum: 25)",
148 MCA_BASE_VAR_TYPE_INT, NULL, 0,
149 MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
150 MCA_BASE_VAR_SCOPE_LOCAL,
151 &mca_btl_vader_component.log_attach_align);
152 #endif
153
154 #if OPAL_BTL_VADER_HAVE_XPMEM && 64 == MCA_BTL_VADER_BITNESS
155 mca_btl_vader_component.segment_size = 1 << 24;
156 #else
157 mca_btl_vader_component.segment_size = 1 << 22;
158 #endif
159 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
160 "segment_size", "Maximum size of all shared "
161 #if OPAL_BTL_VADER_HAVE_XPMEM && 64 == MCA_BTL_VADER_BITNESS
162 "memory buffers (default: 16M)",
163 #else
164 "memory buffers (default: 4M)",
165 #endif
166 MCA_BASE_VAR_TYPE_INT, NULL, 0,
167 MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
168 MCA_BASE_VAR_SCOPE_LOCAL,
169 &mca_btl_vader_component.segment_size);
170
171 mca_btl_vader_component.max_inline_send = 256;
172 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
173 "max_inline_send", "Maximum size to transfer "
174 "using copy-in copy-out semantics",
175 MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
176 MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
177 MCA_BASE_VAR_SCOPE_LOCAL,
178 &mca_btl_vader_component.max_inline_send);
179
180 mca_btl_vader_component.fbox_threshold = 16;
181 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
182 "fbox_threshold", "Number of sends required "
183 "before an eager send buffer is setup for a peer "
184 "(default: 16)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
185 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
186 MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.fbox_threshold);
187
188 mca_btl_vader_component.fbox_max = 32;
189 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
190 "fbox_max", "Maximum number of eager send buffers "
191 "to allocate (default: 32)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
192 NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
193 MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.fbox_max);
194
195 mca_btl_vader_component.fbox_size = 4096;
196 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
197 "fbox_size", "Size of per-peer fast transfer buffers (default: 4k)",
198 MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
199 OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.fbox_size);
200
201 (void) mca_base_var_enum_create ("btl_vader_single_copy_mechanisms", single_copy_mechanisms, &new_enum);
202
203
204 mca_btl_vader_component.single_copy_mechanism = single_copy_mechanisms[0].value;
205 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
206 "single_copy_mechanism", "Single copy mechanism to use (defaults to best available)",
207 MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
208 OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP, &mca_btl_vader_component.single_copy_mechanism);
209 OBJ_RELEASE(new_enum);
210
211 if (0 == access ("/dev/shm", W_OK)) {
212 mca_btl_vader_component.backing_directory = "/dev/shm";
213 } else {
214 mca_btl_vader_component.backing_directory = opal_process_info.job_session_dir;
215 }
216 (void) mca_base_component_var_register (&mca_btl_vader_component.super.btl_version, "backing_directory",
217 "Directory to place backing files for shared memory communication. "
218 "This directory should be on a local filesystem such as /tmp or "
219 "/dev/shm (default: (linux) /dev/shm, (others) session directory)",
220 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
221 MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_vader_component.backing_directory);
222
223
224 #if OPAL_BTL_VADER_HAVE_KNEM
225
226 mca_btl_vader_component.knem_dma_min = 0;
227 (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "knem_dma_min",
228 "Minimum message size (in bytes) to use the knem DMA mode; "
229 "ignored if knem does not support DMA mode (0 = do not use the "
230 "knem DMA mode, default: 0)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
231 NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
232 &mca_btl_vader_component.knem_dma_min);
233 #endif
234
235 mca_btl_vader.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
236
237 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
238 mca_btl_vader.super.btl_eager_limit = 32 * 1024;
239 mca_btl_vader.super.btl_rndv_eager_limit = mca_btl_vader.super.btl_eager_limit;
240 mca_btl_vader.super.btl_max_send_size = mca_btl_vader.super.btl_eager_limit;
241 mca_btl_vader.super.btl_min_rdma_pipeline_size = INT_MAX;
242 } else {
243 mca_btl_vader.super.btl_eager_limit = 4 * 1024;
244 mca_btl_vader.super.btl_rndv_eager_limit = 32 * 1024;
245 mca_btl_vader.super.btl_max_send_size = 32 * 1024;
246 mca_btl_vader.super.btl_min_rdma_pipeline_size = INT_MAX;
247 }
248
249 mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
250 mca_btl_vader.super.btl_rdma_pipeline_frag_size = mca_btl_vader.super.btl_eager_limit;
251
252 #if OPAL_HAVE_ATOMIC_MATH_64
253 mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA |
254 MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS;
255
256 mca_btl_vader.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_AND |
257 MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP |
258 MCA_BTL_ATOMIC_SUPPORTS_GLOB | MCA_BTL_ATOMIC_SUPPORTS_SWAP;
259 #if OPAL_HAVE_ATOMIC_MATH_32
260 mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_32BIT;
261 #endif
262
263 #if OPAL_HAVE_ATOMIC_MIN_64
264 mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN;
265 #endif
266
267 #if OPAL_HAVE_ATOMIC_MAX_64
268 mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MAX;
269 #endif
270
271 #else
272 mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA;
273 #endif
274
275 if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
276
277 mca_btl_vader.super.btl_bandwidth = 40000;
278 } else {
279 mca_btl_vader.super.btl_bandwidth = 10000;
280 }
281
282 mca_btl_vader.super.btl_get = mca_btl_vader_get_sc_emu;
283 mca_btl_vader.super.btl_put = mca_btl_vader_put_sc_emu;
284 mca_btl_vader.super.btl_atomic_op = mca_btl_vader_emu_aop;
285 mca_btl_vader.super.btl_atomic_fop = mca_btl_vader_emu_afop;
286 mca_btl_vader.super.btl_atomic_cswap = mca_btl_vader_emu_acswap;
287
288 mca_btl_vader.super.btl_latency = 1;
289
290
291 mca_btl_base_param_register(&mca_btl_vader_component.super.btl_version,
292 &mca_btl_vader.super);
293
294 return OPAL_SUCCESS;
295 }
296
297
298
299
300
301
302 static int mca_btl_vader_component_open(void)
303 {
304
305 OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t);
306 OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t);
307 OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t);
308 OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t);
309 OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t);
310 OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t);
311 OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t);
312 #if OPAL_BTL_VADER_HAVE_KNEM
313 mca_btl_vader.knem_fd = -1;
314 #endif
315
316 return OPAL_SUCCESS;
317 }
318
319
320
321
322
323
324 static int mca_btl_vader_component_close(void)
325 {
326 OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
327 OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
328 OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send);
329 OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes);
330 OBJ_DESTRUCT(&mca_btl_vader_component.lock);
331 OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints);
332 OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments);
333
334 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
335 NULL != mca_btl_vader_component.my_segment) {
336 munmap (mca_btl_vader_component.my_segment, mca_btl_vader_component.segment_size);
337 }
338
339 mca_btl_vader_component.my_segment = NULL;
340
341 #if OPAL_BTL_VADER_HAVE_KNEM
342 mca_btl_vader_knem_fini ();
343 #endif
344
345 if (mca_btl_vader_component.mpool) {
346 mca_btl_vader_component.mpool->mpool_finalize (mca_btl_vader_component.mpool);
347 mca_btl_vader_component.mpool = NULL;
348 }
349
350 return OPAL_SUCCESS;
351 }
352
353 static int mca_btl_base_vader_modex_send (void)
354 {
355 union vader_modex_t modex;
356 int modex_size, rc;
357
358 #if OPAL_BTL_VADER_HAVE_XPMEM
359 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
360 modex.xpmem.seg_id = mca_btl_vader_component.my_seg_id;
361 modex.xpmem.segment_base = mca_btl_vader_component.my_segment;
362
363 modex_size = sizeof (modex.xpmem);
364 } else {
365 #endif
366 modex_size = opal_shmem_sizeof_shmem_ds (&mca_btl_vader_component.seg_ds);
367 memmove (&modex.seg_ds, &mca_btl_vader_component.seg_ds, modex_size);
368
369 #if OPAL_BTL_VADER_HAVE_XPMEM
370 }
371 #endif
372
373 OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL,
374 &mca_btl_vader_component.super.btl_version, &modex, modex_size);
375
376 return rc;
377 }
378
#if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM
/**
 * Replace the currently selected single-copy mechanism with the entry that
 * follows it in single_copy_mechanisms.
 *
 * The search stops at the "none" entry, so nothing changes when the
 * current mechanism is "none" or is not found before it.
 */
static void mca_btl_vader_select_next_single_copy_mechanism (void)
{
    const int current = mca_btl_vader_component.single_copy_mechanism;
    const mca_base_var_enum_value_t *entry = single_copy_mechanisms;

    while (MCA_BTL_VADER_NONE != entry->value) {
        if (current == entry->value) {
            /* the matched entry is never the last one, so entry[1] is valid */
            mca_btl_vader_component.single_copy_mechanism = entry[1].value;
            return;
        }
        ++entry;
    }
}
#endif
390
391 static void mca_btl_vader_check_single_copy (void)
392 {
393 #if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM
394 int initial_mechanism = mca_btl_vader_component.single_copy_mechanism;
395 #endif
396
397
398 mca_btl_vader_sc_emu_init ();
399
400 #if OPAL_BTL_VADER_HAVE_XPMEM
401 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
402
403 int rc = mca_btl_vader_xpmem_init ();
404 if (OPAL_SUCCESS != rc) {
405 if (MCA_BTL_VADER_XPMEM == initial_mechanism) {
406 opal_show_help("help-btl-vader.txt", "xpmem-make-failed",
407 true, opal_process_info.nodename, errno,
408 strerror(errno));
409 }
410
411 mca_btl_vader_select_next_single_copy_mechanism ();
412 }
413 }
414 #endif
415
416 #if OPAL_BTL_VADER_HAVE_CMA
417 if (MCA_BTL_VADER_CMA == mca_btl_vader_component.single_copy_mechanism) {
418
419 char buffer = '0';
420 bool cma_happy = false;
421 int fd;
422
423
424 fd = open ("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
425 if (0 <= fd) {
426 read (fd, &buffer, 1);
427 close (fd);
428 }
429
430
431
432
433 if ('0' != buffer) {
434 #if defined PR_SET_PTRACER
435
436 int ret = prctl (PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
437 if (0 == ret) {
438 cma_happy = true;
439 }
440 #endif
441 } else {
442 cma_happy = true;
443 }
444
445 if (!cma_happy) {
446 mca_btl_vader_select_next_single_copy_mechanism ();
447
448 if (MCA_BTL_VADER_CMA == initial_mechanism) {
449 opal_show_help("help-btl-vader.txt", "cma-permission-denied",
450 true, opal_process_info.nodename);
451 }
452 } else {
453
454 mca_btl_vader.super.btl_get = mca_btl_vader_get_cma;
455 mca_btl_vader.super.btl_put = mca_btl_vader_put_cma;
456 }
457 }
458 #endif
459
460 #if OPAL_BTL_VADER_HAVE_KNEM
461 if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) {
462
463 int rc = mca_btl_vader_knem_init ();
464 if (OPAL_SUCCESS != rc) {
465 if (MCA_BTL_VADER_KNEM == initial_mechanism) {
466 opal_show_help("help-btl-vader.txt", "knem requested but not available",
467 true, opal_process_info.nodename);
468 }
469
470
471 mca_btl_vader_select_next_single_copy_mechanism ();
472 }
473 }
474 #endif
475
476 if (MCA_BTL_VADER_NONE == mca_btl_vader_component.single_copy_mechanism) {
477 mca_btl_vader.super.btl_flags &= ~MCA_BTL_FLAGS_RDMA;
478 mca_btl_vader.super.btl_get = NULL;
479 mca_btl_vader.super.btl_put = NULL;
480 }
481
482 if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
483
484 mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
485 mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
486 }
487 }
488
489
490
491
492 static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
493 bool enable_progress_threads,
494 bool enable_mpi_threads)
495 {
496 mca_btl_vader_component_t *component = &mca_btl_vader_component;
497 mca_btl_base_module_t **btls = NULL;
498 int rc;
499
500 *num_btls = 0;
501
502
503 if (0 == MCA_BTL_VADER_NUM_LOCAL_PEERS) {
504 BTL_VERBOSE(("No peers to communicate with. Disabling vader."));
505 return NULL;
506 }
507
508 #if OPAL_BTL_VADER_HAVE_XPMEM
509
510 if (component->log_attach_align < 12) {
511 component->log_attach_align = 12;
512 } else if (component->log_attach_align > 25) {
513 component->log_attach_align = 25;
514 }
515 #endif
516
517 btls = (mca_btl_base_module_t **) calloc (1, sizeof (mca_btl_base_module_t *));
518 if (NULL == btls) {
519 return NULL;
520 }
521
522
523 if (component->segment_size < (2 << 20)) {
524 component->segment_size = (2 << 20);
525 }
526
527 component->fbox_size = (component->fbox_size + MCA_BTL_VADER_FBOX_ALIGNMENT_MASK) & ~MCA_BTL_VADER_FBOX_ALIGNMENT_MASK;
528
529 if (component->segment_size > (1ul << MCA_BTL_VADER_OFFSET_BITS)) {
530 component->segment_size = 2ul << MCA_BTL_VADER_OFFSET_BITS;
531 }
532
533
534 component->num_fbox_in_endpoints = 0;
535
536 mca_btl_vader_check_single_copy ();
537
538 if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
539 char *sm_file;
540
541 rc = opal_asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%x.%d", mca_btl_vader_component.backing_directory,
542 opal_process_info.nodename, OPAL_PROC_MY_NAME.jobid, MCA_BTL_VADER_LOCAL_RANK);
543 if (0 > rc) {
544 free (btls);
545 return NULL;
546 }
547 if (NULL != opal_pmix.register_cleanup) {
548 opal_pmix.register_cleanup (sm_file, false, false, false);
549 }
550
551 rc = opal_shmem_segment_create (&component->seg_ds, sm_file, component->segment_size);
552 free (sm_file);
553 if (OPAL_SUCCESS != rc) {
554 BTL_VERBOSE(("Could not create shared memory segment"));
555 free (btls);
556 return NULL;
557 }
558
559 component->my_segment = opal_shmem_segment_attach (&component->seg_ds);
560 if (NULL == component->my_segment) {
561 BTL_VERBOSE(("Could not attach to just created shared memory segment"));
562 goto failed;
563 }
564 } else {
565
566 component->my_segment = mmap (NULL, component->segment_size, PROT_READ |
567 PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
568 if ((void *)-1 == component->my_segment) {
569 BTL_VERBOSE(("Could not create anonymous memory segment"));
570 free (btls);
571 return NULL;
572 }
573 }
574
575
576 vader_fifo_init ((struct vader_fifo_t *) component->my_segment);
577
578 rc = mca_btl_base_vader_modex_send ();
579 if (OPAL_SUCCESS != rc) {
580 BTL_VERBOSE(("Error sending modex"));
581 goto failed;
582 }
583
584 *num_btls = 1;
585
586
587 btls[0] = (mca_btl_base_module_t *) &mca_btl_vader;
588
589
590 mca_btl_vader.btl_inited = false;
591
592 return btls;
593 failed:
594 #if OPAL_BTL_VADER_HAVE_XPMEM
595 if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
596 munmap (component->my_segment, component->segment_size);
597 } else
598 #endif
599 opal_shmem_unlink (&component->seg_ds);
600
601 if (btls) {
602 free (btls);
603 }
604
605 return NULL;
606 }
607
608 void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *endpoint)
609 {
610 mca_btl_base_segment_t segments[2];
611 mca_btl_base_descriptor_t frag = {.des_segments = segments, .des_segment_count = 1};
612 const mca_btl_active_message_callback_t *reg;
613
614 if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) {
615 mca_btl_vader_frag_complete (hdr->frag);
616 return;
617 }
618
619 reg = mca_btl_base_active_message_trigger + hdr->tag;
620 segments[0].seg_addr.pval = (void *) (hdr + 1);
621 segments[0].seg_len = hdr->len;
622
623 if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) {
624 mca_rcache_base_registration_t *xpmem_reg;
625
626 xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base,
627 hdr->sc_iov.iov_len, 0,
628 &segments[1].seg_addr.pval);
629 assert (NULL != xpmem_reg);
630
631 segments[1].seg_len = hdr->sc_iov.iov_len;
632 frag.des_segment_count = 2;
633
634
635 reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata);
636 vader_return_registration (xpmem_reg, endpoint);
637 } else {
638 reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata);
639 }
640
641 if (OPAL_UNLIKELY(MCA_BTL_VADER_FLAG_SETUP_FBOX & hdr->flags)) {
642 mca_btl_vader_endpoint_setup_fbox_recv (endpoint, relative2virtual(hdr->fbox_base));
643 mca_btl_vader_component.fbox_in_endpoints[mca_btl_vader_component.num_fbox_in_endpoints++] = endpoint;
644 }
645
646 hdr->flags = MCA_BTL_VADER_FLAG_COMPLETE;
647 vader_fifo_write_back (hdr, endpoint);
648 }
649
650 static int mca_btl_vader_poll_fifo (void)
651 {
652 struct mca_btl_base_endpoint_t *endpoint;
653 mca_btl_vader_hdr_t *hdr;
654
655
656 for (int fifo_count = 0 ; fifo_count < 31 ; ++fifo_count) {
657 hdr = vader_fifo_read (mca_btl_vader_component.my_fifo, &endpoint);
658 if (NULL == hdr) {
659 return fifo_count;
660 }
661
662 mca_btl_vader_poll_handle_frag (hdr, endpoint);
663 }
664
665 return 1;
666 }
667
668
669
670
671
672
673
674
675
676 static void mca_btl_vader_progress_waiting (mca_btl_base_endpoint_t *ep)
677 {
678 mca_btl_vader_frag_t *frag, *next;
679 int ret = 1;
680
681 if (OPAL_UNLIKELY(NULL == ep)) {
682 return;
683 }
684
685 OPAL_THREAD_LOCK(&ep->pending_frags_lock);
686 OPAL_LIST_FOREACH_SAFE(frag, next, &ep->pending_frags, mca_btl_vader_frag_t) {
687 ret = vader_fifo_write_ep (frag->hdr, ep);
688 if (!ret) {
689 OPAL_THREAD_UNLOCK(&ep->pending_frags_lock);
690 return;
691 }
692
693 (void) opal_list_remove_first (&ep->pending_frags);
694 }
695
696 ep->waiting = false;
697 opal_list_remove_item (&mca_btl_vader_component.pending_endpoints, &ep->super);
698
699 OPAL_THREAD_UNLOCK(&ep->pending_frags_lock);
700 }
701
702
703
704
705
706
707 static void mca_btl_vader_progress_endpoints (void)
708 {
709 mca_btl_base_endpoint_t *ep, *next;
710 int count;
711
712 count = opal_list_get_size (&mca_btl_vader_component.pending_endpoints);
713 if (OPAL_LIKELY(0 == count)) {
714 return;
715 }
716
717 OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
718 OPAL_LIST_FOREACH_SAFE(ep, next, &mca_btl_vader_component.pending_endpoints, mca_btl_base_endpoint_t) {
719 mca_btl_vader_progress_waiting (ep);
720 }
721 OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
722 }
723
724 static int mca_btl_vader_component_progress (void)
725 {
726 static opal_atomic_int32_t lock = 0;
727 int count = 0;
728
729 if (opal_using_threads()) {
730 if (opal_atomic_swap_32 (&lock, 1)) {
731 return 0;
732 }
733 }
734
735
736 if (mca_btl_vader_component.num_fbox_in_endpoints) {
737 count = mca_btl_vader_check_fboxes ();
738 }
739
740 mca_btl_vader_progress_endpoints ();
741
742 if (VADER_FIFO_FREE == mca_btl_vader_component.my_fifo->fifo_head) {
743 lock = 0;
744 return count;
745 }
746
747 count += mca_btl_vader_poll_fifo ();
748 opal_atomic_mb ();
749 lock = 0;
750
751 return count;
752 }