This source file includes the following definitions.
- btl_exclusivity_compare
- mca_bml_r2_add_btls
- btl_bandwidth_compare
- mca_bml_r2_calculate_bandwidth_latency
- mca_bml_r2_allocate_endpoint
- mca_bml_r2_register_progress
- mca_bml_r2_endpoint_add_btl
- mca_bml_r2_compute_endpoint_metrics
- mca_bml_r2_add_proc
- mca_bml_r2_add_procs
- mca_bml_r2_del_procs
- bml_r2_remove_btl_progress
- mca_bml_r2_del_proc_btl
- mca_bml_r2_finalize
- mca_bml_r2_del_btl
- mca_bml_r2_add_btl
- mca_bml_r2_register
- mca_bml_r2_register_error
- mca_bml_r2_component_fini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 #include "ompi_config.h"
31
32 #include <stdlib.h>
33 #include <string.h>
34
35 #include "opal/class/opal_bitmap.h"
36 #include "opal/util/argv.h"
37 #include "opal/util/show_help.h"
38 #include "opal/util/output.h"
39 #include "ompi/mca/bml/bml.h"
40 #include "ompi/mca/bml/base/base.h"
41 #include "opal/mca/btl/btl.h"
42 #include "opal/mca/btl/base/base.h"
43 #include "ompi/mca/bml/base/bml_base_btl.h"
44 #include "bml_r2.h"
45 #include "ompi/proc/proc.h"
46
47 extern mca_bml_base_component_t mca_bml_r2_component;
48
49
50 static char *btl_names = NULL;
51
52 static int btl_exclusivity_compare(const void* arg1, const void* arg2)
53 {
54 mca_btl_base_module_t* btl1 = *(struct mca_btl_base_module_t**)arg1;
55 mca_btl_base_module_t* btl2 = *(struct mca_btl_base_module_t**)arg2;
56 if( btl1->btl_exclusivity > btl2->btl_exclusivity ) {
57 return -1;
58 } else if (btl1->btl_exclusivity == btl2->btl_exclusivity ) {
59 return 0;
60 } else {
61 return 1;
62 }
63 }
64
65 static int mca_bml_r2_add_btls( void )
66 {
67 int i;
68 opal_list_t *btls = NULL;
69 mca_btl_base_selected_module_t* selected_btl;
70 size_t num_btls = 0;
71 char **btl_names_argv = NULL;
72
73 if(true == mca_bml_r2.btls_added) {
74 return OMPI_SUCCESS;
75 }
76
77
78 btls = &mca_btl_base_modules_initialized;
79 num_btls = opal_list_get_size(btls);
80
81 mca_bml_r2.num_btl_modules = 0;
82 mca_bml_r2.num_btl_progress = 0;
83
84 mca_bml_r2.btl_modules = (mca_btl_base_module_t **)malloc(sizeof(mca_btl_base_module_t*) * num_btls);
85 mca_bml_r2.btl_progress = (mca_btl_base_component_progress_fn_t*)malloc(sizeof(mca_btl_base_component_progress_fn_t) * num_btls);
86
87 if (NULL == mca_bml_r2.btl_modules ||
88 NULL == mca_bml_r2.btl_progress) {
89 return OMPI_ERR_OUT_OF_RESOURCE;
90 }
91
92 OPAL_LIST_FOREACH(selected_btl, btls, mca_btl_base_selected_module_t) {
93 mca_btl_base_module_t *btl = selected_btl->btl_module;
94 mca_bml_r2.btl_modules[mca_bml_r2.num_btl_modules++] = btl;
95 for (i = 0; NULL != btl_names_argv && NULL != btl_names_argv[i]; ++i) {
96 if (0 ==
97 strcmp(btl_names_argv[i],
98 btl->btl_component->btl_version.mca_component_name)) {
99 break;
100 }
101 }
102 if (NULL == btl_names_argv || NULL == btl_names_argv[i]) {
103 opal_argv_append_nosize(&btl_names_argv,
104 btl->btl_component->btl_version.mca_component_name);
105 }
106 }
107 if (NULL != btl_names_argv) {
108 btl_names = opal_argv_join(btl_names_argv, ' ');
109 opal_argv_free(btl_names_argv);
110 } else {
111 btl_names = strdup("no devices available");
112 }
113
114
115 qsort(mca_bml_r2.btl_modules,
116 mca_bml_r2.num_btl_modules,
117 sizeof(struct mca_btl_base_module_t*),
118 btl_exclusivity_compare);
119 mca_bml_r2.btls_added = true;
120 return OMPI_SUCCESS;
121 }
122
123 static int btl_bandwidth_compare(const void *v1, const void *v2)
124 {
125 mca_bml_base_btl_t *b1 = (mca_bml_base_btl_t*)v1,
126 *b2 = (mca_bml_base_btl_t*)v2;
127
128 return b2->btl->btl_bandwidth - b1->btl->btl_bandwidth;
129 }
130
131 static void mca_bml_r2_calculate_bandwidth_latency (mca_bml_base_btl_array_t *btl_array, double *total_bandwidth, uint32_t *latency)
132 {
133 const size_t array_length = mca_bml_base_btl_array_get_size (btl_array);
134
135 *latency = UINT_MAX;
136 *total_bandwidth = 0.;
137
138 for (size_t i = 0 ; i < array_length ; ++i) {
139 mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_index (btl_array, i);
140 mca_btl_base_module_t *btl = bml_btl->btl;
141 *total_bandwidth += btl->btl_bandwidth;
142 if (btl->btl_latency < *latency) {
143 *latency = btl->btl_latency;
144 }
145 }
146 }
147
148 static mca_bml_base_endpoint_t *mca_bml_r2_allocate_endpoint (ompi_proc_t *proc) {
149 mca_bml_base_endpoint_t *bml_endpoint;
150
151
152 bml_endpoint = OBJ_NEW(mca_bml_base_endpoint_t);
153 if (NULL == bml_endpoint) {
154 opal_output(0, "%s: unable to allocate resources", __func__);
155 return NULL;
156 }
157
158
159 mca_bml_base_btl_array_reserve(&bml_endpoint->btl_eager, mca_bml_r2.num_btl_modules);
160 mca_bml_base_btl_array_reserve(&bml_endpoint->btl_send, mca_bml_r2.num_btl_modules);
161 mca_bml_base_btl_array_reserve(&bml_endpoint->btl_rdma, mca_bml_r2.num_btl_modules);
162 bml_endpoint->btl_max_send_size = -1;
163 bml_endpoint->btl_proc = proc;
164
165 bml_endpoint->btl_flags_or = 0;
166 return bml_endpoint;
167 }
168
169 static void mca_bml_r2_register_progress (mca_btl_base_module_t *btl, bool hp)
170 {
171 if (NULL != btl->btl_component->btl_progress) {
172 bool found = false;
173 size_t p;
174
175 for (p = 0 ; p < mca_bml_r2.num_btl_progress ; ++p) {
176 if(mca_bml_r2.btl_progress[p] == btl->btl_component->btl_progress) {
177 found = true;
178 break;
179 }
180 }
181
182 if (found == false || hp) {
183 if (found == false) {
184 mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress++] =
185 btl->btl_component->btl_progress;
186 }
187
188 if (hp) {
189 opal_progress_register (btl->btl_component->btl_progress);
190 } else {
191 opal_progress_register_lp (btl->btl_component->btl_progress);
192 }
193 }
194 }
195 }
196
197 static int mca_bml_r2_endpoint_add_btl (struct ompi_proc_t *proc, mca_bml_base_endpoint_t *bml_endpoint,
198 mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *btl_endpoint)
199 {
200 mca_bml_base_btl_t* bml_btl = NULL;
201 int btl_flags = btl->btl_flags;
202 bool btl_in_use = false;
203 size_t size;
204
205
206
207 if ((btl_flags & MCA_BTL_FLAGS_PUT) && (NULL == btl->btl_put)) {
208 opal_output(0, "%s: The PUT flag is specified for"
209 " the %s BTL without any PUT function attached. Discard the flag !",
210 __func__,
211 btl->btl_component->btl_version.mca_component_name);
212 btl_flags ^= MCA_BTL_FLAGS_PUT;
213 }
214 if ((btl_flags & MCA_BTL_FLAGS_GET) && (NULL == btl->btl_get)) {
215 opal_output(0, "%s: The GET flag is specified for"
216 " the %s BTL without any GET function attached. Discard the flag !",
217 __func__, btl->btl_component->btl_version.mca_component_name);
218 btl_flags ^= MCA_BTL_FLAGS_GET;
219 }
220
221 if ((btl_flags & (MCA_BTL_FLAGS_PUT | MCA_BTL_FLAGS_GET | MCA_BTL_FLAGS_SEND)) == 0) {
222
223
224
225
226 btl_flags |= MCA_BTL_FLAGS_SEND;
227 }
228
229 if (btl_flags & MCA_BTL_FLAGS_SEND) {
230
231 size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
232 bml_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, size - 1);
233
234 if (!bml_btl || bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity) {
235
236
237 opal_output_verbose(1, opal_btl_base_framework.framework_output,
238 "mca: bml: Using %s btl for send to %s on node %s",
239 btl->btl_component->btl_version.mca_component_name,
240 OMPI_NAME_PRINT(&proc->super.proc_name),
241 proc->super.proc_hostname);
242
243
244 if (NULL == bml_btl || (bml_btl->btl->btl_exclusivity <= btl->btl_exclusivity)) {
245 bml_btl = mca_bml_base_btl_array_insert (&bml_endpoint->btl_send);
246 bml_btl->btl = btl;
247 bml_btl->btl_endpoint = btl_endpoint;
248 bml_btl->btl_weight = 0;
249 bml_btl->btl_flags = btl_flags;
250
251
252
253
254 bml_endpoint->btl_flags_or |= bml_btl->btl_flags;
255 } else {
256 opal_output_verbose(20, opal_btl_base_framework.framework_output,
257 "mca: bml: Not using %s btl for send to %s on node %s "
258 "because %s btl has higher exclusivity (%d > %d)",
259 btl->btl_component->btl_version.mca_component_name,
260 OMPI_NAME_PRINT(&proc->super.proc_name), proc->super.proc_hostname,
261 bml_btl->btl->btl_component->btl_version.mca_component_name,
262 bml_btl->btl->btl_exclusivity,
263 btl->btl_exclusivity);
264 }
265
266 btl_in_use = true;
267 }
268 }
269
270
271 if (((btl_in_use && (btl_flags & MCA_BTL_FLAGS_RDMA)) ||
272 (btl_flags & (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) == (MCA_BTL_FLAGS_RDMA | MCA_BTL_FLAGS_ATOMIC_FOPS)) &&
273 !((proc->super.proc_arch != ompi_proc_local_proc->super.proc_arch) &&
274 (0 == (btl->btl_flags & MCA_BTL_FLAGS_HETEROGENEOUS_RDMA)))) {
275 mca_bml_base_btl_t *bml_btl_rdma = mca_bml_base_btl_array_insert(&bml_endpoint->btl_rdma);
276
277 bml_btl_rdma->btl = btl;
278 bml_btl_rdma->btl_endpoint = btl_endpoint;
279 bml_btl_rdma->btl_weight = 0;
280 bml_btl_rdma->btl_flags = btl_flags;
281
282 if (bml_endpoint->btl_pipeline_send_length < btl->btl_rdma_pipeline_send_length) {
283 bml_endpoint->btl_pipeline_send_length = btl->btl_rdma_pipeline_send_length;
284 }
285
286 if (bml_endpoint->btl_send_limit < btl->btl_min_rdma_pipeline_size) {
287 bml_endpoint->btl_send_limit = btl->btl_min_rdma_pipeline_size;
288 }
289
290 btl_in_use = true;
291 }
292
293 return btl_in_use ? OMPI_SUCCESS : OMPI_ERR_NOT_AVAILABLE;
294 }
295
296 static void mca_bml_r2_compute_endpoint_metrics (mca_bml_base_endpoint_t *bml_endpoint)
297 {
298 double total_bandwidth = 0;
299 uint32_t latency;
300 size_t n_send, n_rdma;
301
302
303
304
305
306
307
308
309 n_send = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
310 n_rdma = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
311
312
313 qsort (bml_endpoint->btl_send.bml_btls, n_send,
314 sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
315
316 bml_endpoint->btl_rdma_index = 0;
317
318 mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_send, &total_bandwidth, &latency);
319
320
321
322
323
324 for (size_t n_index = 0 ; n_index < n_send ; ++n_index) {
325 mca_bml_base_btl_t *bml_btl =
326 mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, n_index);
327 mca_btl_base_module_t *btl = bml_btl->btl;
328
329
330 if(btl->btl_bandwidth > 0) {
331 bml_btl->btl_weight = (float)(btl->btl_bandwidth / total_bandwidth);
332 } else {
333 bml_btl->btl_weight = (float)(1.0 / n_send);
334 }
335
336
337
338
339 if(btl->btl_latency == latency) {
340 mca_bml_base_btl_t* bml_btl_new =
341 mca_bml_base_btl_array_insert(&bml_endpoint->btl_eager);
342 *bml_btl_new = *bml_btl;
343 }
344
345
346 if (bml_endpoint->btl_max_send_size > btl->btl_max_send_size)
347 bml_endpoint->btl_max_send_size = btl->btl_max_send_size;
348 }
349
350
351 qsort(bml_endpoint->btl_rdma.bml_btls, n_rdma,
352 sizeof(mca_bml_base_btl_t), btl_bandwidth_compare);
353
354 mca_bml_r2_calculate_bandwidth_latency (&bml_endpoint->btl_rdma, &total_bandwidth, &latency);
355
356
357 for (size_t n_index = 0 ; n_index < n_rdma ; ++n_index) {
358 mca_bml_base_btl_t *bml_btl =
359 mca_bml_base_btl_array_get_index(&bml_endpoint->btl_rdma, n_index);
360
361
362 if (bml_btl->btl->btl_bandwidth > 0.0) {
363 bml_btl->btl_weight = (float)(bml_btl->btl->btl_bandwidth / total_bandwidth);
364 } else {
365 bml_btl->btl_weight = (float)(1.0 / n_rdma);
366 }
367 }
368 }
369
370 static int mca_bml_r2_add_proc (struct ompi_proc_t *proc)
371 {
372 mca_bml_base_endpoint_t *bml_endpoint;
373
374 bool btl_in_use = false;
375 int rc;
376
377 if (OPAL_UNLIKELY(NULL == proc)) {
378 return OMPI_ERR_BAD_PARAM;
379 }
380
381
382 if (NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
383 OBJ_RETAIN(proc);
384 return OMPI_SUCCESS;
385 }
386
387
388 if (OMPI_SUCCESS != (rc = mca_bml_r2_add_btls())) {
389 return rc;
390 }
391
392 bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
393 if (OPAL_UNLIKELY(NULL == bml_endpoint)) {
394 return OMPI_ERR_OUT_OF_RESOURCE;
395 }
396
397 for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
398 mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
399 struct mca_btl_base_endpoint_t *btl_endpoint = NULL;
400
401
402
403
404
405
406 rc = btl->btl_add_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint, NULL);
407 if (OMPI_SUCCESS != rc || NULL == btl_endpoint) {
408
409
410 continue;
411 }
412
413 rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoint);
414 if (OMPI_SUCCESS != rc) {
415 btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &btl_endpoint);
416 } else {
417 mca_bml_r2_register_progress (btl, true);
418 btl_in_use = true;
419 }
420 }
421
422 if (!btl_in_use) {
423 proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
424 OBJ_RELEASE(bml_endpoint);
425
426 if (mca_bml_r2.show_unreach_errors) {
427 opal_show_help ("help-mca-bml-r2.txt", "unreachable proc", true,
428 OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
429 (NULL != ompi_proc_local_proc->super.proc_hostname ?
430 ompi_proc_local_proc->super.proc_hostname : "unknown!"),
431 OMPI_NAME_PRINT(&(proc->super.proc_name)),
432 (NULL != proc->super.proc_hostname ?
433 proc->super.proc_hostname : "unknown!"),
434 btl_names);
435 }
436
437 return OMPI_ERR_UNREACH;
438 }
439
440
441 mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
442
443
444 opal_atomic_wmb();
445 proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;
446
447 return OMPI_SUCCESS;
448 }
449
450
451
452
453
454
455
456 static int mca_bml_r2_add_procs( size_t nprocs,
457 struct ompi_proc_t** procs,
458 struct opal_bitmap_t* reachable )
459 {
460 size_t n_new_procs = 0;
461 struct mca_btl_base_endpoint_t ** btl_endpoints = NULL;
462 struct ompi_proc_t** new_procs = NULL;
463 int rc, ret = OMPI_SUCCESS;
464
465 if(0 == nprocs) {
466 return OMPI_SUCCESS;
467 }
468
469 if(OMPI_SUCCESS != (rc = mca_bml_r2_add_btls()) ) {
470 return rc;
471 }
472
473
474
475
476 for (size_t p_index = 0 ; p_index < nprocs ; ++p_index) {
477 struct ompi_proc_t* proc = procs[p_index];
478
479 if(NULL != proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
480 continue;
481 }
482
483 if( NULL == new_procs ) {
484 new_procs = (struct ompi_proc_t **)malloc(nprocs * sizeof(struct ompi_proc_t *));
485 if( NULL == new_procs ) {
486 return OMPI_ERR_OUT_OF_RESOURCE;
487 }
488 }
489 OBJ_RETAIN(proc);
490 new_procs[n_new_procs++] = proc;
491 }
492
493 if ( 0 == n_new_procs ) {
494 return OMPI_SUCCESS;
495 }
496
497
498 procs = new_procs;
499 nprocs = n_new_procs;
500
501
502 btl_endpoints = (struct mca_btl_base_endpoint_t **)
503 malloc(nprocs * sizeof(struct mca_btl_base_endpoint_t*));
504 if (NULL == btl_endpoints) {
505 free(new_procs);
506 return OMPI_ERR_OUT_OF_RESOURCE;
507 }
508
509 for (size_t p_index = 0 ; p_index < mca_bml_r2.num_btl_modules ; ++p_index) {
510 mca_btl_base_module_t *btl = mca_bml_r2.btl_modules[p_index];
511 int btl_inuse = 0;
512
513
514
515
516
517
518 opal_bitmap_clear_all_bits(reachable);
519 memset(btl_endpoints, 0, nprocs *sizeof(struct mca_btl_base_endpoint_t*));
520
521 rc = btl->btl_add_procs(btl, n_new_procs, (opal_proc_t**)new_procs, btl_endpoints, reachable);
522 if (OMPI_SUCCESS != rc) {
523
524
525 continue;
526 }
527
528
529 for (size_t p = 0 ; p < n_new_procs ; ++p) {
530 if (!opal_bitmap_is_set_bit(reachable, p)) {
531 continue;
532 }
533
534 ompi_proc_t *proc = new_procs[p];
535 mca_bml_base_endpoint_t *bml_endpoint =
536 (mca_bml_base_endpoint_t *) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
537
538 if (NULL == bml_endpoint) {
539 bml_endpoint = mca_bml_r2_allocate_endpoint (proc);
540 proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = bml_endpoint;
541 if (NULL == bml_endpoint) {
542 free(btl_endpoints);
543 free(new_procs);
544 return OPAL_ERR_OUT_OF_RESOURCE;
545 }
546 }
547
548 rc = mca_bml_r2_endpoint_add_btl (proc, bml_endpoint, btl, btl_endpoints[p]);
549 if (OMPI_SUCCESS != rc) {
550 btl->btl_del_procs(btl, 1, (opal_proc_t**)&proc, &btl_endpoints[p]);
551 continue;
552 }
553
554
555 btl_inuse++;
556 }
557
558 mca_bml_r2_register_progress (btl, !!(btl_inuse));
559 }
560
561 free(btl_endpoints);
562
563
564 for (size_t p = 0; p < n_new_procs ; ++p) {
565 mca_bml_base_endpoint_t *bml_endpoint =
566 (mca_bml_base_endpoint_t *) new_procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
567
568
569 if (NULL != bml_endpoint) {
570 mca_bml_r2_compute_endpoint_metrics (bml_endpoint);
571 }
572 }
573
574
575 for(size_t p = 0; p < n_new_procs ; ++p) {
576 ompi_proc_t *proc = new_procs[p];
577
578 if (NULL == proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
579 ret = OMPI_ERR_UNREACH;
580 if (mca_bml_r2.show_unreach_errors) {
581 opal_show_help("help-mca-bml-r2.txt", "unreachable proc", true,
582 OMPI_NAME_PRINT(&(ompi_proc_local_proc->super.proc_name)),
583 (NULL != ompi_proc_local_proc->super.proc_hostname ?
584 ompi_proc_local_proc->super.proc_hostname : "unknown!"),
585 OMPI_NAME_PRINT(&(proc->super.proc_name)),
586 (NULL != proc->super.proc_hostname ?
587 proc->super.proc_hostname : "unknown!"),
588 btl_names);
589 }
590
591 break;
592 }
593 }
594
595 free(new_procs);
596
597 return ret;
598 }
599
600
601
602
603
604
605 static int mca_bml_r2_del_procs(size_t nprocs,
606 struct ompi_proc_t** procs)
607 {
608 for (size_t p = 0 ; p < nprocs ; ++p) {
609 ompi_proc_t *proc = procs[p];
610 mca_bml_base_endpoint_t *bml_endpoint =
611 (mca_bml_base_endpoint_t*) proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
612
613 if (!bml_endpoint) {
614
615 continue;
616 }
617
618
619 size_t f_size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_send);
620 for (size_t f_index = 0 ; f_index < f_size ; ++f_index) {
621 mca_bml_base_btl_t* bml_btl = mca_bml_base_btl_array_get_index(&bml_endpoint->btl_send, f_index);
622 mca_btl_base_module_t *btl = bml_btl->btl;
623
624 int rc = btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &bml_btl->btl_endpoint);
625 if (OPAL_SUCCESS != rc) {
626 return rc;
627 }
628
629
630
631
632
633 }
634
635
636 size_t r_size = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
637 for (size_t r_index = 0 ; r_index < r_size ; ++r_index) {
638 mca_bml_base_btl_t *rdma_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_rdma, r_index);
639 mca_btl_base_module_t *btl = rdma_btl->btl;
640 bool needs_del = true;
641
642 for (size_t f_index = 0 ; f_index < f_size ; ++f_index) {
643 mca_bml_base_btl_t *bml_btl = mca_bml_base_btl_array_get_index (&bml_endpoint->btl_send, f_index);
644 if (bml_btl->btl_endpoint == rdma_btl->btl_endpoint) {
645 needs_del = false;
646 break;
647 }
648 }
649
650 if (needs_del) {
651 int rc = btl->btl_del_procs (btl, 1, (opal_proc_t **) &proc, &rdma_btl->btl_endpoint);
652 if (OPAL_SUCCESS != rc) {
653 return rc;
654 }
655 }
656 }
657
658 proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
659
660
661 OBJ_RELEASE(proc);
662
663
664 OBJ_RELEASE(bml_endpoint);
665 }
666
667 return OMPI_SUCCESS;
668 }
669
670 static inline int bml_r2_remove_btl_progress(mca_btl_base_module_t* btl)
671 {
672 unsigned int p;
673
674 if(NULL == btl->btl_component->btl_progress) {
675 return OMPI_SUCCESS;
676 }
677 for(p = 0; p < mca_bml_r2.num_btl_progress; p++) {
678 if(btl->btl_component->btl_progress != mca_bml_r2.btl_progress[p])
679 continue;
680 opal_progress_unregister( btl->btl_component->btl_progress );
681 if( p < (mca_bml_r2.num_btl_progress-1) ) {
682 mca_bml_r2.btl_progress[p] = mca_bml_r2.btl_progress[mca_bml_r2.num_btl_progress-1];
683 }
684 mca_bml_r2.num_btl_progress--;
685 return OMPI_SUCCESS;
686 }
687 return OMPI_ERR_NOT_FOUND;
688 }
689
690 static int mca_bml_r2_del_proc_btl(ompi_proc_t* proc, mca_btl_base_module_t* btl)
691 {
692 mca_bml_base_endpoint_t* ep = (mca_bml_base_endpoint_t*)proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
693 mca_bml_base_btl_t* bml_btl;
694 mca_btl_base_module_t* ep_btl;
695 double total_bandwidth = 0;
696 size_t b;
697
698 if(NULL == ep)
699 return OMPI_SUCCESS;
700
701
702 mca_bml_base_btl_array_remove(&ep->btl_eager, btl);
703
704
705 if(mca_bml_base_btl_array_remove(&ep->btl_send, btl)) {
706
707
708
709 total_bandwidth = 0;
710 ep->btl_max_send_size = -1;
711 for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_send); b++) {
712 bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
713 ep_btl = bml_btl->btl;
714
715 total_bandwidth += ep_btl->btl_bandwidth;
716 if (ep->btl_max_send_size > ep_btl->btl_max_send_size) {
717 ep->btl_max_send_size = ep_btl->btl_max_send_size;
718 }
719 }
720
721
722 for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_send); b++) {
723 bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_send, b);
724 ep_btl = bml_btl->btl;
725
726 if(ep_btl->btl_bandwidth > 0) {
727 bml_btl->btl_weight = (float)(ep_btl->btl_bandwidth / total_bandwidth);
728 } else {
729 bml_btl->btl_weight = (float)(1.0 / mca_bml_base_btl_array_get_size(&ep->btl_send));
730 }
731 }
732 }
733
734
735 if(mca_bml_base_btl_array_remove(&ep->btl_rdma, btl)) {
736
737
738 total_bandwidth = 0;
739 ep->btl_pipeline_send_length = 0;
740 ep->btl_send_limit = 0;
741 for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_rdma); b++) {
742 bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);
743 ep_btl = bml_btl->btl;
744
745
746 total_bandwidth += ep_btl->btl_bandwidth;
747 if (ep->btl_pipeline_send_length < ep_btl->btl_rdma_pipeline_send_length) {
748 ep->btl_pipeline_send_length = ep_btl->btl_rdma_pipeline_send_length;
749 }
750 if (ep->btl_send_limit < ep_btl->btl_min_rdma_pipeline_size) {
751 ep->btl_send_limit = ep_btl->btl_min_rdma_pipeline_size;
752 }
753 }
754
755
756 for(b=0; b< mca_bml_base_btl_array_get_size(&ep->btl_rdma); b++) {
757 bml_btl = mca_bml_base_btl_array_get_index(&ep->btl_rdma, b);
758 ep_btl = bml_btl->btl;
759
760 if(ep_btl->btl_bandwidth > 0) {
761 bml_btl->btl_weight = (float)(ep_btl->btl_bandwidth / total_bandwidth);
762 } else {
763 bml_btl->btl_weight = (float)(1.0 / mca_bml_base_btl_array_get_size(&ep->btl_rdma));
764 }
765 }
766 }
767
768 return OMPI_SUCCESS;
769 }
770
771 int mca_bml_r2_finalize( void )
772 {
773 ompi_proc_t** procs;
774 size_t p, num_procs;
775 opal_list_item_t* w_item;
776
777 if (NULL != btl_names) {
778 free(btl_names);
779 btl_names = NULL;
780 }
781
782
783 procs = ompi_proc_all(&num_procs);
784 if(NULL == procs)
785 goto CLEANUP;
786
787 for (w_item = opal_list_get_first(&mca_btl_base_modules_initialized);
788 w_item != opal_list_get_end(&mca_btl_base_modules_initialized);
789 w_item = opal_list_get_next(w_item)) {
790 mca_btl_base_selected_module_t *sm = (mca_btl_base_selected_module_t *) w_item;
791 mca_btl_base_module_t* btl = sm->btl_module;
792
793
794 bml_r2_remove_btl_progress(btl);
795
796
797 for( p = 0; p < num_procs; p++ ) {
798 ompi_proc_t* proc = procs[p];
799 mca_bml_r2_del_proc_btl(proc, sm->btl_module);
800 }
801 }
802
803 for( p = 0; p < num_procs; p++ ) {
804 OBJ_RELEASE(procs[p]);
805 }
806 free(procs);
807
808 CLEANUP:
809 mca_bml_r2.num_btl_modules = 0;
810 mca_bml_r2.num_btl_progress = 0;
811
812 if( NULL != mca_bml_r2.btl_modules) {
813 free(mca_bml_r2.btl_modules);
814 mca_bml_r2.btl_modules = NULL;
815 }
816 if( NULL != mca_bml_r2.btl_progress ) {
817 free(mca_bml_r2.btl_progress);
818 mca_bml_r2.btl_progress = NULL;
819 }
820
821
822
823
824 return OMPI_SUCCESS;
825 }
826
827
828
829
830
831
832
833 static int mca_bml_r2_del_btl(mca_btl_base_module_t* btl)
834 {
835 ompi_proc_t** procs;
836 size_t i, m, p, num_procs;
837 opal_list_item_t* item;
838 mca_btl_base_module_t** modules;
839 bool found = false;
840
841 if(opal_list_get_size(&mca_btl_base_modules_initialized) == 2) {
842 opal_output(0, "only one BTL left, can't failover");
843 return OMPI_SUCCESS;
844 }
845
846 procs = ompi_proc_all(&num_procs);
847 if(NULL == procs)
848 return OMPI_SUCCESS;
849
850
851 bml_r2_remove_btl_progress(btl);
852
853
854 for( p = 0; p < num_procs; p++ ) {
855 ompi_proc_t* proc = procs[p];
856 mca_bml_r2_del_proc_btl(proc, btl);
857 }
858
859
860 for (item = opal_list_get_first(&mca_btl_base_modules_initialized);
861 item != opal_list_get_end(&mca_btl_base_modules_initialized);
862 item = opal_list_get_next(item)) {
863 mca_btl_base_selected_module_t *sm = (mca_btl_base_selected_module_t *) item;
864 if(sm->btl_module == btl) {
865 opal_list_remove_item(&mca_btl_base_modules_initialized, item);
866 free(sm);
867 found = true;
868 break;
869 }
870 }
871 if(!found) {
872
873 goto CLEANUP;
874 }
875
876 modules = (mca_btl_base_module_t**)malloc(sizeof(mca_btl_base_module_t*) * (mca_bml_r2.num_btl_modules-1));
877 for(i=0,m=0; i<mca_bml_r2.num_btl_modules; i++) {
878 if(mca_bml_r2.btl_modules[i] != btl) {
879 modules[m++] = mca_bml_r2.btl_modules[i];
880 }
881 }
882 free(mca_bml_r2.btl_modules);
883 mca_bml_r2.btl_modules = modules;
884 mca_bml_r2.num_btl_modules = m;
885
886
887 btl->btl_finalize(btl);
888 CLEANUP:
889
890 for( p = 0; p < num_procs; p++ ) {
891 OBJ_RELEASE(procs[p]);
892 }
893 free(procs);
894 return OMPI_SUCCESS;
895 }
896
897 static int mca_bml_r2_add_btl(mca_btl_base_module_t* btl)
898 {
899 return OMPI_ERR_NOT_IMPLEMENTED;
900 }
901
902
903
904
905
906 static int mca_bml_r2_register( mca_btl_base_tag_t tag,
907 mca_btl_base_module_recv_cb_fn_t cbfunc,
908 void* data )
909 {
910 mca_btl_base_active_message_trigger[tag].cbfunc = cbfunc;
911 mca_btl_base_active_message_trigger[tag].cbdata = data;
912
913
914
915 {
916 int i, rc;
917 mca_btl_base_module_t *btl;
918
919 for(i = 0; i < (int)mca_bml_r2.num_btl_modules; i++) {
920 btl = mca_bml_r2.btl_modules[i];
921 if( NULL == btl->btl_register )
922 continue;
923 rc = btl->btl_register(btl, tag, cbfunc, data);
924 if(OMPI_SUCCESS != rc) {
925 return rc;
926 }
927 }
928 }
929
930 return OMPI_SUCCESS;
931 }
932
933
934
935
936
937
938
939 static int mca_bml_r2_register_error( mca_btl_base_module_error_cb_fn_t cbfunc)
940 {
941 uint32_t i;
942 int rc;
943 mca_btl_base_module_t *btl;
944 uint32_t ver;
945
946 for(i = 0; i < mca_bml_r2.num_btl_modules; i++) {
947 btl = mca_bml_r2.btl_modules[i];
948
949
950 ver = btl->btl_component->btl_version.mca_type_major_version << 16 |
951 btl->btl_component->btl_version.mca_type_minor_version << 8 |
952 btl->btl_component->btl_version.mca_type_release_version;
953
954 if(ver >= ((1 << 16) | (0 << 8) | 1) &&
955 NULL != btl->btl_register_error) {
956 rc = btl->btl_register_error(btl, cbfunc);
957 if(OMPI_SUCCESS != rc) {
958 return rc;
959 }
960 }
961 }
962 return OMPI_SUCCESS;
963 }
964
965
966 int mca_bml_r2_component_fini(void)
967 {
968 return OMPI_SUCCESS;
969 }
970
971 mca_bml_r2_module_t mca_bml_r2 = {
972 .super = {
973 .bml_component = &mca_bml_r2_component,
974 .bml_add_proc = mca_bml_r2_add_proc,
975 .bml_add_procs = mca_bml_r2_add_procs,
976 .bml_del_procs = mca_bml_r2_del_procs,
977 .bml_add_btl = mca_bml_r2_add_btl,
978 .bml_del_btl = mca_bml_r2_del_btl,
979 .bml_del_proc_btl = mca_bml_r2_del_proc_btl,
980 .bml_register = mca_bml_r2_register,
981 .bml_register_error = mca_bml_r2_register_error,
982 .bml_finalize = mca_bml_r2_finalize,
983 .bml_ft_event = mca_bml_r2_ft_event,
984 },
985 };
986