This source file includes the following definitions.
- usnic_component_open
- usnic_component_close
- usnic_modex_send
- check_reg_mem_basics
- check_usnic_config
- usnic_clock_callback
- parse_ifex_str
- filter_module
- free_filter
- usnic_component_init
- usnic_component_progress
- usnic_handle_completion
- usnic_handle_cq_error
- usnic_component_progress_2
- dump_endpoint
- opal_btl_usnic_component_debug
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39 #include "opal_config.h"
40
41 #include <string.h>
42 #include <ctype.h>
43 #include <errno.h>
44 #include <unistd.h>
45 #include <stdlib.h>
46 #include <sys/time.h>
47 #include <sys/resource.h>
48 #include <sys/types.h>
49 #include <sys/stat.h>
50 #include <fcntl.h>
51
52 #include <rdma/fabric.h>
53
54 #include "opal_stdint.h"
55 #include "opal/prefetch.h"
56 #include "opal/mca/timer/base/base.h"
57 #include "opal/util/argv.h"
58 #include "opal/util/net.h"
59 #include "opal/util/if.h"
60 #include "opal/util/printf.h"
61 #include "opal/mca/base/mca_base_var.h"
62 #include "opal/mca/memchecker/base/base.h"
63 #include "opal/util/show_help.h"
64 #include "opal/constants.h"
65
66 #include "opal/mca/btl/btl.h"
67 #include "opal/mca/btl/base/base.h"
68 #include "opal/util/proc.h"
69
70 #include "btl_usnic.h"
71 #include "btl_usnic_connectivity.h"
72 #include "btl_usnic_frag.h"
73 #include "btl_usnic_endpoint.h"
74 #include "btl_usnic_module.h"
75 #include "btl_usnic_stats.h"
76 #include "btl_usnic_util.h"
77 #include "btl_usnic_ack.h"
78 #include "btl_usnic_send.h"
79 #include "btl_usnic_recv.h"
80 #include "btl_usnic_proc.h"
81 #include "btl_usnic_test.h"
82
/* Maximum number of completion entries fetched by a single fi_cq_read()
   call in usnic_component_progress_2(). */
#define OPAL_BTL_USNIC_NUM_COMPLETIONS 500


/* Recursive lock held while a completion is dispatched in
   usnic_handle_completion(); constructed in usnic_component_init() and
   destructed in usnic_component_close(). */
opal_recursive_mutex_t btl_usnic_lock = OPAL_RECURSIVE_MUTEX_STATIC_INIT;


/* Random number generator state; seeded from the PID in
   usnic_component_init(). */
opal_rng_buff_t opal_btl_usnic_rand_buff = {{0}};


/* Coarse clock: bumped by the timer callback and by each progress call. */
uint64_t opal_btl_usnic_ticks = 0;

/* Periodic timer event that drives usnic_clock_callback(), a flag
   recording whether the event has been set up, and its period. */
static opal_event_t usnic_clock_timer_event;
static bool usnic_clock_timer_event_set = false;
static struct timeval usnic_clock_timeout;



/* NOTE(review): not referenced in the visible part of this file;
   presumably toggled/consumed by the debug dump code -- confirm. */
static volatile bool dump_bitvectors = false;

/* Component entry points wired into mca_btl_usnic_component below. */
static int usnic_component_open(void);
static int usnic_component_close(void);
static mca_btl_base_module_t **
usnic_component_init(int* num_btl_modules, bool want_progress_threads,
                     bool want_mpi_threads);
static int usnic_component_progress(void);
108
109
/* One entry of an if_include / if_exclude filter: either a device name
   or an IPv4 network (address + netmask). */
typedef struct filter_elt_t {
    /* true: match on addr_be/netmask_be; false: match on if_name */
    bool is_netmask;


    /* Heap-allocated device name; only meaningful when !is_netmask */
    char *if_name;


    /* Masked network address and netmask; only meaningful when
       is_netmask (set by parse_ifex_str()) */
    uint32_t addr_be;
    uint32_t netmask_be;
} filter_elt_t;

/* A parsed filter: array of n_elt entries. */
typedef struct usnic_if_filter_t {
    int n_elt;
    filter_elt_t *elts;
} usnic_if_filter_t;

/* Filter helpers, defined further down in this file. */
static bool filter_module(opal_btl_usnic_module_t *module,
                          usnic_if_filter_t *filter,
                          bool filter_incl);
static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
                                         const char *name);
static void free_filter(usnic_if_filter_t *filter);
132
133
/* The usnic BTL component instance: version/lifecycle hooks plus the BTL
   init and progress entry points defined in this file. */
opal_btl_usnic_component_t mca_btl_usnic_component = {
    .super = {


        .btl_version = {
            USNIC_BTL_DEFAULT_VERSION("usnic"),
            .mca_open_component = usnic_component_open,
            .mca_close_component = usnic_component_close,
            .mca_register_component_params = opal_btl_usnic_component_register,
        },
        .btl_data = {

            .param_field = MCA_BASE_METADATA_PARAM_NONE
        },

        .btl_init = usnic_component_init,
        .btl_progress = usnic_component_progress,
    }
};
153
154
155
156
157
158 static int usnic_component_open(void)
159 {
160
161 mca_btl_usnic_component.num_modules = 0;
162 mca_btl_usnic_component.usnic_all_modules = NULL;
163 mca_btl_usnic_component.usnic_active_modules = NULL;
164 mca_btl_usnic_component.transport_header_len = -1;
165 mca_btl_usnic_component.prefix_send_offset = 0;
166
167
168 OBJ_CONSTRUCT(&mca_btl_usnic_component.usnic_procs, opal_list_t);
169
170
171
172 if (OPAL_SUCCESS !=
173 mca_base_var_check_exclusive("opal",
174 mca_btl_usnic_component.super.btl_version.mca_type_name,
175 mca_btl_usnic_component.super.btl_version.mca_component_name,
176 "if_include",
177 mca_btl_usnic_component.super.btl_version.mca_type_name,
178 mca_btl_usnic_component.super.btl_version.mca_component_name,
179 "if_exclude")) {
180
181
182 return OPAL_ERR_NOT_AVAILABLE;
183 }
184
185 return OPAL_SUCCESS;
186 }
187
188
189
190
191
192 static int usnic_component_close(void)
193 {
194
195
196
197
198
199
200 OBJ_DESTRUCT(&mca_btl_usnic_component.usnic_procs);
201
202 if (usnic_clock_timer_event_set) {
203 opal_event_del(&usnic_clock_timer_event);
204 usnic_clock_timer_event_set = false;
205 }
206
207
208 if (mca_btl_usnic_component.connectivity_enabled) {
209 opal_btl_usnic_connectivity_client_finalize();
210 opal_btl_usnic_connectivity_agent_finalize();
211 }
212 if (mca_btl_usnic_component.opal_evbase) {
213 opal_progress_thread_finalize(NULL);
214 }
215
216 free(mca_btl_usnic_component.usnic_all_modules);
217 free(mca_btl_usnic_component.usnic_active_modules);
218
219 #if OPAL_BTL_USNIC_UNIT_TESTS
220
221 opal_btl_usnic_cleanup_tests();
222 #endif
223
224 OBJ_DESTRUCT(&btl_usnic_lock);
225
226 return OPAL_SUCCESS;
227 }
228
229
230
231
232
233
234 static int usnic_modex_send(void)
235 {
236 int rc;
237 int i;
238 size_t size;
239 opal_btl_usnic_modex_t* modexes = NULL;
240
241 if (0 == mca_btl_usnic_component.num_modules) {
242 return OPAL_SUCCESS;
243 }
244
245 size = mca_btl_usnic_component.num_modules *
246 sizeof(opal_btl_usnic_modex_t);
247 modexes = (opal_btl_usnic_modex_t*) malloc(size);
248 if (NULL == modexes) {
249 return OPAL_ERR_OUT_OF_RESOURCE;
250 }
251
252 for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
253 opal_btl_usnic_module_t* module =
254 mca_btl_usnic_component.usnic_active_modules[i];
255 modexes[i] = module->local_modex;
256 opal_output_verbose(5, USNIC_OUT,
257 "btl:usnic: "
258 "control port:%d, "
259 "modex_send data port:%d, "
260 "%s",
261 modexes[i].ports[USNIC_PRIORITY_CHANNEL],
262 modexes[i].ports[USNIC_DATA_CHANNEL],
263 module->if_ipv4_addr_str);
264 }
265
266 usnic_compat_modex_send(&rc, &mca_btl_usnic_component.super.btl_version,
267 modexes, size);
268 free(modexes);
269
270 return rc;
271 }
272
273
274
275
276
277
278
279
280
281
282
283
284 static int check_reg_mem_basics(void)
285 {
286 #if HAVE_DECL_RLIMIT_MEMLOCK
287 int ret = OPAL_SUCCESS;
288 struct rlimit limit;
289 char *str_limit = NULL;
290
291 ret = getrlimit(RLIMIT_MEMLOCK, &limit);
292 if (0 == ret) {
293 if ((long) limit.rlim_cur > (64 * 1024) ||
294 limit.rlim_cur == RLIM_INFINITY) {
295 return OPAL_SUCCESS;
296 } else {
297 opal_asprintf(&str_limit, "%ld", (long)limit.rlim_cur);
298 }
299 } else {
300 opal_asprintf(&str_limit, "Unknown");
301 }
302
303 opal_show_help("help-mpi-btl-usnic.txt", "check_reg_mem_basics fail",
304 true,
305 opal_process_info.nodename,
306 str_limit);
307
308 return OPAL_ERR_OUT_OF_RESOURCE;
309 #else
310
311
312 return OPAL_SUCCESS;
313 #endif
314 }
315
316
317
318
319
320 static int check_usnic_config(opal_btl_usnic_module_t *module,
321 int num_local_procs)
322 {
323 char str[128];
324 unsigned unlp;
325 struct fi_usnic_info *uip;
326
327 uip = &module->usnic_info;
328
329
330
331 unlp = (unsigned) num_local_procs + 1;
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346 if (uip->ui.v1.ui_num_vf < unlp) {
347 snprintf(str, sizeof(str), "Not enough usNICs (found %d, need %d)",
348 uip->ui.v1.ui_num_vf, unlp);
349 goto error;
350 }
351
352 if (uip->ui.v1.ui_qp_per_vf < USNIC_NUM_CHANNELS) {
353 snprintf(str, sizeof(str), "Not enough transmit/receive queues per usNIC (found %d, need %d)",
354 uip->ui.v1.ui_qp_per_vf,
355 USNIC_NUM_CHANNELS);
356 goto error;
357 }
358 if (uip->ui.v1.ui_cq_per_vf < USNIC_NUM_CHANNELS) {
359 snprintf(str, sizeof(str),
360 "Not enough completion queues per usNIC (found %d, need %d)",
361 uip->ui.v1.ui_cq_per_vf,
362 USNIC_NUM_CHANNELS);
363 goto error;
364 }
365
366
367 return OPAL_SUCCESS;
368
369 error:
370
371 opal_show_help("help-mpi-btl-usnic.txt",
372 "not enough usnic resources",
373 true,
374 opal_process_info.nodename,
375 module->linux_device_name,
376 str);
377 return OPAL_ERROR;
378 }
379
380
381 static void usnic_clock_callback(int fd, short flags, void *timeout)
382 {
383
384 opal_btl_usnic_ticks += 1000000;
385
386
387 usnic_component_progress();
388
389 opal_event_add(&usnic_clock_timer_event, timeout);
390 }
391
392
393
394
395
396
397
398
399
400
401
402
403
404 static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
405 const char *name)
406 {
407 int i, ret;
408 char **argv, *str, *tmp;
409 struct sockaddr_storage argv_inaddr;
410 uint32_t argv_prefix, addr;
411 usnic_if_filter_t *filter;
412 int n_argv;
413
414 if (NULL == orig_str) {
415 return NULL;
416 }
417
418
419 filter = calloc(sizeof(*filter), 1);
420 if (NULL == filter) {
421 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
422 return NULL;
423 }
424
425 argv = opal_argv_split(orig_str, ',');
426 if (NULL == argv || 0 == (n_argv = opal_argv_count(argv))) {
427 free(filter);
428 opal_argv_free(argv);
429 return NULL;
430 }
431
432
433 filter->elts = malloc(sizeof(*filter->elts) * n_argv);
434 if (NULL == filter->elts) {
435 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
436 free(filter);
437 opal_argv_free(argv);
438 return NULL;
439 }
440
441
442
443
444 filter->n_elt = 0;
445 for (i = 0; NULL != argv[i]; ++i) {
446
447
448 if (isalpha(argv[i][0])) {
449 filter->elts[filter->n_elt].is_netmask = false;
450 filter->elts[filter->n_elt].if_name = strdup(argv[i]);
451 opal_output_verbose(20, USNIC_OUT,
452 "btl:usnic:parse_ifex_str: parsed %s device name: %s",
453 name, filter->elts[filter->n_elt].if_name);
454
455 ++filter->n_elt;
456 continue;
457 }
458
459
460
461 argv_prefix = 0;
462 tmp = strdup(argv[i]);
463 str = strchr(argv[i], '/');
464 if (NULL == str) {
465 opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
466 true, name, opal_process_info.nodename,
467 tmp, "Invalid specification (missing \"/\")");
468 free(tmp);
469 continue;
470 }
471 *str = '\0';
472 argv_prefix = atoi(str + 1);
473 if (argv_prefix < 1 || argv_prefix > 32) {
474 opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
475 true, name, opal_process_info.nodename,
476 tmp, "Invalid specification (prefix < 1 or prefix >32)");
477 free(tmp);
478 continue;
479 }
480
481
482 ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET;
483 ret = inet_pton(AF_INET, argv[i],
484 &((struct sockaddr_in*) &argv_inaddr)->sin_addr);
485 if (1 != ret) {
486 opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
487 true, name, opal_process_info.nodename, tmp,
488 "Invalid specification (inet_pton() failed)");
489 free(tmp);
490 continue;
491 }
492 opal_output_verbose(20, USNIC_OUT,
493 "btl:usnic:parse_ifex_str: parsed %s address+prefix: %s / %u",
494 name,
495 opal_net_get_hostname((struct sockaddr*) &argv_inaddr),
496 argv_prefix);
497
498 memcpy(&addr,
499 &((struct sockaddr_in*) &argv_inaddr)->sin_addr,
500 sizeof(addr));
501
502
503
504 filter->elts[filter->n_elt].is_netmask = true;
505 filter->elts[filter->n_elt].if_name = NULL;
506 filter->elts[filter->n_elt].netmask_be =
507 usnic_cidrlen_to_netmask(argv_prefix);
508 filter->elts[filter->n_elt].addr_be = addr &
509 filter->elts[filter->n_elt].netmask_be;
510 ++filter->n_elt;
511
512 free(tmp);
513 }
514 assert(i == n_argv);
515
516 opal_argv_free(argv);
517
518
519 if (filter->n_elt == 0) {
520 free_filter(filter);
521 return NULL;
522 }
523
524 return filter;
525 }
526
527
528
529
530 static bool filter_module(opal_btl_usnic_module_t *module,
531 usnic_if_filter_t *filter,
532 bool filter_incl)
533 {
534 int i;
535 uint32_t module_mask;
536 struct sockaddr_in *src;
537 struct fi_usnic_info *uip;
538 struct fi_info *info;
539 bool match;
540 const char *linux_device_name;
541
542 info = module->fabric_info;
543 uip = &module->usnic_info;
544 src = info->src_addr;
545 linux_device_name = module->linux_device_name;
546 module_mask = src->sin_addr.s_addr & uip->ui.v1.ui_netmask_be;
547 match = false;
548 for (i = 0; i < filter->n_elt; ++i) {
549 if (filter->elts[i].is_netmask) {
550
551 if (filter->elts[i].netmask_be == uip->ui.v1.ui_netmask_be &&
552 filter->elts[i].addr_be == module_mask) {
553 match = true;
554 break;
555 }
556 }
557 else {
558 if (strcmp(filter->elts[i].if_name, linux_device_name) == 0) {
559 match = true;
560 break;
561 }
562 }
563 }
564
565
566 return match ^ !filter_incl;
567 }
568
569
570 static void free_filter(usnic_if_filter_t *filter)
571 {
572 int i;
573
574 if (filter == NULL) {
575 return;
576 }
577
578 if (NULL != filter->elts) {
579 for (i = 0; i < filter->n_elt; ++i) {
580 if (!filter->elts[i].is_netmask) {
581 free(filter->elts[i].if_name);
582 }
583 }
584 free(filter->elts);
585 }
586 free(filter);
587 }
588
589
590
591
592
593
594
595
596 static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
597 bool want_progress_threads,
598 bool want_mpi_threads)
599 {
600 mca_btl_base_module_t **btls = NULL;
601 int i, j, num_final_modules;
602 int num_devs;
603 opal_btl_usnic_module_t *module;
604 usnic_if_filter_t *filter = NULL;
605 bool keep_module;
606 bool filter_incl = false;
607 int min_distance, num_local_procs;
608 struct fi_info *info_list;
609 struct fi_info *info;
610 struct fid_fabric *fabric;
611 struct fid_domain *domain;
612 int ret;
613
614 *num_btl_modules = 0;
615
616
617 if (want_mpi_threads && !mca_btl_base_thread_multiple_override) {
618 if (OPAL_MAJOR_VERSION >= 2) {
619 opal_output_verbose(5, USNIC_OUT,
620 "btl:usnic: MPI_THREAD_MULTIPLE support is in testing phase.");
621 }
622 else {
623 opal_output_verbose(5, USNIC_OUT,
624 "btl:usnic: MPI_THREAD_MULTIPLE is not supported in version < 2.");
625 return NULL;
626 }
627 }
628
629 OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t);
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686 uint32_t libfabric_api;
687 libfabric_api = fi_version();
688 if (libfabric_api < FI_VERSION(1, 3)) {
689 opal_output_verbose(5, USNIC_OUT,
690 "btl:usnic: disqualifiying myself because Libfabric does not support v1.3 of the API (v1.3 is *required* for correct usNIC functionality).");
691 return NULL;
692 }
693
694
695
696 if (libfabric_api > FI_VERSION(1, 3)) {
697 libfabric_api = FI_VERSION(1, 4);
698 }
699
700 struct fi_info hints = {0};
701 struct fi_ep_attr ep_attr = {0};
702 struct fi_fabric_attr fabric_attr = {0};
703 struct fi_rx_attr rx_attr = {0};
704 struct fi_tx_attr tx_attr = {0};
705
706
707 fabric_attr.prov_name = "usnic";
708 ep_attr.type = FI_EP_DGRAM;
709
710 hints.caps = FI_MSG;
711 hints.mode = FI_LOCAL_MR | FI_MSG_PREFIX;
712 hints.addr_format = FI_SOCKADDR;
713 hints.ep_attr = &ep_attr;
714 hints.fabric_attr = &fabric_attr;
715 hints.tx_attr = &tx_attr;
716 hints.rx_attr = &rx_attr;
717
718 tx_attr.iov_limit = 1;
719 rx_attr.iov_limit = 1;
720
721 ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list);
722 if (0 != ret) {
723 opal_output_verbose(5, USNIC_OUT,
724 "btl:usnic: disqualifiying myself due to fi_getinfo(3) failure: %s (%d)", strerror(-ret), ret);
725 return NULL;
726 }
727
728 num_devs = 0;
729 for (info = info_list; NULL != info; info = info->next) {
730 ++num_devs;
731 }
732 if (0 == num_devs) {
733 opal_output_verbose(5, USNIC_OUT,
734 "btl:usnic: disqualifiying myself due to lack of libfabric providers");
735 return NULL;
736 }
737
738
739
740 if (OPAL_SUCCESS != check_reg_mem_basics()) {
741 opal_output_verbose(5, USNIC_OUT,
742 "btl:usnic: disqualifiying myself due to lack of lockable memory");
743 return NULL;
744 }
745
746
747
748
749
750
751
752 opal_output_verbose(5, USNIC_OUT,
753 "btl:usnic: usNIC fabrics found");
754
755 opal_proc_t *me = opal_proc_local_get();
756 opal_process_name_t *name = &(me->proc_name);
757 mca_btl_usnic_component.my_hashed_rte_name =
758 usnic_compat_rte_hash_name(name);
759 MSGDEBUG1_OUT("%s: my_hashed_rte_name=0x%" PRIx64,
760 __func__, mca_btl_usnic_component.my_hashed_rte_name);
761
762 opal_srand(&opal_btl_usnic_rand_buff, ((uint32_t) getpid()));
763
764
765
766 mca_btl_usnic_component.num_modules = num_devs;
767 btls = (struct mca_btl_base_module_t**)
768 malloc(mca_btl_usnic_component.num_modules *
769 sizeof(opal_btl_usnic_module_t*));
770 if (NULL == btls) {
771 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
772 goto send_modex;
773 }
774
775
776 mca_btl_usnic_component.usnic_all_modules =
777 calloc(mca_btl_usnic_component.num_modules,
778 sizeof(*mca_btl_usnic_component.usnic_all_modules));
779 mca_btl_usnic_component.usnic_active_modules =
780 calloc(mca_btl_usnic_component.num_modules,
781 sizeof(*mca_btl_usnic_component.usnic_active_modules));
782 if (NULL == mca_btl_usnic_component.usnic_all_modules ||
783 NULL == mca_btl_usnic_component.usnic_active_modules) {
784 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
785 goto error;
786 }
787
788
789
790
791
792 if (NULL != mca_btl_usnic_component.if_include) {
793 opal_output_verbose(20, USNIC_OUT,
794 "btl:usnic:filter_module: if_include=%s",
795 mca_btl_usnic_component.if_include);
796
797 filter_incl = true;
798 filter = parse_ifex_str(mca_btl_usnic_component.if_include, "include");
799 } else if (NULL != mca_btl_usnic_component.if_exclude) {
800 opal_output_verbose(20, USNIC_OUT,
801 "btl:usnic:filter_module: if_exclude=%s",
802 mca_btl_usnic_component.if_exclude);
803
804 filter_incl = false;
805 filter = parse_ifex_str(mca_btl_usnic_component.if_exclude, "exclude");
806 }
807
808 num_local_procs = opal_process_info.num_local_peers;
809
810
811
812 info = info_list;
813 for (j = i = 0; i < num_devs &&
814 (0 == mca_btl_usnic_component.max_modules ||
815 i < mca_btl_usnic_component.max_modules);
816 ++i, info = info->next) {
817
818
819 char *linux_device_name;
820 if (libfabric_api <= FI_VERSION(1, 3)) {
821 linux_device_name = info->fabric_attr->name;
822 } else {
823 linux_device_name = info->domain_attr->name;
824 }
825
826 ret = fi_fabric(info->fabric_attr, &fabric, NULL);
827 if (0 != ret) {
828 opal_show_help("help-mpi-btl-usnic.txt",
829 "libfabric API failed",
830 true,
831 opal_process_info.nodename,
832 linux_device_name,
833 "fi_fabric()", __FILE__, __LINE__,
834 ret,
835 strerror(-ret));
836 continue;
837 }
838 opal_memchecker_base_mem_defined(&fabric, sizeof(fabric));
839
840 ret = fi_domain(fabric, info, &domain, NULL);
841 if (0 != ret) {
842 opal_show_help("help-mpi-btl-usnic.txt",
843 "libfabric API failed",
844 true,
845 opal_process_info.nodename,
846 linux_device_name,
847 "fi_domain()", __FILE__, __LINE__,
848 ret,
849 strerror(-ret));
850 continue;
851 }
852 opal_memchecker_base_mem_defined(&domain, sizeof(domain));
853
854 opal_output_verbose(5, USNIC_OUT,
855 "btl:usnic: found: usNIC device %s",
856 linux_device_name);
857
858
859
860
861 module = &(mca_btl_usnic_component.usnic_all_modules[j]);
862 memcpy(module, &opal_btl_usnic_module_template,
863 sizeof(opal_btl_usnic_module_t));
864 module->fabric = fabric;
865 module->domain = domain;
866 module->fabric_info = info;
867 module->libfabric_api = libfabric_api;
868 module->linux_device_name = strdup(linux_device_name);
869 if (NULL == module->linux_device_name) {
870 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
871 goto error;
872 }
873
874
875
876
877 ret = fi_open_ops(&fabric->fid, FI_USNIC_FABRIC_OPS_1, 0,
878 (void **)&module->usnic_fabric_ops, NULL);
879 if (ret != 0) {
880 opal_output_verbose(5, USNIC_OUT,
881 "btl:usnic: device %s fabric_open_ops failed %d (%s)",
882 module->linux_device_name, ret, fi_strerror(-ret));
883 fi_close(&domain->fid);
884 fi_close(&fabric->fid);
885 continue;
886 }
887
888 ret =
889 module->usnic_fabric_ops->getinfo(1,
890 fabric,
891 &module->usnic_info);
892 if (ret != 0) {
893 opal_output_verbose(5, USNIC_OUT,
894 "btl:usnic: device %s usnic_getinfo failed %d (%s)",
895 module->linux_device_name, ret, fi_strerror(-ret));
896 fi_close(&domain->fid);
897 fi_close(&fabric->fid);
898 continue;
899 }
900 opal_output_verbose(5, USNIC_OUT,
901 "btl:usnic: device %s usnic_info: link speed=%d, netmask=0x%x, ifname=%s, num_vf=%d, qp/vf=%d, cq/vf=%d",
902 module->linux_device_name,
903 (unsigned int) module->usnic_info.ui.v1.ui_link_speed,
904 (unsigned int) module->usnic_info.ui.v1.ui_netmask_be,
905 module->usnic_info.ui.v1.ui_ifname,
906 module->usnic_info.ui.v1.ui_num_vf,
907 module->usnic_info.ui.v1.ui_qp_per_vf,
908 module->usnic_info.ui.v1.ui_cq_per_vf);
909
910
911 if (filter != NULL) {
912 keep_module = filter_module(module, filter, filter_incl);
913 opal_output_verbose(5, USNIC_OUT,
914 "btl:usnic: %s %s due to %s",
915 (keep_module ? "keeping" : "skipping"),
916 module->linux_device_name,
917 (filter_incl ? "if_include" : "if_exclude"));
918 if (!keep_module) {
919 fi_close(&domain->fid);
920 fi_close(&fabric->fid);
921 continue;
922 }
923 }
924
925
926
927
928
929
930 if (0 == j &&
931 check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) {
932 opal_output_verbose(5, USNIC_OUT,
933 "btl:usnic: device %s is not provisioned with enough resources -- skipping",
934 module->linux_device_name);
935 fi_close(&domain->fid);
936 fi_close(&fabric->fid);
937
938 mca_btl_usnic_component.num_modules = 0;
939 goto error;
940 }
941
942
943
944
945
946 opal_output_verbose(5, USNIC_OUT,
947 "btl:usnic: device %s looks good!",
948 module->linux_device_name);
949
950
951 btls[j++] = &(module->super);
952 }
953 mca_btl_usnic_component.num_modules = j;
954
955
956 if (filter != NULL) {
957 free_filter(filter);
958 filter = NULL;
959 }
960
961
962
963 if (mca_btl_usnic_component.num_modules > 0 &&
964 mca_btl_usnic_component.connectivity_enabled) {
965 mca_btl_usnic_component.opal_evbase = opal_progress_thread_init(NULL);
966 if (OPAL_SUCCESS != opal_btl_usnic_connectivity_agent_init() ||
967 OPAL_SUCCESS != opal_btl_usnic_connectivity_client_init()) {
968 opal_progress_thread_finalize(NULL);
969 return NULL;
970 }
971 }
972
973
974
975
976 for (num_final_modules = i = 0;
977 i < mca_btl_usnic_component.num_modules; ++i) {
978 module = (opal_btl_usnic_module_t*) btls[i];
979
980
981 if (OPAL_SUCCESS != opal_btl_usnic_module_init(module)) {
982 opal_output_verbose(5, USNIC_OUT,
983 "btl:usnic: failed to init module for %s",
984 module->if_ipv4_addr_str);
985 continue;
986 }
987
988
989
990
991
992
993
994
995 btls[num_final_modules++] = &(module->super);
996
997
998 const char *devname = module->linux_device_name;
999 opal_output_verbose(5, USNIC_OUT,
1000 "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d, num aveqe=%d",
1001 devname,
1002 module->sd_num,
1003 module->rd_num,
1004 module->cq_num,
1005 module->av_eq_num);
1006 opal_output_verbose(5, USNIC_OUT,
1007 "btl:usnic: %s priority MTU = %" PRIsize_t,
1008 devname,
1009 module->max_tiny_msg_size);
1010 opal_output_verbose(5, USNIC_OUT,
1011 "btl:usnic: %s priority limit = %" PRIsize_t,
1012 devname,
1013 module->max_tiny_payload);
1014 opal_output_verbose(5, USNIC_OUT,
1015 "btl:usnic: %s eager limit = %" PRIsize_t,
1016 devname,
1017 module->super.btl_eager_limit);
1018 opal_output_verbose(5, USNIC_OUT,
1019 "btl:usnic: %s eager rndv limit = %" PRIsize_t,
1020 devname,
1021 module->super.btl_rndv_eager_limit);
1022 opal_output_verbose(5, USNIC_OUT,
1023 "btl:usnic: %s max send size= %" PRIsize_t
1024 " (not overrideable)",
1025 devname,
1026 module->super.btl_max_send_size);
1027 opal_output_verbose(5, USNIC_OUT,
1028 "btl:usnic: %s exclusivity = %d",
1029 devname,
1030 module->super.btl_exclusivity);
1031 }
1032
1033
1034
1035 mca_btl_usnic_component.num_modules = num_final_modules;
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045 if (0 == num_final_modules) {
1046 opal_output_verbose(5, USNIC_OUT,
1047 "btl:usnic: returning 0 modules");
1048 goto error;
1049 }
1050
1051
1052
1053 memcpy(mca_btl_usnic_component.usnic_active_modules, btls,
1054 num_final_modules * sizeof(*btls));
1055
1056
1057
1058
1059
1060
1061 min_distance = 9999999;
1062 for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
1063 module = (opal_btl_usnic_module_t*) btls[i];
1064 if (module->numa_distance < min_distance) {
1065 min_distance = module->numa_distance;
1066 }
1067 }
1068 for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
1069 module = (opal_btl_usnic_module_t*) btls[i];
1070 if (module->numa_distance > min_distance) {
1071 ++module->super.btl_latency;
1072 opal_output_verbose(5, USNIC_OUT,
1073 "btl:usnic: %s is far from me; increasing latency rating",
1074 module->if_ipv4_addr_str);
1075 }
1076 }
1077
1078
1079 opal_event_set(opal_sync_event_base, &usnic_clock_timer_event,
1080 -1, 0, usnic_clock_callback,
1081 &usnic_clock_timeout);
1082 usnic_clock_timer_event_set = true;
1083
1084
1085 usnic_clock_timeout.tv_sec = 0;
1086 usnic_clock_timeout.tv_usec = 1000;
1087 opal_event_add(&usnic_clock_timer_event, &usnic_clock_timeout);
1088
1089
1090 opal_btl_usnic_setup_mpit_pvars();
1091
1092
1093 *num_btl_modules = mca_btl_usnic_component.num_modules;
1094 opal_output_verbose(5, USNIC_OUT,
1095 "btl:usnic: returning %d modules", *num_btl_modules);
1096
1097 send_modex:
1098 usnic_modex_send();
1099 return btls;
1100
1101 error:
1102
1103 free(btls);
1104 btls = NULL;
1105 free(mca_btl_usnic_component.usnic_all_modules);
1106 mca_btl_usnic_component.usnic_all_modules = NULL;
1107 free(mca_btl_usnic_component.usnic_active_modules);
1108 mca_btl_usnic_component.usnic_active_modules = NULL;
1109
1110
1111 if (filter != NULL) {
1112 free_filter(filter);
1113 filter = NULL;
1114 }
1115
1116 goto send_modex;
1117 }
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
/* Forward declarations for the progress helpers defined below. */
static int usnic_handle_completion(opal_btl_usnic_module_t* module,
    opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
static int usnic_component_progress_2(void);
static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
    opal_btl_usnic_channel_t *channel, int cq_ret);
1134
1135 static int usnic_component_progress(void)
1136 {
1137 int i;
1138 int count;
1139 opal_btl_usnic_recv_segment_t* rseg;
1140 opal_btl_usnic_module_t* module;
1141 struct fi_cq_entry completion;
1142 opal_btl_usnic_channel_t *channel;
1143 static bool fastpath_ok = true;
1144
1145
1146 opal_btl_usnic_ticks += 5000;
1147
1148 count = 0;
1149 if (fastpath_ok) {
1150 for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
1151 module = mca_btl_usnic_component.usnic_active_modules[i];
1152 channel = &module->mod_channels[USNIC_PRIORITY_CHANNEL];
1153
1154 assert(channel->chan_deferred_recv == NULL);
1155
1156 int ret = fi_cq_read(channel->cq, &completion, 1);
1157 assert(0 != ret);
1158 if (OPAL_LIKELY(1 == ret)) {
1159 opal_memchecker_base_mem_defined(&completion,
1160 sizeof(completion));
1161 rseg = (opal_btl_usnic_recv_segment_t*) completion.op_context;
1162 if (OPAL_LIKELY(OPAL_BTL_USNIC_SEG_RECV ==
1163 rseg->rs_base.us_type)) {
1164 opal_btl_usnic_recv_fast(module, rseg, channel);
1165 ++module->stats.num_seg_total_completions;
1166 ++module->stats.num_seg_recv_completions;
1167 fastpath_ok = false;
1168 return 1;
1169 } else {
1170 count += usnic_handle_completion(module, channel,
1171 &completion);
1172 }
1173 } else if (OPAL_LIKELY(-FI_EAGAIN == ret)) {
1174 continue;
1175 } else {
1176 usnic_handle_cq_error(module, channel, ret);
1177 }
1178 }
1179 }
1180
1181 fastpath_ok = true;
1182 return count + usnic_component_progress_2();
1183 }
1184
1185 static int usnic_handle_completion(
1186 opal_btl_usnic_module_t* module,
1187 opal_btl_usnic_channel_t *channel,
1188 struct fi_cq_entry *completion)
1189 {
1190 opal_btl_usnic_segment_t* seg;
1191 opal_btl_usnic_recv_segment_t* rseg;
1192
1193 seg = (opal_btl_usnic_segment_t*)completion->op_context;
1194 rseg = (opal_btl_usnic_recv_segment_t*)seg;
1195
1196 ++module->stats.num_seg_total_completions;
1197
1198
1199 opal_memchecker_base_mem_defined(seg, sizeof(*seg));
1200
1201 OPAL_THREAD_LOCK(&btl_usnic_lock);
1202
1203
1204 switch(seg->us_type) {
1205
1206
1207 case OPAL_BTL_USNIC_SEG_ACK:
1208 ++module->stats.num_seg_ack_completions;
1209 opal_btl_usnic_ack_complete(module,
1210 (opal_btl_usnic_ack_segment_t *)seg);
1211 break;
1212
1213
1214
1215 case OPAL_BTL_USNIC_SEG_FRAG:
1216 ++module->stats.num_seg_frag_completions;
1217 opal_btl_usnic_frag_send_complete(module,
1218 (opal_btl_usnic_frag_segment_t*)seg);
1219 break;
1220
1221
1222
1223 case OPAL_BTL_USNIC_SEG_CHUNK:
1224 ++module->stats.num_seg_chunk_completions;
1225 opal_btl_usnic_chunk_send_complete(module,
1226 (opal_btl_usnic_chunk_segment_t*)seg);
1227 break;
1228
1229
1230 case OPAL_BTL_USNIC_SEG_RECV:
1231 ++module->stats.num_seg_recv_completions;
1232 opal_btl_usnic_recv(module, rseg, channel);
1233 break;
1234
1235 default:
1236 BTL_ERROR(("Unhandled completion segment type %d", seg->us_type));
1237 break;
1238 }
1239
1240 OPAL_THREAD_UNLOCK(&btl_usnic_lock);
1241 return 1;
1242 }
1243
1244 static void
1245 usnic_handle_cq_error(opal_btl_usnic_module_t* module,
1246 opal_btl_usnic_channel_t *channel, int cq_ret)
1247 {
1248 int rc;
1249 struct fi_cq_err_entry err_entry;
1250 opal_btl_usnic_recv_segment_t* rseg;
1251
1252 if (cq_ret != -FI_EAVAIL) {
1253 BTL_ERROR(("%s: cq_read ret = %d (%s)",
1254 module->linux_device_name, cq_ret,
1255 fi_strerror(-cq_ret)));
1256 channel->chan_error = true;
1257 }
1258
1259 rc = fi_cq_readerr(channel->cq, &err_entry, 0);
1260 if (rc == -FI_EAGAIN) {
1261 return;
1262 } else if (rc != 1) {
1263 BTL_ERROR(("%s: cq_readerr ret = %d (expected 1)",
1264 module->linux_device_name, rc));
1265 channel->chan_error = true;
1266 }
1267
1268
1269
1270 else if (FI_ECRC == err_entry.prov_errno ||
1271 FI_ETRUNC == err_entry.prov_errno) {
1272 #if MSGDEBUG1
1273 static int once = 0;
1274 if (once++ == 0) {
1275 BTL_ERROR(("%s: Channel %d, %s",
1276 module->linux_device_name,
1277 channel->chan_index,
1278 FI_ECRC == err_entry.prov_errno ?
1279 "CRC error" : "message truncation"));
1280 }
1281 #endif
1282
1283
1284 ++module->stats.num_crc_errors;
1285
1286
1287 ++module->stats.num_recv_reposts;
1288
1289
1290 rseg = err_entry.op_context;
1291 if (OPAL_BTL_USNIC_SEG_RECV == rseg->rs_base.us_type) {
1292 rseg->rs_next = channel->repost_recv_head;
1293 channel->repost_recv_head = rseg;
1294 }
1295 } else {
1296 BTL_ERROR(("%s: CQ[%d] prov_err = %d",
1297 module->linux_device_name, channel->chan_index,
1298 err_entry.prov_errno));
1299 channel->chan_error = true;
1300 }
1301 }
1302
1303 static int usnic_component_progress_2(void)
1304 {
1305 int i, j, count = 0, num_events, ret;
1306 opal_btl_usnic_module_t* module;
1307 static struct fi_cq_entry completions[OPAL_BTL_USNIC_NUM_COMPLETIONS];
1308 opal_btl_usnic_channel_t *channel;
1309 int rc;
1310 int c;
1311
1312
1313 opal_btl_usnic_ticks += 5000;
1314
1315
1316 for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
1317 module = mca_btl_usnic_component.usnic_active_modules[i];
1318
1319
1320 for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
1321 channel = &module->mod_channels[c];
1322
1323 if (channel->chan_deferred_recv != NULL) {
1324 (void) opal_btl_usnic_recv_frag_bookkeeping(module,
1325 channel->chan_deferred_recv, channel);
1326 channel->chan_deferred_recv = NULL;
1327 }
1328
1329 num_events = ret =
1330 fi_cq_read(channel->cq, completions,
1331 OPAL_BTL_USNIC_NUM_COMPLETIONS);
1332 assert(0 != ret);
1333 opal_memchecker_base_mem_defined(&ret, sizeof(ret));
1334 if (OPAL_UNLIKELY(ret < 0 && -FI_EAGAIN != ret)) {
1335 usnic_handle_cq_error(module, channel, num_events);
1336 num_events = 0;
1337 } else if (-FI_EAGAIN == ret) {
1338 num_events = 0;
1339 }
1340
1341 opal_memchecker_base_mem_defined(completions,
1342 sizeof(completions[0]) *
1343 num_events);
1344
1345 for (j = 0; j < num_events; j++) {
1346 count += usnic_handle_completion(module, channel,
1347 &completions[j]);
1348 }
1349
1350
1351
1352
1353 if (channel->chan_error) {
1354 channel->chan_error = false;
1355 return OPAL_ERROR;
1356 }
1357
1358
1359 opal_btl_usnic_module_progress_sends(module);
1360
1361
1362 if (OPAL_LIKELY(NULL != channel->repost_recv_head)) {
1363 rc = opal_btl_usnic_post_recv_list(channel);
1364 if (OPAL_UNLIKELY(rc != 0)) {
1365 BTL_ERROR(("error posting recv: %s\n", strerror(errno)));
1366 return OPAL_ERROR;
1367 }
1368 }
1369 }
1370 }
1371
1372 return count;
1373 }
1374
1375
1376 static void dump_endpoint(opal_btl_usnic_endpoint_t *endpoint)
1377 {
1378 int i;
1379 opal_btl_usnic_frag_t *frag;
1380 opal_btl_usnic_send_segment_t *sseg;
1381 struct in_addr ia;
1382 char ep_addr_str[INET_ADDRSTRLEN];
1383 char tmp[128], str[2048];
1384
1385 memset(ep_addr_str, 0x00, sizeof(ep_addr_str));
1386 ia.s_addr = endpoint->endpoint_remote_modex.ipv4_addr;
1387 inet_ntop(AF_INET, &ia, ep_addr_str, sizeof(ep_addr_str));
1388
1389 opal_output(0, " endpoint %p, %s job=%u, rank=%u rts=%s s_credits=%"PRIi32"\n",
1390 (void *)endpoint, ep_addr_str,
1391 endpoint->endpoint_proc->proc_opal->proc_name.jobid,
1392 endpoint->endpoint_proc->proc_opal->proc_name.vpid,
1393 (endpoint->endpoint_ready_to_send ? "true" : "false"),
1394 endpoint->endpoint_send_credits);
1395 opal_output(0, " endpoint->frag_send_queue:\n");
1396
1397 OPAL_LIST_FOREACH(frag, &endpoint->endpoint_frag_send_queue,
1398 opal_btl_usnic_frag_t) {
1399 opal_btl_usnic_small_send_frag_t *ssfrag;
1400 opal_btl_usnic_large_send_frag_t *lsfrag;
1401
1402 snprintf(str, sizeof(str), " --> frag %p, %s", (void *)frag,
1403 usnic_frag_type(frag->uf_type));
1404 switch (frag->uf_type) {
1405 case OPAL_BTL_USNIC_FRAG_LARGE_SEND:
1406 lsfrag = (opal_btl_usnic_large_send_frag_t *)frag;
1407 snprintf(tmp, sizeof(tmp), " tag=%"PRIu8" id=%"PRIu32" offset=%llu/%llu post_cnt=%"PRIu32" ack_bytes_left=%llu\n",
1408 lsfrag->lsf_tag,
1409 lsfrag->lsf_frag_id,
1410 (unsigned long long)lsfrag->lsf_cur_offset,
1411 (unsigned long long)lsfrag->lsf_base.sf_size,
1412 lsfrag->lsf_base.sf_seg_post_cnt,
1413 (unsigned long long)lsfrag->lsf_base.sf_ack_bytes_left);
1414 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
1415 opal_output(0, "%s", str);
1416
1417 OPAL_LIST_FOREACH(sseg, &lsfrag->lsf_seg_chain,
1418 opal_btl_usnic_send_segment_t) {
1419
1420 opal_output(0, " chunk seg %p, chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
1421 (void *)sseg,
1422 (USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
1423 "prio" : "data"),
1424 sseg->ss_hotel_room,
1425 sseg->ss_send_posted,
1426 (sseg->ss_ack_pending ? "true" : "false"));
1427 }
1428 break;
1429
1430 case OPAL_BTL_USNIC_FRAG_SMALL_SEND:
1431 ssfrag = (opal_btl_usnic_small_send_frag_t *)frag;
1432 snprintf(tmp, sizeof(tmp), " sf_size=%llu post_cnt=%"PRIu32" ack_bytes_left=%llu\n",
1433 (unsigned long long)ssfrag->ssf_base.sf_size,
1434 ssfrag->ssf_base.sf_seg_post_cnt,
1435 (unsigned long long)ssfrag->ssf_base.sf_ack_bytes_left);
1436 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
1437 opal_output(0, "%s", str);
1438
1439 sseg = &ssfrag->ssf_segment;
1440 opal_output(0, " small seg %p, chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
1441 (void *)sseg,
1442 (USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
1443 "prio" : "data"),
1444 sseg->ss_hotel_room,
1445 sseg->ss_send_posted,
1446 (sseg->ss_ack_pending ? "true" : "false"));
1447 break;
1448
1449 case OPAL_BTL_USNIC_FRAG_PUT_DEST:
1450
1451 snprintf(tmp, sizeof(tmp), " put_addr=%p\n", frag->uf_remote_seg[0].seg_addr.pval);
1452 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
1453 opal_output(0, "%s", str);
1454 break;
1455 }
1456 }
1457
1458
1459
1460
1461
1462 opal_output(0, " endpoint->endpoint_sent_segs (%p):\n",
1463 (void *)endpoint->endpoint_sent_segs);
1464 for (i = 0; i < WINDOW_SIZE; ++i) {
1465 sseg = endpoint->endpoint_sent_segs[i];
1466 if (NULL != sseg) {
1467 opal_output(0, " [%d] sseg=%p %s chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
1468 i,
1469 (void *)sseg,
1470 usnic_seg_type_str(sseg->ss_base.us_type),
1471 (USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
1472 "prio" : "data"),
1473 sseg->ss_hotel_room,
1474 sseg->ss_send_posted,
1475 (sseg->ss_ack_pending ? "true" : "false"));
1476 }
1477 }
1478
1479 opal_output(0, " ack_needed=%s n_t=%"UDSEQ" n_a=%"UDSEQ" n_r=%"UDSEQ" n_s=%"UDSEQ" rfstart=%"PRIu32"\n",
1480 (endpoint->endpoint_ack_needed?"true":"false"),
1481 endpoint->endpoint_next_seq_to_send,
1482 endpoint->endpoint_ack_seq_rcvd,
1483 endpoint->endpoint_next_contig_seq_to_recv,
1484 endpoint->endpoint_highest_seq_rcvd,
1485 endpoint->endpoint_rfstart);
1486
1487 if (dump_bitvectors) {
1488 opal_btl_usnic_snprintf_bool_array(str, sizeof(str),
1489 endpoint->endpoint_rcvd_segs,
1490 WINDOW_SIZE);
1491 opal_output(0, " rcvd_segs 0x%s", str);
1492 }
1493 }
1494
1495 void opal_btl_usnic_component_debug(void)
1496 {
1497 int i;
1498 opal_btl_usnic_module_t *module;
1499 opal_btl_usnic_endpoint_t *endpoint;
1500 opal_btl_usnic_send_segment_t *sseg;
1501 opal_list_item_t *item;
1502 const opal_proc_t *proc = opal_proc_local_get();
1503
1504 opal_output(0, "*** dumping usnic state for MPI_COMM_WORLD rank %u ***\n",
1505 proc->proc_name.vpid);
1506 for (i = 0; i < (int)mca_btl_usnic_component.num_modules; ++i) {
1507 module = mca_btl_usnic_component.usnic_active_modules[i];
1508
1509 opal_output(0, "active_modules[%d]=%p %s max{frag,chunk,tiny}=%llu,%llu,%llu\n",
1510 i, (void *)module, module->linux_device_name,
1511 (unsigned long long)module->max_frag_payload,
1512 (unsigned long long)module->max_chunk_payload,
1513 (unsigned long long)module->max_tiny_payload);
1514
1515 opal_output(0, " endpoints_with_sends:\n");
1516 OPAL_LIST_FOREACH(endpoint, &module->endpoints_with_sends,
1517 opal_btl_usnic_endpoint_t) {
1518 dump_endpoint(endpoint);
1519 }
1520
1521 opal_output(0, " endpoints_that_need_acks:\n");
1522 OPAL_LIST_FOREACH(endpoint, &module->endpoints_that_need_acks,
1523 opal_btl_usnic_endpoint_t) {
1524 dump_endpoint(endpoint);
1525 }
1526
1527
1528 opal_output(0, " all_endpoints:\n");
1529 opal_mutex_lock(&module->all_endpoints_lock);
1530 item = opal_list_get_first(&module->all_endpoints);
1531 while (item != opal_list_get_end(&module->all_endpoints)) {
1532 endpoint = container_of(item, mca_btl_base_endpoint_t,
1533 endpoint_endpoint_li);
1534 item = opal_list_get_next(item);
1535 dump_endpoint(endpoint);
1536 }
1537 opal_mutex_unlock(&module->all_endpoints_lock);
1538
1539 opal_output(0, " pending_resend_segs:\n");
1540 OPAL_LIST_FOREACH(sseg, &module->pending_resend_segs,
1541 opal_btl_usnic_send_segment_t) {
1542 opal_output(0, " sseg %p\n", (void *)sseg);
1543 }
1544
1545 opal_btl_usnic_print_stats(module, " manual", false);
1546 }
1547 }
1548
1549 #include "test/btl_usnic_component_test.h"