This source file includes the following definitions.
- usnic_component_open
- usnic_component_close
- usnic_modex_send
- check_reg_mem_basics
- check_usnic_config
- usnic_clock_callback
- parse_ifex_str
- filter_module
- free_filter
- usnic_component_init
- usnic_component_progress
- usnic_handle_completion
- usnic_handle_cq_error
- usnic_component_progress_2
- dump_endpoint
- opal_btl_usnic_component_debug
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 
  30 
  31 
  32 
  33 
  34 
  35 
  36 
  37 
  38 
  39 #include "opal_config.h"
  40 
  41 #include <string.h>
  42 #include <ctype.h>
  43 #include <errno.h>
  44 #include <unistd.h>
  45 #include <stdlib.h>
  46 #include <sys/time.h>
  47 #include <sys/resource.h>
  48 #include <sys/types.h>
  49 #include <sys/stat.h>
  50 #include <fcntl.h>
  51 
  52 #include <rdma/fabric.h>
  53 
  54 #include "opal_stdint.h"
  55 #include "opal/prefetch.h"
  56 #include "opal/mca/timer/base/base.h"
  57 #include "opal/util/argv.h"
  58 #include "opal/util/net.h"
  59 #include "opal/util/if.h"
  60 #include "opal/util/printf.h"
  61 #include "opal/mca/base/mca_base_var.h"
  62 #include "opal/mca/memchecker/base/base.h"
  63 #include "opal/util/show_help.h"
  64 #include "opal/constants.h"
  65 
  66 #include "opal/mca/btl/btl.h"
  67 #include "opal/mca/btl/base/base.h"
  68 #include "opal/util/proc.h"
  69 
  70 #include "btl_usnic.h"
  71 #include "btl_usnic_connectivity.h"
  72 #include "btl_usnic_frag.h"
  73 #include "btl_usnic_endpoint.h"
  74 #include "btl_usnic_module.h"
  75 #include "btl_usnic_stats.h"
  76 #include "btl_usnic_util.h"
  77 #include "btl_usnic_ack.h"
  78 #include "btl_usnic_send.h"
  79 #include "btl_usnic_recv.h"
  80 #include "btl_usnic_proc.h"
  81 #include "btl_usnic_test.h"
  82 
/* NOTE(review): not referenced in the part of the file visible here;
   presumably the maximum number of completions handled per progress
   pass -- confirm against usnic_component_progress(). */
#define OPAL_BTL_USNIC_NUM_COMPLETIONS 500


/* Recursive lock for the usnic BTL.  It is statically initialized here,
   constructed again in usnic_component_init() and destructed in
   usnic_component_close(). */
opal_recursive_mutex_t btl_usnic_lock =  OPAL_RECURSIVE_MUTEX_STATIC_INIT;


/* Random number generator state; seeded with the process ID via
   opal_srand() in usnic_component_init(). */
opal_rng_buff_t opal_btl_usnic_rand_buff = {{0}};


/* Coarse clock: usnic_clock_callback() adds 1000000 to this counter every
   time the clock timer fires. */
uint64_t opal_btl_usnic_ticks = 0;

/* Timer event that drives usnic_clock_callback(), a flag recording whether
   the event has been set up (so usnic_component_close() knows to delete
   it), and the timeout it is armed with (0 sec / 1000 usec, assigned in
   usnic_component_init()). */
static opal_event_t usnic_clock_timer_event;
static bool usnic_clock_timer_event_set = false;
static struct timeval usnic_clock_timeout;


/* NOTE(review): not used in the visible code; presumably a debugging
   toggle consumed by the progress/debug routines -- confirm. */
static volatile bool dump_bitvectors = false;
 101 
/* Forward declarations of the component entry points; they are referenced
   by the mca_btl_usnic_component initializer below before being defined. */
static int usnic_component_open(void);
static int usnic_component_close(void);
static mca_btl_base_module_t **
usnic_component_init(int* num_btl_modules, bool want_progress_threads,
                       bool want_mpi_threads);
static int usnic_component_progress(void);
 109 
/* One entry of an if_include / if_exclude filter, as produced by
   parse_ifex_str(). */
typedef struct filter_elt_t {
    /* true: match on addr_be/netmask_be; false: match on if_name */
    bool is_netmask;

    /* valid only when is_netmask is false: strdup()'ed device name,
       released by free_filter() */
    char *if_name;

    /* valid only when is_netmask is true */
    uint32_t addr_be; /* parsed IPv4 address already ANDed with netmask_be */
    uint32_t netmask_be;
} filter_elt_t;
 120 
/* A parsed interface filter: elts points to an array of which the first
   n_elt entries are valid.  Built by parse_ifex_str(), released by
   free_filter(). */
typedef struct usnic_if_filter_t {
    int n_elt;
    filter_elt_t *elts;
} usnic_if_filter_t;
 125 
/* Forward declarations of the interface-filter helpers defined further
   down in this file. */
static bool filter_module(opal_btl_usnic_module_t *module,
                          usnic_if_filter_t *filter,
                          bool filter_incl);
static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
                                         const char *name);
static void free_filter(usnic_if_filter_t *filter);
 132 
 133 
/* The usnic BTL component instance.  Only the "super" part (the generic
   BTL component descriptor) is filled in statically; it wires up the
   open/close/register hooks and the init/progress entry points.  Several
   of the remaining fields are given their initial values in
   usnic_component_open(). */
opal_btl_usnic_component_t mca_btl_usnic_component = {
    .super = {
        /* Component identification plus the open/close/register hooks */

        .btl_version = {
            USNIC_BTL_DEFAULT_VERSION("usnic"),
            .mca_open_component = usnic_component_open,
            .mca_close_component = usnic_component_close,
            .mca_register_component_params = opal_btl_usnic_component_register,
        },
        .btl_data = {
            /* No component metadata flags are set */
            .param_field = MCA_BASE_METADATA_PARAM_NONE
        },

        /* BTL-level entry points */
        .btl_init = usnic_component_init,
        .btl_progress = usnic_component_progress,
    }
};
 153 
 154 
 155 
 156 
 157 
 158 static int usnic_component_open(void)
 159 {
 160     
 161     mca_btl_usnic_component.num_modules = 0;
 162     mca_btl_usnic_component.usnic_all_modules = NULL;
 163     mca_btl_usnic_component.usnic_active_modules = NULL;
 164     mca_btl_usnic_component.transport_header_len = -1;
 165     mca_btl_usnic_component.prefix_send_offset = 0;
 166 
 167     
 168     OBJ_CONSTRUCT(&mca_btl_usnic_component.usnic_procs, opal_list_t);
 169 
 170     
 171 
 172     if (OPAL_SUCCESS !=
 173         mca_base_var_check_exclusive("opal",
 174             mca_btl_usnic_component.super.btl_version.mca_type_name,
 175             mca_btl_usnic_component.super.btl_version.mca_component_name,
 176             "if_include",
 177             mca_btl_usnic_component.super.btl_version.mca_type_name,
 178             mca_btl_usnic_component.super.btl_version.mca_component_name,
 179             "if_exclude")) {
 180         
 181 
 182         return OPAL_ERR_NOT_AVAILABLE;
 183     }
 184 
 185     return OPAL_SUCCESS;
 186 }
 187 
 188 
 189 
 190 
 191 
 192 static int usnic_component_close(void)
 193 {
 194     
 195 
 196 
 197 
 198 
 199 
 200     OBJ_DESTRUCT(&mca_btl_usnic_component.usnic_procs);
 201 
 202     if (usnic_clock_timer_event_set) {
 203         opal_event_del(&usnic_clock_timer_event);
 204         usnic_clock_timer_event_set = false;
 205     }
 206 
 207     
 208     if (mca_btl_usnic_component.connectivity_enabled) {
 209         opal_btl_usnic_connectivity_client_finalize();
 210         opal_btl_usnic_connectivity_agent_finalize();
 211     }
 212     if (mca_btl_usnic_component.opal_evbase) {
 213         opal_progress_thread_finalize(NULL);
 214     }
 215 
 216     free(mca_btl_usnic_component.usnic_all_modules);
 217     free(mca_btl_usnic_component.usnic_active_modules);
 218 
 219 #if OPAL_BTL_USNIC_UNIT_TESTS
 220     
 221     opal_btl_usnic_cleanup_tests();
 222 #endif
 223 
 224     OBJ_DESTRUCT(&btl_usnic_lock);
 225 
 226     return OPAL_SUCCESS;
 227 }
 228 
 229 
 230 
 231 
 232 
 233 
 234 static int usnic_modex_send(void)
 235 {
 236     int rc;
 237     int i;
 238     size_t size;
 239     opal_btl_usnic_modex_t* modexes = NULL;
 240 
 241     if (0 == mca_btl_usnic_component.num_modules) {
 242         return OPAL_SUCCESS;
 243     }
 244 
 245     size = mca_btl_usnic_component.num_modules *
 246         sizeof(opal_btl_usnic_modex_t);
 247     modexes = (opal_btl_usnic_modex_t*) malloc(size);
 248     if (NULL == modexes) {
 249         return OPAL_ERR_OUT_OF_RESOURCE;
 250     }
 251 
 252     for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
 253         opal_btl_usnic_module_t* module =
 254             mca_btl_usnic_component.usnic_active_modules[i];
 255         modexes[i] = module->local_modex;
 256         opal_output_verbose(5, USNIC_OUT,
 257                             "btl:usnic: "
 258                             "control port:%d, "
 259                             "modex_send data port:%d, "
 260                             "%s",
 261                             modexes[i].ports[USNIC_PRIORITY_CHANNEL],
 262                             modexes[i].ports[USNIC_DATA_CHANNEL],
 263                             module->if_ipv4_addr_str);
 264     }
 265 
 266     usnic_compat_modex_send(&rc, &mca_btl_usnic_component.super.btl_version,
 267                             modexes, size);
 268     free(modexes);
 269 
 270     return rc;
 271 }
 272 
 273 
 274 
 275 
 276 
 277 
 278 
 279 
 280 
 281 
 282 
 283 
 284 static int check_reg_mem_basics(void)
 285 {
 286 #if HAVE_DECL_RLIMIT_MEMLOCK
 287     int ret = OPAL_SUCCESS;
 288     struct rlimit limit;
 289     char *str_limit = NULL;
 290 
 291     ret = getrlimit(RLIMIT_MEMLOCK, &limit);
 292     if (0 == ret) {
 293         if ((long) limit.rlim_cur > (64 * 1024) ||
 294             limit.rlim_cur == RLIM_INFINITY) {
 295             return OPAL_SUCCESS;
 296         } else {
 297             opal_asprintf(&str_limit, "%ld", (long)limit.rlim_cur);
 298         }
 299     } else {
 300         opal_asprintf(&str_limit, "Unknown");
 301     }
 302 
 303     opal_show_help("help-mpi-btl-usnic.txt", "check_reg_mem_basics fail",
 304                    true,
 305                    opal_process_info.nodename,
 306                    str_limit);
 307 
 308     return OPAL_ERR_OUT_OF_RESOURCE;
 309 #else
 310     
 311 
 312     return OPAL_SUCCESS;
 313 #endif
 314 }
 315 
 316 
 317 
 318 
 319 
 320 static int check_usnic_config(opal_btl_usnic_module_t *module,
 321         int num_local_procs)
 322 {
 323     char str[128];
 324     unsigned unlp;
 325     struct fi_usnic_info *uip;
 326 
 327     uip = &module->usnic_info;
 328 
 329     
 330 
 331     unlp = (unsigned) num_local_procs + 1;
 332 
 333     
 334 
 335 
 336 
 337 
 338 
 339 
 340 
 341 
 342 
 343 
 344 
 345 
 346     if (uip->ui.v1.ui_num_vf < unlp) {
 347         snprintf(str, sizeof(str), "Not enough usNICs (found %d, need %d)",
 348                  uip->ui.v1.ui_num_vf, unlp);
 349         goto error;
 350     }
 351 
 352     if (uip->ui.v1.ui_qp_per_vf < USNIC_NUM_CHANNELS) {
 353         snprintf(str, sizeof(str), "Not enough transmit/receive queues per usNIC (found %d, need %d)",
 354                  uip->ui.v1.ui_qp_per_vf,
 355                  USNIC_NUM_CHANNELS);
 356         goto error;
 357     }
 358     if (uip->ui.v1.ui_cq_per_vf < USNIC_NUM_CHANNELS) {
 359         snprintf(str, sizeof(str),
 360                  "Not enough completion queues per usNIC (found %d, need %d)",
 361                  uip->ui.v1.ui_cq_per_vf,
 362                  USNIC_NUM_CHANNELS);
 363         goto error;
 364     }
 365 
 366     
 367     return OPAL_SUCCESS;
 368 
 369  error:
 370     
 371     opal_show_help("help-mpi-btl-usnic.txt",
 372                    "not enough usnic resources",
 373                    true,
 374                    opal_process_info.nodename,
 375                    module->linux_device_name,
 376                    str);
 377     return OPAL_ERROR;
 378 }
 379 
 380 
 381 static void usnic_clock_callback(int fd, short flags, void *timeout)
 382 {
 383     
 384     opal_btl_usnic_ticks += 1000000;
 385 
 386     
 387     usnic_component_progress();
 388 
 389     opal_event_add(&usnic_clock_timer_event, timeout);
 390 }
 391 
 392 
 393 
 394 
 395 
 396 
 397 
 398 
 399 
 400 
 401 
 402 
 403 
 404 static usnic_if_filter_t *parse_ifex_str(const char *orig_str,
 405                                          const char *name)
 406 {
 407     int i, ret;
 408     char **argv, *str, *tmp;
 409     struct sockaddr_storage argv_inaddr;
 410     uint32_t argv_prefix, addr;
 411     usnic_if_filter_t *filter;
 412     int n_argv;
 413 
 414     if (NULL == orig_str) {
 415         return NULL;
 416     }
 417 
 418     
 419     filter = calloc(sizeof(*filter), 1);
 420     if (NULL == filter) {
 421         OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
 422         return NULL;
 423     }
 424 
 425     argv = opal_argv_split(orig_str, ',');
 426     if (NULL == argv || 0 == (n_argv = opal_argv_count(argv))) {
 427         free(filter);
 428         opal_argv_free(argv);
 429         return NULL;
 430     }
 431 
 432     
 433     filter->elts = malloc(sizeof(*filter->elts) * n_argv);
 434     if (NULL == filter->elts) {
 435         OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
 436         free(filter);
 437         opal_argv_free(argv);
 438         return NULL;
 439     }
 440 
 441     
 442 
 443 
 444     filter->n_elt = 0;
 445     for (i = 0; NULL != argv[i]; ++i) {
 446         
 447 
 448         if (isalpha(argv[i][0])) {
 449             filter->elts[filter->n_elt].is_netmask = false;
 450             filter->elts[filter->n_elt].if_name = strdup(argv[i]);
 451             opal_output_verbose(20, USNIC_OUT,
 452                                 "btl:usnic:parse_ifex_str: parsed %s device name: %s",
 453                                 name, filter->elts[filter->n_elt].if_name);
 454 
 455             ++filter->n_elt;
 456             continue;
 457         }
 458 
 459         
 460 
 461         argv_prefix = 0;
 462         tmp = strdup(argv[i]);
 463         str = strchr(argv[i], '/');
 464         if (NULL == str) {
 465             opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
 466                            true, name, opal_process_info.nodename,
 467                            tmp, "Invalid specification (missing \"/\")");
 468             free(tmp);
 469             continue;
 470         }
 471         *str = '\0';
 472         argv_prefix = atoi(str + 1);
 473         if (argv_prefix < 1 || argv_prefix > 32) {
 474             opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
 475                            true, name, opal_process_info.nodename,
 476                            tmp, "Invalid specification (prefix < 1 or prefix >32)");
 477             free(tmp);
 478             continue;
 479         }
 480 
 481         
 482         ((struct sockaddr*) &argv_inaddr)->sa_family = AF_INET;
 483         ret = inet_pton(AF_INET, argv[i],
 484                         &((struct sockaddr_in*) &argv_inaddr)->sin_addr);
 485         if (1 != ret) {
 486             opal_show_help("help-mpi-btl-usnic.txt", "invalid if_inexclude",
 487                            true, name, opal_process_info.nodename, tmp,
 488                            "Invalid specification (inet_pton() failed)");
 489             free(tmp);
 490             continue;
 491         }
 492         opal_output_verbose(20, USNIC_OUT,
 493                             "btl:usnic:parse_ifex_str: parsed %s address+prefix: %s / %u",
 494                             name,
 495                             opal_net_get_hostname((struct sockaddr*) &argv_inaddr),
 496                             argv_prefix);
 497 
 498         memcpy(&addr,
 499                &((struct sockaddr_in*) &argv_inaddr)->sin_addr,
 500                sizeof(addr));
 501 
 502         
 503 
 504         filter->elts[filter->n_elt].is_netmask = true;
 505         filter->elts[filter->n_elt].if_name = NULL;
 506         filter->elts[filter->n_elt].netmask_be =
 507             usnic_cidrlen_to_netmask(argv_prefix);
 508         filter->elts[filter->n_elt].addr_be = addr &
 509             filter->elts[filter->n_elt].netmask_be;
 510         ++filter->n_elt;
 511 
 512         free(tmp);
 513     }
 514     assert(i == n_argv); 
 515 
 516     opal_argv_free(argv);
 517 
 518     
 519     if (filter->n_elt == 0) {
 520         free_filter(filter);
 521         return NULL;
 522     }
 523 
 524     return filter;
 525 }
 526 
 527 
 528 
 529 
 530 static bool filter_module(opal_btl_usnic_module_t *module,
 531                           usnic_if_filter_t *filter,
 532                           bool filter_incl)
 533 {
 534     int i;
 535     uint32_t module_mask;
 536     struct sockaddr_in *src;
 537     struct fi_usnic_info *uip;
 538     struct fi_info *info;
 539     bool match;
 540     const char *linux_device_name;
 541 
 542     info = module->fabric_info;
 543     uip = &module->usnic_info;
 544     src = info->src_addr;
 545     linux_device_name = module->linux_device_name;
 546     module_mask = src->sin_addr.s_addr & uip->ui.v1.ui_netmask_be;
 547     match = false;
 548     for (i = 0; i < filter->n_elt; ++i) {
 549         if (filter->elts[i].is_netmask) {
 550             
 551             if (filter->elts[i].netmask_be == uip->ui.v1.ui_netmask_be &&
 552                 filter->elts[i].addr_be == module_mask) {
 553                 match = true;
 554                 break;
 555             }
 556         }
 557         else {
 558             if (strcmp(filter->elts[i].if_name, linux_device_name) == 0) {
 559                 match = true;
 560                 break;
 561             }
 562         }
 563     }
 564 
 565     
 566     return match ^ !filter_incl;
 567 }
 568 
 569 
 570 static void free_filter(usnic_if_filter_t *filter)
 571 {
 572     int i;
 573 
 574     if (filter == NULL) {
 575         return;
 576     }
 577 
 578     if (NULL != filter->elts) {
 579         for (i = 0; i < filter->n_elt; ++i) {
 580             if (!filter->elts[i].is_netmask) {
 581                 free(filter->elts[i].if_name);
 582             }
 583         }
 584         free(filter->elts);
 585     }
 586     free(filter);
 587 }
 588 
 589 
 590 
 591 
 592 
 593 
 594 
 595 
 596 static mca_btl_base_module_t** usnic_component_init(int* num_btl_modules,
 597                                                     bool want_progress_threads,
 598                                                     bool want_mpi_threads)
 599 {
 600     mca_btl_base_module_t **btls = NULL;
 601     int i, j, num_final_modules;
 602     int num_devs;
 603     opal_btl_usnic_module_t *module;
 604     usnic_if_filter_t *filter = NULL;
 605     bool keep_module;
 606     bool filter_incl = false;
 607     int min_distance, num_local_procs;
 608     struct fi_info *info_list;
 609     struct fi_info *info;
 610     struct fid_fabric *fabric;
 611     struct fid_domain *domain;
 612     int ret;
 613 
 614     *num_btl_modules = 0;
 615 
 616     
 617     if (want_mpi_threads && !mca_btl_base_thread_multiple_override) {
 618         if (OPAL_MAJOR_VERSION >= 2) {
 619             opal_output_verbose(5, USNIC_OUT,
 620                                 "btl:usnic: MPI_THREAD_MULTIPLE support is in testing phase.");
 621         }
 622         else {
 623             opal_output_verbose(5, USNIC_OUT,
 624                                 "btl:usnic: MPI_THREAD_MULTIPLE is not supported in version < 2.");
 625             return NULL;
 626         }
 627     }
 628 
 629     OBJ_CONSTRUCT(&btl_usnic_lock, opal_recursive_mutex_t);
 630 
 631     
 632 
 633 
 634 
 635 
 636 
 637 
 638 
 639 
 640 
 641 
 642 
 643 
 644 
 645 
 646 
 647 
 648 
 649 
 650 
 651 
 652 
 653 
 654 
 655 
 656 
 657 
 658 
 659 
 660 
 661 
 662 
 663 
 664 
 665 
 666 
 667 
 668 
 669 
 670 
 671 
 672 
 673 
 674 
 675 
 676 
 677 
 678 
 679 
 680 
 681 
 682 
 683 
 684     
 685 
 686     uint32_t libfabric_api;
 687     libfabric_api = fi_version();
 688     if (libfabric_api < FI_VERSION(1, 3)) {
 689         opal_output_verbose(5, USNIC_OUT,
 690                             "btl:usnic: disqualifiying myself because Libfabric does not support v1.3 of the API (v1.3 is *required* for correct usNIC functionality).");
 691         return NULL;
 692     }
 693 
 694     
 695 
 696     if (libfabric_api > FI_VERSION(1, 3)) {
 697         libfabric_api = FI_VERSION(1, 4);
 698     }
 699 
 700     struct fi_info hints = {0};
 701     struct fi_ep_attr ep_attr = {0};
 702     struct fi_fabric_attr fabric_attr = {0};
 703     struct fi_rx_attr rx_attr = {0};
 704     struct fi_tx_attr tx_attr = {0};
 705 
 706     
 707     fabric_attr.prov_name = "usnic";
 708     ep_attr.type = FI_EP_DGRAM;
 709 
 710     hints.caps = FI_MSG;
 711     hints.mode = FI_LOCAL_MR | FI_MSG_PREFIX;
 712     hints.addr_format = FI_SOCKADDR;
 713     hints.ep_attr = &ep_attr;
 714     hints.fabric_attr = &fabric_attr;
 715     hints.tx_attr = &tx_attr;
 716     hints.rx_attr = &rx_attr;
 717 
 718     tx_attr.iov_limit = 1;
 719     rx_attr.iov_limit = 1;
 720 
 721     ret = fi_getinfo(libfabric_api, NULL, 0, 0, &hints, &info_list);
 722     if (0 != ret) {
 723         opal_output_verbose(5, USNIC_OUT,
 724                             "btl:usnic: disqualifiying myself due to fi_getinfo(3) failure: %s (%d)", strerror(-ret), ret);
 725         return NULL;
 726     }
 727 
 728     num_devs = 0;
 729     for (info = info_list; NULL != info; info = info->next) {
 730         ++num_devs;
 731     }
 732     if (0 == num_devs) {
 733         opal_output_verbose(5, USNIC_OUT,
 734             "btl:usnic: disqualifiying myself due to lack of libfabric providers");
 735         return NULL;
 736     }
 737 
 738     
 739 
 740     if (OPAL_SUCCESS != check_reg_mem_basics()) {
 741         opal_output_verbose(5, USNIC_OUT,
 742                             "btl:usnic: disqualifiying myself due to lack of lockable memory");
 743         return NULL;
 744     }
 745 
 746     
 747 
 748 
 749 
 750 
 751 
 752     opal_output_verbose(5, USNIC_OUT,
 753                         "btl:usnic: usNIC fabrics found");
 754 
 755     opal_proc_t *me = opal_proc_local_get();
 756     opal_process_name_t *name = &(me->proc_name);
 757     mca_btl_usnic_component.my_hashed_rte_name =
 758         usnic_compat_rte_hash_name(name);
 759     MSGDEBUG1_OUT("%s: my_hashed_rte_name=0x%" PRIx64,
 760                    __func__, mca_btl_usnic_component.my_hashed_rte_name);
 761 
 762     opal_srand(&opal_btl_usnic_rand_buff, ((uint32_t) getpid()));
 763 
 764     
 765 
 766     mca_btl_usnic_component.num_modules = num_devs;
 767     btls = (struct mca_btl_base_module_t**)
 768         malloc(mca_btl_usnic_component.num_modules *
 769                sizeof(opal_btl_usnic_module_t*));
 770     if (NULL == btls) {
 771         OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
 772         goto send_modex;
 773     }
 774 
 775     
 776     mca_btl_usnic_component.usnic_all_modules =
 777         calloc(mca_btl_usnic_component.num_modules,
 778                sizeof(*mca_btl_usnic_component.usnic_all_modules));
 779     mca_btl_usnic_component.usnic_active_modules =
 780         calloc(mca_btl_usnic_component.num_modules,
 781                sizeof(*mca_btl_usnic_component.usnic_active_modules));
 782     if (NULL == mca_btl_usnic_component.usnic_all_modules ||
 783         NULL == mca_btl_usnic_component.usnic_active_modules) {
 784         OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
 785         goto error;
 786     }
 787 
 788     
 789 
 790 
 791 
 792     if (NULL != mca_btl_usnic_component.if_include) {
 793         opal_output_verbose(20, USNIC_OUT,
 794                             "btl:usnic:filter_module: if_include=%s",
 795                             mca_btl_usnic_component.if_include);
 796 
 797         filter_incl = true;
 798         filter = parse_ifex_str(mca_btl_usnic_component.if_include, "include");
 799     } else if (NULL != mca_btl_usnic_component.if_exclude) {
 800         opal_output_verbose(20, USNIC_OUT,
 801                             "btl:usnic:filter_module: if_exclude=%s",
 802                             mca_btl_usnic_component.if_exclude);
 803 
 804         filter_incl = false;
 805         filter = parse_ifex_str(mca_btl_usnic_component.if_exclude, "exclude");
 806     }
 807 
 808     num_local_procs = opal_process_info.num_local_peers;
 809 
 810     
 811 
 812     info = info_list;
 813     for (j = i = 0; i < num_devs &&
 814              (0 == mca_btl_usnic_component.max_modules ||
 815               i < mca_btl_usnic_component.max_modules);
 816              ++i, info = info->next) {
 817 
 818         
 819         char *linux_device_name;
 820         if (libfabric_api <= FI_VERSION(1, 3)) {
 821             linux_device_name = info->fabric_attr->name;
 822         } else {
 823             linux_device_name = info->domain_attr->name;
 824         }
 825 
 826         ret = fi_fabric(info->fabric_attr, &fabric, NULL);
 827         if (0 != ret) {
 828             opal_show_help("help-mpi-btl-usnic.txt",
 829                            "libfabric API failed",
 830                            true,
 831                            opal_process_info.nodename,
 832                            linux_device_name,
 833                            "fi_fabric()", __FILE__, __LINE__,
 834                            ret,
 835                            strerror(-ret));
 836             continue;
 837         }
 838         opal_memchecker_base_mem_defined(&fabric, sizeof(fabric));
 839 
 840         ret = fi_domain(fabric, info, &domain, NULL);
 841         if (0 != ret) {
 842             opal_show_help("help-mpi-btl-usnic.txt",
 843                            "libfabric API failed",
 844                            true,
 845                            opal_process_info.nodename,
 846                            linux_device_name,
 847                            "fi_domain()", __FILE__, __LINE__,
 848                            ret,
 849                            strerror(-ret));
 850             continue;
 851         }
 852         opal_memchecker_base_mem_defined(&domain, sizeof(domain));
 853 
 854         opal_output_verbose(5, USNIC_OUT,
 855                             "btl:usnic: found: usNIC device %s",
 856                             linux_device_name);
 857 
 858         
 859 
 860 
 861         module = &(mca_btl_usnic_component.usnic_all_modules[j]);
 862         memcpy(module, &opal_btl_usnic_module_template,
 863                sizeof(opal_btl_usnic_module_t));
 864         module->fabric = fabric;
 865         module->domain = domain;
 866         module->fabric_info = info;
 867         module->libfabric_api = libfabric_api;
 868         module->linux_device_name = strdup(linux_device_name);
 869         if (NULL == module->linux_device_name) {
 870             OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
 871             goto error;
 872         }
 873 
 874         
 875 
 876 
 877         ret = fi_open_ops(&fabric->fid, FI_USNIC_FABRIC_OPS_1, 0,
 878                 (void **)&module->usnic_fabric_ops, NULL);
 879         if (ret != 0) {
 880             opal_output_verbose(5, USNIC_OUT,
 881                         "btl:usnic: device %s fabric_open_ops failed %d (%s)",
 882                         module->linux_device_name, ret, fi_strerror(-ret));
 883             fi_close(&domain->fid);
 884             fi_close(&fabric->fid);
 885             continue;
 886         }
 887 
 888         ret =
 889             module->usnic_fabric_ops->getinfo(1,
 890                                             fabric,
 891                                             &module->usnic_info);
 892         if (ret != 0) {
 893             opal_output_verbose(5, USNIC_OUT,
 894                         "btl:usnic: device %s usnic_getinfo failed %d (%s)",
 895                         module->linux_device_name, ret, fi_strerror(-ret));
 896             fi_close(&domain->fid);
 897             fi_close(&fabric->fid);
 898             continue;
 899         }
 900         opal_output_verbose(5, USNIC_OUT,
 901                             "btl:usnic: device %s usnic_info: link speed=%d, netmask=0x%x, ifname=%s, num_vf=%d, qp/vf=%d, cq/vf=%d",
 902                             module->linux_device_name,
 903                             (unsigned int) module->usnic_info.ui.v1.ui_link_speed,
 904                             (unsigned int) module->usnic_info.ui.v1.ui_netmask_be,
 905                             module->usnic_info.ui.v1.ui_ifname,
 906                             module->usnic_info.ui.v1.ui_num_vf,
 907                             module->usnic_info.ui.v1.ui_qp_per_vf,
 908                             module->usnic_info.ui.v1.ui_cq_per_vf);
 909 
 910         
 911         if (filter != NULL) {
 912             keep_module = filter_module(module, filter, filter_incl);
 913             opal_output_verbose(5, USNIC_OUT,
 914                                 "btl:usnic: %s %s due to %s",
 915                                 (keep_module ? "keeping" : "skipping"),
 916                                 module->linux_device_name,
 917                                 (filter_incl ? "if_include" : "if_exclude"));
 918             if (!keep_module) {
 919                 fi_close(&domain->fid);
 920                 fi_close(&fabric->fid);
 921                 continue;
 922             }
 923         }
 924 
 925         
 926 
 927 
 928 
 929 
 930         if (0 == j &&
 931             check_usnic_config(module, num_local_procs) != OPAL_SUCCESS) {
 932             opal_output_verbose(5, USNIC_OUT,
 933                                 "btl:usnic: device %s is not provisioned with enough resources -- skipping",
 934                                 module->linux_device_name);
 935             fi_close(&domain->fid);
 936             fi_close(&fabric->fid);
 937 
 938             mca_btl_usnic_component.num_modules = 0;
 939             goto error;
 940         }
 941 
 942         
 943         
 944         
 945 
 946         opal_output_verbose(5, USNIC_OUT,
 947                             "btl:usnic: device %s looks good!",
 948                             module->linux_device_name);
 949 
 950         
 951         btls[j++] = &(module->super);
 952     }
 953     mca_btl_usnic_component.num_modules = j;
 954 
 955     
 956     if (filter != NULL) {
 957         free_filter(filter);
 958         filter = NULL;
 959     }
 960 
 961     
 962 
 963     if (mca_btl_usnic_component.num_modules > 0 &&
 964         mca_btl_usnic_component.connectivity_enabled) {
 965         mca_btl_usnic_component.opal_evbase = opal_progress_thread_init(NULL);
 966         if (OPAL_SUCCESS != opal_btl_usnic_connectivity_agent_init() ||
 967             OPAL_SUCCESS != opal_btl_usnic_connectivity_client_init()) {
 968             opal_progress_thread_finalize(NULL);
 969             return NULL;
 970         }
 971     }
 972 
 973     
 974 
 975 
 976     for (num_final_modules = i = 0;
 977          i < mca_btl_usnic_component.num_modules; ++i) {
 978         module = (opal_btl_usnic_module_t*) btls[i];
 979 
 980         
 981         if (OPAL_SUCCESS != opal_btl_usnic_module_init(module)) {
 982             opal_output_verbose(5, USNIC_OUT,
 983                                 "btl:usnic: failed to init module for %s",
 984                                 module->if_ipv4_addr_str);
 985             continue;
 986         }
 987 
 988         
 989         
 990         
 991 
 992         
 993 
 994 
 995         btls[num_final_modules++] = &(module->super);
 996 
 997         
 998         const char *devname = module->linux_device_name;
 999         opal_output_verbose(5, USNIC_OUT,
1000                             "btl:usnic: %s num sqe=%d, num rqe=%d, num cqe=%d, num aveqe=%d",
1001                             devname,
1002                             module->sd_num,
1003                             module->rd_num,
1004                             module->cq_num,
1005                             module->av_eq_num);
1006         opal_output_verbose(5, USNIC_OUT,
1007                             "btl:usnic: %s priority MTU = %" PRIsize_t,
1008                             devname,
1009                             module->max_tiny_msg_size);
1010         opal_output_verbose(5, USNIC_OUT,
1011                             "btl:usnic: %s priority limit = %" PRIsize_t,
1012                             devname,
1013                             module->max_tiny_payload);
1014         opal_output_verbose(5, USNIC_OUT,
1015                             "btl:usnic: %s eager limit = %" PRIsize_t,
1016                             devname,
1017                             module->super.btl_eager_limit);
1018         opal_output_verbose(5, USNIC_OUT,
1019                             "btl:usnic: %s eager rndv limit = %" PRIsize_t,
1020                             devname,
1021                             module->super.btl_rndv_eager_limit);
1022         opal_output_verbose(5, USNIC_OUT,
1023                             "btl:usnic: %s max send size= %" PRIsize_t
1024                             " (not overrideable)",
1025                             devname,
1026                             module->super.btl_max_send_size);
1027         opal_output_verbose(5, USNIC_OUT,
1028                             "btl:usnic: %s exclusivity = %d",
1029                             devname,
1030                             module->super.btl_exclusivity);
1031     }
1032 
1033     
1034 
1035     mca_btl_usnic_component.num_modules = num_final_modules;
1036 
1037     
1038 
1039 
1040 
1041 
1042 
1043 
1044 
1045     if (0 == num_final_modules) {
1046         opal_output_verbose(5, USNIC_OUT,
1047                             "btl:usnic: returning 0 modules");
1048         goto error;
1049     }
1050 
1051     
1052 
1053     memcpy(mca_btl_usnic_component.usnic_active_modules, btls,
1054            num_final_modules * sizeof(*btls));
1055 
1056     
1057 
1058 
1059 
1060 
1061     min_distance = 9999999;
1062     for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
1063         module = (opal_btl_usnic_module_t*) btls[i];
1064         if (module->numa_distance < min_distance) {
1065             min_distance = module->numa_distance;
1066         }
1067     }
1068     for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
1069         module = (opal_btl_usnic_module_t*) btls[i];
1070         if (module->numa_distance > min_distance) {
1071             ++module->super.btl_latency;
1072             opal_output_verbose(5, USNIC_OUT,
1073                                 "btl:usnic: %s is far from me; increasing latency rating",
1074                                 module->if_ipv4_addr_str);
1075         }
1076     }
1077 
1078     
1079     opal_event_set(opal_sync_event_base, &usnic_clock_timer_event,
1080                    -1, 0, usnic_clock_callback,
1081                    &usnic_clock_timeout);
1082     usnic_clock_timer_event_set = true;
1083 
1084     
1085     usnic_clock_timeout.tv_sec = 0;
1086     usnic_clock_timeout.tv_usec = 1000;
1087     opal_event_add(&usnic_clock_timer_event, &usnic_clock_timeout);
1088 
1089     
1090     opal_btl_usnic_setup_mpit_pvars();
1091 
1092     
1093     *num_btl_modules = mca_btl_usnic_component.num_modules;
1094     opal_output_verbose(5, USNIC_OUT,
1095                         "btl:usnic: returning %d modules", *num_btl_modules);
1096 
1097  send_modex:
1098     usnic_modex_send();
1099     return btls;
1100 
1101  error:
1102     
1103     free(btls);
1104     btls = NULL;
1105     free(mca_btl_usnic_component.usnic_all_modules);
1106     mca_btl_usnic_component.usnic_all_modules = NULL;
1107     free(mca_btl_usnic_component.usnic_active_modules);
1108     mca_btl_usnic_component.usnic_active_modules = NULL;
1109 
1110     
1111     if (filter != NULL) {
1112         free_filter(filter);
1113         filter = NULL;
1114     }
1115 
1116     goto send_modex;
1117 }
1118 
1119 
1120 
1121 
1122 
1123 
1124 
1125 
1126 
1127 
1128 
1129 static int usnic_handle_completion(opal_btl_usnic_module_t* module,
1130     opal_btl_usnic_channel_t *channel, struct fi_cq_entry *completion);
1131 static int usnic_component_progress_2(void);
1132 static void usnic_handle_cq_error(opal_btl_usnic_module_t* module,
1133     opal_btl_usnic_channel_t *channel, int cq_ret);
1134 
1135 static int usnic_component_progress(void)
1136 {
1137     int i;
1138     int count;
1139     opal_btl_usnic_recv_segment_t* rseg;
1140     opal_btl_usnic_module_t* module;
1141     struct fi_cq_entry completion;
1142     opal_btl_usnic_channel_t *channel;
1143     static bool fastpath_ok = true;
1144 
1145     
1146     opal_btl_usnic_ticks += 5000;
1147 
1148     count = 0;
1149     if (fastpath_ok) {
1150         for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
1151             module = mca_btl_usnic_component.usnic_active_modules[i];
1152             channel = &module->mod_channels[USNIC_PRIORITY_CHANNEL];
1153 
1154             assert(channel->chan_deferred_recv == NULL);
1155 
1156             int ret = fi_cq_read(channel->cq, &completion, 1);
1157             assert(0 != ret);
1158             if (OPAL_LIKELY(1 == ret)) {
1159                 opal_memchecker_base_mem_defined(&completion,
1160                                                  sizeof(completion));
1161                 rseg = (opal_btl_usnic_recv_segment_t*) completion.op_context;
1162                 if (OPAL_LIKELY(OPAL_BTL_USNIC_SEG_RECV ==
1163                             rseg->rs_base.us_type)) {
1164                     opal_btl_usnic_recv_fast(module, rseg, channel);
1165                     ++module->stats.num_seg_total_completions;
1166                     ++module->stats.num_seg_recv_completions;
1167                     fastpath_ok = false;    
1168                     return 1;
1169                 } else {
1170                     count += usnic_handle_completion(module, channel,
1171                                                      &completion);
1172                 }
1173             } else if (OPAL_LIKELY(-FI_EAGAIN == ret)) {
1174                 continue;
1175             } else {
1176                 usnic_handle_cq_error(module, channel, ret);
1177             }
1178         }
1179     }
1180 
1181     fastpath_ok = true;
1182     return count + usnic_component_progress_2();
1183 }
1184 
1185 static int usnic_handle_completion(
1186     opal_btl_usnic_module_t* module,
1187     opal_btl_usnic_channel_t *channel,
1188     struct fi_cq_entry *completion)
1189 {
1190     opal_btl_usnic_segment_t* seg;
1191     opal_btl_usnic_recv_segment_t* rseg;
1192 
1193     seg = (opal_btl_usnic_segment_t*)completion->op_context;
1194     rseg = (opal_btl_usnic_recv_segment_t*)seg;
1195 
1196     ++module->stats.num_seg_total_completions;
1197 
1198     
1199     opal_memchecker_base_mem_defined(seg, sizeof(*seg));
1200 
1201     OPAL_THREAD_LOCK(&btl_usnic_lock);
1202 
1203     
1204     switch(seg->us_type) {
1205 
1206     
1207     case OPAL_BTL_USNIC_SEG_ACK:
1208         ++module->stats.num_seg_ack_completions;
1209         opal_btl_usnic_ack_complete(module,
1210                 (opal_btl_usnic_ack_segment_t *)seg);
1211         break;
1212 
1213     
1214 
1215     case OPAL_BTL_USNIC_SEG_FRAG:
1216         ++module->stats.num_seg_frag_completions;
1217         opal_btl_usnic_frag_send_complete(module,
1218                 (opal_btl_usnic_frag_segment_t*)seg);
1219         break;
1220 
1221     
1222 
1223     case OPAL_BTL_USNIC_SEG_CHUNK:
1224         ++module->stats.num_seg_chunk_completions;
1225         opal_btl_usnic_chunk_send_complete(module,
1226                 (opal_btl_usnic_chunk_segment_t*)seg);
1227         break;
1228 
1229     
1230     case OPAL_BTL_USNIC_SEG_RECV:
1231         ++module->stats.num_seg_recv_completions;
1232         opal_btl_usnic_recv(module, rseg, channel);
1233         break;
1234 
1235     default:
1236         BTL_ERROR(("Unhandled completion segment type %d", seg->us_type));
1237         break;
1238     }
1239 
1240     OPAL_THREAD_UNLOCK(&btl_usnic_lock);
1241     return 1;
1242 }
1243 
1244 static void
1245 usnic_handle_cq_error(opal_btl_usnic_module_t* module,
1246     opal_btl_usnic_channel_t *channel, int cq_ret)
1247 {
1248     int rc;
1249     struct fi_cq_err_entry err_entry;
1250     opal_btl_usnic_recv_segment_t* rseg;
1251 
1252     if (cq_ret != -FI_EAVAIL) {
1253         BTL_ERROR(("%s: cq_read ret = %d (%s)",
1254                module->linux_device_name, cq_ret,
1255                fi_strerror(-cq_ret)));
1256         channel->chan_error = true;
1257     }
1258 
1259     rc = fi_cq_readerr(channel->cq, &err_entry, 0);
1260     if (rc == -FI_EAGAIN) {
1261         return;
1262     } else if (rc != 1) {
1263         BTL_ERROR(("%s: cq_readerr ret = %d (expected 1)",
1264                    module->linux_device_name, rc));
1265         channel->chan_error = true;
1266     }
1267 
1268     
1269 
1270     else if (FI_ECRC == err_entry.prov_errno ||
1271              FI_ETRUNC == err_entry.prov_errno) {
1272 #if MSGDEBUG1
1273         static int once = 0;
1274         if (once++ == 0) {
1275             BTL_ERROR(("%s: Channel %d, %s",
1276                        module->linux_device_name,
1277                        channel->chan_index,
1278                        FI_ECRC == err_entry.prov_errno ?
1279                        "CRC error" : "message truncation"));
1280         }
1281 #endif
1282 
1283         
1284         ++module->stats.num_crc_errors;
1285 
1286         
1287         ++module->stats.num_recv_reposts;
1288 
1289         
1290         rseg = err_entry.op_context;
1291         if (OPAL_BTL_USNIC_SEG_RECV == rseg->rs_base.us_type) {
1292             rseg->rs_next = channel->repost_recv_head;
1293             channel->repost_recv_head = rseg;
1294         }
1295     } else {
1296         BTL_ERROR(("%s: CQ[%d] prov_err = %d",
1297                    module->linux_device_name, channel->chan_index,
1298                    err_entry.prov_errno));
1299         channel->chan_error = true;
1300     }
1301 }
1302 
1303 static int usnic_component_progress_2(void)
1304 {
1305     int i, j, count = 0, num_events, ret;
1306     opal_btl_usnic_module_t* module;
1307     static struct fi_cq_entry completions[OPAL_BTL_USNIC_NUM_COMPLETIONS];
1308     opal_btl_usnic_channel_t *channel;
1309     int rc;
1310     int c;
1311 
1312     
1313     opal_btl_usnic_ticks += 5000;
1314 
1315     
1316     for (i = 0; i < mca_btl_usnic_component.num_modules; i++) {
1317         module = mca_btl_usnic_component.usnic_active_modules[i];
1318 
1319         
1320         for (c=0; c<USNIC_NUM_CHANNELS; ++c) {
1321             channel = &module->mod_channels[c];
1322 
1323             if (channel->chan_deferred_recv != NULL) {
1324                 (void) opal_btl_usnic_recv_frag_bookkeeping(module,
1325                         channel->chan_deferred_recv, channel);
1326                 channel->chan_deferred_recv = NULL;
1327             }
1328 
1329             num_events = ret =
1330                 fi_cq_read(channel->cq, completions,
1331                            OPAL_BTL_USNIC_NUM_COMPLETIONS);
1332             assert(0 != ret);
1333             opal_memchecker_base_mem_defined(&ret, sizeof(ret));
1334             if (OPAL_UNLIKELY(ret < 0 && -FI_EAGAIN != ret)) {
1335                 usnic_handle_cq_error(module, channel, num_events);
1336                 num_events = 0;
1337             } else if (-FI_EAGAIN == ret) {
1338                 num_events = 0;
1339             }
1340 
1341             opal_memchecker_base_mem_defined(completions,
1342                                              sizeof(completions[0]) *
1343                                              num_events);
1344             
1345             for (j = 0; j < num_events; j++) {
1346                 count += usnic_handle_completion(module, channel,
1347                                                  &completions[j]);
1348             }
1349 
1350             
1351 
1352 
1353             if (channel->chan_error) {
1354                 channel->chan_error = false;
1355                 return OPAL_ERROR;
1356             }
1357 
1358             
1359             opal_btl_usnic_module_progress_sends(module);
1360 
1361             
1362             if (OPAL_LIKELY(NULL != channel->repost_recv_head)) {
1363                 rc = opal_btl_usnic_post_recv_list(channel);
1364                 if (OPAL_UNLIKELY(rc != 0)) {
1365                     BTL_ERROR(("error posting recv: %s\n", strerror(errno)));
1366                     return OPAL_ERROR;
1367                 }
1368             }
1369         }
1370     }
1371 
1372     return count;
1373 }
1374 
1375 
1376 static void dump_endpoint(opal_btl_usnic_endpoint_t *endpoint)
1377 {
1378     int i;
1379     opal_btl_usnic_frag_t *frag;
1380     opal_btl_usnic_send_segment_t *sseg;
1381     struct in_addr ia;
1382     char ep_addr_str[INET_ADDRSTRLEN];
1383     char tmp[128], str[2048];
1384 
1385     memset(ep_addr_str, 0x00, sizeof(ep_addr_str));
1386     ia.s_addr = endpoint->endpoint_remote_modex.ipv4_addr;
1387     inet_ntop(AF_INET, &ia, ep_addr_str, sizeof(ep_addr_str));
1388 
1389     opal_output(0, "    endpoint %p, %s job=%u, rank=%u rts=%s s_credits=%"PRIi32"\n",
1390                 (void *)endpoint, ep_addr_str,
1391                 endpoint->endpoint_proc->proc_opal->proc_name.jobid,
1392                 endpoint->endpoint_proc->proc_opal->proc_name.vpid,
1393                 (endpoint->endpoint_ready_to_send ? "true" : "false"),
1394                 endpoint->endpoint_send_credits);
1395     opal_output(0, "      endpoint->frag_send_queue:\n");
1396 
1397     OPAL_LIST_FOREACH(frag, &endpoint->endpoint_frag_send_queue,
1398                       opal_btl_usnic_frag_t) {
1399         opal_btl_usnic_small_send_frag_t *ssfrag;
1400         opal_btl_usnic_large_send_frag_t *lsfrag;
1401 
1402         snprintf(str, sizeof(str), "      --> frag %p, %s", (void *)frag,
1403                  usnic_frag_type(frag->uf_type));
1404         switch (frag->uf_type) {
1405             case OPAL_BTL_USNIC_FRAG_LARGE_SEND:
1406                 lsfrag = (opal_btl_usnic_large_send_frag_t *)frag;
1407                 snprintf(tmp, sizeof(tmp), " tag=%"PRIu8" id=%"PRIu32" offset=%llu/%llu post_cnt=%"PRIu32" ack_bytes_left=%llu\n",
1408                         lsfrag->lsf_tag,
1409                         lsfrag->lsf_frag_id,
1410                         (unsigned long long)lsfrag->lsf_cur_offset,
1411                         (unsigned long long)lsfrag->lsf_base.sf_size,
1412                         lsfrag->lsf_base.sf_seg_post_cnt,
1413                         (unsigned long long)lsfrag->lsf_base.sf_ack_bytes_left);
1414                 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
1415                 opal_output(0, "%s", str);
1416 
1417                 OPAL_LIST_FOREACH(sseg, &lsfrag->lsf_seg_chain,
1418                                   opal_btl_usnic_send_segment_t) {
1419                     
1420                     opal_output(0, "        chunk seg %p, chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
1421                                 (void *)sseg,
1422                                 (USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
1423                                 "prio" : "data"),
1424                                 sseg->ss_hotel_room,
1425                                 sseg->ss_send_posted,
1426                                 (sseg->ss_ack_pending ? "true" : "false"));
1427                 }
1428             break;
1429 
1430             case OPAL_BTL_USNIC_FRAG_SMALL_SEND:
1431                 ssfrag = (opal_btl_usnic_small_send_frag_t *)frag;
1432                 snprintf(tmp, sizeof(tmp), " sf_size=%llu post_cnt=%"PRIu32" ack_bytes_left=%llu\n",
1433                         (unsigned long long)ssfrag->ssf_base.sf_size,
1434                         ssfrag->ssf_base.sf_seg_post_cnt,
1435                         (unsigned long long)ssfrag->ssf_base.sf_ack_bytes_left);
1436                 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
1437                 opal_output(0, "%s", str);
1438 
1439                 sseg = &ssfrag->ssf_segment;
1440                 opal_output(0, "        small seg %p, chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
1441                     (void *)sseg,
1442                     (USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
1443                         "prio" : "data"),
1444                     sseg->ss_hotel_room,
1445                     sseg->ss_send_posted,
1446                     (sseg->ss_ack_pending ? "true" : "false"));
1447             break;
1448 
1449             case OPAL_BTL_USNIC_FRAG_PUT_DEST:
1450                 
1451                 snprintf(tmp, sizeof(tmp), " put_addr=%p\n", frag->uf_remote_seg[0].seg_addr.pval);
1452                 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
1453                 opal_output(0, "%s", str);
1454             break;
1455         }
1456     }
1457 
1458     
1459 
1460 
1461 
1462     opal_output(0, "      endpoint->endpoint_sent_segs (%p):\n",
1463            (void *)endpoint->endpoint_sent_segs);
1464     for (i = 0; i < WINDOW_SIZE; ++i) {
1465         sseg = endpoint->endpoint_sent_segs[i];
1466         if (NULL != sseg) {
1467             opal_output(0, "        [%d] sseg=%p %s chan=%s hotel=%d times_posted=%"PRIu32" pending=%s\n",
1468                    i,
1469                    (void *)sseg,
1470                    usnic_seg_type_str(sseg->ss_base.us_type),
1471                    (USNIC_PRIORITY_CHANNEL == sseg->ss_channel ?
1472                     "prio" : "data"),
1473                    sseg->ss_hotel_room,
1474                    sseg->ss_send_posted,
1475                    (sseg->ss_ack_pending ? "true" : "false"));
1476         }
1477     }
1478 
1479     opal_output(0, "      ack_needed=%s n_t=%"UDSEQ" n_a=%"UDSEQ" n_r=%"UDSEQ" n_s=%"UDSEQ" rfstart=%"PRIu32"\n",
1480                 (endpoint->endpoint_ack_needed?"true":"false"),
1481                 endpoint->endpoint_next_seq_to_send,
1482                 endpoint->endpoint_ack_seq_rcvd,
1483                 endpoint->endpoint_next_contig_seq_to_recv,
1484                 endpoint->endpoint_highest_seq_rcvd,
1485                 endpoint->endpoint_rfstart);
1486 
1487     if (dump_bitvectors) {
1488         opal_btl_usnic_snprintf_bool_array(str, sizeof(str),
1489                                            endpoint->endpoint_rcvd_segs,
1490                                            WINDOW_SIZE);
1491         opal_output(0, "      rcvd_segs 0x%s", str);
1492     }
1493 }
1494 
1495 void opal_btl_usnic_component_debug(void)
1496 {
1497     int i;
1498     opal_btl_usnic_module_t *module;
1499     opal_btl_usnic_endpoint_t *endpoint;
1500     opal_btl_usnic_send_segment_t *sseg;
1501     opal_list_item_t *item;
1502     const opal_proc_t *proc = opal_proc_local_get();
1503 
1504     opal_output(0, "*** dumping usnic state for MPI_COMM_WORLD rank %u ***\n",
1505                 proc->proc_name.vpid);
1506     for (i = 0; i < (int)mca_btl_usnic_component.num_modules; ++i) {
1507         module = mca_btl_usnic_component.usnic_active_modules[i];
1508 
1509         opal_output(0, "active_modules[%d]=%p %s max{frag,chunk,tiny}=%llu,%llu,%llu\n",
1510                i, (void *)module, module->linux_device_name,
1511                (unsigned long long)module->max_frag_payload,
1512                (unsigned long long)module->max_chunk_payload,
1513                (unsigned long long)module->max_tiny_payload);
1514 
1515         opal_output(0, "  endpoints_with_sends:\n");
1516         OPAL_LIST_FOREACH(endpoint, &module->endpoints_with_sends,
1517                           opal_btl_usnic_endpoint_t) {
1518             dump_endpoint(endpoint);
1519         }
1520 
1521         opal_output(0, "  endpoints_that_need_acks:\n");
1522         OPAL_LIST_FOREACH(endpoint, &module->endpoints_that_need_acks,
1523                           opal_btl_usnic_endpoint_t) {
1524             dump_endpoint(endpoint);
1525         }
1526 
1527         
1528         opal_output(0, "  all_endpoints:\n");
1529         opal_mutex_lock(&module->all_endpoints_lock);
1530         item = opal_list_get_first(&module->all_endpoints);
1531         while (item != opal_list_get_end(&module->all_endpoints)) {
1532             endpoint = container_of(item, mca_btl_base_endpoint_t,
1533                                     endpoint_endpoint_li);
1534             item = opal_list_get_next(item);
1535             dump_endpoint(endpoint);
1536         }
1537         opal_mutex_unlock(&module->all_endpoints_lock);
1538 
1539         opal_output(0, "  pending_resend_segs:\n");
1540         OPAL_LIST_FOREACH(sseg, &module->pending_resend_segs,
1541                           opal_btl_usnic_send_segment_t) {
1542             opal_output(0, "    sseg %p\n", (void *)sseg);
1543         }
1544 
1545         opal_btl_usnic_print_stats(module, "  manual", false);
1546     }
1547 }
1548 
1549 #include "test/btl_usnic_component_test.h"