root/opal/mca/btl/usnic/btl_usnic_mca.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. reg_string
  2. reg_int
  3. reg_bool
  4. opal_btl_usnic_component_register

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2008 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2011 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006      Sandia National Laboratories. All rights
  14  *                         reserved.
  15  * Copyright (c) 2008-2019 Cisco Systems, Inc.  All rights reserved
  16  * Copyright (c) 2012-2016 Los Alamos National Security, LLC.  All rights
  17  *                         reserved.
  18  * Copyright (c) 2015      Intel, Inc. All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 
  26 #include "opal_config.h"
  27 
  28 #include <string.h>
  29 #include <errno.h>
  30 
  31 #include "opal/mca/base/mca_base_var.h"
  32 #include "opal/util/argv.h"
  33 
  34 #include "opal/constants.h"
  35 
  36 #include "opal/mca/btl/btl.h"
  37 #include "opal/mca/btl/base/base.h"
  38 
  39 #include "btl_usnic.h"
  40 #include "btl_usnic_frag.h"
  41 #include "btl_usnic_endpoint.h"
  42 #include "btl_usnic_module.h"
  43 
  44 
  45 /*
  46  * Local flags
  47  */
  48 enum {
  49     REGINT_NEG_ONE_OK = 0x01,
  50     REGINT_GE_ZERO = 0x02,
  51     REGINT_GE_ONE = 0x04,
  52     REGINT_NONZERO = 0x08,
  53 
  54     REGINT_MAX = 0x88
  55 };
  56 
  57 
  58 enum {
  59     REGSTR_EMPTY_OK = 0x01,
  60 
  61     REGSTR_MAX = 0x88
  62 };
  63 
  64 
  65 /*
  66  * utility routine for string parameter registration
  67  */
  68 static int reg_string(const char* param_name,
  69                       const char* help_string,
  70                       const char* default_value, char **storage,
  71                       int flags, int level)
  72 {
  73     *storage = (char*) default_value;
  74     mca_base_component_var_register(&mca_btl_usnic_component.super.btl_version,
  75                                     param_name, help_string,
  76                                     MCA_BASE_VAR_TYPE_STRING,
  77                                     NULL,
  78                                     0,
  79                                     0,
  80                                     level,
  81                                     MCA_BASE_VAR_SCOPE_READONLY,
  82                                     storage);
  83 
  84     if (0 == (flags & REGSTR_EMPTY_OK) &&
  85         (NULL == *storage || 0 == strlen(*storage))) {
  86         opal_output(0, "Bad parameter value for parameter \"%s\"",
  87                     param_name);
  88         return OPAL_ERR_BAD_PARAM;
  89     }
  90 
  91     return OPAL_SUCCESS;
  92 }
  93 
  94 
  95 /*
  96  * utility routine for integer parameter registration
  97  */
  98 static int reg_int(const char* param_name,
  99                    const char* help_string,
 100                    int default_value, int *storage, int flags, int level)
 101 {
 102     *storage = default_value;
 103     mca_base_component_var_register(&mca_btl_usnic_component.super.btl_version,
 104                                     param_name, help_string,
 105                                     MCA_BASE_VAR_TYPE_INT,
 106                                     NULL,
 107                                     0,
 108                                     0,
 109                                     level,
 110                                     MCA_BASE_VAR_SCOPE_READONLY,
 111                                     storage);
 112 
 113     if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
 114         return OPAL_SUCCESS;
 115     }
 116     if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
 117         (0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
 118         (0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
 119         opal_output(0, "Bad parameter value for parameter \"%s\"",
 120                     param_name);
 121         return OPAL_ERR_BAD_PARAM;
 122     }
 123 
 124     return OPAL_SUCCESS;
 125 }
 126 
 127 
 128 /*
 129  * utility routine for integer parameter registration
 130  */
 131 static int reg_bool(const char* param_name,
 132                     const char* help_string,
 133                     bool default_value, bool *storage, int level)
 134 {
 135     *storage = default_value;
 136     mca_base_component_var_register(&mca_btl_usnic_component.super.btl_version,
 137                                     param_name, help_string,
 138                                     MCA_BASE_VAR_TYPE_BOOL,
 139                                     NULL,
 140                                     0,
 141                                     0,
 142                                     level,
 143                                     MCA_BASE_VAR_SCOPE_READONLY,
 144                                     storage);
 145 
 146     return OPAL_SUCCESS;
 147 }
 148 
 149 
 150 int opal_btl_usnic_component_register(void)
 151 {
 152     int tmp, ret = 0;
 153     static int max_modules;
 154     static int stats_relative;
 155     static int want_numa_device_assignment;
 156     static int sd_num;
 157     static int rd_num;
 158     static int prio_sd_num;
 159     static int prio_rd_num;
 160     static int cq_num;
 161     static int av_eq_num;
 162     static int udp_port_base;
 163     static int max_tiny_msg_size;
 164     static int eager_limit;
 165     static int rndv_eager_limit;
 166     static int pack_lazy_threshold;
 167     static int max_short_packets;
 168 
 169 #define CHECK(expr) do {\
 170         tmp = (expr); \
 171         if (OPAL_SUCCESS != tmp) ret = tmp; \
 172      } while (0)
 173 
 174     CHECK(reg_int("max_btls",
 175                   "Maximum number of usNICs to use (default: 0 = as many as are available)",
 176                   0, &max_modules,
 177                   REGINT_GE_ZERO, OPAL_INFO_LVL_2));
 178     mca_btl_usnic_component.max_modules = (size_t) max_modules;
 179 
 180     CHECK(reg_string("if_include",
 181                      "Comma-delimited list of usNIC devices/networks to be used (e.g. \"eth3,usnic_0,10.10.0.0/16\"; empty value means to use all available usNICs).  Mutually exclusive with btl_usnic_if_exclude.",
 182                      NULL, &mca_btl_usnic_component.if_include,
 183                      REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
 184 
 185     CHECK(reg_string("if_exclude",
 186                      "Comma-delimited list of usNIC devices/networks to be excluded (empty value means to not exclude any usNICs).  Mutually exclusive with btl_usnic_if_include.",
 187                      NULL, &mca_btl_usnic_component.if_exclude,
 188                      REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
 189 
 190     CHECK(reg_int("stats",
 191                   "A non-negative integer specifying the frequency at which each usnic BTL will output statistics (default: 0 seconds, meaning that statistics are disabled)",
 192                   0, &mca_btl_usnic_component.stats_frequency, 0,
 193                   OPAL_INFO_LVL_4));
 194     mca_btl_usnic_component.stats_enabled =
 195         (bool) (mca_btl_usnic_component.stats_frequency > 0);
 196 
 197     CHECK(reg_int("stats_relative",
 198                   "If stats are enabled, output relative stats between the timestamps (vs. cumulative stats since the beginning of the job) (default: 0 -- i.e., absolute)",
 199                   0, &stats_relative, 0, OPAL_INFO_LVL_4));
 200     mca_btl_usnic_component.stats_relative = (bool) stats_relative;
 201 
 202     CHECK(reg_string("mpool_hints", "Hints to use when selecting mpool",
 203                      NULL, &mca_btl_usnic_component.usnic_mpool_hints,
 204                      REGSTR_EMPTY_OK,
 205                      OPAL_INFO_LVL_5));
 206 
 207     CHECK(reg_string("rcache", "Name of the registration cache to be used",
 208                      "grdma", &mca_btl_usnic_component.usnic_rcache_name, 0,
 209                      OPAL_INFO_LVL_5));
 210 
 211     want_numa_device_assignment = 1;
 212     CHECK(reg_int("want_numa_device_assignment",
 213                   "If 1, use only Cisco VIC ports thare are a minimum NUMA distance from the MPI process for short messages.  If 0, use all available Cisco VIC ports for short messages.  This parameter is meaningless (and ignored) unless MPI proceses are bound to processor cores.  Defaults to 1 if NUMA support is included in Open MPI; -1 otherwise.",
 214                   want_numa_device_assignment,
 215                   &want_numa_device_assignment,
 216                   0, OPAL_INFO_LVL_5));
 217     mca_btl_usnic_component.want_numa_device_assignment =
 218         (1 == want_numa_device_assignment) ? true : false;
 219 
 220     CHECK(reg_int("sd_num", "Maximum send descriptors to post (-1 = pre-set defaults; depends on number and type of devices available)",
 221                   -1, &sd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
 222     mca_btl_usnic_component.sd_num = (int32_t) sd_num;
 223 
 224     CHECK(reg_int("rd_num", "Number of pre-posted receive buffers (-1 = pre-set defaults; depends on number and type of devices available)",
 225                   -1, &rd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
 226     mca_btl_usnic_component.rd_num = (int32_t) rd_num;
 227 
 228     CHECK(reg_int("prio_sd_num", "Maximum priority send descriptors to post (-1 = pre-set defaults; depends on number and type of devices available)",
 229                   -1, &prio_sd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
 230     mca_btl_usnic_component.prio_sd_num = (int32_t) prio_sd_num;
 231 
 232     CHECK(reg_int("prio_rd_num", "Number of pre-posted priority receive buffers (-1 = pre-set defaults; depends on number and type of devices available)",
 233                   -1, &prio_rd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
 234     mca_btl_usnic_component.prio_rd_num = (int32_t) prio_rd_num;
 235 
 236     CHECK(reg_int("cq_num", "Number of completion queue entries (-1 = pre-set defaults; depends on number and type of devices available; will error if (sd_num+rd_num)>cq_num)",
 237                   -1, &cq_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
 238     mca_btl_usnic_component.cq_num = (int32_t) cq_num;
 239 
 240     CHECK(reg_int("av_eq_num", "Number of event queue entries for peer address resolution",
 241                   1024, &av_eq_num, REGINT_GE_ONE, OPAL_INFO_LVL_5));
 242     mca_btl_usnic_component.av_eq_num = (int32_t) av_eq_num;
 243 
 244     CHECK(reg_int("base_udp_port", "Base UDP port to use for usNIC communications.  If 0, system will pick the port number.  If non-zero, it will be added to each process' local rank to obtain the final port number (default: 0)",
 245                   0, &udp_port_base, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
 246     mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
 247 
 248     CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
 249                   5000, &mca_btl_usnic_component.retrans_timeout,
 250                   REGINT_GE_ONE, OPAL_INFO_LVL_5));
 251 
 252     CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
 253                   0, &max_tiny_msg_size,
 254                   REGINT_GE_ZERO, OPAL_INFO_LVL_5));
 255     opal_btl_usnic_module_template.max_tiny_msg_size =
 256         (size_t) max_tiny_msg_size;
 257 
 258     CHECK(reg_int("eager_limit", "Eager send limit (0 = use pre-set defaults; depends on number and type of devices available)",
 259                   0, &eager_limit, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
 260     opal_btl_usnic_module_template.super.btl_eager_limit = eager_limit;
 261 
 262     CHECK(reg_int("rndv_eager_limit", "Eager rendezvous limit (0 = use pre-set defaults; depends on number and type of devices available)",
 263                   0, &rndv_eager_limit, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
 264     opal_btl_usnic_module_template.super.btl_rndv_eager_limit =
 265         rndv_eager_limit;
 266 
 267     CHECK(reg_int("pack_lazy_threshold", "Convertor packing on-the-fly threshold (-1 = always pack eagerly, 0 = always pack lazily, otherwise will pack on the fly if fragment size is > limit)",
 268                   USNIC_DFLT_PACK_LAZY_THRESHOLD, &pack_lazy_threshold, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
 269     mca_btl_usnic_component.pack_lazy_threshold = pack_lazy_threshold;
 270 
 271     CHECK(reg_int("max_short_packets", "Number of abnormally-short packets received before outputting a warning (0 = never show the warning)",
 272                   25, &max_short_packets,
 273                   REGINT_GE_ZERO, OPAL_INFO_LVL_5));
 274     mca_btl_usnic_component.max_short_packets = max_short_packets;
 275 
 276     /* Default to bandwidth auto-detection */
 277     opal_btl_usnic_module_template.super.btl_bandwidth = 0;
 278     opal_btl_usnic_module_template.super.btl_latency = 2;
 279 
 280     /* Show "cannot find route" warnings? */
 281     mca_btl_usnic_component.show_route_failures = true;
 282     CHECK(reg_bool("show_route_failures",
 283                    "Whether to show a warning when route failures between MPI process peers are detected (default = 1, enabled; 0 = disabled)",
 284                    mca_btl_usnic_component.show_route_failures,
 285                    &mca_btl_usnic_component.show_route_failures,
 286                    OPAL_INFO_LVL_3));
 287 
 288     /* Connectivity verification */
 289     mca_btl_usnic_component.connectivity_enabled = true;
 290     CHECK(reg_bool("connectivity_check",
 291                    "Whether to enable the usNIC connectivity check upon first send (default = 1, enabled; 0 = disabled)",
 292                    mca_btl_usnic_component.connectivity_enabled,
 293                    &mca_btl_usnic_component.connectivity_enabled,
 294                    OPAL_INFO_LVL_3));
 295 
 296     mca_btl_usnic_component.connectivity_ack_timeout = 250;
 297     CHECK(reg_int("connectivity_ack_timeout",
 298                   "Timeout, in milliseconds, while waiting for an ACK while verification connectivity between usNIC interfaces.  If 0, the connectivity check is disabled (must be >=0).",
 299                   mca_btl_usnic_component.connectivity_ack_timeout,
 300                   &mca_btl_usnic_component.connectivity_ack_timeout,
 301                   REGINT_GE_ZERO, OPAL_INFO_LVL_3));
 302 
 303     mca_btl_usnic_component.connectivity_num_retries = 40;
 304     CHECK(reg_int("connectivity_error_num_retries",
 305                   "Number of times to retry usNIC connectivity verification before aborting the MPI job (must be >0).",
 306                   mca_btl_usnic_component.connectivity_num_retries,
 307                   &mca_btl_usnic_component.connectivity_num_retries,
 308                   REGINT_GE_ONE, OPAL_INFO_LVL_3));
 309 
 310     mca_btl_usnic_component.connectivity_map_prefix = NULL;
 311     CHECK(reg_string("connectivity_map",
 312                      "Write a per-process file containing the usNIC connectivity map.  If this parameter is specified, it is the filename prefix emitted by each MPI process.  The full filename emitted by each process is of the form: <prefix>-<hostname>.<pid>.<jobid>.<MCW rank>.txt.",
 313                      mca_btl_usnic_component.connectivity_map_prefix,
 314                      &mca_btl_usnic_component.connectivity_map_prefix,
 315                      REGSTR_EMPTY_OK, OPAL_INFO_LVL_3));
 316 
 317     return ret;
 318 }

/* [<][>][^][v][top][bottom][index][help] */