This source file includes following definitions.
- reg_string
- reg_int
- reg_bool
- opal_btl_usnic_component_register
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include "opal_config.h"
27
28 #include <string.h>
29 #include <errno.h>
30
31 #include "opal/mca/base/mca_base_var.h"
32 #include "opal/util/argv.h"
33
34 #include "opal/constants.h"
35
36 #include "opal/mca/btl/btl.h"
37 #include "opal/mca/btl/base/base.h"
38
39 #include "btl_usnic.h"
40 #include "btl_usnic_frag.h"
41 #include "btl_usnic_endpoint.h"
42 #include "btl_usnic_module.h"
43
44
45
46
47
/*
 * Validation flags accepted by reg_int().  They are bit values and may
 * be OR'ed together.
 */
enum {
    /* A value of exactly -1 is accepted without any further checks */
    REGINT_NEG_ONE_OK = 1 << 0,
    /* Reject values that are < 0 */
    REGINT_GE_ZERO    = 1 << 1,
    /* Reject values that are < 1 */
    REGINT_GE_ONE     = 1 << 2,
    /* Reject a value of exactly 0 */
    REGINT_NONZERO    = 1 << 3,

    /* NOTE(review): not referenced by the code in this file; looks like
       an end-of-list sentinel -- confirm before relying on it */
    REGINT_MAX        = 0x88
};
56
57
/*
 * Validation flags accepted by reg_string().
 */
enum {
    /* A NULL or zero-length string value is acceptable */
    REGSTR_EMPTY_OK = 1 << 0,

    /* NOTE(review): not referenced by the code in this file; looks like
       an end-of-list sentinel -- confirm before relying on it */
    REGSTR_MAX      = 0x88
};
63
64
65
66
67
68 static int reg_string(const char* param_name,
69 const char* help_string,
70 const char* default_value, char **storage,
71 int flags, int level)
72 {
73 *storage = (char*) default_value;
74 mca_base_component_var_register(&mca_btl_usnic_component.super.btl_version,
75 param_name, help_string,
76 MCA_BASE_VAR_TYPE_STRING,
77 NULL,
78 0,
79 0,
80 level,
81 MCA_BASE_VAR_SCOPE_READONLY,
82 storage);
83
84 if (0 == (flags & REGSTR_EMPTY_OK) &&
85 (NULL == *storage || 0 == strlen(*storage))) {
86 opal_output(0, "Bad parameter value for parameter \"%s\"",
87 param_name);
88 return OPAL_ERR_BAD_PARAM;
89 }
90
91 return OPAL_SUCCESS;
92 }
93
94
95
96
97
98 static int reg_int(const char* param_name,
99 const char* help_string,
100 int default_value, int *storage, int flags, int level)
101 {
102 *storage = default_value;
103 mca_base_component_var_register(&mca_btl_usnic_component.super.btl_version,
104 param_name, help_string,
105 MCA_BASE_VAR_TYPE_INT,
106 NULL,
107 0,
108 0,
109 level,
110 MCA_BASE_VAR_SCOPE_READONLY,
111 storage);
112
113 if (0 != (flags & REGINT_NEG_ONE_OK) && -1 == *storage) {
114 return OPAL_SUCCESS;
115 }
116 if ((0 != (flags & REGINT_GE_ZERO) && *storage < 0) ||
117 (0 != (flags & REGINT_GE_ONE) && *storage < 1) ||
118 (0 != (flags & REGINT_NONZERO) && 0 == *storage)) {
119 opal_output(0, "Bad parameter value for parameter \"%s\"",
120 param_name);
121 return OPAL_ERR_BAD_PARAM;
122 }
123
124 return OPAL_SUCCESS;
125 }
126
127
128
129
130
131 static int reg_bool(const char* param_name,
132 const char* help_string,
133 bool default_value, bool *storage, int level)
134 {
135 *storage = default_value;
136 mca_base_component_var_register(&mca_btl_usnic_component.super.btl_version,
137 param_name, help_string,
138 MCA_BASE_VAR_TYPE_BOOL,
139 NULL,
140 0,
141 0,
142 level,
143 MCA_BASE_VAR_SCOPE_READONLY,
144 storage);
145
146 return OPAL_SUCCESS;
147 }
148
149
150 int opal_btl_usnic_component_register(void)
151 {
152 int tmp, ret = 0;
153 static int max_modules;
154 static int stats_relative;
155 static int want_numa_device_assignment;
156 static int sd_num;
157 static int rd_num;
158 static int prio_sd_num;
159 static int prio_rd_num;
160 static int cq_num;
161 static int av_eq_num;
162 static int udp_port_base;
163 static int max_tiny_msg_size;
164 static int eager_limit;
165 static int rndv_eager_limit;
166 static int pack_lazy_threshold;
167 static int max_short_packets;
168
169 #define CHECK(expr) do {\
170 tmp = (expr); \
171 if (OPAL_SUCCESS != tmp) ret = tmp; \
172 } while (0)
173
174 CHECK(reg_int("max_btls",
175 "Maximum number of usNICs to use (default: 0 = as many as are available)",
176 0, &max_modules,
177 REGINT_GE_ZERO, OPAL_INFO_LVL_2));
178 mca_btl_usnic_component.max_modules = (size_t) max_modules;
179
180 CHECK(reg_string("if_include",
181 "Comma-delimited list of usNIC devices/networks to be used (e.g. \"eth3,usnic_0,10.10.0.0/16\"; empty value means to use all available usNICs). Mutually exclusive with btl_usnic_if_exclude.",
182 NULL, &mca_btl_usnic_component.if_include,
183 REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
184
185 CHECK(reg_string("if_exclude",
186 "Comma-delimited list of usNIC devices/networks to be excluded (empty value means to not exclude any usNICs). Mutually exclusive with btl_usnic_if_include.",
187 NULL, &mca_btl_usnic_component.if_exclude,
188 REGSTR_EMPTY_OK, OPAL_INFO_LVL_1));
189
190 CHECK(reg_int("stats",
191 "A non-negative integer specifying the frequency at which each usnic BTL will output statistics (default: 0 seconds, meaning that statistics are disabled)",
192 0, &mca_btl_usnic_component.stats_frequency, 0,
193 OPAL_INFO_LVL_4));
194 mca_btl_usnic_component.stats_enabled =
195 (bool) (mca_btl_usnic_component.stats_frequency > 0);
196
197 CHECK(reg_int("stats_relative",
198 "If stats are enabled, output relative stats between the timestamps (vs. cumulative stats since the beginning of the job) (default: 0 -- i.e., absolute)",
199 0, &stats_relative, 0, OPAL_INFO_LVL_4));
200 mca_btl_usnic_component.stats_relative = (bool) stats_relative;
201
202 CHECK(reg_string("mpool_hints", "Hints to use when selecting mpool",
203 NULL, &mca_btl_usnic_component.usnic_mpool_hints,
204 REGSTR_EMPTY_OK,
205 OPAL_INFO_LVL_5));
206
207 CHECK(reg_string("rcache", "Name of the registration cache to be used",
208 "grdma", &mca_btl_usnic_component.usnic_rcache_name, 0,
209 OPAL_INFO_LVL_5));
210
211 want_numa_device_assignment = 1;
212 CHECK(reg_int("want_numa_device_assignment",
213 "If 1, use only Cisco VIC ports thare are a minimum NUMA distance from the MPI process for short messages. If 0, use all available Cisco VIC ports for short messages. This parameter is meaningless (and ignored) unless MPI proceses are bound to processor cores. Defaults to 1 if NUMA support is included in Open MPI; -1 otherwise.",
214 want_numa_device_assignment,
215 &want_numa_device_assignment,
216 0, OPAL_INFO_LVL_5));
217 mca_btl_usnic_component.want_numa_device_assignment =
218 (1 == want_numa_device_assignment) ? true : false;
219
220 CHECK(reg_int("sd_num", "Maximum send descriptors to post (-1 = pre-set defaults; depends on number and type of devices available)",
221 -1, &sd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
222 mca_btl_usnic_component.sd_num = (int32_t) sd_num;
223
224 CHECK(reg_int("rd_num", "Number of pre-posted receive buffers (-1 = pre-set defaults; depends on number and type of devices available)",
225 -1, &rd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
226 mca_btl_usnic_component.rd_num = (int32_t) rd_num;
227
228 CHECK(reg_int("prio_sd_num", "Maximum priority send descriptors to post (-1 = pre-set defaults; depends on number and type of devices available)",
229 -1, &prio_sd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
230 mca_btl_usnic_component.prio_sd_num = (int32_t) prio_sd_num;
231
232 CHECK(reg_int("prio_rd_num", "Number of pre-posted priority receive buffers (-1 = pre-set defaults; depends on number and type of devices available)",
233 -1, &prio_rd_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
234 mca_btl_usnic_component.prio_rd_num = (int32_t) prio_rd_num;
235
236 CHECK(reg_int("cq_num", "Number of completion queue entries (-1 = pre-set defaults; depends on number and type of devices available; will error if (sd_num+rd_num)>cq_num)",
237 -1, &cq_num, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
238 mca_btl_usnic_component.cq_num = (int32_t) cq_num;
239
240 CHECK(reg_int("av_eq_num", "Number of event queue entries for peer address resolution",
241 1024, &av_eq_num, REGINT_GE_ONE, OPAL_INFO_LVL_5));
242 mca_btl_usnic_component.av_eq_num = (int32_t) av_eq_num;
243
244 CHECK(reg_int("base_udp_port", "Base UDP port to use for usNIC communications. If 0, system will pick the port number. If non-zero, it will be added to each process' local rank to obtain the final port number (default: 0)",
245 0, &udp_port_base, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
246 mca_btl_usnic_component.udp_port_base = (int) udp_port_base;
247
248 CHECK(reg_int("retrans_timeout", "Number of microseconds before retransmitting a frame",
249 5000, &mca_btl_usnic_component.retrans_timeout,
250 REGINT_GE_ONE, OPAL_INFO_LVL_5));
251
252 CHECK(reg_int("priority_limit", "Max size of \"priority\" messages (0 = use pre-set defaults; depends on number and type of devices available)",
253 0, &max_tiny_msg_size,
254 REGINT_GE_ZERO, OPAL_INFO_LVL_5));
255 opal_btl_usnic_module_template.max_tiny_msg_size =
256 (size_t) max_tiny_msg_size;
257
258 CHECK(reg_int("eager_limit", "Eager send limit (0 = use pre-set defaults; depends on number and type of devices available)",
259 0, &eager_limit, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
260 opal_btl_usnic_module_template.super.btl_eager_limit = eager_limit;
261
262 CHECK(reg_int("rndv_eager_limit", "Eager rendezvous limit (0 = use pre-set defaults; depends on number and type of devices available)",
263 0, &rndv_eager_limit, REGINT_GE_ZERO, OPAL_INFO_LVL_5));
264 opal_btl_usnic_module_template.super.btl_rndv_eager_limit =
265 rndv_eager_limit;
266
267 CHECK(reg_int("pack_lazy_threshold", "Convertor packing on-the-fly threshold (-1 = always pack eagerly, 0 = always pack lazily, otherwise will pack on the fly if fragment size is > limit)",
268 USNIC_DFLT_PACK_LAZY_THRESHOLD, &pack_lazy_threshold, REGINT_NEG_ONE_OK, OPAL_INFO_LVL_5));
269 mca_btl_usnic_component.pack_lazy_threshold = pack_lazy_threshold;
270
271 CHECK(reg_int("max_short_packets", "Number of abnormally-short packets received before outputting a warning (0 = never show the warning)",
272 25, &max_short_packets,
273 REGINT_GE_ZERO, OPAL_INFO_LVL_5));
274 mca_btl_usnic_component.max_short_packets = max_short_packets;
275
276
277 opal_btl_usnic_module_template.super.btl_bandwidth = 0;
278 opal_btl_usnic_module_template.super.btl_latency = 2;
279
280
281 mca_btl_usnic_component.show_route_failures = true;
282 CHECK(reg_bool("show_route_failures",
283 "Whether to show a warning when route failures between MPI process peers are detected (default = 1, enabled; 0 = disabled)",
284 mca_btl_usnic_component.show_route_failures,
285 &mca_btl_usnic_component.show_route_failures,
286 OPAL_INFO_LVL_3));
287
288
289 mca_btl_usnic_component.connectivity_enabled = true;
290 CHECK(reg_bool("connectivity_check",
291 "Whether to enable the usNIC connectivity check upon first send (default = 1, enabled; 0 = disabled)",
292 mca_btl_usnic_component.connectivity_enabled,
293 &mca_btl_usnic_component.connectivity_enabled,
294 OPAL_INFO_LVL_3));
295
296 mca_btl_usnic_component.connectivity_ack_timeout = 250;
297 CHECK(reg_int("connectivity_ack_timeout",
298 "Timeout, in milliseconds, while waiting for an ACK while verification connectivity between usNIC interfaces. If 0, the connectivity check is disabled (must be >=0).",
299 mca_btl_usnic_component.connectivity_ack_timeout,
300 &mca_btl_usnic_component.connectivity_ack_timeout,
301 REGINT_GE_ZERO, OPAL_INFO_LVL_3));
302
303 mca_btl_usnic_component.connectivity_num_retries = 40;
304 CHECK(reg_int("connectivity_error_num_retries",
305 "Number of times to retry usNIC connectivity verification before aborting the MPI job (must be >0).",
306 mca_btl_usnic_component.connectivity_num_retries,
307 &mca_btl_usnic_component.connectivity_num_retries,
308 REGINT_GE_ONE, OPAL_INFO_LVL_3));
309
310 mca_btl_usnic_component.connectivity_map_prefix = NULL;
311 CHECK(reg_string("connectivity_map",
312 "Write a per-process file containing the usNIC connectivity map. If this parameter is specified, it is the filename prefix emitted by each MPI process. The full filename emitted by each process is of the form: <prefix>-<hostname>.<pid>.<jobid>.<MCW rank>.txt.",
313 mca_btl_usnic_component.connectivity_map_prefix,
314 &mca_btl_usnic_component.connectivity_map_prefix,
315 REGSTR_EMPTY_OK, OPAL_INFO_LVL_3));
316
317 return ret;
318 }