This source file includes the following definitions.
- usnic_stats_reset
- opal_btl_usnic_print_stats
- usnic_stats_callback
- opal_btl_usnic_stats_init
- opal_btl_usnic_stats_finalize
- usnic_pvar_notify
- usnic_pvar_read
- register_pvar_highwater
- usnic_pvar_enum_read
- register_pvar_counter
- setup_mpit_pvar_type
- setup_mpit_pvars_enum
- setup_mpit_pvars_highwatermark
- setup_mpit_pvars_counters
- opal_btl_usnic_setup_mpit_pvars
1
2
3
4
5
6
7
8
9
10
11 #include "opal_config.h"
12
13 #include <unistd.h>
14 #include <stdlib.h>
15
16 #include "opal/util/output.h"
17 #include "opal/util/printf.h"
18 #include "opal/mca/base/mca_base_var.h"
19 #include "opal/mca/base/mca_base_pvar.h"
20
21 #include "btl_usnic_compat.h"
22 #include "btl_usnic.h"
23 #include "btl_usnic_module.h"
24 #include "btl_usnic_stats.h"
25 #include "btl_usnic_util.h"
26
27
28
29
30 static mca_base_var_type_t pvar_type = MCA_BASE_VAR_TYPE_MAX;
31
32 static inline void usnic_stats_reset(opal_btl_usnic_module_t *module)
33 {
34 int i;
35
36 module->stats.num_total_sends =
37 module->stats.num_resends =
38 module->stats.num_chunk_sends =
39 module->stats.num_frag_sends =
40 module->stats.num_ack_recvs =
41
42 module->stats.num_total_recvs =
43 module->stats.num_unk_recvs =
44 module->stats.num_dup_recvs =
45 module->stats.num_oow_low_recvs =
46 module->stats.num_oow_high_recvs =
47 module->stats.num_frag_recvs =
48 module->stats.num_chunk_recvs =
49 module->stats.num_badfrag_recvs =
50 module->stats.num_ack_sends =
51 module->stats.num_recv_reposts =
52 module->stats.num_crc_errors =
53
54 module->stats.num_old_dup_acks =
55 module->stats.num_dup_acks =
56 module->stats.num_fast_retrans =
57 module->stats.num_timeout_retrans =
58
59 module->stats.max_sent_window_size =
60 module->stats.max_rcvd_window_size =
61
62 module->stats.pml_module_sends =
63 module->stats.pml_send_callbacks =
64
65 module->stats.num_seg_total_completions =
66 module->stats.num_seg_ack_completions =
67 module->stats.num_seg_frag_completions =
68 module->stats.num_seg_chunk_completions =
69 module->stats.num_seg_recv_completions =
70
71 0;
72
73 for (i=0; i<USNIC_NUM_CHANNELS; ++i) {
74 module->mod_channels[i].num_channel_sends = 0;
75 }
76 }
77
78
79
80
81
82
83
84
85 void opal_btl_usnic_print_stats(
86 opal_btl_usnic_module_t *module,
87 const char *prefix,
88 bool reset_stats)
89 {
90 char tmp[128], str[2048];
91
92
93 snprintf(str, sizeof(str), "%s:MCW:%3u, %s, ST(P+D)/F/C/R(T+F)/A:%8lu(%8u+%8u)/%8lu/%8lu/%4lu(%4lu+%4lu)/%8lu, RcvTot/Chk/F/C/L/H/D/BF/A:%8lu/%c%c/%8lu/%8lu/%4lu+%2lu/%4lu/%4lu/%6lu Comp:T(A/F/C/R) %8lu(%8lu/%8lu/%8lu/%8lu), OA/DA %4lu/%4lu CRC:%4lu ",
94 prefix,
95 opal_proc_local_get()->proc_name.vpid,
96
97 module->linux_device_name,
98
99 module->stats.num_total_sends,
100 module->mod_channels[USNIC_PRIORITY_CHANNEL].num_channel_sends,
101 module->mod_channels[USNIC_DATA_CHANNEL].num_channel_sends,
102 module->stats.num_frag_sends,
103 module->stats.num_chunk_sends,
104 module->stats.num_resends,
105 module->stats.num_timeout_retrans,
106 module->stats.num_fast_retrans,
107 module->stats.num_ack_sends,
108
109 module->stats.num_total_recvs,
110 (module->stats.num_total_recvs -
111 module->stats.num_recv_reposts) == 0 ? 'g' : 'B',
112 (module->stats.num_total_recvs -
113 module->stats.num_frag_recvs -
114 module->stats.num_chunk_recvs -
115 module->stats.num_badfrag_recvs -
116 module->stats.num_oow_low_recvs -
117 module->stats.num_oow_high_recvs -
118 module->stats.num_dup_recvs -
119 module->stats.num_ack_recvs -
120 module->stats.num_unk_recvs) == 0 ? 'g' : 'B',
121 module->stats.num_frag_recvs,
122 module->stats.num_chunk_recvs,
123 module->stats.num_oow_low_recvs,
124 module->stats.num_oow_high_recvs,
125 module->stats.num_dup_recvs,
126 module->stats.num_badfrag_recvs,
127 module->stats.num_ack_recvs,
128
129 module->stats.num_seg_total_completions,
130 module->stats.num_seg_ack_completions,
131 module->stats.num_seg_frag_completions,
132 module->stats.num_seg_chunk_completions,
133 module->stats.num_seg_recv_completions,
134
135 module->stats.num_old_dup_acks,
136 module->stats.num_dup_acks,
137
138 module->stats.num_crc_errors);
139
140
141
142
143 str[sizeof(str) - 1] = '\0';
144
145
146
147 if (module->stats.pml_module_sends +
148 module->stats.pml_send_callbacks == 0) {
149 int64_t send_unacked, su_min = WINDOW_SIZE * 2, su_max = 0;
150 int64_t recv_depth, rd_min = WINDOW_SIZE * 2, rd_max = 0;
151 opal_btl_usnic_endpoint_t *endpoint;
152 opal_list_item_t *item;
153
154 rd_min = su_min = WINDOW_SIZE * 2;
155 rd_max = su_max = 0;
156
157 opal_mutex_lock(&module->all_endpoints_lock);
158 item = opal_list_get_first(&module->all_endpoints);
159 while (item != opal_list_get_end(&(module->all_endpoints))) {
160 endpoint = container_of(item, mca_btl_base_endpoint_t,
161 endpoint_endpoint_li);
162 item = opal_list_get_next(item);
163
164
165
166 send_unacked =
167 SEQ_DIFF(endpoint->endpoint_next_seq_to_send,
168 SEQ_DIFF(endpoint->endpoint_ack_seq_rcvd, 1));
169
170 if (send_unacked > su_max) su_max = send_unacked;
171 if (send_unacked < su_min) su_min = send_unacked;
172
173
174
175
176 recv_depth =
177 endpoint->endpoint_highest_seq_rcvd -
178 endpoint->endpoint_next_contig_seq_to_recv;
179 if (recv_depth > rd_max) rd_max = recv_depth;
180 if (recv_depth < rd_min) rd_min = recv_depth;
181 }
182 opal_mutex_unlock(&module->all_endpoints_lock);
183 snprintf(tmp, sizeof(tmp), "PML S:%1ld, Win!A/R:%4ld/%4ld %4ld/%4ld",
184 module->stats.pml_module_sends,
185 su_min, su_max,
186 rd_min, rd_max);
187 } else {
188 snprintf(tmp, sizeof(tmp), "PML S/CB/Diff:%4lu/%4lu=%4ld",
189 module->stats.pml_module_sends,
190 module->stats.pml_send_callbacks,
191 module->stats.pml_module_sends -
192 module->stats.pml_send_callbacks);
193 }
194
195 strncat(str, tmp, sizeof(str) - strlen(str) - 1);
196 opal_output(0, "%s", str);
197
198 if (reset_stats) {
199 usnic_stats_reset(module);
200 }
201 }
202
203
204
205
206 static void usnic_stats_callback(int fd, short flags, void *arg)
207 {
208 opal_btl_usnic_module_t *module = (opal_btl_usnic_module_t*) arg;
209 char tmp[128];
210
211 if (!mca_btl_usnic_component.stats_enabled) {
212 return;
213 }
214
215 snprintf(tmp, sizeof(tmp), "%4lu", ++module->stats.report_num);
216
217 opal_btl_usnic_print_stats(module, tmp,
218 mca_btl_usnic_component.stats_relative);
219 }
220
221
222
223
224 int opal_btl_usnic_stats_init(opal_btl_usnic_module_t *module)
225 {
226 if (mca_btl_usnic_component.stats_enabled) {
227 usnic_stats_reset(module);
228
229 module->stats.timeout.tv_sec = mca_btl_usnic_component.stats_frequency;
230 module->stats.timeout.tv_usec = 0;
231
232 opal_event_set(mca_btl_usnic_component.opal_evbase,
233 &(module->stats.timer_event),
234 -1, EV_TIMEOUT | EV_PERSIST,
235 &usnic_stats_callback, module);
236 opal_event_add(&(module->stats.timer_event),
237 &(module->stats.timeout));
238 }
239
240 return OPAL_SUCCESS;
241 }
242
243
244
245
246 int opal_btl_usnic_stats_finalize(opal_btl_usnic_module_t *module)
247 {
248
249
250 if (mca_btl_usnic_component.stats_enabled) {
251 opal_event_del(&(module->stats.timer_event));
252 opal_btl_usnic_print_stats(module, "final", false);
253 }
254
255 return OPAL_SUCCESS;
256 }
257
258
259
260
261
262
263
264 static int usnic_pvar_notify(struct mca_base_pvar_t *pvar,
265 mca_base_pvar_event_t event,
266 void *obj, int *count)
267 {
268 if (MCA_BASE_PVAR_HANDLE_BIND == event) {
269 *count = mca_btl_usnic_component.num_modules;
270 }
271
272
273
274 return OPAL_SUCCESS;
275 }
276
277
278
279
280
281
282 static int usnic_pvar_read(const struct mca_base_pvar_t *pvar,
283 void *value, void *bound_obj)
284 {
285 size_t offset = (size_t) pvar->ctx;
286 uint64_t *array = (uint64_t*) value;
287
288 for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
289 char *base = (char*) &(mca_btl_usnic_component.usnic_active_modules[i]->stats);
290 array[i] = *((uint64_t*) (base + offset));
291 }
292
293 return OPAL_SUCCESS;
294 }
295
296
297
298
299
300 static void register_pvar_highwater(char *name, char *desc, size_t offset)
301 {
302 int rc __opal_attribute_unused__;
303
304 rc = mca_base_component_pvar_register(&mca_btl_usnic_component.super.btl_version,
305 name, desc,
306 OPAL_INFO_LVL_5,
307 MCA_BASE_PVAR_CLASS_HIGHWATERMARK,
308 pvar_type,
309 NULL,
310 MCA_BASE_VAR_BIND_NO_OBJECT,
311 (MCA_BASE_PVAR_FLAG_READONLY |
312 MCA_BASE_PVAR_FLAG_CONTINUOUS),
313 usnic_pvar_read,
314 NULL,
315 usnic_pvar_notify,
316 (void *) offset);
317 assert(rc >= 0);
318 }
319
320
321
322
323
324
325
326
327 static int usnic_pvar_enum_read(const struct mca_base_pvar_t *pvar,
328 void *value, void *bound_obj)
329 {
330 int *array = (int *) value;
331
332 for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
333 array[i] = i;
334 }
335
336 return OPAL_SUCCESS;
337 }
338
339
340
341
342
343 static void register_pvar_counter(char *name, char *desc, size_t offset)
344 {
345 int rc __opal_attribute_unused__;
346
347 rc = mca_base_component_pvar_register(&mca_btl_usnic_component.super.btl_version,
348 name, desc,
349 OPAL_INFO_LVL_5,
350 MCA_BASE_PVAR_CLASS_COUNTER,
351 pvar_type,
352 NULL,
353 MCA_BASE_VAR_BIND_NO_OBJECT,
354 (MCA_BASE_PVAR_FLAG_READONLY |
355 MCA_BASE_PVAR_FLAG_CONTINUOUS),
356 usnic_pvar_read,
357 NULL,
358 usnic_pvar_notify,
359 (void *) offset);
360 assert(rc >= 0);
361 }
362
363
364
365
366
367
368 static bool setup_mpit_pvar_type(void)
369 {
370
371
372 if (sizeof(uint64_t) == sizeof(unsigned int)) {
373 pvar_type = MCA_BASE_VAR_TYPE_UNSIGNED_INT;
374 } else if (sizeof(uint64_t) == sizeof(unsigned long)) {
375 pvar_type = MCA_BASE_VAR_TYPE_UNSIGNED_LONG;
376 } else if (sizeof(uint64_t) == sizeof(unsigned long long)) {
377 pvar_type = MCA_BASE_VAR_TYPE_UNSIGNED_LONG_LONG;
378 }
379
380
381 if (MCA_BASE_VAR_TYPE_MAX == pvar_type) {
382 return false;
383 }
384 return true;
385 }
386
387
388
389
390
391 static void setup_mpit_pvars_enum(void)
392 {
393 int i;
394 int rc __opal_attribute_unused__;
395 mca_base_var_enum_value_t *devices;
396 static mca_base_var_enum_t *devices_enum;
397 opal_btl_usnic_module_t *m;
398 unsigned char *c;
399 struct sockaddr_in *sin;
400
401 devices = calloc(mca_btl_usnic_component.num_modules + 1,
402 sizeof(*devices));
403 assert(devices != NULL);
404
405 for (i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
406 char *str;
407
408 m = mca_btl_usnic_component.usnic_active_modules[i];
409 sin = m->fabric_info->src_addr;
410 c = (unsigned char*) &sin->sin_addr.s_addr;
411
412 devices[i].value = i;
413 rc = opal_asprintf(&str, "%s,%hhu.%hhu.%hhu.%hhu/%" PRIu32,
414 m->linux_device_name,
415 c[0], c[1], c[2], c[3],
416 usnic_netmask_to_cidrlen(sin->sin_addr.s_addr));
417 assert(rc > 0);
418 devices[i].string = str;
419 }
420 devices[i].string = NULL;
421
422 rc = mca_base_var_enum_create("btl_usnic", devices, &devices_enum);
423 assert(OPAL_SUCCESS == rc);
424
425 rc = mca_base_component_pvar_register(&mca_btl_usnic_component.super.btl_version,
426 "devices",
427 "Enumeration representing which slot in btl_usnic_* MPI_T pvar value arrays correspond to which usnic_X Linux device",
428 OPAL_INFO_LVL_5,
429 MCA_BASE_PVAR_CLASS_STATE,
430 MCA_BASE_VAR_TYPE_INT,
431 devices_enum,
432 MCA_BASE_VAR_BIND_NO_OBJECT,
433 (MCA_BASE_PVAR_FLAG_READONLY |
434 MCA_BASE_PVAR_FLAG_CONTINUOUS),
435 usnic_pvar_enum_read,
436 NULL,
437 usnic_pvar_notify,
438 NULL );
439 assert(rc >= 0);
440
441
442
443 for (int i = 0; i < mca_btl_usnic_component.num_modules; ++i) {
444 free((char*) devices[i].string);
445 }
446 free(devices);
447
448
449
450
451 OBJ_RELEASE(devices_enum);
452 }
453
454
455
456
457
458 static void setup_mpit_pvars_highwatermark(void)
459 {
460 #define REGISTERHW(field, desc) \
461 register_pvar_highwater(#field, (desc), offsetof(opal_btl_usnic_module_stats_t, field))
462
463 REGISTERHW(max_sent_window_size,
464 "Maximum number of entries in all send windows from this peer");
465 REGISTERHW(max_rcvd_window_size,
466 "Maximum number of entries in all receive windows to this peer");
467 }
468
469
470
471
472
473 static void setup_mpit_pvars_counters(void)
474 {
475 #define REGISTERC(field, desc) \
476 register_pvar_counter(#field, (desc), offsetof(opal_btl_usnic_module_stats_t, field))
477
478 REGISTERC(num_total_sends,
479 "Total number of sends (MPI data, ACKs, retransmissions, etc.)");
480 REGISTERC(num_resends,
481 "Total number of all retransmissions");
482 REGISTERC(num_timeout_retrans,
483 "Number of times chunk retransmissions have occured because an ACK was not received within the timeout");
484 REGISTERC(num_fast_retrans,
485 "Number of times chunk retransmissions have occured because due to a repeated ACK");
486 REGISTERC(num_chunk_sends,
487 "Number of sends that were part of a larger MPI message fragment (i.e., the MPI message was so long that it had to be split into multiple MTU/network sends)");
488 REGISTERC(num_frag_sends,
489 "Number of sends where the entire MPI message fragment fit into a single MTU/network send");
490 REGISTERC(num_ack_sends,
491 "Number of ACKs sent (i.e., usNIC-BTL-to-usNIC-BTL control messages)");
492
493 REGISTERC(num_total_recvs,
494 "Total number of receives completed");
495 REGISTERC(num_unk_recvs,
496 "Number of receives with an unknown source or type, and therefore ignored by the usNIC BTL (this should never be >0)");
497 REGISTERC(num_dup_recvs,
498 "Number of duplicate receives");
499 REGISTERC(num_oow_low_recvs,
500 "Number of times a receive was out of the sliding window (on the low side)");
501 REGISTERC(num_oow_high_recvs,
502 "Number of times a receive was out of the sliding window (on the high side)");
503 REGISTERC(num_frag_recvs,
504 "Number of receives where the entire MPI message fragment fit into a single MTU/network send");
505 REGISTERC(num_chunk_recvs,
506 "Number of receives that were part of a larger MPI message fragment (i.e., this receive was reassembled into a larger MPI message fragment)");
507 REGISTERC(num_badfrag_recvs,
508 "Number of chunks received that had a bad fragment ID (this should never be >0)");
509
510 REGISTERC(num_ack_recvs,
511 "Total number of ACKs received");
512 REGISTERC(num_old_dup_acks,
513 "Number of old duplicate ACKs received (i.e., before the current expected ACK)");
514 REGISTERC(num_dup_acks,
515 "Number of duplicate ACKs received (i.e., the current expected ACK)");
516
517 REGISTERC(num_recv_reposts,
518 "Number of times buffers have been reposted for receives");
519 REGISTERC(num_crc_errors,
520 "Number of times receives were aborted because of a CRC error");
521
522 REGISTERC(pml_module_sends,
523 "Number of times the PML has called down to send a message");
524 REGISTERC(pml_send_callbacks,
525 "Number of times the usNIC BTL has called up to the PML to complete a send");
526 }
527
528
529
530
531
532 int opal_btl_usnic_setup_mpit_pvars(void)
533 {
534
535
536 if (!setup_mpit_pvar_type()) {
537 return OPAL_SUCCESS;
538 }
539
540
541 setup_mpit_pvars_enum();
542
543
544 setup_mpit_pvars_highwatermark();
545
546
547
548
549 if (!mca_btl_usnic_component.stats_relative) {
550 setup_mpit_pvars_counters();
551 }
552
553
554 return OPAL_SUCCESS;
555 }