This source file includes the following definitions:
- usage
- parse_options
- fill_remote_ranks
- store_double
- get_mem_usage
- main
1
2
3
4
5
6
7
8
9
10
11
12 #include <stdio.h>
13 #include <stdlib.h>
14 #include <unistd.h>
15 #include <time.h>
16 #include <getopt.h>
17 #include <limits.h>
18 #include <string.h>
19
20 #include "pmi.h"
21
22 #include <time.h>
/*
 * GET_TS: current CLOCK_MONOTONIC reading expressed in seconds as a double.
 * Implemented as a GNU statement expression so it can be used as a value,
 * e.g. `elapsed = GET_TS - start;`.
 */
#define GET_TS ({                                   \
    struct timespec now_ts_;                        \
    clock_gettime(CLOCK_MONOTONIC, &now_ts_);       \
    now_ts_.tv_sec + 1E-9 * now_ts_.tv_nsec;        \
})
30
31
/* Benchmark configuration; the defaults below are overridden by parse_options(). */
int key_size = 100;     /* length of every key value, counted in int elements */
int key_count = 10;     /* number of keys each rank publishes per scope */
int rank_shift = 0;     /* power of ten above key_count, computed by parse_options() */
int direct_modex = 0;   /* set by -d / --direct-modex */
int debug_on = 0;       /* set by --debug: print per-process timings */
34
/*
 * Print the command-line synopsis to stdout.
 *
 * argv0: program name substituted into the invocation line.
 */
static void usage(const char *argv0)
{
    /* One entry per documented option, emitted verbatim and in order. */
    static const char *const option_lines[] = {
        " -s, --key-size=<size> size of the key's submitted\n",
        " -c, --key-count=<size> number of keys submitted to local and remote parts\n",
        " -d, --direct-modex use direct modex if available\n",
        " --debug force all processes to print out the timings themself\n",
    };
    size_t idx;

    fputs("Usage:\n", stdout);
    printf(" %s [options] start the benchmark\n", argv0);
    fputs("\n", stdout);

    for (idx = 0; idx < sizeof(option_lines) / sizeof(option_lines[0]); idx++) {
        fputs(option_lines[idx], stdout);
    }
}
45
46
47 void parse_options(int argc, char **argv)
48 {
49 extern char *optarg;
50 extern int optind;
51 struct option long_options[] = {
52 { "help", 0, NULL, 'h' },
53
54 { "key-size", 1, NULL, 's' },
55 { "key-count", 1, NULL, 'c' },
56 { "direct-modex", 0, NULL, 'd' },
57 { "debug", 0, NULL, '0' },
58 { 0 }
59 };
60
61 while (1) {
62 int c;
63 c = getopt_long(argc, argv, "hs:c:d0", long_options, NULL);
64
65 if (c == -1)
66 break;
67 switch (c) {
68 case 's':
69 key_size = atoi(optarg);
70
71
72
73 key_size = key_size / 4 + !!(key_size % 4);
74 break;
75 case 'c':
76 key_count = atoi(optarg);
77 break;
78 case 'd':
79 direct_modex = 1;
80 break;
81 case '0':
82 debug_on = 1;
83 break;
84 case 'h':
85 default:
86 usage(argv[0]);
87 exit(0);
88 }
89 }
90
91 rank_shift = 10;
92 while( rank_shift <= key_count ){
93 rank_shift *= 10;
94 }
95 }
96
/*
 * Collect, in ascending order, every rank in [0, size) that does not appear
 * in local_ranks[0..local_cnt) and store them in remote_ranks.
 *
 * At most (size - local_cnt) entries are written, so remote_ranks must have
 * room for that many ints.
 */
void fill_remote_ranks(int *local_ranks, int local_cnt, int *remote_ranks, int size)
{
    int candidate = 0;
    int filled = 0;

    while (candidate < size && filled < (size - local_cnt)) {
        int is_local = 0;
        int idx;

        /* Linear membership test against the local rank list. */
        for (idx = 0; idx < local_cnt && !is_local; idx++) {
            is_local = (local_ranks[idx] == candidate);
        }

        if (!is_local) {
            remote_ranks[filled++] = candidate;
        }
        candidate++;
    }
}
114
/*
 * Format `val` with "%lf" into a local scratch buffer.
 *
 * The formatted text is currently not stored anywhere and `name` is unused;
 * the function only reports whether the value could be rendered.
 *
 * Returns 0 when the value fits the buffer, -1 on a formatting error or when
 * the text would not fit (e.g. very large magnitudes expand to hundreds of
 * digits under "%lf").
 */
int store_double(char *name, double val)
{
    char buf[128];
    int written;

    (void)name;

    /* Bounded write: an unbounded sprintf would overrun buf for huge values. */
    written = snprintf(buf, sizeof(buf), "%lf", val);
    if (written < 0 || (size_t)written >= sizeof(buf)) {
        return -1;
    }
    return 0;
}
121
/*
 * Sum the "Pss:" and "Rss:" fields of every mapping listed in
 * /proc/self/smaps for the calling process.
 *
 * _pss: receives the accumulated Pss values (as printed by the kernel).
 * _rss: receives the accumulated Rss values (as printed by the kernel).
 *
 * Returns 0 on success, -1 if the smaps file cannot be opened or a read
 * error occurs; both outputs are reset to 0.0 before any parsing.
 */
int get_mem_usage(double *_pss, double *_rss)
{
    char chunk[256];
    FILE *smaps;
    int at_line_start = 1;
    int rc;

    *_pss = 0.0;
    *_rss = 0.0;

    smaps = fopen("/proc/self/smaps", "r");
    if (NULL == smaps) {
        return -1;
    }

    while (NULL != fgets(chunk, sizeof(chunk), smaps)) {
        size_t len = strlen(chunk);
        double value;

        /*
         * Only inspect the beginning of a physical line: a line longer than
         * the buffer arrives in several chunks and its tail must not be
         * mistaken for a field.  The exact "Pss:" / "Rss:" literal keeps
         * related fields such as "Pss_Dirty:" from matching, and a failed
         * conversion contributes nothing.
         */
        if (at_line_start) {
            if (1 == sscanf(chunk, "Pss: %lf", &value)) {
                *_pss += value;
            } else if (1 == sscanf(chunk, "Rss: %lf", &value)) {
                *_rss += value;
            }
        }
        at_line_start = (len > 0 && '\n' == chunk[len - 1]);
    }

    rc = ferror(smaps) ? -1 : 0;
    fclose(smaps);

    return rc;
}
155
156 int main(int argc, char **argv)
157 {
158 int rc;
159 char *key_name;
160 int *key_val;
161 int rank, nproc;
162 int cnt;
163 int *local_ranks, local_cnt;
164 int *remote_ranks, remote_cnt;
165 double start, total_start, get_loc_time = 0, get_rem_time = 0, put_loc_time = 0,
166 put_rem_time = 0, commit_time = 0, fence_time = 0, init_time = 0, total_time = 0;
167 int get_loc_cnt = 0, get_rem_cnt = 0, put_loc_cnt = 0, put_rem_cnt = 0;
168 double mem_pss = 0.0, mem_rss = 0.0;
169 char have_shmem;
170 size_t shmem_job_info, shmem_all;
171
172 parse_options(argc, argv);
173
174 total_start = GET_TS;
175 start = GET_TS;
176 pmi_init(&rank, &nproc);
177 init_time += GET_TS - start;
178
179 pmi_get_local_ranks(&local_ranks, &local_cnt);
180 remote_cnt = nproc - local_cnt;
181 if( remote_cnt ){
182 remote_ranks = calloc(remote_cnt, sizeof(int));
183 fill_remote_ranks(local_ranks, local_cnt, remote_ranks, nproc);
184 }
185
186 pmi_get_shmem_size(&have_shmem, &shmem_job_info);
187
188
189
190
191
192 pmi_fence( 0 );
193
194 if( 0 == rank && debug_on ){
195 int i;
196 fprintf(stderr,"%d: local ranks: ", rank);
197 for(i = 0; i < local_cnt; i++){
198 fprintf(stderr,"%d ", local_ranks[i]);
199 }
200 fprintf(stderr,"\n");
201 fflush(stderr);
202 }
203
204 key_val = calloc(key_size, sizeof(int));
205 for (cnt=0; cnt < key_count; cnt++) {
206 int i;
207 if( local_cnt > 0 ){
208 (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt);
209 for(i=0; i < key_size; i++){
210 key_val[i] = rank * rank_shift + cnt;
211 }
212 put_loc_cnt++;
213 start = GET_TS;
214 pmi_put_key_loc(key_name, key_val, key_size);
215 put_loc_time += GET_TS - start;
216 free(key_name);
217 }
218 if( remote_cnt > 0 ){
219 (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt);
220 for(i=0; i < key_size; i++){
221 key_val[i] = rank * rank_shift + cnt;
222 }
223 put_rem_cnt++;
224 start = GET_TS;
225 pmi_put_key_rem(key_name, key_val, key_size);
226 put_rem_time += GET_TS - start;
227 free(key_name);
228 }
229 }
230 free(key_val);
231
232 start = GET_TS;
233 pmi_commit();
234 commit_time += GET_TS - start;
235
236 start = GET_TS;
237 pmi_fence( !direct_modex );
238 fence_time += GET_TS - start;
239
240
241
242 for (cnt=0; cnt < key_count; cnt++) {
243 int i;
244
245 for(i = 0; i < remote_cnt; i++){
246 int rank = remote_ranks[i], j;
247 int *key_val, key_size_new;
248 double start;
249 (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt);
250
251 start = GET_TS;
252 pmi_get_key_rem(rank, key_name, &key_val, &key_size_new);
253 get_rem_time += GET_TS - start;
254 get_rem_cnt++;
255
256 if( key_size != key_size_new ){
257 fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n",
258 rank, key_name, key_size, key_size_new);
259 abort();
260 }
261
262 for(j=0; j < key_size; j++){
263 if( key_val[j] != rank * rank_shift + cnt ){
264 fprintf(stderr, "%d: error in key %s value (byte %d)\n",
265 rank, key_name, j);
266 abort();
267 }
268 }
269 free(key_name);
270 free(key_val);
271 }
272
273
274 for(i = 0; i < local_cnt; i++){
275 int rank = local_ranks[i], j;
276 int *key_val, key_size_new;
277 double start;
278 (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt);
279
280 start = GET_TS;
281 pmi_get_key_loc(rank, key_name, &key_val, &key_size_new);
282 get_loc_time += GET_TS - start;
283 get_loc_cnt++;
284
285 if( key_size != key_size_new ){
286 fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n",
287 rank, key_name, key_size, key_size_new);
288 abort();
289 }
290
291 for(j=0; j < key_size; j++){
292 if( key_val[j] != rank * rank_shift + cnt ){
293 fprintf(stderr, "%d: error in key %s value (byte %d)",
294 rank, key_name, j);
295 abort();
296 }
297 }
298 free(key_name);
299 free(key_val);
300 }
301 }
302
303 total_time = GET_TS - total_start;
304
305 if (0 != get_mem_usage(&mem_pss, &mem_rss)) {
306 fprintf(stderr, "Rank %d: error get memory usage", rank);
307 abort();
308 }
309
310 if( debug_on ){
311 fprintf(stderr,"%d: get: total %lf avg loc %lf rem %lf all %lf ; put: %lf %lf commit: %lf fence %lf\n",
312 rank, (get_loc_time + get_rem_time),
313 get_loc_time/get_loc_cnt, get_rem_time/get_rem_cnt,
314 (get_loc_time + get_rem_time)/(get_loc_cnt + get_rem_cnt),
315 put_loc_time/put_loc_cnt, put_rem_time/put_rem_cnt,
316 commit_time, fence_time);
317 }
318
319 pmi_get_shmem_size(&have_shmem, &shmem_all);
320
321
322
323
324 pmi_fence( 0 );
325
326
327 char key[128];
328 sprintf(key, "PMIX_PERF_get_total_time.%d", rank);
329 pmi_put_double(key, get_rem_time + get_loc_time);
330
331 sprintf(key, "PMIX_PERF_get_loc_time.%d", rank);
332 pmi_put_double(key, get_loc_cnt ? get_loc_time/get_loc_cnt : 0 );
333
334 sprintf(key, "PMIX_PERF_get_rem_time.%d", rank);
335 pmi_put_double(key, get_rem_cnt ? get_rem_time/get_rem_cnt : 0);
336
337 sprintf(key, "PMIX_PERF_get_time.%d", rank);
338 pmi_put_double(key, (get_loc_time + get_rem_time)/(get_loc_cnt + get_rem_cnt) );
339
340 sprintf(key, "PMIX_PERF_put_loc_time.%d", rank);
341 pmi_put_double(key, put_loc_cnt ? put_loc_time / put_loc_cnt : 0);
342
343 sprintf(key, "PMIX_PERF_put_rem_time.%d", rank);
344 pmi_put_double(key, put_rem_cnt ? put_rem_time / put_rem_cnt : 0);
345
346 sprintf(key, "PMIX_PERF_commit_time.%d", rank);
347 pmi_put_double(key, commit_time);
348
349 sprintf(key, "PMIX_PERF_fence_time.%d", rank);
350 pmi_put_double(key, fence_time);
351
352 sprintf(key, "PMIX_PERF_init_time.%d", rank);
353 pmi_put_double(key, init_time);
354
355 sprintf(key, "PMIX_PERF_total_time.%d", rank);
356 pmi_put_double(key, total_time);
357
358 sprintf(key, "PMIX_PERF_mem_pss.%d", rank);
359 pmi_put_double(key, mem_pss);
360
361 sprintf(key, "PMIX_PERF_mem_rss.%d", rank);
362 pmi_put_double(key, mem_rss);
363
364 pmi_commit();
365 pmi_fence( 1 );
366
367 if( rank == 0 ){
368 double cum_get_total_time = 0,
369 cum_get_loc_time = 0,
370 cum_get_rem_time = 0,
371 cum_get_time = 0,
372 cum_put_total_time = 0,
373 cum_put_loc_time = 0,
374 cum_put_rem_time = 0,
375 cum_commit_time = 0,
376 cum_fence_time = 0,
377 cum_init_time = 0,
378 cum_total_time = 0,
379 cum_mem_pss = 0.0;
380
381 double min_get_loc_time = get_loc_time / get_loc_cnt,
382 max_get_loc_time = get_loc_time / get_loc_cnt,
383 min_get_rem_time = get_rem_time / get_rem_cnt,
384 max_get_rem_time = get_rem_time / get_rem_cnt,
385 min_init_time = init_time,
386 max_init_time = init_time,
387 min_total_time = total_time,
388 max_total_time = total_time,
389 min_mem_pss = mem_pss,
390 max_mem_pss = 0.0;
391
392 int min_get_loc_idx = 0, max_get_loc_idx = 0;
393 int min_get_rem_idx = 0, max_get_rem_idx = 0;
394
395 char c_get_ltime[128], c_get_rtime[128], c_get_ttime[128];
396 char c_put_ltime[128], c_put_rtime[128];
397 int i;
398 for(i = 0; i < nproc; i++){
399 double val;
400 sprintf(key, "PMIX_PERF_get_total_time.%d", i);
401 cum_get_total_time += pmi_get_double(i, key);
402
403 sprintf(key, "PMIX_PERF_get_loc_time.%d", i);
404 val = pmi_get_double(i, key);
405 cum_get_loc_time += val;
406 if( min_get_loc_time > val ){
407 min_get_loc_time = val;
408 min_get_loc_idx = i;
409 }
410 if( max_get_loc_time < val ){
411 max_get_loc_time = val;
412 max_get_loc_idx = i;
413 }
414
415 sprintf(key, "PMIX_PERF_get_rem_time.%d", i);
416 val = pmi_get_double(i, key);
417 cum_get_rem_time += val;
418 if( min_get_rem_time > val ){
419 min_get_rem_time = val;
420 min_get_rem_idx = i;
421 }
422 if( max_get_rem_time < val ){
423 max_get_rem_time = val;
424 max_get_rem_idx = i;
425 }
426
427 sprintf(key, "PMIX_PERF_get_time.%d", i);
428 cum_get_time += pmi_get_double(i, key);
429
430 sprintf(key, "PMIX_PERF_put_loc_time.%d", i);
431 cum_put_loc_time += pmi_get_double(i, key);
432
433 sprintf(key, "PMIX_PERF_put_rem_time.%d", i);
434 cum_put_rem_time += pmi_get_double(i, key);
435
436 sprintf(key, "PMIX_PERF_commit_time.%d", i);
437 cum_commit_time += pmi_get_double(i, key);
438
439 sprintf(key, "PMIX_PERF_fence_time.%d", i);
440 cum_fence_time += pmi_get_double(i, key);
441
442 sprintf(key, "PMIX_PERF_init_time.%d", i);
443 val = pmi_get_double(i, key);
444 cum_init_time += val;
445 if (min_init_time > val) {
446 min_init_time = val;
447 }
448 if (max_init_time < val) {
449 max_init_time = val;
450 }
451
452 sprintf(key, "PMIX_PERF_total_time.%d", i);
453 val = pmi_get_double(i, key);
454 cum_total_time += val;
455 if (min_total_time > val) {
456 min_total_time = val;
457 }
458 if (max_total_time < val) {
459 max_total_time = val;
460 }
461
462 sprintf(key, "PMIX_PERF_mem_pss.%d", i);
463 val = pmi_get_double(i, key);
464 cum_mem_pss += val;
465 if (min_mem_pss > val) {
466 min_mem_pss = val;
467 }
468 if (max_mem_pss < val) {
469 max_mem_pss = val;
470 }
471 }
472
473 if( get_loc_cnt ){
474 sprintf(c_get_ltime,"%lf", cum_get_loc_time / nproc);
475 } else {
476 sprintf(c_get_ltime,"--------");
477 }
478 if( get_rem_cnt ){
479 sprintf(c_get_rtime,"%lf", cum_get_rem_time / nproc);
480 } else {
481 sprintf(c_get_rtime,"--------");
482 }
483
484 if( get_loc_cnt + get_rem_cnt ){
485 sprintf(c_get_ttime,"%lf", cum_get_time / nproc);
486 } else {
487 sprintf(c_get_ttime,"--------");
488 }
489
490 if( put_loc_cnt ){
491 sprintf(c_put_ltime,"%lf", cum_put_loc_time / nproc);
492 cum_put_total_time += cum_put_loc_time;
493 } else {
494 sprintf(c_put_ltime,"--------");
495 }
496 if( put_rem_cnt ){
497 sprintf(c_put_rtime,"%lf", cum_put_rem_time / nproc);
498 cum_put_total_time += cum_put_rem_time;
499 } else {
500 sprintf(c_put_rtime,"--------");
501 }
502
503 fprintf(stderr,"init: %lf; put: %lf; commit: %lf; fence: %lf; get: %lf; total: %lf\n",
504 cum_init_time / nproc,
505 cum_put_total_time / nproc,
506 cum_commit_time / nproc, cum_fence_time / nproc,
507 cum_get_total_time / nproc,
508 cum_total_time / nproc);
509 fprintf(stderr,"init: max %lf min %lf\n", max_init_time, min_init_time);
510 fprintf(stderr,"put: loc %s rem %s\n", c_put_ltime, c_put_rtime);
511 fprintf(stderr,"get: loc %s rem %s all %s\n", c_get_ltime, c_get_rtime, c_get_ttime);
512 fprintf(stderr,"get: min loc %lf rem %lf (loc: %d, rem: %d)\n",
513 min_get_loc_time, min_get_rem_time, min_get_loc_idx, min_get_rem_idx);
514 fprintf(stderr,"get: max loc %lf rem %lf (loc: %d, rem: %d)\n",
515 max_get_loc_time, max_get_rem_time, max_get_loc_idx, max_get_rem_idx);
516 fprintf(stderr,"total: max %lf min %lf\n", max_total_time, min_total_time);
517 fprintf(stderr,"mem: loc %0.2lf avg %0.2lf min %0.2lf max %0.2lf total %0.2lf Kb\n",
518 mem_pss, cum_mem_pss / nproc, min_mem_pss, max_mem_pss, cum_mem_pss);
519 if( have_shmem ) {
520 fprintf(stderr,"shmem: job_info: %0.2lf total %0.2lf Kb\n",
521 (double)shmem_job_info / 1024, (double)shmem_all / 1024);
522 }
523
524
525
526
527
528
529
530
531
532
533
534
535 }
536
537 pmi_fini();
538
539 return 0;
540 }