root/opal/mca/pmix/pmix4x/pmix/contrib/perf_tools/pmi_intra_perf.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. usage
  2. parse_options
  3. fill_remote_ranks
  4. store_double
  5. get_mem_usage
  6. main

   1 /*
   2  * Copyright (c) 2015      Mellanox Technologies, Inc.  All rights reserved.
   3  * Copyright (c) 2016      Intel, Inc.  All rights reserved.
   4  * $COPYRIGHT$
   5  *
   6  * Additional copyrights may follow
   7  *
   8  * $HEADER$
   9  *
  10  */
  11 
  12 #include <stdio.h>
  13 #include <stdlib.h>
  14 #include <unistd.h>
  15 #include <time.h>
  16 #include <getopt.h>
  17 #include <limits.h>
  18 #include <string.h>
  19 
  20 #include "pmi.h"
  21 
  22 #include <time.h>
  23 #define GET_TS ({ \
  24     struct timespec ts;                     \
  25     double ret;                             \
  26     clock_gettime(CLOCK_MONOTONIC, &ts);    \
  27     ret = ts.tv_sec + 1E-9 * ts.tv_nsec;    \
  28     ret;                                    \
  29 })
  30 
  31 
  32 int key_size = 100, key_count = 10, rank_shift;
  33 int direct_modex = 0, debug_on = 0;
  34 
  35 static void usage(const char *argv0)
  36 {
  37     printf("Usage:\n");
  38     printf("  %s [options]              start the benchmark\n", argv0);
  39     printf("\n");
  40     printf("  -s, --key-size=<size>     size of the key's submitted\n");
  41     printf("  -c, --key-count=<size>    number of keys submitted to local and remote parts\n");
  42     printf("  -d, --direct-modex        use direct modex if available\n");
  43     printf("  --debug                   force all processes to print out the timings themself\n");
  44 }
  45 
  46 
  47 void parse_options(int argc, char **argv)
  48 {
  49     extern char *optarg;
  50     extern int optind;
  51     struct option long_options[] = {
  52         { "help",           0, NULL, 'h' },
  53         /* IB options */
  54         { "key-size",       1, NULL, 's' },
  55         { "key-count",      1, NULL, 'c' },
  56         { "direct-modex",   0, NULL, 'd' },
  57         { "debug",          0, NULL, '0' },
  58         { 0 }
  59     };
  60 
  61     while (1) {
  62         int c;
  63         c = getopt_long(argc, argv, "hs:c:d0", long_options, NULL);
  64 
  65         if (c == -1)
  66             break;
  67         switch (c) {
  68         case 's':
  69             key_size = atoi(optarg);
  70             /* Make sure that we transform it to int as
  71              * this is what will be the key value type
  72              */
  73             key_size = key_size / 4 + !!(key_size % 4);
  74             break;
  75         case 'c':
  76             key_count = atoi(optarg);
  77             break;
  78         case 'd':
  79             direct_modex = 1;
  80             break;
  81         case '0':
  82             debug_on = 1;
  83             break;
  84         case 'h':
  85         default:
  86             usage(argv[0]);
  87             exit(0);
  88         }
  89     }
  90 
  91     rank_shift = 10;
  92     while( rank_shift <= key_count ){
  93         rank_shift *= 10;
  94     }
  95 }
  96 
  97 void fill_remote_ranks(int *local_ranks, int local_cnt, int *remote_ranks, int size)
  98 {
  99     int i, k;
 100     for(i = 0, k = 0; i < size && k < (size - local_cnt); i++ ){
 101         int j, flag = 1;
 102         for(j=0; j < local_cnt; j++){
 103             if( i == local_ranks[j] ){
 104                 flag = 0;
 105                 break;
 106             }
 107         }
 108         if( flag ){
 109             remote_ranks[k] = i;
 110             k++;
 111         }
 112     }
 113 }
 114 
 115 int store_double(char *name, double val)
 116 {
 117     char buf[128];
 118     sprintf(buf,"%lf",val);
 119 
 120 }
 121 
 122 int get_mem_usage(double *_pss, double *_rss) {
 123     char data[PATH_MAX];
 124     FILE *smaps;
 125     double pss = 0.0, rss = 0.0;
 126     char *line = NULL;
 127     size_t size = 0;
 128     pid_t pid = getpid();
 129 
 130     *_pss = 0.0;
 131     *_rss = 0.0;
 132 
 133     memset(data, 0, sizeof(data));
 134     snprintf(data, sizeof(data), "/proc/%d/smaps", pid);
 135 
 136     if (NULL == (smaps = fopen(data, "r"))) {
 137         return -1;
 138     }
 139 
 140     while ((size = getline(&line, &size, smaps)) != -1) {
 141         if (0 == strncmp(line, "Pss", strlen("Pss"))) {
 142             sscanf(line, "Pss: %lf", &pss);
 143             *_pss += pss;
 144         }
 145         if (0 == strncmp(line, "Rss", strlen("Pss"))) {
 146             sscanf(line, "Rss: %lf", &rss);
 147             *_rss += pss;
 148         }
 149     }
 150     free(line);
 151     fclose(smaps);
 152 
 153     return 0;
 154 }
 155 
 156 int main(int argc, char **argv)
 157 {
 158     int rc;
 159     char *key_name;
 160     int *key_val;
 161     int rank, nproc;
 162     int cnt;
 163     int *local_ranks, local_cnt;
 164     int *remote_ranks, remote_cnt;
 165     double start, total_start, get_loc_time = 0, get_rem_time = 0, put_loc_time = 0,
 166            put_rem_time = 0, commit_time = 0, fence_time = 0, init_time = 0, total_time = 0;
 167     int get_loc_cnt = 0, get_rem_cnt = 0, put_loc_cnt = 0, put_rem_cnt = 0;
 168     double mem_pss = 0.0, mem_rss = 0.0;
 169     char have_shmem;
 170     size_t shmem_job_info, shmem_all;
 171 
 172     parse_options(argc, argv);
 173 
 174     total_start = GET_TS;
 175     start = GET_TS;
 176     pmi_init(&rank, &nproc);
 177     init_time += GET_TS - start;
 178 
 179     pmi_get_local_ranks(&local_ranks, &local_cnt);
 180     remote_cnt = nproc - local_cnt;
 181     if( remote_cnt ){
 182         remote_ranks = calloc(remote_cnt, sizeof(int));
 183         fill_remote_ranks(local_ranks, local_cnt, remote_ranks, nproc);
 184     }
 185 
 186     pmi_get_shmem_size(&have_shmem, &shmem_job_info);
 187 
 188     /*
 189      * Make sure that no other rank started publishing keys in the dstore
 190      * before we finished with shmem size screening
 191      */
 192     pmi_fence( 0 );
 193 
 194     if( 0 == rank && debug_on ){
 195         int i;
 196         fprintf(stderr,"%d: local ranks: ", rank);
 197         for(i = 0; i < local_cnt; i++){
 198             fprintf(stderr,"%d ", local_ranks[i]);
 199         }
 200         fprintf(stderr,"\n");
 201         fflush(stderr);
 202     }
 203 
 204     key_val = calloc(key_size, sizeof(int));
 205     for (cnt=0; cnt < key_count; cnt++) {
 206         int i;
 207         if( local_cnt > 0 ){
 208             (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt);
 209             for(i=0; i < key_size; i++){
 210                 key_val[i] = rank * rank_shift + cnt;
 211             }
 212             put_loc_cnt++;
 213             start = GET_TS;
 214             pmi_put_key_loc(key_name, key_val, key_size);
 215             put_loc_time += GET_TS - start;
 216             free(key_name);
 217         }
 218         if( remote_cnt > 0 ){
 219             (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt);
 220             for(i=0; i < key_size; i++){
 221                 key_val[i] = rank * rank_shift + cnt;
 222             }
 223             put_rem_cnt++;
 224             start = GET_TS;
 225             pmi_put_key_rem(key_name, key_val, key_size);
 226             put_rem_time += GET_TS - start;
 227             free(key_name);
 228         }
 229     }
 230     free(key_val);
 231 
 232     start = GET_TS;
 233     pmi_commit();
 234     commit_time += GET_TS - start;
 235 
 236     start = GET_TS;
 237     pmi_fence( !direct_modex );
 238     fence_time += GET_TS - start;
 239 
 240 
 241 
 242     for (cnt=0; cnt < key_count; cnt++) {
 243         int i;
 244 
 245         for(i = 0; i < remote_cnt; i++){
 246             int rank = remote_ranks[i], j;
 247             int *key_val, key_size_new;
 248             double start;
 249             (void)asprintf(&key_name, "KEY-%d-remote-%d", rank, cnt);
 250 
 251             start = GET_TS;
 252             pmi_get_key_rem(rank, key_name, &key_val, &key_size_new);
 253             get_rem_time += GET_TS - start;
 254             get_rem_cnt++;
 255 
 256             if( key_size != key_size_new ){
 257                 fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n",
 258                         rank, key_name, key_size, key_size_new);
 259                 abort();
 260             }
 261 
 262             for(j=0; j < key_size; j++){
 263                 if( key_val[j] != rank * rank_shift + cnt ){
 264                     fprintf(stderr, "%d: error in key %s value (byte %d)\n",
 265                             rank, key_name, j);
 266                     abort();
 267                 }
 268             }
 269             free(key_name);
 270             free(key_val);
 271         }
 272 
 273          // check the returned data
 274         for(i = 0; i < local_cnt; i++){
 275             int rank = local_ranks[i], j;
 276             int *key_val, key_size_new;
 277             double start;
 278             (void)asprintf(&key_name, "KEY-%d-local-%d", rank, cnt);
 279 
 280             start = GET_TS;
 281             pmi_get_key_loc(rank, key_name, &key_val, &key_size_new);
 282             get_loc_time += GET_TS - start;
 283             get_loc_cnt++;
 284 
 285             if( key_size != key_size_new ){
 286                 fprintf(stderr,"%d: error in key %s sizes: %d vs %d\n",
 287                         rank, key_name, key_size, key_size_new);
 288                 abort();
 289             }
 290 
 291             for(j=0; j < key_size; j++){
 292                 if( key_val[j] != rank * rank_shift + cnt ){
 293                     fprintf(stderr, "%d: error in key %s value (byte %d)",
 294                             rank, key_name, j);
 295                     abort();
 296                 }
 297             }
 298             free(key_name);
 299             free(key_val);
 300         }
 301     }
 302 
 303     total_time = GET_TS - total_start;
 304 
 305     if (0 != get_mem_usage(&mem_pss, &mem_rss)) {
 306         fprintf(stderr, "Rank %d: error get memory usage", rank);
 307         abort();
 308     }
 309 
 310     if( debug_on ){
 311         fprintf(stderr,"%d: get: total %lf avg loc %lf rem %lf all %lf ; put: %lf %lf commit: %lf fence %lf\n",
 312                 rank, (get_loc_time + get_rem_time),
 313                 get_loc_time/get_loc_cnt, get_rem_time/get_rem_cnt,
 314                 (get_loc_time + get_rem_time)/(get_loc_cnt + get_rem_cnt),
 315                 put_loc_time/put_loc_cnt, put_rem_time/put_rem_cnt,
 316                 commit_time, fence_time);
 317     }
 318 
 319     pmi_get_shmem_size(&have_shmem, &shmem_all);
 320     /*
 321      * The barrier ensures that all procs finished key fetching
 322      * we had issues with dstor/lockless case evaluation
 323      */
 324     pmi_fence( 0 );
 325 
 326     /* Out of the perf path - send our results to rank 0 using same PMI */
 327     char key[128];
 328     sprintf(key, "PMIX_PERF_get_total_time.%d", rank);
 329     pmi_put_double(key, get_rem_time + get_loc_time);
 330 
 331     sprintf(key, "PMIX_PERF_get_loc_time.%d", rank);
 332     pmi_put_double(key, get_loc_cnt ? get_loc_time/get_loc_cnt : 0 );
 333 
 334     sprintf(key, "PMIX_PERF_get_rem_time.%d", rank);
 335     pmi_put_double(key, get_rem_cnt ? get_rem_time/get_rem_cnt : 0);
 336 
 337     sprintf(key, "PMIX_PERF_get_time.%d", rank);
 338     pmi_put_double(key, (get_loc_time + get_rem_time)/(get_loc_cnt + get_rem_cnt) );
 339 
 340     sprintf(key, "PMIX_PERF_put_loc_time.%d", rank);
 341     pmi_put_double(key, put_loc_cnt ? put_loc_time / put_loc_cnt : 0);
 342 
 343     sprintf(key, "PMIX_PERF_put_rem_time.%d", rank);
 344     pmi_put_double(key, put_rem_cnt ? put_rem_time / put_rem_cnt : 0);
 345 
 346     sprintf(key, "PMIX_PERF_commit_time.%d", rank);
 347     pmi_put_double(key, commit_time);
 348 
 349     sprintf(key, "PMIX_PERF_fence_time.%d", rank);
 350     pmi_put_double(key, fence_time);
 351 
 352     sprintf(key, "PMIX_PERF_init_time.%d", rank);
 353     pmi_put_double(key, init_time);
 354 
 355     sprintf(key, "PMIX_PERF_total_time.%d", rank);
 356     pmi_put_double(key, total_time);
 357 
 358     sprintf(key, "PMIX_PERF_mem_pss.%d", rank);
 359     pmi_put_double(key, mem_pss);
 360 
 361     sprintf(key, "PMIX_PERF_mem_rss.%d", rank);
 362     pmi_put_double(key, mem_rss);
 363 
 364     pmi_commit();
 365     pmi_fence( 1 );
 366 
 367     if( rank == 0 ){
 368         double  cum_get_total_time = 0,
 369                 cum_get_loc_time = 0,
 370                 cum_get_rem_time = 0,
 371                 cum_get_time = 0,
 372                 cum_put_total_time = 0,
 373                 cum_put_loc_time = 0,
 374                 cum_put_rem_time = 0,
 375                 cum_commit_time = 0,
 376                 cum_fence_time = 0,
 377                 cum_init_time = 0,
 378                 cum_total_time = 0,
 379                 cum_mem_pss = 0.0;
 380 
 381         double  min_get_loc_time = get_loc_time / get_loc_cnt,
 382                 max_get_loc_time = get_loc_time / get_loc_cnt,
 383                 min_get_rem_time = get_rem_time / get_rem_cnt,
 384                 max_get_rem_time = get_rem_time / get_rem_cnt,
 385                 min_init_time = init_time,
 386                 max_init_time = init_time,
 387                 min_total_time = total_time,
 388                 max_total_time = total_time,
 389                 min_mem_pss = mem_pss,
 390                 max_mem_pss = 0.0;
 391 
 392         int min_get_loc_idx = 0, max_get_loc_idx = 0;
 393         int min_get_rem_idx = 0, max_get_rem_idx = 0;
 394 
 395         char c_get_ltime[128], c_get_rtime[128], c_get_ttime[128];
 396         char c_put_ltime[128], c_put_rtime[128];
 397         int i;
 398         for(i = 0; i < nproc; i++){
 399             double val;
 400             sprintf(key, "PMIX_PERF_get_total_time.%d", i);
 401             cum_get_total_time += pmi_get_double(i, key);
 402 
 403             sprintf(key, "PMIX_PERF_get_loc_time.%d", i);
 404             val = pmi_get_double(i, key);
 405             cum_get_loc_time += val;
 406             if( min_get_loc_time > val ){
 407                 min_get_loc_time = val;
 408                 min_get_loc_idx = i;
 409             }
 410             if( max_get_loc_time < val ){
 411                 max_get_loc_time = val;
 412                 max_get_loc_idx = i;
 413             }
 414 
 415             sprintf(key, "PMIX_PERF_get_rem_time.%d", i);
 416             val = pmi_get_double(i, key);
 417             cum_get_rem_time += val;
 418             if( min_get_rem_time > val ){
 419                 min_get_rem_time = val;
 420                 min_get_rem_idx = i;
 421             }
 422             if( max_get_rem_time < val ){
 423                 max_get_rem_time = val;
 424                 max_get_rem_idx = i;
 425             }
 426 
 427             sprintf(key, "PMIX_PERF_get_time.%d", i);
 428             cum_get_time += pmi_get_double(i, key);
 429 
 430             sprintf(key, "PMIX_PERF_put_loc_time.%d", i);
 431             cum_put_loc_time += pmi_get_double(i, key);
 432 
 433             sprintf(key, "PMIX_PERF_put_rem_time.%d", i);
 434             cum_put_rem_time += pmi_get_double(i, key);
 435 
 436             sprintf(key, "PMIX_PERF_commit_time.%d", i);
 437             cum_commit_time += pmi_get_double(i, key);
 438 
 439             sprintf(key, "PMIX_PERF_fence_time.%d", i);
 440             cum_fence_time += pmi_get_double(i, key);
 441 
 442             sprintf(key, "PMIX_PERF_init_time.%d", i);
 443             val = pmi_get_double(i, key);
 444             cum_init_time += val;
 445             if (min_init_time > val) {
 446                 min_init_time = val;
 447             }
 448             if (max_init_time < val) {
 449                 max_init_time = val;
 450             }
 451 
 452             sprintf(key, "PMIX_PERF_total_time.%d", i);
 453             val = pmi_get_double(i, key);
 454             cum_total_time += val;
 455             if (min_total_time > val) {
 456                 min_total_time = val;
 457             }
 458             if (max_total_time < val) {
 459                 max_total_time = val;
 460             }
 461 
 462             sprintf(key, "PMIX_PERF_mem_pss.%d", i);
 463             val = pmi_get_double(i, key);
 464             cum_mem_pss += val;
 465             if (min_mem_pss > val) {
 466                 min_mem_pss = val;
 467             }
 468             if (max_mem_pss < val) {
 469                 max_mem_pss = val;
 470             }
 471         }
 472 
 473         if( get_loc_cnt ){
 474             sprintf(c_get_ltime,"%lf", cum_get_loc_time / nproc);
 475         } else {
 476             sprintf(c_get_ltime,"--------");
 477         }
 478         if( get_rem_cnt ){
 479             sprintf(c_get_rtime,"%lf", cum_get_rem_time / nproc);
 480         } else {
 481             sprintf(c_get_rtime,"--------");
 482         }
 483 
 484         if( get_loc_cnt + get_rem_cnt ){
 485             sprintf(c_get_ttime,"%lf", cum_get_time / nproc);
 486         } else {
 487             sprintf(c_get_ttime,"--------");
 488         }
 489 
 490         if( put_loc_cnt ){
 491             sprintf(c_put_ltime,"%lf", cum_put_loc_time / nproc);
 492             cum_put_total_time += cum_put_loc_time;
 493         } else {
 494             sprintf(c_put_ltime,"--------");
 495         }
 496         if( put_rem_cnt ){
 497             sprintf(c_put_rtime,"%lf", cum_put_rem_time / nproc);
 498             cum_put_total_time += cum_put_rem_time;
 499         } else {
 500             sprintf(c_put_rtime,"--------");
 501         }
 502 
 503         fprintf(stderr,"init: %lf; put: %lf; commit: %lf; fence: %lf; get: %lf; total: %lf\n",
 504                 cum_init_time / nproc,
 505                 cum_put_total_time / nproc,
 506                 cum_commit_time / nproc, cum_fence_time / nproc,
 507                 cum_get_total_time / nproc,
 508                 cum_total_time / nproc);
 509         fprintf(stderr,"init:          max %lf min %lf\n",  max_init_time, min_init_time);
 510         fprintf(stderr,"put:           loc %s rem %s\n", c_put_ltime, c_put_rtime);
 511         fprintf(stderr,"get:           loc %s rem %s all %s\n", c_get_ltime, c_get_rtime, c_get_ttime);
 512         fprintf(stderr,"get:           min loc %lf rem %lf (loc: %d, rem: %d)\n",
 513                 min_get_loc_time, min_get_rem_time, min_get_loc_idx, min_get_rem_idx);
 514         fprintf(stderr,"get:           max loc %lf rem %lf (loc: %d, rem: %d)\n",
 515                 max_get_loc_time, max_get_rem_time, max_get_loc_idx, max_get_rem_idx);
 516         fprintf(stderr,"total:         max %lf min %lf\n", max_total_time, min_total_time);
 517         fprintf(stderr,"mem:           loc %0.2lf avg %0.2lf min %0.2lf max %0.2lf total %0.2lf Kb\n",
 518                 mem_pss, cum_mem_pss / nproc, min_mem_pss, max_mem_pss, cum_mem_pss);
 519         if( have_shmem ) {
 520             fprintf(stderr,"shmem:         job_info: %0.2lf total %0.2lf Kb\n",
 521                     (double)shmem_job_info / 1024, (double)shmem_all / 1024);
 522         }
 523 
 524         /* debug printout *//*
 525         for(i = 0; i < nproc; i++){
 526             double val;
 527             printf("%d: ", i);
 528             sprintf(key, "PMIX_PERF_get_loc_time.%d", i);
 529             printf("local = %lf ", pmi_get_double(i, key));
 530 
 531             sprintf(key, "PMIX_PERF_get_rem_time.%d", i);
 532             printf("remote = %lf\n", pmi_get_double(i, key));
 533         }
 534 */
 535     }
 536 
 537     pmi_fini();
 538 
 539     return 0;
 540 }

/* [<][>][^][v][top][bottom][index][help] */