root/ompi/util/timings.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2017-2018 Mellanox Technologies Ltd. All rights reserved.
   3  * Copyright (c) 2017      Intel, Inc. All rights reserved.
   4  * $COPYRIGHT$
   5  *
   6  * Additional copyrights may follow
   7  *
   8  * $HEADER$
   9  */
  10 
  11 #ifndef OMPI_UTIL_TIMING_H
  12 #define OMPI_UTIL_TIMING_H
  13 
  14 #include "opal/util/timings.h"
  15 /* TODO: we need access to MPI_* functions */
  16 
  17 #if (OPAL_ENABLE_TIMING)
  18 
  19 typedef struct {
  20     char desc[OPAL_TIMING_STR_LEN];
  21     double ts;
  22     char *file;
  23     char *prefix;
  24     int imported;
  25 }   ompi_timing_val_t;
  26 
  27 typedef struct {
  28     ompi_timing_val_t *val;
  29     int use;
  30     struct ompi_timing_list_t *next;
  31 } ompi_timing_list_t;
  32 
  33 typedef struct ompi_timing_t {
  34     double ts;
  35     const char *prefix;
  36     int size;
  37     int cnt;
  38     int error;
  39     int enabled;
  40     int import_cnt;
  41     opal_timing_ts_func_t get_ts;
  42     ompi_timing_list_t *timing;
  43     ompi_timing_list_t *cur_timing;
  44 } ompi_timing_t;
  45 
  46 #define OMPI_TIMING_ENABLED \
  47     (getenv("OMPI_TIMING_ENABLE") ? atoi(getenv("OMPI_TIMING_ENABLE")) : 0)
  48 
  49 #define OMPI_TIMING_INIT(_size)                                                \
  50     ompi_timing_t OMPI_TIMING;                                                 \
  51     OMPI_TIMING.prefix = __func__;                                             \
  52     OMPI_TIMING.size = _size;                                                  \
  53     OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER);     \
  54     OMPI_TIMING.cnt = 0;                                                       \
  55     OMPI_TIMING.error = 0;                                                     \
  56     OMPI_TIMING.ts = OMPI_TIMING.get_ts();                                     \
  57     OMPI_TIMING.enabled = 0;                                                   \
  58     OMPI_TIMING.import_cnt = 0;                                                \
  59     {                                                                          \
  60         char *ptr;                                                             \
  61         ptr = getenv("OMPI_TIMING_ENABLE");                                    \
  62         if (NULL != ptr) {                                                     \
  63             OMPI_TIMING.enabled = atoi(ptr);                                   \
  64         }                                                                      \
  65         if (OMPI_TIMING.enabled) {                                             \
  66             setenv("OPAL_TIMING_ENABLE", "1", 1);                              \
  67             OMPI_TIMING.timing = (ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t));              \
  68             memset(OMPI_TIMING.timing, 0, sizeof(ompi_timing_list_t));         \
  69             OMPI_TIMING.timing->val = (ompi_timing_val_t*)malloc(sizeof(ompi_timing_val_t) * _size);   \
  70             OMPI_TIMING.cur_timing = OMPI_TIMING.timing;                       \
  71         }                                                                      \
  72     }
  73 
  74 #define OMPI_TIMING_ITEM_EXTEND                                                    \
  75     do {                                                                           \
  76         if (OMPI_TIMING.enabled) {                                                 \
  77             OMPI_TIMING.cur_timing->next = (struct ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t)); \
  78             OMPI_TIMING.cur_timing = (ompi_timing_list_t*)OMPI_TIMING.cur_timing->next;                    \
  79             memset(OMPI_TIMING.cur_timing, 0, sizeof(ompi_timing_list_t));                                 \
  80             OMPI_TIMING.cur_timing->val = malloc(sizeof(ompi_timing_val_t) * OMPI_TIMING.size);            \
  81         }                                                                          \
  82     } while(0)
  83 
  84 #define OMPI_TIMING_FINALIZE                                                       \
  85     do {                                                                           \
  86         if (OMPI_TIMING.enabled) {                                                 \
  87             ompi_timing_list_t *t = OMPI_TIMING.timing, *tmp;                      \
  88             while ( NULL != t) {                                                   \
  89                 tmp = t;                                                           \
  90                 t = (ompi_timing_list_t*)t->next;                                  \
  91                 free(tmp->val);                                                    \
  92                 free(tmp);                                                         \
  93             }                                                                      \
  94             OMPI_TIMING.timing = NULL;                                             \
  95             OMPI_TIMING.cur_timing = NULL;                                         \
  96             OMPI_TIMING.cnt = 0;                                                   \
  97         }                                                                          \
  98     } while(0)
  99 
 100 #define OMPI_TIMING_NEXT(...)                                                      \
 101     do {                                                                           \
 102         if (!OMPI_TIMING.error && OMPI_TIMING.enabled) {                           \
 103             char *f = strrchr(__FILE__, '/');                                      \
 104             f = (f == NULL) ? strdup(__FILE__) : f+1;                              \
 105             int len = 0;                                                           \
 106             if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){                  \
 107                 OMPI_TIMING_ITEM_EXTEND;                                           \
 108             }                                                                      \
 109             len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc,        \
 110                 OPAL_TIMING_STR_LEN, ##__VA_ARGS__);                               \
 111             if (len >= OPAL_TIMING_STR_LEN) {                                      \
 112                 OMPI_TIMING.error = 1;                                             \
 113             }                                                                      \
 114             OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = strdup(f);     \
 115             OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = strdup(__func__);      \
 116             OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts =        \
 117                 OMPI_TIMING.get_ts() - OMPI_TIMING.ts;                             \
 118             OMPI_TIMING.cnt++;                                                     \
 119             OMPI_TIMING.ts = OMPI_TIMING.get_ts();                                 \
 120         }                                                                          \
 121     } while(0)
 122 
 123 #define OMPI_TIMING_APPEND(filename,func,desc,ts)                                  \
 124     do {                                                                           \
 125         if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size){                      \
 126             OMPI_TIMING_ITEM_EXTEND;                                               \
 127         }                                                                          \
 128         int len = snprintf(OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].desc,        \
 129             OPAL_TIMING_STR_LEN, "%s", desc);                                      \
 130         if (len >= OPAL_TIMING_STR_LEN) {                                          \
 131             OMPI_TIMING.error = 1;                                                 \
 132         }                                                                          \
 133         OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].prefix = func;    \
 134         OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].file = filename;  \
 135         OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use++].ts = ts;        \
 136         OMPI_TIMING.cnt++;                                                         \
 137     } while(0)
 138 
 139 #define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func)                              \
 140     do {                                                                           \
 141         if (!OMPI_TIMING.error && OMPI_TIMING.enabled) {                           \
 142             int cnt;                                                               \
 143             int i;                                                                 \
 144             double ts;                                                             \
 145             OMPI_TIMING.import_cnt++;                                              \
 146             OPAL_TIMING_ENV_CNT(func, cnt);                                        \
 147             OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error);        \
 148             for(i = 0; i < cnt; i++){                                              \
 149                 char *desc, *filename;                                             \
 150                 OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use].imported= \
 151                     OMPI_TIMING.import_cnt;                                        \
 152                 OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i, &desc, ts);  \
 153                 OMPI_TIMING_APPEND(filename, func, desc, ts);                      \
 154             }                                                                      \
 155         }                                                                          \
 156     } while(0)
 157 
 158 #define OMPI_TIMING_IMPORT_OPAL(func)                                              \
 159         OMPI_TIMING_IMPORT_OPAL_PREFIX("", func);
 160 
 161 #define OMPI_TIMING_OUT                                                           \
 162     do {                                                                          \
 163         if (OMPI_TIMING.enabled) {                                                \
 164             int i, size, rank;                                                    \
 165             MPI_Comm_size(MPI_COMM_WORLD, &size);                                 \
 166             MPI_Comm_rank(MPI_COMM_WORLD, &rank);                                 \
 167             int error = 0;                                                        \
 168             int imported = 0;                                                     \
 169                                                                                   \
 170             MPI_Reduce(&OMPI_TIMING.error, &error, 1,                             \
 171                 MPI_INT, MPI_SUM, 0, MPI_COMM_WORLD);                             \
 172                                                                                   \
 173             if (error) {                                                          \
 174                 if (0 == rank) {                                                  \
 175                     printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \
 176                 }                                                                 \
 177             }                                                                     \
 178             else {                                                                \
 179                 double *avg = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt);  \
 180                 double *min = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt);  \
 181                 double *max = (double*)malloc(sizeof(double) * OMPI_TIMING.cnt);  \
 182                 char **desc = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt);    \
 183                 char **prefix = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt);  \
 184                 char **file = (char**)malloc(sizeof(char*) * OMPI_TIMING.cnt);    \
 185                 double total_avg = 0, total_min = 0, total_max = 0;               \
 186                                                                                   \
 187                 if( OMPI_TIMING.cnt > 0 ) {                                       \
 188                     OMPI_TIMING.ts = OMPI_TIMING.get_ts();                        \
 189                     ompi_timing_list_t *timing = OMPI_TIMING.timing;              \
 190                     i = 0;                                                        \
 191                     do {                                                          \
 192                         int use;                                                  \
 193                         for (use = 0; use < timing->use; use++) {                 \
 194                             MPI_Reduce(&timing->val[use].ts, avg + i, 1,          \
 195                                 MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);          \
 196                             MPI_Reduce(&timing->val[use].ts, min + i, 1,          \
 197                                 MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);          \
 198                             MPI_Reduce(&timing->val[use].ts, max + i, 1,          \
 199                                 MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);          \
 200                             desc[i] = timing->val[use].desc;                      \
 201                             prefix[i] = timing->val[use].prefix;                  \
 202                             file[i] = timing->val[use].file;                      \
 203                             i++;                                                  \
 204                         }                                                         \
 205                         timing = (ompi_timing_list_t*)timing->next;               \
 206                     } while (timing != NULL);                                     \
 207                                                                                   \
 208                     if( 0 == rank ) {                                             \
 209                         if (OMPI_TIMING.timing->next) {                           \
 210                             printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n"            \
 211                                    "==OMPI_TIMING==          Increase the inited size of timings to avoid extra allocation during runtime.\n");    \
 212                         }                                                         \
 213                                                                                   \
 214                         printf("------------------ %s ------------------\n",      \
 215                             OMPI_TIMING.prefix);                                  \
 216                         imported = OMPI_TIMING.timing->val[0].imported;           \
 217                         for(i=0; i< OMPI_TIMING.cnt; i++){                        \
 218                             bool print_total = 0;                                 \
 219                             imported = OMPI_TIMING.timing->val[i].imported;       \
 220                             avg[i] /= size;                                       \
 221                             printf("%s[%s:%s:%s]: %lf / %lf / %lf\n",             \
 222                                 imported ? " -- " : "",                           \
 223                                 file[i], prefix[i], desc[i], avg[i], min[i], max[i]); \
 224                             if (OMPI_TIMING.timing->val[i].imported) {            \
 225                                 total_avg += avg[i];                              \
 226                                 total_min += min[i];                              \
 227                                 total_max += max[i];                              \
 228                             }                                                     \
 229                             if (i == (OMPI_TIMING.cnt-1)) {                       \
 230                                 print_total = true;                               \
 231                             } else {                                              \
 232                                 print_total = imported != OMPI_TIMING.timing->val[i+1].imported; \
 233                             }                                                     \
 234                             if (print_total && OMPI_TIMING.timing->val[i].imported) {            \
 235                                 printf("%s[%s:%s:%s]: %lf / %lf / %lf\n",         \
 236                                     imported ? " !! " : "",                       \
 237                                     file[i], prefix[i], "total",                  \
 238                                     total_avg, total_min, total_max);             \
 239                                     total_avg = 0; total_min = 0; total_max = 0;  \
 240                             }                                                     \
 241                         }                                                         \
 242                         total_avg = 0; total_min = 0; total_max = 0;              \
 243                         for(i=0; i< OMPI_TIMING.cnt; i++) {                       \
 244                             if (!OMPI_TIMING.timing->val[i].imported) {           \
 245                                 total_avg += avg[i];                              \
 246                                 total_min += min[i];                              \
 247                                 total_max += max[i];                              \
 248                             }                                                     \
 249                         }                                                         \
 250                         printf("[%s:total] %lf / %lf / %lf\n",                    \
 251                             OMPI_TIMING.prefix,                                   \
 252                             total_avg, total_min, total_max);                     \
 253                         printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix,       \
 254                             OMPI_TIMING.get_ts() - OMPI_TIMING.ts);               \
 255                     }                                                             \
 256                 }                                                                 \
 257                 free(avg);                                                        \
 258                 free(min);                                                        \
 259                 free(max);                                                        \
 260                 free(desc);                                                       \
 261                 free(prefix);                                                     \
 262                 free(file);                                                       \
 263             }                                                                     \
 264         }                                                                         \
 265     } while(0)
 266 
 267 #else
 268 #define OMPI_TIMING_INIT(size)
 269 
 270 #define OMPI_TIMING_NEXT(...)
 271 
 272 #define OMPI_TIMING_APPEND(desc,ts)
 273 
 274 #define OMPI_TIMING_OUT
 275 
 276 #define OMPI_TIMING_IMPORT_OPAL(func)
 277 
 278 #define OMPI_TIMING_FINALIZE
 279 
 280 #define OMPI_TIMING_ENABLED 0
 281 
 282 #endif
 283 
 284 #endif

/* [<][>][^][v][top][bottom][index][help] */