1
2
3
4
5
6
7
8
9
10
11 #ifndef OMPI_UTIL_TIMING_H
12 #define OMPI_UTIL_TIMING_H
13
14 #include "opal/util/timings.h"
15
16
17 #if (OPAL_ENABLE_TIMING)
18
19 typedef struct {
20 char desc[OPAL_TIMING_STR_LEN];
21 double ts;
22 char *file;
23 char *prefix;
24 int imported;
25 } ompi_timing_val_t;
26
27 typedef struct {
28 ompi_timing_val_t *val;
29 int use;
30 struct ompi_timing_list_t *next;
31 } ompi_timing_list_t;
32
33 typedef struct ompi_timing_t {
34 double ts;
35 const char *prefix;
36 int size;
37 int cnt;
38 int error;
39 int enabled;
40 int import_cnt;
41 opal_timing_ts_func_t get_ts;
42 ompi_timing_list_t *timing;
43 ompi_timing_list_t *cur_timing;
44 } ompi_timing_t;
45
/*
 * Expands to the integer value of the OMPI_TIMING_ENABLE environment
 * variable as parsed by atoi(), or to 0 when the variable is not set.
 * The environment is queried on every expansion.
 */
#define OMPI_TIMING_ENABLED                                                    \
    (NULL == getenv("OMPI_TIMING_ENABLE")                                      \
         ? 0                                                                   \
         : atoi(getenv("OMPI_TIMING_ENABLE")))
48
/*
 * OMPI_TIMING_INIT(_size)
 *
 * Declares the local timing context OMPI_TIMING and initialises it.
 * _size is the number of samples per storage chunk; values below 1 are
 * raised to 1 so a chunk always has room for at least one sample.
 *
 * Timing is switched on when the OMPI_TIMING_ENABLE environment variable
 * parses (atoi) to a non-zero value.  In that case OPAL_TIMING_ENABLE is
 * exported as "1" and the first storage chunk is allocated zero-filled.
 * If that allocation fails, the context stays enabled but OMPI_TIMING.error
 * is set and no storage is attached, so the recording macros (which test
 * the error flag) do nothing.
 *
 * The list pointers are always initialised (NULL when disabled) so no
 * member of the context is ever left indeterminate.
 *
 * Note: the macro expands to a declaration followed by statements, so it
 * cannot be used as the body of an unbraced if/else.
 */
#define OMPI_TIMING_INIT(_size)                                                \
    ompi_timing_t OMPI_TIMING;                                                 \
    OMPI_TIMING.prefix = __func__;                                             \
    OMPI_TIMING.size = (_size);                                                \
    OMPI_TIMING.get_ts = opal_timing_ts_func(OPAL_TIMING_AUTOMATIC_TIMER);     \
    OMPI_TIMING.cnt = 0;                                                       \
    OMPI_TIMING.error = 0;                                                     \
    OMPI_TIMING.ts = OMPI_TIMING.get_ts();                                     \
    OMPI_TIMING.enabled = 0;                                                   \
    OMPI_TIMING.import_cnt = 0;                                                \
    OMPI_TIMING.timing = NULL;                                                 \
    OMPI_TIMING.cur_timing = NULL;                                             \
    {                                                                          \
        const char *ompi_tm_env_ = getenv("OMPI_TIMING_ENABLE");               \
        if (OMPI_TIMING.size < 1) {                                            \
            OMPI_TIMING.size = 1;                                              \
        }                                                                      \
        if (NULL != ompi_tm_env_) {                                            \
            OMPI_TIMING.enabled = atoi(ompi_tm_env_);                          \
        }                                                                      \
        if (OMPI_TIMING.enabled) {                                             \
            setenv("OPAL_TIMING_ENABLE", "1", 1);                              \
            OMPI_TIMING.timing =                                               \
                (ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t));       \
            if (NULL != OMPI_TIMING.timing) {                                  \
                memset(OMPI_TIMING.timing, 0, sizeof(ompi_timing_list_t));     \
                OMPI_TIMING.timing->val = (ompi_timing_val_t*)malloc(          \
                    sizeof(ompi_timing_val_t) * (size_t)OMPI_TIMING.size);     \
                if (NULL != OMPI_TIMING.timing->val) {                         \
                    /* zero the samples so "imported" starts out as 0 */       \
                    memset(OMPI_TIMING.timing->val, 0,                         \
                           sizeof(ompi_timing_val_t) *                         \
                               (size_t)OMPI_TIMING.size);                      \
                } else {                                                       \
                    free(OMPI_TIMING.timing);                                  \
                    OMPI_TIMING.timing = NULL;                                 \
                }                                                              \
            }                                                                  \
            if (NULL == OMPI_TIMING.timing) {                                  \
                OMPI_TIMING.error = 1;                                         \
            }                                                                  \
            OMPI_TIMING.cur_timing = OMPI_TIMING.timing;                       \
        }                                                                      \
    }
73
/*
 * OMPI_TIMING_ITEM_EXTEND
 *
 * Appends a fresh, zero-filled storage chunk (OMPI_TIMING.size samples)
 * behind the current one and makes it the current chunk.  Does nothing
 * when timing is disabled.
 *
 * The new chunk is linked in only after both of its allocations succeeded.
 * On allocation failure OMPI_TIMING.error is set, the list is left
 * unchanged and the fill level of the (full) current chunk is moved back
 * to its last slot, so a caller that goes on to write val[use] stays
 * inside the array.  The overwritten sample is irrelevant because results
 * are not reported once the error flag is set.
 */
#define OMPI_TIMING_ITEM_EXTEND                                                \
    do {                                                                       \
        if (OMPI_TIMING.enabled) {                                             \
            ompi_timing_list_t *ompi_tm_node_ =                                \
                (ompi_timing_list_t*)malloc(sizeof(ompi_timing_list_t));       \
            if (NULL != ompi_tm_node_) {                                       \
                memset(ompi_tm_node_, 0, sizeof(ompi_timing_list_t));          \
                ompi_tm_node_->val = (ompi_timing_val_t*)malloc(               \
                    sizeof(ompi_timing_val_t) * (size_t)OMPI_TIMING.size);     \
                if (NULL != ompi_tm_node_->val) {                              \
                    memset(ompi_tm_node_->val, 0,                              \
                           sizeof(ompi_timing_val_t) *                         \
                               (size_t)OMPI_TIMING.size);                      \
                } else {                                                       \
                    free(ompi_tm_node_);                                       \
                    ompi_tm_node_ = NULL;                                      \
                }                                                              \
            }                                                                  \
            if (NULL != ompi_tm_node_) {                                       \
                OMPI_TIMING.cur_timing->next =                                 \
                    (struct ompi_timing_list_t*)ompi_tm_node_;                 \
                OMPI_TIMING.cur_timing = ompi_tm_node_;                        \
            } else {                                                           \
                OMPI_TIMING.error = 1;                                         \
                if (OMPI_TIMING.size > 0) {                                    \
                    OMPI_TIMING.cur_timing->use = OMPI_TIMING.size - 1;        \
                }                                                              \
            }                                                                  \
        }                                                                      \
    } while(0)
83
/*
 * OMPI_TIMING_FINALIZE
 *
 * When timing is enabled, releases every storage chunk (its sample array
 * and the chunk itself) and resets the list pointers and the sample
 * counter.  Does nothing when timing is disabled.
 */
#define OMPI_TIMING_FINALIZE                                                   \
    do {                                                                       \
        if (OMPI_TIMING.enabled) {                                             \
            ompi_timing_list_t *node, *following;                              \
            for (node = OMPI_TIMING.timing; NULL != node; node = following) {  \
                /* remember the successor before the chunk is released */      \
                following = (ompi_timing_list_t*)node->next;                   \
                free(node->val);                                               \
                free(node);                                                    \
            }                                                                  \
            OMPI_TIMING.cnt = 0;                                               \
            OMPI_TIMING.cur_timing = NULL;                                     \
            OMPI_TIMING.timing = NULL;                                         \
        }                                                                      \
    } while(0)
99
/*
 * OMPI_TIMING_NEXT(fmt, ...)
 *
 * Records one sample: the time elapsed since the previous measurement
 * point, labelled with the printf-style description given as arguments.
 * Active only while timing is enabled and no error has been flagged.
 *
 * - The storage is extended first when the current chunk is full; if that
 *   flags an error nothing is recorded.
 * - A description that does not fit into OPAL_TIMING_STR_LEN (or a
 *   formatting failure) sets OMPI_TIMING.error; the sample is still stored
 *   with the truncated text.
 * - "file" and "prefix" point directly at the __FILE__ literal (path
 *   stripped) and at __func__.  Both have static storage duration, so no
 *   copies are made and nothing has to be released later; the strings must
 *   never be written to or freed.
 * - "imported" is explicitly cleared, since the report distinguishes local
 *   samples from imported ones through that field.
 *
 * The helper variables use reserved-looking names so they cannot capture
 * identifiers that appear in the caller's format arguments.
 */
#define OMPI_TIMING_NEXT(...)                                                  \
    do {                                                                       \
        if (!OMPI_TIMING.error && OMPI_TIMING.enabled) {                       \
            if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size) {             \
                OMPI_TIMING_ITEM_EXTEND;                                       \
            }                                                                  \
            if (!OMPI_TIMING.error) {                                          \
                ompi_timing_val_t *ompi_tm_val_ =                              \
                    &OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use]; \
                const char *ompi_tm_file_ = strrchr(__FILE__, '/');            \
                int ompi_tm_len_ = snprintf(ompi_tm_val_->desc,                \
                                            OPAL_TIMING_STR_LEN,               \
                                            ##__VA_ARGS__);                    \
                if (ompi_tm_len_ < 0 ||                                        \
                    ompi_tm_len_ >= OPAL_TIMING_STR_LEN) {                     \
                    OMPI_TIMING.error = 1;                                     \
                }                                                              \
                ompi_tm_val_->file = (char*)((NULL == ompi_tm_file_)           \
                                                 ? __FILE__                    \
                                                 : ompi_tm_file_ + 1);         \
                ompi_tm_val_->prefix = (char*)__func__;                        \
                ompi_tm_val_->imported = 0;                                    \
                ompi_tm_val_->ts = OMPI_TIMING.get_ts() - OMPI_TIMING.ts;      \
                OMPI_TIMING.cur_timing->use++;                                 \
                OMPI_TIMING.cnt++;                                             \
                OMPI_TIMING.ts = OMPI_TIMING.get_ts();                         \
            }                                                                  \
        }                                                                      \
    } while(0)
122
/*
 * OMPI_TIMING_APPEND(filename, func, desc, ts)
 *
 * Stores an externally measured sample: "desc" is copied into the sample's
 * description (bounded by OPAL_TIMING_STR_LEN), while "filename" and "func"
 * are stored as pointers without copying, so they must stay valid for as
 * long as the sample is used.  "ts" is stored as the sample's time value.
 *
 * The storage is extended first when the current chunk is full.  A
 * description that does not fit (or a formatting failure) sets
 * OMPI_TIMING.error.  Nothing happens while timing is disabled.
 *
 * The parameters carry a leading underscore on purpose: a parameter named
 * "desc" or "ts" would also be substituted into the member accesses
 * ".desc" / ".ts" in the body, which only compiles when the caller's
 * arguments happen to have exactly those names.
 */
#define OMPI_TIMING_APPEND(_filename, _func, _desc, _ts)                       \
    do {                                                                       \
        if (OMPI_TIMING.enabled) {                                             \
            if (OMPI_TIMING.cur_timing->use >= OMPI_TIMING.size) {             \
                OMPI_TIMING_ITEM_EXTEND;                                       \
            }                                                                  \
            {                                                                  \
                ompi_timing_val_t *ompi_tm_val_ =                              \
                    &OMPI_TIMING.cur_timing->val[OMPI_TIMING.cur_timing->use]; \
                int ompi_tm_len_ = snprintf(ompi_tm_val_->desc,                \
                                            OPAL_TIMING_STR_LEN,               \
                                            "%s", (_desc));                    \
                if (ompi_tm_len_ < 0 ||                                        \
                    ompi_tm_len_ >= OPAL_TIMING_STR_LEN) {                     \
                    OMPI_TIMING.error = 1;                                     \
                }                                                              \
                ompi_tm_val_->prefix = (char*)(_func);                         \
                ompi_tm_val_->file = (char*)(_filename);                       \
                ompi_tm_val_->ts = (_ts);                                      \
                OMPI_TIMING.cur_timing->use++;                                 \
                OMPI_TIMING.cnt++;                                             \
            }                                                                  \
        }                                                                      \
    } while(0)
138
/*
 * OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func)
 *
 * Imports the samples published for section "func" (looked up through the
 * OPAL_TIMING_ENV_* macros with the given prefix) and appends them to the
 * local storage.  Active only while timing is enabled and no error has
 * been flagged.
 *
 * Each import bumps OMPI_TIMING.import_cnt, and every sample appended by
 * this call is tagged with that batch number in its "imported" field.
 * The tag is written AFTER the sample has been appended, to the slot that
 * was actually filled: appending may first move on to a new storage chunk,
 * so tagging val[use] beforehand could write past the end of a full chunk
 * and would leave the real sample untagged.
 */
#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func)                          \
    do {                                                                       \
        if (!OMPI_TIMING.error && OMPI_TIMING.enabled) {                       \
            int cnt = 0;                                                       \
            int i;                                                             \
            double ts;                                                         \
            OMPI_TIMING.import_cnt++;                                          \
            OPAL_TIMING_ENV_CNT(func, cnt);                                    \
            OPAL_TIMING_ENV_ERROR_PREFIX(_prefix, func, OMPI_TIMING.error);    \
            for (i = 0; i < cnt; i++) {                                        \
                char *desc, *filename;                                         \
                OPAL_TIMING_ENV_GETDESC_PREFIX(_prefix, &filename, func, i,    \
                                               &desc, ts);                     \
                OMPI_TIMING_APPEND(filename, func, desc, ts);                  \
                /* the append advanced "use"; tag the slot just written */     \
                OMPI_TIMING.cur_timing                                         \
                    ->val[OMPI_TIMING.cur_timing->use - 1].imported =          \
                    OMPI_TIMING.import_cnt;                                    \
            }                                                                  \
        }                                                                      \
    } while(0)
157
/*
 * OMPI_TIMING_IMPORT_OPAL(func)
 *
 * Imports the samples of section "func" using an empty prefix.
 *
 * The expansion deliberately carries no trailing semicolon: the caller
 * supplies it, as with the other statement-like macros here, so the macro
 * can be used as the body of an unbraced if/else.
 */
#define OMPI_TIMING_IMPORT_OPAL(func)                                          \
    OMPI_TIMING_IMPORT_OPAL_PREFIX("", func)
160
/*
 * OMPI_TIMING_OUT
 *
 * Collective report over MPI_COMM_WORLD; every rank with timing enabled
 * must call it.  For each recorded sample the average, minimum and maximum
 * across all ranks are reduced to rank 0, which prints them together with
 * per-import totals, the total of the locally measured samples and the
 * overhead of producing the report.
 *
 * Details that matter for correctness:
 * - The scratch arrays are allocated before the error check and an
 *   allocation failure is folded into the local error flag.
 * - The error flags are combined with MPI_Allreduce so that EVERY rank
 *   learns the outcome and all ranks take the same branch.  With a rooted
 *   reduce only rank 0 would know about an error and the ranks would
 *   disagree about the collectives that follow.
 * - The "imported" tag of every sample is gathered while walking the chunk
 *   list, alongside desc/prefix/file.  Indexing the first chunk's array
 *   with the global sample index would run past its end as soon as the
 *   storage consists of more than one chunk.
 * - The walk never collects more than OMPI_TIMING.cnt samples, the number
 *   the scratch arrays are sized for.
 */
#define OMPI_TIMING_OUT                                                        \
    do {                                                                       \
        if (OMPI_TIMING.enabled) {                                             \
            int i, nprocs, myrank;                                             \
            int collected = 0;                                                 \
            int global_err = 0;                                                \
            /* at least one element so that malloc(0) == NULL is no error */   \
            size_t slots = (OMPI_TIMING.cnt > 0) ? (size_t)OMPI_TIMING.cnt : 1;\
            double *avg = (double*)malloc(sizeof(double) * slots);             \
            double *min = (double*)malloc(sizeof(double) * slots);             \
            double *max = (double*)malloc(sizeof(double) * slots);             \
            char **desc = (char**)malloc(sizeof(char*) * slots);               \
            char **prefix = (char**)malloc(sizeof(char*) * slots);             \
            char **file = (char**)malloc(sizeof(char*) * slots);               \
            int *imported = (int*)malloc(sizeof(int) * slots);                 \
            double total_avg = 0, total_min = 0, total_max = 0;                \
                                                                               \
            MPI_Comm_size(MPI_COMM_WORLD, &nprocs);                            \
            MPI_Comm_rank(MPI_COMM_WORLD, &myrank);                            \
                                                                               \
            if (NULL == avg || NULL == min || NULL == max ||                   \
                NULL == desc || NULL == prefix || NULL == file ||              \
                NULL == imported) {                                            \
                OMPI_TIMING.error = 1;                                         \
            }                                                                  \
                                                                               \
            MPI_Allreduce(&OMPI_TIMING.error, &global_err, 1,                  \
                          MPI_INT, MPI_SUM, MPI_COMM_WORLD);                   \
                                                                               \
            if (global_err) {                                                  \
                if (0 == myrank) {                                             \
                    printf("==OMPI_TIMING== error: something went wrong, timings doesn't work\n"); \
                }                                                              \
            } else if (OMPI_TIMING.cnt > 0) {                                  \
                ompi_timing_list_t *chunk = OMPI_TIMING.timing;                \
                OMPI_TIMING.ts = OMPI_TIMING.get_ts();                         \
                while (NULL != chunk) {                                        \
                    int use;                                                   \
                    for (use = 0;                                              \
                         use < chunk->use && collected < OMPI_TIMING.cnt;      \
                         use++) {                                              \
                        MPI_Reduce(&chunk->val[use].ts, avg + collected, 1,    \
                                   MPI_DOUBLE, MPI_SUM, 0, MPI_COMM_WORLD);    \
                        MPI_Reduce(&chunk->val[use].ts, min + collected, 1,    \
                                   MPI_DOUBLE, MPI_MIN, 0, MPI_COMM_WORLD);    \
                        MPI_Reduce(&chunk->val[use].ts, max + collected, 1,    \
                                   MPI_DOUBLE, MPI_MAX, 0, MPI_COMM_WORLD);    \
                        desc[collected] = chunk->val[use].desc;                \
                        prefix[collected] = chunk->val[use].prefix;            \
                        file[collected] = chunk->val[use].file;                \
                        imported[collected] = chunk->val[use].imported;        \
                        collected++;                                           \
                    }                                                          \
                    chunk = (ompi_timing_list_t*)chunk->next;                  \
                }                                                              \
                                                                               \
                if (0 == myrank) {                                             \
                    if (OMPI_TIMING.timing->next) {                            \
                        printf("==OMPI_TIMING== warning: added the extra timings allocation that might misrepresent the results.\n" \
                               "==OMPI_TIMING== Increase the inited size of timings to avoid extra allocation during runtime.\n"); \
                    }                                                          \
                                                                               \
                    printf("------------------ %s ------------------\n",       \
                           OMPI_TIMING.prefix);                                \
                    for (i = 0; i < collected; i++) {                          \
                        int batch_ends;                                        \
                        avg[i] /= nprocs;                                      \
                        printf("%s[%s:%s:%s]: %lf / %lf / %lf\n",              \
                               imported[i] ? " -- " : "",                      \
                               file[i], prefix[i], desc[i],                    \
                               avg[i], min[i], max[i]);                        \
                        if (imported[i]) {                                     \
                            total_avg += avg[i];                               \
                            total_min += min[i];                               \
                            total_max += max[i];                               \
                        }                                                      \
                        /* a batch ends at the last sample or where the        \
                         * import tag changes */                               \
                        batch_ends = (i == collected - 1) ||                   \
                                     (imported[i] != imported[i + 1]);         \
                        if (batch_ends && imported[i]) {                       \
                            printf("%s[%s:%s:%s]: %lf / %lf / %lf\n",          \
                                   " !! ",                                     \
                                   file[i], prefix[i], "total",                \
                                   total_avg, total_min, total_max);           \
                            total_avg = 0; total_min = 0; total_max = 0;       \
                        }                                                      \
                    }                                                          \
                    total_avg = 0; total_min = 0; total_max = 0;               \
                    for (i = 0; i < collected; i++) {                          \
                        if (!imported[i]) {                                    \
                            total_avg += avg[i];                               \
                            total_min += min[i];                               \
                            total_max += max[i];                               \
                        }                                                      \
                    }                                                          \
                    printf("[%s:total] %lf / %lf / %lf\n",                     \
                           OMPI_TIMING.prefix,                                 \
                           total_avg, total_min, total_max);                   \
                    printf("[%s:overhead]: %lf \n", OMPI_TIMING.prefix,        \
                           OMPI_TIMING.get_ts() - OMPI_TIMING.ts);             \
                }                                                              \
            }                                                                  \
            free(avg);                                                         \
            free(min);                                                         \
            free(max);                                                         \
            free(desc);                                                        \
            free(prefix);                                                      \
            free(file);                                                        \
            free(imported);                                                    \
        }                                                                      \
    } while(0)
266
267 #else
/*
 * Timing support compiled out: every macro of the public set expands to
 * nothing (OMPI_TIMING_ENABLED to 0), so instrumented code builds unchanged.
 *
 * OMPI_TIMING_APPEND is variadic here so that it accepts the four-argument
 * form of the enabled build as well as the older two-argument form, and
 * OMPI_TIMING_IMPORT_OPAL_PREFIX has a stub too, so code using it still
 * compiles when timing is disabled.
 */
#define OMPI_TIMING_INIT(size)

#define OMPI_TIMING_NEXT(...)

#define OMPI_TIMING_APPEND(...)

#define OMPI_TIMING_OUT

#define OMPI_TIMING_IMPORT_OPAL_PREFIX(_prefix, func)

#define OMPI_TIMING_IMPORT_OPAL(func)

#define OMPI_TIMING_FINALIZE

#define OMPI_TIMING_ENABLED 0
281
282 #endif
283
284 #endif