This source file includes the following definitions.
- ompi_mpi_errors_are_fatal_comm_handler
- ompi_mpi_errors_are_fatal_file_handler
- ompi_mpi_errors_are_fatal_win_handler
- ompi_mpi_errors_return_comm_handler
- ompi_mpi_errors_return_file_handler
- ompi_mpi_errors_return_win_handler
- out
- backend_fatal_aggregate
- backend_fatal_no_aggregate
- backend_fatal
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include "ompi_config.h"
28 #include <stdlib.h>
29 #include <stdarg.h>
30 #ifdef HAVE_UNISTD_H
31 #include <unistd.h>
32 #endif
33 #ifdef HAVE_SYS_PARAM_H
34 #include <sys/param.h>
35 #endif
36
37 #include "opal/util/show_help.h"
38 #include "ompi/mca/rte/rte.h"
39 #include "ompi/errhandler/errhandler_predefined.h"
40 #include "ompi/errhandler/errcode.h"
41 #include "ompi/communicator/communicator.h"
42 #include "ompi/file/file.h"
43 #include "ompi/win/win.h"
44 #include "opal/util/printf.h"
45 #include "opal/util/output.h"
46
47
48
49
50 static void backend_fatal(char *type, struct ompi_communicator_t *comm,
51 char *name, int *error_code, va_list arglist);
52 static void out(char *str, char *arg);
53
54
55 void ompi_mpi_errors_are_fatal_comm_handler(struct ompi_communicator_t **comm,
56 int *error_code, ...)
57 {
58 char *name;
59 struct ompi_communicator_t *abort_comm;
60 va_list arglist;
61
62 va_start(arglist, error_code);
63
64 if (NULL != comm) {
65 name = (*comm)->c_name;
66 abort_comm = *comm;
67 } else {
68 name = NULL;
69 abort_comm = NULL;
70 }
71 backend_fatal("communicator", abort_comm, name, error_code, arglist);
72 va_end(arglist);
73 }
74
75
76 void ompi_mpi_errors_are_fatal_file_handler(struct ompi_file_t **file,
77 int *error_code, ...)
78 {
79 char *name;
80 struct ompi_communicator_t *abort_comm;
81 va_list arglist;
82
83 va_start(arglist, error_code);
84
85 if (NULL != file) {
86 name = (*file)->f_filename;
87 abort_comm = (*file)->f_comm;
88 } else {
89 name = NULL;
90 abort_comm = NULL;
91 }
92 backend_fatal("file", abort_comm, name, error_code, arglist);
93 va_end(arglist);
94 }
95
96
97 void ompi_mpi_errors_are_fatal_win_handler(struct ompi_win_t **win,
98 int *error_code, ...)
99 {
100 char *name;
101 struct ompi_communicator_t *abort_comm = NULL;
102 va_list arglist;
103
104 va_start(arglist, error_code);
105
106 if (NULL != win) {
107 name = (*win)->w_name;
108 } else {
109 name = NULL;
110 }
111 backend_fatal("win", abort_comm, name, error_code, arglist);
112 va_end(arglist);
113 }
114
/*
 * "Errors return" handler for communicators.
 *
 * Deliberately does nothing: neither the handle nor the error code is
 * read or modified, and control simply returns to the caller.  The
 * variadic list is opened and immediately closed without consuming any
 * argument.
 */
void ompi_mpi_errors_return_comm_handler(struct ompi_communicator_t **comm,
                                         int *error_code, ...)
{
    va_list ignored;

    va_start(ignored, error_code);
    va_end(ignored);
}
125
126
/*
 * "Errors return" handler for files.
 *
 * Deliberately does nothing: neither the handle nor the error code is
 * read or modified, and control simply returns to the caller.  The
 * variadic list is opened and immediately closed without consuming any
 * argument.
 */
void ompi_mpi_errors_return_file_handler(struct ompi_file_t **file,
                                         int *error_code, ...)
{
    va_list ignored;

    va_start(ignored, error_code);
    va_end(ignored);
}
137
138
/*
 * "Errors return" handler for windows.
 *
 * Deliberately does nothing: neither the handle nor the error code is
 * read or modified, and control simply returns to the caller.  The
 * variadic list is opened and immediately closed without consuming any
 * argument.
 */
void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win,
                                        int *error_code, ...)
{
    va_list ignored;

    va_start(ignored, error_code);
    va_end(ignored);
}
149
150
151 static void out(char *str, char *arg)
152 {
153 if (ompi_rte_initialized &&
154 ompi_mpi_state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
155 if (NULL != arg) {
156 opal_output(0, str, arg);
157 } else {
158 opal_output(0, "%s", str);
159 }
160 } else {
161 if (NULL != arg) {
162 fprintf(stderr, str, arg);
163 } else {
164 fprintf(stderr, "%s", str);
165 }
166 }
167 }
168
169
170
171
172
173
174
175
176
177
178 static void backend_fatal_aggregate(char *type,
179 struct ompi_communicator_t *comm,
180 char *name, int *error_code,
181 va_list arglist)
182 {
183 char *arg = NULL, *prefix = NULL, *err_msg = NULL;
184 const char* const unknown_error_code = "Error code: %d (no associated error message)";
185 const char* const unknown_error = "Unknown error";
186 const char* const unknown_prefix = "[?:?]";
187 bool generated = false;
188
189
190
191
192 const char* usable_prefix = unknown_prefix;
193 const char* usable_err_msg = unknown_error;
194
195 arg = va_arg(arglist, char*);
196 va_end(arglist);
197
198 if (opal_asprintf(&prefix, "[%s:%05d]",
199 ompi_process_info.nodename,
200 (int) ompi_process_info.pid) == -1) {
201 prefix = NULL;
202
203 opal_output(0, "%s", "Could not write node and PID to prefix");
204 opal_output(0, "Node: %s", ompi_process_info.nodename);
205 opal_output(0, "PID: %d", (int) ompi_process_info.pid);
206 }
207
208 if (NULL != error_code) {
209 err_msg = ompi_mpi_errnum_get_string(*error_code);
210 if (NULL == err_msg) {
211 if (opal_asprintf(&err_msg, unknown_error_code,
212 *error_code) == -1) {
213 err_msg = NULL;
214 opal_output(0, "%s", "Could not write to err_msg");
215 opal_output(0, unknown_error_code, *error_code);
216 } else {
217 generated = true;
218 }
219 }
220 }
221
222 usable_prefix = (NULL == prefix) ? unknown_prefix : prefix;
223 usable_err_msg = (NULL == err_msg) ? unknown_error : err_msg;
224
225 if (NULL != name) {
226 opal_show_help("help-mpi-errors.txt",
227 "mpi_errors_are_fatal",
228 false,
229 usable_prefix,
230 (NULL == arg) ? "" : "in",
231 (NULL == arg) ? "" : arg,
232 usable_prefix,
233 OMPI_PROC_MY_NAME->jobid,
234 OMPI_PROC_MY_NAME->vpid,
235 usable_prefix,
236 type,
237 name,
238 usable_prefix,
239 usable_err_msg,
240 usable_prefix,
241 type,
242 usable_prefix);
243 } else {
244 opal_show_help("help-mpi-errors.txt",
245 "mpi_errors_are_fatal unknown handle",
246 false,
247 usable_prefix,
248 (NULL == arg) ? "" : "in",
249 (NULL == arg) ? "" : arg,
250 usable_prefix,
251 OMPI_PROC_MY_NAME->jobid,
252 OMPI_PROC_MY_NAME->vpid,
253 usable_prefix,
254 type,
255 usable_prefix,
256 usable_err_msg,
257 usable_prefix,
258 type,
259 usable_prefix);
260 }
261
262 free(prefix);
263 if (generated) {
264 free(err_msg);
265 }
266 }
267
268
269
270
271
272
273
274
275
276
277
278 static void backend_fatal_no_aggregate(char *type,
279 struct ompi_communicator_t *comm,
280 char *name, int *error_code,
281 va_list arglist)
282 {
283 char *arg;
284
285 int32_t state = ompi_mpi_state;
286 assert(state < OMPI_MPI_STATE_INIT_COMPLETED ||
287 state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
288
289 fflush(stdout);
290 fflush(stderr);
291
292 arg = va_arg(arglist, char*);
293
294
295
296 if (state < OMPI_MPI_STATE_INIT_STARTED) {
297 if (NULL != arg) {
298 out("*** The %s() function was called before MPI_INIT was invoked.\n"
299 "*** This is disallowed by the MPI standard.\n", arg);
300 } else {
301 out("*** An MPI function was called before MPI_INIT was invoked.\n"
302 "*** This is disallowed by the MPI standard.\n"
303 "*** Unfortunately, no further information is available on *which* MPI\n"
304 "*** function was invoked, sorry. :-(\n", NULL);
305 }
306 out("*** Your MPI job will now abort.\n", NULL);
307 } else if (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
308 if (NULL != arg) {
309 out("*** The %s() function was called after MPI_FINALIZE was invoked.\n"
310 "*** This is disallowed by the MPI standard.\n", arg);
311 } else {
312 out("*** An MPI function was called after MPI_FINALIZE was invoked.\n"
313 "*** This is disallowed by the MPI standard.\n"
314 "*** Unfortunately, no further information is available on *which* MPI\n"
315 "*** function was invoked, sorry. :-(\n", NULL);
316 }
317 out("*** Your MPI job will now abort.\n", NULL);
318 }
319
320 else {
321 int len;
322 char str[MPI_MAX_PROCESSOR_NAME * 2];
323
324
325
326
327
328
329
330 if (NULL != arg) {
331 out("*** An error occurred in %s\n", arg);
332 } else {
333 out("*** An error occurred\n", NULL);
334 }
335
336 if (NULL != name) {
337
338
339
340 str[0] = '\0';
341 len = sizeof(str) - 1;
342 strncat(str, type, len);
343
344 len -= strlen(type);
345 if (len > 0) {
346 strncat(str, " ", len);
347
348 --len;
349 if (len > 0) {
350 strncat(str, name, len);
351 }
352 }
353 out("*** on %s", str);
354 } else if (NULL == name) {
355 out("*** on a NULL %s\n", type);
356 }
357
358 if (NULL != error_code) {
359 char *tmp = ompi_mpi_errnum_get_string(*error_code);
360 if (NULL != tmp) {
361 out("*** %s\n", tmp);
362 } else {
363 char intbuf[32];
364 snprintf(intbuf, 32, "%d", *error_code);
365 out("*** Error code: %d (no associated error message)\n", intbuf);
366 }
367 }
368
369 out("*** MPI_ERRORS_ARE_FATAL (processes in this %s will now abort,\n", type);
370 out("*** and potentially your MPI job)\n", NULL);
371
372 }
373 va_end(arglist);
374 }
375
376 static void backend_fatal(char *type, struct ompi_communicator_t *comm,
377 char *name, int *error_code,
378 va_list arglist)
379 {
380
381 if (ompi_rte_initialized) {
382 backend_fatal_aggregate(type, comm, name, error_code, arglist);
383 } else {
384 backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
385 }
386
387
388
389
390
391 if (comm == NULL) {
392 comm = &ompi_mpi_comm_self.comm;
393 }
394
395 if (NULL != error_code) {
396 ompi_mpi_abort(comm, *error_code);
397 } else {
398 ompi_mpi_abort(comm, 1);
399 }
400 }