This source file includes the following definitions.
- ompi_mpi_errors_are_fatal_comm_handler
- ompi_mpi_errors_are_fatal_file_handler
- ompi_mpi_errors_are_fatal_win_handler
- ompi_mpi_errors_return_comm_handler
- ompi_mpi_errors_return_file_handler
- ompi_mpi_errors_return_win_handler
- out
- backend_fatal_aggregate
- backend_fatal_no_aggregate
- backend_fatal
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 #include "ompi_config.h"
  28 #include <stdlib.h>
  29 #include <stdarg.h>
  30 #ifdef HAVE_UNISTD_H
  31 #include <unistd.h>
  32 #endif
  33 #ifdef HAVE_SYS_PARAM_H
  34 #include <sys/param.h>
  35 #endif
  36 
  37 #include "opal/util/show_help.h"
  38 #include "ompi/mca/rte/rte.h"
  39 #include "ompi/errhandler/errhandler_predefined.h"
  40 #include "ompi/errhandler/errcode.h"
  41 #include "ompi/communicator/communicator.h"
  42 #include "ompi/file/file.h"
  43 #include "ompi/win/win.h"
  44 #include "opal/util/printf.h"
  45 #include "opal/util/output.h"
  46 
  47 
  48 
  49 
  50 static void backend_fatal(char *type, struct ompi_communicator_t *comm,
  51                           char *name, int *error_code, va_list arglist);
  52 static void out(char *str, char *arg);
  53 
  54 
  55 void ompi_mpi_errors_are_fatal_comm_handler(struct ompi_communicator_t **comm,
  56                                             int *error_code, ...)
  57 {
  58   char *name;
  59   struct ompi_communicator_t *abort_comm;
  60   va_list arglist;
  61 
  62   va_start(arglist, error_code);
  63 
  64   if (NULL != comm) {
  65       name = (*comm)->c_name;
  66       abort_comm = *comm;
  67   } else {
  68       name = NULL;
  69       abort_comm = NULL;
  70   }
  71   backend_fatal("communicator", abort_comm, name, error_code, arglist);
  72   va_end(arglist);
  73 }
  74 
  75 
  76 void ompi_mpi_errors_are_fatal_file_handler(struct ompi_file_t **file,
  77                                             int *error_code, ...)
  78 {
  79   char *name;
  80   struct ompi_communicator_t *abort_comm;
  81   va_list arglist;
  82 
  83   va_start(arglist, error_code);
  84 
  85   if (NULL != file) {
  86       name = (*file)->f_filename;
  87       abort_comm = (*file)->f_comm;
  88   } else {
  89       name = NULL;
  90       abort_comm = NULL;
  91   }
  92   backend_fatal("file", abort_comm, name, error_code, arglist);
  93   va_end(arglist);
  94 }
  95 
  96 
  97 void ompi_mpi_errors_are_fatal_win_handler(struct ompi_win_t **win,
  98                                            int *error_code, ...)
  99 {
 100   char *name;
 101   struct ompi_communicator_t *abort_comm = NULL;
 102   va_list arglist;
 103 
 104   va_start(arglist, error_code);
 105 
 106   if (NULL != win) {
 107       name = (*win)->w_name;
 108   } else {
 109       name = NULL;
 110   }
 111   backend_fatal("win", abort_comm, name, error_code, arglist);
 112   va_end(arglist);
 113 }
 114 
 115 void ompi_mpi_errors_return_comm_handler(struct ompi_communicator_t **comm,
 116                                          int *error_code, ...)
 117 {
 118     
 119     
 120 
 121     va_list arglist;
 122     va_start(arglist, error_code);
 123     va_end(arglist);
 124 }
 125 
 126 
 127 void ompi_mpi_errors_return_file_handler(struct ompi_file_t **file,
 128                                          int *error_code, ...)
 129 {
 130     
 131     
 132 
 133     va_list arglist;
 134     va_start(arglist, error_code);
 135     va_end(arglist);
 136 }
 137 
 138 
 139 void ompi_mpi_errors_return_win_handler(struct ompi_win_t **win,
 140                                         int *error_code, ...)
 141 {
 142     
 143     
 144 
 145     va_list arglist;
 146     va_start(arglist, error_code);
 147     va_end(arglist);
 148 }
 149 
 150 
 151 static void out(char *str, char *arg)
 152 {
 153     if (ompi_rte_initialized &&
 154         ompi_mpi_state < OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
 155         if (NULL != arg) {
 156             opal_output(0, str, arg);
 157         } else {
 158             opal_output(0, "%s", str);
 159         }
 160     } else {
 161         if (NULL != arg) {
 162             fprintf(stderr, str, arg);
 163         } else {
 164             fprintf(stderr, "%s", str);
 165         }
 166     }
 167 }
 168 
 169 
 170 
 171 
 172 
 173 
 174 
 175 
 176 
 177 
 178 static void backend_fatal_aggregate(char *type,
 179                                     struct ompi_communicator_t *comm,
 180                                     char *name, int *error_code,
 181                                     va_list arglist)
 182 {
 183     char *arg = NULL, *prefix = NULL, *err_msg = NULL;
 184     const char* const unknown_error_code = "Error code: %d (no associated error message)";
 185     const char* const unknown_error = "Unknown error";
 186     const char* const unknown_prefix = "[?:?]";
 187     bool generated = false;
 188 
 189     
 190     
 191     
 192     const char* usable_prefix = unknown_prefix;
 193     const char* usable_err_msg = unknown_error;
 194 
 195     arg = va_arg(arglist, char*);
 196     va_end(arglist);
 197 
 198     if (opal_asprintf(&prefix, "[%s:%05d]",
 199                  ompi_process_info.nodename,
 200                  (int) ompi_process_info.pid) == -1) {
 201         prefix = NULL;
 202         
 203         opal_output(0, "%s", "Could not write node and PID to prefix");
 204         opal_output(0, "Node: %s", ompi_process_info.nodename);
 205         opal_output(0, "PID: %d", (int) ompi_process_info.pid);
 206     }
 207 
 208     if (NULL != error_code) {
 209         err_msg = ompi_mpi_errnum_get_string(*error_code);
 210         if (NULL == err_msg) {
 211             if (opal_asprintf(&err_msg, unknown_error_code,
 212                          *error_code) == -1) {
 213                 err_msg = NULL;
 214                 opal_output(0, "%s", "Could not write to err_msg");
 215                 opal_output(0, unknown_error_code, *error_code);
 216             } else {
 217                 generated = true;
 218             }
 219         }
 220     }
 221 
 222     usable_prefix  = (NULL == prefix)  ? unknown_prefix : prefix;
 223     usable_err_msg = (NULL == err_msg) ? unknown_error  : err_msg;
 224 
 225     if (NULL != name) {
 226         opal_show_help("help-mpi-errors.txt",
 227                        "mpi_errors_are_fatal",
 228                        false,
 229                        usable_prefix,
 230                        (NULL == arg) ? "" : "in",
 231                        (NULL == arg) ? "" : arg,
 232                        usable_prefix,
 233                        OMPI_PROC_MY_NAME->jobid,
 234                        OMPI_PROC_MY_NAME->vpid,
 235                        usable_prefix,
 236                        type,
 237                        name,
 238                        usable_prefix,
 239                        usable_err_msg,
 240                        usable_prefix,
 241                        type,
 242                        usable_prefix);
 243     } else {
 244         opal_show_help("help-mpi-errors.txt",
 245                        "mpi_errors_are_fatal unknown handle",
 246                        false,
 247                        usable_prefix,
 248                        (NULL == arg) ? "" : "in",
 249                        (NULL == arg) ? "" : arg,
 250                        usable_prefix,
 251                        OMPI_PROC_MY_NAME->jobid,
 252                        OMPI_PROC_MY_NAME->vpid,
 253                        usable_prefix,
 254                        type,
 255                        usable_prefix,
 256                        usable_err_msg,
 257                        usable_prefix,
 258                        type,
 259                        usable_prefix);
 260     }
 261 
 262     free(prefix);
 263     if (generated) {
 264         free(err_msg);
 265     }
 266 }
 267 
 268 
 269 
 270 
 271 
 272 
 273 
 274 
 275 
 276 
 277 
 278 static void backend_fatal_no_aggregate(char *type,
 279                                        struct ompi_communicator_t *comm,
 280                                        char *name, int *error_code,
 281                                        va_list arglist)
 282 {
 283     char *arg;
 284 
 285     int32_t state = ompi_mpi_state;
 286     assert(state < OMPI_MPI_STATE_INIT_COMPLETED ||
 287            state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT);
 288 
 289     fflush(stdout);
 290     fflush(stderr);
 291 
 292     arg = va_arg(arglist, char*);
 293 
 294     
 295 
 296     if (state < OMPI_MPI_STATE_INIT_STARTED) {
 297         if (NULL != arg) {
 298             out("*** The %s() function was called before MPI_INIT was invoked.\n"
 299                 "*** This is disallowed by the MPI standard.\n", arg);
 300         } else {
 301             out("*** An MPI function was called before MPI_INIT was invoked.\n"
 302                 "*** This is disallowed by the MPI standard.\n"
 303                 "*** Unfortunately, no further information is available on *which* MPI\n"
 304                 "*** function was invoked, sorry.  :-(\n", NULL);
 305         }
 306         out("*** Your MPI job will now abort.\n", NULL);
 307     } else if (state >= OMPI_MPI_STATE_FINALIZE_PAST_COMM_SELF_DESTRUCT) {
 308         if (NULL != arg) {
 309             out("*** The %s() function was called after MPI_FINALIZE was invoked.\n"
 310                 "*** This is disallowed by the MPI standard.\n", arg);
 311         } else {
 312             out("*** An MPI function was called after MPI_FINALIZE was invoked.\n"
 313                 "*** This is disallowed by the MPI standard.\n"
 314                 "*** Unfortunately, no further information is available on *which* MPI\n"
 315                 "*** function was invoked, sorry.  :-(\n", NULL);
 316         }
 317         out("*** Your MPI job will now abort.\n", NULL);
 318     }
 319 
 320     else {
 321         int len;
 322         char str[MPI_MAX_PROCESSOR_NAME * 2];
 323 
 324         
 325 
 326 
 327 
 328         
 329 
 330         if (NULL != arg) {
 331             out("*** An error occurred in %s\n", arg);
 332         } else {
 333             out("*** An error occurred\n", NULL);
 334         }
 335 
 336         if (NULL != name) {
 337             
 338 
 339 
 340             str[0] = '\0';
 341             len = sizeof(str) - 1;
 342             strncat(str, type, len);
 343 
 344             len -= strlen(type);
 345             if (len > 0) {
 346                 strncat(str, " ", len);
 347 
 348                 --len;
 349                 if (len > 0) {
 350                     strncat(str, name, len);
 351                 }
 352             }
 353             out("*** on %s", str);
 354         } else if (NULL == name) {
 355             out("*** on a NULL %s\n", type);
 356         }
 357 
 358         if (NULL != error_code) {
 359             char *tmp = ompi_mpi_errnum_get_string(*error_code);
 360             if (NULL != tmp) {
 361                 out("*** %s\n", tmp);
 362             } else {
 363                 char intbuf[32];
 364                 snprintf(intbuf, 32, "%d", *error_code);
 365                 out("*** Error code: %d (no associated error message)\n", intbuf);
 366             }
 367         }
 368         
 369         out("*** MPI_ERRORS_ARE_FATAL (processes in this %s will now abort,\n", type);
 370         out("***    and potentially your MPI job)\n", NULL);
 371 
 372     }
 373     va_end(arglist);
 374 }
 375 
 376 static void backend_fatal(char *type, struct ompi_communicator_t *comm,
 377                           char *name, int *error_code,
 378                           va_list arglist)
 379 {
 380     
 381     if (ompi_rte_initialized) {
 382         backend_fatal_aggregate(type, comm, name, error_code, arglist);
 383     } else {
 384         backend_fatal_no_aggregate(type, comm, name, error_code, arglist);
 385     }
 386 
 387     
 388 
 389 
 390 
 391     if (comm == NULL) {
 392         comm = &ompi_mpi_comm_self.comm;
 393     }
 394 
 395     if (NULL != error_code) {
 396         ompi_mpi_abort(comm, *error_code);
 397     } else {
 398         ompi_mpi_abort(comm, 1);
 399     }
 400 }