root/oshmem/runtime/oshmem_shmem_abort.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. oshmem_shmem_abort

   1 /*
   2  * Copyright (c) 2013      Mellanox Technologies, Inc.
   3  *                         All rights reserved.
   4  * Copyright (c) 2017      FUJITSU LIMITED.  All rights reserved.
   5  * Copyright (c) 2019      Research Organization for Information Science
   6  *                         and Technology (RIST).  All rights reserved.
   7  * $COPYRIGHT$
   8  *
   9  * Additional copyrights may follow
  10  *
  11  * $HEADER$
  12  */
  13 
  14 #include "oshmem_config.h"
  15 
  16 #ifdef HAVE_UNISTD_H
  17 #include <unistd.h>
  18 #endif
  19 #ifdef HAVE_SYS_TYPES_H
  20 #include <sys/types.h>
  21 #endif
  22 #ifdef HAVE_SYS_PARAM_H
  23 #include <sys/param.h>
  24 #endif
  25 #ifdef HAVE_NETDB_H
  26 #include <netdb.h>
  27 #endif
  28 
  29 #include "opal/mca/backtrace/backtrace.h"
  30 #include "opal/util/error.h"
  31 #include "opal/runtime/opal_params.h"
  32 #include "opal/util/show_help.h"
  33 
  34 #include "oshmem/runtime/params.h"
  35 #include "oshmem/runtime/runtime.h"
  36 #include "oshmem/constants.h"
  37 #include "oshmem/proc/proc.h"
  38 
  39 static bool have_been_invoked = false;
  40 
  41 int oshmem_shmem_abort(int errcode)
  42 {
  43     char *host, hostname[OPAL_MAXHOSTNAMELEN];
  44     pid_t pid = 0;
  45 
  46     /* Protection for recursive invocation */
  47     if (have_been_invoked) {
  48         return OSHMEM_SUCCESS;
  49     }
  50     have_been_invoked = true;
  51 
  52     /* If ORTE is initialized, use its nodename.  Otherwise, call
  53      gethostname. */
  54 
  55     /* If MPI is initialized, we know we have a runtime nodename, so
  56        use that.  Otherwise, call gethostname. */
  57     if (ompi_rte_initialized) {
  58         host = ompi_process_info.nodename;
  59     } else {
  60         gethostname(hostname, sizeof(hostname));
  61         host = hostname;
  62     }
  63     pid = getpid();
  64 
  65     opal_show_help("help-shmem-api.txt",
  66                    "shmem-abort",
  67                    true,
  68                    OMPI_PROC_MY_NAME->vpid,
  69                    pid,
  70                    host,
  71                    errcode);
  72 
  73     /* Should we print a stack trace?  Not aggregated because they
  74      might be different on all processes. */
  75     if (opal_abort_print_stack) {
  76         char **messages;
  77         int len, i;
  78 
  79         if (OPAL_SUCCESS == opal_backtrace_buffer(&messages, &len)) {
  80             for (i = 0; i < len; ++i) {
  81                 fprintf(stderr,
  82                         "[%s:%05d] [%d] func:%s\n",
  83                         host,
  84                         (int) pid,
  85                         i,
  86                         messages[i]);
  87                 fflush(stderr);
  88             }
  89             free(messages);
  90         } else {
  91             /* This will print an message if it's unable to print the
  92              backtrace, so we don't need an additional "else" clause
  93              if opal_backtrace_print() is not supported. */
  94             opal_backtrace_print(stderr, NULL, 1);
  95         }
  96     }
  97 
  98     /* Wait for a while before aborting */
  99     opal_delay_abort();
 100 
 101     if (!oshmem_shmem_initialized) {
 102         if (!opal_initialized) {
 103             /* TODO help message from SHMEM not from MPI is needed*/
 104             opal_show_help("help-shmem-runtime.txt",
 105                            "oshmem shmem abort:cannot guarantee all killed",
 106                            true,
 107                            host,
 108                            (int) pid);
 109         } else {
 110             fprintf(stderr,
 111                     "[%s:%05d] Local abort completed successfully; not able to aggregate error messages, and not able to guarantee that all other processes were killed!\n",
 112                     host,
 113                     (int) pid);
 114         }
 115         oshmem_shmem_aborted = true;
 116         exit(errcode);
 117     }
 118 
 119     /* abort local procs in the communicator.  If the communicator is
 120      an intercommunicator AND the abort has explicitly requested
 121      that we abort the remote procs, then do that as well. */
 122 
 123     oshmem_shmem_aborted = true;
 124     /* now that we've aborted everyone else, gracefully die. */
 125 
 126     ompi_rte_abort(errcode, NULL );
 127 
 128     return OSHMEM_SUCCESS;
 129 }

/* [<][>][^][v][top][bottom][index][help] */