root/orte/mca/errmgr/default_tool/errmgr_default_tool.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init
  2. finalize
  3. proc_errors
  4. abort_peers

   1 /*
   2  * Copyright (c) 2009-2011 The Trustees of Indiana University.
   3  *                         All rights reserved.
   4  *
   5  * Copyright (c) 2010      Cisco Systems, Inc.  All rights reserved.
   6  *
   7  * Copyright (c) 2004-2006 The University of Tennessee and The University
   8  *                         of Tennessee Research Foundation.  All rights
   9  *                         reserved.
  10  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  11  *                         All rights reserved.
  12  * Copyright (c) 2013-2017 Intel, Inc.  All rights reserved.
  13  * $COPYRIGHT$
  14  *
  15  * Additional copyrights may follow
  16  *
  17  * $HEADER$
  18  */
  19 
  20 #include "orte_config.h"
  21 
  22 #include <sys/types.h>
  23 #ifdef HAVE_UNISTD_H
  24 #include <unistd.h>
  25 #endif  /* HAVE_UNISTD_H */
  26 #include <string.h>
  27 
  28 #include "opal/util/output.h"
  29 #include "opal/dss/dss.h"
  30 
  31 #include "orte/util/error_strings.h"
  32 #include "orte/util/name_fns.h"
  33 #include "orte/util/show_help.h"
  34 #include "orte/util/threads.h"
  35 #include "orte/runtime/orte_globals.h"
  36 #include "orte/mca/rml/rml.h"
  37 #include "orte/mca/odls/odls_types.h"
  38 #include "orte/mca/state/state.h"
  39 
  40 #include "orte/mca/errmgr/base/base.h"
  41 #include "orte/mca/errmgr/base/errmgr_private.h"
  42 #include "errmgr_default_tool.h"
  43 
  44 /*
  45  * Module functions: Global
  46  */
  47 static int init(void);
  48 static int finalize(void);
  49 
  50 static int abort_peers(orte_process_name_t *procs,
  51                        orte_std_cntr_t num_procs,
  52                        int error_code);
  53 
  54 /******************
  55  * HNP module
  56  ******************/
  57 orte_errmgr_base_module_t orte_errmgr_default_tool_module = {
  58     .init= init,
  59     .finalize = finalize,
  60     .logfn = orte_errmgr_base_log,
  61     .abort = orte_errmgr_base_abort,
  62     .abort_peers = abort_peers
  63 };
  64 
  65 static void proc_errors(int fd, short args, void *cbdata);
  66 
  67 /************************
  68  * API Definitions
  69  ************************/
  70 static int init(void)
  71 {
  72     /* setup state machine to trap proc errors */
  73     orte_state.add_proc_state(ORTE_PROC_STATE_ERROR, proc_errors, ORTE_ERROR_PRI);
  74 
  75     return ORTE_SUCCESS;
  76 }
  77 
  78 static int finalize(void)
  79 {
  80     return ORTE_SUCCESS;
  81 }
  82 
  83 static void proc_errors(int fd, short args, void *cbdata)
  84 {
  85     orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
  86 
  87     ORTE_ACQUIRE_OBJECT(caddy);
  88 
  89     OPAL_OUTPUT_VERBOSE((1, orte_errmgr_base_framework.framework_output,
  90                          "%s errmgr:default_tool: proc %s state %s",
  91                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
  92                          ORTE_NAME_PRINT(&caddy->name),
  93                          orte_proc_state_to_str(caddy->proc_state)));
  94 
  95     /*
  96      * if orte is trying to shutdown, just let it
  97      */
  98     if (orte_finalizing) {
  99         OBJ_RELEASE(caddy);
 100         return;
 101     }
 102 
 103     /* if we lost our lifeline, then just stop the event loop
 104      * so the main program can cleanly terminate */
 105     if (ORTE_PROC_STATE_LIFELINE_LOST == caddy->proc_state) {
 106         ORTE_POST_OBJECT(caddy);
 107         orte_event_base_active = false;
 108     } else {
 109         /* all other errors require abort */
 110         orte_errmgr_base_abort(ORTE_ERROR_DEFAULT_EXIT_CODE, NULL);
 111     }
 112 
 113     OBJ_RELEASE(caddy);
 114 }
 115 
 116 static int abort_peers(orte_process_name_t *procs,
 117                        orte_std_cntr_t num_procs,
 118                        int error_code)
 119 {
 120     /* just abort */
 121     if (0 < opal_output_get_verbosity(orte_errmgr_base_framework.framework_output)) {
 122         orte_errmgr_base_abort(error_code, "%s called abort_peers",
 123                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
 124     } else {
 125         orte_errmgr_base_abort(error_code, NULL);
 126     }
 127     return ORTE_SUCCESS;
 128 }

/* [<][>][^][v][top][bottom][index][help] */