root/orte/mca/ess/tool/ess_tool_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rte_init
  2. rte_finalize
  3. rte_abort

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2015-2018 Intel, Inc. All rights reserved.
  13  * $COPYRIGHT$
  14  *
  15  * Additional copyrights may follow
  16  *
  17  * $HEADER$
  18  *
  19  */
  20 
  21 #include "orte_config.h"
  22 #include "orte/constants.h"
  23 #include "opal/hash_string.h"
  24 
  25 #include <sys/types.h>
  26 #include <stdio.h>
  27 #ifdef HAVE_FCNTL_H
  28 #include <fcntl.h>
  29 #endif
  30 #ifdef HAVE_UNISTD_H
  31 #include <unistd.h>
  32 #endif
  33 
  34 #include "opal/runtime/opal_progress_threads.h"
  35 #include "opal/mca/pmix/pmix_types.h"
  36 
  37 #include "orte/util/show_help.h"
  38 #include "orte/mca/plm/base/base.h"
  39 #include "orte/mca/plm/base/plm_private.h"
  40 #include "orte/mca/plm/plm.h"
  41 #include "orte/mca/errmgr/errmgr.h"
  42 #include "orte/util/proc_info.h"
  43 
  44 #include "orte/mca/ess/ess.h"
  45 #include "orte/mca/ess/base/base.h"
  46 #include "orte/mca/ess/tool/ess_tool.h"
  47 
  48 static int rte_init(void);
  49 static void rte_abort(int status, bool report) __opal_attribute_noreturn__;
  50 static int rte_finalize(void);
  51 
  52 
  53 orte_ess_base_module_t orte_ess_tool_module = {
  54     rte_init,
  55     rte_finalize,
  56     rte_abort,
  57     NULL /* ft_event */
  58 };
  59 
  60 static bool progress_thread_running = false;
  61 
  62 static int rte_init(void)
  63 {
  64     int ret;
  65     char *error = NULL;
  66     opal_list_t flags;
  67     opal_value_t *val;
  68 
  69     /* run the prolog */
  70     if (ORTE_SUCCESS != (ret = orte_ess_base_std_prolog())) {
  71         error = "orte_ess_base_std_prolog";
  72         goto error;
  73     }
  74 
  75 
  76     /* if requested, get an async event base - we use the
  77      * opal_async one so we don't startup extra threads if
  78      * not needed */
  79     if (mca_ess_tool_component.async) {
  80         orte_event_base = opal_progress_thread_init(NULL);
  81         progress_thread_running = true;
  82     }
  83 
  84     /* setup the tool connection flags */
  85     OBJ_CONSTRUCT(&flags, opal_list_t);
  86     if (mca_ess_tool_component.do_not_connect) {
  87         val = OBJ_NEW(opal_value_t);
  88         val->key = strdup(OPAL_PMIX_TOOL_DO_NOT_CONNECT);
  89         val->type = OPAL_BOOL;
  90         val->data.flag = true;
  91         opal_list_append(&flags, &val->super);
  92     } else if (mca_ess_tool_component.system_server_first) {
  93         val = OBJ_NEW(opal_value_t);
  94         val->key = strdup(OPAL_PMIX_CONNECT_SYSTEM_FIRST);
  95         val->type = OPAL_BOOL;
  96         val->data.flag = true;
  97         opal_list_append(&flags, &val->super);
  98     } else if (mca_ess_tool_component.system_server_only) {
  99         val = OBJ_NEW(opal_value_t);
 100         val->key = strdup(OPAL_PMIX_CONNECT_TO_SYSTEM);
 101         val->type = OPAL_BOOL;
 102         val->data.flag = true;
 103         opal_list_append(&flags, &val->super);
 104     }
 105     if (0 < mca_ess_tool_component.wait_to_connect) {
 106         val = OBJ_NEW(opal_value_t);
 107         val->key = strdup(OPAL_PMIX_CONNECT_RETRY_DELAY);
 108         val->type = OPAL_UINT32;
 109         val->data.uint32 = mca_ess_tool_component.wait_to_connect;
 110         opal_list_append(&flags, &val->super);
 111     }
 112     if (0 < mca_ess_tool_component.num_retries) {
 113         val = OBJ_NEW(opal_value_t);
 114         val->key = strdup(OPAL_PMIX_CONNECT_MAX_RETRIES);
 115         val->type = OPAL_UINT32;
 116         val->data.uint32 = mca_ess_tool_component.num_retries;
 117         opal_list_append(&flags, &val->super);
 118     }
 119     if (0 < mca_ess_tool_component.pid) {
 120         val = OBJ_NEW(opal_value_t);
 121         val->key = strdup(OPAL_PMIX_SERVER_PIDINFO);
 122         val->type = OPAL_PID;
 123         val->data.pid = mca_ess_tool_component.pid;
 124         opal_list_append(&flags, &val->super);
 125     }
 126 
 127     /* do the standard tool init */
 128     if (ORTE_SUCCESS != (ret = orte_ess_base_tool_setup(&flags))) {
 129         ORTE_ERROR_LOG(ret);
 130         OPAL_LIST_DESTRUCT(&flags);
 131         error = "orte_ess_base_tool_setup";
 132         goto error;
 133     }
 134     OPAL_LIST_DESTRUCT(&flags);
 135 
 136     return ORTE_SUCCESS;
 137 
 138   error:
 139     if (ORTE_ERR_SILENT != ret && !orte_report_silent_errors) {
 140         orte_show_help("help-orte-runtime.txt",
 141                        "orte_init:startup:internal-failure",
 142                        true, error, ORTE_ERROR_NAME(ret), ret);
 143     }
 144 
 145     return ret;
 146 }
 147 
 148 static int rte_finalize(void)
 149 {
 150     /* use the std finalize routing */
 151     orte_ess_base_tool_finalize();
 152 
 153     /* release the event base */
 154     if (progress_thread_running) {
 155         opal_progress_thread_finalize(NULL);
 156         progress_thread_running = false;
 157     }
 158     return ORTE_SUCCESS;
 159 }
 160 
 161 /*
 162  * If we are a tool-without-name, then we look just like the HNP.
 163  * In that scenario, it could be beneficial to get a core file, so
 164  * we call abort.
 165  */
 166 static void rte_abort(int status, bool report)
 167 {
 168     /* do NOT do a normal finalize as this will very likely
 169      * hang the process. We are aborting due to an abnormal condition
 170      * that precludes normal cleanup
 171      *
 172      * We do need to do the following bits to make sure we leave a
 173      * clean environment. Taken from orte_finalize():
 174      * - Assume errmgr cleans up child processes before we exit.
 175      */
 176 
 177     /* - Clean out the global structures
 178      * (not really necessary, but good practice)
 179      */
 180     orte_proc_info_finalize();
 181 
 182     /* Now just exit */
 183     exit(status);
 184 }

/* [<][>][^][v][top][bottom][index][help] */