root/orte/mca/ess/base/ess_base_std_tool.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. infocb
  2. orte_ess_base_tool_setup
  3. orte_ess_base_tool_finalize

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2009 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2011-2013 Los Alamos National Security, LLC.
  13  *                         All rights reserved.
  14  * Copyright (c) 2013-2019 Intel, Inc.  All rights reserved.
  15  * Copyright (c) 2014      Hochschule Esslingen.  All rights reserved.
  16  *
  17  * Copyright (c) 2015      Cisco Systems, Inc.  All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 #include "orte_config.h"
  26 #include "orte/constants.h"
  27 
  28 #include <sys/types.h>
  29 #include <stdio.h>
  30 #ifdef HAVE_FCNTL_H
  31 #include <fcntl.h>
  32 #endif
  33 #ifdef HAVE_UNISTD_H
  34 #include <unistd.h>
  35 #endif
  36 
  37 #include "opal/mca/event/event.h"
  38 #include "opal/mca/pmix/base/base.h"
  39 #include "opal/runtime/opal.h"
  40 #include "opal/runtime/opal_progress_threads.h"
  41 #include "opal/util/arch.h"
  42 #include "opal/util/opal_environ.h"
  43 #include "opal/util/argv.h"
  44 #include "opal/util/proc.h"
  45 
  46 #include "orte/mca/iof/base/base.h"
  47 #include "orte/mca/oob/base/base.h"
  48 #include "orte/mca/plm/base/base.h"
  49 #include "orte/mca/rml/base/base.h"
  50 #include "orte/mca/rml/base/rml_contact.h"
  51 #include "orte/mca/routed/base/base.h"
  52 #include "orte/mca/errmgr/base/base.h"
  53 #include "orte/mca/state/base/base.h"
  54 #include "orte/util/proc_info.h"
  55 #include "orte/util/session_dir.h"
  56 #include "orte/util/show_help.h"
  57 
  58 #include "orte/runtime/orte_globals.h"
  59 #include "orte/runtime/orte_wait.h"
  60 
  61 #include "orte/mca/ess/base/base.h"
  62 
  63 
  64 static void infocb(int status,
  65                    opal_list_t *info,
  66                    void *cbdata,
  67                    opal_pmix_release_cbfunc_t release_fn,
  68                    void *release_cbdata)
  69 {
  70     opal_value_t *kv;
  71     opal_pmix_lock_t *lock = (opal_pmix_lock_t*)cbdata;
  72 
  73     if (OPAL_SUCCESS != status) {
  74         ORTE_ERROR_LOG(status);
  75     } else {
  76         kv = (opal_value_t*)opal_list_get_first(info);
  77         if (NULL == kv) {
  78             ORTE_ERROR_LOG(ORTE_ERR_NOT_SUPPORTED);
  79         } else {
  80             if (0 == strcmp(kv->key, OPAL_PMIX_SERVER_URI)) {
  81                 orte_process_info.my_hnp_uri = strdup(kv->data.string);
  82             } else {
  83                 ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
  84             }
  85         }
  86     }
  87     if (NULL != release_fn) {
  88         release_fn(release_cbdata);
  89     }
  90     OPAL_PMIX_WAKEUP_THREAD(lock);
  91 }
  92 
  93 int orte_ess_base_tool_setup(opal_list_t *flags)
  94 {
  95     int ret;
  96     char *error = NULL;
  97     opal_list_t info;
  98     opal_value_t *kv, *knext, val;
  99     opal_pmix_query_t *q;
 100     opal_pmix_lock_t lock;
 101     opal_buffer_t *buf;
 102 
 103     /* we need an external progress thread to ensure that things run
 104      * async with the PMIx code */
 105     orte_event_base = opal_progress_thread_init("tool");
 106 
 107     /* setup the PMIx framework - ensure it skips all non-PMIx components,
 108      * but do not override anything we were given */
 109     opal_setenv("OMPI_MCA_pmix", "^s1,s2,cray,isolated", false, &environ);
 110     if (OPAL_SUCCESS != (ret = mca_base_framework_open(&opal_pmix_base_framework, 0))) {
 111         ORTE_ERROR_LOG(ret);
 112         error = "orte_pmix_base_open";
 113         goto error;
 114     }
 115     if (ORTE_SUCCESS != (ret = opal_pmix_base_select())) {
 116         ORTE_ERROR_LOG(ret);
 117         error = "opal_pmix_base_select";
 118         goto error;
 119     }
 120     if (NULL == opal_pmix.tool_init) {
 121         /* we no longer support non-pmix tools */
 122         orte_show_help("help-ess-base.txt",
 123                        "legacy-tool", true);
 124         ret = ORTE_ERR_SILENT;
 125         error = "opal_pmix.tool_init";
 126         goto error;
 127     }
 128     /* set the event base for the pmix component code */
 129     opal_pmix_base_set_evbase(orte_event_base);
 130 
 131     /* initialize */
 132     OBJ_CONSTRUCT(&info, opal_list_t);
 133     if (NULL != flags) {
 134         /* pass along any directives */
 135         OPAL_LIST_FOREACH_SAFE(kv, knext, flags, opal_value_t) {
 136             opal_list_remove_item(flags, &kv->super);
 137             opal_list_append(&info, &kv->super);
 138         }
 139     }
 140     if (OPAL_SUCCESS != (ret = opal_pmix.tool_init(&info))) {
 141         ORTE_ERROR_LOG(ret);
 142         error = "opal_pmix.init";
 143         OPAL_LIST_DESTRUCT(&info);
 144         goto error;
 145     }
 146     OPAL_LIST_DESTRUCT(&info);
 147     /* the PMIx server set our name - record it here */
 148     ORTE_PROC_MY_NAME->jobid = OPAL_PROC_MY_NAME.jobid;
 149     ORTE_PROC_MY_NAME->vpid = OPAL_PROC_MY_NAME.vpid;
 150     orte_process_info.super.proc_hostname = strdup(orte_process_info.nodename);
 151     orte_process_info.super.proc_flags = OPAL_PROC_ALL_LOCAL;
 152     orte_process_info.super.proc_arch = opal_local_arch;
 153     opal_proc_local_set(&orte_process_info.super);
 154 
 155     if (NULL != opal_pmix.query) {
 156         /* query the server for its URI so we can get any IO forwarded to us */
 157         OBJ_CONSTRUCT(&info, opal_list_t);
 158         q = OBJ_NEW(opal_pmix_query_t);
 159         opal_argv_append_nosize(&q->keys, OPAL_PMIX_SERVER_URI);
 160         opal_list_append(&info, &q->super);
 161         OPAL_PMIX_CONSTRUCT_LOCK(&lock);
 162         opal_pmix.query(&info, infocb, &lock);
 163         OPAL_PMIX_WAIT_THREAD(&lock);
 164         OPAL_PMIX_DESTRUCT_LOCK(&lock);
 165         OPAL_LIST_DESTRUCT(&info);
 166     }
 167 
 168     /* open and setup the state machine */
 169     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_state_base_framework, 0))) {
 170         ORTE_ERROR_LOG(ret);
 171         error = "orte_state_base_open";
 172         goto error;
 173     }
 174     if (ORTE_SUCCESS != (ret = orte_state_base_select())) {
 175         ORTE_ERROR_LOG(ret);
 176         error = "orte_state_base_select";
 177         goto error;
 178     }
 179     /* open and setup the error manager */
 180     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_errmgr_base_framework, 0))) {
 181         ORTE_ERROR_LOG(ret);
 182         error = "orte_errmgr_base_open";
 183         goto error;
 184     }
 185     if (ORTE_SUCCESS != (ret = orte_errmgr_base_select())) {
 186         ORTE_ERROR_LOG(ret);
 187         error = "orte_errmgr_base_select";
 188         goto error;
 189     }
 190     /* Setup the communication infrastructure */
 191     /* Routed system */
 192     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_routed_base_framework, 0))) {
 193         ORTE_ERROR_LOG(ret);
 194         error = "orte_rml_base_open";
 195         goto error;
 196     }
 197     if (ORTE_SUCCESS != (ret = orte_routed_base_select())) {
 198         ORTE_ERROR_LOG(ret);
 199         error = "orte_routed_base_select";
 200         goto error;
 201     }
 202     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_oob_base_framework, 0))) {
 203         ORTE_ERROR_LOG(ret);
 204         error = "orte_oob_base_open";
 205         goto error;
 206     }
 207     if (ORTE_SUCCESS != (ret = orte_oob_base_select())) {
 208         ORTE_ERROR_LOG(ret);
 209         error = "orte_oob_base_select";
 210         goto error;
 211     }
 212     /* Runtime Messaging Layer */
 213     if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_rml_base_framework, 0))) {
 214         ORTE_ERROR_LOG(ret);
 215         error = "orte_rml_base_open";
 216         goto error;
 217     }
 218     if (ORTE_SUCCESS != (ret = orte_rml_base_select())) {
 219         ORTE_ERROR_LOG(ret);
 220         error = "orte_rml_base_select";
 221         goto error;
 222     }
 223 
 224     /* we -may- need to know the name of the head
 225      * of our session directory tree, particularly the
 226      * tmp base where any other session directories on
 227      * this node might be located
 228      */
 229 
 230     ret = orte_session_setup_base(ORTE_PROC_MY_NAME);
 231     if (ORTE_SUCCESS != ret ) {
 232         ORTE_ERROR_LOG(ret);
 233         error = "define session dir names";
 234         goto error;
 235     }
 236 
 237     /* setup I/O forwarding system - must come after we init routes */
 238     if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) {
 239         /* extract the name */
 240         if (ORTE_SUCCESS != orte_rml_base_parse_uris(orte_process_info.my_hnp_uri, ORTE_PROC_MY_HNP, NULL)) {
 241             orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
 242             exit(1);
 243         }
 244         /* Set the contact info in the RML - this won't actually establish
 245          * the connection, but just tells the RML how to reach the HNP
 246          * if/when we attempt to send to it
 247          */
 248         OBJ_CONSTRUCT(&val, opal_value_t);
 249         val.key = OPAL_PMIX_PROC_URI;
 250         val.type = OPAL_STRING;
 251         val.data.string = orte_process_info.my_hnp_uri;
 252         if (OPAL_SUCCESS != (ret = opal_pmix.store_local(ORTE_PROC_MY_HNP, &val))) {
 253             ORTE_ERROR_LOG(ret);
 254             val.key = NULL;
 255             val.data.string = NULL;
 256             OBJ_DESTRUCT(&val);
 257             error = "store HNP URI";
 258             goto error;
 259         }
 260         val.key = NULL;
 261         val.data.string = NULL;
 262         OBJ_DESTRUCT(&val);
 263         /* set the route to be direct */
 264         if (ORTE_SUCCESS != orte_routed.update_route(ORTE_PROC_MY_HNP, ORTE_PROC_MY_HNP)) {
 265             orte_show_help("help-orte-top.txt", "orte-top:hnp-uri-bad", true, orte_process_info.my_hnp_uri);
 266             orte_finalize();
 267             exit(1);
 268         }
 269 
 270         /* connect to the HNP so we can recv forwarded output */
 271         buf = OBJ_NEW(opal_buffer_t);
 272         ret = orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP,
 273                                       buf, ORTE_RML_TAG_WARMUP_CONNECTION,
 274                                       orte_rml_send_callback, NULL);
 275         if (ORTE_SUCCESS != ret) {
 276             ORTE_ERROR_LOG(ret);
 277             error = "warmup connection";
 278             goto error;
 279         }
 280 
 281         /* set the target hnp as our lifeline so we will terminate if it exits */
 282         orte_routed.set_lifeline(ORTE_PROC_MY_HNP);
 283 
 284         /* setup the IOF */
 285         if (ORTE_SUCCESS != (ret = mca_base_framework_open(&orte_iof_base_framework, 0))) {
 286             ORTE_ERROR_LOG(ret);
 287             error = "orte_iof_base_open";
 288             goto error;
 289         }
 290         if (ORTE_SUCCESS != (ret = orte_iof_base_select())) {
 291             ORTE_ERROR_LOG(ret);
 292             error = "orte_iof_base_select";
 293             goto error;
 294         }
 295 
 296     }
 297 
 298     return ORTE_SUCCESS;
 299 
 300  error:
 301     orte_show_help("help-orte-runtime.txt",
 302                    "orte_init:startup:internal-failure",
 303                    true, error, ORTE_ERROR_NAME(ret), ret);
 304 
 305     return ret;
 306 }
 307 
 308 int orte_ess_base_tool_finalize(void)
 309 {
 310     orte_wait_finalize();
 311 
 312     /* if I am a tool, then all I will have done is
 313      * a very small subset of orte_init - ensure that
 314      * I only back those elements out
 315      */
 316     if (NULL != orte_process_info.my_hnp_uri && NULL == opal_pmix.server_iof_push) {
 317         (void) mca_base_framework_close(&orte_iof_base_framework);
 318     }
 319     (void) mca_base_framework_close(&orte_routed_base_framework);
 320     (void) mca_base_framework_close(&orte_rml_base_framework);
 321     (void) mca_base_framework_close(&orte_errmgr_base_framework);
 322 
 323     opal_pmix.finalize();
 324     (void) mca_base_framework_close(&opal_pmix_base_framework);
 325 
 326     return ORTE_SUCCESS;
 327 }

/* [<][>][^][v][top][bottom][index][help] */