root/orte/mca/ras/base/ras_base_allocate.c


DEFINITIONS

This source file includes the following definitions:
  1. orte_ras_base_display_alloc
  2. orte_ras_base_allocate
  3. orte_ras_base_add_hosts

/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
 *                         University Research and Technology
 *                         Corporation.  All rights reserved.
 * Copyright (c) 2004-2005 The University of Tennessee and The University
 *                         of Tennessee Research Foundation.  All rights
 *                         reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2011-2012 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
 * Copyright (c) 2018      Research Organization for Information Science
 *                         and Technology (RIST). All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "orte_config.h"

#include <string.h>

#include "orte/constants.h"
#include "orte/types.h"

#include "orte/mca/mca.h"
#include "opal/mca/base/base.h"
#include "opal/class/opal_list.h"
#include "opal/util/output.h"
#include "opal/util/printf.h"
#include "opal/dss/dss.h"
#include "opal/util/argv.h"
#include "opal/mca/if/if.h"

#include "orte/util/show_help.h"
#include "orte/mca/errmgr/errmgr.h"
#include "orte/mca/rmaps/base/base.h"
#include "orte/util/name_fns.h"
#include "orte/runtime/orte_globals.h"
#include "orte/runtime/orte_wait.h"
#include "orte/util/hostfile/hostfile.h"
#include "orte/util/dash_host/dash_host.h"
#include "orte/util/proc_info.h"
#include "orte/util/comm/comm.h"
#include "orte/util/error_strings.h"
#include "orte/util/threads.h"
#include "orte/mca/state/state.h"
#include "orte/runtime/orte_quit.h"

#include "orte/mca/ras/base/ras_private.h"

/* function to display allocation */
void orte_ras_base_display_alloc(void)
{
    char *tmp=NULL, *tmp2, *tmp3;
    int i, istart;
    orte_node_t *alloc;

    if (orte_xml_output) {
        opal_asprintf(&tmp, "<allocation>\n");
    } else {
        opal_asprintf(&tmp, "\n======================   ALLOCATED NODES   ======================\n");
    }
    if (orte_hnp_is_allocated) {
        istart = 0;
    } else {
        istart = 1;
    }
    for (i=istart; i < orte_node_pool->size; i++) {
        if (NULL == (alloc = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
            continue;
        }
        if (orte_xml_output) {
            /* need to create the output in XML format */
            opal_asprintf(&tmp2, "\t<host name=\"%s\" slots=\"%d\" max_slots=\"%d\" slots_inuse=\"%d\">\n",
                     (NULL == alloc->name) ? "UNKNOWN" : alloc->name,
                     (int)alloc->slots, (int)alloc->slots_max, (int)alloc->slots_inuse);
        } else {
            opal_asprintf(&tmp2, "\t%s: flags=0x%02x slots=%d max_slots=%d slots_inuse=%d state=%s\n",
                     (NULL == alloc->name) ? "UNKNOWN" : alloc->name, alloc->flags,
                     (int)alloc->slots, (int)alloc->slots_max, (int)alloc->slots_inuse,
                     orte_node_state_to_str(alloc->state));
        }
        if (NULL == tmp) {
            tmp = tmp2;
        } else {
            opal_asprintf(&tmp3, "%s%s", tmp, tmp2);
            free(tmp);
            free(tmp2);
            tmp = tmp3;
        }
    }
    if (orte_xml_output) {
        fprintf(orte_xml_fp, "%s</allocation>\n", tmp);
        fflush(orte_xml_fp);
    } else {
        opal_output(orte_clean_output, "%s=================================================================\n", tmp);
    }
    free(tmp);
}
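
/* Illustrative non-XML output from the above, assuming two allocated
 * nodes (the node names and flag/state values shown are hypothetical):
 *
 * ======================   ALLOCATED NODES   ======================
 *     nodeA: flags=0x11 slots=4 max_slots=0 slots_inuse=0 state=UP
 *     nodeB: flags=0x11 slots=4 max_slots=0 slots_inuse=0 state=UP
 * =================================================================
 */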

/*
 * Establish the allocation for this job - i.e., read the set of nodes
 * available to this HNP and populate the global resource pool.
 */
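/*
 * Sources are consulted in precedence order, stopping at the first
 * one that yields nodes:
 *   1. the active RAS module, if any (managed environments, e.g.,
 *      under a resource manager such as SLURM)
 *   2. a rankfile, if given
 *   3. dash-host entries aggregated across the app_contexts
 *   4. hostfiles given in the app_contexts
 *   5. the default hostfile
 *   6. failing all of the above, the local node itself
 */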
void orte_ras_base_allocate(int fd, short args, void *cbdata)
{
    int rc;
    orte_job_t *jdata;
    opal_list_t nodes;
    orte_node_t *node;
    orte_std_cntr_t i;
    orte_app_context_t *app;
    orte_state_caddy_t *caddy = (orte_state_caddy_t*)cbdata;
    char *hosts=NULL;

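    /* this is posted as an event by the state machine, so the fd/args
     * params are unused; the caddy carries the job. ORTE_ACQUIRE_OBJECT
     * is believed to issue the read barrier matching the post, ensuring
     * the caddy contents are seen consistently in this thread.
     */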
    ORTE_ACQUIRE_OBJECT(caddy);

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* convenience */
    jdata = caddy->jdata;

    /* if we already did this, don't do it again - the pool of
     * global resources is set.
     */
    if (orte_ras_base.allocation_read) {

        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate allocation already read",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
        goto next_state;
    }
    orte_ras_base.allocation_read = true;

    /* Otherwise, we have to create
     * the initial set of resources that will delineate all
     * further operations serviced by this HNP. This list will
     * contain ALL nodes that can be used by any subsequent job.
     *
     * In other words, if a node isn't found in this step, then
     * no job launched by this HNP will be able to utilize it.
     */

    /* construct a list to hold the results */
    OBJ_CONSTRUCT(&nodes, opal_list_t);

    /* if a component was selected, then we are in a managed
     * environment - the active module will return the list of
     * nodes it found
     */
    if (NULL != orte_ras_base.active_module)  {
        /* read the allocation */
        if (ORTE_SUCCESS != (rc = orte_ras_base.active_module->allocate(jdata, &nodes))) {
            if (ORTE_ERR_ALLOCATION_PENDING == rc) {
                /* an allocation request is underway, so just do nothing */
                OBJ_DESTRUCT(&nodes);
                OBJ_RELEASE(caddy);
                return;
            }
            if (ORTE_ERR_SYSTEM_WILL_BOOTSTRAP == rc) {
                /* this module indicates that nodes will be discovered
                 * on a bootstrap basis, so all we do here is add our
                 * own node to the list
                 */
                goto addlocal;
            }
            if (ORTE_ERR_TAKE_NEXT_OPTION == rc) {
                /* we have an active module, but it is unable to
                 * allocate anything for this job - this indicates
                 * that it isn't a fatal error, but could be if
                 * an allocation is required
                 */
                if (orte_allocation_required) {
                    /* an allocation is required, so this is fatal */
                    OBJ_DESTRUCT(&nodes);
                    orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true);
                    ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                    OBJ_RELEASE(caddy);
                    return;
                } else {
                    /* an allocation is not required, so we can just
                     * run on the local node - go add it
                     */
                    goto addlocal;
                }
            }
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    }
    /* If something came back, save it and we are done */
    if (!opal_list_is_empty(&nodes)) {
        /* flag that the allocation is managed */
        orte_managed_allocation = true;
        /* since it is managed, we do not attempt to resolve
         * the nodenames */
        opal_if_do_not_resolve = true;
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    } else if (orte_allocation_required) {
        /* if nothing was found, and an allocation is
         * required, then error out
         */
        OBJ_DESTRUCT(&nodes);
        orte_show_help("help-ras-base.txt", "ras-base:no-allocation", true);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in module - proceeding to hostfile",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

    /* nothing was found, or no active module was alive. We first see
     * if we were given a rankfile - if so, use it as the hosts will be
     * taken from the mapping */
    if (NULL != orte_rankfile) {
        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate parsing rankfile %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             orte_rankfile));

        /* a rankfile was provided - parse it */
        if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                               orte_rankfile))) {
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    }

    /* if something was found in the rankfile, we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* rankfile is considered equivalent to an RM allocation */
        if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping))) {
            ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_OVERSUBSCRIBE);
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }

    /* if a dash-host has been provided, aggregate across all the
     * app_contexts. Any hosts the user wants to add via comm_spawn
     * can be supplied with the add_host option */
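    /* an illustrative dash-host value (hypothetical): "nodeA,nodeB:2" -
     * a comma-separated host list, where a ":n" suffix, if supported by
     * this version's parser, requests n slots on that host */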
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (!orte_soft_locations &&
            orte_get_attribute(&app->attributes, ORTE_APP_DASH_HOST, (void**)&hosts, OPAL_STRING)) {
            /* if we are using soft locations, then any dash-host would
             * just include desired nodes and not required. We don't want
             * to pick them up here as this would mean the request was
             * always satisfied - instead, we want to allow the request
             * to fail later on and use whatever nodes are actually
             * available
             */
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:allocate adding dash_hosts",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
                free(hosts);
                OBJ_DESTRUCT(&nodes);
                ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                OBJ_RELEASE(caddy);
                return;
            }
            free(hosts);
        }
    }

    /* if something was found in the dash-host(s), we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }

    /* Our next option is to look for a hostfile and assign our global
     * pool from there.
     *
     * Individual hostfile names, if given, are included
     * in the app_contexts for this job. We therefore need to
     * retrieve the app_contexts for the job, and then cycle
     * through them to see if anything is there. The parser will
     * add the nodes found in each hostfile to our list - i.e.,
     * the resulting list contains the UNION of all nodes specified
     * in hostfiles from across all app_contexts
     *
     * Note that any relative node syntax found in the hostfiles will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */
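    /* illustrative hostfile contents (hypothetical nodes):
     *     nodeA slots=4
     *     nodeB slots=2
     * one host per line, each with an optional slots count */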
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (orte_get_attribute(&app->attributes, ORTE_APP_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:allocate adding hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));

            /* hostfile was specified - parse it and add it to the list */
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
                free(hosts);
                OBJ_DESTRUCT(&nodes);
                /* set an error event */
                ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
                OBJ_RELEASE(caddy);
                return;
            }
            free(hosts);
        }
    }

    /* if something was found in the hostfile(s), we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }

    /* if nothing was found so far, then look for a default hostfile */
    if (NULL != orte_default_hostfile) {
        OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                             "%s ras:base:allocate parsing default hostfile %s",
                             ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
                             orte_default_hostfile));

        /* a default hostfile was provided - parse it */
        if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes,
                                                               orte_default_hostfile))) {
            OBJ_DESTRUCT(&nodes);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
    }

    /* if something was found in the default hostfile, we use that as our global
     * pool - set it and we are done
     */
    if (!opal_list_is_empty(&nodes)) {
        /* store the results in the global resource pool - this removes the
         * list items
         */
        if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            return;
        }
        /* cleanup */
        OBJ_DESTRUCT(&nodes);
        goto DISPLAY;
    }

    OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                         "%s ras:base:allocate nothing found in hostfiles - inserting current node",
                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));

  addlocal:
    /* if nothing was found by any of the above methods, then we have no
     * earthly idea what to do - so just add the local host
     */
    node = OBJ_NEW(orte_node_t);
    if (NULL == node) {
        ORTE_ERROR_LOG(ORTE_ERR_OUT_OF_RESOURCE);
        OBJ_DESTRUCT(&nodes);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    /* use the same name we got in orte_process_info so we avoid confusion in
     * the session directories
     */
    node->name = strdup(orte_process_info.nodename);
    node->state = ORTE_NODE_STATE_UP;
    node->slots_inuse = 0;
    node->slots_max = 0;
    node->slots = 1;
    opal_list_append(&nodes, &node->super);
    /* mark the HNP as "allocated" since we have nothing else to use */
    orte_hnp_is_allocated = true;

    /* store the results in the global resource pool - this removes the
     * list items
     */
    if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
        ORTE_ERROR_LOG(rc);
        OBJ_DESTRUCT(&nodes);
        ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
        OBJ_RELEASE(caddy);
        return;
    }
    OBJ_DESTRUCT(&nodes);

  DISPLAY:
    /* shall we display the results? */
    if (4 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
        orte_ras_base_display_alloc();
    }

  next_state:
    /* are we to report this event? */
    if (orte_report_events) {
        if (ORTE_SUCCESS != (rc = orte_util_comm_report_event(ORTE_COMM_EVENT_ALLOCATE))) {
            ORTE_ERROR_LOG(rc);
            ORTE_FORCED_TERMINATE(ORTE_ERROR_DEFAULT_EXIT_CODE);
            OBJ_RELEASE(caddy);
            /* the caddy was released above - return now so it is not
             * released a second time at the bottom of this function */
            return;
        }
    }

    /* set total slots alloc */
    jdata->total_slots_alloc = orte_ras_base.total_slots_alloc;

    /* set the job state to the next position */
    ORTE_ACTIVATE_JOB_STATE(jdata, ORTE_JOB_STATE_ALLOCATION_COMPLETE);

    /* cleanup */
    OBJ_RELEASE(caddy);
}

int orte_ras_base_add_hosts(orte_job_t *jdata)
{
    int rc;
    opal_list_t nodes;
    int i, n;
    orte_app_context_t *app;
    orte_node_t *node, *next, *nptr;
    char *hosts;

    /* construct a list to hold the results */
    OBJ_CONSTRUCT(&nodes, opal_list_t);

    /* Individual add-hostfile names, if given, are included
     * in the app_contexts for this job. We therefore need to
     * retrieve the app_contexts for the job, and then cycle
     * through them to see if anything is there. The parser will
     * add the nodes found in each add-hostfile to our list - i.e.,
     * the resulting list contains the UNION of all nodes specified
     * in add-hostfiles from across all app_contexts
     *
     * Note that any relative node syntax found in the add-hostfiles will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */

    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (orte_get_attribute(&app->attributes, ORTE_APP_ADD_HOSTFILE, (void**)&hosts, OPAL_STRING)) {
            OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
                                 "%s ras:base:add_hosts checking add-hostfile %s",
                                 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts));

            /* hostfile was specified - parse it and add it to the list */
            if (ORTE_SUCCESS != (rc = orte_util_add_hostfile_nodes(&nodes, hosts))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                free(hosts);
                return rc;
            }
            /* now indicate that this app is to run across it */
            orte_set_attribute(&app->attributes, ORTE_APP_HOSTFILE, ORTE_ATTR_LOCAL, (void*)hosts, OPAL_STRING);
            orte_remove_attribute(&app->attributes, ORTE_APP_ADD_HOSTFILE);
            free(hosts);
        }
    }

    /* We next check for and add any add-host options. Note this is
     * a -little- different than dash-host in that (a) we add these
     * nodes to the global pool regardless of what may already be there,
     * and (b) as a result, any job and/or app_context can access them.
     *
     * Note that any relative node syntax found in the add-host lists will
     * generate an error in this scenario, so only non-relative syntax
     * can be present
     */
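    /* for reference: these add-host/add-hostfile attributes normally
     * originate from the corresponding MPI_Info keys ("add-host",
     * "add-hostfile") passed to MPI_Comm_spawn */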
    for (i=0; i < jdata->apps->size; i++) {
        if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
            continue;
        }
        if (orte_get_attribute(&app->attributes, ORTE_APP_ADD_HOST, (void**)&hosts, OPAL_STRING)) {
            opal_output_verbose(5, orte_ras_base_framework.framework_output,
                                "%s ras:base:add_hosts checking add-host %s",
                                ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hosts);
            if (ORTE_SUCCESS != (rc = orte_util_add_dash_host_nodes(&nodes, hosts, true))) {
                ORTE_ERROR_LOG(rc);
                OBJ_DESTRUCT(&nodes);
                free(hosts);
                return rc;
            }
            /* now indicate that this app is to run across them */
            orte_set_attribute(&app->attributes, ORTE_APP_DASH_HOST, ORTE_ATTR_LOCAL, hosts, OPAL_STRING);
            orte_remove_attribute(&app->attributes, ORTE_APP_ADD_HOST);
            free(hosts);
        }
    }

    /* if something was found, we add that to our global pool */
    if (!opal_list_is_empty(&nodes)) {
        /* the node insert code doesn't check for uniqueness, so we will
         * do so here - yes, this is an ugly, non-scalable loop, but this
         * is the exception case and so we can do it here */
        OPAL_LIST_FOREACH_SAFE(node, next, &nodes, orte_node_t) {
            node->state = ORTE_NODE_STATE_ADDED;
            for (n=0; n < orte_node_pool->size; n++) {
                if (NULL == (nptr = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, n))) {
                    continue;
                }
                if (0 == strcmp(node->name, nptr->name)) {
                    opal_list_remove_item(&nodes, &node->super);
                    OBJ_RELEASE(node);
                    break;
                }
            }
        }
        if (!opal_list_is_empty(&nodes)) {
            /* store the results in the global resource pool - this removes the
             * list items
             */
            if (ORTE_SUCCESS != (rc = orte_ras_base_node_insert(&nodes, jdata))) {
                ORTE_ERROR_LOG(rc);
            }
            /* mark that an updated nidmap must be communicated to existing daemons */
            orte_nidmap_communicated = false;
        }
    }
    /* cleanup */
    OPAL_LIST_DESTRUCT(&nodes);

    /* shall we display the results? */
    if (0 < opal_output_get_verbosity(orte_ras_base_framework.framework_output)) {
        orte_ras_base_display_alloc();
    }

    return ORTE_SUCCESS;
}
