root/orte/util/hostfile/hostfile.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. hostfile_parse_error
  2. hostfile_parse_int
  3. hostfile_parse_string
  4. hostfile_lookup
  5. hostfile_parse_line
  6. hostfile_parse
  7. orte_util_add_hostfile_nodes
  8. orte_util_filter_hostfile_nodes
  9. orte_util_get_ordered_host_list

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2008 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2007      Los Alamos National Security, LLC.  All rights
  13  *                         reserved.
  14  * Copyright (c) 2011      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2013-2017 Intel, Inc. All rights reserved.
  16  * Copyright (c) 2015-2018 Research Organization for Information Science
  17  *                         and Technology (RIST). All rights reserved.
  18  * Copyright (c) 2016      IBM Corporation.  All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 #include "orte_config.h"
  26 #include "orte/constants.h"
  27 
  28 #ifdef HAVE_UNISTD_H
  29 #include <unistd.h>
  30 #endif
  31 #include <errno.h>
  32 #include <string.h>
  33 #include <sys/stat.h>
  34 
  35 #include "opal/class/opal_list.h"
  36 #include "opal/util/argv.h"
  37 #include "opal/util/output.h"
  38 #include "opal/mca/mca.h"
  39 #include "opal/mca/base/base.h"
  40 #include "opal/util/if.h"
  41 #include "opal/util/net.h"
  42 #include "opal/mca/installdirs/installdirs.h"
  43 
  44 #include "orte/util/show_help.h"
  45 #include "orte/util/proc_info.h"
  46 #include "orte/util/name_fns.h"
  47 #include "orte/mca/errmgr/errmgr.h"
  48 #include "orte/mca/ras/base/base.h"
  49 #include "orte/runtime/orte_globals.h"
  50 
  51 #include "orte/util/hostfile/hostfile_lex.h"
  52 #include "orte/util/hostfile/hostfile.h"
  53 
  54 
/* Name of the hostfile currently being parsed.  hostfile_parse() points
 * this at its 'hostfile' argument on entry and resets it to NULL before
 * returning; the error-reporting paths read it to name the file in their
 * help messages.  The pointer is borrowed, never owned or freed here. */
static const char *cur_hostfile_name = NULL;
  56 
  57 static void hostfile_parse_error(int token)
  58 {
  59     switch (token) {
  60     case ORTE_HOSTFILE_STRING:
  61         orte_show_help("help-hostfile.txt", "parse_error_string",
  62                        true,
  63                        cur_hostfile_name,
  64                        orte_util_hostfile_line,
  65                        token,
  66                        orte_util_hostfile_value.sval);
  67         break;
  68     case ORTE_HOSTFILE_IPV4:
  69     case ORTE_HOSTFILE_IPV6:
  70     case ORTE_HOSTFILE_INT:
  71         orte_show_help("help-hostfile.txt", "parse_error_int",
  72                        true,
  73                        cur_hostfile_name,
  74                        orte_util_hostfile_line,
  75                        token,
  76                        orte_util_hostfile_value.ival);
  77         break;
  78      default:
  79         orte_show_help("help-hostfile.txt", "parse_error",
  80                        true,
  81                        cur_hostfile_name,
  82                        orte_util_hostfile_line,
  83                        token );
  84         break;
  85     }
  86 }
  87 
  88  /**
  89   * Return the integer following an = (actually may only return positive ints)
  90   */
  91 static int hostfile_parse_int(void)
  92 {
  93     if (ORTE_HOSTFILE_EQUAL != orte_util_hostfile_lex())
  94         return -1;
  95     if (ORTE_HOSTFILE_INT != orte_util_hostfile_lex())
  96         return -1;
  97     return orte_util_hostfile_value.ival;
  98 }
  99 
 100 /**
 101  * Return the string following an = (option to a keyword)
 102  */
 103 static char *hostfile_parse_string(void)
 104 {
 105     int rc;
 106     if (ORTE_HOSTFILE_EQUAL != orte_util_hostfile_lex()){
 107         return NULL;
 108     }
 109     rc = orte_util_hostfile_lex();
 110     if (ORTE_HOSTFILE_STRING != rc){
 111         return NULL;
 112     }
 113     return strdup(orte_util_hostfile_value.sval);
 114 }
 115 
 116 static orte_node_t* hostfile_lookup(opal_list_t* nodes, const char* name)
 117 {
 118     opal_list_item_t* item;
 119     for(item =  opal_list_get_first(nodes);
 120         item != opal_list_get_end(nodes);
 121         item =  opal_list_get_next(item)) {
 122         orte_node_t* node = (orte_node_t*)item;
 123         if (strcmp(node->name, name) == 0) {
 124             return node;
 125         }
 126     }
 127     return NULL;
 128 }
 129 
 130 static int hostfile_parse_line(int token, opal_list_t* updates,
 131                                opal_list_t* exclude, bool keep_all)
 132 {
 133     int rc;
 134     orte_node_t* node;
 135     bool got_max = false;
 136     char* value;
 137     char **argv;
 138     char* node_name = NULL;
 139     char* username = NULL;
 140     int cnt;
 141     int number_of_slots = 0;
 142     char buff[64];
 143 
 144     if (ORTE_HOSTFILE_STRING == token ||
 145         ORTE_HOSTFILE_HOSTNAME == token ||
 146         ORTE_HOSTFILE_INT == token ||
 147         ORTE_HOSTFILE_IPV4 == token ||
 148         ORTE_HOSTFILE_IPV6 == token) {
 149 
 150         if(ORTE_HOSTFILE_INT == token) {
 151             snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
 152             value = buff;
 153         } else {
 154             value = orte_util_hostfile_value.sval;
 155         }
 156         argv = opal_argv_split (value, '@');
 157 
 158         cnt = opal_argv_count (argv);
 159         if (1 == cnt) {
 160             node_name = strdup(argv[0]);
 161         } else if (2 == cnt) {
 162             username = strdup(argv[0]);
 163             node_name = strdup(argv[1]);
 164         } else {
 165             opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
 166         }
 167         opal_argv_free (argv);
 168 
 169         // Strip off the FQDN if present, ignore IP addresses
 170         if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(node_name) ) {
 171             char *ptr;
 172             if (NULL != (ptr = strchr(node_name, '.'))) {
 173                 *ptr = '\0';
 174             }
 175         }
 176 
 177         /* if the first letter of the name is '^', then this is a node
 178          * to be excluded. Remove the ^ character so the nodename is
 179          * usable, and put it on the exclude list
 180          */
 181         if ('^' == node_name[0]) {
 182             int i, len;
 183             len = strlen(node_name);
 184             for (i=1; i < len; i++) {
 185                 node_name[i-1] = node_name[i];
 186             }
 187             node_name[len-1] = '\0';  /* truncate */
 188 
 189             OPAL_OUTPUT_VERBOSE((3, orte_ras_base_framework.framework_output,
 190                                  "%s hostfile: node %s is being excluded",
 191                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name));
 192 
 193             /* see if this is another name for us */
 194             if (orte_ifislocal(node_name)) {
 195                 /* Nodename has been allocated, that is for sure */
 196                 free (node_name);
 197                 node_name = strdup(orte_process_info.nodename);
 198             }
 199 
 200             /* Do we need to make a new node object?  First check to see
 201                if it's already in the exclude list */
 202             if (NULL == (node = hostfile_lookup(exclude, node_name))) {
 203                 node = OBJ_NEW(orte_node_t);
 204                 node->name = node_name;
 205                 if (NULL != username) {
 206                     orte_set_attribute(&node->attributes, ORTE_NODE_USERNAME, ORTE_ATTR_LOCAL, username, OPAL_STRING);
 207                 }
 208                 opal_list_append(exclude, &node->super);
 209             } else {
 210                 free(node_name);
 211             }
 212             return ORTE_SUCCESS;
 213         }
 214 
 215         /* this is not a node to be excluded, so we need to process it and
 216          * add it to the "include" list. See if this host is actually us.
 217          */
 218         if (orte_ifislocal(node_name)) {
 219             /* Nodename has been allocated, that is for sure */
 220             free (node_name);
 221             node_name = strdup(orte_process_info.nodename);
 222         }
 223 
 224         OPAL_OUTPUT_VERBOSE((3, orte_ras_base_framework.framework_output,
 225                              "%s hostfile: node %s is being included - keep all is %s",
 226                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node_name,
 227                              keep_all ? "TRUE" : "FALSE"));
 228 
 229         /* Do we need to make a new node object? */
 230         if (keep_all || NULL == (node = hostfile_lookup(updates, node_name))) {
 231             node = OBJ_NEW(orte_node_t);
 232             node->name = node_name;
 233             node->slots = 1;
 234             if (NULL != username) {
 235                 orte_set_attribute(&node->attributes, ORTE_NODE_USERNAME, ORTE_ATTR_LOCAL, username, OPAL_STRING);
 236             }
 237             opal_list_append(updates, &node->super);
 238         } else {
 239             /* this node was already found once - add a slot and mark slots as "given" */
 240             node->slots++;
 241             ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
 242             free(node_name);
 243         }
 244     } else if (ORTE_HOSTFILE_RELATIVE == token) {
 245         /* store this for later processing */
 246         node = OBJ_NEW(orte_node_t);
 247         node->name = strdup(orte_util_hostfile_value.sval);
 248         opal_list_append(updates, &node->super);
 249     } else if (ORTE_HOSTFILE_RANK == token) {
 250         /* we can ignore the rank, but we need to extract the node name. we
 251          * first need to shift over to the other side of the equal sign as
 252          * this is where the node name will be
 253          */
 254         while (!orte_util_hostfile_done &&
 255                ORTE_HOSTFILE_EQUAL != token) {
 256             token = orte_util_hostfile_lex();
 257         }
 258         if (orte_util_hostfile_done) {
 259             /* bad syntax somewhere */
 260             return ORTE_ERROR;
 261         }
 262         /* next position should be the node name */
 263         token = orte_util_hostfile_lex();
 264         if(ORTE_HOSTFILE_INT == token) {
 265             snprintf(buff, 64, "%d", orte_util_hostfile_value.ival);
 266             value = buff;
 267         } else {
 268             value = orte_util_hostfile_value.sval;
 269         }
 270 
 271         argv = opal_argv_split (value, '@');
 272 
 273         cnt = opal_argv_count (argv);
 274         if (1 == cnt) {
 275             node_name = strdup(argv[0]);
 276         } else if (2 == cnt) {
 277             username = strdup(argv[0]);
 278             node_name = strdup(argv[1]);
 279         } else {
 280             opal_output(0, "WARNING: Unhandled user@host-combination\n"); /* XXX */
 281         }
 282         opal_argv_free (argv);
 283 
 284         // Strip off the FQDN if present, ignore IP addresses
 285         if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(node_name) ) {
 286             char *ptr;
 287             if (NULL != (ptr = strchr(node_name, '.'))) {
 288                 *ptr = '\0';
 289             }
 290         }
 291 
 292         /* Do we need to make a new node object? */
 293         if (NULL == (node = hostfile_lookup(updates, node_name))) {
 294             node = OBJ_NEW(orte_node_t);
 295             node->name = node_name;
 296             node->slots = 1;
 297             if (NULL != username) {
 298                 orte_set_attribute(&node->attributes, ORTE_NODE_USERNAME, ORTE_ATTR_LOCAL, username, OPAL_STRING);
 299             }
 300             opal_list_append(updates, &node->super);
 301         } else {
 302             /* add a slot */
 303             node->slots++;
 304             free(node_name);
 305         }
 306         OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 307                              "%s hostfile: node %s slots %d nodes-given %s",
 308                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name, node->slots,
 309                              ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN) ? "TRUE" : "FALSE"));
 310         /* mark the slots as "given" since we take them as being the
 311          * number specified via the rankfile
 312          */
 313         ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
 314         /* skip to end of line */
 315         while (!orte_util_hostfile_done &&
 316                ORTE_HOSTFILE_NEWLINE != token) {
 317             token = orte_util_hostfile_lex();
 318         }
 319         return ORTE_SUCCESS;
 320     } else {
 321         hostfile_parse_error(token);
 322         return ORTE_ERROR;
 323     }
 324     free(username);
 325 
 326     while (!orte_util_hostfile_done) {
 327         token = orte_util_hostfile_lex();
 328 
 329         switch (token) {
 330         case ORTE_HOSTFILE_DONE:
 331             goto done;
 332 
 333         case ORTE_HOSTFILE_NEWLINE:
 334             goto done;
 335 
 336         case ORTE_HOSTFILE_USERNAME:
 337             username = hostfile_parse_string();
 338             if (NULL != username) {
 339                 orte_set_attribute(&node->attributes, ORTE_NODE_USERNAME, ORTE_ATTR_LOCAL, username, OPAL_STRING);
 340                 free(username);
 341             }
 342             break;
 343 
 344         case ORTE_HOSTFILE_PORT:
 345             rc = hostfile_parse_int();
 346             if (rc < 0) {
 347                 orte_show_help("help-hostfile.txt", "port",
 348                                true,
 349                                cur_hostfile_name, rc);
 350                 return ORTE_ERROR;
 351             }
 352             orte_set_attribute(&node->attributes, ORTE_NODE_PORT, ORTE_ATTR_LOCAL, &rc, OPAL_INT);
 353             break;
 354 
 355         case ORTE_HOSTFILE_COUNT:
 356         case ORTE_HOSTFILE_CPU:
 357         case ORTE_HOSTFILE_SLOTS:
 358             rc = hostfile_parse_int();
 359             if (rc < 0) {
 360                 orte_show_help("help-hostfile.txt", "slots",
 361                                true,
 362                                cur_hostfile_name, rc);
 363                 opal_list_remove_item(updates, &node->super);
 364                 OBJ_RELEASE(node);
 365                 return ORTE_ERROR;
 366             }
 367             if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
 368                 /* multiple definitions were given for the
 369                  * slot count - this is not allowed
 370                  */
 371                 orte_show_help("help-hostfile.txt", "slots-given",
 372                                true,
 373                                cur_hostfile_name, node->name);
 374                 opal_list_remove_item(updates, &node->super);
 375                 OBJ_RELEASE(node);
 376                 return ORTE_ERROR;
 377             }
 378             node->slots = rc;
 379             ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
 380 
 381             /* Ensure that slots_max >= slots */
 382             if (node->slots_max != 0 && node->slots_max < node->slots) {
 383                 node->slots_max = node->slots;
 384             }
 385             break;
 386 
 387         case ORTE_HOSTFILE_SLOTS_MAX:
 388             rc = hostfile_parse_int();
 389             if (rc < 0) {
 390                 orte_show_help("help-hostfile.txt", "max_slots",
 391                                true,
 392                                cur_hostfile_name, ((size_t) rc));
 393                 opal_list_remove_item(updates, &node->super);
 394                 OBJ_RELEASE(node);
 395                 return ORTE_ERROR;
 396             }
 397             /* Only take this update if it puts us >= node_slots */
 398             if (rc >= node->slots) {
 399                 if (node->slots_max != rc) {
 400                     node->slots_max = rc;
 401                     got_max = true;
 402                 }
 403             } else {
 404                 orte_show_help("help-hostfile.txt", "max_slots_lt",
 405                                true,
 406                                cur_hostfile_name, node->slots, rc);
 407                 ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 408                 opal_list_remove_item(updates, &node->super);
 409                 OBJ_RELEASE(node);
 410                 return ORTE_ERROR;
 411             }
 412             break;
 413 
 414         case ORTE_HOSTFILE_STRING:
 415         case ORTE_HOSTFILE_INT:
 416             /* just ignore it */
 417             break;
 418 
 419         default:
 420             hostfile_parse_error(token);
 421             opal_list_remove_item(updates, &node->super);
 422             OBJ_RELEASE(node);
 423             return ORTE_ERROR;
 424         }
 425         if (number_of_slots > node->slots) {
 426             ORTE_ERROR_LOG(ORTE_ERR_BAD_PARAM);
 427             opal_list_remove_item(updates, &node->super);
 428             OBJ_RELEASE(node);
 429             return ORTE_ERROR;
 430         }
 431     }
 432 
 433  done:
 434     if (got_max && !ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
 435         node->slots = node->slots_max;
 436         ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
 437     }
 438 
 439     return ORTE_SUCCESS;
 440 }
 441 
 442 
 443 /**
 444  * Parse the specified file into a node list.
 445  */
 446 
 447 static int hostfile_parse(const char *hostfile, opal_list_t* updates,
 448                           opal_list_t* exclude, bool keep_all)
 449 {
 450     int token;
 451     int rc = ORTE_SUCCESS;
 452 
 453 
 454     cur_hostfile_name = hostfile;
 455 
 456     orte_util_hostfile_done = false;
 457     orte_util_hostfile_in = fopen(hostfile, "r");
 458     if (NULL == orte_util_hostfile_in) {
 459         if (NULL == orte_default_hostfile ||
 460             0 != strcmp(orte_default_hostfile, hostfile)) {
 461             /* not the default hostfile, so not finding it
 462              * is an error
 463              */
 464             orte_show_help("help-hostfile.txt", "no-hostfile", true, hostfile);
 465             rc = ORTE_ERR_SILENT;
 466             goto unlock;
 467         }
 468         /* if this is the default hostfile and it was given,
 469          * then it's an error
 470          */
 471         if (orte_default_hostfile_given) {
 472             orte_show_help("help-hostfile.txt", "no-hostfile", true, hostfile);
 473             rc = ORTE_ERR_NOT_FOUND;
 474             goto unlock;
 475         }
 476         /* otherwise, not finding it is okay */
 477         rc = ORTE_SUCCESS;
 478         goto unlock;
 479     }
 480 
 481     while (!orte_util_hostfile_done) {
 482         token = orte_util_hostfile_lex();
 483 
 484         switch (token) {
 485         case ORTE_HOSTFILE_DONE:
 486             orte_util_hostfile_done = true;
 487             break;
 488 
 489         case ORTE_HOSTFILE_NEWLINE:
 490             break;
 491 
 492         /*
 493          * This looks odd, since we have several forms of host-definitions:
 494          *   hostname              just plain as it is, being a ORTE_HOSTFILE_STRING
 495          *   IP4s and user@IPv4s
 496          *   hostname.domain and user@hostname.domain
 497          */
 498         case ORTE_HOSTFILE_STRING:
 499         case ORTE_HOSTFILE_INT:
 500         case ORTE_HOSTFILE_HOSTNAME:
 501         case ORTE_HOSTFILE_IPV4:
 502         case ORTE_HOSTFILE_IPV6:
 503         case ORTE_HOSTFILE_RELATIVE:
 504         case ORTE_HOSTFILE_RANK:
 505             rc = hostfile_parse_line(token, updates, exclude, keep_all);
 506             if (ORTE_SUCCESS != rc) {
 507                 goto unlock;
 508             }
 509             break;
 510 
 511         default:
 512             hostfile_parse_error(token);
 513             goto unlock;
 514         }
 515     }
 516     fclose(orte_util_hostfile_in);
 517     orte_util_hostfile_in = NULL;
 518     orte_util_hostfile_lex_destroy();
 519 
 520 unlock:
 521     cur_hostfile_name = NULL;
 522 
 523     return rc;
 524 }
 525 
 526 
 527 /**
 528  * Parse the provided hostfile and add the nodes to the list.
 529  */
 530 
 531 int orte_util_add_hostfile_nodes(opal_list_t *nodes,
 532                                  char *hostfile)
 533 {
 534     opal_list_t exclude, adds;
 535     opal_list_item_t *item, *itm;
 536     int rc;
 537     orte_node_t *nd, *node;
 538     bool found;
 539 
 540     OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 541                          "%s hostfile: checking hostfile %s for nodes",
 542                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
 543 
 544     OBJ_CONSTRUCT(&exclude, opal_list_t);
 545     OBJ_CONSTRUCT(&adds, opal_list_t);
 546 
 547     /* parse the hostfile and add any new contents to the list */
 548     if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &adds, &exclude, false))) {
 549         goto cleanup;
 550     }
 551 
 552     /* check for any relative node directives */
 553     for (item = opal_list_get_first(&adds);
 554          item != opal_list_get_end(&adds);
 555          item = opal_list_get_next(item)) {
 556         node=(orte_node_t*)item;
 557 
 558         if ('+' == node->name[0]) {
 559             orte_show_help("help-hostfile.txt", "hostfile:relative-syntax",
 560                            true, node->name);
 561             rc = ORTE_ERR_SILENT;
 562             goto cleanup;
 563         }
 564     }
 565 
 566     /* remove from the list of nodes those that are in the exclude list */
 567     while (NULL != (item = opal_list_remove_first(&exclude))) {
 568         nd = (orte_node_t*)item;
 569         /* check for matches on nodes */
 570         for (itm = opal_list_get_first(&adds);
 571              itm != opal_list_get_end(&adds);
 572              itm = opal_list_get_next(itm)) {
 573             node = (orte_node_t*)itm;
 574             if (0 == strcmp(nd->name, node->name)) {
 575                 /* match - remove it */
 576                 opal_list_remove_item(&adds, itm);
 577                 OBJ_RELEASE(itm);
 578                 break;
 579             }
 580         }
 581         OBJ_RELEASE(item);
 582     }
 583 
 584     /* transfer across all unique nodes */
 585     while (NULL != (item = opal_list_remove_first(&adds))) {
 586         nd = (orte_node_t*)item;
 587         found = false;
 588         for (itm = opal_list_get_first(nodes);
 589              itm != opal_list_get_end(nodes);
 590              itm = opal_list_get_next(itm)) {
 591             node = (orte_node_t*)itm;
 592             if (0 == strcmp(nd->name, node->name)) {
 593                 found = true;
 594                 break;
 595             }
 596         }
 597         if (!found) {
 598             opal_list_append(nodes, &nd->super);
 599             OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 600                                  "%s hostfile: adding node %s slots %d",
 601                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), nd->name, nd->slots));
 602         } else {
 603             OBJ_RELEASE(item);
 604         }
 605     }
 606 
 607 cleanup:
 608     OPAL_LIST_DESTRUCT(&exclude);
 609     OPAL_LIST_DESTRUCT(&adds);
 610 
 611     return rc;
 612 }
 613 
 614 /* Parse the provided hostfile and filter the nodes that are
 615  * on the input list, removing those that
 616  * are not found in the hostfile
 617  */
 618 int orte_util_filter_hostfile_nodes(opal_list_t *nodes,
 619                                     char *hostfile,
 620                                     bool remove)
 621 {
 622     opal_list_t newnodes, exclude;
 623     opal_list_item_t *item1, *item2, *next, *item3;
 624     orte_node_t *node_from_list, *node_from_file, *node_from_pool, *node3;
 625     int rc = ORTE_SUCCESS;
 626     char *cptr;
 627     int num_empty, nodeidx;
 628     bool want_all_empty = false;
 629     opal_list_t keep;
 630     bool found;
 631 
 632     OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 633                         "%s hostfile: filtering nodes through hostfile %s",
 634                         ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
 635 
 636     /* parse the hostfile and create local list of findings */
 637     OBJ_CONSTRUCT(&newnodes, opal_list_t);
 638     OBJ_CONSTRUCT(&exclude, opal_list_t);
 639     if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, &newnodes, &exclude, false))) {
 640         OBJ_DESTRUCT(&newnodes);
 641         OBJ_DESTRUCT(&exclude);
 642         return rc;
 643     }
 644 
 645     /* if the hostfile was empty, then treat it as a no-op filter */
 646     if (0 == opal_list_get_size(&newnodes)) {
 647         OBJ_DESTRUCT(&newnodes);
 648         OBJ_DESTRUCT(&exclude);
 649         /* indicate that the hostfile was empty */
 650         return ORTE_ERR_TAKE_NEXT_OPTION;
 651     }
 652 
 653     /* remove from the list of newnodes those that are in the exclude list
 654      * since we could have added duplicate names above due to the */
 655     while (NULL != (item1 = opal_list_remove_first(&exclude))) {
 656         node_from_file = (orte_node_t*)item1;
 657         /* check for matches on nodes */
 658         for (item2 = opal_list_get_first(&newnodes);
 659              item2 != opal_list_get_end(&newnodes);
 660              item2 = opal_list_get_next(item2)) {
 661             orte_node_t *node = (orte_node_t*)item2;
 662             if (0 == strcmp(node_from_file->name, node->name)) {
 663                 /* match - remove it */
 664                 opal_list_remove_item(&newnodes, item2);
 665                 OBJ_RELEASE(item2);
 666                 break;
 667             }
 668         }
 669         OBJ_RELEASE(item1);
 670     }
 671 
 672     /* now check our nodes and keep or mark those that match. We can
 673      * destruct our hostfile list as we go since this won't be needed
 674      */
 675     OBJ_CONSTRUCT(&keep, opal_list_t);
 676     while (NULL != (item2 = opal_list_remove_first(&newnodes))) {
 677         node_from_file = (orte_node_t*)item2;
 678 
 679         next = opal_list_get_next(item2);
 680 
 681         /* see if this is a relative node syntax */
 682         if ('+' == node_from_file->name[0]) {
 683             /* see if we specified empty nodes */
 684             if ('e' == node_from_file->name[1] ||
 685                 'E' == node_from_file->name[1]) {
 686                 /* request for empty nodes - do they want
 687                  * all of them?
 688                  */
 689                 if (NULL != (cptr = strchr(node_from_file->name, ':'))) {
 690                     /* the colon indicates a specific # are requested */
 691                     cptr++; /* step past : */
 692                     num_empty = strtol(cptr, NULL, 10);
 693                 } else {
 694                     /* want them all - set num_empty to max */
 695                     num_empty = INT_MAX;
 696                     want_all_empty = true;
 697                 }
 698                 /* search the list of nodes provided to us and find those
 699                  * that are empty
 700                  */
 701                 item1 = opal_list_get_first(nodes);
 702                 while (0 < num_empty && item1 != opal_list_get_end(nodes)) {
 703                     node_from_list = (orte_node_t*)item1;
 704                     next = opal_list_get_next(item1);  /* keep our place */
 705                     if (0 == node_from_list->slots_inuse) {
 706                         /* check to see if this node is explicitly called
 707                          * out later - if so, don't use it here
 708                          */
 709                         for (item3 = opal_list_get_first(&newnodes);
 710                              item3 != opal_list_get_end(&newnodes);
 711                              item3 = opal_list_get_next(item3)) {
 712                             node3 = (orte_node_t*)item3;
 713                             if (0 == strcmp(node3->name, node_from_list->name)) {
 714                                 /* match - don't use it */
 715                                 goto skipnode;
 716                             }
 717                         }
 718                         if (remove) {
 719                             /* remove item from list */
 720                             opal_list_remove_item(nodes, item1);
 721                             /* xfer to keep list */
 722                             opal_list_append(&keep, item1);
 723                         } else {
 724                             /* mark as included */
 725                             ORTE_FLAG_SET(node_from_list, ORTE_NODE_FLAG_MAPPED);
 726                         }
 727                         --num_empty;
 728                     }
 729                 skipnode:
 730                     item1 = next;
 731                 }
 732                 /* did they get everything they wanted? */
 733                 if (!want_all_empty && 0 < num_empty) {
 734                     orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
 735                                    true, num_empty);
 736                     rc = ORTE_ERR_SILENT;
 737                     goto cleanup;
 738                 }
 739             } else if ('n' == node_from_file->name[1] ||
 740                        'N' == node_from_file->name[1]) {
 741                 /* they want a specific relative node #, so
 742                  * look it up on global pool
 743                  */
 744                 nodeidx = strtol(&node_from_file->name[2], NULL, 10);
 745                 if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
 746                     /* this is an error */
 747                     orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
 748                                    true, nodeidx, node_from_file->name);
 749                     rc = ORTE_ERR_SILENT;
 750                     goto cleanup;
 751                 }
 752                 /* search the list of nodes provided to us and find it */
 753                 for (item1 = opal_list_get_first(nodes);
 754                      item1 != opal_list_get_end(nodes);
 755                      item1 = opal_list_get_next(nodes)) {
 756                     node_from_list = (orte_node_t*)item1;
 757                     if (0 == strcmp(node_from_list->name, node_from_pool->name)) {
 758                         if (remove) {
 759                             /* match - remove item from list */
 760                             opal_list_remove_item(nodes, item1);
 761                             /* xfer to keep list */
 762                             opal_list_append(&keep, item1);
 763                         } else {
 764                             /* mark as included */
 765                             ORTE_FLAG_SET(node_from_list, ORTE_NODE_FLAG_MAPPED);
 766                         }
 767                         break;
 768                     }
 769                 }
 770             } else {
 771                 /* invalid relative node syntax */
 772                 orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
 773                                true, node_from_file->name);
 774                 rc = ORTE_ERR_SILENT;
 775                 goto cleanup;
 776             }
 777         } else {
 778             /* we are looking for a specific node on the list
 779              * search the provided list of nodes to see if this
 780              * one is found
 781              */
 782             found = false;
 783             for (item1 = opal_list_get_first(nodes);
 784                  item1 != opal_list_get_end(nodes);
 785                  item1 = opal_list_get_next(item1)) {
 786                 node_from_list = (orte_node_t*)item1;
 787                 /* we have converted all aliases for ourself
 788                  * to our own detected nodename, so no need
 789                  * to check for interfaces again - a simple
 790                  * strcmp will suffice */
 791                 if (0 == strcmp(node_from_file->name, node_from_list->name)) {
 792                     /* if the slot count here is less than the
 793                      * total slots avail on this node, set it
 794                      * to the specified count - this allows people
 795                      * to subdivide an allocation
 796                      */
 797                     if (ORTE_FLAG_TEST(node_from_file, ORTE_NODE_FLAG_SLOTS_GIVEN) &&
 798                         node_from_file->slots < node_from_list->slots) {
 799                         node_from_list->slots = node_from_file->slots;
 800                     }
 801                     if (remove) {
 802                         /* remove the node from the list */
 803                         opal_list_remove_item(nodes, item1);
 804                         /* xfer it to keep list */
 805                         opal_list_append(&keep, item1);
 806                     } else {
 807                         /* mark as included */
 808                         ORTE_FLAG_SET(node_from_list, ORTE_NODE_FLAG_MAPPED);
 809                     }
 810                     found = true;
 811                     break;
 812                 }
 813             }
 814             /* if the host in the newnode list wasn't found,
 815              * then that is an error we need to report to the
 816              * user and abort
 817              */
 818             if (!found) {
 819                 orte_show_help("help-hostfile.txt", "hostfile:extra-node-not-found",
 820                                true, hostfile, node_from_file->name);
 821                 rc = ORTE_ERR_SILENT;
 822                 goto cleanup;
 823             }
 824         }
 825         /* cleanup the newnode list */
 826         OBJ_RELEASE(item2);
 827     }
 828 
 829     /* if we still have entries on our hostfile list, then
 830      * there were requested hosts that were not in our allocation.
 831      * This is an error - report it to the user and return an error
 832      */
 833     if (0 != opal_list_get_size(&newnodes)) {
 834         orte_show_help("help-hostfile.txt", "not-all-mapped-alloc",
 835                        true, hostfile);
 836         while (NULL != (item1 = opal_list_remove_first(&newnodes))) {
 837             OBJ_RELEASE(item1);
 838         }
 839         OBJ_DESTRUCT(&newnodes);
 840         return ORTE_ERR_SILENT;
 841     }
 842 
 843     if (!remove) {
 844         /* all done */
 845         OBJ_DESTRUCT(&newnodes);
 846         return ORTE_SUCCESS;
 847     }
 848 
 849     /* clear the rest of the nodes list */
 850     while (NULL != (item1 = opal_list_remove_first(nodes))) {
 851         OBJ_RELEASE(item1);
 852     }
 853 
 854     /* the nodes list has been cleared - rebuild it in order */
 855     while (NULL != (item1 = opal_list_remove_first(&keep))) {
 856         opal_list_append(nodes, item1);
 857     }
 858 
 859 cleanup:
 860     OBJ_DESTRUCT(&newnodes);
 861 
 862     return rc;
 863 }
 864 
 865 int orte_util_get_ordered_host_list(opal_list_t *nodes,
 866                                     char *hostfile)
 867 {
 868     opal_list_t exclude;
 869     opal_list_item_t *item, *itm, *item2, *item1;
 870     char *cptr;
 871     int num_empty, i, nodeidx, startempty=0;
 872     bool want_all_empty=false;
 873     orte_node_t *node_from_pool, *newnode;
 874     int rc;
 875 
 876     OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
 877                          "%s hostfile: creating ordered list of hosts from hostfile %s",
 878                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostfile));
 879 
 880     OBJ_CONSTRUCT(&exclude, opal_list_t);
 881 
 882     /* parse the hostfile and add the contents to the list, keeping duplicates */
 883     if (ORTE_SUCCESS != (rc = hostfile_parse(hostfile, nodes, &exclude, true))) {
 884         goto cleanup;
 885     }
 886 
 887     /* parse the nodes to process any relative node directives */
 888     item2 = opal_list_get_first(nodes);
 889     while (item2 != opal_list_get_end(nodes)) {
 890         orte_node_t *node=(orte_node_t*)item2;
 891 
 892         /* save the next location in case this one gets removed */
 893         item1 = opal_list_get_next(item2);
 894 
 895         if ('+' != node->name[0]) {
 896             item2 = item1;
 897             continue;
 898         }
 899 
 900         /* see if we specified empty nodes */
 901         if ('e' == node->name[1] ||
 902             'E' == node->name[1]) {
 903             /* request for empty nodes - do they want
 904              * all of them?
 905              */
 906             if (NULL != (cptr = strchr(node->name, ':'))) {
 907                 /* the colon indicates a specific # are requested */
 908                 cptr++; /* step past : */
 909                 num_empty = strtol(cptr, NULL, 10);
 910             } else {
 911                 /* want them all - set num_empty to max */
 912                 num_empty = INT_MAX;
 913                 want_all_empty = true;
 914             }
 915             /* insert empty nodes into newnodes list in place of the current item.
 916              * since item1 is the next item, we insert in front of it
 917              */
 918             if (!orte_hnp_is_allocated && 0 == startempty) {
 919                startempty = 1;
 920             }
 921             for (i=startempty; 0 < num_empty && i < orte_node_pool->size; i++) {
 922                 if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, i))) {
 923                     continue;
 924                 }
 925                 if (0 == node_from_pool->slots_inuse) {
 926                     newnode = OBJ_NEW(orte_node_t);
 927                     newnode->name = strdup(node_from_pool->name);
 928                     /* if the slot count here is less than the
 929                      * total slots avail on this node, set it
 930                      * to the specified count - this allows people
 931                      * to subdivide an allocation
 932                      */
 933                     if (node->slots < node_from_pool->slots) {
 934                         newnode->slots = node->slots;
 935                     } else {
 936                         newnode->slots = node_from_pool->slots;
 937                     }
 938                     opal_list_insert_pos(nodes, item1, &newnode->super);
 939                     /* track number added */
 940                     --num_empty;
 941                 }
 942             }
 943             /* bookmark where we stopped in case they ask for more */
 944             startempty = i;
 945             /* did they get everything they wanted? */
 946             if (!want_all_empty && 0 < num_empty) {
 947                 orte_show_help("help-hostfile.txt", "hostfile:not-enough-empty",
 948                                true, num_empty);
 949                 rc = ORTE_ERR_SILENT;
 950                 goto cleanup;
 951             }
 952             /* since we have expanded the provided node, remove
 953              * it from list
 954              */
 955             opal_list_remove_item(nodes, item2);
 956             OBJ_RELEASE(item2);
 957         } else if ('n' == node->name[1] ||
 958                    'N' == node->name[1]) {
 959             /* they want a specific relative node #, so
 960              * look it up on global pool
 961              */
 962             nodeidx = strtol(&node->name[2], NULL, 10);
 963             /* if the HNP is not allocated, then we need to
 964              * adjust the index as the node pool is offset
 965              * by one
 966              */
 967             if (!orte_hnp_is_allocated) {
 968                 nodeidx++;
 969             }
 970             /* see if that location is filled */
 971             if (NULL == (node_from_pool = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, nodeidx))) {
 972                 /* this is an error */
 973                 orte_show_help("help-hostfile.txt", "hostfile:relative-node-not-found",
 974                                true, nodeidx, node->name);
 975                 rc = ORTE_ERR_SILENT;
 976                 goto cleanup;
 977             }
 978             /* create the node object */
 979             newnode = OBJ_NEW(orte_node_t);
 980             newnode->name = strdup(node_from_pool->name);
 981             /* if the slot count here is less than the
 982              * total slots avail on this node, set it
 983              * to the specified count - this allows people
 984              * to subdivide an allocation
 985              */
 986             if (node->slots < node_from_pool->slots) {
 987                 newnode->slots = node->slots;
 988             } else {
 989                 newnode->slots = node_from_pool->slots;
 990             }
 991             /* insert it before item1 */
 992             opal_list_insert_pos(nodes, item1, &newnode->super);
 993             /* since we have expanded the provided node, remove
 994              * it from list
 995              */
 996             opal_list_remove_item(nodes, item2);
 997             OBJ_RELEASE(item2);
 998         } else {
 999             /* invalid relative node syntax */
1000             orte_show_help("help-hostfile.txt", "hostfile:invalid-relative-node-syntax",
1001                            true, node->name);
1002             rc = ORTE_ERR_SILENT;
1003             goto cleanup;
1004         }
1005 
1006         /* move to next */
1007         item2 = item1;
1008     }
1009 
1010     /* remove from the list of nodes those that are in the exclude list */
1011     while(NULL != (item = opal_list_remove_first(&exclude))) {
1012         orte_node_t *exnode = (orte_node_t*)item;
1013         /* check for matches on nodes */
1014         for (itm = opal_list_get_first(nodes);
1015              itm != opal_list_get_end(nodes);
1016              itm = opal_list_get_next(itm)) {
1017             orte_node_t *node=(orte_node_t*)itm;
1018             if (0 == strcmp(exnode->name, node->name)) {
1019                 /* match - remove it */
1020                 opal_list_remove_item(nodes, itm);
1021                 OBJ_RELEASE(itm);
1022                 /* have to cycle through the entire list as we could
1023                  * have duplicates
1024                  */
1025             }
1026         }
1027         OBJ_RELEASE(item);
1028     }
1029 
1030 cleanup:
1031     OBJ_DESTRUCT(&exclude);
1032 
1033     return rc;
1034 }

/* [<][>][^][v][top][bottom][index][help] */