root/orte/util/proc_info.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_proc_info
  2. orte_proc_info_finalize
  3. orte_ifislocal

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2009-2016 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2012      Los Alamos National Security, LLC.
  14  *                         All rights reserved.
  15  * Copyright (c) 2014-2017 Intel, Inc. All rights reserved.
  16  * Copyright (c) 2016      IBM Corporation.  All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 #include "orte_config.h"
  25 #include "orte/constants.h"
  26 
  27 #include <stdio.h>
  28 #include <stdlib.h>
  29 #include <string.h>
  30 #ifdef HAVE_UNISTD_H
  31 #include <unistd.h>
  32 #endif
  33 #ifdef HAVE_SYS_TYPES_H
  34 #include <sys/types.h>
  35 #endif
  36 #include <ctype.h>
  37 
  38 #include "opal/mca/base/base.h"
  39 #include "opal/mca/base/mca_base_var.h"
  40 #include "opal/util/argv.h"
  41 #include "opal/util/if.h"
  42 #include "opal/util/net.h"
  43 #include "opal/util/output.h"
  44 #include "opal/util/proc.h"
  45 
  46 #include "orte/util/attr.h"
  47 
  48 #include "orte/util/proc_info.h"
  49 
  50 /* provide a connection to a reqd variable */
  51 extern bool orte_keep_fqdn_hostnames;
  52 
  53 #define ORTE_NAME_INVALID {ORTE_JOBID_INVALID, ORTE_VPID_INVALID}
  54 
  55 ORTE_DECLSPEC orte_proc_info_t orte_process_info = {
  56     .my_name =                         ORTE_NAME_INVALID,
  57     .my_daemon =                       ORTE_NAME_INVALID,
  58     .my_daemon_uri =                   NULL,
  59     .my_hnp =                          ORTE_NAME_INVALID,
  60     .my_hnp_uri =                      NULL,
  61     .my_parent =                       ORTE_NAME_INVALID,
  62     .hnp_pid =                         0,
  63     .app_num =                         0,
  64     .num_procs =                       1,
  65     .max_procs =                       1,
  66     .num_daemons =                     1,
  67     .num_nodes =                       1,
  68     .nodename =                        NULL,
  69     .aliases =                         NULL,
  70     .pid =                             0,
  71     .proc_type =                       ORTE_PROC_TYPE_NONE,
  72     .my_port =                         0,
  73     .num_restarts =                    0,
  74     .my_node_rank =                    ORTE_NODE_RANK_INVALID,
  75     .my_local_rank =                   ORTE_LOCAL_RANK_INVALID,
  76     .num_local_peers =                 0,
  77     .tmpdir_base =                     NULL,
  78     .top_session_dir =                 NULL,
  79     .jobfam_session_dir =              NULL,
  80     .job_session_dir =                 NULL,
  81     .proc_session_dir =                NULL,
  82     .sock_stdin =                      NULL,
  83     .sock_stdout =                     NULL,
  84     .sock_stderr =                     NULL,
  85     .cpuset =                          NULL,
  86     .app_rank =                        -1,
  87     .my_hostid =                       ORTE_VPID_INVALID
  88 };
  89 
  90 static bool init=false;
  91 static int orte_ess_node_rank;
  92 static char *orte_strip_prefix;
  93 
  94 int orte_proc_info(void)
  95 {
  96 
  97     int idx, i;
  98     char *ptr;
  99     char hostname[OPAL_MAXHOSTNAMELEN];
 100     char **prefixes;
 101     bool match;
 102 
 103     if (init) {
 104         return ORTE_SUCCESS;
 105     }
 106 
 107     init = true;
 108 
 109     OBJ_CONSTRUCT(&orte_process_info.super, opal_proc_t);
 110 
 111     orte_process_info.my_hnp_uri = NULL;
 112     mca_base_var_register ("orte", "orte", NULL, "hnp_uri",
 113                            "HNP contact info",
 114                            MCA_BASE_VAR_TYPE_STRING, NULL, 0,
 115                            MCA_BASE_VAR_FLAG_INTERNAL,
 116                            OPAL_INFO_LVL_9,
 117                            MCA_BASE_VAR_SCOPE_READONLY,
 118                            &orte_process_info.my_hnp_uri);
 119 
 120     if (NULL != orte_process_info.my_hnp_uri) {
 121         ptr = orte_process_info.my_hnp_uri;
 122         /* the uri value passed to us will have quote marks around it to protect
 123         * the value if passed on the command line. We must remove those
 124         * to have a correct uri string
 125         */
 126         if ('"' == ptr[0]) {
 127             /* if the first char is a quote, then so will the last one be */
 128             ptr[strlen(ptr)-1] = '\0';
 129             memmove (ptr, ptr + 1, strlen (ptr));
 130         }
 131     }
 132 
 133     orte_process_info.my_daemon_uri = NULL;
 134     (void) mca_base_var_register ("orte", "orte", NULL, "local_daemon_uri",
 135                                   "Daemon contact info",
 136                                   MCA_BASE_VAR_TYPE_STRING, NULL, 0,
 137                                   MCA_BASE_VAR_FLAG_INTERNAL,
 138                                   OPAL_INFO_LVL_9,
 139                                   MCA_BASE_VAR_SCOPE_READONLY,
 140                                   &orte_process_info.my_daemon_uri);
 141 
 142     if (NULL != orte_process_info.my_daemon_uri) {
 143         ptr = orte_process_info.my_daemon_uri;
 144         /* the uri value passed to us may have quote marks around it to protect
 145          * the value if passed on the command line. We must remove those
 146          * to have a correct uri string
 147          */
 148         if ('"' == ptr[0]) {
 149             /* if the first char is a quote, then so will the last one be */
 150             ptr[strlen(ptr)-1] = '\0';
 151             memmove (ptr, ptr + 1, strlen (ptr) - 1);
 152         }
 153     }
 154 
 155     orte_process_info.app_num = 0;
 156     (void) mca_base_var_register ("orte", "orte", NULL, "app_num",
 157                                   "Index of the app_context that defines this proc",
 158                                   MCA_BASE_VAR_TYPE_INT, NULL, 0,
 159                                   MCA_BASE_VAR_FLAG_INTERNAL,
 160                                   OPAL_INFO_LVL_9,
 161                                   MCA_BASE_VAR_SCOPE_READONLY,
 162                                   &orte_process_info.app_num);
 163 
 164     /* get the process id */
 165     orte_process_info.pid = getpid();
 166 
 167     /* get the nodename */
 168     gethostname(hostname, sizeof(hostname));
 169     /* add this to our list of aliases */
 170     opal_argv_append_nosize(&orte_process_info.aliases, hostname);
 171 
 172     // Strip off the FQDN if present, ignore IP addresses
 173     if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hostname) ) {
 174         if (NULL != (ptr = strchr(hostname, '.'))) {
 175             *ptr = '\0';
 176             /* add this to our list of aliases */
 177             opal_argv_append_nosize(&orte_process_info.aliases, hostname);
 178         }
 179     }
 180 
 181     orte_strip_prefix = NULL;
 182     (void) mca_base_var_register ("orte", "orte", NULL, "strip_prefix",
 183                   "Prefix(es) to match when deciding whether to strip leading characters and zeroes from "
 184                   "node names returned by daemons", MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 185                   OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 186                   &orte_strip_prefix);
 187 
 188     /* we have to strip node names here, if user directs, to ensure that
 189      * the names exchanged in the modex match the names found locally
 190      */
 191     if (NULL != orte_strip_prefix) {
 192         prefixes = opal_argv_split(orte_strip_prefix, ',');
 193         match = false;
 194         for (i=0; NULL != prefixes[i]; i++) {
 195             if (0 == strncmp(hostname, prefixes[i], strlen(prefixes[i]))) {
 196                 /* remove the prefix and leading zeroes */
 197                 idx = strlen(prefixes[i]);
 198                 while (idx < (int)strlen(hostname) &&
 199                        (hostname[idx] <= '0' || '9' < hostname[idx])) {
 200                     idx++;
 201                 }
 202                 if ((int)strlen(hostname) <= idx) {
 203                     /* there were no non-zero numbers in the name */
 204                     orte_process_info.nodename = strdup(&hostname[strlen(prefixes[i])]);
 205                 } else {
 206                     orte_process_info.nodename = strdup(&hostname[idx]);
 207                 }
 208                 /* add this to our list of aliases */
 209                 opal_argv_append_nosize(&orte_process_info.aliases, orte_process_info.nodename);
 210                 match = true;
 211                 break;
 212             }
 213         }
 214         /* if we didn't find a match, then just use the hostname as-is */
 215         if (!match) {
 216             orte_process_info.nodename = strdup(hostname);
 217         }
 218         opal_argv_free(prefixes);
 219     } else {
 220         orte_process_info.nodename = strdup(hostname);
 221     }
 222 
 223     /* add "localhost" to our list of aliases */
 224     opal_argv_append_nosize(&orte_process_info.aliases, "localhost");
 225 
 226     /* get the number of nodes in the job */
 227     orte_process_info.num_nodes = 1;
 228     (void) mca_base_var_register ("orte", "orte", NULL, "num_nodes",
 229                                   "Number of nodes in the job",
 230                                   MCA_BASE_VAR_TYPE_INT, NULL, 0,
 231                                   MCA_BASE_VAR_FLAG_INTERNAL,
 232                                   OPAL_INFO_LVL_9,
 233                                   MCA_BASE_VAR_SCOPE_READONLY,
 234                                   &orte_process_info.num_nodes);
 235 
 236     /* get the number of times this proc has restarted */
 237     orte_process_info.num_restarts = 0;
 238     (void) mca_base_var_register ("orte", "orte", NULL, "num_restarts",
 239                                   "Number of times this proc has restarted",
 240                                   MCA_BASE_VAR_TYPE_INT, NULL, 0,
 241                                   MCA_BASE_VAR_FLAG_INTERNAL,
 242                                   OPAL_INFO_LVL_9,
 243                                   MCA_BASE_VAR_SCOPE_READONLY,
 244                                   &orte_process_info.num_restarts);
 245 
 246     orte_process_info.app_rank = 0;
 247     (void) mca_base_var_register ("orte", "orte", NULL, "app_rank",
 248                                   "Rank of this proc within its app_context",
 249                                   MCA_BASE_VAR_TYPE_INT, NULL, 0,
 250                                   MCA_BASE_VAR_FLAG_INTERNAL,
 251                                   OPAL_INFO_LVL_9,
 252                                   MCA_BASE_VAR_SCOPE_READONLY,
 253                                   &orte_process_info.app_rank);
 254 
 255     /* get my node rank in case we are using static ports - this won't
 256      * be present for daemons, so don't error out if we don't have it
 257      */
 258     orte_ess_node_rank = ORTE_NODE_RANK_INVALID;
 259     (void) mca_base_var_register ("orte", "orte", NULL, "ess_node_rank", "Process node rank",
 260                                   MCA_BASE_VAR_TYPE_INT, NULL, 0,
 261                                   MCA_BASE_VAR_FLAG_INTERNAL,
 262                                   OPAL_INFO_LVL_9,
 263                                   MCA_BASE_VAR_SCOPE_CONSTANT,
 264                                   &orte_ess_node_rank);
 265     orte_process_info.my_node_rank = (orte_node_rank_t) orte_ess_node_rank;
 266 
 267     return ORTE_SUCCESS;
 268 }
 269 
 270 
 271 int orte_proc_info_finalize(void)
 272 {
 273     if (!init) {
 274         return ORTE_SUCCESS;
 275     }
 276 
 277     if (NULL != orte_process_info.tmpdir_base) {
 278         free(orte_process_info.tmpdir_base);
 279         orte_process_info.tmpdir_base = NULL;
 280     }
 281 
 282     if (NULL != orte_process_info.top_session_dir) {
 283         free(orte_process_info.top_session_dir);
 284         orte_process_info.top_session_dir = NULL;
 285     }
 286 
 287     if (NULL != orte_process_info.jobfam_session_dir) {
 288         free(orte_process_info.jobfam_session_dir);
 289         orte_process_info.jobfam_session_dir = NULL;
 290     }
 291 
 292     if (NULL != orte_process_info.job_session_dir) {
 293         free(orte_process_info.job_session_dir);
 294         orte_process_info.job_session_dir = NULL;
 295     }
 296 
 297     if (NULL != orte_process_info.proc_session_dir) {
 298         free(orte_process_info.proc_session_dir);
 299         orte_process_info.proc_session_dir = NULL;
 300     }
 301 
 302     if (NULL != orte_process_info.nodename) {
 303         free(orte_process_info.nodename);
 304         orte_process_info.nodename = NULL;
 305     }
 306 
 307     if (NULL != orte_process_info.cpuset) {
 308         free(orte_process_info.cpuset);
 309         orte_process_info.cpuset = NULL;
 310     }
 311 
 312     if (NULL != orte_process_info.sock_stdin) {
 313         free(orte_process_info.sock_stdin);
 314         orte_process_info.sock_stdin = NULL;
 315     }
 316 
 317     if (NULL != orte_process_info.sock_stdout) {
 318         free(orte_process_info.sock_stdout);
 319         orte_process_info.sock_stdout = NULL;
 320     }
 321 
 322     if (NULL != orte_process_info.sock_stderr) {
 323         free(orte_process_info.sock_stderr);
 324         orte_process_info.sock_stderr = NULL;
 325     }
 326 
 327     orte_process_info.proc_type = ORTE_PROC_TYPE_NONE;
 328 
 329     opal_argv_free(orte_process_info.aliases);
 330 
 331     init = false;
 332     return ORTE_SUCCESS;
 333 }
 334 
 335 bool orte_ifislocal(const char *hostname)
 336 {
 337     int i;
 338 
 339     /* see if it matches any of our known aliases */
 340     if (NULL != orte_process_info.aliases) {
 341         for (i=0; NULL != orte_process_info.aliases[i]; i++) {
 342             if (0 == strcmp(hostname, orte_process_info.aliases[i])) {
 343                 return true;
 344             }
 345         }
 346     }
 347 
 348     /* okay, have to resolve the address - the opal_ifislocal
 349      * function will not attempt to resolve the address if
 350      * told not to do so */
 351     if (opal_ifislocal(hostname)) {
 352         /* add this to our known aliases */
 353         opal_argv_append_nosize(&orte_process_info.aliases, hostname);
 354         return true;
 355     }
 356 
 357     /* not me */
 358     return false;
 359 }

/* [<][>][^][v][top][bottom][index][help] */