root/orte/mca/ras/simulator/ras_sim_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. allocate
  2. finalize

   1 /*
   2  * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
   3  * Copyright (c) 2012      Los Alamos National Security, LLC. All rights reserved
   4  * Copyright (c) 2015-2017 Research Organization for Information Science
   5  *                         and Technology (RIST). All rights reserved.
   6  * Copyright (c) 2015-2018 Intel, Inc.  All rights reserved.
   7  *
   8  * $COPYRIGHT$
   9  *
  10  * Additional copyrights may follow
  11  *
  12  * $HEADER$
  13  */
  14 #include "orte_config.h"
  15 #include "orte/constants.h"
  16 #include "orte/types.h"
  17 
  18 #include <unistd.h>
  19 #include <string.h>
  20 #include <ctype.h>
  21 
  22 #include "opal/class/opal_list.h"
  23 #include "opal/mca/hwloc/hwloc-internal.h"
  24 #include "opal/util/argv.h"
  25 
  26 #include "orte/mca/errmgr/errmgr.h"
  27 #include "orte/util/show_help.h"
  28 #include "orte/runtime/orte_globals.h"
  29 
  30 #include "ras_sim.h"
  31 
  32 
  33 /*
  34  * Local functions
  35  */
  36 static int allocate(orte_job_t *jdata, opal_list_t *nodes);
  37 static int finalize(void);
  38 
  39 
  40 /*
  41  * Global variable
  42  */
  43 orte_ras_base_module_t orte_ras_sim_module = {
  44     NULL,
  45     allocate,
  46     NULL,
  47     finalize
  48 };
  49 
  50 static int allocate(orte_job_t *jdata, opal_list_t *nodes)
  51 {
  52     int i, n, val, dig, num_nodes;
  53     orte_node_t *node;
  54     orte_topology_t *t;
  55     hwloc_topology_t topo;
  56     hwloc_obj_t obj;
  57     unsigned j, k;
  58     struct hwloc_topology_support *support;
  59     char **files=NULL;
  60     char **topos = NULL;
  61     bool use_local_topology = false;
  62     char **node_cnt=NULL;
  63     char **slot_cnt=NULL;
  64     char **max_slot_cnt=NULL;
  65     char *tmp;
  66     char prefix[6];
  67 
  68     node_cnt = opal_argv_split(mca_ras_simulator_component.num_nodes, ',');
  69     if (NULL != mca_ras_simulator_component.slots) {
  70         slot_cnt = opal_argv_split(mca_ras_simulator_component.slots, ',');
  71         /* backfile the slot_cnt so every topology has a cnt */
  72         tmp = slot_cnt[opal_argv_count(slot_cnt)-1];
  73         for (n=opal_argv_count(slot_cnt); n < opal_argv_count(node_cnt); n++) {
  74             opal_argv_append_nosize(&slot_cnt, tmp);
  75         }
  76     }
  77     if (NULL != mca_ras_simulator_component.slots_max) {
  78         max_slot_cnt = opal_argv_split(mca_ras_simulator_component.slots_max, ',');
  79         /* backfill the max_slot_cnt as reqd */
  80         tmp = max_slot_cnt[opal_argv_count(slot_cnt)-1];
  81         for (n=opal_argv_count(max_slot_cnt); n < opal_argv_count(max_slot_cnt); n++) {
  82             opal_argv_append_nosize(&max_slot_cnt, tmp);
  83         }
  84     }
  85 
  86     if (NULL != mca_ras_simulator_component.topofiles) {
  87         files = opal_argv_split(mca_ras_simulator_component.topofiles, ',');
  88         if (opal_argv_count(files) != opal_argv_count(node_cnt)) {
  89             orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
  90             goto error_silent;
  91         }
  92     } else if (NULL != mca_ras_simulator_component.topologies) {
  93         topos = opal_argv_split(mca_ras_simulator_component.topologies, ',');
  94         if (opal_argv_count(topos) != opal_argv_count(node_cnt)) {
  95             orte_show_help("help-ras-base.txt", "ras-sim:mismatch", true);
  96             goto error_silent;
  97         }
  98     } else {
  99         /* use our topology */
 100         use_local_topology = true;
 101     }
 102 
 103     /* setup the prefix to the node names */
 104     snprintf(prefix, 6, "nodeA");
 105 
 106     /* process the request */
 107     for (n=0; NULL != node_cnt[n]; n++) {
 108         num_nodes = strtol(node_cnt[n], NULL, 10);
 109 
 110         /* get number of digits */
 111         val = num_nodes;
 112         for (dig=0; 0 != val; dig++) {
 113             val /= 10;
 114         }
 115 
 116         /* set the prefix for this group of nodes */
 117         prefix[4] += n;
 118 
 119         /* check for topology */
 120         if (use_local_topology) {
 121             /* use our topology */
 122             t = (orte_topology_t*)opal_pointer_array_get_item(orte_node_topologies, 0);
 123         } else if (NULL != files) {
 124             if (0 != hwloc_topology_init(&topo)) {
 125                 orte_show_help("help-ras-simulator.txt",
 126                                "hwloc API fail", true,
 127                                __FILE__, __LINE__, "hwloc_topology_init");
 128                 goto error_silent;
 129             }
 130             if (0 != hwloc_topology_set_xml(topo, files[n])) {
 131                 orte_show_help("help-ras-simulator.txt",
 132                                "hwloc failed to load xml", true, files[n]);
 133                 hwloc_topology_destroy(topo);
 134                 goto error_silent;
 135             }
 136             /* since we are loading this from an external source, we have to
 137              * explicitly set a flag so hwloc sets things up correctly
 138              */
 139             if (0 != opal_hwloc_base_topology_set_flags(topo, HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM, false)) {
 140                 orte_show_help("help-ras-simulator.txt",
 141                                "hwloc API fail", true,
 142                                __FILE__, __LINE__, "hwloc_topology_set_flags");
 143                 hwloc_topology_destroy(topo);
 144                 goto error_silent;
 145             }
 146             if (0 != hwloc_topology_load(topo)) {
 147                 orte_show_help("help-ras-simulator.txt",
 148                                "hwloc API fail", true,
 149                                __FILE__, __LINE__, "hwloc_topology_load");
 150                 hwloc_topology_destroy(topo);
 151                 goto error_silent;
 152             }
 153             /* remove the hostname from the topology. Unfortunately, hwloc
 154              * decided to add the source hostname to the "topology", thus
 155              * rendering it unusable as a pure topological description. So
 156              * we remove that information here.
 157              */
 158             obj = hwloc_get_root_obj(topo);
 159             for (k=0; k < obj->infos_count; k++) {
 160                 if (NULL == obj->infos[k].name ||
 161                     NULL == obj->infos[k].value) {
 162                     continue;
 163                 }
 164                 if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
 165                     free(obj->infos[k].name);
 166                     free(obj->infos[k].value);
 167                     /* left justify the array */
 168                     for (j=k; j < obj->infos_count-1; j++) {
 169                         obj->infos[j] = obj->infos[j+1];
 170                     }
 171                     obj->infos[obj->infos_count-1].name = NULL;
 172                     obj->infos[obj->infos_count-1].value = NULL;
 173                     obj->infos_count--;
 174                     break;
 175                 }
 176             }
 177             /* unfortunately, hwloc does not include support info in its
 178              * xml output :-(( To aid in debugging, we set it here
 179              */
 180             support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
 181             support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
 182             support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
 183             /* pass it thru the filter so we create the summaries required by the mappers */
 184             if (OPAL_SUCCESS != opal_hwloc_base_filter_cpus(topo)) {
 185                 ORTE_ERROR_LOG(ORTE_ERROR);
 186             }
 187             /* add it to our array */
 188             t = OBJ_NEW(orte_topology_t);
 189             t->topo = topo;
 190             t->sig = opal_hwloc_base_get_topo_signature(topo);
 191             opal_pointer_array_add(orte_node_topologies, t);
 192         } else {
 193             if (0 != hwloc_topology_init(&topo)) {
 194                 orte_show_help("help-ras-simulator.txt",
 195                                "hwloc API fail", true,
 196                                __FILE__, __LINE__, "hwloc_topology_init");
 197                 goto error_silent;
 198             }
 199             if (0 != hwloc_topology_set_synthetic(topo, topos[n])) {
 200                 orte_show_help("help-ras-simulator.txt",
 201                                "hwloc API fail", true,
 202                                __FILE__, __LINE__, "hwloc_topology_set_synthetic");
 203                 hwloc_topology_destroy(topo);
 204                 goto error_silent;
 205             }
 206             if (0 != hwloc_topology_load(topo)) {
 207                 orte_show_help("help-ras-simulator.txt",
 208                                "hwloc API fail", true,
 209                                __FILE__, __LINE__, "hwloc_topology_load");
 210                 hwloc_topology_destroy(topo);
 211                 goto error_silent;
 212             }
 213             /* remove the hostname from the topology. Unfortunately, hwloc
 214              * decided to add the source hostname to the "topology", thus
 215              * rendering it unusable as a pure topological description. So
 216              * we remove that information here.
 217              */
 218             obj = hwloc_get_root_obj(topo);
 219             for (k=0; k < obj->infos_count; k++) {
 220                 if (NULL == obj->infos[k].name ||
 221                     NULL == obj->infos[k].value) {
 222                     continue;
 223                 }
 224                 if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
 225                     free(obj->infos[k].name);
 226                     free(obj->infos[k].value);
 227                     /* left justify the array */
 228                     for (j=k; j < obj->infos_count-1; j++) {
 229                         obj->infos[j] = obj->infos[j+1];
 230                     }
 231                     obj->infos[obj->infos_count-1].name = NULL;
 232                     obj->infos[obj->infos_count-1].value = NULL;
 233                     obj->infos_count--;
 234                     break;
 235                 }
 236             }
 237             /* unfortunately, hwloc does not include support info in its
 238              * xml output :-(( To aid in debugging, we set it here
 239              */
 240             support = (struct hwloc_topology_support*)hwloc_topology_get_support(topo);
 241             support->cpubind->set_thisproc_cpubind = mca_ras_simulator_component.have_cpubind;
 242             support->membind->set_thisproc_membind = mca_ras_simulator_component.have_membind;
 243             /* add it to our array */
 244             t = OBJ_NEW(orte_topology_t);
 245             t->topo = topo;
 246             t->sig = opal_hwloc_base_get_topo_signature(topo);
 247             opal_pointer_array_add(orte_node_topologies, t);
 248         }
 249 
 250         for (i=0; i < num_nodes; i++) {
 251             node = OBJ_NEW(orte_node_t);
 252             opal_asprintf(&node->name, "%s%0*d", prefix, dig, i);
 253             node->state = ORTE_NODE_STATE_UP;
 254             node->slots_inuse = 0;
 255             if (NULL == max_slot_cnt || NULL == max_slot_cnt[n]) {
 256                 node->slots_max = 0;
 257             } else {
 258                 obj = hwloc_get_root_obj(t->topo);
 259                 node->slots_max = opal_hwloc_base_get_npus(t->topo, obj);
 260             }
 261             if (NULL == slot_cnt || NULL == slot_cnt[n]) {
 262                 node->slots = 0;
 263             } else {
 264                 obj = hwloc_get_root_obj(t->topo);
 265                 node->slots = opal_hwloc_base_get_npus(t->topo, obj);
 266             }
 267             OBJ_RETAIN(t);
 268             node->topology = t;
 269             opal_output_verbose(1, orte_ras_base_framework.framework_output,
 270                                 "Created Node <%10s> [%3d : %3d]",
 271                                 node->name, node->slots, node->slots_max);
 272             opal_list_append(nodes, &node->super);
 273         }
 274     }
 275 
 276     /* record the number of allocated nodes */
 277     orte_num_allocated_nodes = opal_list_get_size(nodes);
 278 
 279     if (NULL != max_slot_cnt) {
 280         opal_argv_free(max_slot_cnt);
 281     }
 282     if (NULL != slot_cnt) {
 283         opal_argv_free(slot_cnt);
 284     }
 285     if (NULL != node_cnt) {
 286         opal_argv_free(node_cnt);
 287     }
 288     if (NULL != topos) {
 289         opal_argv_free(topos);
 290     }
 291     return ORTE_SUCCESS;
 292 
 293 error_silent:
 294     if (NULL != max_slot_cnt) {
 295         opal_argv_free(max_slot_cnt);
 296     }
 297     if (NULL != slot_cnt) {
 298         opal_argv_free(slot_cnt);
 299     }
 300     if (NULL != node_cnt) {
 301         opal_argv_free(node_cnt);
 302     }
 303     return ORTE_ERR_SILENT;
 304 
 305 }
 306 
 307 /*
 308  * There's really nothing to do here
 309  */
 310 static int finalize(void)
 311 {
 312     return ORTE_SUCCESS;
 313 }

/* [<][>][^][v][top][bottom][index][help] */