root/orte/mca/rmaps/round_robin/rmaps_rr.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_rmaps_rr_map
  2. orte_rmaps_rr_assign_locations

   1 /*
   2  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2006 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2006-2013 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  14  *                         All rights reserved.
  15  * Copyright (c) 2014-2017 Intel, Inc.  All rights reserved.
  16  * Copyright (c) 2017      Research Organization for Information Science
  17  *                         and Technology (RIST). All rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 #include "orte_config.h"
  26 #include "orte/constants.h"
  27 #include "orte/types.h"
  28 
  29 #include <errno.h>
  30 #ifdef HAVE_UNISTD_H
  31 #include <unistd.h>
  32 #endif  /* HAVE_UNISTD_H */
  33 #include <string.h>
  34 
  35 #include "orte/util/show_help.h"
  36 #include "orte/mca/errmgr/errmgr.h"
  37 #include "orte/util/error_strings.h"
  38 
  39 #include "orte/mca/rmaps/base/rmaps_private.h"
  40 #include "orte/mca/rmaps/base/base.h"
  41 #include "rmaps_rr.h"
  42 
  43 /*
  44  * Create a round-robin mapping for the job.
  45  */
  46 static int orte_rmaps_rr_map(orte_job_t *jdata)
  47 {
  48     orte_app_context_t *app;
  49     int i;
  50     opal_list_t node_list;
  51     opal_list_item_t *item;
  52     orte_std_cntr_t num_slots;
  53     int rc;
  54     mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
  55     bool initial_map=true;
  56 
  57     /* this mapper can only handle initial launch
  58      * when rr mapping is desired - allow
  59      * restarting of failed apps
  60      */
  61     if (ORTE_FLAG_TEST(jdata, ORTE_JOB_FLAG_RESTART)) {
  62         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  63                             "mca:rmaps:rr: job %s is being restarted - rr cannot map",
  64                             ORTE_JOBID_PRINT(jdata->jobid));
  65         return ORTE_ERR_TAKE_NEXT_OPTION;
  66     }
  67     if (NULL != jdata->map->req_mapper &&
  68         0 != strcasecmp(jdata->map->req_mapper, c->mca_component_name)) {
  69         /* a mapper has been specified, and it isn't me */
  70         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  71                             "mca:rmaps:rr: job %s not using rr mapper",
  72                             ORTE_JOBID_PRINT(jdata->jobid));
  73         return ORTE_ERR_TAKE_NEXT_OPTION;
  74     }
  75     if (ORTE_MAPPING_RR < ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
  76         /* I don't know how to do these - defer */
  77         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  78                             "mca:rmaps:rr: job %s not using rr mapper",
  79                             ORTE_JOBID_PRINT(jdata->jobid));
  80         return ORTE_ERR_TAKE_NEXT_OPTION;
  81     }
  82 
  83     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  84                         "mca:rmaps:rr: mapping job %s",
  85                         ORTE_JOBID_PRINT(jdata->jobid));
  86 
  87     /* flag that I did the mapping */
  88     if (NULL != jdata->map->last_mapper) {
  89         free(jdata->map->last_mapper);
  90     }
  91     jdata->map->last_mapper = strdup(c->mca_component_name);
  92 
  93     /* start at the beginning... */
  94     jdata->num_procs = 0;
  95 
  96     /* cycle through the app_contexts, mapping them sequentially */
  97     for(i=0; i < jdata->apps->size; i++) {
  98         hwloc_obj_type_t target;
  99         unsigned cache_level;
 100         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
 101             continue;
 102         }
 103 
 104         /* setup the nodelist here in case we jump to error */
 105         OBJ_CONSTRUCT(&node_list, opal_list_t);
 106 
 107         /* if the number of processes wasn't specified, then we know there can be only
 108          * one app_context allowed in the launch, and that we are to launch it across
 109          * all available slots. We'll double-check the single app_context rule first
 110          */
 111         if (0 == app->num_procs && 1 < jdata->num_apps) {
 112             orte_show_help("help-orte-rmaps-rr.txt", "orte-rmaps-rr:multi-apps-and-zero-np",
 113                            true, jdata->num_apps, NULL);
 114             rc = ORTE_ERR_SILENT;
 115             goto error;
 116         }
 117 
 118         /* for each app_context, we have to get the list of nodes that it can
 119          * use since that can now be modified with a hostfile and/or -host
 120          * option
 121          */
 122         if(ORTE_SUCCESS != (rc = orte_rmaps_base_get_target_nodes(&node_list, &num_slots, app,
 123                                                                   jdata->map->mapping, initial_map, false))) {
 124             ORTE_ERROR_LOG(rc);
 125             goto error;
 126         }
 127         /* flag that all subsequent requests should not reset the node->mapped flag */
 128         initial_map = false;
 129 
 130         /* if a bookmark exists from some prior mapping, set us to start there */
 131         jdata->bookmark = orte_rmaps_base_get_starting_point(&node_list, jdata);
 132 
 133         if (0 == app->num_procs) {
 134             /* set the num_procs to equal the number of slots on these
 135              * mapped nodes, taking into account the number of cpus/rank
 136              */
 137             app->num_procs = num_slots;
 138             /* sometimes, we have only one "slot" assigned, but may
 139              * want more than one cpu/rank - so ensure we always wind
 140              * up with at least one proc */
 141             if (0 == app->num_procs) {
 142                 app->num_procs = 1;
 143             }
 144         }
 145 
 146         /* Make assignments */
 147         if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 148             rc = orte_rmaps_rr_bynode(jdata, app, &node_list, num_slots,
 149                                       app->num_procs);
 150         } else if (ORTE_MAPPING_BYSLOT == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 151             rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 152                                       app->num_procs);
 153         } else if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 154             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
 155                                      app->num_procs, HWLOC_OBJ_PU, 0);
 156             if (ORTE_ERR_NOT_FOUND == rc) {
 157                 /* if the mapper couldn't map by this object because
 158                  * it isn't available, but the error allows us to try
 159                  * byslot, then do so
 160                  */
 161                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 162                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 163                                           app->num_procs);
 164             }
 165         } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 166             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
 167                                      app->num_procs, HWLOC_OBJ_CORE, 0);
 168             if (ORTE_ERR_NOT_FOUND == rc) {
 169                 /* if the mapper couldn't map by this object because
 170                  * it isn't available, but the error allows us to try
 171                  * byslot, then do so
 172                  */
 173                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 174                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 175                                           app->num_procs);
 176             }
 177         } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 178             OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
 179             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs,
 180                                      target, cache_level);
 181             if (ORTE_ERR_NOT_FOUND == rc) {
 182                 /* if the mapper couldn't map by this object because
 183                  * it isn't available, but the error allows us to try
 184                  * byslot, then do so
 185                  */
 186                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 187                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 188                                           app->num_procs);
 189             }
 190         } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 191             OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
 192             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs,
 193                                      target, cache_level);
 194             if (ORTE_ERR_NOT_FOUND == rc) {
 195                 /* if the mapper couldn't map by this object because
 196                  * it isn't available, but the error allows us to try
 197                  * byslot, then do so
 198                  */
 199                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 200                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 201                                           app->num_procs);
 202             }
 203         } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 204             OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
 205             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots, app->num_procs,
 206                                      target, cache_level);
 207             if (ORTE_ERR_NOT_FOUND == rc) {
 208                 /* if the mapper couldn't map by this object because
 209                  * it isn't available, but the error allows us to try
 210                  * byslot, then do so
 211                  */
 212                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 213                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 214                                           app->num_procs);
 215             }
 216         } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 217             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
 218                                      app->num_procs, HWLOC_OBJ_SOCKET, 0);
 219             if (ORTE_ERR_NOT_FOUND == rc) {
 220                 /* if the mapper couldn't map by this object because
 221                  * it isn't available, but the error allows us to try
 222                  * byslot, then do so
 223                  */
 224                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 225                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 226                                           app->num_procs);
 227             }
 228         } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 229             rc = orte_rmaps_rr_byobj(jdata, app, &node_list, num_slots,
 230                                      app->num_procs, HWLOC_OBJ_NODE, 0);
 231             if (ORTE_ERR_NOT_FOUND == rc) {
 232                 /* if the mapper couldn't map by this object because
 233                  * it isn't available, but the error allows us to try
 234                  * byslot, then do so
 235                  */
 236                 ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 237                 rc = orte_rmaps_rr_byslot(jdata, app, &node_list, num_slots,
 238                                           app->num_procs);
 239             }
 240         } else {
 241             /* unrecognized mapping directive */
 242             orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy",
 243                            true, "mapping",
 244                            orte_rmaps_base_print_mapping(jdata->map->mapping));
 245             rc = ORTE_ERR_SILENT;
 246             goto error;
 247         }
 248         if (ORTE_SUCCESS != rc) {
 249             ORTE_ERROR_LOG(rc);
 250             goto error;
 251         }
 252 
 253         /* track the total number of processes we mapped - must update
 254          * this value AFTER we compute vpids so that computation
 255          * is done correctly
 256          */
 257         jdata->num_procs += app->num_procs;
 258 
 259         /* cleanup the node list - it can differ from one app_context
 260          * to another, so we have to get it every time
 261          */
 262         while (NULL != (item = opal_list_remove_first(&node_list))) {
 263             OBJ_RELEASE(item);
 264         }
 265         OBJ_DESTRUCT(&node_list);
 266     }
 267 
 268     return ORTE_SUCCESS;
 269 
 270  error:
 271     while(NULL != (item = opal_list_remove_first(&node_list))) {
 272         OBJ_RELEASE(item);
 273     }
 274     OBJ_DESTRUCT(&node_list);
 275 
 276     return rc;
 277 }
 278 
 279 static int orte_rmaps_rr_assign_locations(orte_job_t *jdata)
 280 {
 281     mca_base_component_t *c = &mca_rmaps_round_robin_component.base_version;
 282     hwloc_obj_type_t target;
 283     unsigned cache_level;
 284     int rc;
 285 
 286     if (NULL == jdata->map->last_mapper ||
 287         0 != strcasecmp(jdata->map->last_mapper, c->mca_component_name)) {
 288         /* a mapper has been specified, and it isn't me */
 289         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 290                             "mca:rmaps:rr: job %s not using rr mapper",
 291                             ORTE_JOBID_PRINT(jdata->jobid));
 292         return ORTE_ERR_TAKE_NEXT_OPTION;
 293     }
 294 
 295     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 296                         "mca:rmaps:rr: assign locations for job %s",
 297                         ORTE_JOBID_PRINT(jdata->jobid));
 298 
 299     /* if the mapping directive was byslot or bynode, then we
 300      * assign locations to the root object level */
 301     if (ORTE_MAPPING_BYNODE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping) ||
 302         ORTE_MAPPING_BYSLOT == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 303         return orte_rmaps_rr_assign_root_level(jdata);
 304     }
 305 
 306     /* otherwise, assign by object */
 307     if (ORTE_MAPPING_BYHWTHREAD == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 308         rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_PU, 0);
 309         if (ORTE_ERR_NOT_FOUND == rc) {
 310             /* if the mapper couldn't assign by this object because
 311              * it isn't available, but the error allows us to try
 312              * byslot, then do so
 313              */
 314             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 315             rc = orte_rmaps_rr_assign_root_level(jdata);
 316         }
 317     } else if (ORTE_MAPPING_BYCORE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 318         rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_CORE, 0);
 319         if (ORTE_ERR_NOT_FOUND == rc) {
 320             /* if the mapper couldn't map by this object because
 321              * it isn't available, but the error allows us to try
 322              * byslot, then do so
 323              */
 324             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 325             rc = orte_rmaps_rr_assign_root_level(jdata);
 326         }
 327     } else if (ORTE_MAPPING_BYL1CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 328         OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
 329         rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
 330         if (ORTE_ERR_NOT_FOUND == rc) {
 331             /* if the mapper couldn't map by this object because
 332              * it isn't available, but the error allows us to try
 333              * byslot, then do so
 334              */
 335             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 336             rc = orte_rmaps_rr_assign_root_level(jdata);
 337         }
 338     } else if (ORTE_MAPPING_BYL2CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 339         OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
 340         rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
 341         if (ORTE_ERR_NOT_FOUND == rc) {
 342             /* if the mapper couldn't map by this object because
 343              * it isn't available, but the error allows us to try
 344              * byslot, then do so
 345              */
 346             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 347             rc = orte_rmaps_rr_assign_root_level(jdata);
 348         }
 349     } else if (ORTE_MAPPING_BYL3CACHE == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 350         OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
 351         rc = orte_rmaps_rr_assign_byobj(jdata, target, cache_level);
 352         if (ORTE_ERR_NOT_FOUND == rc) {
 353             /* if the mapper couldn't map by this object because
 354              * it isn't available, but the error allows us to try
 355              * byslot, then do so
 356              */
 357             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 358             rc = orte_rmaps_rr_assign_root_level(jdata);
 359         }
 360     } else if (ORTE_MAPPING_BYSOCKET == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 361         rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_SOCKET, 0);
 362         if (ORTE_ERR_NOT_FOUND == rc) {
 363             /* if the mapper couldn't map by this object because
 364              * it isn't available, but the error allows us to try
 365              * byslot, then do so
 366              */
 367             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 368             rc = orte_rmaps_rr_assign_root_level(jdata);
 369         }
 370     } else if (ORTE_MAPPING_BYNUMA == ORTE_GET_MAPPING_POLICY(jdata->map->mapping)) {
 371         rc = orte_rmaps_rr_assign_byobj(jdata, HWLOC_OBJ_NODE, 0);
 372         if (ORTE_ERR_NOT_FOUND == rc) {
 373             /* if the mapper couldn't map by this object because
 374              * it isn't available, but the error allows us to try
 375              * byslot, then do so
 376              */
 377             ORTE_SET_MAPPING_POLICY(jdata->map->mapping, ORTE_MAPPING_BYSLOT);
 378             rc = orte_rmaps_rr_assign_root_level(jdata);
 379         }
 380     } else {
 381         /* unrecognized mapping directive */
 382         orte_show_help("help-orte-rmaps-base.txt", "unrecognized-policy",
 383                        true, "mapping",
 384                        orte_rmaps_base_print_mapping(jdata->map->mapping));
 385         rc = ORTE_ERR_SILENT;
 386     }
 387     return rc;
 388 }
 389 
 390 orte_rmaps_base_module_t orte_rmaps_round_robin_module = {
 391     .map_job = orte_rmaps_rr_map,
 392     .assign_locations = orte_rmaps_rr_assign_locations
 393 };

/* [<][>][^][v][top][bottom][index][help] */