root/orte/mca/rmaps/round_robin/rmaps_rr_assign.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. orte_rmaps_rr_assign_root_level
  2. orte_rmaps_rr_assign_byobj

   1 /*
   2  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2009-2013 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2013-2017 Intel, Inc.  All rights reserved.
  14  * Copyright (c) 2015      Research Organization for Information Science
  15  *                         and Technology (RIST). All rights reserved.
  16  * $COPYRIGHT$
  17  *
  18  * Additional copyrights may follow
  19  *
  20  * $HEADER$
  21  */
  22 
  23 #include "orte_config.h"
  24 #include "orte/constants.h"
  25 
  26 #include <string.h>
  27 
  28 #include "opal/util/output.h"
  29 #include "opal/mca/hwloc/base/base.h"
  30 
  31 #include "orte/util/show_help.h"
  32 #include "orte/util/name_fns.h"
  33 #include "orte/runtime/orte_globals.h"
  34 #include "orte/mca/errmgr/errmgr.h"
  35 
  36 #include "orte/mca/rmaps/base/rmaps_private.h"
  37 #include "orte/mca/rmaps/base/base.h"
  38 #include "rmaps_rr.h"
  39 
  40 int orte_rmaps_rr_assign_root_level(orte_job_t *jdata)
  41 {
  42     int i, m;
  43     orte_node_t *node;
  44     orte_proc_t *proc;
  45     hwloc_obj_t obj=NULL;
  46 
  47     opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
  48                         "mca:rmaps:rr: assigning procs to root level for job %s",
  49                         ORTE_JOBID_PRINT(jdata->jobid));
  50 
  51     for (m=0; m < jdata->map->nodes->size; m++) {
  52         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
  53             continue;
  54         }
  55         opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
  56                             "mca:rmaps:rr:slot working node %s",
  57                             node->name);
  58         /* get the root object as we are not assigning
  59          * locale here except at the node level */
  60         if (NULL == node->topology || NULL == node->topology->topo) {
  61             /* nothing we can do */
  62             continue;
  63         }
  64         obj = hwloc_get_root_obj(node->topology->topo);
  65         for (i=0; i < node->procs->size; i++) {
  66             if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, i))) {
  67                 continue;
  68             }
  69             /* ignore procs from other jobs */
  70             if (proc->name.jobid != jdata->jobid) {
  71                 opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  72                                     "mca:rmaps:rr:assign skipping proc %s - from another job",
  73                                     ORTE_NAME_PRINT(&proc->name));
  74                 continue;
  75             }
  76             orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
  77         }
  78     }
  79     return ORTE_SUCCESS;
  80 }
  81 
  82 /* mapping by hwloc object looks a lot like mapping by node,
  83  * but has the added complication of possibly having different
  84  * numbers of objects on each node
  85  */
  86 int orte_rmaps_rr_assign_byobj(orte_job_t *jdata,
  87                                hwloc_obj_type_t target,
  88                                unsigned cache_level)
  89 {
  90     int start, j, m, n;
  91     orte_app_context_t *app;
  92     orte_node_t *node;
  93     orte_proc_t *proc;
  94     hwloc_obj_t obj=NULL;
  95     unsigned int nobjs;
  96 
  97     opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
  98                         "mca:rmaps:rr: assigning locations by %s for job %s",
  99                         hwloc_obj_type_string(target),
 100                         ORTE_JOBID_PRINT(jdata->jobid));
 101 
 102 
 103     /* start mapping procs onto objects, filling each object as we go until
 104      * all procs are mapped. If one pass doesn't catch all the required procs,
 105      * then loop thru the list again to handle the oversubscription
 106      */
 107     for (n=0; n < jdata->apps->size; n++) {
 108         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 109             continue;
 110         }
 111         for (m=0; m < jdata->map->nodes->size; m++) {
 112             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 113                 continue;
 114             }
 115             if (NULL == node->topology || NULL == node->topology->topo) {
 116                 orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
 117                                true, node->name);
 118                 return ORTE_ERR_SILENT;
 119             }
 120             /* get the number of objects of this type on this node */
 121             nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE);
 122             if (0 == nobjs) {
 123                 continue;
 124             }
 125             opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
 126                                 "mca:rmaps:rr: found %u %s objects on node %s",
 127                                 nobjs, hwloc_obj_type_string(target), node->name);
 128 
 129             /* if this is a comm_spawn situation, start with the object
 130              * where the parent left off and increment */
 131             if (ORTE_JOBID_INVALID != jdata->originator.jobid) {
 132                 start = (jdata->bkmark_obj + 1) % nobjs;
 133             } else {
 134                 start = 0;
 135             }
 136             /* loop over the procs on this node */
 137             for (j=0; j < node->procs->size; j++) {
 138                 if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 139                     continue;
 140                 }
 141                 /* ignore procs from other jobs */
 142                 if (proc->name.jobid != jdata->jobid) {
 143                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 144                                         "mca:rmaps:rr:assign skipping proc %s - from another job",
 145                                         ORTE_NAME_PRINT(&proc->name));
 146                     continue;
 147                 }
 148                 /* ignore procs from other apps */
 149                 if (proc->app_idx != app->idx) {
 150                     continue;
 151                 }
 152                 opal_output_verbose(20, orte_rmaps_base_framework.framework_output,
 153                                     "mca:rmaps:rr: assigning proc to object %d", (j + start) % nobjs);
 154                 /* get the hwloc object */
 155                 if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (j + start) % nobjs, OPAL_HWLOC_AVAILABLE))) {
 156                     ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
 157                     return ORTE_ERR_NOT_FOUND;
 158                 }
 159                 if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) {
 160                     orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true,
 161                                    orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj),
 162                                    orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
 163                     return ORTE_ERR_SILENT;
 164                 }
 165                 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
 166             }
 167         }
 168     }
 169 
 170     return ORTE_SUCCESS;
 171 }

/* [<][>][^][v][top][bottom][index][help] */