root/orte/mca/rmaps/base/rmaps_base_ranking.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. rank_span
  2. rank_fill
  3. rank_by
  4. orte_rmaps_base_compute_vpids
  5. orte_rmaps_base_compute_local_ranks
  6. orte_rmaps_base_update_local_ranks

   1 /*
   2  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2011-2017 Cisco Systems, Inc.  All rights reserved
  13  * Copyright (c) 2014-2018 Intel, Inc.  All rights reserved.
  14  * Copyright (c) 2017      Research Organization for Information Science
  15  *                         and Technology (RIST). All rights reserved.
  16  * Copyright (c) 2019 IBM Corporation. All rights reserved.
  17  * $COPYRIGHT$
  18  *
  19  * Additional copyrights may follow
  20  *
  21  * $HEADER$
  22  */
  23 
  24 #include "orte_config.h"
  25 #include "orte/constants.h"
  26 
  27 #include <sys/types.h>
  28 #ifdef HAVE_UNISTD_H
  29 #include <unistd.h>
  30 #endif  /* HAVE_UNISTD_H */
  31 #include <string.h>
  32 
  33 #include "opal/class/opal_pointer_array.h"
  34 #include "opal/util/if.h"
  35 #include "opal/util/output.h"
  36 #include "orte/mca/mca.h"
  37 #include "opal/mca/base/base.h"
  38 #include "opal/mca/hwloc/base/base.h"
  39 #include "opal/threads/tsd.h"
  40 
  41 #include "orte/types.h"
  42 #include "orte/util/show_help.h"
  43 #include "orte/util/name_fns.h"
  44 #include "orte/runtime/orte_globals.h"
  45 #include "orte/util/hostfile/hostfile.h"
  46 #include "orte/util/dash_host/dash_host.h"
  47 #include "orte/mca/errmgr/errmgr.h"
  48 #include "orte/mca/ess/ess.h"
  49 #include "orte/runtime/data_type_support/orte_dt_support.h"
  50 
  51 #include "orte/mca/rmaps/base/rmaps_private.h"
  52 #include "orte/mca/rmaps/base/base.h"
  53 
  54 static int rank_span(orte_job_t *jdata,
  55                      hwloc_obj_type_t target,
  56                      unsigned cache_level)
  57 {
  58     orte_app_context_t *app;
  59     hwloc_obj_t obj;
  60     int num_objs, i, j, m, n, rc;
  61     orte_vpid_t num_ranked=0;
  62     orte_node_t *node;
  63     orte_proc_t *proc, *pptr;
  64     orte_vpid_t vpid;
  65     int cnt;
  66     hwloc_obj_t locale;
  67 
  68     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  69                         "mca:rmaps:rank_span: for job %s",
  70                         ORTE_JOBID_PRINT(jdata->jobid));
  71 
  72     /* if the ranking is spanned, then we perform the
  73      * ranking as if it was one big node - i.e., we
  74      * rank one proc on each object, step to the next object
  75      * moving across all the nodes, then wrap around to the
  76      * first object on the first node.
  77      *
  78      *        Node 0                Node 1
  79      *    Obj 0     Obj 1       Obj 0     Obj 1
  80      *     0 4       1 5         2 6       3 7
  81      *     8 12      9 13       10 14     11 15
  82      */
  83 
  84     /* In the interest of getting this committed in finite time,
  85      * just loop across the nodes and objects until all procs
  86      * are mapped
  87      */
  88 
  89     vpid = 0;
  90     for (n=0; n < jdata->apps->size; n++) {
  91         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
  92             continue;
  93         }
  94 
  95         cnt = 0;
  96         while (cnt < app->num_procs) {
  97             for (m=0; m < jdata->map->nodes->size; m++) {
  98                 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
  99                     continue;
 100                 }
 101                 /* get the number of objects - only consider those we can actually use */
 102                 num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
 103                                                               cache_level, OPAL_HWLOC_AVAILABLE);
 104                 opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 105                                     "mca:rmaps:rank_span: found %d objects on node %s with %d procs",
 106                                     num_objs, node->name, (int)node->num_procs);
 107                 if (0 == num_objs) {
 108                     return ORTE_ERR_NOT_SUPPORTED;
 109                 }
 110 
 111                 /* for each object */
 112                 for (i=0; i < num_objs && cnt < app->num_procs; i++) {
 113                     obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target,
 114                                                           cache_level, i, OPAL_HWLOC_AVAILABLE);
 115 
 116                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 117                                         "mca:rmaps:rank_span: working object %d", i);
 118 
 119                     /* cycle thru the procs on this node */
 120                     for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
 121                         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 122                             continue;
 123                         }
 124                         /* ignore procs from other jobs */
 125                         if (proc->name.jobid != jdata->jobid) {
 126                             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 127                                                 "mca:rmaps:rank_span skipping proc %s - from another job, num_ranked %d",
 128                                                 ORTE_NAME_PRINT(&proc->name), num_ranked);
 129                             continue;
 130                         }
 131                         /* ignore procs that are already assigned */
 132                         if (ORTE_VPID_INVALID != proc->name.vpid) {
 133                             continue;
 134                         }
 135                         /* ignore procs from other apps */
 136                         if (proc->app_idx != app->idx) {
 137                             continue;
 138                         }
 139                         /* protect against bozo case */
 140                         locale = NULL;
 141                         if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
 142                             ORTE_ERROR_LOG(ORTE_ERROR);
 143                             return ORTE_ERROR;
 144                         }
 145                         /* ignore procs not on this object */
 146                         if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
 147                             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 148                                                 "mca:rmaps:rank_span: proc at position %d is not on object %d",
 149                                                 j, i);
 150                             continue;
 151                         }
 152                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 153                                             "mca:rmaps:rank_span: assigning vpid %s", ORTE_VPID_PRINT(vpid));
 154                         proc->name.vpid = vpid++;
 155                         if (0 == cnt) {
 156                             app->first_rank = proc->name.vpid;
 157                         }
 158                         cnt++;
 159 
 160                         /* insert the proc into the jdata array */
 161                         if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 162                             OBJ_RELEASE(pptr);
 163                         }
 164                         OBJ_RETAIN(proc);
 165                         if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 166                             ORTE_ERROR_LOG(rc);
 167                             return rc;
 168                         }
 169                         /* track where the highest vpid landed - this is our
 170                          * new bookmark
 171                          */
 172                         jdata->bookmark = node;
 173                         /* move to next object */
 174                         break;
 175                     }
 176                 }
 177             }
 178         }
 179     }
 180 
 181     return ORTE_SUCCESS;
 182 }
 183 
 184 static int rank_fill(orte_job_t *jdata,
 185                      hwloc_obj_type_t target,
 186                      unsigned cache_level)
 187 {
 188     orte_app_context_t *app;
 189     hwloc_obj_t obj;
 190     int num_objs, i, j, m, n, rc;
 191     orte_vpid_t num_ranked=0;
 192     orte_node_t *node;
 193     orte_proc_t *proc, *pptr;
 194     orte_vpid_t vpid;
 195     int cnt;
 196     hwloc_obj_t locale;
 197 
 198     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 199                         "mca:rmaps:rank_fill: for job %s",
 200                         ORTE_JOBID_PRINT(jdata->jobid));
 201 
 202     /* if the ranking is fill, then we rank all the procs
 203      * within a given object before moving on to the next
 204      *
 205      *        Node 0                Node 1
 206      *    Obj 0     Obj 1       Obj 0     Obj 1
 207      *     0 1       4 5         8 9      12 13
 208      *     2 3       6 7        10 11     14 15
 209      */
 210 
 211     vpid = 0;
 212     for (n=0; n < jdata->apps->size; n++) {
 213         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 214             continue;
 215         }
 216 
 217         cnt = 0;
 218         for (m=0; m < jdata->map->nodes->size; m++) {
 219             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 220                 continue;
 221             }
 222             /* get the number of objects - only consider those we can actually use */
 223             num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
 224                                                           cache_level, OPAL_HWLOC_AVAILABLE);
 225             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 226                                 "mca:rmaps:rank_fill: found %d objects on node %s with %d procs",
 227                                 num_objs, node->name, (int)node->num_procs);
 228             if (0 == num_objs) {
 229                 return ORTE_ERR_NOT_SUPPORTED;
 230             }
 231 
 232             /* for each object */
 233             for (i=0; i < num_objs && cnt < app->num_procs; i++) {
 234                 obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target,
 235                                                       cache_level, i, OPAL_HWLOC_AVAILABLE);
 236 
 237                 opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 238                                     "mca:rmaps:rank_fill: working object %d", i);
 239 
 240                 /* cycle thru the procs on this node */
 241                 for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
 242                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 243                         continue;
 244                     }
 245                     /* ignore procs from other jobs */
 246                     if (proc->name.jobid != jdata->jobid) {
 247                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 248                                             "mca:rmaps:rank_fill skipping proc %s - from another job, num_ranked %d",
 249                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 250                         continue;
 251                     }
 252                     /* ignore procs that are already assigned */
 253                     if (ORTE_VPID_INVALID != proc->name.vpid) {
 254                         continue;
 255                     }
 256                     /* ignore procs from other apps */
 257                     if (proc->app_idx != app->idx) {
 258                         continue;
 259                     }
 260                      /* protect against bozo case */
 261                     locale = NULL;
 262                     if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
 263                         ORTE_ERROR_LOG(ORTE_ERROR);
 264                         return ORTE_ERROR;
 265                     }
 266                     /* ignore procs not on this object */
 267                     if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
 268                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 269                                             "mca:rmaps:rank_fill: proc at position %d is not on object %d",
 270                                             j, i);
 271                         continue;
 272                     }
 273                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 274                                         "mca:rmaps:rank_fill: assigning vpid %s", ORTE_VPID_PRINT(vpid));
 275                     proc->name.vpid = vpid++;
 276                     if (0 == cnt) {
 277                         app->first_rank = proc->name.vpid;
 278                     }
 279                     cnt++;
 280 
 281                     /* insert the proc into the jdata array */
 282                     if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 283                         OBJ_RELEASE(pptr);
 284                     }
 285                     OBJ_RETAIN(proc);
 286                     if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 287                         ORTE_ERROR_LOG(rc);
 288                         return rc;
 289                     }
 290                     /* track where the highest vpid landed - this is our
 291                      * new bookmark
 292                      */
 293                     jdata->bookmark = node;
 294                 }
 295             }
 296         }
 297     }
 298 
 299     return ORTE_SUCCESS;
 300 }
 301 
 302 static int rank_by(orte_job_t *jdata,
 303                    hwloc_obj_type_t target,
 304                    unsigned cache_level)
 305 {
 306     orte_app_context_t *app;
 307     hwloc_obj_t obj;
 308     int num_objs, i, j, m, n, rc, nn;
 309     orte_vpid_t num_ranked=0;
 310     orte_node_t *node;
 311     orte_proc_t *proc, *pptr;
 312     orte_vpid_t vpid, np;
 313     int cnt;
 314     opal_pointer_array_t objs;
 315     hwloc_obj_t locale;
 316     orte_app_idx_t napp;
 317 
 318     if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
 319         return rank_span(jdata, target, cache_level);
 320     } else if (ORTE_RANKING_FILL & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
 321         return rank_fill(jdata, target, cache_level);
 322     }
 323 
 324     /* if ranking is not spanned or filled, then we
 325      * default to assign ranks sequentially across
 326      * target objects within a node until that node
 327      * is fully ranked, and then move on to the next
 328      * node
 329      *
 330      *        Node 0                Node 1
 331      *    Obj 0     Obj 1       Obj 0     Obj 1
 332      *     0 2       1 3         8 10      9 11
 333      *     4 6       5 7        12 14     13 15
 334      */
 335 
 336     vpid = 0;
 337     for (n=0, napp=0; napp < jdata->num_apps && n < jdata->apps->size; n++) {
 338         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 339             continue;
 340         }
 341         napp++;
 342         /* setup the pointer array */
 343         OBJ_CONSTRUCT(&objs, opal_pointer_array_t);
 344         opal_pointer_array_init(&objs, 2, INT_MAX, 2);
 345 
 346         cnt = 0;
 347         for (m=0, nn=0; nn < jdata->map->num_nodes && m < jdata->map->nodes->size; m++) {
 348             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 349                 continue;
 350             }
 351             nn++;
 352 
 353             /* get the number of objects - only consider those we can actually use */
 354             num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
 355                                                           cache_level, OPAL_HWLOC_AVAILABLE);
 356             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 357                                 "mca:rmaps:rank_by: found %d objects on node %s with %d procs",
 358                                 num_objs, node->name, (int)node->num_procs);
 359             if (0 == num_objs) {
 360                 OBJ_DESTRUCT(&objs);
 361                 return ORTE_ERR_NOT_SUPPORTED;
 362             }
 363             /* collect all the objects */
 364             for (i=0; i < num_objs; i++) {
 365                 obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target,
 366                                                       cache_level, i, OPAL_HWLOC_AVAILABLE);
 367                 opal_pointer_array_set_item(&objs, i, obj);
 368             }
 369 
 370             /* cycle across the objects, assigning a proc to each one,
 371              * until all procs have been assigned - unfortunately, since
 372              * more than this job may be mapped onto a node, the number
 373              * of procs on the node can't be used to tell us when we
 374              * are done. Instead, we have to just keep going until all
 375              * procs are ranked - which means we have to make one extra
 376              * pass thru the loop
 377              *
 378              * Perhaps someday someone will come up with a more efficient
 379              * algorithm, but this works for now.
 380              */
 381 // In 3.x this was two loops:
 382 //     while (cnt < app->num_procs)
 383 //         for (i=0; i<num_objs; ...)
 384 // Then in 4.x it switched to
 385 //    while (cnt < app->num_procs && i < (int)node->num_procs)
 386 // where that extra i part seems wrong to me.  First of all if anything
 387 // it seems like it should be i<num_objs since that's the array i is
 388 // cycling through, but even then all the usage of i below is
 389 // (i % num_objs) so I think i is intended to wrap and you should
 390 // keep looping until you've made all the assignments you can for
 391 // this node.
 392 //
 393 // So that's what I added the other loop counter for, figuring if it
 394 // cycles through the whole array of objs without making an assignment
 395 // it's time for this loop to end and the outer loop to take us to the
 396 // next node.
 397             i = 0;
 398             int niters_of_i_without_assigning_a_proc = 0;
 399             while (cnt < app->num_procs && niters_of_i_without_assigning_a_proc <= num_objs) {
 400                 /* get the next object */
 401                 obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i % num_objs);
 402                 if (NULL == obj) {
 403                     break;
 404                 }
 405                 /* scan across the procs and find the one that is on this object */
 406                 np = 0;
 407                 for (j=0; np < node->num_procs && j < node->procs->size && cnt < app->num_procs; j++) {
 408                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 409                         continue;
 410                     }
 411                     np++;
 412                     /* ignore procs from other jobs */
 413                     if (proc->name.jobid != jdata->jobid) {
 414                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 415                                             "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
 416                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 417                         continue;
 418                     }
 419                     /* ignore procs that are already ranked */
 420                     if (ORTE_VPID_INVALID != proc->name.vpid) {
 421                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 422                                             "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
 423                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 424                         continue;
 425                     }
 426                     /* ignore procs from other apps */
 427                     if (proc->app_idx != app->idx) {
 428                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 429                                             "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
 430                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 431                         continue;
 432                     }
 433                      /* protect against bozo case */
 434                     locale = NULL;
 435                     if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
 436                         ORTE_ERROR_LOG(ORTE_ERROR);
 437                         return ORTE_ERROR;
 438                     }
 439                     /* ignore procs not on this object */
 440                     if (NULL == locale ||
 441                         !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
 442                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 443                                             "mca:rmaps:rank_by: proc at position %d is not on object %d",
 444                                             j, i);
 445                         continue;
 446                     }
 447                     /* assign the vpid */
 448                     proc->name.vpid = vpid++;
 449                     if (0 == cnt) {
 450                         app->first_rank = proc->name.vpid;
 451                     }
 452                     cnt++;
 453                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 454                                         "mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s",
 455                                         j, i, ORTE_VPID_PRINT(proc->name.vpid));
 456                     /* insert the proc into the jdata array */
 457                     if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 458                         OBJ_RELEASE(pptr);
 459                     }
 460                     OBJ_RETAIN(proc);
 461                     if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 462                         ORTE_ERROR_LOG(rc);
 463                         OBJ_DESTRUCT(&objs);
 464                         return rc;
 465                     }
 466                     num_ranked++;
 467                     niters_of_i_without_assigning_a_proc = 0;
 468                     /* track where the highest vpid landed - this is our
 469                      * new bookmark
 470                      */
 471                     jdata->bookmark = node;
 472                     /* move to next object */
 473                     break;
 474                 }
 475                 i++;
 476                 ++niters_of_i_without_assigning_a_proc;
 477             }
 478         }
 479         /* cleanup */
 480         OBJ_DESTRUCT(&objs);
 481     }
 482     return ORTE_SUCCESS;
 483 }
 484 
 485 int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
 486 {
 487     orte_job_map_t *map;
 488     orte_app_context_t *app;
 489     orte_vpid_t vpid;
 490     int j, m, n, cnt;
 491     orte_node_t *node;
 492     orte_proc_t *proc, *pptr;
 493     int rc;
 494     bool one_found;
 495     hwloc_obj_type_t target;
 496     unsigned cache_level;
 497 
 498     map = jdata->map;
 499 
 500     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 501                         "RANKING POLICY: %s", orte_rmaps_base_print_ranking(map->ranking));
 502 
 503     /* start with the rank-by object options - if the object isn't
 504      * included in the topology, then we obviously cannot rank by it.
 505      * However, if this was the default ranking policy (as opposed to
 506      * something given by the user), then fall back to rank-by slot
 507      */
 508     if (ORTE_RANK_BY_NUMA == ORTE_GET_RANKING_POLICY(map->ranking)) {
 509         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 510                             "mca:rmaps: computing ranks by NUMA for job %s",
 511                             ORTE_JOBID_PRINT(jdata->jobid));
 512         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_NODE, 0))) {
 513             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 514                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 515                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 516                 goto rankbyslot;
 517             }
 518             ORTE_ERROR_LOG(rc);
 519         }
 520         return rc;
 521     }
 522 
 523     if (ORTE_RANK_BY_SOCKET == ORTE_GET_RANKING_POLICY(map->ranking)) {
 524         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 525                             "mca:rmaps: computing ranks by socket for job %s",
 526                             ORTE_JOBID_PRINT(jdata->jobid));
 527         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_SOCKET, 0))) {
 528             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 529                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 530                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 531                 goto rankbyslot;
 532             }
 533             ORTE_ERROR_LOG(rc);
 534         }
 535         return rc;
 536     }
 537 
 538     if (ORTE_RANK_BY_L3CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 539         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 540                             "mca:rmaps: computing ranks by L3cache for job %s",
 541                             ORTE_JOBID_PRINT(jdata->jobid));
 542         OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
 543         if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
 544             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 545                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 546                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 547                 goto rankbyslot;
 548             }
 549             ORTE_ERROR_LOG(rc);
 550         }
 551         return rc;
 552     }
 553 
 554     if (ORTE_RANK_BY_L2CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 555         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 556                             "mca:rmaps: computing ranks by L2cache for job %s",
 557                             ORTE_JOBID_PRINT(jdata->jobid));
 558         OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
 559         if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
 560             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 561                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 562                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 563                 goto rankbyslot;
 564             }
 565             ORTE_ERROR_LOG(rc);
 566         }
 567         return rc;
 568     }
 569 
 570     if (ORTE_RANK_BY_L1CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 571         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 572                             "mca:rmaps: computing ranks by L1cache for job %s",
 573                             ORTE_JOBID_PRINT(jdata->jobid));
 574         OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
 575         if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
 576             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 577                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 578                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 579                 goto rankbyslot;
 580             }
 581             ORTE_ERROR_LOG(rc);
 582         }
 583         return rc;
 584     }
 585 
 586     if (ORTE_RANK_BY_CORE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 587         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 588                             "mca:rmaps: computing ranks by core for job %s",
 589                             ORTE_JOBID_PRINT(jdata->jobid));
 590         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CORE, 0))) {
 591             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 592                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 593                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 594                 goto rankbyslot;
 595             }
 596             ORTE_ERROR_LOG(rc);
 597         }
 598         return rc;
 599     }
 600 
 601     if (ORTE_RANK_BY_HWTHREAD == ORTE_GET_RANKING_POLICY(map->ranking)) {
 602         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 603                             "mca:rmaps: computing ranks by hwthread for job %s",
 604                             ORTE_JOBID_PRINT(jdata->jobid));
 605         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_PU, 0))) {
 606             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 607                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 608                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 609                 goto rankbyslot;
 610             }
 611             ORTE_ERROR_LOG(rc);
 612         }
 613         return rc;
 614     }
 615 
 616     if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
 617         ORTE_RANK_BY_BOARD == ORTE_GET_RANKING_POLICY(map->ranking)) {
 618         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 619                             "mca:rmaps:base: computing vpids by node for job %s",
 620                             ORTE_JOBID_PRINT(jdata->jobid));
 621         /* assign the ranks round-robin across nodes - only one board/node
 622          * at this time, so they are equivalent
 623          */
 624         vpid=0;
 625         for (n=0; n < jdata->apps->size; n++) {
 626             if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 627                 continue;
 628             }
 629             cnt=0;
 630             one_found = true;
 631             while (cnt < app->num_procs && one_found) {
 632                 one_found = false;
 633                 for (m=0; m < jdata->map->nodes->size; m++) {
 634                     if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 635                         continue;
 636                     }
 637                     for (j=0; j < node->procs->size; j++) {
 638                         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 639                             continue;
 640                         }
 641                         /* ignore procs from other jobs */
 642                         if (proc->name.jobid != jdata->jobid) {
 643                             continue;
 644                         }
 645                         /* ignore procs from other apps */
 646                         if (proc->app_idx != app->idx) {
 647                             continue;
 648                         }
 649                         if (ORTE_VPID_INVALID != proc->name.vpid) {
 650                             continue;
 651                         }
 652                         proc->name.vpid = vpid++;
 653                         /* insert the proc into the jdata array */
 654                         if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 655                             OBJ_RELEASE(pptr);
 656                         }
 657                         OBJ_RETAIN(proc);
 658                         if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 659                             ORTE_ERROR_LOG(rc);
 660                             return rc;
 661                         }
 662                         cnt++;
 663                         one_found = true;
 664                         /* track where the highest vpid landed - this is our
 665                          * new bookmark
 666                          */
 667                         jdata->bookmark = node;
 668                         break;  /* move on to next node */
 669                     }
 670                 }
 671             }
 672             if (cnt < app->num_procs) {
 673                 ORTE_ERROR_LOG(ORTE_ERR_FATAL);
 674                 return ORTE_ERR_FATAL;
 675             }
 676         }
 677         return ORTE_SUCCESS;
 678     }
 679 
 680   rankbyslot:
 681     if (ORTE_RANK_BY_SLOT == ORTE_GET_RANKING_POLICY(map->ranking)) {
 682         /* assign the ranks sequentially */
 683         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 684                             "mca:rmaps:base: computing vpids by slot for job %s",
 685                             ORTE_JOBID_PRINT(jdata->jobid));
 686         vpid = 0;
 687         for (n=0; n < jdata->apps->size; n++) {
 688             if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 689                 continue;
 690             }
 691             for (m=0; m < jdata->map->nodes->size; m++) {
 692                 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 693                     continue;
 694                 }
 695 
 696                 for (j=0; j < node->procs->size; j++) {
 697                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 698                         continue;
 699                     }
 700                     /* ignore procs from other jobs */
 701                     if (proc->name.jobid != jdata->jobid) {
 702                         continue;
 703                     }
 704                     /* ignore procs from other apps */
 705                     if (proc->app_idx != app->idx) {
 706                         continue;
 707                     }
 708                     if (ORTE_VPID_INVALID == proc->name.vpid) {
 709                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 710                                             "mca:rmaps:base: assigning rank %s to node %s",
 711                                             ORTE_VPID_PRINT(vpid), node->name);
 712                         proc->name.vpid = vpid++;
 713                        /* track where the highest vpid landed - this is our
 714                          * new bookmark
 715                          */
 716                         jdata->bookmark = node;
 717                     }
 718                     /* insert the proc into the jdata array */
 719                     if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 720                         OBJ_RELEASE(pptr);
 721                     }
 722                     OBJ_RETAIN(proc);
 723                     if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 724                         ORTE_ERROR_LOG(rc);
 725                         return rc;
 726                     }
 727                 }
 728             }
 729         }
 730         return ORTE_SUCCESS;
 731     }
 732 
 733     return ORTE_ERR_NOT_IMPLEMENTED;
 734 }
 735 
 736 int orte_rmaps_base_compute_local_ranks(orte_job_t *jdata)
 737 {
 738     orte_std_cntr_t i;
 739     int j, k;
 740     orte_node_t *node;
 741     orte_proc_t *proc, *psave, *psave2;
 742     orte_vpid_t minv, minv2;
 743     orte_local_rank_t local_rank;
 744     orte_job_map_t *map;
 745     orte_app_context_t *app;
 746 
 747     OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
 748                          "%s rmaps:base:compute_usage",
 749                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 750 
 751     /* point to map */
 752     map = jdata->map;
 753 
 754     /* for each node in the map... */
 755     for (i=0; i < map->nodes->size; i++) {
 756         /* cycle through the array of procs on this node, setting
 757          * local and node ranks, until we
 758          * have done so for all procs on nodes in this map
 759          */
 760         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
 761             continue;
 762         }
 763 
 764         /* init search values */
 765         local_rank = 0;
 766 
 767         /* the proc map may have holes in it, so cycle
 768          * all the way through and avoid the holes
 769          */
 770         for (k=0; k < node->procs->size; k++) {
 771             /* if this proc is NULL, skip it */
 772             if (NULL == opal_pointer_array_get_item(node->procs, k)) {
 773                 continue;
 774             }
 775             minv = ORTE_VPID_MAX;
 776             minv2 = ORTE_VPID_MAX;
 777             psave = NULL;
 778             psave2 = NULL;
 779             /* find the minimum vpid proc */
 780             for (j=0; j < node->procs->size; j++) {
 781                 /* if this proc is NULL, skip it */
 782                 if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 783                     continue;
 784                 }
 785                 /* only look at procs for this job when
 786                  * determining local rank
 787                  */
 788                 if (proc->name.jobid == jdata->jobid &&
 789                     ORTE_LOCAL_RANK_INVALID == proc->local_rank &&
 790                     proc->name.vpid < minv) {
 791                     minv = proc->name.vpid;
 792                     psave = proc;
 793                 }
 794                 /* no matter what job...still have to handle node_rank */
 795                 if (ORTE_NODE_RANK_INVALID == proc->node_rank &&
 796                     proc->name.vpid < minv2) {
 797                     minv2 = proc->name.vpid;
 798                     psave2 = proc;
 799                 }
 800             }
 801             if (NULL == psave && NULL == psave2) {
 802                 /* we must have processed them all for this node! */
 803                 break;
 804             }
 805             if (NULL != psave) {
 806                 psave->local_rank = local_rank;
 807                 ++local_rank;
 808             }
 809             if (NULL != psave2) {
 810                 psave2->node_rank = node->next_node_rank;
 811                 node->next_node_rank++;
 812             }
 813         }
 814     }
 815 
 816     /* compute app_rank */
 817     for (i=0; i < jdata->apps->size; i++) {
 818         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
 819             continue;
 820         }
 821         k=0;
 822         /* loop thru all procs in job to find those from this app_context */
 823         for (j=0; j < jdata->procs->size; j++) {
 824             if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
 825                 continue;
 826             }
 827             if (proc->app_idx != app->idx) {
 828                 continue;
 829             }
 830             proc->app_rank = k++;
 831         }
 832     }
 833 
 834     return ORTE_SUCCESS;
 835 }
 836 
 837 /* when we restart a process on a different node, we have to
 838  * ensure that the node and local ranks assigned to the proc
 839  * don't overlap with any pre-existing proc on that node. If
 840  * we don't, then it would be possible for procs to conflict
 841  * when opening static ports, should that be enabled.
 842  */
 843 void orte_rmaps_base_update_local_ranks(orte_job_t *jdata, orte_node_t *oldnode,
 844                                         orte_node_t *newnode, orte_proc_t *newproc)
 845 {
 846     int k;
 847     orte_node_rank_t node_rank;
 848     orte_local_rank_t local_rank;
 849     orte_proc_t *proc;
 850 
 851     OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
 852                          "%s rmaps:base:update_usage",
 853                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 854 
 855     /* if the node hasn't changed, then we can just use the
 856      * pre-defined values
 857      */
 858     if (oldnode == newnode) {
 859         return;
 860     }
 861 
 862     /* if the node has changed, then search the new node for the
 863      * lowest unused local and node rank
 864      */
 865     node_rank = 0;
 866 retry_nr:
 867     for (k=0; k < newnode->procs->size; k++) {
 868         /* if this proc is NULL, skip it */
 869         if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(newnode->procs, k))) {
 870             continue;
 871         }
 872         if (node_rank == proc->node_rank) {
 873             node_rank++;
 874             goto retry_nr;
 875         }
 876     }
 877     newproc->node_rank = node_rank;
 878 
 879     local_rank = 0;
 880 retry_lr:
 881     for (k=0; k < newnode->procs->size; k++) {
 882         /* if this proc is NULL, skip it */
 883         if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(newnode->procs, k))) {
 884             continue;
 885         }
 886         /* ignore procs from other jobs */
 887         if (proc->name.jobid != jdata->jobid) {
 888             continue;
 889         }
 890         if (local_rank == proc->local_rank) {
 891             local_rank++;
 892             goto retry_lr;
 893         }
 894     }
 895     newproc->local_rank = local_rank;
 896 }

/* [<][>][^][v][top][bottom][index][help] */