This source file includes the following definitions.
- rank_span
- rank_fill
- rank_by
- orte_rmaps_base_compute_vpids
- orte_rmaps_base_compute_local_ranks
- orte_rmaps_base_update_local_ranks
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 #include "orte_config.h"
  25 #include "orte/constants.h"
  26 
  27 #include <sys/types.h>
  28 #ifdef HAVE_UNISTD_H
  29 #include <unistd.h>
  30 #endif  
  31 #include <string.h>
  32 
  33 #include "opal/class/opal_pointer_array.h"
  34 #include "opal/util/if.h"
  35 #include "opal/util/output.h"
  36 #include "orte/mca/mca.h"
  37 #include "opal/mca/base/base.h"
  38 #include "opal/mca/hwloc/base/base.h"
  39 #include "opal/threads/tsd.h"
  40 
  41 #include "orte/types.h"
  42 #include "orte/util/show_help.h"
  43 #include "orte/util/name_fns.h"
  44 #include "orte/runtime/orte_globals.h"
  45 #include "orte/util/hostfile/hostfile.h"
  46 #include "orte/util/dash_host/dash_host.h"
  47 #include "orte/mca/errmgr/errmgr.h"
  48 #include "orte/mca/ess/ess.h"
  49 #include "orte/runtime/data_type_support/orte_dt_support.h"
  50 
  51 #include "orte/mca/rmaps/base/rmaps_private.h"
  52 #include "orte/mca/rmaps/base/base.h"
  53 
  54 static int rank_span(orte_job_t *jdata,
  55                      hwloc_obj_type_t target,
  56                      unsigned cache_level)
  57 {
  58     orte_app_context_t *app;
  59     hwloc_obj_t obj;
  60     int num_objs, i, j, m, n, rc;
  61     orte_vpid_t num_ranked=0;
  62     orte_node_t *node;
  63     orte_proc_t *proc, *pptr;
  64     orte_vpid_t vpid;
  65     int cnt;
  66     hwloc_obj_t locale;
  67 
  68     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
  69                         "mca:rmaps:rank_span: for job %s",
  70                         ORTE_JOBID_PRINT(jdata->jobid));
  71 
  72     
  73 
  74 
  75 
  76 
  77 
  78 
  79 
  80 
  81 
  82 
  83 
  84     
  85 
  86 
  87 
  88 
  89     vpid = 0;
  90     for (n=0; n < jdata->apps->size; n++) {
  91         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
  92             continue;
  93         }
  94 
  95         cnt = 0;
  96         while (cnt < app->num_procs) {
  97             for (m=0; m < jdata->map->nodes->size; m++) {
  98                 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
  99                     continue;
 100                 }
 101                 
 102                 num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
 103                                                               cache_level, OPAL_HWLOC_AVAILABLE);
 104                 opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 105                                     "mca:rmaps:rank_span: found %d objects on node %s with %d procs",
 106                                     num_objs, node->name, (int)node->num_procs);
 107                 if (0 == num_objs) {
 108                     return ORTE_ERR_NOT_SUPPORTED;
 109                 }
 110 
 111                 
 112                 for (i=0; i < num_objs && cnt < app->num_procs; i++) {
 113                     obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target,
 114                                                           cache_level, i, OPAL_HWLOC_AVAILABLE);
 115 
 116                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 117                                         "mca:rmaps:rank_span: working object %d", i);
 118 
 119                     
 120                     for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
 121                         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 122                             continue;
 123                         }
 124                         
 125                         if (proc->name.jobid != jdata->jobid) {
 126                             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 127                                                 "mca:rmaps:rank_span skipping proc %s - from another job, num_ranked %d",
 128                                                 ORTE_NAME_PRINT(&proc->name), num_ranked);
 129                             continue;
 130                         }
 131                         
 132                         if (ORTE_VPID_INVALID != proc->name.vpid) {
 133                             continue;
 134                         }
 135                         
 136                         if (proc->app_idx != app->idx) {
 137                             continue;
 138                         }
 139                         
 140                         locale = NULL;
 141                         if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
 142                             ORTE_ERROR_LOG(ORTE_ERROR);
 143                             return ORTE_ERROR;
 144                         }
 145                         
 146                         if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
 147                             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 148                                                 "mca:rmaps:rank_span: proc at position %d is not on object %d",
 149                                                 j, i);
 150                             continue;
 151                         }
 152                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 153                                             "mca:rmaps:rank_span: assigning vpid %s", ORTE_VPID_PRINT(vpid));
 154                         proc->name.vpid = vpid++;
 155                         if (0 == cnt) {
 156                             app->first_rank = proc->name.vpid;
 157                         }
 158                         cnt++;
 159 
 160                         
 161                         if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 162                             OBJ_RELEASE(pptr);
 163                         }
 164                         OBJ_RETAIN(proc);
 165                         if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 166                             ORTE_ERROR_LOG(rc);
 167                             return rc;
 168                         }
 169                         
 170 
 171 
 172                         jdata->bookmark = node;
 173                         
 174                         break;
 175                     }
 176                 }
 177             }
 178         }
 179     }
 180 
 181     return ORTE_SUCCESS;
 182 }
 183 
 184 static int rank_fill(orte_job_t *jdata,
 185                      hwloc_obj_type_t target,
 186                      unsigned cache_level)
 187 {
 188     orte_app_context_t *app;
 189     hwloc_obj_t obj;
 190     int num_objs, i, j, m, n, rc;
 191     orte_vpid_t num_ranked=0;
 192     orte_node_t *node;
 193     orte_proc_t *proc, *pptr;
 194     orte_vpid_t vpid;
 195     int cnt;
 196     hwloc_obj_t locale;
 197 
 198     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 199                         "mca:rmaps:rank_fill: for job %s",
 200                         ORTE_JOBID_PRINT(jdata->jobid));
 201 
 202     
 203 
 204 
 205 
 206 
 207 
 208 
 209 
 210 
 211     vpid = 0;
 212     for (n=0; n < jdata->apps->size; n++) {
 213         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 214             continue;
 215         }
 216 
 217         cnt = 0;
 218         for (m=0; m < jdata->map->nodes->size; m++) {
 219             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 220                 continue;
 221             }
 222             
 223             num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
 224                                                           cache_level, OPAL_HWLOC_AVAILABLE);
 225             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 226                                 "mca:rmaps:rank_fill: found %d objects on node %s with %d procs",
 227                                 num_objs, node->name, (int)node->num_procs);
 228             if (0 == num_objs) {
 229                 return ORTE_ERR_NOT_SUPPORTED;
 230             }
 231 
 232             
 233             for (i=0; i < num_objs && cnt < app->num_procs; i++) {
 234                 obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target,
 235                                                       cache_level, i, OPAL_HWLOC_AVAILABLE);
 236 
 237                 opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 238                                     "mca:rmaps:rank_fill: working object %d", i);
 239 
 240                 
 241                 for (j=0; j < node->procs->size && cnt < app->num_procs; j++) {
 242                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 243                         continue;
 244                     }
 245                     
 246                     if (proc->name.jobid != jdata->jobid) {
 247                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 248                                             "mca:rmaps:rank_fill skipping proc %s - from another job, num_ranked %d",
 249                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 250                         continue;
 251                     }
 252                     
 253                     if (ORTE_VPID_INVALID != proc->name.vpid) {
 254                         continue;
 255                     }
 256                     
 257                     if (proc->app_idx != app->idx) {
 258                         continue;
 259                     }
 260                      
 261                     locale = NULL;
 262                     if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
 263                         ORTE_ERROR_LOG(ORTE_ERROR);
 264                         return ORTE_ERROR;
 265                     }
 266                     
 267                     if (!hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
 268                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 269                                             "mca:rmaps:rank_fill: proc at position %d is not on object %d",
 270                                             j, i);
 271                         continue;
 272                     }
 273                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 274                                         "mca:rmaps:rank_fill: assigning vpid %s", ORTE_VPID_PRINT(vpid));
 275                     proc->name.vpid = vpid++;
 276                     if (0 == cnt) {
 277                         app->first_rank = proc->name.vpid;
 278                     }
 279                     cnt++;
 280 
 281                     
 282                     if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 283                         OBJ_RELEASE(pptr);
 284                     }
 285                     OBJ_RETAIN(proc);
 286                     if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 287                         ORTE_ERROR_LOG(rc);
 288                         return rc;
 289                     }
 290                     
 291 
 292 
 293                     jdata->bookmark = node;
 294                 }
 295             }
 296         }
 297     }
 298 
 299     return ORTE_SUCCESS;
 300 }
 301 
 302 static int rank_by(orte_job_t *jdata,
 303                    hwloc_obj_type_t target,
 304                    unsigned cache_level)
 305 {
 306     orte_app_context_t *app;
 307     hwloc_obj_t obj;
 308     int num_objs, i, j, m, n, rc, nn;
 309     orte_vpid_t num_ranked=0;
 310     orte_node_t *node;
 311     orte_proc_t *proc, *pptr;
 312     orte_vpid_t vpid, np;
 313     int cnt;
 314     opal_pointer_array_t objs;
 315     hwloc_obj_t locale;
 316     orte_app_idx_t napp;
 317 
 318     if (ORTE_RANKING_SPAN & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
 319         return rank_span(jdata, target, cache_level);
 320     } else if (ORTE_RANKING_FILL & ORTE_GET_RANKING_DIRECTIVE(jdata->map->ranking)) {
 321         return rank_fill(jdata, target, cache_level);
 322     }
 323 
 324     
 325 
 326 
 327 
 328 
 329 
 330 
 331 
 332 
 333 
 334 
 335 
 336     vpid = 0;
 337     for (n=0, napp=0; napp < jdata->num_apps && n < jdata->apps->size; n++) {
 338         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 339             continue;
 340         }
 341         napp++;
 342         
 343         OBJ_CONSTRUCT(&objs, opal_pointer_array_t);
 344         opal_pointer_array_init(&objs, 2, INT_MAX, 2);
 345 
 346         cnt = 0;
 347         for (m=0, nn=0; nn < jdata->map->num_nodes && m < jdata->map->nodes->size; m++) {
 348             if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 349                 continue;
 350             }
 351             nn++;
 352 
 353             
 354             num_objs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target,
 355                                                           cache_level, OPAL_HWLOC_AVAILABLE);
 356             opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 357                                 "mca:rmaps:rank_by: found %d objects on node %s with %d procs",
 358                                 num_objs, node->name, (int)node->num_procs);
 359             if (0 == num_objs) {
 360                 OBJ_DESTRUCT(&objs);
 361                 return ORTE_ERR_NOT_SUPPORTED;
 362             }
 363             
 364             for (i=0; i < num_objs; i++) {
 365                 obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target,
 366                                                       cache_level, i, OPAL_HWLOC_AVAILABLE);
 367                 opal_pointer_array_set_item(&objs, i, obj);
 368             }
 369 
 370             
 371 
 372 
 373 
 374 
 375 
 376 
 377 
 378 
 379 
 380 
 381 
 382 
 383 
 384 
 385 
 386 
 387 
 388 
 389 
 390 
 391 
 392 
 393 
 394 
 395 
 396 
 397             i = 0;
 398             int niters_of_i_without_assigning_a_proc = 0;
 399             while (cnt < app->num_procs && niters_of_i_without_assigning_a_proc <= num_objs) {
 400                 
 401                 obj = (hwloc_obj_t)opal_pointer_array_get_item(&objs, i % num_objs);
 402                 if (NULL == obj) {
 403                     break;
 404                 }
 405                 
 406                 np = 0;
 407                 for (j=0; np < node->num_procs && j < node->procs->size && cnt < app->num_procs; j++) {
 408                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 409                         continue;
 410                     }
 411                     np++;
 412                     
 413                     if (proc->name.jobid != jdata->jobid) {
 414                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 415                                             "mca:rmaps:rank_by skipping proc %s - from another job, num_ranked %d",
 416                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 417                         continue;
 418                     }
 419                     
 420                     if (ORTE_VPID_INVALID != proc->name.vpid) {
 421                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 422                                             "mca:rmaps:rank_by skipping proc %s - already ranked, num_ranked %d",
 423                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 424                         continue;
 425                     }
 426                     
 427                     if (proc->app_idx != app->idx) {
 428                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 429                                             "mca:rmaps:rank_by skipping proc %s - from another app, num_ranked %d",
 430                                             ORTE_NAME_PRINT(&proc->name), num_ranked);
 431                         continue;
 432                     }
 433                      
 434                     locale = NULL;
 435                     if (!orte_get_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, (void**)&locale, OPAL_PTR)) {
 436                         ORTE_ERROR_LOG(ORTE_ERROR);
 437                         return ORTE_ERROR;
 438                     }
 439                     
 440                     if (NULL == locale ||
 441                         !hwloc_bitmap_intersects(obj->cpuset, locale->cpuset)) {
 442                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 443                                             "mca:rmaps:rank_by: proc at position %d is not on object %d",
 444                                             j, i);
 445                         continue;
 446                     }
 447                     
 448                     proc->name.vpid = vpid++;
 449                     if (0 == cnt) {
 450                         app->first_rank = proc->name.vpid;
 451                     }
 452                     cnt++;
 453                     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 454                                         "mca:rmaps:rank_by: proc in position %d is on object %d assigned rank %s",
 455                                         j, i, ORTE_VPID_PRINT(proc->name.vpid));
 456                     
 457                     if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 458                         OBJ_RELEASE(pptr);
 459                     }
 460                     OBJ_RETAIN(proc);
 461                     if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 462                         ORTE_ERROR_LOG(rc);
 463                         OBJ_DESTRUCT(&objs);
 464                         return rc;
 465                     }
 466                     num_ranked++;
 467                     niters_of_i_without_assigning_a_proc = 0;
 468                     
 469 
 470 
 471                     jdata->bookmark = node;
 472                     
 473                     break;
 474                 }
 475                 i++;
 476                 ++niters_of_i_without_assigning_a_proc;
 477             }
 478         }
 479         
 480         OBJ_DESTRUCT(&objs);
 481     }
 482     return ORTE_SUCCESS;
 483 }
 484 
 485 int orte_rmaps_base_compute_vpids(orte_job_t *jdata)
 486 {
 487     orte_job_map_t *map;
 488     orte_app_context_t *app;
 489     orte_vpid_t vpid;
 490     int j, m, n, cnt;
 491     orte_node_t *node;
 492     orte_proc_t *proc, *pptr;
 493     int rc;
 494     bool one_found;
 495     hwloc_obj_type_t target;
 496     unsigned cache_level;
 497 
 498     map = jdata->map;
 499 
 500     opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 501                         "RANKING POLICY: %s", orte_rmaps_base_print_ranking(map->ranking));
 502 
 503     
 504 
 505 
 506 
 507 
 508     if (ORTE_RANK_BY_NUMA == ORTE_GET_RANKING_POLICY(map->ranking)) {
 509         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 510                             "mca:rmaps: computing ranks by NUMA for job %s",
 511                             ORTE_JOBID_PRINT(jdata->jobid));
 512         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_NODE, 0))) {
 513             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 514                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 515                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 516                 goto rankbyslot;
 517             }
 518             ORTE_ERROR_LOG(rc);
 519         }
 520         return rc;
 521     }
 522 
 523     if (ORTE_RANK_BY_SOCKET == ORTE_GET_RANKING_POLICY(map->ranking)) {
 524         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 525                             "mca:rmaps: computing ranks by socket for job %s",
 526                             ORTE_JOBID_PRINT(jdata->jobid));
 527         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_SOCKET, 0))) {
 528             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 529                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 530                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 531                 goto rankbyslot;
 532             }
 533             ORTE_ERROR_LOG(rc);
 534         }
 535         return rc;
 536     }
 537 
 538     if (ORTE_RANK_BY_L3CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 539         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 540                             "mca:rmaps: computing ranks by L3cache for job %s",
 541                             ORTE_JOBID_PRINT(jdata->jobid));
 542         OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
 543         if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
 544             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 545                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 546                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 547                 goto rankbyslot;
 548             }
 549             ORTE_ERROR_LOG(rc);
 550         }
 551         return rc;
 552     }
 553 
 554     if (ORTE_RANK_BY_L2CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 555         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 556                             "mca:rmaps: computing ranks by L2cache for job %s",
 557                             ORTE_JOBID_PRINT(jdata->jobid));
 558         OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
 559         if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
 560             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 561                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 562                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 563                 goto rankbyslot;
 564             }
 565             ORTE_ERROR_LOG(rc);
 566         }
 567         return rc;
 568     }
 569 
 570     if (ORTE_RANK_BY_L1CACHE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 571         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 572                             "mca:rmaps: computing ranks by L1cache for job %s",
 573                             ORTE_JOBID_PRINT(jdata->jobid));
 574         OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
 575         if (ORTE_SUCCESS != (rc = rank_by(jdata, target, cache_level))) {
 576             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 577                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 578                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 579                 goto rankbyslot;
 580             }
 581             ORTE_ERROR_LOG(rc);
 582         }
 583         return rc;
 584     }
 585 
 586     if (ORTE_RANK_BY_CORE == ORTE_GET_RANKING_POLICY(map->ranking)) {
 587         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 588                             "mca:rmaps: computing ranks by core for job %s",
 589                             ORTE_JOBID_PRINT(jdata->jobid));
 590         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_CORE, 0))) {
 591             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 592                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 593                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 594                 goto rankbyslot;
 595             }
 596             ORTE_ERROR_LOG(rc);
 597         }
 598         return rc;
 599     }
 600 
 601     if (ORTE_RANK_BY_HWTHREAD == ORTE_GET_RANKING_POLICY(map->ranking)) {
 602         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 603                             "mca:rmaps: computing ranks by hwthread for job %s",
 604                             ORTE_JOBID_PRINT(jdata->jobid));
 605         if (ORTE_SUCCESS != (rc = rank_by(jdata, HWLOC_OBJ_PU, 0))) {
 606             if (ORTE_ERR_NOT_SUPPORTED == rc &&
 607                 !(ORTE_RANKING_GIVEN & ORTE_GET_RANKING_DIRECTIVE(map->ranking))) {
 608                 ORTE_SET_RANKING_POLICY(map->ranking, ORTE_RANK_BY_SLOT);
 609                 goto rankbyslot;
 610             }
 611             ORTE_ERROR_LOG(rc);
 612         }
 613         return rc;
 614     }
 615 
 616     if (ORTE_RANK_BY_NODE == ORTE_GET_RANKING_POLICY(map->ranking) ||
 617         ORTE_RANK_BY_BOARD == ORTE_GET_RANKING_POLICY(map->ranking)) {
 618         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 619                             "mca:rmaps:base: computing vpids by node for job %s",
 620                             ORTE_JOBID_PRINT(jdata->jobid));
 621         
 622 
 623 
 624         vpid=0;
 625         for (n=0; n < jdata->apps->size; n++) {
 626             if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 627                 continue;
 628             }
 629             cnt=0;
 630             one_found = true;
 631             while (cnt < app->num_procs && one_found) {
 632                 one_found = false;
 633                 for (m=0; m < jdata->map->nodes->size; m++) {
 634                     if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 635                         continue;
 636                     }
 637                     for (j=0; j < node->procs->size; j++) {
 638                         if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 639                             continue;
 640                         }
 641                         
 642                         if (proc->name.jobid != jdata->jobid) {
 643                             continue;
 644                         }
 645                         
 646                         if (proc->app_idx != app->idx) {
 647                             continue;
 648                         }
 649                         if (ORTE_VPID_INVALID != proc->name.vpid) {
 650                             continue;
 651                         }
 652                         proc->name.vpid = vpid++;
 653                         
 654                         if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 655                             OBJ_RELEASE(pptr);
 656                         }
 657                         OBJ_RETAIN(proc);
 658                         if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 659                             ORTE_ERROR_LOG(rc);
 660                             return rc;
 661                         }
 662                         cnt++;
 663                         one_found = true;
 664                         
 665 
 666 
 667                         jdata->bookmark = node;
 668                         break;  
 669                     }
 670                 }
 671             }
 672             if (cnt < app->num_procs) {
 673                 ORTE_ERROR_LOG(ORTE_ERR_FATAL);
 674                 return ORTE_ERR_FATAL;
 675             }
 676         }
 677         return ORTE_SUCCESS;
 678     }
 679 
 680   rankbyslot:
 681     if (ORTE_RANK_BY_SLOT == ORTE_GET_RANKING_POLICY(map->ranking)) {
 682         
 683         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 684                             "mca:rmaps:base: computing vpids by slot for job %s",
 685                             ORTE_JOBID_PRINT(jdata->jobid));
 686         vpid = 0;
 687         for (n=0; n < jdata->apps->size; n++) {
 688             if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, n))) {
 689                 continue;
 690             }
 691             for (m=0; m < jdata->map->nodes->size; m++) {
 692                 if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(jdata->map->nodes, m))) {
 693                     continue;
 694                 }
 695 
 696                 for (j=0; j < node->procs->size; j++) {
 697                     if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 698                         continue;
 699                     }
 700                     
 701                     if (proc->name.jobid != jdata->jobid) {
 702                         continue;
 703                     }
 704                     
 705                     if (proc->app_idx != app->idx) {
 706                         continue;
 707                     }
 708                     if (ORTE_VPID_INVALID == proc->name.vpid) {
 709                         opal_output_verbose(5, orte_rmaps_base_framework.framework_output,
 710                                             "mca:rmaps:base: assigning rank %s to node %s",
 711                                             ORTE_VPID_PRINT(vpid), node->name);
 712                         proc->name.vpid = vpid++;
 713                        
 714 
 715 
 716                         jdata->bookmark = node;
 717                     }
 718                     
 719                     if (NULL != (pptr = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, proc->name.vpid))) {
 720                         OBJ_RELEASE(pptr);
 721                     }
 722                     OBJ_RETAIN(proc);
 723                     if (ORTE_SUCCESS != (rc = opal_pointer_array_set_item(jdata->procs, proc->name.vpid, proc))) {
 724                         ORTE_ERROR_LOG(rc);
 725                         return rc;
 726                     }
 727                 }
 728             }
 729         }
 730         return ORTE_SUCCESS;
 731     }
 732 
 733     return ORTE_ERR_NOT_IMPLEMENTED;
 734 }
 735 
 736 int orte_rmaps_base_compute_local_ranks(orte_job_t *jdata)
 737 {
 738     orte_std_cntr_t i;
 739     int j, k;
 740     orte_node_t *node;
 741     orte_proc_t *proc, *psave, *psave2;
 742     orte_vpid_t minv, minv2;
 743     orte_local_rank_t local_rank;
 744     orte_job_map_t *map;
 745     orte_app_context_t *app;
 746 
 747     OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
 748                          "%s rmaps:base:compute_usage",
 749                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 750 
 751     
 752     map = jdata->map;
 753 
 754     
 755     for (i=0; i < map->nodes->size; i++) {
 756         
 757 
 758 
 759 
 760         if (NULL == (node = (orte_node_t*)opal_pointer_array_get_item(map->nodes, i))) {
 761             continue;
 762         }
 763 
 764         
 765         local_rank = 0;
 766 
 767         
 768 
 769 
 770         for (k=0; k < node->procs->size; k++) {
 771             
 772             if (NULL == opal_pointer_array_get_item(node->procs, k)) {
 773                 continue;
 774             }
 775             minv = ORTE_VPID_MAX;
 776             minv2 = ORTE_VPID_MAX;
 777             psave = NULL;
 778             psave2 = NULL;
 779             
 780             for (j=0; j < node->procs->size; j++) {
 781                 
 782                 if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(node->procs, j))) {
 783                     continue;
 784                 }
 785                 
 786 
 787 
 788                 if (proc->name.jobid == jdata->jobid &&
 789                     ORTE_LOCAL_RANK_INVALID == proc->local_rank &&
 790                     proc->name.vpid < minv) {
 791                     minv = proc->name.vpid;
 792                     psave = proc;
 793                 }
 794                 
 795                 if (ORTE_NODE_RANK_INVALID == proc->node_rank &&
 796                     proc->name.vpid < minv2) {
 797                     minv2 = proc->name.vpid;
 798                     psave2 = proc;
 799                 }
 800             }
 801             if (NULL == psave && NULL == psave2) {
 802                 
 803                 break;
 804             }
 805             if (NULL != psave) {
 806                 psave->local_rank = local_rank;
 807                 ++local_rank;
 808             }
 809             if (NULL != psave2) {
 810                 psave2->node_rank = node->next_node_rank;
 811                 node->next_node_rank++;
 812             }
 813         }
 814     }
 815 
 816     
 817     for (i=0; i < jdata->apps->size; i++) {
 818         if (NULL == (app = (orte_app_context_t*)opal_pointer_array_get_item(jdata->apps, i))) {
 819             continue;
 820         }
 821         k=0;
 822         
 823         for (j=0; j < jdata->procs->size; j++) {
 824             if (NULL == (proc = (orte_proc_t*)opal_pointer_array_get_item(jdata->procs, j))) {
 825                 continue;
 826             }
 827             if (proc->app_idx != app->idx) {
 828                 continue;
 829             }
 830             proc->app_rank = k++;
 831         }
 832     }
 833 
 834     return ORTE_SUCCESS;
 835 }
 836 
 837 
 838 
 839 
 840 
 841 
 842 
 843 void orte_rmaps_base_update_local_ranks(orte_job_t *jdata, orte_node_t *oldnode,
 844                                         orte_node_t *newnode, orte_proc_t *newproc)
 845 {
 846     int k;
 847     orte_node_rank_t node_rank;
 848     orte_local_rank_t local_rank;
 849     orte_proc_t *proc;
 850 
 851     OPAL_OUTPUT_VERBOSE((5, orte_rmaps_base_framework.framework_output,
 852                          "%s rmaps:base:update_usage",
 853                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 854 
 855     
 856 
 857 
 858     if (oldnode == newnode) {
 859         return;
 860     }
 861 
 862     
 863 
 864 
 865     node_rank = 0;
 866 retry_nr:
 867     for (k=0; k < newnode->procs->size; k++) {
 868         
 869         if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(newnode->procs, k))) {
 870             continue;
 871         }
 872         if (node_rank == proc->node_rank) {
 873             node_rank++;
 874             goto retry_nr;
 875         }
 876     }
 877     newproc->node_rank = node_rank;
 878 
 879     local_rank = 0;
 880 retry_lr:
 881     for (k=0; k < newnode->procs->size; k++) {
 882         
 883         if (NULL == (proc = (orte_proc_t *) opal_pointer_array_get_item(newnode->procs, k))) {
 884             continue;
 885         }
 886         
 887         if (proc->name.jobid != jdata->jobid) {
 888             continue;
 889         }
 890         if (local_rank == proc->local_rank) {
 891             local_rank++;
 892             goto retry_lr;
 893         }
 894     }
 895     newproc->local_rank = local_rank;
 896 }