root/orte/mca/routed/binomial/routed_binomial.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init
  2. finalize
  3. delete_route
  4. update_route
  5. get_route
  6. route_lost
  7. route_is_defined
  8. set_lifeline
  9. binomial_tree
  10. update_routing_plan
  11. get_routing_list
  12. num_routes
  13. binomial_ft_event

   1 /*
   2  * Copyright (c) 2004-2011 The University of Tennessee and The University
   3  *                         of Tennessee Research Foundation.  All rights
   4  *                         reserved.
   5  * Copyright (c) 2007-2012 Los Alamos National Security, LLC.  All rights
   6  *                         reserved.
   7  * Copyright (c) 2013      Cisco Systems, Inc.  All rights reserved.
   8  * Copyright (c) 2013-2018 Intel, Inc.  All rights reserved.
   9  * $COPYRIGHT$
  10  *
  11  * Additional copyrights may follow
  12  *
  13  * $HEADER$
  14  */
  15 
  16 #include "orte_config.h"
  17 #include "orte/constants.h"
  18 
  19 #include <stddef.h>
  20 
  21 #include "opal/dss/dss.h"
  22 #include "opal/class/opal_pointer_array.h"
  23 #include "opal/class/opal_bitmap.h"
  24 #include "opal/util/bit_ops.h"
  25 #include "opal/util/output.h"
  26 
  27 #include "orte/mca/errmgr/errmgr.h"
  28 #include "orte/mca/ess/ess.h"
  29 #include "orte/mca/rml/rml.h"
  30 #include "orte/mca/rml/rml_types.h"
  31 #include "orte/util/name_fns.h"
  32 #include "orte/runtime/orte_globals.h"
  33 #include "orte/runtime/orte_wait.h"
  34 #include "orte/runtime/runtime.h"
  35 #include "orte/runtime/data_type_support/orte_dt_support.h"
  36 
  37 #include "orte/mca/rml/base/rml_contact.h"
  38 
  39 #include "orte/mca/routed/base/base.h"
  40 #include "routed_binomial.h"
  41 
  42 static int init(void);
  43 static int finalize(void);
  44 static int delete_route(orte_process_name_t *proc);
  45 static int update_route(orte_process_name_t *target,
  46                         orte_process_name_t *route);
  47 static orte_process_name_t get_route(orte_process_name_t *target);
  48 static int route_lost(const orte_process_name_t *route);
  49 static bool route_is_defined(const orte_process_name_t *target);
  50 static void update_routing_plan(void);
  51 static void get_routing_list(opal_list_t *coll);
  52 static int set_lifeline(orte_process_name_t *proc);
  53 static size_t num_routes(void);
  54 
  55 #if OPAL_ENABLE_FT_CR == 1
  56 static int binomial_ft_event(int state);
  57 #endif
  58 
  59 orte_routed_module_t orte_routed_binomial_module = {
  60     .initialize = init,
  61     .finalize = finalize,
  62     .delete_route = delete_route,
  63     .update_route = update_route,
  64     .get_route = get_route,
  65     .route_lost = route_lost,
  66     .route_is_defined = route_is_defined,
  67     .set_lifeline = set_lifeline,
  68     .update_routing_plan = update_routing_plan,
  69     .get_routing_list = get_routing_list,
  70     .num_routes = num_routes,
  71 #if OPAL_ENABLE_FT_CR == 1
  72     .ft_event = binomial_ft_event
  73 #else
  74     NULL
  75 #endif
  76 };
  77 
  78 /* local globals */
  79 static orte_process_name_t      *lifeline=NULL;
  80 static orte_process_name_t      local_lifeline;
  81 static int                      num_children;
  82 static opal_list_t              my_children;
  83 static bool                     hnp_direct=true;
  84 
  85 static int init(void)
  86 {
  87     lifeline = NULL;
  88 
  89     if (ORTE_PROC_IS_DAEMON) {
  90         /* if we are using static ports, set my lifeline to point at my parent */
  91         if (orte_static_ports) {
  92             lifeline = ORTE_PROC_MY_PARENT;
  93         } else {
  94             /* set our lifeline to the HNP - we will abort if that connection is lost */
  95             lifeline = ORTE_PROC_MY_HNP;
  96         }
  97         ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid;
  98     } else if (ORTE_PROC_IS_APP) {
  99         /* if we don't have a designated daemon, just
 100          * disqualify ourselves */
 101         if (NULL == orte_process_info.my_daemon_uri) {
 102             return ORTE_ERR_TAKE_NEXT_OPTION;
 103         }
 104         /* set our lifeline to the local daemon - we will abort if this connection is lost */
 105         lifeline = ORTE_PROC_MY_DAEMON;
 106         orte_routing_is_enabled = true;
 107     }
 108 
 109     /* setup the list of children */
 110     OBJ_CONSTRUCT(&my_children, opal_list_t);
 111     num_children = 0;
 112 
 113     return ORTE_SUCCESS;
 114 }
 115 
 116 static int finalize(void)
 117 {
 118     opal_list_item_t *item;
 119 
 120     lifeline = NULL;
 121 
 122     /* deconstruct the list of children */
 123     while (NULL != (item = opal_list_remove_first(&my_children))) {
 124         OBJ_RELEASE(item);
 125     }
 126     OBJ_DESTRUCT(&my_children);
 127     num_children = 0;
 128 
 129     return ORTE_SUCCESS;
 130 }
 131 
 132 static int delete_route(orte_process_name_t *proc)
 133 {
 134     if (proc->jobid == ORTE_JOBID_INVALID ||
 135         proc->vpid == ORTE_VPID_INVALID) {
 136         return ORTE_ERR_BAD_PARAM;
 137     }
 138 
 139     /* if I am an application process, I don't have any routes
 140      * so there is nothing for me to do
 141      */
 142     if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
 143         !ORTE_PROC_IS_TOOL) {
 144         return ORTE_SUCCESS;
 145     }
 146 
 147     OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
 148                          "%s routed_binomial_delete_route for %s",
 149                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 150                          ORTE_NAME_PRINT(proc)));
 151 
 152 
 153     /* THIS CAME FROM OUR OWN JOB FAMILY...there is nothing
 154      * to do here. The routes will be redefined when we update
 155      * the routing tree
 156      */
 157 
 158     return ORTE_SUCCESS;
 159 }
 160 
 161 static int update_route(orte_process_name_t *target,
 162                         orte_process_name_t *route)
 163 {
 164     if (target->jobid == ORTE_JOBID_INVALID ||
 165         target->vpid == ORTE_VPID_INVALID) {
 166         return ORTE_ERR_BAD_PARAM;
 167     }
 168 
 169     /* if I am an application process, we don't update the route since
 170      * we automatically route everything through the local daemon
 171      */
 172     if (ORTE_PROC_IS_APP) {
 173         return ORTE_SUCCESS;
 174     }
 175 
 176     OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
 177                          "%s routed_binomial_update: %s --> %s",
 178                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 179                          ORTE_NAME_PRINT(target),
 180                          ORTE_NAME_PRINT(route)));
 181 
 182 
 183     /* if I am a daemon and the target is my HNP, then check
 184      * the route - if it isn't direct, then we just flag that
 185      * we have a route to the HNP
 186      */
 187     if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target) &&
 188         OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) {
 189         hnp_direct = false;
 190         return ORTE_SUCCESS;
 191     }
 192 
 193     return ORTE_SUCCESS;
 194 }
 195 
 196 
 197 static orte_process_name_t get_route(orte_process_name_t *target)
 198 {
 199     orte_process_name_t *ret, daemon;
 200     opal_list_item_t *item;
 201     orte_routed_tree_t *child;
 202 
 203     if (!orte_routing_is_enabled) {
 204         ret = target;
 205         goto found;
 206     }
 207 
 208     /* initialize */
 209     daemon.jobid = ORTE_PROC_MY_DAEMON->jobid;
 210     daemon.vpid = ORTE_PROC_MY_DAEMON->vpid;
 211 
 212     if (target->jobid == ORTE_JOBID_INVALID ||
 213         target->vpid == ORTE_VPID_INVALID) {
 214         ret = ORTE_NAME_INVALID;
 215         goto found;
 216     }
 217 
 218     /* if it is me, then the route is just direct */
 219     if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
 220         ret = target;
 221         goto found;
 222     }
 223 
 224     /* if I am an application process, always route via my local daemon */
 225     if (ORTE_PROC_IS_APP) {
 226         ret = ORTE_PROC_MY_DAEMON;
 227         goto found;
 228     }
 229 
 230     /* if I am a tool, the route is direct if target is in
 231      * my own job family, and to the target's HNP if not
 232      */
 233     if (ORTE_PROC_IS_TOOL) {
 234         if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
 235             ret = target;
 236             goto found;
 237         } else {
 238             ORTE_HNP_NAME_FROM_JOB(&daemon, target->jobid);
 239             ret = &daemon;
 240             goto found;
 241         }
 242     }
 243 
 244     /******     HNP AND DAEMONS ONLY     ******/
 245     if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
 246         if (!hnp_direct || orte_static_ports) {
 247             OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 248                                  "%s routing to the HNP through my parent %s",
 249                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 250                                  ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
 251             ret = ORTE_PROC_MY_PARENT;
 252             goto found;
 253         } else {
 254             OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 255                                  "%s routing direct to the HNP",
 256                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 257             ret = ORTE_PROC_MY_HNP;
 258             goto found;
 259         }
 260     }
 261 
 262 
 263     daemon.jobid = ORTE_PROC_MY_NAME->jobid;
 264     /* find out what daemon hosts this proc */
 265     if (ORTE_VPID_INVALID == (daemon.vpid = orte_get_proc_daemon_vpid(target))) {
 266         /*ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);*/
 267         ret = ORTE_NAME_INVALID;
 268         goto found;
 269     }
 270 
 271     /* if the daemon is me, then send direct to the target! */
 272     if (ORTE_PROC_MY_NAME->vpid == daemon.vpid) {
 273         ret = target;
 274         goto found;
 275     }
 276 
 277     /* search routing tree for next step to that daemon */
 278     for (item = opal_list_get_first(&my_children);
 279             item != opal_list_get_end(&my_children);
 280             item = opal_list_get_next(item)) {
 281         child = (orte_routed_tree_t*)item;
 282         if (child->vpid == daemon.vpid) {
 283             /* the child is hosting the proc - just send it there */
 284             ret = &daemon;
 285             goto found;
 286         }
 287         /* otherwise, see if the daemon we need is below the child */
 288         if (opal_bitmap_is_set_bit(&child->relatives, daemon.vpid)) {
 289             /* yep - we need to step through this child */
 290             daemon.vpid = child->vpid;
 291 
 292             ret = &daemon;
 293             goto found;
 294         }
 295     }
 296 
 297     /* if we get here, then the target daemon is not beneath
 298      * any of our children, so we have to step up through our parent
 299      */
 300     daemon.vpid = ORTE_PROC_MY_PARENT->vpid;
 301 
 302     ret = &daemon;
 303 
 304  found:
 305     OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
 306                          "%s routed_binomial_get(%s) --> %s",
 307                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 308                          ORTE_NAME_PRINT(target),
 309                          ORTE_NAME_PRINT(ret)));
 310 
 311     return *ret;
 312 }
 313 
 314 static int route_lost(const orte_process_name_t *route)
 315 {
 316     opal_list_item_t *item;
 317     orte_routed_tree_t *child;
 318 
 319     OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 320                          "%s route to %s lost",
 321                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 322                          ORTE_NAME_PRINT(route)));
 323 
 324     /* if we lose the connection to the lifeline and we are NOT already,
 325      * in finalize, tell the OOB to abort.
 326      * NOTE: we cannot call abort from here as the OOB needs to first
 327      * release a thread-lock - otherwise, we will hang!!
 328      */
 329     if (!orte_finalizing &&
 330         NULL != lifeline &&
 331         OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
 332         OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 333                              "%s routed:binomial: Connection to lifeline %s lost",
 334                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 335                              ORTE_NAME_PRINT(lifeline)));
 336         return ORTE_ERR_FATAL;
 337     }
 338 
 339     /* if we are the HNP or a daemon, is it a daemon, and one of my children? if so, then
 340      * remove it from the child list
 341      */
 342     if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) &&
 343         route->jobid == ORTE_PROC_MY_NAME->jobid) {
 344         for (item = opal_list_get_first(&my_children);
 345              item != opal_list_get_end(&my_children);
 346              item = opal_list_get_next(item)) {
 347             child = (orte_routed_tree_t*)item;
 348             if (child->vpid == route->vpid) {
 349                 OPAL_OUTPUT_VERBOSE((4, orte_routed_base_framework.framework_output,
 350                                      "%s routed_binomial: removing route to child daemon %s",
 351                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 352                                      ORTE_NAME_PRINT(route)));
 353                 opal_list_remove_item(&my_children, item);
 354                 OBJ_RELEASE(item);
 355                 return ORTE_SUCCESS;
 356             }
 357         }
 358     }
 359 
 360     /* we don't care about this one, so return success */
 361     return ORTE_SUCCESS;
 362 }
 363 
 364 
 365 static bool route_is_defined(const orte_process_name_t *target)
 366 {
 367     /* find out what daemon hosts this proc */
 368     if (ORTE_VPID_INVALID == orte_get_proc_daemon_vpid((orte_process_name_t*)target)) {
 369         return false;
 370     }
 371 
 372     return true;
 373 }
 374 
 375 static int set_lifeline(orte_process_name_t *proc)
 376 {
 377     /* we have to copy the proc data because there is no
 378      * guarantee that it will be preserved
 379      */
 380     local_lifeline.jobid = proc->jobid;
 381     local_lifeline.vpid = proc->vpid;
 382     lifeline = &local_lifeline;
 383 
 384     return ORTE_SUCCESS;
 385 }
 386 
 387 static int binomial_tree(int rank, int parent, int me, int num_procs,
 388                          int *nchildren, opal_list_t *childrn,
 389                          opal_bitmap_t *relatives, bool mine)
 390 {
 391     int i, bitmap, peer, hibit, mask, found;
 392     orte_routed_tree_t *child;
 393     opal_bitmap_t *relations;
 394 
 395     OPAL_OUTPUT_VERBOSE((3, orte_routed_base_framework.framework_output,
 396                          "%s routed:binomial rank %d parent %d me %d num_procs %d",
 397                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 398                          rank, parent, me, num_procs));
 399 
 400     /* is this me? */
 401     if (me == rank) {
 402         bitmap = opal_cube_dim(num_procs);
 403 
 404         hibit = opal_hibit(rank, bitmap);
 405         --bitmap;
 406 
 407         for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) {
 408             peer = rank | mask;
 409             if (peer < num_procs) {
 410                 child = OBJ_NEW(orte_routed_tree_t);
 411                 child->vpid = peer;
 412                 OPAL_OUTPUT_VERBOSE((3, orte_routed_base_framework.framework_output,
 413                                      "%s routed:binomial %d found child %s",
 414                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 415                                      rank,
 416                                      ORTE_VPID_PRINT(child->vpid)));
 417 
 418                 if (mine) {
 419                     /* this is a direct child - add it to my list */
 420                     opal_list_append(childrn, &child->super);
 421                     (*nchildren)++;
 422                     /* setup the relatives bitmap */
 423                     opal_bitmap_init(&child->relatives, num_procs);
 424 
 425                     /* point to the relatives */
 426                     relations = &child->relatives;
 427                 } else {
 428                     /* we are recording someone's relatives - set the bit */
 429                     opal_bitmap_set_bit(relatives, peer);
 430                     /* point to this relations */
 431                     relations = relatives;
 432                 }
 433                 /* search for this child's relatives */
 434                 binomial_tree(0, 0, peer, num_procs, nchildren, childrn, relations, false);
 435             }
 436         }
 437         return parent;
 438     }
 439 
 440     /* find the children of this rank */
 441     OPAL_OUTPUT_VERBOSE((5, orte_routed_base_framework.framework_output,
 442                          "%s routed:binomial find children of rank %d",
 443                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), rank));
 444     bitmap = opal_cube_dim(num_procs);
 445 
 446     hibit = opal_hibit(rank, bitmap);
 447     --bitmap;
 448 
 449     for (i = hibit + 1, mask = 1 << i; i <= bitmap; ++i, mask <<= 1) {
 450         peer = rank | mask;
 451         OPAL_OUTPUT_VERBOSE((5, orte_routed_base_framework.framework_output,
 452                              "%s routed:binomial find children checking peer %d",
 453                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), peer));
 454         if (peer < num_procs) {
 455             OPAL_OUTPUT_VERBOSE((5, orte_routed_base_framework.framework_output,
 456                                  "%s routed:binomial find children computing tree",
 457                                  ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 458             /* execute compute on this child */
 459             if (0 <= (found = binomial_tree(peer, rank, me, num_procs, nchildren, childrn, relatives, mine))) {
 460                 OPAL_OUTPUT_VERBOSE((5, orte_routed_base_framework.framework_output,
 461                                      "%s routed:binomial find children returning found value %d",
 462                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), found));
 463                 return found;
 464             }
 465         }
 466     }
 467     return -1;
 468 }
 469 
 470 static void update_routing_plan(void)
 471 {
 472     orte_routed_tree_t *child;
 473     int j;
 474     opal_list_item_t *item;
 475 
 476     /* if I am anything other than a daemon or the HNP, this
 477      * is a meaningless command as I am not allowed to route
 478      */
 479     if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) {
 480         return;
 481     }
 482 
 483     /* clear the list of children if any are already present */
 484     while (NULL != (item = opal_list_remove_first(&my_children))) {
 485         OBJ_RELEASE(item);
 486     }
 487     num_children = 0;
 488 
 489     /* compute my direct children and the bitmap that shows which vpids
 490      * lie underneath their branch
 491      */
 492     ORTE_PROC_MY_PARENT->vpid = binomial_tree(0, 0, ORTE_PROC_MY_NAME->vpid,
 493                                    orte_process_info.max_procs,
 494                                    &num_children, &my_children, NULL, true);
 495 
 496     if (0 < opal_output_get_verbosity(orte_routed_base_framework.framework_output)) {
 497         opal_output(0, "%s: parent %d num_children %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), ORTE_PROC_MY_PARENT->vpid, num_children);
 498         for (item = opal_list_get_first(&my_children);
 499              item != opal_list_get_end(&my_children);
 500              item = opal_list_get_next(item)) {
 501             child = (orte_routed_tree_t*)item;
 502             opal_output(0, "%s: \tchild %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), child->vpid);
 503             for (j=0; j < (int)orte_process_info.max_procs; j++) {
 504                 if (opal_bitmap_is_set_bit(&child->relatives, j)) {
 505                     opal_output(0, "%s: \t\trelation %d", ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), j);
 506                 }
 507             }
 508         }
 509     }
 510 }
 511 
 512 static void get_routing_list(opal_list_t *coll)
 513 {
 514 
 515     /* if I am anything other than a daemon or the HNP, this
 516      * is a meaningless command as I am not allowed to route
 517      */
 518     if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) {
 519         return;
 520     }
 521 
 522     orte_routed_base_xcast_routing(coll, &my_children);
 523 }
 524 
 525 static size_t num_routes(void)
 526 {
 527     OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 528                          "%s num routes %d",
 529                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 530                          (int)opal_list_get_size(&my_children)));
 531     return opal_list_get_size(&my_children);
 532 }
 533 
 534 #if OPAL_ENABLE_FT_CR == 1
 535 static int binomial_ft_event(int state)
 536 {
 537     int ret, exit_status = ORTE_SUCCESS;
 538 
 539     /******** Checkpoint Prep ********/
 540     if(OPAL_CRS_CHECKPOINT == state) {
 541     }
 542     /******** Continue Recovery ********/
 543     else if (OPAL_CRS_CONTINUE == state ) {
 544     }
 545     else if (OPAL_CRS_TERM == state ) {
 546         /* Nothing */
 547     }
 548     else {
 549         /* Error state = Nothing */
 550     }
 551 
 552  cleanup:
 553     return exit_status;
 554 }
 555 #endif
 556 

/* [<][>][^][v][top][bottom][index][help] */