root/orte/mca/routed/debruijn/routed_debruijn.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. init
  2. finalize
  3. delete_route
  4. update_route
  5. debruijn_next_hop
  6. get_route
  7. route_lost
  8. route_is_defined
  9. set_lifeline
  10. ilog2
  11. update_routing_plan
  12. get_routing_list
  13. num_routes
  14. debruijn_ft_event

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2007-2012 Los Alamos National Security, LLC.
   4  *                         All rights reserved.
   5  * Copyright (c) 2004-2011 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2013-2016 Intel, Inc.  All rights reserved.
   9  * $COPYRIGHT$
  10  *
  11  * Additional copyrights may follow
  12  *
  13  * $HEADER$
  14  */
  15 
  16 #include "orte_config.h"
  17 #include "orte/constants.h"
  18 
  19 #include <stddef.h>
  20 
  21 #include "opal/dss/dss.h"
  22 #include "opal/class/opal_hash_table.h"
  23 #include "opal/class/opal_bitmap.h"
  24 #include "opal/util/output.h"
  25 
  26 #include "orte/mca/errmgr/errmgr.h"
  27 #include "orte/mca/ess/ess.h"
  28 #include "orte/mca/rml/rml.h"
  29 #include "orte/mca/rml/rml_types.h"
  30 #include "orte/util/name_fns.h"
  31 #include "orte/runtime/orte_globals.h"
  32 #include "orte/runtime/orte_wait.h"
  33 #include "orte/runtime/runtime.h"
  34 #include "orte/runtime/data_type_support/orte_dt_support.h"
  35 
  36 #include "orte/mca/rml/base/rml_contact.h"
  37 
  38 #include "orte/mca/routed/base/base.h"
  39 #include "routed_debruijn.h"
  40 
  41 
  42 static int init(void);
  43 static int finalize(void);
  44 static int delete_route(orte_process_name_t *proc);
  45 static int update_route(orte_process_name_t *target,
  46                         orte_process_name_t *route);
  47 static orte_process_name_t get_route(orte_process_name_t *target);
  48 static int route_lost(const orte_process_name_t *route);
  49 static bool route_is_defined(const orte_process_name_t *target);
  50 static void update_routing_plan(void);
  51 static void get_routing_list(opal_list_t *coll);
  52 static int set_lifeline(orte_process_name_t *proc);
  53 static size_t num_routes(void);
  54 
  55 #if OPAL_ENABLE_FT_CR == 1
  56 static int debruijn_ft_event(int state);
  57 #endif
  58 
  59 orte_routed_module_t orte_routed_debruijn_module = {  /* function table exported by this file; every entry is a static function defined below */
  60     .initialize = init,
  61     .finalize = finalize,
  62     .delete_route = delete_route,
  63     .update_route = update_route,
  64     .get_route = get_route,
  65     .route_lost = route_lost,
  66     .route_is_defined = route_is_defined,
  67     .set_lifeline = set_lifeline,
  68     .update_routing_plan = update_routing_plan,
  69     .get_routing_list = get_routing_list,
  70     .num_routes = num_routes,
  71 #if OPAL_ENABLE_FT_CR == 1
  72     .ft_event = debruijn_ft_event
  73 #else
  74     NULL  /* positional: initializes whichever member follows num_routes - presumably ft_event; confirm against orte_routed_module_t */
  75 #endif
  76 };
  77 
  78 /* local globals - file-scope state shared by the module functions below */
  79 static orte_process_name_t      *lifeline=NULL;  /* proc whose loss makes route_lost() return ORTE_ERR_FATAL; NULL when unset */
  80 static orte_process_name_t      local_lifeline;  /* private copy that set_lifeline() points lifeline at */
  81 static opal_list_t              my_children;     /* orte_routed_tree_t entries, rebuilt by update_routing_plan() */
  82 static bool                     hnp_direct=true; /* cleared by update_route() when the HNP is reported reachable only via another proc */
  83 static int                      log_nranks;      /* index of the highest bit set in rank_mask; computed by update_routing_plan() */
  84 static int                      log_npeers;      /* shift step (1, 2 or 4) chosen by update_routing_plan() */
  85 static unsigned int             rank_mask;       /* (1 << (log_nranks + 1)) - 1; computed by update_routing_plan() */
  86 
  87 static int init(void)
  88 {
  89     lifeline = NULL;
  90 
  91     if (ORTE_PROC_IS_DAEMON) {
  92         /* if we are using static ports, set my lifeline to point at my parent */
  93         if (orte_static_ports) {
  94             lifeline = ORTE_PROC_MY_PARENT;
  95         } else {
  96             /* set our lifeline to the HNP - we will abort if that connection is lost */
  97             lifeline = ORTE_PROC_MY_HNP;
  98         }
  99         ORTE_PROC_MY_PARENT->jobid = ORTE_PROC_MY_NAME->jobid;
 100     } else if (ORTE_PROC_IS_APP) {
 101         /* if we don't have a designated daemon, just
 102          * disqualify ourselves */
 103         if (NULL == orte_process_info.my_daemon_uri) {
 104             return ORTE_ERR_TAKE_NEXT_OPTION;
 105         }
 106         /* set our lifeline to the local daemon - we will abort if this connection is lost */
 107         lifeline = ORTE_PROC_MY_DAEMON;
 108         orte_routing_is_enabled = true;
 109     }
 110 
 111     /* setup the list of children */
 112     OBJ_CONSTRUCT(&my_children, opal_list_t);
 113 
 114     return ORTE_SUCCESS;
 115 }
 116 
 117 static int finalize(void)
 118 {
 119     opal_list_item_t *item;
 120 
 121     lifeline = NULL;
 122 
 123     /* deconstruct the list of children */
 124     while (NULL != (item = opal_list_remove_first(&my_children))) {
 125         OBJ_RELEASE(item);
 126     }
 127     OBJ_DESTRUCT(&my_children);
 128 
 129     return ORTE_SUCCESS;
 130 }
 131 
 132 static int delete_route(orte_process_name_t *proc)
 133 {
 134     if (proc->jobid == ORTE_JOBID_INVALID ||
 135         proc->vpid == ORTE_VPID_INVALID) {
 136         return ORTE_ERR_BAD_PARAM;
 137     }
 138 
 139     /* if I am an application process, I don't have any routes
 140      * so there is nothing for me to do
 141      */
 142     if (!ORTE_PROC_IS_HNP && !ORTE_PROC_IS_DAEMON &&
 143         !ORTE_PROC_IS_TOOL) {
 144         return ORTE_SUCCESS;
 145     }
 146 
 147     OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
 148                          "%s routed_debruijn_delete_route for %s",
 149                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 150                          ORTE_NAME_PRINT(proc)));
 151 
 152     /* THIS CAME FROM OUR OWN JOB FAMILY...there is nothing
 153      * to do here. The routes will be redefined when we update
 154      * the routing tree
 155      */
 156 
 157     return ORTE_SUCCESS;
 158 }
 159 
 160 static int update_route(orte_process_name_t *target,
 161                         orte_process_name_t *route)
 162 {
 163     if (target->jobid == ORTE_JOBID_INVALID ||
 164         target->vpid == ORTE_VPID_INVALID) {
 165         return ORTE_ERR_BAD_PARAM;
 166     }
 167 
 168     /* if I am an application process, we don't update the route since
 169      * we automatically route everything through the local daemon
 170      */
 171     if (ORTE_PROC_IS_APP) {
 172         return ORTE_SUCCESS;
 173     }
 174 
 175     OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
 176                          "%s routed_debruijn_update: %s --> %s",
 177                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 178                          ORTE_NAME_PRINT(target),
 179                          ORTE_NAME_PRINT(route)));
 180 
 181 
 182     /* if I am a daemon and the target is my HNP, then check
 183      * the route - if it isn't direct, then we just flag that
 184      * we have a route to the HNP
 185      */
 186     if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target) &&
 187         OPAL_EQUAL != orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, route)) {
 188         hnp_direct = false;
 189         return ORTE_SUCCESS;
 190     }
 191 
 192     return ORTE_SUCCESS;
 193 }
 194 
 195 static inline unsigned int debruijn_next_hop (int target)
 196 {
 197     const int my_id = ORTE_PROC_MY_NAME->vpid;
 198     uint64_t route, mask = rank_mask;
 199     unsigned int i, next_hop;
 200 
 201     if (target == my_id) {
 202         return my_id;
 203     }
 204 
 205     i = -log_npeers;
 206     do {
 207         i += log_npeers;
 208         mask = (mask >> i) << i;
 209         route = (my_id << i) | target;
 210     } while ((route & mask) != (((my_id << i) & target) & mask));
 211 
 212     next_hop = (int)((route >> (i - log_npeers)) & rank_mask);
 213 
 214     /* if the next hop does not exist route to the lowest proc with the same lower routing bits */
 215     return (next_hop < orte_process_info.num_procs) ? next_hop : (next_hop & (rank_mask >> log_npeers));
 216 }
 217 
 218 static orte_process_name_t get_route(orte_process_name_t *target)
 219 {
 220     orte_process_name_t ret;
 221 
 222     /* initialize */
 223 
 224     do {
 225         ret = *ORTE_NAME_INVALID;
 226 
 227         if (ORTE_JOBID_INVALID == target->jobid ||
 228             ORTE_VPID_INVALID == target->vpid) {
 229             break;
 230         }
 231 
 232         /* if it is me, then the route is just direct */
 233         if (OPAL_EQUAL == opal_dss.compare(ORTE_PROC_MY_NAME, target, ORTE_NAME)) {
 234             ret = *target;
 235             break;
 236         }
 237 
 238         /* if I am an application process, always route via my local daemon */
 239         if (ORTE_PROC_IS_APP) {
 240             ret = *ORTE_PROC_MY_DAEMON;
 241             break;
 242         }
 243 
 244         /* if I am a tool, the route is direct if target is in
 245          * my own job family, and to the target's HNP if not
 246          */
 247         if (ORTE_PROC_IS_TOOL) {
 248             if (ORTE_JOB_FAMILY(target->jobid) == ORTE_JOB_FAMILY(ORTE_PROC_MY_NAME->jobid)) {
 249                 ret = *target;
 250             } else {
 251                 ORTE_HNP_NAME_FROM_JOB(&ret, target->jobid);
 252             }
 253 
 254             break;
 255         }
 256 
 257         /******     HNP AND DAEMONS ONLY     ******/
 258 
 259         if (OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, ORTE_PROC_MY_HNP, target)) {
 260             if (!hnp_direct || orte_static_ports) {
 261                 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 262                                      "%s routing to the HNP through my parent %s",
 263                                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 264                                      ORTE_NAME_PRINT(ORTE_PROC_MY_PARENT)));
 265                 ret = *ORTE_PROC_MY_PARENT;
 266             } else {
 267                 OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 268                                      "%s routing direct to the HNP",
 269                                      ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
 270                 ret = *ORTE_PROC_MY_HNP;
 271             }
 272 
 273             break;
 274         }
 275 
 276         ret.jobid = ORTE_PROC_MY_NAME->jobid;
 277         /* find out what daemon hosts this proc */
 278         if (ORTE_VPID_INVALID == (ret.vpid = orte_get_proc_daemon_vpid(target))) {
 279             /* we don't yet know about this daemon. just route this to the "parent" */
 280             ret = *ORTE_PROC_MY_PARENT;
 281             break;
 282         }
 283 
 284         /* if the daemon is me, then send direct to the target! */
 285         if (ORTE_PROC_MY_NAME->vpid == ret.vpid) {
 286             ret = *target;
 287             break;
 288         }
 289 
 290         /* find next hop */
 291         ret.vpid = debruijn_next_hop (ret.vpid);
 292     } while (0);
 293 
 294     OPAL_OUTPUT_VERBOSE((1, orte_routed_base_framework.framework_output,
 295                          "%s routed_debruijn_get(%s) --> %s",
 296                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 297                          ORTE_NAME_PRINT(target),
 298                          ORTE_NAME_PRINT(&ret)));
 299 
 300     return ret;
 301 }
 302 
 303 static int route_lost(const orte_process_name_t *route)
 304 {
 305     opal_list_item_t *item;
 306     orte_routed_tree_t *child;
 307 
 308     OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 309                          "%s route to %s lost",
 310                          ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 311                          ORTE_NAME_PRINT(route)));
 312 
 313     /* if we lose the connection to the lifeline and we are NOT already,
 314      * in finalize, tell the OOB to abort.
 315      * NOTE: we cannot call abort from here as the OOB needs to first
 316      * release a thread-lock - otherwise, we will hang!!
 317      */
 318     if (!orte_finalizing &&
 319         NULL != lifeline &&
 320         OPAL_EQUAL == orte_util_compare_name_fields(ORTE_NS_CMP_ALL, route, lifeline)) {
 321         OPAL_OUTPUT_VERBOSE((2, orte_routed_base_framework.framework_output,
 322                              "%s routed:debruijn: Connection to lifeline %s lost",
 323                              ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
 324                              ORTE_NAME_PRINT(lifeline)));
 325         return ORTE_ERR_FATAL;
 326     }
 327 
 328     /* if we are the HNP or daemon, and the route is a daemon,
 329      * see if it is one of our children - if so, remove it
 330      */
 331     if ((ORTE_PROC_IS_DAEMON || ORTE_PROC_IS_HNP) &&
 332         route->jobid == ORTE_PROC_MY_NAME->jobid) {
 333         for (item = opal_list_get_first(&my_children);
 334              item != opal_list_get_end(&my_children);
 335              item = opal_list_get_next(item)) {
 336             child = (orte_routed_tree_t*)item;
 337             if (child->vpid == route->vpid) {
 338                 opal_list_remove_item(&my_children, item);
 339                 OBJ_RELEASE(item);
 340                 return ORTE_SUCCESS;
 341             }
 342         }
 343     }
 344 
 345     /* we don't care about this one, so return success */
 346     return ORTE_SUCCESS;
 347 }
 348 
 349 static bool route_is_defined(const orte_process_name_t *target)
 350 {
 351     /* find out what daemon hosts this proc */
 352     if (ORTE_VPID_INVALID == orte_get_proc_daemon_vpid((orte_process_name_t*)target)) {
 353         return false;
 354     }
 355 
 356     return true;
 357 }
 358 
 359 static int set_lifeline(orte_process_name_t *proc)
 360 {
 361     /* we have to copy the proc data because there is no
 362      * guarantee that it will be preserved
 363      */
 364     local_lifeline.jobid = proc->jobid;
 365     local_lifeline.vpid = proc->vpid;
 366     lifeline = &local_lifeline;
 367 
 368     return ORTE_SUCCESS;
 369 }
 370 
 371 static unsigned int ilog2 (unsigned int v)
 372 {
 373     const unsigned int b[] = {0x2, 0xC, 0xF0, 0xFF00, 0xFFFF0000};
 374     const unsigned int S[] = {1, 2, 4, 8, 16};
 375     int i;
 376 
 377     register unsigned int r = 0;
 378     for (i = 4; i >= 0; i--) {
 379         if (v & b[i]) {
 380             v >>= S[i];
 381             r |= S[i];
 382         }
 383     }
 384 
 385     return r;
 386 }
 387 
 388 static void update_routing_plan(void)
 389 {
 390     orte_routed_tree_t *child;
 391     opal_list_item_t *item;
 392     int my_vpid = ORTE_PROC_MY_NAME->vpid;
 393     int i;
 394 
 395     /* if I am anything other than a daemon or the HNP, this
 396      * is a meaningless command as I am not allowed to route
 397      */
 398     if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) {
 399         return;
 400     }
 401 
 402     /* clear the list of children if any are already present */
 403     while (NULL != (item = opal_list_remove_first(&my_children))) {
 404         OBJ_RELEASE(item);
 405     }
 406 
 407     log_nranks = (int) ilog2 ((unsigned int)orte_process_info.num_procs) ;
 408     assert(log_nranks < 31);
 409 
 410     if (log_nranks < 3) {
 411       log_npeers = 1;
 412     } else if (log_nranks < 7) {
 413       log_npeers = 2;
 414     } else {
 415       log_npeers = 4;
 416     }
 417 
 418     /* round log_nranks to a multiple of log_npeers */
 419     log_nranks = ((log_nranks + log_npeers) & ~(log_npeers - 1)) - 1;
 420 
 421     rank_mask = (1 << (log_nranks + 1)) - 1;
 422 
 423     /* compute my parent */
 424     ORTE_PROC_MY_PARENT->vpid = my_vpid ? my_vpid >> log_npeers : -1;
 425 
 426     /* only add peers to the routing tree if this rank is the smallest rank that will send to
 427        the any peer */
 428     if ((my_vpid >> (log_nranks + 1 - log_npeers)) == 0) {
 429         for (i = (1 << log_npeers) - 1 ; i >= 0 ; --i) {
 430             int next = ((my_vpid << log_npeers) | i) & rank_mask;
 431 
 432             /* add a peer to the routing tree only if its vpid is smaller than this rank */
 433             if (next > my_vpid && next < (int)orte_process_info.num_procs) {
 434                 child = OBJ_NEW(orte_routed_tree_t);
 435                 child->vpid = next;
 436                 opal_list_append (&my_children, &child->super);
 437             }
 438         }
 439     }
 440 }
 441 
 442 static void get_routing_list(opal_list_t *coll)
 443 {
 444     /* if I am anything other than a daemon or the HNP, this
 445      * is a meaningless command as I am not allowed to route
 446      */
 447     if (!ORTE_PROC_IS_DAEMON && !ORTE_PROC_IS_HNP) {
 448         return;
 449     }
 450 
 451     orte_routed_base_xcast_routing(coll, &my_children);
 452 }
 453 
 454 static size_t num_routes(void)
 455 {
 456     return opal_list_get_size(&my_children);
 457 }
 458 
 459 #if OPAL_ENABLE_FT_CR == 1
 460 static int debruijn_ft_event(int state)
 461 {
 462     int ret, exit_status = ORTE_SUCCESS;
 463 
 464     /******** Checkpoint Prep ********/
 465     if(OPAL_CRS_CHECKPOINT == state) {
 466     }
 467     /******** Continue Recovery ********/
 468     else if (OPAL_CRS_CONTINUE == state ) {
 469     }
 470     else if (OPAL_CRS_TERM == state ) {
 471         /* Nothing */
 472     }
 473     else {
 474         /* Error state = Nothing */
 475     }
 476 
 477  cleanup:
 478     return exit_status;
 479 }
 480 #endif
 481 

/* [<][>][^][v][top][bottom][index][help] */