1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights 4 * reserved. 5 * Copyright (c) 2004-2008 The Trustees of Indiana University. 6 * All rights reserved. 7 * Copyright (c) 2004-2011 The University of Tennessee and The University 8 * of Tennessee Research Foundation. All rights 9 * reserved. 10 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 11 * $COPYRIGHT$ 12 * 13 * Additional copyrights may follow 14 * 15 * $HEADER$ 16 */ 17 18 /** 19 * @file 20 * 21 * Routing table for the RML 22 * 23 * A flexible routing infrastructure for the RML. Provides "next hop" 24 * service. Only deals with orte_process_name_ts. 25 */ 26 27 28 #ifndef ORTE_MCA_ROUTED_ROUTED_H_ 29 #define ORTE_MCA_ROUTED_ROUTED_H_ 30 31 #include "orte_config.h" 32 33 #ifdef HAVE_UNISTD_H 34 #include <unistd.h> 35 #endif 36 37 #include "orte/types.h" 38 #include "orte/mca/mca.h" 39 40 #include "opal/dss/dss_types.h" 41 42 #include "opal/mca/crs/crs.h" 43 #include "opal/mca/crs/base/base.h" 44 45 #include "orte/mca/routed/routed_types.h" 46 47 BEGIN_C_DECLS 48 49 50 /* ******************************************************************** */ 51 52 53 struct opal_buffer_t; 54 struct orte_rml_module_t; 55 56 57 /* ******************************************************************** */ 58 /** 59 * Initialize the routed module 60 * 61 * Do whatever needs to be done to initialize the selected module 62 * 63 * @retval ORTE_SUCCESS Success 64 * @retval ORTE_ERROR Error code from whatever was encountered 65 */ 66 typedef int (*orte_routed_module_init_fn_t)(void); 67 68 /** 69 * Finalize the routed module 70 * 71 * Finalize the routed module, ending cleaning up all resources 72 * associated with the module. After the finalize function is called, 73 * all interface functions (and the module structure itself) are not 74 * available for use. 75 * 76 * @note Whether or not the finalize function returns successfully, 77 * the module should not be used once this function is called. 78 * 79 * @retval ORTE_SUCCESS Success 80 * @retval ORTE_ERROR An unspecified error occurred 81 */ 82 typedef int (*orte_routed_module_finalize_fn_t)(void); 83 84 85 /* 86 * Delete route 87 * 88 * Delete the route to the specified proc from the routing table. Note 89 * that wildcards are supported to remove routes from, for example, all 90 * procs in a given job 91 */ 92 typedef int (*orte_routed_module_delete_route_fn_t)(orte_process_name_t *proc); 93 94 /** 95 * Update route table with new information 96 * 97 * Update routing table with a new entry. If an existing exact match 98 * for the entry exists, it will be replaced with the current 99 * information. If the entry is new, it will be inserted behind all 100 * entries of similar "mask". So a wildcard cellid entry will be 101 * inserted after any fully-specified entries and any other wildcard 102 * cellid entries, but before any wildcard cellid and jobid entries. 103 * 104 * @retval ORTE_SUCCESS Success 105 * @retval ORTE_ERR_NOT_SUPPORTED The updated is not supported. This 106 * is likely due to using partially-specified 107 * names with a component that does not support 108 * such functionality 109 * @retval ORTE_ERROR An unspecified error occurred 110 */ 111 typedef int (*orte_routed_module_update_route_fn_t)(orte_process_name_t *target, 112 orte_process_name_t *route); 113 114 /** 115 * Get the next hop towards the target 116 * 117 * Obtain the next process on the route to the target. ORTE's routing system 118 * works one hop at-a-time, so this function doesn't return the entire path 119 * to the target - it only returns the next hop. This could be the target itself, 120 * or it could be an intermediate relay. By design, we -never- use application 121 * procs as relays, so any relay will be an orted. 122 */ 123 typedef orte_process_name_t (*orte_routed_module_get_route_fn_t)(orte_process_name_t *target); 124 125 /** 126 * Report a route as "lost" 127 * 128 * Report that an existing connection has been lost, therefore potentially 129 * "breaking" a route in the routing table. It is critical that broken 130 * connections be reported so that the selected routing module has the 131 * option of dealing with it. This could consist of nothing more than 132 * removing that route from the routing table, or could - in the case 133 * of a "lifeline" connection - result in abort of the process. 134 */ 135 typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *route); 136 137 /* 138 * Is this route defined? 139 * 140 * Check to see if a route to the specified target has been defined. The 141 * function returns "true" if it has, and "false" if no route to the 142 * target was previously defined. 143 * 144 * This is needed because routed modules will return their "wildcard" 145 * route if we request a route to a target that they don't know about. 146 * In some cases, though, we truly -do- need to know if a route was 147 * specifically defined. 148 */ 149 typedef bool (*orte_routed_module_route_is_defined_fn_t)(const orte_process_name_t *target); 150 151 /* 152 * Update the module's routing plan 153 * 154 * Called only by a daemon and the HNP, this function creates a plan 155 * for routing messages within ORTE, especially for routing collectives 156 * used during wireup 157 */ 158 typedef void (*orte_routed_module_update_routing_plan_fn_t)(void); 159 160 /* 161 * Get the routing list for an xcast collective 162 * 163 * Fills the target list with orte_namelist_t so that 164 * the grpcomm framework will know who to send xcast to 165 * next 166 */ 167 typedef void (*orte_routed_module_get_routing_list_fn_t)(opal_list_t *coll); 168 169 /* 170 * Set lifeline process 171 * 172 * Defines the lifeline to be the specified process. Should contact to 173 * that process be lost, the errmgr will be called, possibly resulting 174 * in termination of the process and job. 175 */ 176 typedef int (*orte_routed_module_set_lifeline_fn_t)(orte_process_name_t *proc); 177 178 /* 179 * Get the number of routes supported by this process 180 * 181 * Returns the size of the routing tree using an O(1) function 182 */ 183 typedef size_t (*orte_routed_module_num_routes_fn_t)(void); 184 185 /** 186 * Handle fault tolerance updates 187 * 188 * @param[in] state Fault tolerance state update 189 * 190 * @retval ORTE_SUCCESS The operation completed successfully 191 * @retval ORTE_ERROR An unspecifed error occurred 192 */ 193 typedef int (*orte_routed_module_ft_event_fn_t)(int state); 194 195 /* ******************************************************************** */ 196 197 198 /** 199 * routed module interface 200 * 201 * Module interface to the routed communication system. A global 202 * instance of this module, orte_routed, provices an interface into the 203 * active routed interface. 204 */ 205 typedef struct { 206 /** Startup/shutdown the communication system and clean up resources */ 207 orte_routed_module_init_fn_t initialize; 208 orte_routed_module_finalize_fn_t finalize; 209 /* API functions */ 210 orte_routed_module_delete_route_fn_t delete_route; 211 orte_routed_module_update_route_fn_t update_route; 212 orte_routed_module_get_route_fn_t get_route; 213 orte_routed_module_route_lost_fn_t route_lost; 214 orte_routed_module_route_is_defined_fn_t route_is_defined; 215 orte_routed_module_set_lifeline_fn_t set_lifeline; 216 /* fns for daemons */ 217 orte_routed_module_update_routing_plan_fn_t update_routing_plan; 218 orte_routed_module_get_routing_list_fn_t get_routing_list; 219 orte_routed_module_num_routes_fn_t num_routes; 220 /* FT Notification */ 221 orte_routed_module_ft_event_fn_t ft_event; 222 } orte_routed_module_t; 223 224 /* provide an interface to the routed framework stub functions */ 225 ORTE_DECLSPEC extern orte_routed_module_t orte_routed; 226 227 /* ******************************************************************** */ 228 229 /** 230 * routed component interface 231 * 232 * Component interface for the routed framework. A public instance of 233 * this structure, called mca_routed_[component name]_component, must 234 * exist in any routed component. 235 */ 236 237 struct orte_routed_component_3_0_0_t { 238 /* Base component description */ 239 mca_base_component_t base_version; 240 /* Base component data block */ 241 mca_base_component_data_t base_data; 242 /* priority */ 243 int priority; 244 }; 245 /** Convienence typedef */ 246 typedef struct orte_routed_component_3_0_0_t orte_routed_component_t; 247 248 249 /* ******************************************************************** */ 250 251 252 /** Macro for use in components that are of type routed */ 253 #define ORTE_ROUTED_BASE_VERSION_3_0_0 \ 254 ORTE_MCA_BASE_VERSION_2_1_0("routed", 3, 0, 0) 255 256 257 /* ******************************************************************** */ 258 259 260 END_C_DECLS 261 262 #endif