1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2007-2015 Los Alamos National Security, LLC. All rights
4 * reserved.
5 * Copyright (c) 2004-2008 The Trustees of Indiana University.
6 * All rights reserved.
7 * Copyright (c) 2004-2011 The University of Tennessee and The University
8 * of Tennessee Research Foundation. All rights
9 * reserved.
10 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
11 * $COPYRIGHT$
12 *
13 * Additional copyrights may follow
14 *
15 * $HEADER$
16 */
17
18 /**
19 * @file
20 *
21 * Routing table for the RML
22 *
23 * A flexible routing infrastructure for the RML. Provides "next hop"
24 * service. Only deals with orte_process_name_ts.
25 */
26
27
28 #ifndef ORTE_MCA_ROUTED_ROUTED_H_
29 #define ORTE_MCA_ROUTED_ROUTED_H_
30
31 #include "orte_config.h"
32
33 #ifdef HAVE_UNISTD_H
34 #include <unistd.h>
35 #endif
36
37 #include "orte/types.h"
38 #include "orte/mca/mca.h"
39
40 #include "opal/dss/dss_types.h"
41
42 #include "opal/mca/crs/crs.h"
43 #include "opal/mca/crs/base/base.h"
44
45 #include "orte/mca/routed/routed_types.h"
46
47 BEGIN_C_DECLS
48
49
50 /* ******************************************************************** */
51
52
/* Opaque forward declarations: this header only introduces the struct
 * tags below and does not define their contents. */
struct orte_rml_module_t;
struct opal_buffer_t;
55
56
57 /* ******************************************************************** */
/**
 * Module start-up hook
 *
 * Performs whatever work the selected routed module needs in order to
 * be initialized. Takes no arguments.
 *
 * @retval ORTE_SUCCESS  Initialization succeeded
 * @retval ORTE_ERROR    Error code from whatever failure was encountered
 */
typedef int (*orte_routed_module_init_fn_t)(void);
67
/**
 * Module shut-down hook
 *
 * Finalizes the routed module and cleans up every resource associated
 * with it. Once this function has been called, none of the interface
 * functions (nor the module structure itself) may be used any longer.
 *
 * @note The module must be treated as unusable after this call,
 *       regardless of whether the call reported success.
 *
 * @retval ORTE_SUCCESS  Finalization succeeded
 * @retval ORTE_ERROR    An unspecified error occurred
 */
typedef int (*orte_routed_module_finalize_fn_t)(void);
83
84
85 /*
86 * Delete route
87 *
88 * Delete the route to the specified proc from the routing table. Note
89 * that wildcards are supported to remove routes from, for example, all
90 * procs in a given job
91 */
92 typedef int (*orte_routed_module_delete_route_fn_t)(orte_process_name_t *proc);
93
94 /**
95 * Update route table with new information
96 *
97 * Update routing table with a new entry. If an existing exact match
98 * for the entry exists, it will be replaced with the current
99 * information. If the entry is new, it will be inserted behind all
100 * entries of similar "mask". So a wildcard cellid entry will be
101 * inserted after any fully-specified entries and any other wildcard
102 * cellid entries, but before any wildcard cellid and jobid entries.
103 *
104 * @retval ORTE_SUCCESS Success
105 * @retval ORTE_ERR_NOT_SUPPORTED The updated is not supported. This
106 * is likely due to using partially-specified
107 * names with a component that does not support
108 * such functionality
109 * @retval ORTE_ERROR An unspecified error occurred
110 */
111 typedef int (*orte_routed_module_update_route_fn_t)(orte_process_name_t *target,
112 orte_process_name_t *route);
113
114 /**
115 * Get the next hop towards the target
116 *
117 * Obtain the next process on the route to the target. ORTE's routing system
118 * works one hop at-a-time, so this function doesn't return the entire path
119 * to the target - it only returns the next hop. This could be the target itself,
120 * or it could be an intermediate relay. By design, we -never- use application
121 * procs as relays, so any relay will be an orted.
122 */
123 typedef orte_process_name_t (*orte_routed_module_get_route_fn_t)(orte_process_name_t *target);
124
125 /**
126 * Report a route as "lost"
127 *
128 * Report that an existing connection has been lost, therefore potentially
129 * "breaking" a route in the routing table. It is critical that broken
130 * connections be reported so that the selected routing module has the
131 * option of dealing with it. This could consist of nothing more than
132 * removing that route from the routing table, or could - in the case
133 * of a "lifeline" connection - result in abort of the process.
134 */
135 typedef int (*orte_routed_module_route_lost_fn_t)(const orte_process_name_t *route);
136
137 /*
138 * Is this route defined?
139 *
140 * Check to see if a route to the specified target has been defined. The
141 * function returns "true" if it has, and "false" if no route to the
142 * target was previously defined.
143 *
144 * This is needed because routed modules will return their "wildcard"
145 * route if we request a route to a target that they don't know about.
146 * In some cases, though, we truly -do- need to know if a route was
147 * specifically defined.
148 */
149 typedef bool (*orte_routed_module_route_is_defined_fn_t)(const orte_process_name_t *target);
150
/**
 * Rebuild the module's routing plan
 *
 * Invoked only by a daemon and by the HNP. Produces the plan used to
 * route messages inside ORTE, in particular the collectives that are
 * run during wireup. Takes no arguments and returns nothing.
 */
typedef void (*orte_routed_module_update_routing_plan_fn_t)(void);
159
160 /*
161 * Get the routing list for an xcast collective
162 *
163 * Fills the target list with orte_namelist_t so that
164 * the grpcomm framework will know who to send xcast to
165 * next
166 */
167 typedef void (*orte_routed_module_get_routing_list_fn_t)(opal_list_t *coll);
168
169 /*
170 * Set lifeline process
171 *
172 * Defines the lifeline to be the specified process. Should contact to
173 * that process be lost, the errmgr will be called, possibly resulting
174 * in termination of the process and job.
175 */
176 typedef int (*orte_routed_module_set_lifeline_fn_t)(orte_process_name_t *proc);
177
/**
 * Report how many routes this process supports
 *
 * @return Size of the routing tree; implementations are expected to
 *         obtain it with an O(1) function
 */
typedef size_t (*orte_routed_module_num_routes_fn_t)(void);
184
/**
 * React to a fault tolerance update
 *
 * @param[in] state  Fault tolerance state update being delivered
 *
 * @retval ORTE_SUCCESS  The operation completed successfully
 * @retval ORTE_ERROR    An unspecified error occurred
 */
typedef int (*orte_routed_module_ft_event_fn_t)(int state);
194
195 /* ******************************************************************** */
196
197
198 /**
199 * routed module interface
200 *
201 * Module interface to the routed communication system. A global
202 * instance of this module, orte_routed, provices an interface into the
203 * active routed interface.
204 */
205 typedef struct {
206 /** Startup/shutdown the communication system and clean up resources */
207 orte_routed_module_init_fn_t initialize;
208 orte_routed_module_finalize_fn_t finalize;
209 /* API functions */
210 orte_routed_module_delete_route_fn_t delete_route;
211 orte_routed_module_update_route_fn_t update_route;
212 orte_routed_module_get_route_fn_t get_route;
213 orte_routed_module_route_lost_fn_t route_lost;
214 orte_routed_module_route_is_defined_fn_t route_is_defined;
215 orte_routed_module_set_lifeline_fn_t set_lifeline;
216 /* fns for daemons */
217 orte_routed_module_update_routing_plan_fn_t update_routing_plan;
218 orte_routed_module_get_routing_list_fn_t get_routing_list;
219 orte_routed_module_num_routes_fn_t num_routes;
220 /* FT Notification */
221 orte_routed_module_ft_event_fn_t ft_event;
222 } orte_routed_module_t;
223
224 /* provide an interface to the routed framework stub functions */
225 ORTE_DECLSPEC extern orte_routed_module_t orte_routed;
226
227 /* ******************************************************************** */
228
229 /**
230 * routed component interface
231 *
232 * Component interface for the routed framework. A public instance of
233 * this structure, called mca_routed_[component name]_component, must
234 * exist in any routed component.
235 */
236
237 struct orte_routed_component_3_0_0_t {
238 /* Base component description */
239 mca_base_component_t base_version;
240 /* Base component data block */
241 mca_base_component_data_t base_data;
242 /* priority */
243 int priority;
244 };
245 /** Convienence typedef */
246 typedef struct orte_routed_component_3_0_0_t orte_routed_component_t;
247
248
249 /* ******************************************************************** */
250
251
/**
 * Version macro for components of type "routed".
 *
 * Expands to ORTE_MCA_BASE_VERSION_2_1_0 with the framework name
 * "routed" and the version triple 3, 0, 0.
 */
#define ORTE_ROUTED_BASE_VERSION_3_0_0                \
    ORTE_MCA_BASE_VERSION_2_1_0("routed", 3, 0, 0)
255
256
257 /* ******************************************************************** */
258
259
260 END_C_DECLS
261
262 #endif