1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2005 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
14 * reserved.
15 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
16 * Copyright (c) 2015 Research Organization for Information Science
17 * and Technology (RIST). All rights reserved.
18 *
19 * $COPYRIGHT$
20 *
21 * Additional copyrights may follow
22 *
23 * $HEADER$
24 */
25
/**
 * @file
 *
 * Runtime Messaging Layer (RML) Communication Interface
 *
 * The Runtime Messaging Layer (RML) provides basic point-to-point
 * communication between ORTE processes.  The system is available for
 * most architectures, with some exceptions (the Cray XT3/XT4, for example).
 */
35
36
37 #ifndef ORTE_MCA_RML_RML_H_
38 #define ORTE_MCA_RML_RML_H_
39
40 #include "orte_config.h"
41 #include "orte/types.h"
42
43 #ifdef HAVE_UNISTD_H
44 #include <unistd.h>
45 #endif
46
47 #include "orte/mca/mca.h"
48 #include "opal/mca/crs/crs.h"
49 #include "opal/mca/crs/base/base.h"
50 #include "orte/mca/routed/routed.h"
51
52 #include "orte/mca/rml/rml_types.h"
53
54 BEGIN_C_DECLS
55
56
57 /* ******************************************************************** */
58
59 typedef struct {
60 opal_object_t super;
61 orte_process_name_t name;
62 opal_buffer_t data;
63 bool active;
64 } orte_rml_recv_cb_t;
65 OBJ_CLASS_DECLARATION(orte_rml_recv_cb_t);
66
67 /* Provide a generic callback function to release buffers
68 * following a non-blocking send as this happens all over
69 * the code base
70 */
71 ORTE_DECLSPEC void orte_rml_send_callback(int status, orte_process_name_t* sender,
72 opal_buffer_t* buffer, orte_rml_tag_t tag,
73 void* cbdata);
74
75 ORTE_DECLSPEC void orte_rml_recv_callback(int status, orte_process_name_t* sender,
76 opal_buffer_t *buffer,
77 orte_rml_tag_t tag, void *cbdata);
78
79 /* ******************************************************************** */
80 /* RML CALLBACK FUNCTION DEFINITIONS */
81
82 /**
83 * Funtion prototype for callback from non-blocking iovec send and recv
84 *
85 * Funtion prototype for callback from non-blocking iovec send and recv.
86 * On send, the iovec pointer will be the same pointer passed to
87 * send_nb and count will equal the count given to send.
88 *
89 * On recv, the iovec pointer will be the address of a single iovec
90 * allocated and owned by the RML, not the process receiving the
91 * callback. Ownership of the data block can be transferred by setting
92 * a user variable to point to the data block, and setting the
93 * iovec->iov_base pointer to NULL.
94 *
95 * @note The parameter in/out parameters are relative to the user's callback
96 * function.
97 *
98 * @param[in] status Completion status
99 * @param[in] peer Opaque name of peer process
100 * @param[in] msg Pointer to the array of iovec that was sent
101 * or to a single iovec that has been recvd
102 * @param[in] count Number of iovecs in the array
103 * @param[in] tag User defined tag for matching send/recv
104 * @param[in] cbdata User data passed to send_nb()
105 */
106 typedef void (*orte_rml_callback_fn_t)(int status,
107 orte_process_name_t* peer,
108 struct iovec* msg,
109 int count,
110 orte_rml_tag_t tag,
111 void* cbdata);
112
113
114 /**
115 * Funtion prototype for callback from non-blocking buffer send and receive
116 *
117 * Function prototype for callback from non-blocking buffer send and
118 * receive. On send, the buffer will be the same pointer passed to
119 * send_buffer_nb. On receive, the buffer will be allocated and owned
120 * by the RML, not the process receiving the callback.
121 *
122 * @note The parameter in/out parameters are relative to the user's callback
123 * function.
124 *
125 * @param[in] status Completion status
126 * @param[in] peer Name of peer process
127 * @param[in] buffer Message buffer
128 * @param[in] tag User defined tag for matching send/recv
129 * @param[in] cbdata User data passed to send_buffer_nb() or recv_buffer_nb()
130 */
131 typedef void (*orte_rml_buffer_callback_fn_t)(int status,
132 orte_process_name_t* peer,
133 struct opal_buffer_t* buffer,
134 orte_rml_tag_t tag,
135 void* cbdata);
136
137 /**
138 * Function prototype for exception callback
139 *
140 * Function prototype for callback triggered when a communication error is detected.
141 *
142 * @note The parameter in/out parameters are relative to the user's callback
143 * function.
144 *
145 * @param[in] peer Name of peer process
146 * @param[in] exception Description of the error causing the exception
147 */
148 typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer,
149 orte_rml_exception_t exception);
150
151
152 /* ******************************************************************** */
153 /* RML INTERNAL MODULE API DEFINITION */
154
155
/**
 * "Ping" another process to determine availability
 *
 * Checks whether another process is available.  The check only
 * establishes that the process is alive and will accept a connection
 * from the local process; it does *not* count as establishing
 * communication with the remote process, as required by the note for
 * set_contact_info().
 *
 * @param[in] contact_info  The contact info string for the remote process
 * @param[in] tv            Timeout after which the ping should be failed
 *
 * @retval ORTE_SUCCESS The process is available and will allow connections
 *                      from the local process
 * @retval ORTE_ERROR   An unspecified error occurred
 */
typedef int (*orte_rml_module_ping_fn_t)(const char *contact_info,
                                         const struct timeval *tv);
174
175
176 /**
177 * Send an iovec non-blocking message
178 *
179 * Send an array of iovecs to the specified peer. The call
180 * will return immediately, although the iovecs may not be modified
181 * until the completion callback is triggered. The iovecs *may* be
182 * passed to another call to send_nb before the completion callback is
183 * triggered. The callback being triggered does not give any
184 * indication of remote completion.
185 *
186 * @param[in] peer Name of receiving process
187 * @param[in] msg Pointer to an array of iovecs to be sent
188 * @param[in] count Number of iovecs in array
189 * @param[in] tag User defined tag for matching send/recv
190 * @param[in] cbfunc Callback function on message comlpetion
191 * @param[in] cbdata User data to provide during completion callback
192 *
193 * @retval ORTE_SUCCESS The message was successfully started
194 * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
195 * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
196 * receiving process is not available
197 * @retval ORTE_ERROR An unspecified error occurred
198 */
199 typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer,
200 struct iovec* msg,
201 int count,
202 orte_rml_tag_t tag,
203 orte_rml_callback_fn_t cbfunc,
204 void* cbdata);
205
206
207 /**
208 * Send a buffer non-blocking message
209 *
210 * Send a buffer to the specified peer. The call
211 * will return immediately, although the buffer may not be modified
212 * until the completion callback is triggered. The buffer *may* be
213 * passed to another call to send_nb before the completion callback is
214 * triggered. The callback being triggered does not give any
215 * indication of remote completion.
216 *
217 * @param[in] peer Name of receiving process
218 * @param[in] buffer Pointer to buffer to be sent
219 * @param[in] tag User defined tag for matching send/recv
220 * @param[in] cbfunc Callback function on message comlpetion
221 * @param[in] cbdata User data to provide during completion callback
222 *
223 * @retval ORTE_SUCCESS The message was successfully started
224 * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid
225 * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the
226 * receiving process is not available
227 * @retval ORTE_ERROR An unspecified error occurred
228 */
229 typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer,
230 struct opal_buffer_t* buffer,
231 orte_rml_tag_t tag,
232 orte_rml_buffer_callback_fn_t cbfunc,
233 void* cbdata);
234
235 /**
236 * Purge the RML/OOB of contact info and pending messages
237 * to/from a specified process. Used when a process aborts
238 * and is to be restarted
239 */
240 typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer);
241
242
243 /**
244 * Receive an iovec non-blocking message
245 *
246 * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
247 * @param[in] tag User defined tag for matching send/recv
248 * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
249 * @param[in] cbfunc Callback function on message comlpetion
250 * @param[in] cbdata User data to provide during completion callback
251 */
252 typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer,
253 orte_rml_tag_t tag,
254 bool persistent,
255 orte_rml_callback_fn_t cbfunc,
256 void* cbdata);
257
258
259 /**
260 * Receive a buffer non-blocking message
261 *
262 * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive
263 * @param[in] tag User defined tag for matching send/recv
264 * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv
265 * @param[in] cbfunc Callback function on message comlpetion
266 * @param[in] cbdata User data to provide during completion callback
267 */
268 typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer,
269 orte_rml_tag_t tag,
270 bool persistent,
271 orte_rml_buffer_callback_fn_t cbfunc,
272 void* cbdata);
273
274 /**
275 * Cancel a posted non-blocking receive
276 *
277 * Attempt to cancel a posted non-blocking receive.
278 *
279 * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed
280 * to the non-blocking receive call
281 * @param[in] tag Posted receive tag
282 */
283 typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer,
284 orte_rml_tag_t tag);
285
286
287 /**
288 * RML internal module interface - these will be implemented by all RML components
289 */
290 typedef struct orte_rml_base_module_t {
291 /* pointer to the parent component for this module */
292 struct orte_rml_component_t *component;
293 /* the routed module to be used */
294 char *routed;
295 /** Ping process for connectivity check */
296 orte_rml_module_ping_fn_t ping;
297
298 /** Send non-blocking iovec message */
299 orte_rml_module_send_nb_fn_t send_nb;
300
301 /** Send non-blocking buffer message */
302 orte_rml_module_send_buffer_nb_fn_t send_buffer_nb;
303
304 orte_rml_module_recv_nb_fn_t recv_nb;
305 orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb;
306 orte_rml_module_recv_cancel_fn_t recv_cancel;
307
308 /** Purge information */
309 orte_rml_module_purge_fn_t purge;
310 } orte_rml_base_module_t;
311
312
313 /** Interface for RML communication */
314 ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml;
315
316 /* ******************************************************************** */
317 /* RML COMPONENT DEFINITION */
318
319 /**
320 * RML component interface
321 *
322 * Component interface for the RML framework. A public instance of
323 * this structure, called mca_rml_[component name]_component, must
324 * exist in any RML component.
325 */
326 typedef struct orte_rml_component_t {
327 /* Base component description */
328 mca_base_component_t base;
329 /* Base component data block */
330 mca_base_component_data_t data;
331 /* Component priority */
332 int priority;
333 } orte_rml_component_t;
334
335
336
337 /* ******************************************************************** */
338
339
/**
 * Version macro for use in components that are of type rml; expands to
 * ORTE_MCA_BASE_VERSION_2_1_0 for the "rml" framework at version 3.0.0.
 */
#define ORTE_RML_BASE_VERSION_3_0_0 ORTE_MCA_BASE_VERSION_2_1_0("rml", 3, 0, 0)
343
344
345 /* ******************************************************************** */
346
347
348 END_C_DECLS
349
350 #endif