1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana 4 * University Research and Technology 5 * Corporation. All rights reserved. 6 * Copyright (c) 2004-2005 The University of Tennessee and The University 7 * of Tennessee Research Foundation. All rights 8 * reserved. 9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 10 * University of Stuttgart. All rights reserved. 11 * Copyright (c) 2004-2005 The Regents of the University of California. 12 * All rights reserved. 13 * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights 14 * reserved. 15 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 16 * Copyright (c) 2015 Research Organization for Information Science 17 * and Technology (RIST). All rights reserved. 18 * 19 * $COPYRIGHT$ 20 * 21 * Additional copyrights may follow 22 * 23 * $HEADER$ 24 */ 25 26 /** 27 * @file 28 * 29 * Runtime Messaging Layer (RML) Communication Interface 30 * 31 * The Runtime Messaging Layer (RML) provices basic point-to-point 32 * communication between ORTE processes. The system is available for 33 * most architectures, with some exceptions (the Cray XT3/XT4, for example). 34 */ 35 36 37 #ifndef ORTE_MCA_RML_RML_H_ 38 #define ORTE_MCA_RML_RML_H_ 39 40 #include "orte_config.h" 41 #include "orte/types.h" 42 43 #ifdef HAVE_UNISTD_H 44 #include <unistd.h> 45 #endif 46 47 #include "orte/mca/mca.h" 48 #include "opal/mca/crs/crs.h" 49 #include "opal/mca/crs/base/base.h" 50 #include "orte/mca/routed/routed.h" 51 52 #include "orte/mca/rml/rml_types.h" 53 54 BEGIN_C_DECLS 55 56 57 /* ******************************************************************** */ 58 59 typedef struct { 60 opal_object_t super; 61 orte_process_name_t name; 62 opal_buffer_t data; 63 bool active; 64 } orte_rml_recv_cb_t; 65 OBJ_CLASS_DECLARATION(orte_rml_recv_cb_t); 66 67 /* Provide a generic callback function to release buffers 68 * following a non-blocking send as this happens all over 69 * the code base 70 */ 71 ORTE_DECLSPEC void orte_rml_send_callback(int status, orte_process_name_t* sender, 72 opal_buffer_t* buffer, orte_rml_tag_t tag, 73 void* cbdata); 74 75 ORTE_DECLSPEC void orte_rml_recv_callback(int status, orte_process_name_t* sender, 76 opal_buffer_t *buffer, 77 orte_rml_tag_t tag, void *cbdata); 78 79 /* ******************************************************************** */ 80 /* RML CALLBACK FUNCTION DEFINITIONS */ 81 82 /** 83 * Funtion prototype for callback from non-blocking iovec send and recv 84 * 85 * Funtion prototype for callback from non-blocking iovec send and recv. 86 * On send, the iovec pointer will be the same pointer passed to 87 * send_nb and count will equal the count given to send. 88 * 89 * On recv, the iovec pointer will be the address of a single iovec 90 * allocated and owned by the RML, not the process receiving the 91 * callback. Ownership of the data block can be transferred by setting 92 * a user variable to point to the data block, and setting the 93 * iovec->iov_base pointer to NULL. 94 * 95 * @note The parameter in/out parameters are relative to the user's callback 96 * function. 97 * 98 * @param[in] status Completion status 99 * @param[in] peer Opaque name of peer process 100 * @param[in] msg Pointer to the array of iovec that was sent 101 * or to a single iovec that has been recvd 102 * @param[in] count Number of iovecs in the array 103 * @param[in] tag User defined tag for matching send/recv 104 * @param[in] cbdata User data passed to send_nb() 105 */ 106 typedef void (*orte_rml_callback_fn_t)(int status, 107 orte_process_name_t* peer, 108 struct iovec* msg, 109 int count, 110 orte_rml_tag_t tag, 111 void* cbdata); 112 113 114 /** 115 * Funtion prototype for callback from non-blocking buffer send and receive 116 * 117 * Function prototype for callback from non-blocking buffer send and 118 * receive. On send, the buffer will be the same pointer passed to 119 * send_buffer_nb. On receive, the buffer will be allocated and owned 120 * by the RML, not the process receiving the callback. 121 * 122 * @note The parameter in/out parameters are relative to the user's callback 123 * function. 124 * 125 * @param[in] status Completion status 126 * @param[in] peer Name of peer process 127 * @param[in] buffer Message buffer 128 * @param[in] tag User defined tag for matching send/recv 129 * @param[in] cbdata User data passed to send_buffer_nb() or recv_buffer_nb() 130 */ 131 typedef void (*orte_rml_buffer_callback_fn_t)(int status, 132 orte_process_name_t* peer, 133 struct opal_buffer_t* buffer, 134 orte_rml_tag_t tag, 135 void* cbdata); 136 137 /** 138 * Function prototype for exception callback 139 * 140 * Function prototype for callback triggered when a communication error is detected. 141 * 142 * @note The parameter in/out parameters are relative to the user's callback 143 * function. 144 * 145 * @param[in] peer Name of peer process 146 * @param[in] exception Description of the error causing the exception 147 */ 148 typedef void (*orte_rml_exception_callback_t)(orte_process_name_t* peer, 149 orte_rml_exception_t exception); 150 151 152 /* ******************************************************************** */ 153 /* RML INTERNAL MODULE API DEFINITION */ 154 155 156 /** 157 * "Ping" another process to determine availability 158 * 159 * Ping another process to determine if it is available. This 160 * function only verifies that the process is alive and will allow a 161 * connection to the local process. It does *not* qualify as 162 * establishing communication with the remote process, as required by 163 * the note for set_contact_info(). 164 * 165 * @param[in] contact_info The contact info string for the remote process 166 * @param[in] tv Timeout after which the ping should be failed 167 * 168 * @retval ORTE_SUCESS The process is available and will allow connections 169 * from the local process 170 * @retval ORTE_ERROR An unspecified error occurred during the update 171 */ 172 typedef int (*orte_rml_module_ping_fn_t)(const char* contact_info, 173 const struct timeval* tv); 174 175 176 /** 177 * Send an iovec non-blocking message 178 * 179 * Send an array of iovecs to the specified peer. The call 180 * will return immediately, although the iovecs may not be modified 181 * until the completion callback is triggered. The iovecs *may* be 182 * passed to another call to send_nb before the completion callback is 183 * triggered. The callback being triggered does not give any 184 * indication of remote completion. 185 * 186 * @param[in] peer Name of receiving process 187 * @param[in] msg Pointer to an array of iovecs to be sent 188 * @param[in] count Number of iovecs in array 189 * @param[in] tag User defined tag for matching send/recv 190 * @param[in] cbfunc Callback function on message comlpetion 191 * @param[in] cbdata User data to provide during completion callback 192 * 193 * @retval ORTE_SUCCESS The message was successfully started 194 * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid 195 * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the 196 * receiving process is not available 197 * @retval ORTE_ERROR An unspecified error occurred 198 */ 199 typedef int (*orte_rml_module_send_nb_fn_t)(orte_process_name_t* peer, 200 struct iovec* msg, 201 int count, 202 orte_rml_tag_t tag, 203 orte_rml_callback_fn_t cbfunc, 204 void* cbdata); 205 206 207 /** 208 * Send a buffer non-blocking message 209 * 210 * Send a buffer to the specified peer. The call 211 * will return immediately, although the buffer may not be modified 212 * until the completion callback is triggered. The buffer *may* be 213 * passed to another call to send_nb before the completion callback is 214 * triggered. The callback being triggered does not give any 215 * indication of remote completion. 216 * 217 * @param[in] peer Name of receiving process 218 * @param[in] buffer Pointer to buffer to be sent 219 * @param[in] tag User defined tag for matching send/recv 220 * @param[in] cbfunc Callback function on message comlpetion 221 * @param[in] cbdata User data to provide during completion callback 222 * 223 * @retval ORTE_SUCCESS The message was successfully started 224 * @retval ORTE_ERR_BAD_PARAM One of the parameters was invalid 225 * @retval ORTE_ERR_ADDRESSEE_UNKNOWN Contact information for the 226 * receiving process is not available 227 * @retval ORTE_ERROR An unspecified error occurred 228 */ 229 typedef int (*orte_rml_module_send_buffer_nb_fn_t)(orte_process_name_t* peer, 230 struct opal_buffer_t* buffer, 231 orte_rml_tag_t tag, 232 orte_rml_buffer_callback_fn_t cbfunc, 233 void* cbdata); 234 235 /** 236 * Purge the RML/OOB of contact info and pending messages 237 * to/from a specified process. Used when a process aborts 238 * and is to be restarted 239 */ 240 typedef void (*orte_rml_module_purge_fn_t)(orte_process_name_t *peer); 241 242 243 /** 244 * Receive an iovec non-blocking message 245 * 246 * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive 247 * @param[in] tag User defined tag for matching send/recv 248 * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv 249 * @param[in] cbfunc Callback function on message comlpetion 250 * @param[in] cbdata User data to provide during completion callback 251 */ 252 typedef void (*orte_rml_module_recv_nb_fn_t)(orte_process_name_t* peer, 253 orte_rml_tag_t tag, 254 bool persistent, 255 orte_rml_callback_fn_t cbfunc, 256 void* cbdata); 257 258 259 /** 260 * Receive a buffer non-blocking message 261 * 262 * @param[in] peer Peer process or ORTE_NAME_WILDCARD for wildcard receive 263 * @param[in] tag User defined tag for matching send/recv 264 * @param[in] persistent Boolean flag indicating whether or not this is a one-time recv 265 * @param[in] cbfunc Callback function on message comlpetion 266 * @param[in] cbdata User data to provide during completion callback 267 */ 268 typedef void (*orte_rml_module_recv_buffer_nb_fn_t)(orte_process_name_t* peer, 269 orte_rml_tag_t tag, 270 bool persistent, 271 orte_rml_buffer_callback_fn_t cbfunc, 272 void* cbdata); 273 274 /** 275 * Cancel a posted non-blocking receive 276 * 277 * Attempt to cancel a posted non-blocking receive. 278 * 279 * @param[in] peer Peer process or ORTE_NAME_WILDCARD, exactly as passed 280 * to the non-blocking receive call 281 * @param[in] tag Posted receive tag 282 */ 283 typedef void (*orte_rml_module_recv_cancel_fn_t)(orte_process_name_t* peer, 284 orte_rml_tag_t tag); 285 286 287 /** 288 * RML internal module interface - these will be implemented by all RML components 289 */ 290 typedef struct orte_rml_base_module_t { 291 /* pointer to the parent component for this module */ 292 struct orte_rml_component_t *component; 293 /* the routed module to be used */ 294 char *routed; 295 /** Ping process for connectivity check */ 296 orte_rml_module_ping_fn_t ping; 297 298 /** Send non-blocking iovec message */ 299 orte_rml_module_send_nb_fn_t send_nb; 300 301 /** Send non-blocking buffer message */ 302 orte_rml_module_send_buffer_nb_fn_t send_buffer_nb; 303 304 orte_rml_module_recv_nb_fn_t recv_nb; 305 orte_rml_module_recv_buffer_nb_fn_t recv_buffer_nb; 306 orte_rml_module_recv_cancel_fn_t recv_cancel; 307 308 /** Purge information */ 309 orte_rml_module_purge_fn_t purge; 310 } orte_rml_base_module_t; 311 312 313 /** Interface for RML communication */ 314 ORTE_DECLSPEC extern orte_rml_base_module_t orte_rml; 315 316 /* ******************************************************************** */ 317 /* RML COMPONENT DEFINITION */ 318 319 /** 320 * RML component interface 321 * 322 * Component interface for the RML framework. A public instance of 323 * this structure, called mca_rml_[component name]_component, must 324 * exist in any RML component. 325 */ 326 typedef struct orte_rml_component_t { 327 /* Base component description */ 328 mca_base_component_t base; 329 /* Base component data block */ 330 mca_base_component_data_t data; 331 /* Component priority */ 332 int priority; 333 } orte_rml_component_t; 334 335 336 337 /* ******************************************************************** */ 338 339 340 /** Macro for use in components that are of type rml */ 341 #define ORTE_RML_BASE_VERSION_3_0_0 \ 342 ORTE_MCA_BASE_VERSION_2_1_0("rml", 3, 0, 0) 343 344 345 /* ******************************************************************** */ 346 347 348 END_C_DECLS 349 350 #endif