1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana 4 * University Research and Technology 5 * Corporation. All rights reserved. 6 * Copyright (c) 2004-2016 The University of Tennessee and The University 7 * of Tennessee Research Foundation. All rights 8 * reserved. 9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 10 * University of Stuttgart. All rights reserved. 11 * Copyright (c) 2004-2005 The Regents of the University of California. 12 * All rights reserved. 13 * Copyright (c) 2006-2017 Cisco Systems, Inc. All rights reserved 14 * Copyright (c) 2009-2012 Oracle and/or its affiliates. All rights reserved. 15 * Copyright (c) 2012 Oak Ridge National Labs. All rights reserved. 16 * Copyright (c) 2015-2017 Los Alamos National Security, LLC. All rights 17 * reserved. 18 * Copyright (c) 2018 FUJITSU LIMITED. All rights reserved. 19 * $COPYRIGHT$ 20 * 21 * Additional copyrights may follow 22 * 23 * $HEADER$ 24 */ 25 /** 26 * @file 27 * 28 * Top-level description of requests 29 */ 30 31 #ifndef OMPI_REQUEST_H 32 #define OMPI_REQUEST_H 33 34 #include "ompi_config.h" 35 #include "mpi.h" 36 #include "opal/class/opal_free_list.h" 37 #include "opal/class/opal_pointer_array.h" 38 #include "opal/threads/condition.h" 39 #include "opal/threads/wait_sync.h" 40 #include "ompi/constants.h" 41 42 BEGIN_C_DECLS 43 44 /** 45 * Request class 46 */ 47 OMPI_DECLSPEC OBJ_CLASS_DECLARATION(ompi_request_t); 48 49 /* 50 * The following include pulls in shared typedefs with debugger plugins. 51 * For more information on why we do this see the Notice to developers 52 * comment at the top of the ompi_msgq_dll.c file. 53 */ 54 55 #include "request_dbg.h" 56 57 struct ompi_request_t; 58 59 /** 60 * Initiate one or more persistent requests. 61 * 62 * This function is called by MPI_START and MPI_STARTALL. 63 * 64 * When called by MPI_START, count is 1. 65 * 66 * When called by MPI_STARTALL, multiple requests which have the same 67 * req_start value are passed. This may help scheduling optimization 68 * of multiple communications. 69 * 70 * @param count (IN) Number of requests 71 * @param requests (IN/OUT) Array of persistent requests 72 * @return OMPI_SUCCESS or failure status. 73 */ 74 typedef int (*ompi_request_start_fn_t)( 75 size_t count, 76 struct ompi_request_t ** requests 77 ); 78 79 /* 80 * Required function to free the request and any associated resources. 81 */ 82 typedef int (*ompi_request_free_fn_t)(struct ompi_request_t** rptr); 83 84 /* 85 * Optional function to cancel a pending request. 86 */ 87 typedef int (*ompi_request_cancel_fn_t)(struct ompi_request_t* request, int flag); 88 89 /* 90 * Optional function called when the request is completed from the MPI 91 * library perspective. This function is allowed to release the request if 92 * the request will not be used with ompi_request_wait* or ompi_request_test. 93 * If the function reposts (using start) a request or calls ompi_request_free() 94 * on the request it *MUST* return 1. It should return 0 otherwise. 95 */ 96 typedef int (*ompi_request_complete_fn_t)(struct ompi_request_t* request); 97 98 /** 99 * Forward declaration 100 */ 101 struct ompi_communicator_t; 102 103 /** 104 * Forward declaration 105 */ 106 struct ompi_win_t; 107 108 /** 109 * Forward declaration 110 */ 111 struct ompi_file_t; 112 113 /** 114 * Union for holding several different MPI pointer types on the request 115 */ 116 typedef union ompi_mpi_object_t { 117 struct ompi_communicator_t *comm; 118 struct ompi_file_t *file; 119 struct ompi_win_t *win; 120 } ompi_mpi_object_t; 121 122 /** 123 * Main top-level request struct definition 124 */ 125 struct ompi_request_t { 126 opal_free_list_item_t super; /**< Base type */ 127 ompi_request_type_t req_type; /**< Enum indicating the type of the request */ 128 ompi_status_public_t req_status; /**< Completion status */ 129 volatile void *req_complete; /**< Flag indicating wether request has completed */ 130 volatile ompi_request_state_t req_state; /**< enum indicate state of the request */ 131 bool req_persistent; /**< flag indicating if the this is a persistent request */ 132 int req_f_to_c_index; /**< Index in Fortran <-> C translation array */ 133 ompi_request_start_fn_t req_start; /**< Called by MPI_START and MPI_STARTALL */ 134 ompi_request_free_fn_t req_free; /**< Called by free */ 135 ompi_request_cancel_fn_t req_cancel; /**< Optional function to cancel the request */ 136 ompi_request_complete_fn_t req_complete_cb; /**< Called when the request is MPI completed */ 137 void *req_complete_cb_data; 138 ompi_mpi_object_t req_mpi_object; /**< Pointer to MPI object that created this request */ 139 }; 140 141 /** 142 * Convenience typedef 143 */ 144 typedef struct ompi_request_t ompi_request_t; 145 146 147 /** 148 * Padded struct to maintain back compatibiltiy. 149 * See ompi/communicator/communicator.h comments with struct ompi_communicator_t 150 * for full explanation why we chose the following padding construct for predefines. 151 */ 152 #define PREDEFINED_REQUEST_PAD 256 153 154 struct ompi_predefined_request_t { 155 struct ompi_request_t request; 156 char padding[PREDEFINED_REQUEST_PAD - sizeof(ompi_request_t)]; 157 }; 158 159 typedef struct ompi_predefined_request_t ompi_predefined_request_t; 160 161 /** 162 * Initialize a request. This is a macro to avoid function call 163 * overhead, since this is typically invoked in the critical 164 * performance path (since requests may be re-used, it is possible 165 * that we will have to initialize a request multiple times). 166 */ 167 #define OMPI_REQUEST_INIT(request, persistent) \ 168 do { \ 169 (request)->req_complete = \ 170 (persistent) ? REQUEST_COMPLETED : REQUEST_PENDING; \ 171 (request)->req_state = OMPI_REQUEST_INACTIVE; \ 172 (request)->req_persistent = (persistent); \ 173 (request)->req_complete_cb = NULL; \ 174 (request)->req_complete_cb_data = NULL; \ 175 } while (0); 176 177 178 #define REQUEST_COMPLETE(req) (REQUEST_COMPLETED == (req)->req_complete) 179 /** 180 * Finalize a request. This is a macro to avoid function call 181 * overhead, since this is typically invoked in the critical 182 * performance path (since requests may be re-used, it is possible 183 * that we will have to finalize a request multiple times). 184 * 185 * When finalizing a request, if MPI_Request_f2c() was previously 186 * invoked on that request, then this request was added to the f2c 187 * table, and we need to remove it 188 * 189 * This function should be called only from the MPI layer. It should 190 * never be called from the PML. It take care of the upper level clean-up. 191 * When the user call MPI_Request_free we should release all MPI level 192 * ressources, so we have to call this function too. 193 */ 194 #define OMPI_REQUEST_FINI(request) \ 195 do { \ 196 (request)->req_state = OMPI_REQUEST_INVALID; \ 197 if (MPI_UNDEFINED != (request)->req_f_to_c_index) { \ 198 opal_pointer_array_set_item(&ompi_request_f_to_c_table, \ 199 (request)->req_f_to_c_index, NULL); \ 200 (request)->req_f_to_c_index = MPI_UNDEFINED; \ 201 } \ 202 } while (0); 203 204 /** 205 * Non-blocking test for request completion. 206 * 207 * @param request (IN) Array of requests 208 * @param complete (OUT) Flag indicating if index is valid (a request completed). 209 * @param status (OUT) Status of completed request. 210 * @return OMPI_SUCCESS or failure status. 211 * 212 * Note that upon completion, the request is freed, and the 213 * request handle at index set to NULL. 214 */ 215 typedef int (*ompi_request_test_fn_t)(ompi_request_t ** rptr, 216 int *completed, 217 ompi_status_public_t * status ); 218 /** 219 * Non-blocking test for request completion. 220 * 221 * @param count (IN) Number of requests 222 * @param request (IN) Array of requests 223 * @param index (OUT) Index of first completed request. 224 * @param complete (OUT) Flag indicating if index is valid (a request completed). 225 * @param status (OUT) Status of completed request. 226 * @return OMPI_SUCCESS or failure status. 227 * 228 * Note that upon completion, the request is freed, and the 229 * request handle at index set to NULL. 230 */ 231 typedef int (*ompi_request_test_any_fn_t)(size_t count, 232 ompi_request_t ** requests, 233 int *index, 234 int *completed, 235 ompi_status_public_t * status); 236 /** 237 * Non-blocking test for request completion. 238 * 239 * @param count (IN) Number of requests 240 * @param requests (IN) Array of requests 241 * @param completed (OUT) Flag indicating wether all requests completed. 242 * @param statuses (OUT) Array of completion statuses. 243 * @return OMPI_SUCCESS or failure status. 244 * 245 * This routine returns completed==true if all requests have completed. 246 * The statuses parameter is only updated if all requests completed. Likewise, 247 * the requests array is not modified (no requests freed), unless all requests 248 * have completed. 249 */ 250 typedef int (*ompi_request_test_all_fn_t)(size_t count, 251 ompi_request_t ** requests, 252 int *completed, 253 ompi_status_public_t * statuses); 254 /** 255 * Non-blocking test for some of N requests to complete. 256 * 257 * @param count (IN) Number of requests 258 * @param requests (INOUT) Array of requests 259 * @param outcount (OUT) Number of finished requests 260 * @param indices (OUT) Indices of the finished requests 261 * @param statuses (OUT) Array of completion statuses. 262 * @return OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status. 263 * 264 */ 265 typedef int (*ompi_request_test_some_fn_t)(size_t count, 266 ompi_request_t ** requests, 267 int * outcount, 268 int * indices, 269 ompi_status_public_t * statuses); 270 /** 271 * Wait (blocking-mode) for one requests to complete. 272 * 273 * @param request (IN) Pointer to request. 274 * @param status (OUT) Status of completed request. 275 * @return OMPI_SUCCESS or failure status. 276 * 277 */ 278 typedef int (*ompi_request_wait_fn_t)(ompi_request_t ** req_ptr, 279 ompi_status_public_t * status); 280 /** 281 * Wait (blocking-mode) for one of N requests to complete. 282 * 283 * @param count (IN) Number of requests 284 * @param requests (IN) Array of requests 285 * @param index (OUT) Index into request array of completed request. 286 * @param status (OUT) Status of completed request. 287 * @return OMPI_SUCCESS or failure status. 288 * 289 */ 290 typedef int (*ompi_request_wait_any_fn_t)(size_t count, 291 ompi_request_t ** requests, 292 int *index, 293 ompi_status_public_t * status); 294 /** 295 * Wait (blocking-mode) for all of N requests to complete. 296 * 297 * @param count (IN) Number of requests 298 * @param requests (IN) Array of requests 299 * @param statuses (OUT) Array of completion statuses. 300 * @return OMPI_SUCCESS or failure status. 301 * 302 */ 303 typedef int (*ompi_request_wait_all_fn_t)(size_t count, 304 ompi_request_t ** requests, 305 ompi_status_public_t * statuses); 306 /** 307 * Wait (blocking-mode) for some of N requests to complete. 308 * 309 * @param count (IN) Number of requests 310 * @param requests (INOUT) Array of requests 311 * @param outcount (OUT) Number of finished requests 312 * @param indices (OUT) Indices of the finished requests 313 * @param statuses (OUT) Array of completion statuses. 314 * @return OMPI_SUCCESS, OMPI_ERR_IN_STATUS or failure status. 315 * 316 */ 317 typedef int (*ompi_request_wait_some_fn_t)(size_t count, 318 ompi_request_t ** requests, 319 int * outcount, 320 int * indices, 321 ompi_status_public_t * statuses); 322 323 /** 324 * Replaceable request functions 325 */ 326 typedef struct ompi_request_fns_t { 327 ompi_request_test_fn_t req_test; 328 ompi_request_test_any_fn_t req_test_any; 329 ompi_request_test_all_fn_t req_test_all; 330 ompi_request_test_some_fn_t req_test_some; 331 ompi_request_wait_fn_t req_wait; 332 ompi_request_wait_any_fn_t req_wait_any; 333 ompi_request_wait_all_fn_t req_wait_all; 334 ompi_request_wait_some_fn_t req_wait_some; 335 } ompi_request_fns_t; 336 337 /** 338 * Globals used for tracking requests and request completion. 339 */ 340 OMPI_DECLSPEC extern opal_pointer_array_t ompi_request_f_to_c_table; 341 OMPI_DECLSPEC extern ompi_predefined_request_t ompi_request_null; 342 OMPI_DECLSPEC extern ompi_predefined_request_t *ompi_request_null_addr; 343 OMPI_DECLSPEC extern ompi_request_t ompi_request_empty; 344 OMPI_DECLSPEC extern ompi_status_public_t ompi_status_empty; 345 OMPI_DECLSPEC extern ompi_request_fns_t ompi_request_functions; 346 347 /** 348 * Initialize the MPI_Request subsystem; invoked during MPI_INIT. 349 */ 350 int ompi_request_init(void); 351 352 /** 353 * Shut down the MPI_Request subsystem; invoked during MPI_FINALIZE. 354 */ 355 int ompi_request_finalize(void); 356 357 /** 358 * Create a persistent request that does nothing (e.g., to MPI_PROC_NULL). 359 */ 360 int ompi_request_persistent_noop_create(ompi_request_t **request); 361 362 /** 363 * Cancel a pending request. 364 */ 365 static inline int ompi_request_cancel(ompi_request_t* request) 366 { 367 if (request->req_cancel != NULL) { 368 return request->req_cancel(request, true); 369 } 370 return OMPI_SUCCESS; 371 } 372 373 /** 374 * Free a request. 375 * 376 * @param request (INOUT) Pointer to request. 377 */ 378 static inline int ompi_request_free(ompi_request_t** request) 379 { 380 return (*request)->req_free(request); 381 } 382 383 #define ompi_request_test (ompi_request_functions.req_test) 384 #define ompi_request_test_any (ompi_request_functions.req_test_any) 385 #define ompi_request_test_all (ompi_request_functions.req_test_all) 386 #define ompi_request_test_some (ompi_request_functions.req_test_some) 387 #define ompi_request_wait (ompi_request_functions.req_wait) 388 #define ompi_request_wait_any (ompi_request_functions.req_wait_any) 389 #define ompi_request_wait_all (ompi_request_functions.req_wait_all) 390 #define ompi_request_wait_some (ompi_request_functions.req_wait_some) 391 392 /** 393 * Wait a particular request for completion 394 */ 395 396 static inline void ompi_request_wait_completion(ompi_request_t *req) 397 { 398 if (opal_using_threads () && !REQUEST_COMPLETE(req)) { 399 void *_tmp_ptr = REQUEST_PENDING; 400 ompi_wait_sync_t sync; 401 402 WAIT_SYNC_INIT(&sync, 1); 403 404 if (OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&req->req_complete, &_tmp_ptr, &sync)) { 405 SYNC_WAIT(&sync); 406 } else { 407 /* completed before we had a chance to swap in the sync object */ 408 WAIT_SYNC_SIGNALLED(&sync); 409 } 410 411 assert(REQUEST_COMPLETE(req)); 412 WAIT_SYNC_RELEASE(&sync); 413 } else { 414 while(!REQUEST_COMPLETE(req)) { 415 opal_progress(); 416 } 417 } 418 } 419 420 /** 421 * Signal or mark a request as complete. If with_signal is true this will 422 * wake any thread pending on the request. If with_signal is false, the 423 * opposite will be true, the request will simply be marked as completed 424 * and no effort will be made to correctly (atomically) handle the associated 425 * synchronization primitive. This is a special case when the function 426 * is called from the critical path for small messages, where we know 427 * the current execution flow created the request, and no synchronized wait 428 * has been set. 429 * BEWARE: The error code should be set on the request prior to calling 430 * this function, or the synchronization primitive might not be correctly 431 * triggered. 432 */ 433 static inline int ompi_request_complete(ompi_request_t* request, bool with_signal) 434 { 435 int rc = 0; 436 437 if( NULL != request->req_complete_cb) { 438 rc = request->req_complete_cb( request ); 439 request->req_complete_cb = NULL; 440 } 441 442 if (0 == rc) { 443 if( OPAL_LIKELY(with_signal) ) { 444 void *_tmp_ptr = REQUEST_PENDING; 445 446 if(!OPAL_ATOMIC_COMPARE_EXCHANGE_STRONG_PTR(&request->req_complete, &_tmp_ptr, REQUEST_COMPLETED)) { 447 ompi_wait_sync_t *tmp_sync = (ompi_wait_sync_t *) OPAL_ATOMIC_SWAP_PTR(&request->req_complete, 448 REQUEST_COMPLETED); 449 /* In the case where another thread concurrently changed the request to REQUEST_PENDING */ 450 if( REQUEST_PENDING != tmp_sync ) 451 wait_sync_update(tmp_sync, 1, request->req_status.MPI_ERROR); 452 } 453 } else 454 request->req_complete = REQUEST_COMPLETED; 455 } 456 457 return OMPI_SUCCESS; 458 } 459 460 END_C_DECLS 461 462 #endif