1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana 4 * University Research and Technology 5 * Corporation. All rights reserved. 6 * Copyright (c) 2004-2005 The University of Tennessee and The University 7 * of Tennessee Research Foundation. All rights 8 * reserved. 9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 10 * University of Stuttgart. All rights reserved. 11 * Copyright (c) 2004-2006 The Regents of the University of California. 12 * All rights reserved. 13 * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights 14 * reserved. 15 * Copyright (c) 2011 Sandia National Laboratories. All rights reserved. 16 * Copyright (c) 2015 Research Organization for Information Science 17 * and Technology (RIST). All rights reserved. 18 * $COPYRIGHT$ 19 * 20 * Additional copyrights may follow 21 * 22 * $HEADER$ 23 */ 24 /** 25 * @file 26 * 27 * P2P Management Layer (PML) 28 * 29 * An MCA component type that provides the P2P interface functionality 30 * required by the MPI layer. The PML is a relatively thin layer that 31 * primarily provides for the fragmentation and scheduling of messages 32 * over multiple transports (instances of the Byte Transfer Layer 33 * (BTL) MCA component type) as depicted below: 34 * 35 * ------------------------------------ 36 * | MPI | 37 * ------------------------------------ 38 * | PML | 39 * ------------------------------------ 40 * | BTL (TCP) | BTL (SM) | BTL (...) | 41 * ------------------------------------ 42 * 43 * A single PML component is selected by the MCA framework during 44 * library initialization. Initially, all available PMLs are loaded 45 * (potentially as shared libraries) and their component open and init 46 * functions called. The MCA framework selects the component 47 * returning the highest priority and closes/unloads any other PML 48 * components that may have been opened. 49 * 50 * After all of the MCA components are initialized, the MPI/RTE will 51 * make downcalls into the PML to provide the initial list of 52 * processes (ompi_proc_t instances), and notification of changes 53 * (add/delete). 54 * 55 * The PML module must select the set of BTL components that are to be 56 * used to reach a given destination. These should be cached on a PML 57 * specific data structure that is hung off the ompi_proc_t. 58 * 59 * The PML should then apply a scheduling algorithm (round-robin, 60 * weighted distribution, etc), to schedule the delivery of messages 61 * over the available BTLs. 62 * 63 */ 64 65 #ifndef MCA_PML_H 66 #define MCA_PML_H 67 68 #include "ompi_config.h" 69 #include "ompi/mca/mca.h" 70 #include "mpi.h" /* needed for MPI_ANY_TAG */ 71 #include "ompi/mca/pml/pml_constants.h" 72 #include "ompi/request/request.h" 73 74 BEGIN_C_DECLS 75 76 /* 77 * PML component types 78 */ 79 80 typedef uint64_t mca_pml_sequence_t; 81 struct ompi_proc_t; 82 83 /** 84 * MCA->PML Called by MCA framework to initialize the component. 85 * 86 * @param priority (OUT) Relative priority or ranking used by MCA to 87 * selected a component. 88 * 89 * @param enable_progress_threads (IN) Whether this component is 90 * allowed to run a hidden/progress thread or not. 91 * 92 * @param enable_mpi_threads (IN) Whether support for multiple MPI 93 * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which 94 * indicates whether multiple threads may invoke this component 95 * simultaneously or not. 96 */ 97 typedef struct mca_pml_base_module_1_0_1_t * (*mca_pml_base_component_init_fn_t)( 98 int *priority, 99 bool enable_progress_threads, 100 bool enable_mpi_threads); 101 102 typedef int (*mca_pml_base_component_finalize_fn_t)(void); 103 104 /** 105 * PML component version and interface functions. 106 */ 107 108 struct mca_pml_base_component_2_0_0_t { 109 mca_base_component_t pmlm_version; 110 mca_base_component_data_t pmlm_data; 111 mca_pml_base_component_init_fn_t pmlm_init; 112 mca_pml_base_component_finalize_fn_t pmlm_finalize; 113 }; 114 typedef struct mca_pml_base_component_2_0_0_t mca_pml_base_component_2_0_0_t; 115 typedef mca_pml_base_component_2_0_0_t mca_pml_base_component_t; 116 117 118 /** 119 * MCA management functions. 120 */ 121 122 123 /** 124 * Downcall from MPI/RTE layer when new processes are created. 125 * 126 * @param procs Array of new processes 127 * @param nprocs Size of process array 128 * @return OMPI_SUCCESS or failure status. 129 * 130 * Provides a notification to the PML that new processes have been 131 * created, and provides the PML the opportunity to cache data 132 * (e.g. list of BTLs to use) on the ompi_proc_t data structure. 133 */ 134 typedef int (*mca_pml_base_module_add_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs); 135 136 137 /** 138 * Downcall from MPI/RTE layer when processes are terminated. 139 * 140 * @param procs Array of processes 141 * @param nprocs Size of process array 142 * @return OMPI_SUCCESS or failure status. 143 * 144 * Provides a notification to the PML that processes have 145 * gone away, and provides the PML the opportunity to cleanup 146 * any data cached on the ompi_proc_t data structure. 147 */ 148 typedef int (*mca_pml_base_module_del_procs_fn_t)(struct ompi_proc_t **procs, size_t nprocs); 149 150 /** 151 * Downcall from MCA layer to enable the PML/BTLs. 152 * 153 * @param enable Enable/Disable PML forwarding 154 * @return OMPI_SUCCESS or failure status. 155 */ 156 typedef int (*mca_pml_base_module_enable_fn_t)( 157 bool enable 158 ); 159 160 161 /** 162 * For non-threaded case, provides MCA the opportunity to 163 * progress outstanding requests on all btls. 164 * 165 * * @return Count of "completions", a metric of 166 * how many items where completed in the call 167 * to progress. 168 */ 169 typedef int (*mca_pml_base_module_progress_fn_t)(void); 170 171 /** 172 * MPI Interface Functions 173 */ 174 175 176 /** 177 * Downcall from MPI layer when a new communicator is created. 178 * 179 * @param comm Communicator 180 * @return OMPI_SUCCESS or failure status. 181 * 182 * Provides the PML the opportunity to initialize/cache a data structure 183 * on the communicator. 184 */ 185 typedef int (*mca_pml_base_module_add_comm_fn_t)(struct ompi_communicator_t* comm); 186 187 188 /** 189 * Downcall from MPI layer when a communicator is destroyed. 190 * 191 * @param comm Communicator 192 * @return OMPI_SUCCESS or failure status. 193 * 194 * Provides the PML the opportunity to cleanup any datastructures 195 * associated with the communicator. 196 */ 197 typedef int (*mca_pml_base_module_del_comm_fn_t)(struct ompi_communicator_t* comm); 198 199 /** 200 * Initialize a persistent receive request. 201 * 202 * @param buf (IN) User buffer. 203 * @param count (IN) Number of elements of the specified datatype. 204 * @param datatype (IN) User defined datatype. 205 * @param src (IN) Source rank w/in communicator. 206 * @param tag (IN) User defined tag. 207 * @param comm (IN) Communicator. 208 * @param request (OUT) Request handle. 209 * @return OMPI_SUCCESS or failure status. 210 */ 211 typedef int (*mca_pml_base_module_irecv_init_fn_t)( 212 void *buf, 213 size_t count, 214 struct ompi_datatype_t *datatype, 215 int src, 216 int tag, 217 struct ompi_communicator_t* comm, 218 struct ompi_request_t **request 219 ); 220 221 /** 222 * Post a receive request. 223 * 224 * @param buf (IN) User buffer. 225 * @param count (IN) Number of elements of the specified datatype. 226 * @param datatype (IN) User defined datatype. 227 * @param src (IN) Source rank w/in communicator. 228 * @param tag (IN) User defined tag. 229 * @param comm (IN) Communicator. 230 * @param request (OUT) Request handle. 231 * @return OMPI_SUCCESS or failure status. 232 */ 233 typedef int (*mca_pml_base_module_irecv_fn_t)( 234 void *buf, 235 size_t count, 236 struct ompi_datatype_t *datatype, 237 int src, 238 int tag, 239 struct ompi_communicator_t* comm, 240 struct ompi_request_t **request 241 ); 242 typedef int (*mca_pml_base_module_imrecv_fn_t)( 243 void *buf, 244 size_t count, 245 struct ompi_datatype_t *datatype, 246 struct ompi_message_t **message, 247 struct ompi_request_t **request 248 ); 249 250 /** 251 * Post a receive and wait for completion. 252 * 253 * @param buf (IN) User buffer 254 * @param count (IN) Number of elements of the specified datatype 255 * @param datatype (IN) User defined datatype 256 * @param src (IN) Source rank w/in communicator 257 * @param tag (IN) User defined tag 258 * @param comm (IN) Communicator 259 * @param status (OUT) Completion status 260 * @return OMPI_SUCCESS or failure status. 261 */ 262 typedef int (*mca_pml_base_module_recv_fn_t)( 263 void *buf, 264 size_t count, 265 struct ompi_datatype_t *datatype, 266 int src, 267 int tag, 268 struct ompi_communicator_t* comm, 269 ompi_status_public_t* status 270 ); 271 typedef int (*mca_pml_base_module_mrecv_fn_t)( 272 void *buf, 273 size_t count, 274 struct ompi_datatype_t *datatype, 275 struct ompi_message_t **message, 276 ompi_status_public_t* status 277 ); 278 279 /** 280 * Initialize a persistent send request. 281 * 282 * @param buf (IN) User buffer. 283 * @param count (IN) Number of elements of the specified datatype. 284 * @param datatype (IN) User defined datatype. 285 * @param dst (IN) Peer rank w/in communicator. 286 * @param tag (IN) User defined tag. 287 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) 288 * @param comm (IN) Communicator. 289 * @param request (OUT) Request handle. 290 * @return OMPI_SUCCESS or failure status. 291 */ 292 typedef int (*mca_pml_base_module_isend_init_fn_t)( 293 const void *buf, 294 size_t count, 295 struct ompi_datatype_t *datatype, 296 int dst, 297 int tag, 298 mca_pml_base_send_mode_t mode, 299 struct ompi_communicator_t* comm, 300 struct ompi_request_t **request 301 ); 302 303 304 /** 305 * Post a send request. 306 * 307 * @param buf (IN) User buffer. 308 * @param count (IN) Number of elements of the specified datatype. 309 * @param datatype (IN) User defined datatype. 310 * @param dst (IN) Peer rank w/in communicator. 311 * @param tag (IN) User defined tag. 312 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) 313 * @param comm (IN) Communicator. 314 * @param request (OUT) Request handle. 315 * @return OMPI_SUCCESS or failure status. 316 */ 317 typedef int (*mca_pml_base_module_isend_fn_t)( 318 const void *buf, 319 size_t count, 320 struct ompi_datatype_t *datatype, 321 int dst, 322 int tag, 323 mca_pml_base_send_mode_t mode, 324 struct ompi_communicator_t* comm, 325 struct ompi_request_t **request 326 ); 327 328 329 /** 330 * Post a send request and wait for completion. 331 * 332 * @param buf (IN) User buffer. 333 * @param count (IN) Number of elements of the specified datatype. 334 * @param datatype (IN) User defined datatype. 335 * @param dst (IN) Peer rank w/in communicator. 336 * @param tag (IN) User defined tag. 337 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) 338 * @param comm (IN) Communicator. 339 * @return OMPI_SUCCESS or failure status. 340 */ 341 typedef int (*mca_pml_base_module_send_fn_t)( 342 const void *buf, 343 size_t count, 344 struct ompi_datatype_t *datatype, 345 int dst, 346 int tag, 347 mca_pml_base_send_mode_t mode, 348 struct ompi_communicator_t* comm 349 ); 350 351 /** 352 * Initiate one or more persistent requests. 353 * 354 * @param count (IN) Number of requests 355 * @param requests (IN/OUT) Array of persistent requests 356 * @return OMPI_SUCCESS or failure status. 357 */ 358 typedef ompi_request_start_fn_t mca_pml_base_module_start_fn_t; 359 360 /** 361 * Probe to poll for pending recv. 362 * 363 * @param src (IN) Source rank w/in communicator. 364 * @param tag (IN) User defined tag. 365 * @param comm (IN) Communicator. 366 * @param matched (OUT) Flag indicating if matching recv exists. 367 * @param status (OUT) Completion statuses. 368 * @return OMPI_SUCCESS or failure status. 369 * 370 */ 371 typedef int (*mca_pml_base_module_iprobe_fn_t)( 372 int src, 373 int tag, 374 struct ompi_communicator_t* comm, 375 int *matched, 376 ompi_status_public_t *status 377 ); 378 379 typedef int (*mca_pml_base_module_improbe_fn_t)( 380 int src, 381 int tag, 382 struct ompi_communicator_t* comm, 383 int *matched, 384 struct ompi_message_t **message, 385 ompi_status_public_t *status 386 ); 387 388 /** 389 * Blocking probe to wait for pending recv. 390 * 391 * @param src (IN) Source rank w/in communicator. 392 * @param tag (IN) User defined tag. 393 * @param comm (IN) Communicator. 394 * @param status (OUT) Completion statuses. 395 * @return OMPI_SUCCESS or failure status. 396 * 397 */ 398 typedef int (*mca_pml_base_module_probe_fn_t)( 399 int src, 400 int tag, 401 struct ompi_communicator_t* comm, 402 ompi_status_public_t *status 403 ); 404 405 typedef int (*mca_pml_base_module_mprobe_fn_t)( 406 int src, 407 int tag, 408 struct ompi_communicator_t* comm, 409 struct ompi_message_t **message, 410 ompi_status_public_t *status 411 ); 412 413 /** 414 * Cancel pending operation. 415 * 416 * @param request (IN) Request 417 * @return OMPI_SUCCESS or failure status. 418 * 419 */ 420 typedef int (*mca_pml_base_module_cancel_fn_t)( 421 struct ompi_request_t* request 422 ); 423 424 425 /** 426 * Has a request been cancelled? 427 * 428 * @param request (IN) Request 429 * @return OMPI_SUCCESS or failure status. 430 * 431 */ 432 typedef int (*mca_pml_base_module_cancelled_fn_t)( 433 struct ompi_request_t* request, 434 int *flag 435 ); 436 437 /** 438 * Release resources held by a persistent mode request. 439 * 440 * @param request (IN) Request 441 * @return OMPI_SUCCESS or failure status. 442 * 443 */ 444 typedef int (*mca_pml_base_module_free_fn_t)( 445 struct ompi_request_t** request 446 ); 447 448 449 /** 450 * A special NULL request handle. 451 * 452 * @param request (OUT) Request 453 * @return OMPI_SUCCESS or failure status. 454 * 455 */ 456 typedef int (*mca_pml_base_module_null_fn_t)( 457 struct ompi_request_t** request 458 ); 459 460 /** 461 * Diagnostics function. 462 * 463 * @param request (IN) Communicator 464 * @param verbose (IN) Verbosity level (passed to BTL) 465 * @return OMPI_SUCCESS or failure status. 466 * 467 */ 468 typedef int (*mca_pml_base_module_dump_fn_t)( 469 struct ompi_communicator_t* comm, 470 int verbose 471 ); 472 473 /** 474 * Fault Tolerance Awareness function 475 * @param status Checkpoint status 476 * @return OMPI_SUCCESS or failure status 477 */ 478 typedef int (*mca_pml_base_module_ft_event_fn_t) (int status); 479 480 /** 481 * pml module flags 482 */ 483 /** PML requires requires all procs in the job on the first call to 484 * add_procs */ 485 #define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001 486 487 /** 488 * PML instance. 489 */ 490 491 struct mca_pml_base_module_1_0_1_t { 492 493 /* downcalls from MCA to PML */ 494 mca_pml_base_module_add_procs_fn_t pml_add_procs; 495 mca_pml_base_module_del_procs_fn_t pml_del_procs; 496 mca_pml_base_module_enable_fn_t pml_enable; 497 mca_pml_base_module_progress_fn_t pml_progress; 498 499 /* downcalls from MPI to PML */ 500 mca_pml_base_module_add_comm_fn_t pml_add_comm; 501 mca_pml_base_module_del_comm_fn_t pml_del_comm; 502 mca_pml_base_module_irecv_init_fn_t pml_irecv_init; 503 mca_pml_base_module_irecv_fn_t pml_irecv; 504 mca_pml_base_module_recv_fn_t pml_recv; 505 mca_pml_base_module_isend_init_fn_t pml_isend_init; 506 mca_pml_base_module_isend_fn_t pml_isend; 507 mca_pml_base_module_send_fn_t pml_send; 508 mca_pml_base_module_iprobe_fn_t pml_iprobe; 509 mca_pml_base_module_probe_fn_t pml_probe; 510 mca_pml_base_module_start_fn_t pml_start; 511 mca_pml_base_module_improbe_fn_t pml_improbe; 512 mca_pml_base_module_mprobe_fn_t pml_mprobe; 513 mca_pml_base_module_imrecv_fn_t pml_imrecv; 514 mca_pml_base_module_mrecv_fn_t pml_mrecv; 515 516 /* diagnostics */ 517 mca_pml_base_module_dump_fn_t pml_dump; 518 519 /* FT Event */ 520 mca_pml_base_module_ft_event_fn_t pml_ft_event; 521 522 /* maximum constant sizes */ 523 uint32_t pml_max_contextid; 524 int pml_max_tag; 525 int pml_flags; 526 }; 527 typedef struct mca_pml_base_module_1_0_1_t mca_pml_base_module_1_0_1_t; 528 typedef mca_pml_base_module_1_0_1_t mca_pml_base_module_t; 529 530 /* 531 * Macro for use in components that are of type pml 532 */ 533 #define MCA_PML_BASE_VERSION_2_0_0 \ 534 OMPI_MCA_BASE_VERSION_2_1_0("pml", 2, 0, 0) 535 536 /* 537 * macro for doing direct call / call through struct 538 */ 539 #if MCA_ompi_pml_DIRECT_CALL 540 541 #include MCA_ompi_pml_DIRECT_CALL_HEADER 542 543 #define MCA_PML_CALL_STAMP(a, b) mca_pml_ ## a ## _ ## b 544 #define MCA_PML_CALL_EXPANDER(a, b) MCA_PML_CALL_STAMP(a,b) 545 #define MCA_PML_CALL(a) MCA_PML_CALL_EXPANDER(MCA_ompi_pml_DIRECT_CALL_COMPONENT, a) 546 547 #else 548 #define MCA_PML_CALL(a) mca_pml.pml_ ## a 549 #endif 550 551 OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml; 552 553 static inline bool mca_pml_base_requires_world (void) 554 { 555 return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD); 556 } 557 558 END_C_DECLS 559 #endif /* MCA_PML_H */