1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2005 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2006 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2006-2015 Los Alamos National Security, LLC. All rights
14 * reserved.
15 * Copyright (c) 2011 Sandia National Laboratories. All rights reserved.
16 * Copyright (c) 2015 Research Organization for Information Science
17 * and Technology (RIST). All rights reserved.
18 * $COPYRIGHT$
19 *
20 * Additional copyrights may follow
21 *
22 * $HEADER$
23 */
24 /**
25 * @file
26 *
27 * P2P Management Layer (PML)
28 *
29 * An MCA component type that provides the P2P interface functionality
30 * required by the MPI layer. The PML is a relatively thin layer that
31 * primarily provides for the fragmentation and scheduling of messages
32 * over multiple transports (instances of the Byte Transfer Layer
33 * (BTL) MCA component type) as depicted below:
34 *
 *   ------------------------------------
 *   |                MPI               |
 *   ------------------------------------
 *   |                PML               |
 *   ------------------------------------
 *   | BTL (TCP) | BTL (SM) | BTL (...) |
 *   ------------------------------------
42 *
43 * A single PML component is selected by the MCA framework during
44 * library initialization. Initially, all available PMLs are loaded
45 * (potentially as shared libraries) and their component open and init
46 * functions called. The MCA framework selects the component
47 * returning the highest priority and closes/unloads any other PML
48 * components that may have been opened.
49 *
50 * After all of the MCA components are initialized, the MPI/RTE will
51 * make downcalls into the PML to provide the initial list of
52 * processes (ompi_proc_t instances), and notification of changes
53 * (add/delete).
54 *
55 * The PML module must select the set of BTL components that are to be
56 * used to reach a given destination. These should be cached on a PML
57 * specific data structure that is hung off the ompi_proc_t.
58 *
59 * The PML should then apply a scheduling algorithm (round-robin,
60 * weighted distribution, etc), to schedule the delivery of messages
61 * over the available BTLs.
62 *
63 */
64
65 #ifndef MCA_PML_H
66 #define MCA_PML_H
67
68 #include "ompi_config.h"
69 #include "ompi/mca/mca.h"
70 #include "mpi.h" /* needed for MPI_ANY_TAG */
71 #include "ompi/mca/pml/pml_constants.h"
72 #include "ompi/request/request.h"
73
74 BEGIN_C_DECLS
75
76 /*
77 * PML component types
78 */
79
/** Unsigned 64-bit integer type used for PML sequence numbers. */
typedef uint64_t mca_pml_sequence_t;

/*
 * Forward declarations of the structures that the interface prototypes in
 * this header only ever handle through pointers.  Declaring them here keeps
 * the header self-contained rather than relying on whatever the included
 * headers happen to declare; repeating a forward declaration is harmless.
 */
struct ompi_proc_t;
struct ompi_communicator_t;
struct ompi_datatype_t;
struct ompi_message_t;
struct ompi_request_t;
82
/**
 * MCA->PML: called by the MCA framework to initialize the component.
 *
 * @param priority (OUT) Relative priority or ranking used by the MCA
 * framework to select a component.
 *
 * @param enable_progress_threads (IN) Whether this component is
 * allowed to run a hidden/progress thread or not.
 *
 * @param enable_mpi_threads (IN) Whether support for multiple MPI
 * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
 * indicates whether multiple threads may invoke this component
 * simultaneously or not.
 *
 * @return Pointer to the PML module supplied by the component.
 * NOTE(review): presumably NULL when the component cannot be used --
 * confirm against the framework's selection code.
 */
typedef struct mca_pml_base_module_1_0_1_t * (*mca_pml_base_component_init_fn_t)(
    int *priority,
    bool enable_progress_threads,
    bool enable_mpi_threads);

/**
 * PML component finalize entry point.  Takes no arguments and returns an
 * int status.
 * NOTE(review): presumably OMPI_SUCCESS or a failure status, like the other
 * entry points declared in this header -- confirm.
 */
typedef int (*mca_pml_base_component_finalize_fn_t)(void);
103
104 /**
105 * PML component version and interface functions.
106 */
107
108 struct mca_pml_base_component_2_0_0_t {
109 mca_base_component_t pmlm_version;
110 mca_base_component_data_t pmlm_data;
111 mca_pml_base_component_init_fn_t pmlm_init;
112 mca_pml_base_component_finalize_fn_t pmlm_finalize;
113 };
114 typedef struct mca_pml_base_component_2_0_0_t mca_pml_base_component_2_0_0_t;
115 typedef mca_pml_base_component_2_0_0_t mca_pml_base_component_t;
116
117
118 /**
119 * MCA management functions.
120 */
121
122
/**
 * Downcall from the MPI/RTE layer announcing newly created processes.
 *
 * Gives the PML the chance to cache per-process data (e.g. the list of
 * BTLs to use) on the ompi_proc_t data structure.
 *
 * @param procs  Array of new processes
 * @param nprocs Number of entries in the process array
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_add_procs_fn_t)(
    struct ompi_proc_t **procs, size_t nprocs);
135
136
/**
 * Downcall from the MPI/RTE layer announcing that processes have
 * terminated.
 *
 * Gives the PML the chance to clean up any data it cached on the
 * ompi_proc_t data structure.
 *
 * @param procs  Array of processes
 * @param nprocs Number of entries in the process array
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_del_procs_fn_t)(
    struct ompi_proc_t **procs, size_t nprocs);
149
/**
 * Downcall from the MCA layer that switches the PML/BTLs on or off.
 *
 * @param enable Enable (true) or disable (false) PML forwarding
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_enable_fn_t)(bool enable);
159
160
/**
 * For the non-threaded case, provides MCA the opportunity to
 * progress outstanding requests on all BTLs.
 *
 * @return Count of "completions", a metric of how many items
 * were completed in the call to progress.
 */
typedef int (*mca_pml_base_module_progress_fn_t)(void);
170
171 /**
172 * MPI Interface Functions
173 */
174
175
/**
 * Downcall from the MPI layer made when a new communicator is created.
 *
 * Lets the PML initialize/cache a data structure on the communicator.
 *
 * @param comm Communicator
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_add_comm_fn_t)(
    struct ompi_communicator_t* comm);
186
187
/**
 * Downcall from the MPI layer made when a communicator is destroyed.
 *
 * Lets the PML clean up any data structures it associated with the
 * communicator.
 *
 * @param comm Communicator
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_del_comm_fn_t)(
    struct ompi_communicator_t* comm);
198
/**
 * Set up a persistent receive request.
 *
 * @param buf      (IN)  User buffer.
 * @param count    (IN)  Number of elements of the specified datatype.
 * @param datatype (IN)  User defined datatype.
 * @param src      (IN)  Source rank w/in communicator.
 * @param tag      (IN)  User defined tag.
 * @param comm     (IN)  Communicator.
 * @param request  (OUT) Request handle.
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_irecv_init_fn_t)(
    void *buf, size_t count, struct ompi_datatype_t *datatype,
    int src, int tag, struct ompi_communicator_t* comm,
    struct ompi_request_t **request);
220
/**
 * Post a receive request; completion is reported through the returned
 * request handle.
 *
 * @param buf      (IN)  User buffer.
 * @param count    (IN)  Number of elements of the specified datatype.
 * @param datatype (IN)  User defined datatype.
 * @param src      (IN)  Source rank w/in communicator.
 * @param tag      (IN)  User defined tag.
 * @param comm     (IN)  Communicator.
 * @param request  (OUT) Request handle.
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_irecv_fn_t)(
    void *buf, size_t count, struct ompi_datatype_t *datatype,
    int src, int tag, struct ompi_communicator_t* comm,
    struct ompi_request_t **request);
/**
 * Post a receive request for a message identified by a message handle
 * rather than by source/tag/communicator.
 * NOTE(review): presumably the PML counterpart of MPI_Imrecv, with the
 * handle coming from a matching probe -- confirm.
 *
 * @param buf      User buffer.
 * @param count    Number of elements of the specified datatype.
 * @param datatype User defined datatype.
 * @param message  Pointer to the message handle.
 * @param request  Pointer to the request handle.
 * @return int status; presumably OMPI_SUCCESS or failure status like the
 * other receive entry points -- confirm.
 */
typedef int (*mca_pml_base_module_imrecv_fn_t)(
    void *buf, size_t count, struct ompi_datatype_t *datatype,
    struct ompi_message_t **message, struct ompi_request_t **request);
249
250 /**
251 * Post a receive and wait for completion.
252 *
253 * @param buf (IN) User buffer
254 * @param count (IN) Number of elements of the specified datatype
255 * @param datatype (IN) User defined datatype
256 * @param src (IN) Source rank w/in communicator
257 * @param tag (IN) User defined tag
258 * @param comm (IN) Communicator
259 * @param status (OUT) Completion status
260 * @return OMPI_SUCCESS or failure status.
261 */
262 typedef int (*mca_pml_base_module_recv_fn_t)(
263 void *buf,
264 size_t count,
265 struct ompi_datatype_t *datatype,
266 int src,
267 int tag,
268 struct ompi_communicator_t* comm,
269 ompi_status_public_t* status
270 );
271 typedef int (*mca_pml_base_module_mrecv_fn_t)(
272 void *buf,
273 size_t count,
274 struct ompi_datatype_t *datatype,
275 struct ompi_message_t **message,
276 ompi_status_public_t* status
277 );
278
279 /**
280 * Initialize a persistent send request.
281 *
282 * @param buf (IN) User buffer.
283 * @param count (IN) Number of elements of the specified datatype.
284 * @param datatype (IN) User defined datatype.
285 * @param dst (IN) Peer rank w/in communicator.
286 * @param tag (IN) User defined tag.
287 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
288 * @param comm (IN) Communicator.
289 * @param request (OUT) Request handle.
290 * @return OMPI_SUCCESS or failure status.
291 */
292 typedef int (*mca_pml_base_module_isend_init_fn_t)(
293 const void *buf,
294 size_t count,
295 struct ompi_datatype_t *datatype,
296 int dst,
297 int tag,
298 mca_pml_base_send_mode_t mode,
299 struct ompi_communicator_t* comm,
300 struct ompi_request_t **request
301 );
302
303
304 /**
305 * Post a send request.
306 *
307 * @param buf (IN) User buffer.
308 * @param count (IN) Number of elements of the specified datatype.
309 * @param datatype (IN) User defined datatype.
310 * @param dst (IN) Peer rank w/in communicator.
311 * @param tag (IN) User defined tag.
312 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
313 * @param comm (IN) Communicator.
314 * @param request (OUT) Request handle.
315 * @return OMPI_SUCCESS or failure status.
316 */
317 typedef int (*mca_pml_base_module_isend_fn_t)(
318 const void *buf,
319 size_t count,
320 struct ompi_datatype_t *datatype,
321 int dst,
322 int tag,
323 mca_pml_base_send_mode_t mode,
324 struct ompi_communicator_t* comm,
325 struct ompi_request_t **request
326 );
327
328
329 /**
330 * Post a send request and wait for completion.
331 *
332 * @param buf (IN) User buffer.
333 * @param count (IN) Number of elements of the specified datatype.
334 * @param datatype (IN) User defined datatype.
335 * @param dst (IN) Peer rank w/in communicator.
336 * @param tag (IN) User defined tag.
337 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
338 * @param comm (IN) Communicator.
339 * @return OMPI_SUCCESS or failure status.
340 */
341 typedef int (*mca_pml_base_module_send_fn_t)(
342 const void *buf,
343 size_t count,
344 struct ompi_datatype_t *datatype,
345 int dst,
346 int tag,
347 mca_pml_base_send_mode_t mode,
348 struct ompi_communicator_t* comm
349 );
350
351 /**
352 * Initiate one or more persistent requests.
353 *
354 * @param count (IN) Number of requests
355 * @param requests (IN/OUT) Array of persistent requests
356 * @return OMPI_SUCCESS or failure status.
357 */
358 typedef ompi_request_start_fn_t mca_pml_base_module_start_fn_t;
359
360 /**
361 * Probe to poll for pending recv.
362 *
363 * @param src (IN) Source rank w/in communicator.
364 * @param tag (IN) User defined tag.
365 * @param comm (IN) Communicator.
366 * @param matched (OUT) Flag indicating if matching recv exists.
367 * @param status (OUT) Completion statuses.
368 * @return OMPI_SUCCESS or failure status.
369 *
370 */
371 typedef int (*mca_pml_base_module_iprobe_fn_t)(
372 int src,
373 int tag,
374 struct ompi_communicator_t* comm,
375 int *matched,
376 ompi_status_public_t *status
377 );
378
379 typedef int (*mca_pml_base_module_improbe_fn_t)(
380 int src,
381 int tag,
382 struct ompi_communicator_t* comm,
383 int *matched,
384 struct ompi_message_t **message,
385 ompi_status_public_t *status
386 );
387
388 /**
389 * Blocking probe to wait for pending recv.
390 *
391 * @param src (IN) Source rank w/in communicator.
392 * @param tag (IN) User defined tag.
393 * @param comm (IN) Communicator.
394 * @param status (OUT) Completion statuses.
395 * @return OMPI_SUCCESS or failure status.
396 *
397 */
398 typedef int (*mca_pml_base_module_probe_fn_t)(
399 int src,
400 int tag,
401 struct ompi_communicator_t* comm,
402 ompi_status_public_t *status
403 );
404
405 typedef int (*mca_pml_base_module_mprobe_fn_t)(
406 int src,
407 int tag,
408 struct ompi_communicator_t* comm,
409 struct ompi_message_t **message,
410 ompi_status_public_t *status
411 );
412
/**
 * Cancel a pending operation.
 *
 * @param request (IN) Request
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_cancel_fn_t)(struct ompi_request_t* request);
423
424
/**
 * Has a request been cancelled?
 *
 * @param request (IN) Request
 * @param flag Pointer to an int supplied by the caller.
 * NOTE(review): presumably an (OUT) parameter that receives the answer
 * (non-zero if the request was cancelled) -- confirm against an
 * implementation.
 * @return OMPI_SUCCESS or failure status.
 *
 */
typedef int (*mca_pml_base_module_cancelled_fn_t)(
    struct ompi_request_t* request,
    int *flag
);
436
/**
 * Release the resources held by a persistent mode request.
 *
 * @param request (IN) Request
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_free_fn_t)(struct ompi_request_t** request);
447
448
/**
 * Provide a special NULL request handle.
 *
 * @param request (OUT) Request
 * @return OMPI_SUCCESS or failure status.
 */
typedef int (*mca_pml_base_module_null_fn_t)(struct ompi_request_t** request);
459
/**
 * Diagnostics function.
 *
 * @param comm    (IN) Communicator
 * @param verbose (IN) Verbosity level (passed to BTL)
 * @return OMPI_SUCCESS or failure status.
 *
 */
typedef int (*mca_pml_base_module_dump_fn_t)(
    struct ompi_communicator_t* comm,
    int verbose
);
472
/**
 * Fault tolerance awareness hook.
 *
 * @param status Checkpoint status
 * @return OMPI_SUCCESS or failure status
 */
typedef int (*mca_pml_base_module_ft_event_fn_t)(int status);
479
/**
 * PML module flags (bit values tested against the module's pml_flags field).
 */
/** The PML requires all procs in the job on the first call to
 * add_procs. */
#define MCA_PML_BASE_FLAG_REQUIRE_WORLD 0x00000001
486
487 /**
488 * PML instance.
489 */
490
491 struct mca_pml_base_module_1_0_1_t {
492
493 /* downcalls from MCA to PML */
494 mca_pml_base_module_add_procs_fn_t pml_add_procs;
495 mca_pml_base_module_del_procs_fn_t pml_del_procs;
496 mca_pml_base_module_enable_fn_t pml_enable;
497 mca_pml_base_module_progress_fn_t pml_progress;
498
499 /* downcalls from MPI to PML */
500 mca_pml_base_module_add_comm_fn_t pml_add_comm;
501 mca_pml_base_module_del_comm_fn_t pml_del_comm;
502 mca_pml_base_module_irecv_init_fn_t pml_irecv_init;
503 mca_pml_base_module_irecv_fn_t pml_irecv;
504 mca_pml_base_module_recv_fn_t pml_recv;
505 mca_pml_base_module_isend_init_fn_t pml_isend_init;
506 mca_pml_base_module_isend_fn_t pml_isend;
507 mca_pml_base_module_send_fn_t pml_send;
508 mca_pml_base_module_iprobe_fn_t pml_iprobe;
509 mca_pml_base_module_probe_fn_t pml_probe;
510 mca_pml_base_module_start_fn_t pml_start;
511 mca_pml_base_module_improbe_fn_t pml_improbe;
512 mca_pml_base_module_mprobe_fn_t pml_mprobe;
513 mca_pml_base_module_imrecv_fn_t pml_imrecv;
514 mca_pml_base_module_mrecv_fn_t pml_mrecv;
515
516 /* diagnostics */
517 mca_pml_base_module_dump_fn_t pml_dump;
518
519 /* FT Event */
520 mca_pml_base_module_ft_event_fn_t pml_ft_event;
521
522 /* maximum constant sizes */
523 uint32_t pml_max_contextid;
524 int pml_max_tag;
525 int pml_flags;
526 };
527 typedef struct mca_pml_base_module_1_0_1_t mca_pml_base_module_1_0_1_t;
528 typedef mca_pml_base_module_1_0_1_t mca_pml_base_module_t;
529
/*
 * Version macro for components of type "pml" (PML interface 2.0.0);
 * expands to OMPI_MCA_BASE_VERSION_2_1_0 with the framework name and
 * version numbers filled in.
 */
#define MCA_PML_BASE_VERSION_2_0_0 OMPI_MCA_BASE_VERSION_2_1_0("pml", 2, 0, 0)
535
/*
 * MCA_PML_CALL(a): names the PML entry point "a".
 *
 * With direct calling enabled it expands to the symbol
 * mca_pml_<component>_<a>, where <component> is
 * MCA_ompi_pml_DIRECT_CALL_COMPONENT; otherwise it expands to the pml_<a>
 * member of the mca_pml module structure.
 */
#if MCA_ompi_pml_DIRECT_CALL

#include MCA_ompi_pml_DIRECT_CALL_HEADER

/* Two-step pasting so the component macro is expanded before ## is applied. */
#define MCA_PML_CALL_STAMP(a, b) mca_pml_ ## a ## _ ## b
#define MCA_PML_CALL_EXPANDER(a, b) MCA_PML_CALL_STAMP(a,b)
#define MCA_PML_CALL(a) MCA_PML_CALL_EXPANDER(MCA_ompi_pml_DIRECT_CALL_COMPONENT, a)

#else /* !MCA_ompi_pml_DIRECT_CALL */

#define MCA_PML_CALL(a) mca_pml.pml_ ## a

#endif /* MCA_ompi_pml_DIRECT_CALL */
550
551 OMPI_DECLSPEC extern mca_pml_base_module_t mca_pml;
552
553 static inline bool mca_pml_base_requires_world (void)
554 {
555 return !!(mca_pml.pml_flags & MCA_PML_BASE_FLAG_REQUIRE_WORLD);
556 }
557
558 END_C_DECLS
559 #endif /* MCA_PML_H */