1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2013 Mellanox Technologies, Inc.
4 * All rights reserved.
5 * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved.
6 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
7 * reserved.
8 * Copyright (c) 2016 Research Organization for Information Science
9 * and Technology (RIST). All rights reserved.
10 * $COPYRIGHT$
11 *
12 * Additional copyrights may follow
13 *
14 * $HEADER$
15 */
16
17 #ifndef MCA_SPML_H
18 #define MCA_SPML_H
19
20 #include "oshmem_config.h"
21 #include "oshmem/types.h"
22 #include "oshmem/constants.h"
23
24 #include "opal_stdint.h"
25 #include "oshmem/mca/mca.h"
26 #include "opal/mca/btl/btl.h"
27 #include "oshmem/proc/proc.h"
28
29 #include "oshmem/mca/sshmem/sshmem.h"
30
31 BEGIN_C_DECLS
32
33 /*
34 * SPML component types
35 */
36
/**
 * Modes accepted by an SPML send operation (see
 * mca_spml_base_module_send_fn_t).
 *
 * MCA_SPML_BASE_PUT_SIZE is the final enumerator, so its value equals the
 * number of modes declared before it.
 */
typedef enum {
    MCA_SPML_BASE_PUT_SYNCHRONOUS = 0,
    MCA_SPML_BASE_PUT_COMPLETE,
    MCA_SPML_BASE_PUT_BUFFERED,
    MCA_SPML_BASE_PUT_READY,
    MCA_SPML_BASE_PUT_STANDARD,
    MCA_SPML_BASE_PUT_SIZE
} mca_spml_base_put_mode_t;

/**
 * MCA->SPML: called by the MCA framework to initialize the component.
 *
 * @param priority (OUT) Relative priority or ranking used by MCA to
 * select a component.
 *
 * @param enable_progress_threads (IN) Whether this component is
 * allowed to run a hidden/progress thread or not.
 *
 * @param enable_mpi_threads (IN) Whether support for multiple MPI
 * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which
 * indicates whether multiple threads may invoke this component
 * simultaneously or not.
 *
 * @return Pointer to the module structure provided by the component.
 * NOTE(review): presumably NULL when the component cannot be used -
 * confirm against the component implementations.
 */
typedef struct mca_spml_base_module_1_0_0_t *
(*mca_spml_base_component_init_fn_t)(int *priority,
                                     bool enable_progress_threads,
                                     bool enable_mpi_threads);
63
/**
 * Component finalize entry point; this is the type of the spmlm_finalize
 * slot of the component structure.
 *
 * @return Integer status. NOTE(review): presumably OSHMEM_SUCCESS or a
 * failure status like the other entry points - confirm.
 */
typedef int (*mca_spml_base_component_finalize_fn_t)(void);
65
66 /**
67 * SPML component version and interface functions.
68 */
69 struct mca_spml_base_component_2_0_0_t {
70 mca_base_component_t spmlm_version;
71 mca_base_component_data_t spmlm_data;
72 mca_spml_base_component_init_fn_t spmlm_init;
73 mca_spml_base_component_finalize_fn_t spmlm_finalize;
74 };
75 typedef struct mca_spml_base_component_2_0_0_t mca_spml_base_component_2_0_0_t;
76 typedef mca_spml_base_component_2_0_0_t mca_spml_base_component_t;
77
78 /**
79 * MCA management functions.
80 */
81
82 static inline char *mca_spml_base_mkey2str(sshmem_mkey_t *mkey)
83 {
84 static char buf[64];
85
86 if (mkey->len == 0) {
87 snprintf(buf, sizeof(buf), "mkey: base=%p len=%d key=%" PRIu64, mkey->va_base, mkey->len, mkey->u.key);
88 } else {
89 snprintf(buf, sizeof(buf), "mkey: base=%p len=%d data=0x%p", mkey->va_base, mkey->len, mkey->u.data);
90 }
91
92 return buf;
93 }
94
/**
 * Downcall from the MCA layer to switch the SPML on or off.
 *
 * @param enable Enable/Disable SPML forwarding.
 * @return OSHMEM_SUCCESS or failure status.
 */
typedef int
(*mca_spml_base_module_enable_fn_t)(bool enable);
102
/**
 * Wait for a variable on the local PE to change.
 * Blocks until the variable is not equal to value.
 *
 * @param addr     Address of the variable to poll on.
 * @param cmp      Comparison selector. NOTE(review): presumably one of the
 *                 SHMEM_CMP_* operators, in which case the blocking
 *                 condition above is only the "not equal" case - confirm.
 * @param value    The value to poll on. Poll until the value held in addr
 *                 is different than value.
 * @param datatype Type selector for the variable behind addr and value.
 *                 NOTE(review): exact encoding not visible here - confirm.
 * @return OSHMEM_SUCCESS or failure status.
 */
typedef int
(*mca_spml_base_module_wait_fn_t)(void *addr,
                                  int cmp,
                                  void *value,
                                  int datatype);
115
/**
 * Test whether a variable on the local PE has changed.
 *
 * @param addr      Address of the variable to poll on.
 * @param cmp       Comparison selector. NOTE(review): presumably one of the
 *                  SHMEM_CMP_* operators - confirm.
 * @param value     The value to compare the variable held in addr against.
 * @param datatype  Type selector for the variable behind addr and value.
 *                  NOTE(review): exact encoding not visible here - confirm.
 * @param out_value Return value indicating whether the variable is equal
 *                  to the given cmp value.
 * @return OSHMEM_SUCCESS or failure status.
 */
typedef int
(*mca_spml_base_module_test_fn_t)(void *addr,
                                  int cmp,
                                  void *value,
                                  int datatype,
                                  int *out_value);
129
130 /**
131 * deserialize remote mkey
132 *
133 * @param mkey remote mkey
134 */
135 typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(shmem_ctx_t ctx, sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id);
136
137 /**
138 * If possible, get a pointer to the remote memory described by the mkey
139 *
140 * @param dst_addr address of the symmetric variable
141 * @param mkey remote memory key
142 * @param pe remote PE
143 *
144 * @return pointer to remote memory or NULL
145 */
146 typedef void * (*mca_spml_base_module_mkey_ptr_fn_t)(const void *dst_addr, sshmem_mkey_t *mkey, int pe);
147
148 /**
149 * free resources used by deserialized remote mkey
150 *
151 * @param mkey remote mkey
152 */
153 typedef void (*mca_spml_base_module_mkey_free_fn_t)(sshmem_mkey_t *);
154
155 /**
156 * Register (Pinn) a buffer of 'size' bits starting in address addr
157 *
158 * @param addr base address of the registered buffer.
159 * @param size the size of the buffer to be registered.
160 * @param seg_id sysv segment id
161 * @param count number of internal transports (btls) that registered memory
162 * @return array of mkeys (one mkey per "btl") or NULL on failure
163 *
164 */
165 typedef sshmem_mkey_t * (*mca_spml_base_module_register_fn_t)(void *addr,
166 size_t size,
167 uint64_t shmid,
168 int *count);
169
170 /**
171 * deregister memory pinned by register()
172 */
173 typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys);
174
175 /**
176 * try to fill up mkeys that can be used to reach remote pe.
177 * @param pe remote pe
178 * @param seg 0 - symmetric heap, 1 - static data, everything else are static data in .so
179 * @param mkeys mkeys array
180 *
181 * @return OSHMEM_SUCCSESS if keys are found
182 */
183 typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(shmem_ctx_t ctx, int pe,
184 uint32_t seg,
185 sshmem_mkey_t *mkeys);
186
187 /**
188 * For each proc setup a datastructure that indicates the BTLs
189 * that can be used to reach the destination.
190 *
191 * @param procs A list of all procs participating in the parallel application.
192 * @param nprocs The number of procs in the parallel application.
193 * @return OSHMEM_SUCCESS or failure status.
194 *
195 */
196 typedef int (*mca_spml_base_module_add_procs_fn_t)(ompi_proc_t** procs,
197 size_t nprocs);
198 typedef int (*mca_spml_base_module_del_procs_fn_t)(ompi_proc_t** procs,
199 size_t nprocs);
200
201
202 /**
203 * Create a communication context.
204 *
205 * @param options The set of options requested for the given context.
206 * @param ctx A handle to the newly created context.
207 * @return OSHMEM_SUCCESS or failure status.
208 */
209 typedef int (*mca_spml_base_module_ctx_create_fn_t)(long options, shmem_ctx_t *ctx);
210
211
212 /**
213 * Destroy a communication context.
214 *
215 * @param ctx Handle to the context that will be destroyed.
216 */
217 typedef void (*mca_spml_base_module_ctx_destroy_fn_t)(shmem_ctx_t ctx);
218
219 /**
220 * Transfer data to a remote pe.
221 *
222 * @param ctx The context object this routine is working on.
223 * @param dst_addr The address in the remote PE of the object being written.
224 * @param size The number of bytes to be written.
225 * @param src_addr An address on the local PE holdng the value to be written.
226 * @param dst The remote PE to be written to.
227 * @return OSHMEM_SUCCESS or failure status.
228 */
229 typedef int (*mca_spml_base_module_put_fn_t)(shmem_ctx_t ctx,
230 void *dst_addr,
231 size_t size,
232 void *src_addr,
233 int dst);
234
235 /**
236 * These routines provide the means for copying contiguous data to another PE without
237 * blocking the caller. These routines return before the data has been delivered to the
238 * remote PE.
239 *
240 * @param ctx The context object this routine is working on.
241 * @param dst_addr The address in the remote PE of the object being written.
242 * @param size The number of bytes to be written.
243 * @param src_addr An address on the local PE holdng the value to be written.
244 * @param dst The remote PE to be written to.
245 * @param handle The address of a handle to be passed to shmem_wait_nb() or
246 * shmem_test_nb() to wait or poll for the completion of the transfer.
247 * @return OSHMEM_SUCCESS or failure status.
248 */
249 typedef int (*mca_spml_base_module_put_nb_fn_t)(shmem_ctx_t ctx,
250 void *dst_addr,
251 size_t size,
252 void *src_addr,
253 int dst,
254 void **handle);
255
256 /**
257 * Blocking data transfer from remote PE.
258 * Read data from remote PE.
259 *
260 * @param ctx The context object this routine is working on.
261 * @param dst_addr The address on the local PE, to write the result of the get operation to.
262 * @param size The number of bytes to be read.
263 * @param src_addr The address on the remote PE, to read from.
264 * @param src The ID of the remote PE.
265 * @return OSHMEM_SUCCESS or failure status.
266 */
267 typedef int (*mca_spml_base_module_get_fn_t)(shmem_ctx_t ctx,
268 void *dst_addr,
269 size_t size,
270 void *src_addr,
271 int src);
272
273 /**
274 * Non-blocking data transfer from remote PE.
275 * Read data from remote PE.
276 *
277 * @param ctx The context object this routine is working on.
278 * @param dst_addr The address on the local PE, to write the result of the get operation to.
279 * @param size The number of bytes to be read.
280 * @param src_addr The address on the remote PE, to read from.
281 * @param src The ID of the remote PE.
282 * @param handle The address of a handle to be passed to shmem_wait_nb() or
283 * shmem_test_nb() to wait or poll for the completion of the transfer.
284 * @return - OSHMEM_SUCCESS or failure status.
285 */
286 typedef int (*mca_spml_base_module_get_nb_fn_t)(shmem_ctx_t ctx,
287 void *dst_addr,
288 size_t size,
289 void *src_addr,
290 int src,
291 void **handle);
292
/**
 * Post a receive and wait for completion.
 *
 * @param buf   (IN) User buffer.
 * @param count (IN) The number of bytes to be received.
 * @param src   (IN) The ID of the remote PE.
 * @return OSHMEM_SUCCESS or failure status.
 */
typedef int
(*mca_spml_base_module_recv_fn_t)(void *buf,
                                  size_t count,
                                  int src);
302
303 /**
304 * Post a send request and wait for completion.
305 *
306 * @param buf (IN) User buffer.
307 * @param count (IN) The number of bytes to be sent.
308 * @param dst (IN) The ID of the remote PE.
309 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY)
310 * @return OSHMEM_SUCCESS or failure status.
311 */
312 typedef int (*mca_spml_base_module_send_fn_t)(void *buf,
313 size_t count,
314 int dst,
315 mca_spml_base_put_mode_t mode);
316
/**
 * Asynchronously transfer data from the source PE to all PEs in the
 * OpenSHMEM job. The routine returns immediately; the source and target
 * buffers are reusable only after the operation has completed. Once the
 * data has been transferred to the target buffers, the counter object is
 * updated atomically. The counter object can be read either with atomic
 * operations such as shmem_atomic_fetch or with point-to-point
 * synchronization routines such as shmem_wait_until and shmem_test.
 *
 * shmem_quiet may be used for completing the operation, but is not
 * required for progress or completion. In a multithreaded OpenSHMEM
 * program, the user (the OpenSHMEM program) should ensure the correct
 * ordering of shmemx_alltoall_global calls.
 *
 * @param dest    A symmetric data object that is large enough to receive
 *                'size' bytes of data from each PE in the OpenSHMEM job.
 * @param source  A symmetric data object that contains 'size' bytes of
 *                data for each PE in the OpenSHMEM job.
 * @param size    The number of bytes to be sent to each PE in the job.
 * @param counter A symmetric data object to be atomically incremented
 *                after the target buffer is updated.
 *
 * @return OSHMEM_SUCCESS or failure status.
 */
typedef int
(*mca_spml_base_module_put_all_nb_fn_t)(void *dest,
                                        const void *source,
                                        size_t size,
                                        long *counter);
345
346 /**
347 * Assures ordering of delivery of put() requests
348 *
349 * @param ctx - The context object this routine is working on.
350 * @return - OSHMEM_SUCCESS or failure status.
351 */
352 typedef int (*mca_spml_base_module_fence_fn_t)(shmem_ctx_t ctx);
353
354 /**
355 * Wait for completion of all outstanding put() requests
356 *
357 * @param ctx - The context object this routine is working on.
358 * @return - OSHMEM_SUCCESS or failure status.
359 */
360 typedef int (*mca_spml_base_module_quiet_fn_t)(shmem_ctx_t ctx);
361
/**
 * Wait for completion of a non-blocking put or get issued by the
 * calling PE.
 *
 * @param handle NOTE(review): presumably the handle produced by the
 *               matching non-blocking put or get - confirm.
 * @return OSHMEM_SUCCESS or failure status.
 */
typedef int
(*mca_spml_base_module_wait_nb_fn_t)(void *handle);
368
/**
 * Hook called by memheap when memory is allocated by shmalloc(),
 * shcalloc(), shmemalign() or shrealloc().
 *
 * @param addr Base address of the registered buffer.
 * @param size The size of the buffer to be registered.
 */
typedef void
(*mca_spml_base_module_memuse_hook_fn_t)(void *addr,
                                         size_t size);
377
378 /**
379 * SPML instance.
380 */
381 struct mca_spml_base_module_1_0_0_t {
382
383 mca_spml_base_module_add_procs_fn_t spml_add_procs;
384 mca_spml_base_module_del_procs_fn_t spml_del_procs;
385
386 mca_spml_base_module_enable_fn_t spml_enable;
387 mca_spml_base_module_register_fn_t spml_register;
388 mca_spml_base_module_deregister_fn_t spml_deregister;
389 mca_spml_base_module_oob_get_mkeys_fn_t spml_oob_get_mkeys;
390
391 mca_spml_base_module_ctx_create_fn_t spml_ctx_create;
392 mca_spml_base_module_ctx_destroy_fn_t spml_ctx_destroy;
393
394 mca_spml_base_module_put_fn_t spml_put;
395 mca_spml_base_module_put_nb_fn_t spml_put_nb;
396 mca_spml_base_module_get_fn_t spml_get;
397 mca_spml_base_module_get_nb_fn_t spml_get_nb;
398
399 mca_spml_base_module_recv_fn_t spml_recv;
400 mca_spml_base_module_send_fn_t spml_send;
401
402 mca_spml_base_module_wait_fn_t spml_wait;
403 mca_spml_base_module_wait_nb_fn_t spml_wait_nb;
404 mca_spml_base_module_test_fn_t spml_test;
405 mca_spml_base_module_fence_fn_t spml_fence;
406 mca_spml_base_module_quiet_fn_t spml_quiet;
407
408 mca_spml_base_module_mkey_unpack_fn_t spml_rmkey_unpack;
409 mca_spml_base_module_mkey_free_fn_t spml_rmkey_free;
410 mca_spml_base_module_mkey_ptr_fn_t spml_rmkey_ptr;
411
412 mca_spml_base_module_memuse_hook_fn_t spml_memuse_hook;
413 mca_spml_base_module_put_all_nb_fn_t spml_put_all_nb;
414 void *self;
415 };
416
417 typedef struct mca_spml_base_module_1_0_0_t mca_spml_base_module_1_0_0_t;
418 typedef mca_spml_base_module_1_0_0_t mca_spml_base_module_t;
419
/*
 * Version stamp for components of type "spml": expands to the OSHMEM MCA
 * base version macro with framework name "spml" and version 2.0.0.
 */
#define MCA_SPML_BASE_VERSION_2_0_0 OSHMEM_MCA_BASE_VERSION_2_1_0("spml", 2, 0, 0)
425
/*
 * MCA_SPML_CALL(a): name the SPML entry point "a".
 *
 * With direct calls enabled this becomes the symbol
 * mca_spml_<component>_<a>; otherwise it becomes the member spml_<a> of
 * the mca_spml module object.
 */
#if MCA_oshmem_spml_DIRECT_CALL

#include MCA_oshmem_spml_DIRECT_CALL_HEADER

/*
 * Two macro levels are needed so that
 * MCA_oshmem_spml_DIRECT_CALL_COMPONENT is expanded before token pasting.
 */
#define MCA_SPML_CALL_STAMP(a, b) mca_spml_ ## a ## _ ## b
#define MCA_SPML_CALL_EXPANDER(a, b) MCA_SPML_CALL_STAMP(a, b)
#define MCA_SPML_CALL(a) \
    MCA_SPML_CALL_EXPANDER(MCA_oshmem_spml_DIRECT_CALL_COMPONENT, a)

#else /* !MCA_oshmem_spml_DIRECT_CALL */

#define MCA_SPML_CALL(a) mca_spml.spml_ ## a

#endif /* MCA_oshmem_spml_DIRECT_CALL */
440
441 OSHMEM_DECLSPEC extern mca_spml_base_module_t mca_spml;
442
443 END_C_DECLS
444 #endif /* MCA_SPML_H */