1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2013 Mellanox Technologies, Inc. 4 * All rights reserved. 5 * Copyright (c) 2014 Cisco Systems, Inc. All rights reserved. 6 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights 7 * reserved. 8 * Copyright (c) 2016 Research Organization for Information Science 9 * and Technology (RIST). All rights reserved. 10 * $COPYRIGHT$ 11 * 12 * Additional copyrights may follow 13 * 14 * $HEADER$ 15 */ 16 17 #ifndef MCA_SPML_H 18 #define MCA_SPML_H 19 20 #include "oshmem_config.h" 21 #include "oshmem/types.h" 22 #include "oshmem/constants.h" 23 24 #include "opal_stdint.h" 25 #include "oshmem/mca/mca.h" 26 #include "opal/mca/btl/btl.h" 27 #include "oshmem/proc/proc.h" 28 29 #include "oshmem/mca/sshmem/sshmem.h" 30 31 BEGIN_C_DECLS 32 33 /* 34 * SPML component types 35 */ 36 37 /** 38 * MCA->PML Called by MCA framework to initialize the component. 39 * 40 * @param priority (OUT) Relative priority or ranking used by MCA to 41 * selected a component. 42 * 43 * @param enable_progress_threads (IN) Whether this component is 44 * allowed to run a hidden/progress thread or not. 45 * 46 * @param enable_mpi_threads (IN) Whether support for multiple MPI 47 * threads is enabled or not (i.e., MPI_THREAD_MULTIPLE), which 48 * indicates whether multiple threads may invoke this component 49 * simultaneously or not. 50 */ 51 typedef enum { 52 MCA_SPML_BASE_PUT_SYNCHRONOUS, 53 MCA_SPML_BASE_PUT_COMPLETE, 54 MCA_SPML_BASE_PUT_BUFFERED, 55 MCA_SPML_BASE_PUT_READY, 56 MCA_SPML_BASE_PUT_STANDARD, 57 MCA_SPML_BASE_PUT_SIZE 58 } mca_spml_base_put_mode_t; 59 60 typedef struct mca_spml_base_module_1_0_0_t * (*mca_spml_base_component_init_fn_t)(int *priority, 61 bool enable_progress_threads, 62 bool enable_mpi_threads); 63 64 typedef int (*mca_spml_base_component_finalize_fn_t)(void); 65 66 /** 67 * SPML component version and interface functions. 68 */ 69 struct mca_spml_base_component_2_0_0_t { 70 mca_base_component_t spmlm_version; 71 mca_base_component_data_t spmlm_data; 72 mca_spml_base_component_init_fn_t spmlm_init; 73 mca_spml_base_component_finalize_fn_t spmlm_finalize; 74 }; 75 typedef struct mca_spml_base_component_2_0_0_t mca_spml_base_component_2_0_0_t; 76 typedef mca_spml_base_component_2_0_0_t mca_spml_base_component_t; 77 78 /** 79 * MCA management functions. 80 */ 81 82 static inline char *mca_spml_base_mkey2str(sshmem_mkey_t *mkey) 83 { 84 static char buf[64]; 85 86 if (mkey->len == 0) { 87 snprintf(buf, sizeof(buf), "mkey: base=%p len=%d key=%" PRIu64, mkey->va_base, mkey->len, mkey->u.key); 88 } else { 89 snprintf(buf, sizeof(buf), "mkey: base=%p len=%d data=0x%p", mkey->va_base, mkey->len, mkey->u.data); 90 } 91 92 return buf; 93 } 94 95 /** 96 * Downcall from MCA layer to enable the PML/BTLs. 97 * 98 * @param enable Enable/Disable SPML forwarding 99 * @return OSHMEM_SUCCESS or failure status. 100 */ 101 typedef int (*mca_spml_base_module_enable_fn_t)(bool enable); 102 103 /** 104 * Waits for an int variable to change on the local PE. 105 * Blocked until the variable is not equal to value. 106 * 107 * @param addr Address of the variable to pool on. 108 * @param value The value to pool on. Pool until the value held in addr is different than value. 109 * @return OSHMEM_SUCCESS or failure status. 110 */ 111 typedef int (*mca_spml_base_module_wait_fn_t)(void* addr, 112 int cmp, 113 void* value, 114 int datatype); 115 116 /** 117 * Test for an int variable to change on the local PE. 118 * 119 * @param addr Address of the variable to pool on. 120 * @param value The value to pool on. Pool until the value held in addr is different than value. 121 * @param out_value Return value to indicated if variable is equal to given cmp value. 122 * @return OSHMEM_SUCCESS or failure status. 123 */ 124 typedef int (*mca_spml_base_module_test_fn_t)(void* addr, 125 int cmp, 126 void* value, 127 int datatype, 128 int *out_value); 129 130 /** 131 * deserialize remote mkey 132 * 133 * @param mkey remote mkey 134 */ 135 typedef void (*mca_spml_base_module_mkey_unpack_fn_t)(shmem_ctx_t ctx, sshmem_mkey_t *, uint32_t segno, int remote_pe, int tr_id); 136 137 /** 138 * If possible, get a pointer to the remote memory described by the mkey 139 * 140 * @param dst_addr address of the symmetric variable 141 * @param mkey remote memory key 142 * @param pe remote PE 143 * 144 * @return pointer to remote memory or NULL 145 */ 146 typedef void * (*mca_spml_base_module_mkey_ptr_fn_t)(const void *dst_addr, sshmem_mkey_t *mkey, int pe); 147 148 /** 149 * free resources used by deserialized remote mkey 150 * 151 * @param mkey remote mkey 152 */ 153 typedef void (*mca_spml_base_module_mkey_free_fn_t)(sshmem_mkey_t *); 154 155 /** 156 * Register (Pinn) a buffer of 'size' bits starting in address addr 157 * 158 * @param addr base address of the registered buffer. 159 * @param size the size of the buffer to be registered. 160 * @param seg_id sysv segment id 161 * @param count number of internal transports (btls) that registered memory 162 * @return array of mkeys (one mkey per "btl") or NULL on failure 163 * 164 */ 165 typedef sshmem_mkey_t * (*mca_spml_base_module_register_fn_t)(void *addr, 166 size_t size, 167 uint64_t shmid, 168 int *count); 169 170 /** 171 * deregister memory pinned by register() 172 */ 173 typedef int (*mca_spml_base_module_deregister_fn_t)(sshmem_mkey_t *mkeys); 174 175 /** 176 * try to fill up mkeys that can be used to reach remote pe. 177 * @param pe remote pe 178 * @param seg 0 - symmetric heap, 1 - static data, everything else are static data in .so 179 * @param mkeys mkeys array 180 * 181 * @return OSHMEM_SUCCSESS if keys are found 182 */ 183 typedef int (*mca_spml_base_module_oob_get_mkeys_fn_t)(shmem_ctx_t ctx, int pe, 184 uint32_t seg, 185 sshmem_mkey_t *mkeys); 186 187 /** 188 * For each proc setup a datastructure that indicates the BTLs 189 * that can be used to reach the destination. 190 * 191 * @param procs A list of all procs participating in the parallel application. 192 * @param nprocs The number of procs in the parallel application. 193 * @return OSHMEM_SUCCESS or failure status. 194 * 195 */ 196 typedef int (*mca_spml_base_module_add_procs_fn_t)(ompi_proc_t** procs, 197 size_t nprocs); 198 typedef int (*mca_spml_base_module_del_procs_fn_t)(ompi_proc_t** procs, 199 size_t nprocs); 200 201 202 /** 203 * Create a communication context. 204 * 205 * @param options The set of options requested for the given context. 206 * @param ctx A handle to the newly created context. 207 * @return OSHMEM_SUCCESS or failure status. 208 */ 209 typedef int (*mca_spml_base_module_ctx_create_fn_t)(long options, shmem_ctx_t *ctx); 210 211 212 /** 213 * Destroy a communication context. 214 * 215 * @param ctx Handle to the context that will be destroyed. 216 */ 217 typedef void (*mca_spml_base_module_ctx_destroy_fn_t)(shmem_ctx_t ctx); 218 219 /** 220 * Transfer data to a remote pe. 221 * 222 * @param ctx The context object this routine is working on. 223 * @param dst_addr The address in the remote PE of the object being written. 224 * @param size The number of bytes to be written. 225 * @param src_addr An address on the local PE holdng the value to be written. 226 * @param dst The remote PE to be written to. 227 * @return OSHMEM_SUCCESS or failure status. 228 */ 229 typedef int (*mca_spml_base_module_put_fn_t)(shmem_ctx_t ctx, 230 void *dst_addr, 231 size_t size, 232 void *src_addr, 233 int dst); 234 235 /** 236 * These routines provide the means for copying contiguous data to another PE without 237 * blocking the caller. These routines return before the data has been delivered to the 238 * remote PE. 239 * 240 * @param ctx The context object this routine is working on. 241 * @param dst_addr The address in the remote PE of the object being written. 242 * @param size The number of bytes to be written. 243 * @param src_addr An address on the local PE holdng the value to be written. 244 * @param dst The remote PE to be written to. 245 * @param handle The address of a handle to be passed to shmem_wait_nb() or 246 * shmem_test_nb() to wait or poll for the completion of the transfer. 247 * @return OSHMEM_SUCCESS or failure status. 248 */ 249 typedef int (*mca_spml_base_module_put_nb_fn_t)(shmem_ctx_t ctx, 250 void *dst_addr, 251 size_t size, 252 void *src_addr, 253 int dst, 254 void **handle); 255 256 /** 257 * Blocking data transfer from remote PE. 258 * Read data from remote PE. 259 * 260 * @param ctx The context object this routine is working on. 261 * @param dst_addr The address on the local PE, to write the result of the get operation to. 262 * @param size The number of bytes to be read. 263 * @param src_addr The address on the remote PE, to read from. 264 * @param src The ID of the remote PE. 265 * @return OSHMEM_SUCCESS or failure status. 266 */ 267 typedef int (*mca_spml_base_module_get_fn_t)(shmem_ctx_t ctx, 268 void *dst_addr, 269 size_t size, 270 void *src_addr, 271 int src); 272 273 /** 274 * Non-blocking data transfer from remote PE. 275 * Read data from remote PE. 276 * 277 * @param ctx The context object this routine is working on. 278 * @param dst_addr The address on the local PE, to write the result of the get operation to. 279 * @param size The number of bytes to be read. 280 * @param src_addr The address on the remote PE, to read from. 281 * @param src The ID of the remote PE. 282 * @param handle The address of a handle to be passed to shmem_wait_nb() or 283 * shmem_test_nb() to wait or poll for the completion of the transfer. 284 * @return - OSHMEM_SUCCESS or failure status. 285 */ 286 typedef int (*mca_spml_base_module_get_nb_fn_t)(shmem_ctx_t ctx, 287 void *dst_addr, 288 size_t size, 289 void *src_addr, 290 int src, 291 void **handle); 292 293 /** 294 * Post a receive and wait for completion. 295 * 296 * @param buf (IN) User buffer. 297 * @param count (IN) The number of bytes to be sent. 298 * @param src (IN) The ID of the remote PE. 299 * @return OSHMEM_SUCCESS or failure status. 300 */ 301 typedef int (*mca_spml_base_module_recv_fn_t)(void *buf, size_t count, int src); 302 303 /** 304 * Post a send request and wait for completion. 305 * 306 * @param buf (IN) User buffer. 307 * @param count (IN) The number of bytes to be sent. 308 * @param dst (IN) The ID of the remote PE. 309 * @param mode (IN) Send mode (STANDARD,BUFFERED,SYNCHRONOUS,READY) 310 * @return OSHMEM_SUCCESS or failure status. 311 */ 312 typedef int (*mca_spml_base_module_send_fn_t)(void *buf, 313 size_t count, 314 int dst, 315 mca_spml_base_put_mode_t mode); 316 317 /** 318 * The routine transfers the data asynchronously from the source PE to all 319 * PEs in the OpenSHMEM job. The routine returns immediately. The source and 320 * target buffers are reusable only after the completion of the routine. 321 * After the data is transferred to the target buffers, the counter object 322 * is updated atomically. The counter object can be read either using atomic 323 * operations such as shmem_atomic_fetch or can use point-to-point synchronization 324 * routines such as shmem_wait_until and shmem_test. 325 * 326 * Shmem_quiet may be used for completing the operation, but not required for 327 * progress or completion. In a multithreaded OpenSHMEM program, the user 328 * (the OpenSHMEM program) should ensure the correct ordering of 329 * shmemx_alltoall_global calls. 330 * 331 * @param dest A symmetric data object that is large enough to receive 332 * “size” bytes of data from each PE in the OpenSHMEM job. 333 * @param source A symmetric data object that contains “size” bytes of data 334 * for each PE in the OpenSHMEM job. 335 * @param size The number of bytes to be sent to each PE in the job. 336 * @param counter A symmetric data object to be atomically incremented after 337 * the target buffer is updated. 338 * 339 * @return OSHMEM_SUCCESS or failure status. 340 */ 341 typedef int (*mca_spml_base_module_put_all_nb_fn_t)(void *dest, 342 const void *source, 343 size_t size, 344 long *counter); 345 346 /** 347 * Assures ordering of delivery of put() requests 348 * 349 * @param ctx - The context object this routine is working on. 350 * @return - OSHMEM_SUCCESS or failure status. 351 */ 352 typedef int (*mca_spml_base_module_fence_fn_t)(shmem_ctx_t ctx); 353 354 /** 355 * Wait for completion of all outstanding put() requests 356 * 357 * @param ctx - The context object this routine is working on. 358 * @return - OSHMEM_SUCCESS or failure status. 359 */ 360 typedef int (*mca_spml_base_module_quiet_fn_t)(shmem_ctx_t ctx); 361 362 /** 363 * Waits for completion of a non-blocking put or get issued by the calling PE. 364 * 365 * @return - OSHMEM_SUCCESS or failure status. 366 */ 367 typedef int (*mca_spml_base_module_wait_nb_fn_t)(void *); 368 369 /** 370 * Called by memheap when memory is allocated by shmalloc(), 371 * shcalloc(), shmemalign() or shrealloc() 372 * 373 * @param addr base address of the registered buffer. 374 * @param size the size of the buffer to be registered. 375 */ 376 typedef void (*mca_spml_base_module_memuse_hook_fn_t)(void *, size_t); 377 378 /** 379 * SPML instance. 380 */ 381 struct mca_spml_base_module_1_0_0_t { 382 383 mca_spml_base_module_add_procs_fn_t spml_add_procs; 384 mca_spml_base_module_del_procs_fn_t spml_del_procs; 385 386 mca_spml_base_module_enable_fn_t spml_enable; 387 mca_spml_base_module_register_fn_t spml_register; 388 mca_spml_base_module_deregister_fn_t spml_deregister; 389 mca_spml_base_module_oob_get_mkeys_fn_t spml_oob_get_mkeys; 390 391 mca_spml_base_module_ctx_create_fn_t spml_ctx_create; 392 mca_spml_base_module_ctx_destroy_fn_t spml_ctx_destroy; 393 394 mca_spml_base_module_put_fn_t spml_put; 395 mca_spml_base_module_put_nb_fn_t spml_put_nb; 396 mca_spml_base_module_get_fn_t spml_get; 397 mca_spml_base_module_get_nb_fn_t spml_get_nb; 398 399 mca_spml_base_module_recv_fn_t spml_recv; 400 mca_spml_base_module_send_fn_t spml_send; 401 402 mca_spml_base_module_wait_fn_t spml_wait; 403 mca_spml_base_module_wait_nb_fn_t spml_wait_nb; 404 mca_spml_base_module_test_fn_t spml_test; 405 mca_spml_base_module_fence_fn_t spml_fence; 406 mca_spml_base_module_quiet_fn_t spml_quiet; 407 408 mca_spml_base_module_mkey_unpack_fn_t spml_rmkey_unpack; 409 mca_spml_base_module_mkey_free_fn_t spml_rmkey_free; 410 mca_spml_base_module_mkey_ptr_fn_t spml_rmkey_ptr; 411 412 mca_spml_base_module_memuse_hook_fn_t spml_memuse_hook; 413 mca_spml_base_module_put_all_nb_fn_t spml_put_all_nb; 414 void *self; 415 }; 416 417 typedef struct mca_spml_base_module_1_0_0_t mca_spml_base_module_1_0_0_t; 418 typedef mca_spml_base_module_1_0_0_t mca_spml_base_module_t; 419 420 /* 421 * Macro for use in components that are of type spml 422 */ 423 #define MCA_SPML_BASE_VERSION_2_0_0 \ 424 OSHMEM_MCA_BASE_VERSION_2_1_0("spml", 2, 0, 0) 425 426 /* 427 * macro for doing direct call / call through struct 428 */ 429 #if MCA_oshmem_spml_DIRECT_CALL 430 431 #include MCA_oshmem_spml_DIRECT_CALL_HEADER 432 433 #define MCA_SPML_CALL_STAMP(a, b) mca_spml_ ## a ## _ ## b 434 #define MCA_SPML_CALL_EXPANDER(a, b) MCA_SPML_CALL_STAMP(a,b) 435 #define MCA_SPML_CALL(a) MCA_SPML_CALL_EXPANDER(MCA_oshmem_spml_DIRECT_CALL_COMPONENT, a) 436 437 #else 438 #define MCA_SPML_CALL(a) mca_spml.spml_ ## a 439 #endif 440 441 OSHMEM_DECLSPEC extern mca_spml_base_module_t mca_spml; 442 443 END_C_DECLS 444 #endif /* MCA_SPML_H */