1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
4 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
5 * reserved.
6 * $COPYRIGHT$
7 *
8 * Additional copyrights may follow
9 *
10 * $HEADER$
11 */
12
13 #ifndef OPAL_PMIX_H
14 #define OPAL_PMIX_H
15
16 #include "opal_config.h"
17 #include "opal/types.h"
18
19 #ifdef HAVE_SYS_UN_H
20 #include <sys/un.h>
21 #endif
22
23 #include "opal/mca/mca.h"
24 #include "opal/mca/event/event.h"
25 #include "opal/dss/dss.h"
26 #include "opal/runtime/opal.h"
27 #include "opal/dss/dss.h"
28 #include "opal/util/error.h"
29 #include "opal/util/proc.h"
30 #include "opal/hash_string.h"
31
32 #include "opal/mca/pmix/pmix_types.h"
33 #include "opal/mca/pmix/pmix_server.h"
34
35 BEGIN_C_DECLS
36
37 /* provide access to the framework verbose output without
38 * exposing the entire base */
39 extern int opal_pmix_verbose_output;
40 extern bool opal_pmix_collect_all_data;
41 extern bool opal_pmix_base_async_modex;
42 extern int opal_pmix_base_exchange(opal_value_t *info,
43 opal_pmix_pdata_t *pdat,
44 int timeout);
45
/*
 * Compute the hash for the external RM: hash the string with
 * OPAL_HASH_STR, then clear bit 0x8000 of the result.
 *
 * str  - the string to be hashed
 * hash - lvalue that receives the hash value
 *
 * NOTE: this expands to a bare brace block, not a do/while(0) statement.
 */
#define OPAL_HASH_JOBID(str, hash)      \
    {                                   \
        OPAL_HASH_STR(str, hash);       \
        (hash) &= ~0x8000;              \
    }
53
/**
 * Provide a simplified macro for sending data via modex
 * to other processes. The macro requires five arguments:
 *
 * r - the integer return status from the modex op
 * sc - the PMIX scope of the data
 * s - the key to tag the data being posted
 * d - pointer to the data object being posted
 * t - the type of the data
 *
 * A failure of either the value load or the put is logged with
 * OPAL_ERROR_LOG and left in r.
 *
 * NOTE: the expansion already ends in a ';'.
 */
#define OPAL_MODEX_SEND_VALUE(r, sc, s, d, t)                                   \
    do {                                                                        \
        opal_value_t _kv;                                                       \
        OBJ_CONSTRUCT(&(_kv), opal_value_t);                                    \
        _kv.key = (s);                                                          \
        (r) = opal_value_load(&(_kv), (d), (t));                                \
        if (OPAL_SUCCESS == (r)) {                                              \
            /* only post the value once it has been loaded */                   \
            (r) = opal_pmix.put((sc), &(_kv));                                  \
        }                                                                       \
        if (OPAL_SUCCESS != (r)) {                                              \
            OPAL_ERROR_LOG((r));                                                \
        }                                                                       \
        /* opal_value_load made a copy of the data, so destructing releases */  \
        /* it; the key is the caller's, so detach it first                  */  \
        _kv.key = NULL;                                                         \
        OBJ_DESTRUCT(&(_kv));                                                   \
    } while(0);
80
/**
 * Provide a simplified macro for sending data via modex
 * to other processes, tagged with a string key. The data is
 * posted as an OPAL_BYTE_OBJECT. The macro requires five arguments:
 *
 * r - the integer return status from the modex op
 * sc - the PMIX scope of the data
 * s - the key to tag the data being posted
 * d - the data object being posted
 * sz - the number of bytes in the data object
 *
 * NOTE: the expansion already ends in a ';'.
 */
#define OPAL_MODEX_SEND_STRING(r, sc, s, d, sz)                         \
    do {                                                                \
        opal_value_t _kv;                                               \
        OBJ_CONSTRUCT(&(_kv), opal_value_t);                            \
        /* the value only borrows the caller's key and bytes */         \
        _kv.key = (s);                                                  \
        _kv.type = OPAL_BYTE_OBJECT;                                    \
        _kv.data.bo.bytes = (uint8_t*)(d);                              \
        _kv.data.bo.size = (sz);                                        \
        (r) = opal_pmix.put((sc), &(_kv));                              \
        if (OPAL_SUCCESS != (r)) {                                      \
            OPAL_ERROR_LOG((r));                                        \
        }                                                               \
        /* protect the data and the key before destructing */           \
        _kv.data.bo.bytes = NULL;                                       \
        _kv.key = NULL;                                                 \
        OBJ_DESTRUCT(&(_kv));                                           \
    } while(0);
106
/**
 * Provide a simplified macro for sending data via modex
 * to other processes. The macro requires five arguments:
 *
 * r - the integer return status from the modex op
 * sc - the PMIX scope of the data
 * s - the MCA component that is posting the data
 * d - the data object being posted
 * sz - the number of bytes in the data object
 *
 * The key is obtained from mca_base_component_to_string() and is
 * freed before the macro completes. If no key string could be
 * produced, nothing is posted: OPAL_ERR_OUT_OF_RESOURCE is logged
 * and returned in r, matching what OPAL_MODEX_RECV does.
 *
 * NOTE: the expansion already ends in a ';'.
 */
#define OPAL_MODEX_SEND(r, sc, s, d, sz)                                \
    do {                                                                \
        char *_key = mca_base_component_to_string((s));                 \
        if (NULL == _key) {                                             \
            /* never hand a NULL key to the put */                      \
            OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);                   \
            (r) = OPAL_ERR_OUT_OF_RESOURCE;                             \
        } else {                                                        \
            OPAL_MODEX_SEND_STRING((r), (sc), _key, (d), (sz));         \
            free(_key);                                                 \
        }                                                               \
    } while(0);
124
/**
 * Provide a simplified macro for retrieving modex data
 * from another process when we don't want the PMIx module
 * to request it from the server if not found. The get is
 * issued with a one-entry info list holding OPAL_PMIX_OPTIONAL
 * set to true.
 *
 * r - the integer return status from the modex op (int)
 * s - string key (char*)
 * p - pointer to the opal_process_name_t of the proc that posted
 *     the data (opal_process_name_t*)
 * d - pointer to a location wherein the data object
 *     is to be returned
 * t - the expected data type
 *
 * r is set to OPAL_ERR_NOT_FOUND when the get succeeds but
 * returns no value.
 */
#define OPAL_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t)                                   \
    do {                                                                                \
        opal_value_t *_kv, *_info;                                                      \
        opal_list_t _ilist;                                                             \
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,                               \
                             "%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s",  \
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),                        \
                             __FILE__, __LINE__,                                        \
                             OPAL_NAME_PRINT(*(p)), (s)));                              \
        /* build the directive list: OPAL_PMIX_OPTIONAL = true */                       \
        OBJ_CONSTRUCT(&(_ilist), opal_list_t);                                          \
        _info = OBJ_NEW(opal_value_t);                                                  \
        _info->key = strdup(OPAL_PMIX_OPTIONAL);                                        \
        _info->type = OPAL_BOOL;                                                        \
        _info->data.flag = true;                                                        \
        opal_list_append(&(_ilist), &(_info)->super);                                   \
        (r) = opal_pmix.get((p), (s), &(_ilist), &(_kv));                               \
        if (OPAL_SUCCESS == (r)) {                                                      \
            if (NULL != _kv) {                                                          \
                (r) = opal_value_unload(_kv, (void**)(d), (t));                         \
                OBJ_RELEASE(_kv);                                                       \
            } else {                                                                    \
                (r) = OPAL_ERR_NOT_FOUND;                                               \
            }                                                                           \
        }                                                                               \
        OPAL_LIST_DESTRUCT(&(_ilist));                                                  \
    } while(0);
163
/**
 * Provide a simplified macro for retrieving modex data
 * from another process when we want the PMIx module
 * to request it from the server if not found, but do not
 * want the server to go find it if the server doesn't
 * already have it. The get is issued with a one-entry info
 * list holding OPAL_PMIX_IMMEDIATE set to true.
 *
 * r - the integer return status from the modex op (int)
 * s - string key (char*)
 * p - pointer to the opal_process_name_t of the proc that posted
 *     the data (opal_process_name_t*)
 * d - pointer to a location wherein the data object
 *     is to be returned
 * t - the expected data type
 *
 * r is set to OPAL_ERR_NOT_FOUND when the get succeeds but
 * returns no value. Tracing goes through OPAL_OUTPUT_VERBOSE,
 * the same as the other MODEX_RECV macros in this header.
 *
 * NOTE: the expansion already ends in a ';'.
 */
#define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t)                                  \
    do {                                                                                \
        opal_value_t *_kv, *_info;                                                      \
        opal_list_t _ilist;                                                             \
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,                               \
                             "%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),                        \
                             __FILE__, __LINE__,                                        \
                             OPAL_NAME_PRINT(*(p)), (s)));                              \
        /* build the directive list: OPAL_PMIX_IMMEDIATE = true */                      \
        OBJ_CONSTRUCT(&(_ilist), opal_list_t);                                          \
        _info = OBJ_NEW(opal_value_t);                                                  \
        _info->key = strdup(OPAL_PMIX_IMMEDIATE);                                       \
        _info->type = OPAL_BOOL;                                                        \
        _info->data.flag = true;                                                        \
        opal_list_append(&(_ilist), &(_info)->super);                                   \
        if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) {       \
            if (NULL == _kv) {                                                          \
                (r) = OPAL_ERR_NOT_FOUND;                                               \
            } else {                                                                    \
                (r) = opal_value_unload(_kv, (void**)(d), (t));                         \
                OBJ_RELEASE(_kv);                                                       \
            }                                                                           \
        }                                                                               \
        OPAL_LIST_DESTRUCT(&(_ilist));                                                  \
    } while(0);
204
/**
 * Provide a simplified macro for retrieving modex data
 * from another process (the get is issued with a NULL info list):
 *
 * r - the integer return status from the modex op (int)
 * s - string key (char*)
 * p - pointer to the opal_process_name_t of the proc that posted
 *     the data (opal_process_name_t*)
 * d - pointer to a location wherein the data object
 *     is to be returned
 * t - the expected data type
 *
 * r is set to OPAL_ERR_NOT_FOUND when the get succeeds but
 * returns no value.
 */
#define OPAL_MODEX_RECV_VALUE(r, s, p, d, t)                                    \
    do {                                                                        \
        opal_value_t *_kv;                                                      \
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,                       \
                             "%s[%s:%d] MODEX RECV VALUE FOR PROC %s KEY %s",   \
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),                \
                             __FILE__, __LINE__,                                \
                             OPAL_NAME_PRINT(*(p)), (s)));                      \
        (r) = opal_pmix.get((p), (s), NULL, &(_kv));                            \
        if (OPAL_SUCCESS == (r)) {                                              \
            if (NULL != _kv) {                                                  \
                /* hand the payload to the caller, then drop the value */       \
                (r) = opal_value_unload(_kv, (void**)(d), (t));                 \
                OBJ_RELEASE(_kv);                                               \
            } else {                                                            \
                (r) = OPAL_ERR_NOT_FOUND;                                       \
            }                                                                   \
        }                                                                       \
    } while(0);
234
/**
 * Provide a simplified macro for retrieving modex data
 * from another process; the value's byte-object payload is
 * handed to the caller:
 *
 * r - the integer return status from the modex op (int)
 * s - string key (char*)
 * p - pointer to the opal_process_name_t of the proc that posted
 *     the data (opal_process_name_t*)
 * d - pointer to a location wherein the data object
 *     is to be returned (char**)
 * sz - pointer to a location wherein the number of bytes
 *     in the data object can be returned (size_t*)
 *
 * If the get fails, or succeeds without returning a value, *sz is
 * set to 0, r is set to OPAL_ERR_NOT_FOUND and *d is left untouched.
 */
#define OPAL_MODEX_RECV_STRING(r, s, p, d, sz)                                  \
    do {                                                                        \
        opal_value_t *_kv;                                                      \
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,                       \
                             "%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s",  \
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),                \
                             __FILE__, __LINE__,                                \
                             OPAL_NAME_PRINT(*(p)), (s)));                      \
        (r) = opal_pmix.get((p), (s), NULL, &(_kv));                            \
        /* _kv is only inspected once the get has reported success */           \
        if (OPAL_SUCCESS != (r) || NULL == _kv) {                               \
            *(sz) = 0;                                                          \
            (r) = OPAL_ERR_NOT_FOUND;                                           \
        } else {                                                                \
            *(d) = _kv->data.bo.bytes;                                          \
            *(sz) = _kv->data.bo.size;                                          \
            _kv->data.bo.bytes = NULL; /* protect the data */                   \
            OBJ_RELEASE(_kv);                                                   \
        }                                                                       \
    } while(0);
271
/**
 * Provide a simplified macro for retrieving modex data
 * from another process:
 *
 * r - the integer return status from the modex op (int)
 * s - the MCA component that posted the data (mca_base_component_t*)
 * p - pointer to the opal_process_name_t of the proc that posted
 *     the data (opal_process_name_t*)
 * d - pointer to a location wherein the data object
 *     is to be returned (char**)
 * sz - pointer to a location wherein the number of bytes
 *     in the data object can be returned (size_t*)
 *
 * The key is obtained from mca_base_component_to_string() and is
 * freed before the macro completes. If no key string could be
 * produced, OPAL_ERR_OUT_OF_RESOURCE is logged and returned in r
 * and no lookup is attempted.
 *
 * NOTE: the expansion already ends in a ';'.
 */
#define OPAL_MODEX_RECV(r, s, p, d, sz)                                         \
    do {                                                                        \
        char *_key = mca_base_component_to_string((s));                         \
        if (NULL == _key) {                                                     \
            OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);                           \
            (r) = OPAL_ERR_OUT_OF_RESOURCE;                                     \
        } else {                                                                \
            /* trace only once the key is known to be valid: a NULL */          \
            /* pointer must never be passed to a %s conversion      */          \
            OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,                   \
                                 "%s[%s:%d] MODEX RECV FOR PROC %s KEY %s",     \
                                 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),            \
                                 __FILE__, __LINE__,                            \
                                 OPAL_NAME_PRINT(*(p)), _key));                 \
            OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz));                  \
            free(_key);                                                         \
        }                                                                       \
    } while(0);
302
/**
 * Provide a macro for accessing a base function that exchanges
 * data values between two procs using the PMIx Publish/Lookup
 * APIs. It traces the two keys involved and then calls
 * opal_pmix_base_exchange():
 *
 * r - the integer return status of the exchange (int)
 * i - the value being offered; its key is traced (opal_value_t*)
 * p - the pdata being sought; its value.key is traced
 *     (opal_pmix_pdata_t*)
 * t - the timeout handed to opal_pmix_base_exchange (int)
 */
#define OPAL_PMIX_EXCHANGE(r, i, p, t)                          \
    do {                                                        \
        OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output,       \
                             "%s[%s:%d] EXCHANGE %s WITH %s",   \
                             OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),\
                             __FILE__, __LINE__,                \
                             (i)->key, (p)->value.key));        \
        (r) = opal_pmix_base_exchange((i), (p), (t));           \
    } while(0);
316
317
318 /************************************************************
319 * CLIENT APIs *
320 ************************************************************/
321
322 /* Initialize the PMIx client
323 * When called the client will check for the required connection
324 * information of the local server and will establish the connection.
325 * If the information is not found, or the server connection fails, then
326 * an appropriate error constant will be returned.
327 */
328 typedef int (*opal_pmix_base_module_init_fn_t)(opal_list_t *ilist);
329
/*
 * Finalize the PMIx client, closing the connection to the local
 * server. An error code is returned if, for some reason, the
 * connection cannot be closed.
 */
typedef int (*opal_pmix_base_module_fini_fn_t)(void);
334
/*
 * Report whether the PMIx client has been successfully initialized:
 * _true_ if it has, _false_ otherwise (delivered through the int
 * return value). The function only reports the internal state of the
 * PMIx client - it does not verify an active connection with the
 * server, nor that the server is functional.
 */
typedef int (*opal_pmix_base_module_initialized_fn_t)(void);
340
341 /* Request that the provided list of opal_namelist_t procs be aborted, returning the
342 * provided _status_ and printing the provided message. A _NULL_
343 * for the proc list indicates that all processes in the caller's
344 * nspace are to be aborted.
345 *
346 * The response to this request is somewhat dependent on the specific resource
347 * manager and its configuration (e.g., some resource managers will
348 * not abort the application if the provided _status_ is zero unless
349 * specifically configured to do so), and thus lies outside the control
350 * of PMIx itself. However, the client will inform the RM of
351 * the request that the application be aborted, regardless of the
352 * value of the provided _status_.
353 *
354 * Passing a _NULL_ msg parameter is allowed. Note that race conditions
355 * caused by multiple processes calling PMIx_Abort are left to the
356 * server implementation to resolve with regard to which status is
357 * returned and what messages (if any) are printed.
358 */
359 typedef int (*opal_pmix_base_module_abort_fn_t)(int status, const char *msg,
360 opal_list_t *procs);
361
/*
 * Push all previously _PMIx_Put_ values to the local PMIx server.
 * This is an asynchronous operation - the library returns to the
 * caller immediately while the data is transmitted to the local
 * server in the background.
 */
typedef int (*opal_pmix_base_module_commit_fn_t)(void);
367
368 /* Execute a blocking barrier across the processes identified in the
369 * specified list of opal_namelist_t. Passing a _NULL_ pointer
370 * indicates that the barrier is to span all processes in the client's
371 * namespace. Each provided opal_namelist_t can pass PMIX_RANK_WILDCARD to
372 * indicate that all processes in the given jobid are
373 * participating.
374 *
375 * The _collect_data_ parameter is passed to the server to indicate whether
376 * or not the barrier operation is to return the _put_ data from all
377 * participating processes. A value of _false_ indicates that the callback
378 * is just used as a release and no data is to be returned at that time. A
379 * value of _true_ indicates that all _put_ data is to be collected by the
380 * barrier. Returned data is locally cached so that subsequent calls to _PMIx_Get_
381 * can be serviced without communicating to/from the server, but at the cost
382 * of increased memory footprint
383 */
384 typedef int (*opal_pmix_base_module_fence_fn_t)(opal_list_t *procs, int collect_data);
385
386 /* Fence_nb */
387 /* Non-blocking version of PMIx_Fence. Note that the function will return
388 * an error if a _NULL_ callback function is given. */
389 typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_list_t *procs, int collect_data,
390 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
391
392 /* Push a value into the client's namespace. The client library will cache
393 * the information locally until _PMIx_Commit_ is called. The provided scope
394 * value is passed to the local PMIx server, which will distribute the data
395 * as directed. */
396 typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope,
397 opal_value_t *val);
398
399 /* Retrieve information for the specified _key_ as published by the rank
400 * and jobid i the provided opal_process_name, and subject to any provided
401 * constraints, returning a pointer to the value in the given address.
402 *
403 * This is a blocking operation - the caller will block until
404 * the specified data has been _PMIx_Put_ by the specified rank. The caller is
405 * responsible for freeing all memory associated with the returned value when
406 * no longer required. */
407 typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc,
408 const char *key, opal_list_t *info,
409 opal_value_t **val);
410
411 /* Retrieve information for the specified _key_ as published by the given rank
412 * and jobid in the opal_process_name_t, and subject to any provided
413 * constraints. This is a non-blocking operation - the
414 * callback function will be executed once the specified data has been _PMIx_Put_
415 * by the specified proc and retrieved by the local server. */
416 typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc,
417 const char *key, opal_list_t *info,
418 opal_pmix_value_cbfunc_t cbfunc, void *cbdata);
419
420 /* Publish the given data to the "universal" nspace
421 * for lookup by others subject to the provided scope.
422 * Note that the keys must be unique within the specified
423 * scope or else an error will be returned (first published
424 * wins). Attempts to access the data by procs outside of
425 * the provided scope will be rejected.
426 *
427 * Note: Some host environments may support user/group level
428 * access controls on the information in addition to the scope.
429 * These can be specified in the info array using the appropriately
430 * defined keys.
431 *
432 * The persistence parameter instructs the server as to how long
433 * the data is to be retained, within the context of the scope.
434 * For example, data published within _PMIX_NAMESPACE_ will be
435 * deleted along with the namespace regardless of the persistence.
436 * However, data published within PMIX_USER would be retained if
437 * the persistence was set to _PMIX_PERSIST_SESSION_ until the
438 * allocation terminates.
439 *
440 * The blocking form will block until the server confirms that the
441 * data has been posted and is available. The non-blocking form will
442 * return immediately, executing the callback when the server confirms
443 * availability of the data */
444 typedef int (*opal_pmix_base_module_publish_fn_t)(opal_list_t *info);
445 typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_list_t *info,
446 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
447
448 /* Lookup information published by another process within the
449 * specified scope. A scope of _PMIX_SCOPE_UNDEF_ requests that
450 * the search be conducted across _all_ namespaces. The "data"
451 * parameter consists of an array of pmix_pdata_t struct with the
452 * keys specifying the requested information. Data will be returned
453 * for each key in the associated info struct - any key that cannot
454 * be found will return with a data type of "PMIX_UNDEF". The function
455 * will return SUCCESS if _any_ values can be found, so the caller
456 * must check each data element to ensure it was returned.
457 *
458 * The proc field in each pmix_pdata_t struct will contain the
459 * nspace/rank of the process that published the data.
460 *
461 * Note: although this is a blocking function, it will _not_ wait
462 * for the requested data to be published. Instead, it will block
463 * for the time required by the server to lookup its current data
464 * and return any found items. Thus, the caller is responsible for
465 * ensuring that data is published prior to executing a lookup, or
466 * for retrying until the requested data is found */
467 typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_list_t *data,
468 opal_list_t *info);
469
470 /* Non-blocking form of the _PMIx_Lookup_ function. Data for
471 * the provided NULL-terminated keys array will be returned
472 * in the provided callback function. The _wait_ parameter
473 * is used to indicate if the caller wishes the callback to
474 * wait for _all_ requested data before executing the callback
475 * (_true_), or to callback once the server returns whatever
476 * data is immediately available (_false_) */
477 typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(char **keys, opal_list_t *info,
478 opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
479
480 /* Unpublish data posted by this process using the given keys
481 * within the specified scope. The function will block until
482 * the data has been removed by the server. A value of _NULL_
483 * for the keys parameter instructs the server to remove
484 * _all_ data published by this process within the given scope */
485 typedef int (*opal_pmix_base_module_unpublish_fn_t)(char **keys, opal_list_t *info);
486
487 /* Non-blocking form of the _PMIx_Unpublish_ function. The
488 * callback function will be executed once the server confirms
489 * removal of the specified data. A value of _NULL_
490 * for the keys parameter instructs the server to remove
491 * _all_ data published by this process within the given scope */
492 typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(char **keys, opal_list_t *info,
493 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
494
495 /* Spawn a new job. The spawned applications are automatically
496 * connected to the calling process, and their assigned namespace
497 * is returned in the nspace parameter - a _NULL_ value in that
498 * location indicates that the caller doesn't wish to have the
499 * namespace returned. Behavior of individual resource managers
500 * may differ, but it is expected that failure of any application
501 * process to start will result in termination/cleanup of _all_
502 * processes in the newly spawned job and return of an error
503 * code to the caller */
504 typedef int (*opal_pmix_base_module_spawn_fn_t)(opal_list_t *job_info,
505 opal_list_t *apps,
506 opal_jobid_t *jobid);
507
508 /* Non-blocking form of the _PMIx_Spawn_ function. The callback
509 * will be executed upon launch of the specified applications,
510 * or upon failure to launch any of them. */
511 typedef int (*opal_pmix_base_module_spawn_nb_fn_t)(opal_list_t *job_info,
512 opal_list_t *apps,
513 opal_pmix_spawn_cbfunc_t cbfunc,
514 void *cbdata);
515
516 /* Record the specified processes as "connected". Both blocking and non-blocking
517 * versions are provided. This means that the resource manager should treat the
518 * failure of any process in the specified group as a reportable event, and take
519 * appropriate action. Note that different resource managers may respond to
520 * failures in different manners.
521 *
522 * The list is to be provided as opal_namelist_t objects
523 *
524 * The callback function is to be called once all participating processes have
525 * called connect. The server is required to return any job-level info for the
526 * connecting processes that might not already have - i.e., if the connect
527 * request involves procs from different nspaces, then each proc shall receive
528 * the job-level info from those nspaces other than their own.
529 *
530 * Note: a process can only engage in _one_ connect operation involving the identical
531 * set of ranges at a time. However, a process _can_ be simultaneously engaged
532 * in multiple connect operations, each involving a different set of ranges */
533 typedef int (*opal_pmix_base_module_connect_fn_t)(opal_list_t *procs);
534
535 typedef int (*opal_pmix_base_module_connect_nb_fn_t)(opal_list_t *procs,
536 opal_pmix_op_cbfunc_t cbfunc,
537 void *cbdata);
538
539 /* Disconnect a previously connected set of processes. An error will be returned
540 * if the specified set of procs was not previously "connected". As above, a process
541 * may be involved in multiple simultaneous disconnect operations. However, a process
542 * is not allowed to reconnect to a set of procs that has not fully completed
543 * disconnect - i.e., you have to fully disconnect before you can reconnect to the
544 * _same_ group of processes. */
545 typedef int (*opal_pmix_base_module_disconnect_fn_t)(opal_list_t *procs);
546
547 typedef int (*opal_pmix_base_module_disconnect_nb_fn_t)(opal_list_t *procs,
548 opal_pmix_op_cbfunc_t cbfunc,
549 void *cbdata);
550
551 /* Given a node name, return an array of processes within the specified jobid
552 * on that node. If the jobid is OPAL_JOBID_WILDCARD, then all processes on the node will
553 * be returned. If the specified node does not currently host any processes,
554 * then the returned list will be empty.
555 */
556 typedef int (*opal_pmix_base_module_resolve_peers_fn_t)(const char *nodename,
557 opal_jobid_t jobid,
558 opal_list_t *procs);
559
560
561 /* Given a jobid, return the list of nodes hosting processes within
562 * that jobid. The returned string will contain a comma-delimited list
563 * of nodenames. The caller is responsible for releasing the string
564 * when done with it */
565 typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char **nodelist);
566
567
568 /************************************************************
569 * SERVER APIs *
570 * *
571 * These are calls that go down (or "south") from the ORTE *
572 * daemon into the PMIx server library *
573 ************************************************************/
574
575 /* Initialize the server support library - must pass the callback
576 * module for the server to use, plus any attributes we want to
577 * pass down to it */
578 typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module,
579 opal_list_t *info);
580
/*
 * Finalize the server support library.
 */
typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void);
583
/*
 * Given a semicolon-separated list of input values, generate a regex
 * that can be passed down to the client for parsing.
 *
 * input - the list of values
 * regex - receives the resulting string; the caller is responsible
 *         for free'ing it
 *
 * If values have leading zero's, then that is preserved. You have to
 * add back any prefix/suffix for node names:
 *     odin[009-015,017-023,076-086]
 *
 *     "pmix:odin[009-015,017-023,076-086]"
 *
 * Note that the "pmix" at the beginning of each regex indicates that
 * the PMIx native parser is to be used by the client for parsing the
 * provided regex. Other parsers may be supported - see the
 * pmix_client.h header for a list.
 */
typedef int (*opal_pmix_base_module_generate_regex_fn_t)(const char *input,
                                                         char **regex);
601
/*
 * Generate a regex from a list of ranges.
 *
 * The input is expected to consist of a comma-separated list of
 * ranges (the example below additionally separates groups with
 * semicolons). Thus, an input of:
 *     "1-4;2-5;8,10,11,12;6,7,9"
 * would generate a regex of
 *     "[pmix:2x(3);8,10-12;6-7,9]"
 *
 * input - the list of ranges
 * ppn   - receives the generated regex string
 *
 * Note that the "pmix" at the beginning of each regex indicates that
 * the PMIx native parser is to be used by the client for parsing the
 * provided regex. Other parsers may be supported - see the
 * pmix_client.h header for a list.
 */
typedef int (*opal_pmix_base_module_generate_ppn_fn_t)(const char *input,
                                                       char **ppn);
614
615 /* Setup the data about a particular nspace so it can
616 * be passed to any child process upon startup. The PMIx
617 * connection procedure provides an opportunity for the
618 * host PMIx server to pass job-related info down to a
619 * child process. This might include the number of
620 * processes in the job, relative local ranks of the
621 * processes within the job, and other information of
622 * use to the process. The server is free to determine
623 * which, if any, of the supported elements it will
624 * provide - defined values are provided in pmix_common.h.
625 *
626 * NOTE: the server must register ALL nspaces that will
627 * participate in collective operations with local processes.
628 * This means that the server must register an nspace even
629 * if it will not host any local procs from within that
630 * nspace IF any local proc might at some point perform
631 * a collective operation involving one or more procs from
632 * that nspace. This is necessary so that the collective
633 * operation can know when it is locally complete.
634 *
635 * The caller must also provide the number of local procs
636 * that will be launched within this nspace. This is required
637 * for the PMIx server library to correctly handle collectives
638 * as a collective operation call can occur before all the
639 * procs have been started */
640 typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jobid,
641 int nlocalprocs,
642 opal_list_t *info,
643 opal_pmix_op_cbfunc_t cbfunc,
644 void *cbdata);
645
646 /* Deregister an nspace. Instruct the PMIx server to purge
647 * all info relating to the provided jobid so that memory
648 * can be freed. Note that the server will automatically
649 * purge all info relating to any clients it has from
650 * this nspace */
651 typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid,
652 opal_pmix_op_cbfunc_t cbfunc,
653 void *cbdata);
654
655 /* Register a client process with the PMIx server library. The
656 * expected user ID and group ID of the child process helps the
657 * server library to properly authenticate clients as they connect
658 * by requiring the two values to match.
659 *
660 * The host server can also, if it desires, provide an object
661 * it wishes to be returned when a server function is called
662 * that relates to a specific process. For example, the host
663 * server may have an object that tracks the specific client.
664 * Passing the object to the library allows the library to
665 * return that object when the client calls "finalize", thus
666 * allowing the host server to access the object without
667 * performing a lookup. */
668 typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_process_name_t *proc,
669 uid_t uid, gid_t gid,
670 void *server_object,
671 opal_pmix_op_cbfunc_t cbfunc,
672 void *cbdata);
673
674 /* Deregister a client. Instruct the PMIx server to purge
675 * all info relating to the provided client so that memory
676 * can be freed. As per above note, the server will automatically
677 * free all client-related data when the nspace is deregistered,
678 * so there is no need to call this function during normal
679 * finalize operations. Instead, this is provided for use
680 * during exception operations */
681 typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc,
682 opal_pmix_op_cbfunc_t cbfunc,
683 void *cbdata);
684
685 /* Setup the environment of a child process to be forked
686 * by the host so it can correctly interact with the PMIx
687 * server. The PMIx client needs some setup information
688 * so it can properly connect back to the server. This function
689 * will set appropriate environmental variables for this purpose. */
690 typedef int (*opal_pmix_base_module_server_setup_fork_fn_t)(const opal_process_name_t *proc, char ***env);
691
692 /* Define a function by which the host server can request modex data
693 * from the local PMIx server. This is used to support the direct modex
694 * operation - i.e., where data is cached locally on each PMIx
695 * server for its own local clients, and is obtained on-demand
696 * for remote requests. Upon receiving a request from a remote
697 * server, the host server will call this function to pass the
698 * request into the PMIx server. The PMIx server will return a blob
699 * (once it becomes available) via the cbfunc - the host
700 * server shall send the blob back to the original requestor */
701 typedef int (*opal_pmix_base_module_server_dmodex_request_fn_t)(const opal_process_name_t *proc,
702 opal_pmix_modex_cbfunc_t cbfunc,
703 void *cbdata);
704
705 /* Report an event to a process for notification via any
706 * registered event handler. The handler registration can be
707 * called by both the server and the client application. On the
708 * server side, the handler is used to report events detected
709 * by PMIx to the host server for handling. On the client side,
710 * the handler is used to notify the process of events
711 * reported by the server - e.g., the failure of another process.
712 *
713 * This function allows the host server to direct the server
714 * convenience library to notify all registered local procs of
715 * an event. The event can be local, or anywhere in the cluster.
716 * The status indicates the event being reported.
717 *
718 * The source parameter informs the handler of the source that
719 * generated the event. This will be NULL if the event came
720 * from the external resource manager.
721 *
722 * The info array contains any further info the RM can and/or chooses
723 * to provide.
724 *
725 * The callback function will be called upon completion of the
726 * notify_event function's actions. Note that any messages will
727 * have been queued, but may not have been transmitted by this
728 * time. Note that the caller is required to maintain the input
729 * data until the callback function has been executed if this
730 * function returns OPAL_SUCCESS! */
731 typedef int (*opal_pmix_base_module_server_notify_event_fn_t)(int status,
732 const opal_process_name_t *source,
733 opal_list_t *info,
734 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
735
736 /* push IO to local clients */
737 typedef int (*opal_pmix_base_module_server_push_io_fn_t)(const opal_process_name_t *source,
738 opal_pmix_iof_channel_t channel,
739 unsigned char *data, size_t nbytes);
740
741 /* define a callback function for the setup_application API. The returned info
742 * array is owned by the PMIx server library and will be free'd when the
743 * provided cbfunc is called. */
744 typedef void (*opal_pmix_setup_application_cbfunc_t)(int status,
745 opal_list_t *info,
746 void *provided_cbdata,
747 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
748
749 /* Provide a function by which we can request
750 * any application-specific environmental variables prior to
751 * launch of an application. For example, network libraries may
752 * opt to provide security credentials for the application. This
753 * is defined as a non-blocking operation in case network
754 * libraries need to perform some action before responding. The
755 * returned env will be distributed along with the application */
756 typedef int (*opal_pmix_server_setup_application_fn_t)(opal_jobid_t jobid,
757 opal_list_t *info,
758 opal_pmix_setup_application_cbfunc_t cbfunc, void *cbdata);
759
760 /* Provide a function by which the local PMIx server can perform
761 * any application-specific operations prior to spawning local
762 * clients of a given application. For example, a network library
763 * might need to setup the local driver for "instant on" addressing.
764 */
765 typedef int (*opal_pmix_server_setup_local_support_fn_t)(opal_jobid_t jobid,
766 opal_list_t *info,
767 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
768
769
770 /************************************************************
771 * TOOL APIs *
772 ************************************************************/
773 /* Initialize the PMIx tool support
774 * When called the library will check for the required connection
775 * information of the local server and will establish the connection.
776 * The connection info can be provided either in the environment or
777 * in the list of attributes. If the information is not found, or the
778 * server connection fails, then an appropriate error constant will
779 * be returned.
780 */
781 typedef int (*opal_pmix_base_module_tool_init_fn_t)(opal_list_t *ilist);
782
/* Shut down (finalize) the PMIx tool support; takes no arguments and
 * reports an int status. */
typedef int (*opal_pmix_base_module_tool_fini_fn_t)(
    void);
785
786
787 /************************************************************
788 * UTILITY APIs *
789 ************************************************************/
790
/* Return the version of the embedded library as a read-only string */
typedef const char *(*opal_pmix_base_module_get_version_fn_t)(
    void);
793
794 /* Register an event handler to report event. Three types of events
795 * can be reported:
796 *
797 * (a) those that occur within the client library, but are not
798 * reportable via the API itself (e.g., loss of connection to
799 * the server). These events typically occur during behind-the-scenes
800 * non-blocking operations.
801 *
802 * (b) job-related events such as the failure of another process in
803 * the job or in any connected job, impending failure of hardware
804 * within the job's usage footprint, etc.
805 *
806 * (c) system notifications that are made available by the local
807 * administrators
808 *
809 * By default, only events that directly affect the process and/or
810 * any process to which it is connected (via the PMIx_Connect call)
811 * will be reported. Options to modify that behavior can be provided
812 * in the info array
813 *
814 * Both the client application and the resource manager can register
815 * event handlers for specific events. PMIx client/server calls the registered
816 * event handler upon receiving event notify notification (via PMIx_Notify_event)
817 * from the other end (Resource Manager/Client application).
818 *
819 * Multiple event handlers can be registered for different events. PMIX returns
820 * a size_t reference to each register handler in the callback fn. The caller
821 * must retain the reference in order to deregister the evhandler.
822 * Modification of the notification behavior can be accomplished by
823 * deregistering the current evhandler, and then registering it
824 * using a new set of info values.
825 *
826 * A NULL for event_codes indicates registration as a default event handler
827 *
828 * See pmix_types.h for a description of the notification function */
829 typedef void (*opal_pmix_base_module_register_fn_t)(opal_list_t *event_codes,
830 opal_list_t *info,
831 opal_pmix_notification_fn_t evhandler,
832 opal_pmix_evhandler_reg_cbfunc_t cbfunc,
833 void *cbdata);
834
835 /* deregister the evhandler
836 * evhandler_ref is the reference returned by PMIx for the evhandler
837 * to pmix_evhandler_reg_cbfunc_t */
838 typedef void (*opal_pmix_base_module_deregister_fn_t)(size_t evhandler,
839 opal_pmix_op_cbfunc_t cbfunc,
840 void *cbdata);
841
842 /* Report an event for notification via any
843 * registered evhandler. On the PMIx
844 * server side, this is used to report events detected
845 * by PMIx to the host server for handling and/or distribution.
846 *
847 * The client application can also call this function to notify the
848 * resource manager of an event it detected. It can specify the
849 * range over which that notification should occur.
850 *
851 * The info array contains any further info the caller can and/or chooses
852 * to provide.
853 *
854 * The callback function will be called upon completion of the
855 * notify_event function's actions. Note that any messages will
856 * have been queued, but may not have been transmitted by this
857 * time. Note that the caller is required to maintain the input
858 * data until the callback function has been executed if it
859 * returns OPAL_SUCCESS!
860 */
861 typedef int (*opal_pmix_base_module_notify_event_fn_t)(int status,
862 const opal_process_name_t *source,
863 opal_pmix_data_range_t range,
864 opal_list_t *info,
865 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
866
867 /* store data internally, but don't push it out to be shared - this is
868 * intended solely for storage of info on other procs that comes thru
869 * a non-PMIx channel (e.g., may be computed locally) but is desired
870 * to be available via a PMIx_Get call */
871 typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc,
872 opal_value_t *val);
873
874 /* retrieve the nspace corresponding to a given jobid */
875 typedef const char* (*opal_pmix_base_module_get_nspace_fn_t)(opal_jobid_t jobid);
876
877 /* register a jobid-to-nspace pair */
878 typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, const char *nspace);
879
880 /* query information from the system */
881 typedef void (*opal_pmix_base_module_query_fn_t)(opal_list_t *queries,
882 opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
883
884 /* log data to the system */
885 typedef void (*opal_pmix_base_log_fn_t)(opal_list_t *info,
886 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
887
888 /* allocation */
889 typedef int (*opal_pmix_base_alloc_fn_t)(opal_pmix_alloc_directive_t directive,
890 opal_list_t *info,
891 opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
892
893 /* job control */
894 typedef int (*opal_pmix_base_job_control_fn_t)(opal_list_t *targets,
895 opal_list_t *directives,
896 opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
897
898 /* monitoring */
899 typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor,
900 opal_list_t *directives,
901 opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
902
/* Register cleanup for a path.
 *
 * NOTE(review): the three flags presumably say whether path is a
 * directory, whether it is to be ignored during cleanup, and whether
 * the registration is job-scoped - confirm against the implementing
 * components. */
typedef int (*opal_pmix_base_register_cleanup_fn_t)(
    char *path,
    bool directory,
    bool ignore,
    bool jobscope);
905
/* Argument-less predicate returning a bool.
 * NOTE(review): presumably reports whether the module relies on a
 * legacy "get" behavior - confirm with the implementing components. */
typedef bool (*opal_pmix_base_legacy_get_fn_t)(
    void);
907
908 /*
909 * the standard public API data structure
910 */
911 typedef struct {
912 opal_pmix_base_legacy_get_fn_t legacy_get;
913 /* client APIs */
914 opal_pmix_base_module_init_fn_t init;
915 opal_pmix_base_module_fini_fn_t finalize;
916 opal_pmix_base_module_initialized_fn_t initialized;
917 opal_pmix_base_module_abort_fn_t abort;
918 opal_pmix_base_module_commit_fn_t commit;
919 opal_pmix_base_module_fence_fn_t fence;
920 opal_pmix_base_module_fence_nb_fn_t fence_nb;
921 opal_pmix_base_module_put_fn_t put;
922 opal_pmix_base_module_get_fn_t get;
923 opal_pmix_base_module_get_nb_fn_t get_nb;
924 opal_pmix_base_module_publish_fn_t publish;
925 opal_pmix_base_module_publish_nb_fn_t publish_nb;
926 opal_pmix_base_module_lookup_fn_t lookup;
927 opal_pmix_base_module_lookup_nb_fn_t lookup_nb;
928 opal_pmix_base_module_unpublish_fn_t unpublish;
929 opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb;
930 opal_pmix_base_module_spawn_fn_t spawn;
931 opal_pmix_base_module_spawn_nb_fn_t spawn_nb;
932 opal_pmix_base_module_connect_fn_t connect;
933 opal_pmix_base_module_connect_nb_fn_t connect_nb;
934 opal_pmix_base_module_disconnect_fn_t disconnect;
935 opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb;
936 opal_pmix_base_module_resolve_peers_fn_t resolve_peers;
937 opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes;
938 opal_pmix_base_module_query_fn_t query;
939 opal_pmix_base_log_fn_t log;
940 opal_pmix_base_alloc_fn_t allocate;
941 opal_pmix_base_job_control_fn_t job_control;
942 opal_pmix_base_process_monitor_fn_t monitor;
943 opal_pmix_base_register_cleanup_fn_t register_cleanup;
944 /* server APIs */
945 opal_pmix_base_module_server_init_fn_t server_init;
946 opal_pmix_base_module_server_finalize_fn_t server_finalize;
947 opal_pmix_base_module_generate_regex_fn_t generate_regex;
948 opal_pmix_base_module_generate_ppn_fn_t generate_ppn;
949 opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace;
950 opal_pmix_base_module_server_deregister_nspace_fn_t server_deregister_nspace;
951 opal_pmix_base_module_server_register_client_fn_t server_register_client;
952 opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client;
953 opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork;
954 opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request;
955 opal_pmix_base_module_server_notify_event_fn_t server_notify_event;
956 opal_pmix_base_module_server_push_io_fn_t server_iof_push;
957 opal_pmix_server_setup_application_fn_t server_setup_application;
958 opal_pmix_server_setup_local_support_fn_t server_setup_local_support;
959 /* tool APIs */
960 opal_pmix_base_module_tool_init_fn_t tool_init;
961 opal_pmix_base_module_tool_fini_fn_t tool_finalize;
962 /* Utility APIs */
963 opal_pmix_base_module_get_version_fn_t get_version;
964 opal_pmix_base_module_register_fn_t register_evhandler;
965 opal_pmix_base_module_deregister_fn_t deregister_evhandler;
966 opal_pmix_base_module_notify_event_fn_t notify_event;
967 opal_pmix_base_module_store_fn_t store_local;
968 opal_pmix_base_module_get_nspace_fn_t get_nspace;
969 opal_pmix_base_module_register_jobid_fn_t register_jobid;
970 } opal_pmix_base_module_t;
971
972 typedef struct {
973 mca_base_component_t base_version;
974 mca_base_component_data_t base_data;
975 int priority;
976 } opal_pmix_base_component_t;
977
/*
 * Version macro for components of type "pmix": expands to the MCA
 * base 2.1.0 version macro with framework name "pmix" and version
 * numbers 2, 0, 0.
 */
#define OPAL_PMIX_BASE_VERSION_2_0_0 OPAL_MCA_BASE_VERSION_2_1_0("pmix", 2, 0, 0)
983
984 /* Global structure for accessing store functions */
985 OPAL_DECLSPEC extern opal_pmix_base_module_t opal_pmix; /* holds base function pointers */
986
987 END_C_DECLS
988
989 #endif