1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2014-2018 Intel, Inc. All rights reserved. 4 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights 5 * reserved. 6 * $COPYRIGHT$ 7 * 8 * Additional copyrights may follow 9 * 10 * $HEADER$ 11 */ 12 13 #ifndef OPAL_PMIX_H 14 #define OPAL_PMIX_H 15 16 #include "opal_config.h" 17 #include "opal/types.h" 18 19 #ifdef HAVE_SYS_UN_H 20 #include <sys/un.h> 21 #endif 22 23 #include "opal/mca/mca.h" 24 #include "opal/mca/event/event.h" 25 #include "opal/dss/dss.h" 26 #include "opal/runtime/opal.h" 27 #include "opal/dss/dss.h" 28 #include "opal/util/error.h" 29 #include "opal/util/proc.h" 30 #include "opal/hash_string.h" 31 32 #include "opal/mca/pmix/pmix_types.h" 33 #include "opal/mca/pmix/pmix_server.h" 34 35 BEGIN_C_DECLS 36 37 /* provide access to the framework verbose output without 38 * exposing the entire base */ 39 extern int opal_pmix_verbose_output; 40 extern bool opal_pmix_collect_all_data; 41 extern bool opal_pmix_base_async_modex; 42 extern int opal_pmix_base_exchange(opal_value_t *info, 43 opal_pmix_pdata_t *pdat, 44 int timeout); 45 46 /* 47 * Count the fash for the the external RM 48 */ 49 #define OPAL_HASH_JOBID( str, hash ){ \ 50 OPAL_HASH_STR( str, hash ); \ 51 hash &= ~(0x8000); \ 52 } 53 54 /** 55 * Provide a simplified macro for sending data via modex 56 * to other processes. 
The macro requires four arguments: 57 * 58 * r - the integer return status from the modex op 59 * sc - the PMIX scope of the data 60 * s - the key to tag the data being posted 61 * d - pointer to the data object being posted 62 * t - the type of the data 63 */ 64 #define OPAL_MODEX_SEND_VALUE(r, sc, s, d, t) \ 65 do { \ 66 opal_value_t _kv; \ 67 OBJ_CONSTRUCT(&(_kv), opal_value_t); \ 68 _kv.key = (s); \ 69 if (OPAL_SUCCESS != ((r) = opal_value_load(&(_kv), (d), (t)))) { \ 70 OPAL_ERROR_LOG((r)); \ 71 } else { \ 72 if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \ 73 OPAL_ERROR_LOG((r)); \ 74 } \ 75 } \ 76 /* opal_value_load makes a copy of the data, so release it */ \ 77 _kv.key = NULL; \ 78 OBJ_DESTRUCT(&(_kv)); \ 79 } while(0); 80 81 /** 82 * Provide a simplified macro for sending data via modex 83 * to other processes. The macro requires four arguments: 84 * 85 * r - the integer return status from the modex op 86 * sc - the PMIX scope of the data 87 * s - the key to tag the data being posted 88 * d - the data object being posted 89 * sz - the number of bytes in the data object 90 */ 91 #define OPAL_MODEX_SEND_STRING(r, sc, s, d, sz) \ 92 do { \ 93 opal_value_t _kv; \ 94 OBJ_CONSTRUCT(&(_kv), opal_value_t); \ 95 _kv.key = (s); \ 96 _kv.type = OPAL_BYTE_OBJECT; \ 97 _kv.data.bo.bytes = (uint8_t*)(d); \ 98 _kv.data.bo.size = (sz); \ 99 if (OPAL_SUCCESS != ((r) = opal_pmix.put(sc, &(_kv)))) { \ 100 OPAL_ERROR_LOG((r)); \ 101 } \ 102 _kv.data.bo.bytes = NULL; /* protect the data */ \ 103 _kv.key = NULL; /* protect the key */ \ 104 OBJ_DESTRUCT(&(_kv)); \ 105 } while(0); 106 107 /** 108 * Provide a simplified macro for sending data via modex 109 * to other processes. 
The macro requires four arguments: 110 * 111 * r - the integer return status from the modex op 112 * sc - the PMIX scope of the data 113 * s - the MCA component that is posting the data 114 * d - the data object being posted 115 * sz - the number of bytes in the data object 116 */ 117 #define OPAL_MODEX_SEND(r, sc, s, d, sz) \ 118 do { \ 119 char *_key; \ 120 _key = mca_base_component_to_string((s)); \ 121 OPAL_MODEX_SEND_STRING((r), (sc), _key, (d), (sz)); \ 122 free(_key); \ 123 } while(0); 124 125 /** 126 * Provide a simplified macro for retrieving modex data 127 * from another process when we don't want the PMIx module 128 * to request it from the server if not found: 129 * 130 * r - the integer return status from the modex op (int) 131 * s - string key (char*) 132 * p - pointer to the opal_process_name_t of the proc that posted 133 * the data (opal_process_name_t*) 134 * d - pointer to a location wherein the data object 135 * is to be returned 136 * t - the expected data type 137 */ 138 #define OPAL_MODEX_RECV_VALUE_OPTIONAL(r, s, p, d, t) \ 139 do { \ 140 opal_value_t *_kv, *_info; \ 141 opal_list_t _ilist; \ 142 OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ 143 "%s[%s:%d] MODEX RECV VALUE OPTIONAL FOR PROC %s KEY %s", \ 144 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ 145 __FILE__, __LINE__, \ 146 OPAL_NAME_PRINT(*(p)), (s))); \ 147 OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ 148 _info = OBJ_NEW(opal_value_t); \ 149 _info->key = strdup(OPAL_PMIX_OPTIONAL); \ 150 _info->type = OPAL_BOOL; \ 151 _info->data.flag = true; \ 152 opal_list_append(&(_ilist), &(_info)->super); \ 153 if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \ 154 if (NULL == _kv) { \ 155 (r) = OPAL_ERR_NOT_FOUND; \ 156 } else { \ 157 (r) = opal_value_unload(_kv, (void**)(d), (t)); \ 158 OBJ_RELEASE(_kv); \ 159 } \ 160 } \ 161 OPAL_LIST_DESTRUCT(&(_ilist)); \ 162 } while(0); 163 164 /** 165 * Provide a simplified macro for retrieving modex data 166 * from another process 
when we want the PMIx module 167 * to request it from the server if not found, but do not 168 * want the server to go find it if the server doesn't 169 * already have it: 170 * 171 * r - the integer return status from the modex op (int) 172 * s - string key (char*) 173 * p - pointer to the opal_process_name_t of the proc that posted 174 * the data (opal_process_name_t*) 175 * d - pointer to a location wherein the data object 176 * is to be returned 177 * t - the expected data type 178 */ 179 #define OPAL_MODEX_RECV_VALUE_IMMEDIATE(r, s, p, d, t) \ 180 do { \ 181 opal_value_t *_kv, *_info; \ 182 opal_list_t _ilist; \ 183 opal_output_verbose(1, opal_pmix_verbose_output, \ 184 "%s[%s:%d] MODEX RECV VALUE IMMEDIATE FOR PROC %s KEY %s", \ 185 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ 186 __FILE__, __LINE__, \ 187 OPAL_NAME_PRINT(*(p)), (s)); \ 188 OBJ_CONSTRUCT(&(_ilist), opal_list_t); \ 189 _info = OBJ_NEW(opal_value_t); \ 190 _info->key = strdup(OPAL_PMIX_IMMEDIATE); \ 191 _info->type = OPAL_BOOL; \ 192 _info->data.flag = true; \ 193 opal_list_append(&(_ilist), &(_info)->super); \ 194 if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), &(_ilist), &(_kv)))) { \ 195 if (NULL == _kv) { \ 196 (r) = OPAL_ERR_NOT_FOUND; \ 197 } else { \ 198 (r) = opal_value_unload(_kv, (void**)(d), (t)); \ 199 OBJ_RELEASE(_kv); \ 200 } \ 201 } \ 202 OPAL_LIST_DESTRUCT(&(_ilist)); \ 203 } while(0); 204 205 /** 206 * Provide a simplified macro for retrieving modex data 207 * from another process: 208 * 209 * r - the integer return status from the modex op (int) 210 * s - string key (char*) 211 * p - pointer to the opal_process_name_t of the proc that posted 212 * the data (opal_process_name_t*) 213 * d - pointer to a location wherein the data object 214 * is to be returned 215 * t - the expected data type 216 */ 217 #define OPAL_MODEX_RECV_VALUE(r, s, p, d, t) \ 218 do { \ 219 opal_value_t *_kv; \ 220 OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ 221 "%s[%s:%d] MODEX RECV VALUE FOR PROC %s 
KEY %s", \ 222 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ 223 __FILE__, __LINE__, \ 224 OPAL_NAME_PRINT(*(p)), (s))); \ 225 if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \ 226 if (NULL == _kv) { \ 227 (r) = OPAL_ERR_NOT_FOUND; \ 228 } else { \ 229 (r) = opal_value_unload(_kv, (void**)(d), (t)); \ 230 OBJ_RELEASE(_kv); \ 231 } \ 232 } \ 233 } while(0); 234 235 /** 236 * Provide a simplified macro for retrieving modex data 237 * from another process: 238 * 239 * r - the integer return status from the modex op (int) 240 * s - string key (char*) 241 * p - pointer to the opal_process_name_t of the proc that posted 242 * the data (opal_process_name_t*) 243 * d - pointer to a location wherein the data object 244 * it to be returned (char**) 245 * sz - pointer to a location wherein the number of bytes 246 * in the data object can be returned (size_t) 247 */ 248 #define OPAL_MODEX_RECV_STRING(r, s, p, d, sz) \ 249 do { \ 250 opal_value_t *_kv; \ 251 OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ 252 "%s[%s:%d] MODEX RECV STRING FOR PROC %s KEY %s", \ 253 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ 254 __FILE__, __LINE__, \ 255 OPAL_NAME_PRINT(*(p)), (s))); \ 256 if (OPAL_SUCCESS == ((r) = opal_pmix.get((p), (s), NULL, &(_kv)))) { \ 257 if (NULL == _kv) { \ 258 *(sz) = 0; \ 259 (r) = OPAL_ERR_NOT_FOUND; \ 260 } else { \ 261 *(d) = _kv->data.bo.bytes; \ 262 *(sz) = _kv->data.bo.size; \ 263 _kv->data.bo.bytes = NULL; /* protect the data */ \ 264 OBJ_RELEASE(_kv); \ 265 } \ 266 } else { \ 267 *(sz) = 0; \ 268 (r) = OPAL_ERR_NOT_FOUND; \ 269 } \ 270 } while(0); 271 272 /** 273 * Provide a simplified macro for retrieving modex data 274 * from another process: 275 * 276 * r - the integer return status from the modex op (int) 277 * s - the MCA component that posted the data (mca_base_component_t*) 278 * p - pointer to the opal_process_name_t of the proc that posted 279 * the data (opal_process_name_t*) 280 * d - pointer to a location wherein the data object 281 * 
it to be returned (char**) 282 * sz - pointer to a location wherein the number of bytes 283 * in the data object can be returned (size_t) 284 */ 285 #define OPAL_MODEX_RECV(r, s, p, d, sz) \ 286 do { \ 287 char *_key; \ 288 _key = mca_base_component_to_string((s)); \ 289 OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ 290 "%s[%s:%d] MODEX RECV FOR PROC %s KEY %s", \ 291 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ 292 __FILE__, __LINE__, \ 293 OPAL_NAME_PRINT(*(p)), _key)); \ 294 if (NULL == _key) { \ 295 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE); \ 296 (r) = OPAL_ERR_OUT_OF_RESOURCE; \ 297 } else { \ 298 OPAL_MODEX_RECV_STRING((r), _key, (p), (d), (sz)); \ 299 free(_key); \ 300 } \ 301 } while(0); 302 303 /** 304 * Provide a macro for accessing a base function that exchanges 305 * data values between two procs using the PMIx Publish/Lookup 306 * APIs */ 307 #define OPAL_PMIX_EXCHANGE(r, i, p, t) \ 308 do { \ 309 OPAL_OUTPUT_VERBOSE((1, opal_pmix_verbose_output, \ 310 "%s[%s:%d] EXCHANGE %s WITH %s", \ 311 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), \ 312 __FILE__, __LINE__, \ 313 (i)->key, (p)->value.key)); \ 314 (r) = opal_pmix_base_exchange((i), (p), (t)); \ 315 } while(0); 316 317 318 /************************************************************ 319 * CLIENT APIs * 320 ************************************************************/ 321 322 /* Initialize the PMIx client 323 * When called the client will check for the required connection 324 * information of the local server and will establish the connection. 325 * If the information is not found, or the server connection fails, then 326 * an appropriate error constant will be returned. 327 */ 328 typedef int (*opal_pmix_base_module_init_fn_t)(opal_list_t *ilist); 329 330 /* Finalize the PMIx client, closing the connection to the local server. 331 * An error code will be returned if, for some reason, the connection 332 * cannot be closed. 
*/ 333 typedef int (*opal_pmix_base_module_fini_fn_t)(void); 334 335 /* Returns _true_ if the PMIx client has been successfully initialized, 336 * returns _false_ otherwise. Note that the function only reports the 337 * internal state of the PMIx client - it does not verify an active 338 * connection with the server, nor that the server is functional. */ 339 typedef int (*opal_pmix_base_module_initialized_fn_t)(void); 340 341 /* Request that the provided list of opal_namelist_t procs be aborted, returning the 342 * provided _status_ and printing the provided message. A _NULL_ 343 * for the proc list indicates that all processes in the caller's 344 * nspace are to be aborted. 345 * 346 * The response to this request is somewhat dependent on the specific resource 347 * manager and its configuration (e.g., some resource managers will 348 * not abort the application if the provided _status_ is zero unless 349 * specifically configured to do so), and thus lies outside the control 350 * of PMIx itself. However, the client will inform the RM of 351 * the request that the application be aborted, regardless of the 352 * value of the provided _status_. 353 * 354 * Passing a _NULL_ msg parameter is allowed. Note that race conditions 355 * caused by multiple processes calling PMIx_Abort are left to the 356 * server implementation to resolve with regard to which status is 357 * returned and what messages (if any) are printed. 358 */ 359 typedef int (*opal_pmix_base_module_abort_fn_t)(int status, const char *msg, 360 opal_list_t *procs); 361 362 /* Push all previously _PMIx_Put_ values to the local PMIx server. 363 * This is an asynchronous operation - the library will immediately 364 * return to the caller while the data is transmitted to the local 365 * server in the background */ 366 typedef int (*opal_pmix_base_module_commit_fn_t)(void); 367 368 /* Execute a blocking barrier across the processes identified in the 369 * specified list of opal_namelist_t. 
Passing a _NULL_ pointer 370 * indicates that the barrier is to span all processes in the client's 371 * namespace. Each provided opal_namelist_t can pass PMIX_RANK_WILDCARD to 372 * indicate that all processes in the given jobid are 373 * participating. 374 * 375 * The _collect_data_ parameter is passed to the server to indicate whether 376 * or not the barrier operation is to return the _put_ data from all 377 * participating processes. A value of _false_ indicates that the callback 378 * is just used as a release and no data is to be returned at that time. A 379 * value of _true_ indicates that all _put_ data is to be collected by the 380 * barrier. Returned data is locally cached so that subsequent calls to _PMIx_Get_ 381 * can be serviced without communicating to/from the server, but at the cost 382 * of increased memory footprint 383 */ 384 typedef int (*opal_pmix_base_module_fence_fn_t)(opal_list_t *procs, int collect_data); 385 386 /* Fence_nb */ 387 /* Non-blocking version of PMIx_Fence. Note that the function will return 388 * an error if a _NULL_ callback function is given. */ 389 typedef int (*opal_pmix_base_module_fence_nb_fn_t)(opal_list_t *procs, int collect_data, 390 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 391 392 /* Push a value into the client's namespace. The client library will cache 393 * the information locally until _PMIx_Commit_ is called. The provided scope 394 * value is passed to the local PMIx server, which will distribute the data 395 * as directed. */ 396 typedef int (*opal_pmix_base_module_put_fn_t)(opal_pmix_scope_t scope, 397 opal_value_t *val); 398 399 /* Retrieve information for the specified _key_ as published by the rank 400 * and jobid i the provided opal_process_name, and subject to any provided 401 * constraints, returning a pointer to the value in the given address. 402 * 403 * This is a blocking operation - the caller will block until 404 * the specified data has been _PMIx_Put_ by the specified rank. 
The caller is 405 * responsible for freeing all memory associated with the returned value when 406 * no longer required. */ 407 typedef int (*opal_pmix_base_module_get_fn_t)(const opal_process_name_t *proc, 408 const char *key, opal_list_t *info, 409 opal_value_t **val); 410 411 /* Retrieve information for the specified _key_ as published by the given rank 412 * and jobid in the opal_process_name_t, and subject to any provided 413 * constraints. This is a non-blocking operation - the 414 * callback function will be executed once the specified data has been _PMIx_Put_ 415 * by the specified proc and retrieved by the local server. */ 416 typedef int (*opal_pmix_base_module_get_nb_fn_t)(const opal_process_name_t *proc, 417 const char *key, opal_list_t *info, 418 opal_pmix_value_cbfunc_t cbfunc, void *cbdata); 419 420 /* Publish the given data to the "universal" nspace 421 * for lookup by others subject to the provided scope. 422 * Note that the keys must be unique within the specified 423 * scope or else an error will be returned (first published 424 * wins). Attempts to access the data by procs outside of 425 * the provided scope will be rejected. 426 * 427 * Note: Some host environments may support user/group level 428 * access controls on the information in addition to the scope. 429 * These can be specified in the info array using the appropriately 430 * defined keys. 431 * 432 * The persistence parameter instructs the server as to how long 433 * the data is to be retained, within the context of the scope. 434 * For example, data published within _PMIX_NAMESPACE_ will be 435 * deleted along with the namespace regardless of the persistence. 436 * However, data published within PMIX_USER would be retained if 437 * the persistence was set to _PMIX_PERSIST_SESSION_ until the 438 * allocation terminates. 439 * 440 * The blocking form will block until the server confirms that the 441 * data has been posted and is available. 
The non-blocking form will 442 * return immediately, executing the callback when the server confirms 443 * availability of the data */ 444 typedef int (*opal_pmix_base_module_publish_fn_t)(opal_list_t *info); 445 typedef int (*opal_pmix_base_module_publish_nb_fn_t)(opal_list_t *info, 446 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 447 448 /* Lookup information published by another process within the 449 * specified scope. A scope of _PMIX_SCOPE_UNDEF_ requests that 450 * the search be conducted across _all_ namespaces. The "data" 451 * parameter consists of an array of pmix_pdata_t struct with the 452 * keys specifying the requested information. Data will be returned 453 * for each key in the associated info struct - any key that cannot 454 * be found will return with a data type of "PMIX_UNDEF". The function 455 * will return SUCCESS if _any_ values can be found, so the caller 456 * must check each data element to ensure it was returned. 457 * 458 * The proc field in each pmix_pdata_t struct will contain the 459 * nspace/rank of the process that published the data. 460 * 461 * Note: although this is a blocking function, it will _not_ wait 462 * for the requested data to be published. Instead, it will block 463 * for the time required by the server to lookup its current data 464 * and return any found items. Thus, the caller is responsible for 465 * ensuring that data is published prior to executing a lookup, or 466 * for retrying until the requested data is found */ 467 typedef int (*opal_pmix_base_module_lookup_fn_t)(opal_list_t *data, 468 opal_list_t *info); 469 470 /* Non-blocking form of the _PMIx_Lookup_ function. Data for 471 * the provided NULL-terminated keys array will be returned 472 * in the provided callback function. 
The _wait_ parameter 473 * is used to indicate if the caller wishes the callback to 474 * wait for _all_ requested data before executing the callback 475 * (_true_), or to callback once the server returns whatever 476 * data is immediately available (_false_) */ 477 typedef int (*opal_pmix_base_module_lookup_nb_fn_t)(char **keys, opal_list_t *info, 478 opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata); 479 480 /* Unpublish data posted by this process using the given keys 481 * within the specified scope. The function will block until 482 * the data has been removed by the server. A value of _NULL_ 483 * for the keys parameter instructs the server to remove 484 * _all_ data published by this process within the given scope */ 485 typedef int (*opal_pmix_base_module_unpublish_fn_t)(char **keys, opal_list_t *info); 486 487 /* Non-blocking form of the _PMIx_Unpublish_ function. The 488 * callback function will be executed once the server confirms 489 * removal of the specified data. A value of _NULL_ 490 * for the keys parameter instructs the server to remove 491 * _all_ data published by this process within the given scope */ 492 typedef int (*opal_pmix_base_module_unpublish_nb_fn_t)(char **keys, opal_list_t *info, 493 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 494 495 /* Spawn a new job. The spawned applications are automatically 496 * connected to the calling process, and their assigned namespace 497 * is returned in the nspace parameter - a _NULL_ value in that 498 * location indicates that the caller doesn't wish to have the 499 * namespace returned. 
Behavior of individual resource managers 500 * may differ, but it is expected that failure of any application 501 * process to start will result in termination/cleanup of _all_ 502 * processes in the newly spawned job and return of an error 503 * code to the caller */ 504 typedef int (*opal_pmix_base_module_spawn_fn_t)(opal_list_t *job_info, 505 opal_list_t *apps, 506 opal_jobid_t *jobid); 507 508 /* Non-blocking form of the _PMIx_Spawn_ function. The callback 509 * will be executed upon launch of the specified applications, 510 * or upon failure to launch any of them. */ 511 typedef int (*opal_pmix_base_module_spawn_nb_fn_t)(opal_list_t *job_info, 512 opal_list_t *apps, 513 opal_pmix_spawn_cbfunc_t cbfunc, 514 void *cbdata); 515 516 /* Record the specified processes as "connected". Both blocking and non-blocking 517 * versions are provided. This means that the resource manager should treat the 518 * failure of any process in the specified group as a reportable event, and take 519 * appropriate action. Note that different resource managers may respond to 520 * failures in different manners. 521 * 522 * The list is to be provided as opal_namelist_t objects 523 * 524 * The callback function is to be called once all participating processes have 525 * called connect. The server is required to return any job-level info for the 526 * connecting processes that might not already have - i.e., if the connect 527 * request involves procs from different nspaces, then each proc shall receive 528 * the job-level info from those nspaces other than their own. 529 * 530 * Note: a process can only engage in _one_ connect operation involving the identical 531 * set of ranges at a time. 
However, a process _can_ be simultaneously engaged 532 * in multiple connect operations, each involving a different set of ranges */ 533 typedef int (*opal_pmix_base_module_connect_fn_t)(opal_list_t *procs); 534 535 typedef int (*opal_pmix_base_module_connect_nb_fn_t)(opal_list_t *procs, 536 opal_pmix_op_cbfunc_t cbfunc, 537 void *cbdata); 538 539 /* Disconnect a previously connected set of processes. An error will be returned 540 * if the specified set of procs was not previously "connected". As above, a process 541 * may be involved in multiple simultaneous disconnect operations. However, a process 542 * is not allowed to reconnect to a set of procs that has not fully completed 543 * disconnect - i.e., you have to fully disconnect before you can reconnect to the 544 * _same_ group of processes. */ 545 typedef int (*opal_pmix_base_module_disconnect_fn_t)(opal_list_t *procs); 546 547 typedef int (*opal_pmix_base_module_disconnect_nb_fn_t)(opal_list_t *procs, 548 opal_pmix_op_cbfunc_t cbfunc, 549 void *cbdata); 550 551 /* Given a node name, return an array of processes within the specified jobid 552 * on that node. If the jobid is OPAL_JOBID_WILDCARD, then all processes on the node will 553 * be returned. If the specified node does not currently host any processes, 554 * then the returned list will be empty. 555 */ 556 typedef int (*opal_pmix_base_module_resolve_peers_fn_t)(const char *nodename, 557 opal_jobid_t jobid, 558 opal_list_t *procs); 559 560 561 /* Given a jobid, return the list of nodes hosting processes within 562 * that jobid. The returned string will contain a comma-delimited list 563 * of nodenames. 
The caller is responsible for releasing the string 564 * when done with it */ 565 typedef int (*opal_pmix_base_module_resolve_nodes_fn_t)(opal_jobid_t jobid, char **nodelist); 566 567 568 /************************************************************ 569 * SERVER APIs * 570 * * 571 * These are calls that go down (or "south") from the ORTE * 572 * daemon into the PMIx server library * 573 ************************************************************/ 574 575 /* Initialize the server support library - must pass the callback 576 * module for the server to use, plus any attributes we want to 577 * pass down to it */ 578 typedef int (*opal_pmix_base_module_server_init_fn_t)(opal_pmix_server_module_t *module, 579 opal_list_t *info); 580 581 /* Finalize the server support library */ 582 typedef int (*opal_pmix_base_module_server_finalize_fn_t)(void); 583 584 /* given a semicolon-separated list of input values, generate 585 * a regex that can be passed down to the client for parsing. 586 * The caller is responsible for free'ing the resulting 587 * string 588 * 589 * If values have leading zero's, then that is preserved. You 590 * have to add back any prefix/suffix for node names 591 * odin[009-015,017-023,076-086] 592 * 593 * "pmix:odin[009-015,017-023,076-086]" 594 * 595 * Note that the "pmix" at the beginning of each regex indicates 596 * that the PMIx native parser is to be used by the client for 597 * parsing the provided regex. Other parsers may be supported - see 598 * the pmix_client.h header for a list. 599 */ 600 typedef int (*opal_pmix_base_module_generate_regex_fn_t)(const char *input, char **regex); 601 602 /* The input is expected to consist of a comma-separated list 603 * of ranges. 
* Thus, an input of:
 *     "1-4;2-5;8,10,11,12;6,7,9"
 * would generate a regex of
 *     "[pmix:2x(3);8,10-12;6-7,9]"
 *
 * Note that the "pmix" at the beginning of each regex indicates
 * that the PMIx native parser is to be used by the client for
 * parsing the provided regex. Other parsers may be supported - see
 * the pmix_client.h header for a list.
 */
typedef int (*opal_pmix_base_module_generate_ppn_fn_t)(const char *input,
                                                       char **ppn);

/* Setup the data about a particular nspace so it can
 * be passed to any child process upon startup. The PMIx
 * connection procedure provides an opportunity for the
 * host PMIx server to pass job-related info down to a
 * child process. This might include the number of
 * processes in the job, relative local ranks of the
 * processes within the job, and other information of
 * use to the process. The server is free to determine
 * which, if any, of the supported elements it will
 * provide - defined values are provided in pmix_common.h.
 *
 * NOTE: the server must register ALL nspaces that will
 * participate in collective operations with local processes.
 * This means that the server must register an nspace even
 * if it will not host any local procs from within that
 * nspace IF any local proc might at some point perform
 * a collective operation involving one or more procs from
 * that nspace. This is necessary so that the collective
 * operation can know when it is locally complete.
 *
 * The caller must also provide the number of local procs
 * that will be launched within this nspace.
This is required 637 * for the PMIx server library to correctly handle collectives 638 * as a collective operation call can occur before all the 639 * procs have been started */ 640 typedef int (*opal_pmix_base_module_server_register_nspace_fn_t)(opal_jobid_t jobid, 641 int nlocalprocs, 642 opal_list_t *info, 643 opal_pmix_op_cbfunc_t cbfunc, 644 void *cbdata); 645 646 /* Deregister an nspace. Instruct the PMIx server to purge 647 * all info relating to the provided jobid so that memory 648 * can be freed. Note that the server will automatically 649 * purge all info relating to any clients it has from 650 * this nspace */ 651 typedef void (*opal_pmix_base_module_server_deregister_nspace_fn_t)(opal_jobid_t jobid, 652 opal_pmix_op_cbfunc_t cbfunc, 653 void *cbdata); 654 655 /* Register a client process with the PMIx server library. The 656 * expected user ID and group ID of the child process helps the 657 * server library to properly authenticate clients as they connect 658 * by requiring the two values to match. 659 * 660 * The host server can also, if it desires, provide an object 661 * it wishes to be returned when a server function is called 662 * that relates to a specific process. For example, the host 663 * server may have an object that tracks the specific client. 664 * Passing the object to the library allows the library to 665 * return that object when the client calls "finalize", thus 666 * allowing the host server to access the object without 667 * performing a lookup. */ 668 typedef int (*opal_pmix_base_module_server_register_client_fn_t)(const opal_process_name_t *proc, 669 uid_t uid, gid_t gid, 670 void *server_object, 671 opal_pmix_op_cbfunc_t cbfunc, 672 void *cbdata); 673 674 /* Deregister a client. Instruct the PMIx server to purge 675 * all info relating to the provided client so that memory 676 * can be freed. 
As per above note, the server will automatically 677 * free all client-related data when the nspace is deregistered, 678 * so there is no need to call this function during normal 679 * finalize operations. Instead, this is provided for use 680 * during exception operations */ 681 typedef void (*opal_pmix_base_module_server_deregister_client_fn_t)(const opal_process_name_t *proc, 682 opal_pmix_op_cbfunc_t cbfunc, 683 void *cbdata); 684 685 /* Setup the environment of a child process to be forked 686 * by the host so it can correctly interact with the PMIx 687 * server. The PMIx client needs some setup information 688 * so it can properly connect back to the server. This function 689 * will set appropriate environmental variables for this purpose. */ 690 typedef int (*opal_pmix_base_module_server_setup_fork_fn_t)(const opal_process_name_t *proc, char ***env); 691 692 /* Define a function by which the host server can request modex data 693 * from the local PMIx server. This is used to support the direct modex 694 * operation - i.e., where data is cached locally on each PMIx 695 * server for its own local clients, and is obtained on-demand 696 * for remote requests. Upon receiving a request from a remote 697 * server, the host server will call this function to pass the 698 * request into the PMIx server. The PMIx server will return a blob 699 * (once it becomes available) via the cbfunc - the host 700 * server shall send the blob back to the original requestor */ 701 typedef int (*opal_pmix_base_module_server_dmodex_request_fn_t)(const opal_process_name_t *proc, 702 opal_pmix_modex_cbfunc_t cbfunc, 703 void *cbdata); 704 705 /* Report an event to a process for notification via any 706 * registered event handler. The handler registration can be 707 * called by both the server and the client application. On the 708 * server side, the handler is used to report events detected 709 * by PMIx to the host server for handling. 
On the client side, 710 * the handler is used to notify the process of events 711 * reported by the server - e.g., the failure of another process. 712 * 713 * This function allows the host server to direct the server 714 * convenience library to notify all registered local procs of 715 * an event. The event can be local, or anywhere in the cluster. 716 * The status indicates the event being reported. 717 * 718 * The source parameter informs the handler of the source that 719 * generated the event. This will be NULL if the event came 720 * from the external resource manager. 721 * 722 * The info array contains any further info the RM can and/or chooses 723 * to provide. 724 * 725 * The callback function will be called upon completion of the 726 * notify_event function's actions. Note that any messages will 727 * have been queued, but may not have been transmitted by this 728 * time. Note that the caller is required to maintain the input 729 * data until the callback function has been executed if this 730 * function returns OPAL_SUCCESS! */ 731 typedef int (*opal_pmix_base_module_server_notify_event_fn_t)(int status, 732 const opal_process_name_t *source, 733 opal_list_t *info, 734 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 735 736 /* push IO to local clients */ 737 typedef int (*opal_pmix_base_module_server_push_io_fn_t)(const opal_process_name_t *source, 738 opal_pmix_iof_channel_t channel, 739 unsigned char *data, size_t nbytes); 740 741 /* define a callback function for the setup_application API. The returned info 742 * array is owned by the PMIx server library and will be free'd when the 743 * provided cbfunc is called. */ 744 typedef void (*opal_pmix_setup_application_cbfunc_t)(int status, 745 opal_list_t *info, 746 void *provided_cbdata, 747 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 748 749 /* Provide a function by which we can request 750 * any application-specific environmental variables prior to 751 * launch of an application. 
For example, network libraries may 752 * opt to provide security credentials for the application. This 753 * is defined as a non-blocking operation in case network 754 * libraries need to perform some action before responding. The 755 * returned env will be distributed along with the application */ 756 typedef int (*opal_pmix_server_setup_application_fn_t)(opal_jobid_t jobid, 757 opal_list_t *info, 758 opal_pmix_setup_application_cbfunc_t cbfunc, void *cbdata); 759 760 /* Provide a function by which the local PMIx server can perform 761 * any application-specific operations prior to spawning local 762 * clients of a given application. For example, a network library 763 * might need to setup the local driver for "instant on" addressing. 764 */ 765 typedef int (*opal_pmix_server_setup_local_support_fn_t)(opal_jobid_t jobid, 766 opal_list_t *info, 767 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 768 769 770 /************************************************************ 771 * TOOL APIs * 772 ************************************************************/ 773 /* Initialize the PMIx tool support 774 * When called the library will check for the required connection 775 * information of the local server and will establish the connection. 776 * The connection info can be provided either in the environment or 777 * in the list of attributes. If the information is not found, or the 778 * server connection fails, then an appropriate error constant will 779 * be returned. 
780 */ 781 typedef int (*opal_pmix_base_module_tool_init_fn_t)(opal_list_t *ilist); 782 783 /* Finalize the PMIx tool support */ 784 typedef int (*opal_pmix_base_module_tool_fini_fn_t)(void); 785 786 787 /************************************************************ 788 * UTILITY APIs * 789 ************************************************************/ 790 791 /* get the version of the embedded library */ 792 typedef const char* (*opal_pmix_base_module_get_version_fn_t)(void); 793 794 /* Register an event handler to report event. Three types of events 795 * can be reported: 796 * 797 * (a) those that occur within the client library, but are not 798 * reportable via the API itself (e.g., loss of connection to 799 * the server). These events typically occur during behind-the-scenes 800 * non-blocking operations. 801 * 802 * (b) job-related events such as the failure of another process in 803 * the job or in any connected job, impending failure of hardware 804 * within the job's usage footprint, etc. 805 * 806 * (c) system notifications that are made available by the local 807 * administrators 808 * 809 * By default, only events that directly affect the process and/or 810 * any process to which it is connected (via the PMIx_Connect call) 811 * will be reported. Options to modify that behavior can be provided 812 * in the info array 813 * 814 * Both the client application and the resource manager can register 815 * event handlers for specific events. PMIx client/server calls the registered 816 * event handler upon receiving event notify notification (via PMIx_Notify_event) 817 * from the other end (Resource Manager/Client application). 818 * 819 * Multiple event handlers can be registered for different events. PMIX returns 820 * a size_t reference to each register handler in the callback fn. The caller 821 * must retain the reference in order to deregister the evhandler. 
822 * Modification of the notification behavior can be accomplished by 823 * deregistering the current evhandler, and then registering it 824 * using a new set of info values. 825 * 826 * A NULL for event_codes indicates registration as a default event handler 827 * 828 * See pmix_types.h for a description of the notification function */ 829 typedef void (*opal_pmix_base_module_register_fn_t)(opal_list_t *event_codes, 830 opal_list_t *info, 831 opal_pmix_notification_fn_t evhandler, 832 opal_pmix_evhandler_reg_cbfunc_t cbfunc, 833 void *cbdata); 834 835 /* deregister the evhandler 836 * evhandler_ref is the reference returned by PMIx for the evhandler 837 * to pmix_evhandler_reg_cbfunc_t */ 838 typedef void (*opal_pmix_base_module_deregister_fn_t)(size_t evhandler, 839 opal_pmix_op_cbfunc_t cbfunc, 840 void *cbdata); 841 842 /* Report an event for notification via any 843 * registered evhandler. On the PMIx 844 * server side, this is used to report events detected 845 * by PMIx to the host server for handling and/or distribution. 846 * 847 * The client application can also call this function to notify the 848 * resource manager of an event it detected. It can specify the 849 * range over which that notification should occur. 850 * 851 * The info array contains any further info the caller can and/or chooses 852 * to provide. 853 * 854 * The callback function will be called upon completion of the 855 * notify_event function's actions. Note that any messages will 856 * have been queued, but may not have been transmitted by this 857 * time. Note that the caller is required to maintain the input 858 * data until the callback function has been executed if it 859 * returns OPAL_SUCCESS! 
860 */ 861 typedef int (*opal_pmix_base_module_notify_event_fn_t)(int status, 862 const opal_process_name_t *source, 863 opal_pmix_data_range_t range, 864 opal_list_t *info, 865 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 866 867 /* store data internally, but don't push it out to be shared - this is 868 * intended solely for storage of info on other procs that comes thru 869 * a non-PMIx channel (e.g., may be computed locally) but is desired 870 * to be available via a PMIx_Get call */ 871 typedef int (*opal_pmix_base_module_store_fn_t)(const opal_process_name_t *proc, 872 opal_value_t *val); 873 874 /* retrieve the nspace corresponding to a given jobid */ 875 typedef const char* (*opal_pmix_base_module_get_nspace_fn_t)(opal_jobid_t jobid); 876 877 /* register a jobid-to-nspace pair */ 878 typedef void (*opal_pmix_base_module_register_jobid_fn_t)(opal_jobid_t jobid, const char *nspace); 879 880 /* query information from the system */ 881 typedef void (*opal_pmix_base_module_query_fn_t)(opal_list_t *queries, 882 opal_pmix_info_cbfunc_t cbfunc, void *cbdata); 883 884 /* log data to the system */ 885 typedef void (*opal_pmix_base_log_fn_t)(opal_list_t *info, 886 opal_pmix_op_cbfunc_t cbfunc, void *cbdata); 887 888 /* allocation */ 889 typedef int (*opal_pmix_base_alloc_fn_t)(opal_pmix_alloc_directive_t directive, 890 opal_list_t *info, 891 opal_pmix_info_cbfunc_t cbfunc, void *cbdata); 892 893 /* job control */ 894 typedef int (*opal_pmix_base_job_control_fn_t)(opal_list_t *targets, 895 opal_list_t *directives, 896 opal_pmix_info_cbfunc_t cbfunc, void *cbdata); 897 898 /* monitoring */ 899 typedef int (*opal_pmix_base_process_monitor_fn_t)(opal_list_t *monitor, 900 opal_list_t *directives, 901 opal_pmix_info_cbfunc_t cbfunc, void *cbdata); 902 903 /* register cleanup */ 904 typedef int (*opal_pmix_base_register_cleanup_fn_t)(char *path, bool directory, bool ignore, bool jobscope); 905 906 typedef bool (*opal_pmix_base_legacy_get_fn_t)(void); 907 908 /* 909 * the 
standard public API data structure 910 */ 911 typedef struct { 912 opal_pmix_base_legacy_get_fn_t legacy_get; 913 /* client APIs */ 914 opal_pmix_base_module_init_fn_t init; 915 opal_pmix_base_module_fini_fn_t finalize; 916 opal_pmix_base_module_initialized_fn_t initialized; 917 opal_pmix_base_module_abort_fn_t abort; 918 opal_pmix_base_module_commit_fn_t commit; 919 opal_pmix_base_module_fence_fn_t fence; 920 opal_pmix_base_module_fence_nb_fn_t fence_nb; 921 opal_pmix_base_module_put_fn_t put; 922 opal_pmix_base_module_get_fn_t get; 923 opal_pmix_base_module_get_nb_fn_t get_nb; 924 opal_pmix_base_module_publish_fn_t publish; 925 opal_pmix_base_module_publish_nb_fn_t publish_nb; 926 opal_pmix_base_module_lookup_fn_t lookup; 927 opal_pmix_base_module_lookup_nb_fn_t lookup_nb; 928 opal_pmix_base_module_unpublish_fn_t unpublish; 929 opal_pmix_base_module_unpublish_nb_fn_t unpublish_nb; 930 opal_pmix_base_module_spawn_fn_t spawn; 931 opal_pmix_base_module_spawn_nb_fn_t spawn_nb; 932 opal_pmix_base_module_connect_fn_t connect; 933 opal_pmix_base_module_connect_nb_fn_t connect_nb; 934 opal_pmix_base_module_disconnect_fn_t disconnect; 935 opal_pmix_base_module_disconnect_nb_fn_t disconnect_nb; 936 opal_pmix_base_module_resolve_peers_fn_t resolve_peers; 937 opal_pmix_base_module_resolve_nodes_fn_t resolve_nodes; 938 opal_pmix_base_module_query_fn_t query; 939 opal_pmix_base_log_fn_t log; 940 opal_pmix_base_alloc_fn_t allocate; 941 opal_pmix_base_job_control_fn_t job_control; 942 opal_pmix_base_process_monitor_fn_t monitor; 943 opal_pmix_base_register_cleanup_fn_t register_cleanup; 944 /* server APIs */ 945 opal_pmix_base_module_server_init_fn_t server_init; 946 opal_pmix_base_module_server_finalize_fn_t server_finalize; 947 opal_pmix_base_module_generate_regex_fn_t generate_regex; 948 opal_pmix_base_module_generate_ppn_fn_t generate_ppn; 949 opal_pmix_base_module_server_register_nspace_fn_t server_register_nspace; 950 opal_pmix_base_module_server_deregister_nspace_fn_t 
server_deregister_nspace; 951 opal_pmix_base_module_server_register_client_fn_t server_register_client; 952 opal_pmix_base_module_server_deregister_client_fn_t server_deregister_client; 953 opal_pmix_base_module_server_setup_fork_fn_t server_setup_fork; 954 opal_pmix_base_module_server_dmodex_request_fn_t server_dmodex_request; 955 opal_pmix_base_module_server_notify_event_fn_t server_notify_event; 956 opal_pmix_base_module_server_push_io_fn_t server_iof_push; 957 opal_pmix_server_setup_application_fn_t server_setup_application; 958 opal_pmix_server_setup_local_support_fn_t server_setup_local_support; 959 /* tool APIs */ 960 opal_pmix_base_module_tool_init_fn_t tool_init; 961 opal_pmix_base_module_tool_fini_fn_t tool_finalize; 962 /* Utility APIs */ 963 opal_pmix_base_module_get_version_fn_t get_version; 964 opal_pmix_base_module_register_fn_t register_evhandler; 965 opal_pmix_base_module_deregister_fn_t deregister_evhandler; 966 opal_pmix_base_module_notify_event_fn_t notify_event; 967 opal_pmix_base_module_store_fn_t store_local; 968 opal_pmix_base_module_get_nspace_fn_t get_nspace; 969 opal_pmix_base_module_register_jobid_fn_t register_jobid; 970 } opal_pmix_base_module_t; 971 972 typedef struct { 973 mca_base_component_t base_version; 974 mca_base_component_data_t base_data; 975 int priority; 976 } opal_pmix_base_component_t; 977 978 /* 979 * Macro for use in components that are of type pmix 980 */ 981 #define OPAL_PMIX_BASE_VERSION_2_0_0 \ 982 OPAL_MCA_BASE_VERSION_2_1_0("pmix", 2, 0, 0) 983 984 /* Global structure for accessing store functions */ 985 OPAL_DECLSPEC extern opal_pmix_base_module_t opal_pmix; /* holds base function pointers */ 986 987 END_C_DECLS 988 989 #endif