root/opal/mca/pmix/pmix4x/pmix/include/pmix.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2013-2018 Intel, Inc.  All rights reserved.
   3  * Copyright (c) 2016      Research Organization for Information Science
   4  *                         and Technology (RIST). All rights reserved.
   5  *
   6  * Redistribution and use in source and binary forms, with or without
   7  * modification, are permitted provided that the following conditions are
   8  * met:
   9  *
  10  * - Redistributions of source code must retain the above copyright
  11  *   notice, this list of conditions and the following disclaimer.
  12  *
  13  * - Redistributions in binary form must reproduce the above copyright
  14  *   notice, this list of conditions and the following disclaimer listed
  15  *   in this license in the documentation and/or other materials
  16  *   provided with the distribution.
  17  *
  18  * - Neither the name of the copyright holders nor the names of its
  19  *   contributors may be used to endorse or promote products derived from
  20  *   this software without specific prior written permission.
  21  *
  22  * The copyright holders provide no reassurances that the source code
  23  * provided does not infringe any patent, copyright, or any other
  24  * intellectual property rights of third parties.  The copyright holders
  25  * disclaim any liability to any recipient for claims brought against
  26  * recipient by any third party for infringement of that parties
  27  * intellectual property rights.
  28  *
  29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  30  * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  31  * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
  32  * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  33  * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  34  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
  35  * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  36  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  37  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  38  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  39  * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  40  *
  41  * $COPYRIGHT$
  42  *
  43  * Additional copyrights may follow
  44  *
  45  * $HEADER$
  46  */
  47 
  48 #ifndef PMIx_H
  49 #define PMIx_H
  50 
  51 /* Structure and constant definitions */
  52 #include <pmix_common.h>
  53 
  54 #if defined(c_plusplus) || defined(__cplusplus)
  55 extern "C" {
  56 #endif
  57 
  58 /****    PMIX API    ****/
  59 
  60 /* Initialize the PMIx client, returning the process identifier assigned
  61  * to this client's application in the provided pmix_proc_t struct.
  62  * Passing a parameter of _NULL_ for this parameter is allowed if the user
  63  * wishes solely to initialize the PMIx system and does not require
  64  * return of the identifier at that time.
  65  *
  66  * When called the PMIx client will check for the required connection
  67  * information of the local PMIx server and will establish the connection.
  68  * If the information is not found, or the server connection fails, then
  69  * an appropriate error constant will be returned.
  70  *
  71  * If successful, the function will return PMIX_SUCCESS and will fill the
  72  * provided structure with the server-assigned namespace and rank of the
  73  * process within the application.
  74  *
  75  * Note that the PMIx client library is reference counted, and so multiple
  76  * calls to PMIx_Init are allowed. Thus, one way to obtain the namespace and
  77  * rank of the process is to simply call PMIx_Init with a non-NULL parameter.
  78  *
  79  * The info array is used to pass user requests pertaining to the init
  80  * and subsequent operations. Passing a _NULL_ value for the array pointer
  81  * is supported if no directives are desired.
  82  */
  83 PMIX_EXPORT pmix_status_t PMIx_Init(pmix_proc_t *proc,
  84                                     pmix_info_t info[], size_t ninfo);
  85 
  86 /* Finalize the PMIx client, closing the connection to the local server.
  87  * An error code will be returned if, for some reason, the connection
  88  * cannot be closed.
  89  *
  90  * The info array is used to pass user requests regarding the finalize
  91  * operation. This can include:
  92  *
  93  * (a) PMIX_EMBED_BARRIER - By default, PMIx_Finalize does not include an
  94  * internal barrier operation. This attribute directs PMIx_Finalize to
  95  * execute a barrier as part of the finalize operation.
  96  */
  97 PMIX_EXPORT pmix_status_t PMIx_Finalize(const pmix_info_t info[], size_t ninfo);
  98 
  99 
 100 /* Returns _true_ if the PMIx client has been successfully initialized,
 101  * returns _false_ otherwise. Note that the function only reports the
 102  * internal state of the PMIx client - it does not verify an active
 103  * connection with the server, nor that the server is functional. */
 104 PMIX_EXPORT int PMIx_Initialized(void);
 105 
 106 
 107 /* Request that the provided array of procs be aborted, returning the
 108  * provided _status_ and printing the provided message. A _NULL_
 109  * for the proc array indicates that all processes in the caller's
 110  * nspace are to be aborted.
 111  *
 112  * The response to this request is somewhat dependent on the specific resource
 113  * manager and its configuration (e.g., some resource managers will
 114  * not abort the application if the provided _status_ is zero unless
 115  * specifically configured to do so), and thus lies outside the control
 116  * of PMIx itself. However, the client will inform the RM of
 117  * the request that the application be aborted, regardless of the
 118  * value of the provided _status_.
 119  *
 120  * Passing a _NULL_ msg parameter is allowed. Note that race conditions
 121  * caused by multiple processes calling PMIx_Abort are left to the
 122  * server implementation to resolve with regard to which status is
 123  * returned and what messages (if any) are printed. */
 124 PMIX_EXPORT pmix_status_t PMIx_Abort(int status, const char msg[],
 125                                      pmix_proc_t procs[], size_t nprocs);
 126 
 127 
 128 /* Push a value into the client's namespace. The client library will cache
 129  * the information locally until _PMIx_Commit_ is called. The provided scope
 130  * value is passed to the local PMIx server, which will distribute the data
 131  * as directed. */
 132 PMIX_EXPORT pmix_status_t PMIx_Put(pmix_scope_t scope, const pmix_key_t key, pmix_value_t *val);
 133 
 134 
 135 /* Push all previously _PMIx_Put_ values to the local PMIx server.
 136  * This is an asynchronous operation - the library will immediately
 137  * return to the caller while the data is transmitted to the local
 138  * server in the background */
 139 PMIX_EXPORT pmix_status_t PMIx_Commit(void);
 140 
 141 
 142 /* Execute a blocking barrier across the processes identified in the
 143  * specified array. Passing a _NULL_ pointer as the _procs_ parameter
 144  * indicates that the barrier is to span all processes in the client's
 145  * namespace. Each provided pmix_proc_t struct can pass PMIX_RANK_WILDCARD to
 146  * indicate that all processes in the given namespace are
 147  * participating.
 148  *
 149  * The info array is used to pass user requests regarding the fence
 150  * operation. This can include:
 151  *
 152  * (a) PMIX_COLLECT_DATA - a boolean indicating whether or not the barrier
 153  *     operation is to return the _put_ data from all participating processes.
 154  *     A value of _false_ indicates that the callback is just used as a release
 155  *     and no data is to be returned at that time. A value of _true_ indicates
 156  *     that all _put_ data is to be collected by the barrier. Returned data is
 157  *     cached at the server to reduce memory footprint, and can be retrieved
 158  *     as needed by calls to PMIx_Get(nb).
 159  *
 160  *     Note that for scalability reasons, the default behavior for PMIx_Fence
 161  *     is to _not_ collect the data.
 162  *
 163  * (b) PMIX_COLLECTIVE_ALGO - a comma-delimited string indicating the algos
 164  *     to be used for executing the barrier, in priority order.
 165  *
 166  * (c) PMIX_COLLECTIVE_ALGO_REQD - instructs the host RM that it should return
 167  *     an error if none of the specified algos are available. Otherwise, the RM
 168  *     is to use one of the algos if possible, but is otherwise free to use any
 169  *     of its available methods to execute the operation.
 170  *
 171  * (d) PMIX_TIMEOUT - maximum time for the fence to execute before declaring
 172  *     an error. By default, the RM shall terminate the operation and notify participants
 173  *     if one or more of the indicated procs fails during the fence. However,
 174  *     the timeout parameter can help avoid "hangs" due to programming errors
 175  *     that prevent one or more procs from reaching the "fence".
 176  */
 177 PMIX_EXPORT pmix_status_t PMIx_Fence(const pmix_proc_t procs[], size_t nprocs,
 178                                      const pmix_info_t info[], size_t ninfo);
 179 
 180 /* Non-blocking version of PMIx_Fence. Note that the function will return
 181  * an error if a _NULL_ callback function is given. */
 182 PMIX_EXPORT pmix_status_t PMIx_Fence_nb(const pmix_proc_t procs[], size_t nprocs,
 183                                         const pmix_info_t info[], size_t ninfo,
 184                                         pmix_op_cbfunc_t cbfunc, void *cbdata);
 185 
 186 
 187 /* Retrieve information for the specified _key_ as published by the process
 188  * identified in the given pmix_proc_t, returning a pointer to the value in the
 189  * given address.
 190  *
 191  * This is a blocking operation - the caller will block until
 192  * the specified data has been _PMIx_Put_ by the specified rank. The caller is
 193  * responsible for freeing all memory associated with the returned value when
 194  * no longer required.
 195  *
 196  * The info array is used to pass user requests regarding the get
 197  * operation. This can include:
 198  *
 199  * (a) PMIX_TIMEOUT - maximum time for the get to execute before declaring
 200  *     an error. The timeout parameter can help avoid "hangs" due to programming
 201  *     errors that prevent the target proc from ever exposing its data.
 202  */
 203 PMIX_EXPORT pmix_status_t PMIx_Get(const pmix_proc_t *proc, const char key[],
 204                                    const pmix_info_t info[], size_t ninfo,
 205                                    pmix_value_t **val);
 206 
 207 /* A non-blocking operation version of PMIx_Get - the callback function will
 208  * be executed once the specified data has been _PMIx_Put_
 209  * by the identified process and retrieved by the local server. The info
 210  * array is used as described above for the blocking form of this call. */
 211 PMIX_EXPORT pmix_status_t PMIx_Get_nb(const pmix_proc_t *proc, const pmix_key_t key,
 212                                       const pmix_info_t info[], size_t ninfo,
 213                                       pmix_value_cbfunc_t cbfunc, void *cbdata);
 214 
 215 
 216 /* Publish the data in the info array for lookup. By default,
 217  * the data will be published into the PMIX_SESSION range and
 218  * with PMIX_PERSIST_APP persistence. Changes to those values,
 219  * and any additional directives, can be included in the pmix_info_t
 220  * array.
 221  *
 222  * Note that the keys must be unique within the specified
 223  * data range or else an error will be returned (first published
 224  * wins). Attempts to access the data by procs outside of
 225  * the provided data range will be rejected.
 226  *
 227  * The persistence parameter instructs the server as to how long
 228  * the data is to be retained.
 229  *
 230  * The blocking form will block until the server confirms that the
 231  * data has been posted and is available. The non-blocking form will
 232  * return immediately, executing the callback when the server confirms
 233  * availability of the data.
 234  */
 235 PMIX_EXPORT pmix_status_t PMIx_Publish(const pmix_info_t info[], size_t ninfo);
 236 PMIX_EXPORT pmix_status_t PMIx_Publish_nb(const pmix_info_t info[], size_t ninfo,
 237                                           pmix_op_cbfunc_t cbfunc, void *cbdata);
 238 
 239 
 240 /* Lookup information published by this or another process. By default,
 241  * the search will be conducted across the PMIX_SESSION range. Changes
 242  * to the range, and any additional directives, can be provided
 243  * in the pmix_info_t array. Note that the search is also constrained
 244  * to only data published by the current user ID - i.e., the search
 245  * will not return data published by an application being executed
 246  * by another user. There currently is no option to override this
 247  * behavior - such an option may become available later via an
 248  * appropriate pmix_info_t directive.
 249  *
 250  * The "data" parameter consists of an array of pmix_pdata_t structs with the
 251  * keys specifying the requested information. Data will be returned
 252  * for each key in the associated info struct - any key that cannot
 253  * be found will return with a data type of "PMIX_UNDEF". The function
 254  * will return SUCCESS if _any_ values can be found, so the caller
 255  * must check each data element to ensure it was returned.
 256  *
 257  * The proc field in each pmix_pdata_t struct will contain the
 258  * nspace/rank of the process that published the data.
 259  *
 260  * Note: although this is a blocking function, it will _not_ wait
 261  * by default for the requested data to be published. Instead, it
 262  * will block for the time required by the server to lookup its current
 263  * data and return any found items. Thus, the caller is responsible for
 264  * ensuring that data is published prior to executing a lookup, or
 265  * for retrying until the requested data is found.
 266  *
 267  * Optionally, the info array can be used to modify this behavior
 268  * by including:
 269  *
 270  * (a) PMIX_WAIT - wait for the requested data to be published. The
 271  *     server is to wait until all data has become available.
 272  *
 273  * (b) PMIX_TIMEOUT - max time to wait for data to become available.
 274  *
 275  */
 276 PMIX_EXPORT pmix_status_t PMIx_Lookup(pmix_pdata_t data[], size_t ndata,
 277                                       const pmix_info_t info[], size_t ninfo);
 278 
 279 /* Non-blocking form of the _PMIx_Lookup_ function. Data for
 280  * the provided NULL-terminated keys array will be returned
 281  * in the provided callback function. As above, the default
 282  * behavior is to _not_ wait for data to be published. The
 283  * info keys can be used to modify the behavior as previously
 284  * described */
 285 PMIX_EXPORT pmix_status_t PMIx_Lookup_nb(char **keys, const pmix_info_t info[], size_t ninfo,
 286                                          pmix_lookup_cbfunc_t cbfunc, void *cbdata);
 287 
 288 
 289 /* Unpublish data posted by this process using the given keys.
 290  * The function will block until the data has been removed by
 291  * the server. A value of _NULL_ for the keys parameter instructs
 292  * the server to remove _all_ data published by this process.
 293  *
 294  * By default, the range is assumed to be PMIX_SESSION. Changes
 295  * to the range, and any additional directives, can be provided
 296  * in the pmix_info_t array */
 297 PMIX_EXPORT pmix_status_t PMIx_Unpublish(char **keys,
 298                                          const pmix_info_t info[], size_t ninfo);
 299 
 300 /* Non-blocking form of the _PMIx_Unpublish_ function. The
 301  * callback function will be executed once the server confirms
 302  * removal of the specified data. */
 303 PMIX_EXPORT pmix_status_t PMIx_Unpublish_nb(char **keys,
 304                                             const pmix_info_t info[], size_t ninfo,
 305                                             pmix_op_cbfunc_t cbfunc, void *cbdata);
 306 
 307 
 308 /* Spawn a new job. The assigned namespace of the spawned applications
 309  * is returned in the nspace parameter - a _NULL_ value in that
 310  * location indicates that the caller doesn't wish to have the
 311  * namespace returned. The nspace array must be at least of size
 312  * PMIX_MAX_NSLEN+1. Behavior of individual resource managers
 313  * may differ, but it is expected that failure of any application
 314  * process to start will result in termination/cleanup of _all_
 315  * processes in the newly spawned job and return of an error
 316  * code to the caller.
 317  *
 318  * By default, the spawned processes will be PMIx "connected" to
 319  * the parent process upon successful launch (see PMIx_Connect
 320  * description for details). Note that this only means that the
 321  * parent process (a) will be given a copy of the new job's
 322  * information so it can query job-level info without
 323  * incurring any communication penalties, and (b) will receive
 324  * notification of errors from processes in the child job.
 325  *
 326  * Job-level directives can be specified in the job_info array. This
 327  * can include:
 328  *
 329  * (a) PMIX_NON_PMI - processes in the spawned job will
 330  *     not be calling PMIx_Init
 331  *
 332  * (b) PMIX_TIMEOUT - declare the spawn as having failed if the launched
 333  *     procs do not call PMIx_Init within the specified time
 334  *
 335  * (c) PMIX_NOTIFY_COMPLETION - notify the parent process when the
 336  *     child job terminates, either normally or with error
 337  */
 338 PMIX_EXPORT pmix_status_t PMIx_Spawn(const pmix_info_t job_info[], size_t ninfo,
 339                                      const pmix_app_t apps[], size_t napps,
 340                                      pmix_nspace_t nspace);
 341 
 342 
 343 /* Non-blocking form of the _PMIx_Spawn_ function. The callback
 344  * will be executed upon launch of the specified applications,
 345  * or upon failure to launch any of them. */
 346 PMIX_EXPORT pmix_status_t PMIx_Spawn_nb(const pmix_info_t job_info[], size_t ninfo,
 347                                         const pmix_app_t apps[], size_t napps,
 348                                         pmix_spawn_cbfunc_t cbfunc, void *cbdata);
 349 
 350 /* Record the specified processes as "connected". Both blocking and non-blocking
 351  * versions are provided. This means that the resource manager should treat the
 352  * failure of any process in the specified group as a reportable event, and take
 353  * appropriate action. Note that different resource managers may respond to
 354  * failures in different manners.
 355  *
 356  * The callback function is to be called once all participating processes have
 357  * called connect. The server is required to return any job-level info for the
 358  * connecting processes that they might not already have - i.e., if the connect
 359  * request involves procs from different nspaces, then each proc shall receive
 360  * the job-level info from those nspaces other than their own.
 361  *
 362  * Note: a process can only engage in _one_ connect operation involving the identical
 363  * set of processes at a time. However, a process _can_ be simultaneously engaged
 364  * in multiple connect operations, each involving a different set of processes
 365  *
 366  * As in the case of the fence operation, the info array can be used to pass
 367  * user-level directives regarding the algorithm to be used for the collective
 368  * operation involved in the "connect", timeout constraints, and other options
 369  * available from the host RM */
 370 PMIX_EXPORT pmix_status_t PMIx_Connect(const pmix_proc_t procs[], size_t nprocs,
 371                                        const pmix_info_t info[], size_t ninfo);
 372 
 373 PMIX_EXPORT pmix_status_t PMIx_Connect_nb(const pmix_proc_t procs[], size_t nprocs,
 374                                           const pmix_info_t info[], size_t ninfo,
 375                                           pmix_op_cbfunc_t cbfunc, void *cbdata);
 376 
 377 /* Disconnect a previously connected set of processes. An error will be returned
 378  * if the specified set of procs was not previously "connected". As above, a process
 379  * may be involved in multiple simultaneous disconnect operations. However, a process
 380  * is not allowed to reconnect to a set of procs that has not fully completed
 381  * disconnect - i.e., you have to fully disconnect before you can reconnect to the
 382  * _same_ group of processes. The info array is used as above. */
 383 PMIX_EXPORT pmix_status_t PMIx_Disconnect(const pmix_proc_t procs[], size_t nprocs,
 384                                           const pmix_info_t info[], size_t ninfo);
 385 
 /* Non-blocking form of PMIx_Disconnect. Takes the same procs/nprocs and
  * info/ninfo arguments as the blocking form, plus a pmix_op_cbfunc_t
  * callback and the cbdata pointer to be handed to it. The process array
  * is named "procs" to agree with PMIx_Disconnect and with the nprocs
  * count that sizes it. */
 386 PMIX_EXPORT pmix_status_t PMIx_Disconnect_nb(const pmix_proc_t procs[], size_t nprocs,
 387                                              const pmix_info_t info[], size_t ninfo,
 388                                              pmix_op_cbfunc_t cbfunc, void *cbdata);
 389 
 390 /* Given a node name, return an array of processes within the specified nspace
 391  * on that node. If the nspace is NULL, then all processes on the node will
 392  * be returned. If the specified node does not currently host any processes,
 393  * then the returned array will be NULL, and nprocs=0. The caller is responsible
 394  * for releasing the array when done with it - the PMIX_PROC_FREE macro is
 395  * provided for this purpose.
 396  */
 397 PMIX_EXPORT pmix_status_t PMIx_Resolve_peers(const char *nodename,
 398                                              const pmix_nspace_t nspace,
 399                                              pmix_proc_t **procs, size_t *nprocs);
 400 
 401 
 402 /* Given an nspace, return the list of nodes hosting processes within
 403  * that nspace. The returned string will contain a comma-delimited list
 404  * of nodenames. The caller is responsible for releasing the string
 405  * when done with it */
 406 PMIX_EXPORT pmix_status_t PMIx_Resolve_nodes(const pmix_nspace_t nspace, char **nodelist);
 407 
 408 /* Query information about the system in general - can include
 409  * a list of active nspaces, network topology, etc. Also can be
 410  * used to query node-specific info such as the list of peers
 411  * executing on a given node. We assume that the host RM will
 412  * exercise appropriate access control on the information.
 413  *
 414  * NOTE: there is no blocking form of this API as the structures
 415  * passed to query info differ from those for receiving the results
 416  *
 417  * The following return status codes are provided in the callback:
 418  *
 419  * PMIX_SUCCESS - all data has been returned
 420  * PMIX_ERR_NOT_FOUND - none of the requested data was available
 421  * PMIX_ERR_PARTIAL_SUCCESS - some of the data has been returned
 422  * PMIX_ERR_NOT_SUPPORTED - the host RM does not support this function
 423  */
 424 PMIX_EXPORT pmix_status_t PMIx_Query_info_nb(pmix_query_t queries[], size_t nqueries,
 425                                              pmix_info_cbfunc_t cbfunc, void *cbdata);
 426 
 427 /* Log data to a central data service/store, subject to the
 428  * services offered by the host resource manager. The data to
 429  * be logged is provided in the data array. The (optional) directives
 430  * can be used to request specific storage options and direct
 431  * the choice of storage option.
 432  *
 433  * The callback function will be executed when the log operation
 434  * has been completed. The data array must be maintained until
 435  * the callback is provided
 436  */
 437 PMIX_EXPORT pmix_status_t PMIx_Log(const pmix_info_t data[], size_t ndata,
 438                                    const pmix_info_t directives[], size_t ndirs);
 439 
 440 PMIX_EXPORT pmix_status_t PMIx_Log_nb(const pmix_info_t data[], size_t ndata,
 441                                       const pmix_info_t directives[], size_t ndirs,
 442                                       pmix_op_cbfunc_t cbfunc, void *cbdata);
 443 
 444 /* Request an allocation operation from the host resource manager.
 445  * Several broad categories are envisioned, including the ability to:
 446  *
 447  * - request allocation of additional resources, including memory,
 448  *   bandwidth, and compute. This should be accomplished in a
 449  *   non-blocking manner so that the application can continue to
 450  *   progress while waiting for resources to become available. Note
 451  *   that the new allocation will be disjoint from (i.e., not
 452  *   affiliated with) the allocation of the requestor - thus the
 453  *   termination of one allocation will not impact the other.
 454  *
 455  * - extend the reservation on currently allocated resources, subject
 456  *   to scheduling availability and priorities. This includes extending
 457  *   the time limit on current resources, and/or requesting additional
 458  *   resources be allocated to the requesting job. Any additional
 459  *   allocated resources will be considered as part of the current
 460  *   allocation, and thus will be released at the same time.
 461  *
 462  * - release currently allocated resources that are no longer required.
 463  *   This is intended to support partial release of resources since all
 464  *   resources are normally released upon termination of the job. The
 465  *   identified use-cases include resource variations across discrete steps
 466  *   of a workflow, as well as applications that spawn sub-jobs and/or
 467  *   dynamically grow/shrink over time
 468  *
 469  * - "lend" resources back to the scheduler with an expectation of getting
 470  *   them back at some later time in the job. This can be a proactive
 471  *   operation (e.g., to save on computing costs when resources are
 472  *   temporarily not required), or in response to scheduler requests in
 473  *   lieu of preemption. A corresponding ability to "reacquire" resources
 474  *   previously released is included.
 475  */
 476 PMIX_EXPORT pmix_status_t PMIx_Allocation_request(pmix_alloc_directive_t directive,
 477                                                   pmix_info_t *info, size_t ninfo);
 478 
 479 PMIX_EXPORT pmix_status_t PMIx_Allocation_request_nb(pmix_alloc_directive_t directive,
 480                                                      pmix_info_t *info, size_t ninfo,
 481                                                      pmix_info_cbfunc_t cbfunc, void *cbdata);
 482 
 483 /* Request a job control action. The targets array identifies the
 484  * processes to which the requested job control action is to be applied.
 485  * A NULL value can be used to indicate all processes in the caller's
 486  * nspace. PMIX_RANK_WILDCARD can also be used to indicate
 487  * that all processes in the given nspace are to be included.
 488  *
 489  * The directives are provided as pmix_info_t structs in the directives
 490  * array. The callback function provides a status to indicate whether or
 491  * not the request was granted, and to provide some information as to
 492  * the reason for any denial in the pmix_info_cbfunc_t array of pmix_info_t
 493  * structures. If non-NULL, then the specified release_fn must be called
 494  * when the callback function completes - this will be used to release
 495  * any provided pmix_info_t array.
 496  */
 497 PMIX_EXPORT pmix_status_t PMIx_Job_control(const pmix_proc_t targets[], size_t ntargets,
 498                                            const pmix_info_t directives[], size_t ndirs);
 499 
 500 PMIX_EXPORT pmix_status_t PMIx_Job_control_nb(const pmix_proc_t targets[], size_t ntargets,
 501                                               const pmix_info_t directives[], size_t ndirs,
 502                                               pmix_info_cbfunc_t cbfunc, void *cbdata);
 503 
 504 /* Request that something be monitored - e.g., that the server monitor
 505  * this process for periodic heartbeats as an indication that the process
 506  * has not become "wedged". When a monitor detects the specified alarm
 507  * condition, it will generate an event notification using the provided
 508  * error code and passing along any available relevant information. It is
 509  * up to the caller to register a corresponding event handler.
 510  *
 511  * Params:
 512  *
 513  * monitor: attribute indicating the type of monitor being requested - e.g.,
 514  *          PMIX_MONITOR_FILE to indicate that the requestor is asking that
 515  *          a file be monitored.
 516  *
 517  * error: the status code to be used when generating an event notification
 518  *        alerting that the monitor has been triggered. The range of the
 519  *        notification defaults to PMIX_RANGE_NAMESPACE - this can be
 520  *        changed by providing a PMIX_RANGE directive
 521  *
 522  * directives: characterize the monitoring request (e.g., monitor file size)
 523  *             and frequency of checking to be done
 524  *
 525  * cbfunc: provides a status to indicate whether or not the request was granted,
 526  *         and to provide some information as to the reason for any denial in
 527  *         the pmix_info_cbfunc_t array of pmix_info_t structures.
 528  *
 529  * Note: a process can send a heartbeat to the server using the PMIx_Heartbeat
 530  * macro provided below */
 531 PMIX_EXPORT pmix_status_t PMIx_Process_monitor(const pmix_info_t *monitor, pmix_status_t error,
 532                                                const pmix_info_t directives[], size_t ndirs);
 533 
 534 PMIX_EXPORT pmix_status_t PMIx_Process_monitor_nb(const pmix_info_t *monitor, pmix_status_t error,
 535                                                   const pmix_info_t directives[], size_t ndirs,
 536                                                   pmix_info_cbfunc_t cbfunc, void *cbdata);
 537 
 538 /* Define a special macro to simplify sending of a heartbeat.
      *
      * Expands to a single do/while(0) statement that:
      *   - declares and constructs a temporary pmix_info_t,
      *   - loads it with the PMIX_SEND_HEARTBEAT key, a NULL data pointer
      *     and the PMIX_POINTER type,
      *   - passes it as the "monitor" argument of PMIx_Process_monitor_nb
      *     with PMIX_SUCCESS as the error code, no directives, and NULL for
      *     both the callback function and its cbdata,
      *   - destructs the temporary.
      *
      * The status returned by PMIx_Process_monitor_nb is discarded, so the
      * caller gets no indication of whether the heartbeat was accepted.
      *
      * NOTE(review): the temporary lives on the caller's stack and is
      * destructed immediately after a non-blocking call. This presumes that
      * PMIx_Process_monitor_nb does not retain the pointer past its return
      * for a heartbeat request - confirm against the implementation. */
 539 #define PMIx_Heartbeat()                                                    \
 540     do {                                                                    \
 541         pmix_info_t _in;                                                    \
 542         PMIX_INFO_CONSTRUCT(&_in);                                          \
 543         PMIX_INFO_LOAD(&_in, PMIX_SEND_HEARTBEAT, NULL, PMIX_POINTER);      \
 544         PMIx_Process_monitor_nb(&_in, PMIX_SUCCESS, NULL, 0, NULL, NULL);   \
 545         PMIX_INFO_DESTRUCT(&_in);                                           \
 546     } while(0)
 547 
 548 /* Request a credential from the PMIx server/SMS. The call does not wait for
      * the credential: a PMIX_SUCCESS return only means the request was passed to
      * the local PMIx server, and the response is delivered through cbfunc.
      *
 549  * Input values include:
 550  *
 551  * info - an array of pmix_info_t structures containing any directives the
 552  *        caller may wish to pass. Typical usage might include:
 553  *            PMIX_TIMEOUT - how long to wait (in seconds) for a credential
 554  *                           before timing out and returning an error
 555  *            PMIX_CRED_TYPE - a prioritized, comma-delimited list of desired
 556  *                             credential types for use in environments where
 557  *                             multiple authentication mechanisms may be
 558  *                             available
 559  *
 560  * ninfo - number of elements in the info array
 561  *
 562  * cbfunc - the pmix_credential_cbfunc_t function to be called upon completion
 563  *          of the request
 564  *
 565  * cbdata - pointer to an object to be returned when cbfunc is called
 566  *
 567  * Returned values:
 568  * PMIX_SUCCESS - indicates that the request has been successfully communicated to
 569  *                the local PMIx server. The response will be coming in the provided
 570  *                callback function.
 571  *
 572  * Any other value indicates an appropriate error condition. The callback function
 573  * will _not_ be called in such cases.
 574  */
 575 PMIX_EXPORT pmix_status_t PMIx_Get_credential(const pmix_info_t info[], size_t ninfo,
 576                                               pmix_credential_cbfunc_t cbfunc, void *cbdata);
 577 
 578 
 579 /* Request validation of a credential by the PMIx server/SMS. The call does
      * not wait for the validation: a PMIX_SUCCESS return only means the request
      * was passed to the local PMIx server, and the result is delivered through
      * cbfunc.
      *
 580  * Input values include:
 581  *
 582  * cred - pointer to a pmix_byte_object_t containing the credential
 583  *
 584  * info - an array of pmix_info_t structures containing any directives the
 585  *        caller may wish to pass. Typical usage might include:
 586  *            PMIX_TIMEOUT - how long to wait (in seconds) for validation
 587  *                           before timing out and returning an error
 588  *            PMIX_USERID - the expected effective userid of the credential
 589  *                          to be validated
 590  *            PMIX_GROUPID - the expected effective group id of the credential
 591  *                           to be validated
 592  *
 593  * ninfo - number of elements in the info array
 594  *
 595  * cbfunc - the pmix_validation_cbfunc_t function to be called upon completion
 596  *          of the request
 597  *
 598  * cbdata - pointer to an object to be returned when cbfunc is called
 599  *
 600  * Returned values:
 601  * PMIX_SUCCESS - indicates that the request has been successfully communicated to
 602  *                the local PMIx server. The response will be coming in the provided
 603  *                callback function.
 604  *
 605  * Any other value indicates an appropriate error condition. The callback function
 606  * will _not_ be called in such cases.
 607  */
 608 PMIX_EXPORT pmix_status_t PMIx_Validate_credential(const pmix_byte_object_t *cred,
 609                                                    const pmix_info_t info[], size_t ninfo,
 610                                                    pmix_validation_cbfunc_t cbfunc, void *cbdata);
 611 
 612 /* Define a callback function for delivering forwarded IO to a process.
 613  * This function will be called whenever data becomes available, or a
 614  * specified buffering size and/or time has been met. The function
 615  * will be passed the following values:
 616  *
 617  * iofhdlr - the returned registration number of the handler being invoked.
 618  *           This is required when deregistering the handler.
 619  *
 620  * channel - a bitmask identifying the channel the data arrived on
 621  *
 622  * source - the nspace/rank of the process that generated the data
 623  *
 624  * payload - pointer to character array containing the data. Note that
 625  *           multiple strings may be included, and that the array may
 626  *           _not_ be NULL terminated.
      *           NOTE(review): no length argument accompanies payload in this
      *           signature - confirm how the number of valid bytes is conveyed
      *           to the handler.
 627  *
 628  * info - an optional array of info provided by the source containing
 629  *        metadata about the payload. This could include PMIX_IOF_COMPLETE
 630  *
 631  * ninfo - number of elements in the optional info array
 632  */
 633  typedef void (*pmix_iof_cbfunc_t)(size_t iofhdlr, pmix_iof_channel_t channel,
 634                                    pmix_proc_t *source, char *payload,
 635                                    pmix_info_t info[], size_t ninfo);
 636 
 637 
 638 /* Register to receive output forwarded from a remote process.
 639  *
 640  * procs - array of identifiers for sources whose IO is being
 641  *         requested. Wildcard rank indicates that all procs
 642  *         in the specified nspace are included in the request
 643  *
 644  * nprocs - number of identifiers in the procs array
 645  *
 646  * directives - optional array of attributes to control the
 647  *              behavior of the request. For example, this
 648  *              might include directives on buffering IO
 649  *              before delivery, and/or directives to include
 650  *              or exclude any backlogged data
 651  *
 652  * ndirs - number of elements in the directives array
 653  *
 654  * channel - bitmask of IO channels included in the request.
 655  *           NOTE: STDIN is not supported as it will always
 656  *           be delivered to the stdin file descriptor
 657  *
 658  * cbfunc - function to be called when relevant IO is received
 659  *
 660  * regcbfunc - since registration is async, this is the
 661  *             function to be called when registration is
 662  *             completed. The function itself will return
 663  *             a non-success error if the registration cannot
 664  *             be submitted - in this case, the regcbfunc
 665  *             will _not_ be called.
 666  *
 667  * regcbdata - pointer to object to be returned in regcbfunc
 668  */
 669 PMIX_EXPORT pmix_status_t PMIx_IOF_pull(const pmix_proc_t procs[], size_t nprocs,
 670                                         const pmix_info_t directives[], size_t ndirs,
 671                                         pmix_iof_channel_t channel, pmix_iof_cbfunc_t cbfunc,
 672                                         pmix_hdlr_reg_cbfunc_t regcbfunc, void *regcbdata);
 673 
 674 /* Deregister from output forwarded from a remote process.
 675  *
 676  * iofhdlr - the registration number returned from the
 677  *           call to PMIx_IOF_pull
 678  *
 679  * directives - optional array of attributes to control the
 680  *              behavior of the request. For example, this
 681  *              might include directives regarding what to
 682  *              do with any data currently in the IO buffer
 683  *              for this process
 684  *
      * ndirs - number of elements in the directives array
      *
 685  * cbfunc - function to be called when deregistration has
 686  *          been completed. Note that any IO to be flushed
 687  *          may continue to be received after deregistration
 688  *          has completed.
 689  *
 690  * cbdata - pointer to object to be returned in cbfunc
 691  */
 692 PMIX_EXPORT pmix_status_t PMIx_IOF_deregister(size_t iofhdlr,
 693                                               const pmix_info_t directives[], size_t ndirs,
 694                                               pmix_op_cbfunc_t cbfunc, void *cbdata);
 695 
 696 /* Push data collected locally (typically from stdin) to
 697  * stdin of target recipients.
 698  *
 699  * targets - array of process identifiers to which the data is to be delivered. Note
 700  *           that a WILDCARD rank indicates that all procs in the given nspace are
 701  *           to receive a copy of the data
 702  *
 703  * ntargets - number of procs in the targets array
 704  *
 705  * bo - pointer to a byte object containing the stdin data
 706  *
 707  * directives - optional array of attributes to control the
 708  *              behavior of the request. For example, this
 709  *              might include directives on buffering IO
 710  *              before delivery, and/or directives to include
 711  *              or exclude any backlogged data
 712  *
 713  * ndirs - number of elements in the directives array
 714  *
 715  * cbfunc - callback function invoked when the data has been forwarded
 716  *
 717  * cbdata - object to be returned in cbfunc
 718  */
 719 PMIX_EXPORT pmix_status_t PMIx_IOF_push(const pmix_proc_t targets[], size_t ntargets,
 720                                         pmix_byte_object_t *bo,
 721                                         const pmix_info_t directives[], size_t ndirs,
 722                                         pmix_op_cbfunc_t cbfunc, void *cbdata);
 723 
 724 /* Construct a new group composed of the specified processes and identified with
 725  * the provided group identifier. Both blocking and non-blocking versions
 726  * are provided (the callback function for the non-blocking form will be called
 727  * once all specified processes have joined the group). The group identifier is
 728  * a user-defined, NULL-terminated character array of length less than or equal
 729  * to PMIX_MAX_NSLEN. Only characters accepted by standard string comparison
 730  * functions (e.g., strncmp) are supported.
 731  *
 732  * Processes may engage in multiple simultaneous group construct operations as
 733  * desired so long as each is provided with a unique group ID. The info array
 734  * (named "directives" in the blocking form) can be used to pass user-level
 735  * directives on timeout constraints and other options available from the PMIx server.
 736  *
 737  * The construct leader (if PMIX_GROUP_LEADER is provided) or all participants
 738  * will receive events (if registered for the PMIX_GROUP_MEMBER_FAILED event)
 739  * whenever a process fails or terminates prior to calling
 740  * PMIx_Group_construct(_nb) - the events will contain the identifier of the
 741  * process that failed to join plus any other information that the resource
 742  * manager provided. This provides an opportunity for the leader to react to
 743  * the event - e.g., to invite an alternative member to the group or to decide
 744  * to proceed with a smaller group. The decision to proceed with a smaller group
 745  * is communicated to the PMIx library in the results array at the end of the
 746  * event handler. This allows PMIx to properly adjust accounting for procedure
 747  * completion. When construct is complete, the participating PMIx servers will
 748  * be alerted to any change in participants and each group member will (if
 749  * registered) receive a PMIX_GROUP_MEMBERSHIP_UPDATE event updating the group
 750  * membership.
 751  *
 752  * Processes in a group under construction are not allowed to leave the group
 753  * until group construction is complete. Upon completion of the construct
 754  * procedure, each group member will have access to the job-level information
 755  * of all nspaces represented in the group and the contact information for
 756  * every group member.
 757  *
 758  * Failure of the leader at any time will cause a PMIX_GROUP_LEADER_FAILED event
 759  * to be delivered to all participants so they can optionally declare a new leader.
 760  * A new leader is identified by providing the PMIX_GROUP_LEADER attribute in
 761  * the results array in the return of the event handler. Only one process is
 762  * allowed to return that attribute, declaring itself as the new leader. Results
 763  * of the leader selection will be communicated to all participants via a
 764  * PMIX_GROUP_LEADER_SELECTED event identifying the new leader. If no leader
 765  * was selected, then the status code provided in the event handler will provide
 766  * an error value so the participants can take appropriate action.
 767  *
 768  * Any participant that returns PMIX_GROUP_CONSTRUCT_ABORT from the leader failed
 769  * event handler will cause the construct process to abort. Those processes
 770  * engaged in the blocking construct will return from the call with the
 771  * PMIX_GROUP_CONSTRUCT_ABORT status. Non-blocking participants will have
 772  * their callback function executed with that status.
 773  *
 774  * Some relevant attributes for this operation:
 775  *    PMIX_GROUP_LEADER - declare this process to be the leader of the construction
 776  *                        procedure. If a process provides this attribute, then
 777  *                        failure notification for any participating process will
 778  *                        go only to that one process. In the absence of a
 779  *                        declared leader, failure events go to all participants.
 780  *    PMIX_GROUP_OPTIONAL - participation is optional - do not return an error if
 781  *                          any of the specified processes terminate
 782  *                          without having joined (default=false)
 783  *    PMIX_GROUP_NOTIFY_TERMINATION - notify remaining members when another member
 784  *                                    terminates without first leaving the
 785  *                                    group (default=false)
 786  *    PMIX_GROUP_ASSIGN_CONTEXT_ID - requests that the RM assign a unique context
 787  *                                   ID (size_t) to the group. The value is returned
 788  *                                   in the PMIX_GROUP_CONSTRUCT_COMPLETE event
 789  *    PMIX_TIMEOUT - return an error if the group doesn't assemble within the
 790  *                   specified number of seconds. Targets the scenario where a
 791  *                   process fails to call PMIx_Group_construct due to hanging
 792  *
 793  */
 794 PMIX_EXPORT pmix_status_t PMIx_Group_construct(const char grp[],
 795                                                const pmix_proc_t procs[], size_t nprocs,
 796                                                const pmix_info_t directives[], size_t ndirs,
 797                                                pmix_info_t **results, size_t *nresults);
 798 
 799 PMIX_EXPORT pmix_status_t PMIx_Group_construct_nb(const char grp[],
 800                                                   const pmix_proc_t procs[], size_t nprocs,
 801                                                   const pmix_info_t info[], size_t ninfo,
 802                                                   pmix_info_cbfunc_t cbfunc, void *cbdata);
 803 
 804 /* Explicitly invite specified processes to join a group.
 805  *
 806  * Each invited process will be notified of the invitation via the PMIX_GROUP_INVITED
 807  * event. The processes being invited must have registered for the PMIX_GROUP_INVITED
 808  * event in order to be notified of the invitation. When ready to respond, each invited
 809  * process provides a response using the appropriate form of PMIx_Group_join. This will
 810  * notify the inviting process that the invitation was either accepted (via the
 811  * PMIX_GROUP_INVITE_ACCEPTED event) or declined (via the PMIX_GROUP_INVITE_DECLINED event).
 812  * The inviting process will also receive PMIX_GROUP_MEMBER_FAILED events whenever a
 813  * process fails or terminates prior to responding to the invitation.
 814  *
 815  * Upon accepting the invitation, both the inviting and invited process will receive
 816  * access to the job-level information of each other's nspaces and the contact
 817  * information of the other process.
 818  *
 819  * Some relevant attributes for this operation:
 820  *    PMIX_GROUP_ASSIGN_CONTEXT_ID - requests that the RM assign a unique context
 821  *                                   ID (size_t) to the group. The value is returned
 822  *                                   in the PMIX_GROUP_CONSTRUCT_COMPLETE event
 823  *    PMIX_TIMEOUT (int): return an error if the group doesn't assemble within the
 824  *                        specified number of seconds. Targets the scenario where an
 825  *                        invited process fails to call PMIx_Group_join due to hanging
 826  *
 827  * The inviting process is automatically considered the leader of the asynchronous
 828  * group construction procedure and will receive all failure or termination events
 829  * for invited members prior to completion. The inviting process is required to
 830  * provide a PMIX_GROUP_CONSTRUCT_COMPLETE event once the group has been fully
 831  * assembled - this event will be distributed to all participants along with the
 832  * final membership.
 833  *
 834  * Failure of the leader at any time will cause a PMIX_GROUP_LEADER_FAILED event
 835  * to be delivered to all participants so they can optionally declare a new leader.
 836  * A new leader is identified by providing the PMIX_GROUP_LEADER attribute in
 837  * the results array in the return of the event handler. Only one process is
 838  * allowed to return that attribute, declaring itself as the new leader. Results
 839  * of the leader selection will be communicated to all participants via a
 840  * PMIX_GROUP_LEADER_SELECTED event identifying the new leader. If no leader
 841  * was selected, then the status code provided in the event handler will provide
 842  * an error value so the participants can take appropriate action.
 843  *
 844  * Any participant that returns PMIX_GROUP_CONSTRUCT_ABORT from the event
 845  * handler will cause all participants to receive an event notifying them
 846  * of that status.
 847  */
 848 PMIX_EXPORT pmix_status_t PMIx_Group_invite(const char grp[],
 849                                             const pmix_proc_t procs[], size_t nprocs,
 850                                             const pmix_info_t info[], size_t ninfo,
 851                                             pmix_info_t **results, size_t *nresult);
 852 
 853 PMIX_EXPORT pmix_status_t PMIx_Group_invite_nb(const char grp[],
 854                                                const pmix_proc_t procs[], size_t nprocs,
 855                                                const pmix_info_t info[], size_t ninfo,
 856                                                pmix_info_cbfunc_t cbfunc, void *cbdata);
 857 
 858 /* Respond to an invitation to join a group that is being asynchronously constructed.
 859  *
 860  * The process must have registered for the PMIX_GROUP_INVITED event in order to be
 861  * notified of the invitation. When ready to respond, the process provides a response
 862  * using the appropriate form of PMIx_Group_join.
 863  *
 864  * Critical Note: Since the process is alerted to the invitation in a PMIx event handler,
 865  * the process must not use the blocking form of this call unless it first "thread shifts"
 866  * out of the handler and into its own thread context. Likewise, while it is safe to call
 867  * the non-blocking form of the API from the event handler, the process must not block
 868  * in the handler while waiting for the callback function to be called.
 869  *
 870  * Calling this function causes the group "leader" to be notified that the process has
 871  * either accepted or declined the request. The blocking form of the API will return
 872  * once the group has been completely constructed or the group's construction has failed
 873  * (as determined by the leader) - likewise, the callback function of the non-blocking
 874  * form will be executed upon the same conditions.
 875  *
 876  * Failure of the leader at any time will cause a PMIX_GROUP_LEADER_FAILED event
 877  * to be delivered to all participants so they can optionally declare a new leader.
 878  * A new leader is identified by providing the PMIX_GROUP_LEADER attribute in
 879  * the results array in the return of the event handler. Only one process is
 880  * allowed to return that attribute, declaring itself as the new leader. Results
 881  * of the leader selection will be communicated to all participants via a
 882  * PMIX_GROUP_LEADER_SELECTED event identifying the new leader. If no leader
 883  * was selected, then the status code provided in the event handler will provide
 884  * an error value so the participants can take appropriate action.
 885  *
 886  * Any participant that returns PMIX_GROUP_CONSTRUCT_ABORT from the leader failed
 887  * event handler will cause all participants to receive an event notifying them
 888  * of that status. Similarly, the leader may elect to abort the procedure
 889  * by either returning PMIX_GROUP_CONSTRUCT_ABORT from the handler assigned
 890  * to the PMIX_GROUP_INVITE_ACCEPTED or PMIX_GROUP_INVITE_DECLINED codes, or
 891  * by generating an event for the abort code. Abort events will be sent to
 892  * all invited participants.
      *
      * NOTE(review): the leader and opt parameters are not described here. opt
      * presumably selects between accepting and declining the invitation, and
      * leader presumably identifies the inviting process - confirm against the
      * pmix_group_opt_t definition and the implementation.
 893  */
 894 PMIX_EXPORT pmix_status_t PMIx_Group_join(const char grp[],
 895                                           const pmix_proc_t *leader,
 896                                           pmix_group_opt_t opt,
 897                                           const pmix_info_t info[], size_t ninfo,
 898                                           pmix_info_t **results, size_t *nresult);
 899 
 900 PMIX_EXPORT pmix_status_t PMIx_Group_join_nb(const char grp[],
 901                                              const pmix_proc_t *leader,
 902                                              pmix_group_opt_t opt,
 903                                              const pmix_info_t info[], size_t ninfo,
 904                                              pmix_info_cbfunc_t cbfunc, void *cbdata);
 905 
 906 /* Leave a PMIx Group. Calls to PMIx_Group_leave (or its non-blocking form) will cause
 907  * a PMIX_GROUP_LEFT event to be generated notifying all members of the group of the
 908  * caller's departure. The function will return (or the non-blocking function will
 909  * execute the specified callback function) once the event has been locally generated;
 910  * this is not indicative of remote receipt. All PMIx-based collectives such as
 911  * PMIx_Fence in action across the group will automatically be adjusted if the
 912  * collective was called with the PMIX_GROUP_FT_COLLECTIVE attribute (default is
 913  * false) - otherwise, the standard error return behavior will be provided.
 914  *
 915  * Critical Note: The PMIx_Group_leave API is intended solely for asynchronous
 916  * departures of individual processes from a group as it is not a scalable
 917  * operation - i.e., when a process determines it should no longer be a part of a
 918  * defined group, but the remainder of the group retains a valid reason to continue
 919  * in existence. Developers are advised to use PMIx_Group_destruct (or its
 920  * non-blocking form) for all other scenarios as it represents a more scalable
 921  * operation.
 922  */
 923 PMIX_EXPORT pmix_status_t PMIx_Group_leave(const char grp[],
 924                                            const pmix_info_t info[], size_t ninfo);
 925 
 926 PMIX_EXPORT pmix_status_t PMIx_Group_leave_nb(const char grp[],
 927                                               const pmix_info_t info[], size_t ninfo,
 928                                               pmix_op_cbfunc_t cbfunc, void *cbdata);
 929 
 930 /* Destruct a group identified by the provided group identifier. Both blocking and
 931  * non-blocking versions are provided (the callback function for the non-blocking
 932  * form will be called once all members of the group have called "destruct").
 933  * Processes may engage in multiple simultaneous group destruct operations as
 934  * desired so long as each involves a unique group ID. The info array can be used
 935  * to pass user-level directives regarding timeout constraints and other options
 936  * available from the PMIx server.
 937  *
 938  * Some relevant attributes for this operation:
 939  *
 940  *    PMIX_TIMEOUT (int): return an error if the group doesn't destruct within the
 941  *                        specified number of seconds. Targets the scenario where
 942  *                        a process fails to call PMIx_Group_destruct due to hanging
 943  *
 944  * The destruct API will return an error if any group process fails or terminates
 945  * prior to calling PMIx_Group_destruct or its non-blocking version unless the
 946  * PMIX_GROUP_NOTIFY_TERMINATION attribute was provided (with a value of true) at
 947  * time of group construction. If notification was requested, then an event will
 948  * be delivered (using PMIX_GROUP_MEMBER_FAILED) for each process that fails to
 949  * call destruct and the destruct tracker updated to account for the lack of
 950  * participation. The PMIx_Group_destruct operation will subsequently return
 951  * PMIX_SUCCESS when the remaining processes have all called destruct - i.e., the
 952  * event will serve in place of return of an error.
 953  */
 954 PMIX_EXPORT pmix_status_t PMIx_Group_destruct(const char grp[],
 955                                               const pmix_info_t info[], size_t ninfo);
 956 
 957 PMIX_EXPORT pmix_status_t PMIx_Group_destruct_nb(const char grp[],
 958                                                  const pmix_info_t info[], size_t ninfo,
 959                                                  pmix_op_cbfunc_t cbfunc, void *cbdata);
 960 
 961 
 962 #if defined(c_plusplus) || defined(__cplusplus)
 963 }
 964 #endif
 965 
 966 #endif

/* [<][>][^][v][top][bottom][index][help] */