root/opal/mca/pmix/pmix_server.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /*
   2  * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
   3  * $COPYRIGHT$
   4  *
   5  * Additional copyrights may follow
   6  *
   7  * $HEADER$
   8  */
   9 
  10 #ifndef OPAL_PMIX_SERVER_H
  11 #define OPAL_PMIX_SERVER_H
  12 
  13 #include "opal_config.h"
  14 #include "opal/types.h"
  15 
  16 #include "opal/mca/pmix/pmix_types.h"
  17 
  18 BEGIN_C_DECLS
  19 
  20 /****    SERVER FUNCTION-SHIPPED APIs    ****/
  21 /* NOTE: for performance purposes, the host server is required to
  22  * return as quickly as possible from all functions. Execution of
  23  * the function is thus to be done asynchronously so as to allow
  24  * the server support library to handle multiple client requests
  25  * as quickly and scalably as possible.
  26  *
  27  * ALL data passed to the host server functions is "owned" by the
  28  * server support library and MUST NOT be freed by the host server. Data
  29  * returned by the host server via callback function is owned by the host
  30  * server, which is free to release it upon return from the callback. */
  31 
  32 
  33 /* Notify the host server that the client identified by proc has connected to us */
  34 typedef int (*opal_pmix_server_client_connected_fn_t)(opal_process_name_t *proc,
  35                                                       void* server_object,           /* opaque pointer; presumably the host's own object for this client - TODO confirm */
  36                                                       opal_pmix_op_cbfunc_t cbfunc,  /* presumably executed by the host once it has processed the notification - TODO confirm */
  37                                                       void *cbdata);                 /* presumably handed back unchanged when cbfunc is executed - TODO confirm */
  38 
  39 /* Notify the host server that the client identified by proc called
  40  * pmix.finalize. The client remains in a blocked state until the host
  41  * server executes cbfunc; only then can the server support library
  42  * release the client. A host that never executes cbfunc leaves the client blocked */
  43 typedef int (*opal_pmix_server_client_finalized_fn_t)(opal_process_name_t *proc, void* server_object,
  44                                                       opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
  45 
  46 /* A local client (proc) called pmix.abort. The client will be in a blocked
  47  * state until the host server executes the callback function, thus
  48  * allowing the server support library to release the client. The
  49  * procs_to_abort list indicates which processes are to be terminated; a NULL
  50  * list indicates that all procs in the client's nspace are to be terminated */
  51 typedef int (*opal_pmix_server_abort_fn_t)(opal_process_name_t *proc, void *server_object,
  52                                            int status, const char msg[],   /* presumably the status and message supplied by the aborting client - TODO confirm */
  53                                            opal_list_t *procs_to_abort,
  54                                            opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
  55 
  56 /* At least one client called either pmix.fence or pmix.fence_nb. In either case,
  57  * the host server will be called via this non-blocking function to execute
  58  * the specified operation once all participating local procs have
  59  * contributed. All processes in the procs list are required to participate
  60  * in the fence[_nb] operation. The callback is to be executed once each daemon
  61  * hosting at least one participant has called the host server's fencenb function.
  62  *
  63  * The info list of opal_value_t includes any directives from the user regarding
  64  * how the fence is to be executed (e.g., timeout limits).
  65  *
  66  * The provided data is to be collectively shared with all host
  67  * servers involved in the fence operation, and returned in the modex
  68  * cbfunc. A NULL data value indicates that the local procs had
  69  * no data to contribute */
  70 typedef int (*opal_pmix_server_fencenb_fn_t)(opal_list_t *procs, opal_list_t *info,
  71                                              char *data, size_t ndata,   /* ndata: presumably the size of data in bytes - TODO confirm */
  72                                              opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);
  73 
  74 /* Used by the PMIx server to request that its local host contact the
  75  * PMIx server on the remote node that hosts the specified proc, in order
  76  * to obtain and return a direct modex blob for that proc.
  77  *
  78  * The info list of opal_value_t includes any directives from the user regarding
  79  * how the operation is to be executed (e.g., timeout limits).
  80  */
  81 typedef int (*opal_pmix_server_dmodex_req_fn_t)(opal_process_name_t *proc, opal_list_t *info,
  82                                                 opal_pmix_modex_cbfunc_t cbfunc, void *cbdata);   /* presumably the blob is delivered through cbfunc - TODO confirm */
  83 
  84 
  85 /* Publish data per the PMIx API specification. The callback is to be executed
  86  * upon completion of the operation. The host server is not required to guarantee
  87  * support for the requested scope - i.e., the server does not need to return an
  88  * error if the data store doesn't support scope-based isolation. However, the
  89  * server must return an error (a) if the key is duplicative within the storage
  90  * scope, and (b) if the server does not allow overwriting of published info by
  91  * the original publisher - it is left to the discretion of the host server to
  92  * allow info-key-based flags to modify this behavior. The persist flag indicates
  93  * how long the server should retain the data. The nspace/rank of the publishing
  94  * process (proc) is also provided and is expected to be returned on any subsequent
  95  * lookup request */
  96 typedef int (*opal_pmix_server_publish_fn_t)(opal_process_name_t *proc,
  97                                              opal_list_t *info,   /* NOTE(review): no separate scope/persist parameters exist; presumably both arrive as entries in info together with the data to publish - confirm */
  98                                              opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
  99 
 100 /* Lookup published data. The host server will be passed a NULL-terminated array
 101  * of string keys along with the scope within which the data is expected to have
 102  * been published. The host server is not required to guarantee support for all
 103  * PMIx-defined scopes, but should only search data stores within the specified
 104  * scope within the context of the corresponding "publish" API. The wait directive
 105  * indicates whether the server should wait for all data to become available
 106  * before executing the callback function, or should call back with whatever
 107  * data is immediately available.
 108  *
 109  * The info list of opal_value_t includes any directives from the user regarding
 110  * how the operation is to be executed (e.g., timeout limits, whether the
 111  * lookup should wait until data appears).
 112  */
 113 typedef int (*opal_pmix_server_lookup_fn_t)(opal_process_name_t *proc, char **keys,
 114                                             opal_list_t *info,   /* NOTE(review): there is no separate scope parameter; presumably the scope is also carried in info - confirm */
 115                                             opal_pmix_lookup_cbfunc_t cbfunc, void *cbdata);
 116 
 117 /* Delete data from the data store. The host server will be passed a NULL-terminated
 118  * array of string keys (keys) along with the scope within which the data is expected
 119  * to have been published. The callback is to be executed upon completion of the
 120  * delete procedure */
 121 typedef int (*opal_pmix_server_unpublish_fn_t)(opal_process_name_t *proc, char **keys,
 122                                                opal_list_t *info,   /* NOTE(review): there is no separate scope parameter; presumably the scope is carried in info - confirm */
 123                                                opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
 124 
 125 /* Spawn a set of applications/processes as per the PMIx API. Note that
 126  * applications are not required to be MPI or any other programming model.
 127  * Thus, the host server cannot make any assumptions as to their required
 128  * support. The callback function is to be executed once all processes have
 129  * been started. An error in starting any application or process in this
 130  * request shall cause all applications and processes in the request to
 131  * be terminated, and an error to be returned to the originating caller */
 132 typedef int (*opal_pmix_server_spawn_fn_t)(opal_process_name_t *requestor,
 133                                            opal_list_t *job_info, opal_list_t *apps,   /* presumably job-level directives and the list of applications to start - TODO confirm element types */
 134                                            opal_pmix_spawn_cbfunc_t cbfunc, void *cbdata);
 135 
 136 /* Record the specified processes as "connected". This means that the resource
 137  * manager should treat the failure of any process in the specified group as
 138  * a reportable event, and take appropriate action. The callback function is
 139  * to be called once all participating processes have called connect. Note that
 140  * a process can only engage in *one* connect operation involving the identical
 141  * set of procs at a time. However, a process *can* be simultaneously engaged
 142  * in multiple connect operations, each involving a different set of procs.
 143  *
 144  * The info list of opal_value_t includes any directives from the user regarding
 145  * how the operation is to be executed (e.g., timeout limits).
 146  */
 147 typedef int (*opal_pmix_server_connect_fn_t)(opal_list_t *procs, opal_list_t *info,
 148                                              opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
 149 
 150 /* Disconnect a previously connected set of processes. An error should be returned
 151  * if the specified set of procs was not previously "connected". As above, a process
 152  * may be involved in multiple simultaneous disconnect operations. However, a process
 153  * is not allowed to reconnect to a set of procs that has not fully completed
 154  * disconnect - i.e., you have to fully disconnect before you can reconnect to the
 155  * same group of processes.
 156  *
 157  * The info list of opal_value_t includes any directives from the user regarding
 158  * how the operation is to be executed (e.g., timeout limits).
 159  */
 160 typedef int (*opal_pmix_server_disconnect_fn_t)(opal_list_t *procs, opal_list_t *info,
 161                                                 opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
 162 
 163 /* Register to receive notifications for the specified events. The resource
 164  * manager may have access to events beyond process failure. In cases where
 165  * the client application requests to be notified of such events, the request
 166  * will be passed to the PMIx server, which in turn shall pass the request to
 167  * the resource manager through this function. The info list of opal_value_t
 168  * will provide the OPAL error codes corresponding to the desired events */
 169  typedef int (*opal_pmix_server_register_events_fn_t)(opal_list_t *info,
 170                                                       opal_pmix_op_cbfunc_t cbfunc,
 171                                                       void *cbdata);
 172 
 173 /* Deregister from the specified events. The info list of opal_value_t will provide
 174  * the OPAL error codes corresponding to the events to be deregistered */
 175  typedef int (*opal_pmix_server_deregister_events_fn_t)(opal_list_t *info,
 176                                                         opal_pmix_op_cbfunc_t cbfunc,
 177                                                         void *cbdata);
 178 
 179 /* Notify the specified processes of an event generated either by
 180  * the PMIx server itself, or by one of its local clients. The RTE
 181  * is requested to pass the notification to each PMIx server that
 182  * hosts one or more of the specified processes */
 183 typedef int (*opal_pmix_server_notify_fn_t)(int code, opal_process_name_t *source,   /* presumably the event code and the process that generated the event - TODO confirm */
 184                                             opal_list_t *info,   /* NOTE(review): no explicit target list is passed; presumably the processes to notify are identified via info - confirm */
 185                                             opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
 186 
 187 /* Query the RTE for information - the queries list is composed of opal_pmix_query_t items */
 188 typedef int (*opal_pmix_server_query_fn_t)(opal_process_name_t *requestor,
 189                                            opal_list_t *queries,
 190                                            opal_pmix_info_cbfunc_t cbfunc, void *cbdata);   /* presumably the answers are delivered through cbfunc - TODO confirm */
 191 
 192 /* Register that a tool has connected to the server, and request
 193  * that the tool be assigned a jobid for further interactions.
 194  * The optional info list of opal_value_t can be used to pass qualifiers
 195  * for the connection request:
 196  *
 197  * (a) OPAL_PMIX_USERID - effective userid of the tool
 198  * (b) OPAL_PMIX_GRPID - effective groupid of the tool
 199  * (c) OPAL_PMIX_FWD_STDOUT - forward any stdout to this tool
 200  * (d) OPAL_PMIX_FWD_STDERR - forward any stderr to this tool
 201  * (e) OPAL_PMIX_FWD_STDIN - forward stdin from this tool to any
 202  *     processes spawned on its behalf
 203  */
 204 typedef void (*opal_pmix_server_tool_connection_fn_t)(opal_list_t *info,   /* this function returns void, so any outcome can only be delivered through cbfunc */
 205                                                       opal_pmix_tool_connection_cbfunc_t cbfunc,
 206                                                       void *cbdata);
 207 
 208 /* Log data on behalf of the client (requestor) */
 209 typedef void (*opal_pmix_server_log_fn_t)(opal_process_name_t *requestor,   /* this function returns void, so any status can only be delivered through cbfunc */
 210                                           opal_list_t *info,                /* presumably the data to be logged - TODO confirm */
 211                                           opal_list_t *directives,          /* presumably qualifiers on how the logging is to be done - TODO confirm */
 212                                           opal_pmix_op_cbfunc_t cbfunc,
 213                                           void *cbdata);
 214 
 215 
 216 /* Callback function for incoming connection requests from
 217  * local clients; incoming_sd is the socket for the new connection */
 218 typedef void (*opal_pmix_connection_cbfunc_t)(int incoming_sd);
 219 
 220 /* Register a socket (listening_sd) the host server can monitor for
 221  * connection requests, harvest them, and then call our internal callback
 222  * function for further processing. A listener thread is essential
 223  * to efficiently harvesting connection requests from large
 224  * numbers of local clients such as occur when running on large
 225  * SMPs. The host server listener is required to call accept
 226  * on the incoming connection request, and then pass the
 227  * resulting socket to the provided cbfunc. A NULL for this function
 228  * will cause the internal PMIx server to spawn its own listener
 229  * thread */
 230 typedef int (*opal_pmix_server_listener_fn_t)(int listening_sd,
 231                                               opal_pmix_connection_cbfunc_t cbfunc);
 232 
 233 /* Request allocation modifications on behalf of a client */
 234 typedef int (*opal_pmix_server_alloc_fn_t)(const opal_process_name_t *client,
 235                                            opal_pmix_alloc_directive_t directive,   /* presumably selects the kind of modification requested - TODO confirm against pmix_types.h */
 236                                            opal_list_t *data,                       /* presumably the details of the request - TODO confirm element type */
 237                                            opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
 238 
 239 /* Execute a job control action on behalf of a client (requestor) */
 240 typedef int (*opal_pmix_server_job_control_fn_t)(const opal_process_name_t *requestor,
 241                                                  opal_list_t *targets, opal_list_t *directives,   /* presumably the processes to act upon and the action(s) to apply - TODO confirm */
 242                                                  opal_pmix_info_cbfunc_t cbfunc, void *cbdata);
 243 
 244 /* NOTE: we do not provide a monitoring capability, so no monitor function type is declared here */
 245 
 246 /* Request forwarding of the specified IO channels from the given sources
 247  * to the local PMIx server for distribution to local clients */
 248 typedef int (*opal_pmix_server_iof_pull_fn_t)(opal_list_t *sources,
 249                                               opal_list_t *directives,
 250                                               opal_pmix_iof_channel_t channels,   /* presumably a set of channels (the name is plural) - TODO confirm against pmix_types.h */
 251                                               opal_pmix_op_cbfunc_t cbfunc, void *cbdata);
 252 
 253 /* Entry point for pushing forwarded IO to clients/tools; takes no callback */
 254 typedef int (*opal_pmix_server_iof_push_fn_t)(const opal_process_name_t *source,
 255                                               opal_pmix_iof_channel_t channel,
 256                                               unsigned char *data, size_t nbytes);   /* nbytes: presumably the length of data in bytes - TODO confirm */
 257 
 258 typedef struct opal_pmix_server_module_1_0_0_t {   /* table of host-server entry points, one member per function type declared in this header */
 259     opal_pmix_server_client_connected_fn_t      client_connected;    /* a client connected */
 260     opal_pmix_server_client_finalized_fn_t      client_finalized;    /* a client called pmix.finalize */
 261     opal_pmix_server_abort_fn_t                 abort;               /* a local client called pmix.abort */
 262     opal_pmix_server_fencenb_fn_t               fence_nb;            /* execute a fence across the participating procs */
 263     opal_pmix_server_dmodex_req_fn_t            direct_modex;        /* obtain a direct modex blob for a proc hosted on a remote node */
 264     opal_pmix_server_publish_fn_t               publish;             /* publish data */
 265     opal_pmix_server_lookup_fn_t                lookup;              /* look up published data */
 266     opal_pmix_server_unpublish_fn_t             unpublish;           /* delete published data */
 267     opal_pmix_server_spawn_fn_t                 spawn;               /* spawn applications/processes */
 268     opal_pmix_server_connect_fn_t               connect;             /* record a set of procs as "connected" */
 269     opal_pmix_server_disconnect_fn_t            disconnect;          /* disconnect a previously connected set of procs */
 270     opal_pmix_server_register_events_fn_t       register_events;     /* register for event notifications */
 271     opal_pmix_server_deregister_events_fn_t     deregister_events;   /* deregister from event notifications */
 272     opal_pmix_server_notify_fn_t                notify_event;        /* pass an event notification on via the RTE */
 273     opal_pmix_server_query_fn_t                 query;               /* query the RTE for information */
 274     opal_pmix_server_tool_connection_fn_t       tool_connected;      /* a tool connected and needs a jobid */
 275     opal_pmix_server_log_fn_t                   log;                 /* log data on behalf of a client */
 276     opal_pmix_server_listener_fn_t              listener;            /* host-provided listener; NULL makes the internal PMIx server spawn its own listener thread */
 277     opal_pmix_server_alloc_fn_t                 allocate;            /* request allocation modifications */
 278     opal_pmix_server_job_control_fn_t           job_control;         /* execute a job control action */
 279     opal_pmix_server_iof_pull_fn_t              iof_pull;            /* request forwarding of IO channels to the local PMIx server */
 280     opal_pmix_server_iof_push_fn_t              iof_push;            /* push forwarded IO to clients/tools */
 281 } opal_pmix_server_module_t;
 282 
 283 
 284 END_C_DECLS
 285 
 286 #endif

/* [<][>][^][v][top][bottom][index][help] */