1 /*
2 * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
3 * Copyright (c) 2016 Research Organization for Information Science
4 * and Technology (RIST). All rights reserved.
5 * $COPYRIGHT$
6 *
7 * Additional copyrights may follow
8 *
9 * $HEADER$
10 */
11
12 #ifndef OPAL_PMIX_TYPES_H
13 #define OPAL_PMIX_TYPES_H
14
15 #include "opal_config.h"
16
17 #include "opal/dss/dss_types.h"
18 #include "opal/util/proc.h"
19
20 BEGIN_C_DECLS
21
/* Special rank value for requests for job-level data where the info
 * itself isn't associated with any specific rank, or when a request
 * involves a rank that isn't known - e.g., when someone requests info
 * thru one of the legacy interfaces where the rank is typically encoded
 * into the key itself since there is no rank parameter in the API itself */
#define OPAL_PMIX_RANK_UNDEF UINT32_MAX

/* Special rank value indicating that the user wants the data for the
 * given key from every rank that posted that key.
 * NOTE: the replacement list is parenthesized so the macro acts as a
 * single operand inside larger expressions (e.g., "x - OPAL_PMIX_RANK_WILDCARD"
 * must not expand to "x - UINT32_MAX - 1") */
#define OPAL_PMIX_RANK_WILDCARD (UINT32_MAX - 1)

/* other special rank values will be used to define
 * groups of ranks for use in collectives */
#define OPAL_PMIX_RANK_LOCAL_NODE (UINT32_MAX - 2)  /* all ranks on local node */
38
39
/* The definitions that follow form a set of "standard" attributes that
 * can be queried. Implementations (and users) are free to extend the set
 * as desired, so the get functions need to be capable of handling the
 * "not found" condition.
 *
 * Note that these are attributes of the system and the job, as opposed to
 * values the application (or underlying MPI library) might choose to
 * expose - i.e., they are values provided by the resource manager rather
 * than by the application. Thus, these keys are RESERVED. */
#define OPAL_PMIX_ATTR_UNDEF NULL
50
/* (bool) The host RM wants to declare itself as willing to
 * accept tool connection requests */
#define OPAL_PMIX_SERVER_TOOL_SUPPORT "pmix.srvr.tool"
#define OPAL_PMIX_SERVER_REMOTE_CONNECTIONS "pmix.srvr.remote"  /* (bool) Allow connections from remote tools (do not use loopback device) */
/* (bool) The host RM wants to declare itself as being the local
 * system server for PMIx connection requests */
#define OPAL_PMIX_SERVER_SYSTEM_SUPPORT "pmix.srvr.sys"
/* (char*) temp directory where PMIx server will place
 * client rendezvous points */
#define OPAL_PMIX_SERVER_TMPDIR "pmix.srvr.tmpdir"
/* (char*) temp directory where PMIx server will place
 * tool rendezvous points */
#define OPAL_PMIX_SYSTEM_TMPDIR "pmix.sys.tmpdir"
#define OPAL_PMIX_REGISTER_NODATA "pmix.reg.nodata"  /* (bool) Registration is for nspace only, do not copy job data */
#define OPAL_PMIX_SERVER_ENABLE_MONITORING "pmix.srv.monitor"  /* (bool) Enable PMIx internal monitoring by server */
#define OPAL_PMIX_SERVER_NSPACE "pmix.srv.nspace"  /* (char*) Name of the nspace to use for this server */
#define OPAL_PMIX_SERVER_RANK "pmix.srv.rank"  /* (uint32_t) Rank of this server */
/* (bool) Server is acting as a gateway for PMIx requests
 * that cannot be serviced on backend nodes
 * (e.g., logging to email) */
#define OPAL_PMIX_SERVER_GATEWAY "pmix.srv.gway"
67
/* attributes related to tools and their server connections */
#define OPAL_PMIX_TOOL_NSPACE "pmix.tool.nspace"  /* (char*) Name of the nspace to use for this tool */
#define OPAL_PMIX_TOOL_RANK "pmix.tool.rank"  /* (uint32_t) Rank of this tool */
#define OPAL_PMIX_SERVER_PIDINFO "pmix.srvr.pidinfo"  /* (pid_t) pid of the target server for a tool */
/* (bool) The requestor requires that a connection be made only to
 * a local system-level PMIx server */
#define OPAL_PMIX_CONNECT_TO_SYSTEM "pmix.cnct.sys"
#define OPAL_PMIX_CONNECT_SYSTEM_FIRST "pmix.cnct.sys.first"  /* (bool) Preferentially look for a system-level PMIx server first */
#define OPAL_PMIX_SERVER_URI "pmix.srvr.uri"  /* (char*) URI of server to be contacted */
#define OPAL_PMIX_SERVER_HOSTNAME "pmix.srvr.host"  /* (char*) node where target server is located */
#define OPAL_PMIX_CONNECT_MAX_RETRIES "pmix.tool.mretries"  /* (uint32_t) maximum number of times to try to connect to server */
#define OPAL_PMIX_CONNECT_RETRY_DELAY "pmix.tool.retry"  /* (uint32_t) time in seconds between connection attempts */
/* (bool) the tool wants to use internal PMIx support, but does
 * not want to connect to a PMIx server */
#define OPAL_PMIX_TOOL_DO_NOT_CONNECT "pmix.tool.nocon"
#define OPAL_PMIX_RECONNECT_SERVER "pmix.cnct.recon"  /* (bool) tool is requesting to change server connections */
#define OPAL_PMIX_LAUNCHER "pmix.tool.launcher"  /* (bool) tool is a launcher and needs rendezvous files created */
83
84
/* attributes identifying the requestor */
#define OPAL_PMIX_USERID "pmix.euid"  /* (uint32_t) effective user id */
#define OPAL_PMIX_GRPID "pmix.egid"  /* (uint32_t) effective group id */
#define OPAL_PMIX_DSTPATH "pmix.dstpath"  /* (char*) path to dstore files */
#define OPAL_PMIX_VERSION_INFO "pmix.version"  /* (char*) PMIx version of contactor */
#define OPAL_PMIX_REQUESTOR_IS_TOOL "pmix.req.tool"  /* (bool) requesting process is a tool */
#define OPAL_PMIX_REQUESTOR_IS_CLIENT "pmix.req.client"  /* (bool) requesting process is a client process */
/* (char*) user-assigned name for the process
 * set containing the given process */
#define OPAL_PMIX_PSET_NAME "pmix.pset.nm"
94
/* attributes describing the programming model */
#define OPAL_PMIX_PROGRAMMING_MODEL "pmix.pgm.model"  /* (char*) programming model being initialized (e.g., "MPI" or "OpenMP") */
#define OPAL_PMIX_MODEL_LIBRARY_NAME "pmix.mdl.name"  /* (char*) programming model implementation ID (e.g., "OpenMPI" or "MPICH") */
#define OPAL_PMIX_MODEL_LIBRARY_VERSION "pmix.mld.vrs"  /* (char*) programming model version string (e.g., "2.1.1") */
#define OPAL_PMIX_THREADING_MODEL "pmix.threads"  /* (char*) threading model used (e.g., "pthreads") */
#define OPAL_PMIX_MODEL_NUM_THREADS "pmix.mdl.nthrds"  /* (uint64_t) number of active threads being used by the model */
#define OPAL_PMIX_MODEL_NUM_CPUS "pmix.mdl.ncpu"  /* (uint64_t) number of cpus being used by the model */
#define OPAL_PMIX_MODEL_CPU_TYPE "pmix.mdl.cputype"  /* (char*) granularity - "hwthread", "core", etc. */
/* (char*) user-assigned name for a phase in the application execution - e.g.,
 * "cfd reduction" */
#define OPAL_PMIX_MODEL_PHASE_NAME "pmix.mdl.phase"
#define OPAL_PMIX_MODEL_PHASE_TYPE "pmix.mdl.ptype"  /* (char*) type of phase being executed - e.g., "matrix multiply" */
/* (char*) thread affinity policy - e.g.:
 *   "master" (thread co-located with master thread),
 *   "close"  (thread located on cpu close to master thread)
 *   "spread" (threads load-balanced across available cpus) */
#define OPAL_PMIX_MODEL_AFFINITY_POLICY "pmix.mdl.tap"
110
111
/* attributes controlling the USOCK rendezvous socket */
#define OPAL_PMIX_USOCK_DISABLE "pmix.usock.disable"  /* (bool) disable legacy usock support */
#define OPAL_PMIX_SOCKET_MODE "pmix.sockmode"  /* (uint32_t) POSIX mode_t (9 bits valid) */
/* (bool) use only one rendezvous socket, letting priorities and/or
 * MCA param select the active transport */
#define OPAL_PMIX_SINGLE_LISTENER "pmix.sing.listnr"
117
/* attributes controlling TCP connections */
#define OPAL_PMIX_TCP_URI "pmix.tcp.uri"  /* (char*) URI of server to connect to */
#define OPAL_PMIX_TCP_IF_INCLUDE "pmix.tcp.ifinclude"  /* (char*) comma-delimited list of devices and/or CIDR notation */
#define OPAL_PMIX_TCP_IF_EXCLUDE "pmix.tcp.ifexclude"  /* (char*) comma-delimited list of devices and/or CIDR notation */
#define OPAL_PMIX_TCP_IPV4_PORT "pmix.tcp.ipv4"  /* (int) IPv4 port to be used */
#define OPAL_PMIX_TCP_IPV6_PORT "pmix.tcp.ipv6"  /* (int) IPv6 port to be used */
#define OPAL_PMIX_TCP_DISABLE_IPV4 "pmix.tcp.disipv4"  /* (bool) true to disable IPv4 family */
#define OPAL_PMIX_TCP_DISABLE_IPV6 "pmix.tcp.disipv6"  /* (bool) true to disable IPv6 family */
126
127
/* general attributes that apply at the proc level */
#define OPAL_PMIX_CPUSET "pmix.cpuset"  /* (char*) hwloc bitmap applied to proc upon launch */
#define OPAL_PMIX_CREDENTIAL "pmix.cred"  /* (char*) security credential assigned to proc */
#define OPAL_PMIX_SPAWNED "pmix.spawned"  /* (bool) true if this proc resulted from a call to PMIx_Spawn */
/* (uint32_t) datatype architecture flag
 * not set at job startup, so cannot have the pmix prefix */
#define OPAL_PMIX_ARCH "opal.pmix.arch"
134
/* locations of scratch directories that applications may use */
#define OPAL_PMIX_TMPDIR "pmix.tmpdir"  /* (char*) top-level tmp dir assigned to session */
#define OPAL_PMIX_NSDIR "pmix.nsdir"  /* (char*) sub-tmpdir assigned to namespace */
#define OPAL_PMIX_PROCDIR "pmix.pdir"  /* (char*) sub-nsdir assigned to proc */
#define OPAL_PMIX_TDIR_RMCLEAN "pmix.tdir.rmclean"  /* (bool) Resource Manager will clean session directories */
140
141
/* relative rank (and related identity) information as assigned by the RM */
#define OPAL_PMIX_CLUSTER_ID "pmix.clid"  /* (char*) a string name for the cluster this proc is executing on */
#define OPAL_PMIX_PROCID "pmix.procid"  /* (opal_process_name_t) process identifier */
#define OPAL_PMIX_NSPACE "pmix.nspace"  /* (char*) nspace of a job */
#define OPAL_PMIX_JOBID "pmix.jobid"  /* (uint32_t) jobid assigned by scheduler */
#define OPAL_PMIX_APPNUM "pmix.appnum"  /* (uint32_t) app number within the job */
#define OPAL_PMIX_RANK "pmix.rank"  /* (uint32_t) process rank within the job */
#define OPAL_PMIX_GLOBAL_RANK "pmix.grank"  /* (uint32_t) rank spanning across all jobs in this session */
#define OPAL_PMIX_UNIV_RANK "pmix.grank"  /* (uint32_t) synonym for global_rank */
#define OPAL_PMIX_APP_RANK "pmix.apprank"  /* (uint32_t) rank within this app */
#define OPAL_PMIX_NPROC_OFFSET "pmix.offset"  /* (uint32_t) starting global rank of this job */
#define OPAL_PMIX_LOCAL_RANK "pmix.lrank"  /* (uint16_t) rank on this node within this job */
#define OPAL_PMIX_NODE_RANK "pmix.nrank"  /* (uint16_t) rank on this node spanning all jobs */
#define OPAL_PMIX_LOCALLDR "pmix.lldr"  /* (uint64_t) opal_identifier of lowest rank on this node within this job */
#define OPAL_PMIX_APPLDR "pmix.aldr"  /* (uint32_t) lowest rank in this app within this job */
#define OPAL_PMIX_PROC_PID "pmix.ppid"  /* (pid_t) pid of specified proc */
#define OPAL_PMIX_SESSION_ID "pmix.session.id"  /* (uint32_t) session identifier */
#define OPAL_PMIX_NODE_LIST "pmix.nlist"  /* (char*) comma-delimited list of nodes running procs for the specified nspace */
/* (char*) comma-delimited list of all nodes in this allocation regardless of
 * whether or not they currently host procs. */
#define OPAL_PMIX_ALLOCATED_NODELIST "pmix.alist"
#define OPAL_PMIX_HOSTNAME "pmix.hname"  /* (char*) name of the host the specified proc is on */
#define OPAL_PMIX_NODEID "pmix.nodeid"  /* (uint32_t) node identifier */
#define OPAL_PMIX_LOCAL_PEERS "pmix.lpeers"  /* (char*) comma-delimited string of ranks on this node within the specified nspace */
#define OPAL_PMIX_LOCAL_PROCS "pmix.lprocs"  /* (opal_list_t*) list of opal_namelist_t of procs on the specified node */
#define OPAL_PMIX_LOCAL_CPUSETS "pmix.lcpus"  /* (char*) colon-delimited cpusets of local peers within the specified nspace */
/* (char*) URI containing contact info for proc - NOTE: this is published by procs and
 * thus cannot be prefixed with "pmix" */
#define OPAL_PMIX_PROC_URI "opal.puri"
#define OPAL_PMIX_LOCALITY "pmix.loc"  /* (uint16_t) relative locality of two procs */
/* (pmix_proc_t*) identifier of the process that called PMIx_Spawn
 * to launch this proc's application */
#define OPAL_PMIX_PARENT_ID "pmix.parent"
#define OPAL_PMIX_EXIT_CODE "pmix.exit.code"  /* (int) exit code returned when proc terminated */
173
174
/* memory-related information */
#define OPAL_PMIX_AVAIL_PHYS_MEMORY "pmix.pmem"  /* (uint64_t) total available physical memory on this node */
#define OPAL_PMIX_DAEMON_MEMORY "pmix.dmn.mem"  /* (float) Mbytes of memory currently used by daemon */
#define OPAL_PMIX_CLIENT_AVG_MEMORY "pmix.cl.mem.avg"  /* (float) Average Mbytes of memory used by client processes */
179
180
/* size-related information */
#define OPAL_PMIX_UNIV_SIZE "pmix.univ.size"  /* (uint32_t) #procs in this nspace */
#define OPAL_PMIX_JOB_SIZE "pmix.job.size"  /* (uint32_t) #procs in this job */
#define OPAL_PMIX_JOB_NUM_APPS "pmix.job.napps"  /* (uint32_t) #apps in this job */
#define OPAL_PMIX_APP_SIZE "pmix.app.size"  /* (uint32_t) #procs in this app */
#define OPAL_PMIX_LOCAL_SIZE "pmix.local.size"  /* (uint32_t) #procs in this job on this node */
#define OPAL_PMIX_NODE_SIZE "pmix.node.size"  /* (uint32_t) #procs across all jobs on this node */
#define OPAL_PMIX_MAX_PROCS "pmix.max.size"  /* (uint32_t) max #procs for this job */
#define OPAL_PMIX_NUM_NODES "pmix.num.nodes"  /* (uint32_t) #nodes in this nspace */
190
191
/* topology info
 *
 * NOTE: OPAL_PMIX_NODE_LIST ("pmix.nlist") used to be re-defined in this
 * section. The key is already defined earlier in this header with the same
 * value, so the redundant second definition has been dropped to keep a
 * single point of truth for the key and its description. */
#define OPAL_PMIX_NET_TOPO "pmix.ntopo"  /* (char*) xml-representation of network topology */
#define OPAL_PMIX_LOCAL_TOPO "pmix.ltopo"  /* (char*) xml-representation of local node topology */
#define OPAL_PMIX_TOPOLOGY "pmix.topo"  /* (hwloc_topology_t) pointer to the PMIx client's internal topology object */
#define OPAL_PMIX_TOPOLOGY_XML "pmix.topo.xml"  /* (char*) XML-based description of topology */
#define OPAL_PMIX_TOPOLOGY_FILE "pmix.topo.file"  /* (char*) full path to file containing XML topology description */
#define OPAL_PMIX_TOPOLOGY_SIGNATURE "pmix.toposig"  /* (char*) topology signature string */
#define OPAL_PMIX_LOCALITY_STRING "pmix.locstr"  /* (char*) string describing a proc's location */
#define OPAL_PMIX_HWLOC_SHMEM_ADDR "pmix.hwlocaddr"  /* (size_t) address of HWLOC shared memory segment */
#define OPAL_PMIX_HWLOC_SHMEM_SIZE "pmix.hwlocsize"  /* (size_t) size of HWLOC shared memory segment */
#define OPAL_PMIX_HWLOC_SHMEM_FILE "pmix.hwlocfile"  /* (char*) path to HWLOC shared memory file */
#define OPAL_PMIX_HWLOC_XML_V1 "pmix.hwlocxml1"  /* (char*) XML representation of local topology using HWLOC v1.x format */
#define OPAL_PMIX_HWLOC_XML_V2 "pmix.hwlocxml2"  /* (char*) XML representation of local topology using HWLOC v2.x format */
#define OPAL_PMIX_HWLOC_SHARE_TOPO "pmix.hwlocsh"  /* (bool) Share the HWLOC topology via shared memory */
#define OPAL_PMIX_HWLOC_HOLE_KIND "pmix.hwlocholek"  /* (char*) Kind of VM "hole" HWLOC should use for shared memory */
208
209
/* attributes qualifying a request */
#define OPAL_PMIX_COLLECT_DATA "pmix.collect"  /* (bool) collect data and return it at the end of the operation */
#define OPAL_PMIX_TIMEOUT "pmix.timeout"  /* (int) time in sec before specified operation should time out */
/* (bool) specified operation should immediately return an error if requested
 * data cannot be found - do not request it from the host RM */
#define OPAL_PMIX_IMMEDIATE "pmix.immediate"
/* (int) caller requests that the server wait until at least the specified
 * #values are found (0 => all and is the default) */
#define OPAL_PMIX_WAIT "pmix.wait"
#define OPAL_PMIX_COLLECTIVE_ALGO "pmix.calgo"  /* (char*) comma-delimited list of algorithms to use for collective */
#define OPAL_PMIX_COLLECTIVE_ALGO_REQD "pmix.calreqd"  /* (bool) if true, indicates that the requested choice of algo is mandatory */
#define OPAL_PMIX_NOTIFY_COMPLETION "pmix.notecomp"  /* (bool) notify parent process upon termination of child job */
#define OPAL_PMIX_RANGE "pmix.range"  /* (int) opal_pmix_data_range_t value for calls to publish/lookup/unpublish */
#define OPAL_PMIX_PERSISTENCE "pmix.persist"  /* (int) opal_pmix_persistence_t value for calls to publish */
#define OPAL_PMIX_DATA_SCOPE "pmix.scope"  /* (pmix_scope_t) scope of the data to be found in a PMIx_Get call */
/* (bool) look only in the immediate data store for the requested value - do
 * not request data from the server if not found */
#define OPAL_PMIX_OPTIONAL "pmix.optional"
/* (bool) execute a blocking fence operation before executing the
 * specified operation */
#define OPAL_PMIX_EMBED_BARRIER "pmix.embed.barrier"
#define OPAL_PMIX_JOB_TERM_STATUS "pmix.job.term.status"  /* (int) status returned upon job termination */
#define OPAL_PMIX_PROC_STATE_STATUS "pmix.proc.state"  /* (int) process state */
229
230
231
/* Attributes the host server uses to pass data to the server convenience
 * library; that data will then be parsed and provided to the local clients */
#define OPAL_PMIX_PROC_DATA "pmix.pdata"  /* (pmix_value_array_t) starts with rank, then contains more data */
#define OPAL_PMIX_NODE_MAP "pmix.nmap"  /* (char*) regex of nodes containing procs for this job */
#define OPAL_PMIX_PROC_MAP "pmix.pmap"  /* (char*) regex describing procs on each node within this job */
#define OPAL_PMIX_APP_MAP_TYPE "pmix.apmap.type"  /* (char*) type of mapping used to layout the application (e.g., cyclic) */
#define OPAL_PMIX_APP_MAP_REGEX "pmix.apmap.regex"  /* (char*) regex describing the result of the mapping */
239
240
/* internal attributes for communicating data from the server to the client */
#define OPAL_PMIX_PROC_BLOB "pmix.pblob"  /* (pmix_byte_object_t) packed blob of process data */
#define OPAL_PMIX_MAP_BLOB "pmix.mblob"  /* (pmix_byte_object_t) packed blob of process location */
244
245
/* info keys for error handler registration and notification */
#define OPAL_PMIX_EVENT_HDLR_NAME "pmix.evname"  /* (char*) string name identifying this handler */
#define OPAL_PMIX_EVENT_JOB_LEVEL "pmix.evjob"  /* (bool) register for job-specific events only */
#define OPAL_PMIX_EVENT_ENVIRO_LEVEL "pmix.evenv"  /* (bool) register for environment events only */
#define OPAL_PMIX_EVENT_HDLR_FIRST "pmix.evfirst"  /* (bool) invoke this event handler before any other handlers */
#define OPAL_PMIX_EVENT_HDLR_LAST "pmix.evlast"  /* (bool) invoke this event handler after all other handlers have been called */
#define OPAL_PMIX_EVENT_HDLR_FIRST_IN_CATEGORY "pmix.evfirstcat"  /* (bool) invoke this event handler before any other handlers in this category */
#define OPAL_PMIX_EVENT_HDLR_LAST_IN_CATEGORY "pmix.evlastcat"  /* (bool) invoke this event handler after all other handlers in this category have been called */
#define OPAL_PMIX_EVENT_HDLR_BEFORE "pmix.evbefore"  /* (char*) put this event handler immediately before the one specified in the (char*) value */
#define OPAL_PMIX_EVENT_HDLR_AFTER "pmix.evafter"  /* (char*) put this event handler immediately after the one specified in the (char*) value */
#define OPAL_PMIX_EVENT_HDLR_PREPEND "pmix.evprepend"  /* (bool) prepend this handler to the precedence list within its category */
#define OPAL_PMIX_EVENT_HDLR_APPEND "pmix.evappend"  /* (bool) append this handler to the precedence list within its category */
#define OPAL_PMIX_EVENT_CUSTOM_RANGE "pmix.evrange"  /* (pmix_proc_t*) array of pmix_proc_t defining range of event notification */
#define OPAL_PMIX_EVENT_AFFECTED_PROC "pmix.evproc"  /* (pmix_proc_t) single proc that was affected */
#define OPAL_PMIX_EVENT_AFFECTED_PROCS "pmix.evaffected"  /* (pmix_proc_t*) array of pmix_proc_t defining affected procs */
#define OPAL_PMIX_EVENT_NON_DEFAULT "pmix.evnondef"  /* (bool) event is not to be delivered to default event handlers */
/* (void*) object to be returned whenever the registered cbfunc is invoked
 * NOTE: the object will _only_ be returned to the process that
 * registered it */
#define OPAL_PMIX_EVENT_RETURN_OBJECT "pmix.evobject"
#define OPAL_PMIX_EVENT_DO_NOT_CACHE "pmix.evnocache"  /* (bool) instruct the PMIx server not to cache the event */
#define OPAL_PMIX_EVENT_SILENT_TERMINATION "pmix.evsilentterm"  /* (bool) do not generate an event when this job normally terminates */
267
268
/* events related to fault tolerance */
#define OPAL_PMIX_EVENT_TERMINATE_SESSION "pmix.evterm.sess"  /* (bool) RM intends to terminate session */
#define OPAL_PMIX_EVENT_TERMINATE_JOB "pmix.evterm.job"  /* (bool) RM intends to terminate this job */
#define OPAL_PMIX_EVENT_TERMINATE_NODE "pmix.evterm.node"  /* (bool) RM intends to terminate all procs on this node */
#define OPAL_PMIX_EVENT_TERMINATE_PROC "pmix.evterm.proc"  /* (bool) RM intends to terminate just this process */
#define OPAL_PMIX_EVENT_ACTION_TIMEOUT "pmix.evtimeout"  /* (int) time in sec before RM will execute error response */
/* (bool) indicates that the handler has determined that the
 * application should be terminated */
#define OPAL_PMIX_EVENT_WANT_TERMINATION "pmix.evterm"
277
278
/* attributes describing how procs are to be spawned */
#define OPAL_PMIX_PERSONALITY "pmix.pers"  /* (char*) name of personality to use */
#define OPAL_PMIX_HOST "pmix.host"  /* (char*) comma-delimited list of hosts to use for spawned procs */
#define OPAL_PMIX_HOSTFILE "pmix.hostfile"  /* (char*) hostfile to use for spawned procs */
#define OPAL_PMIX_ADD_HOST "pmix.addhost"  /* (char*) comma-delimited list of hosts to add to allocation */
#define OPAL_PMIX_ADD_HOSTFILE "pmix.addhostfile"  /* (char*) hostfile to add to existing allocation */
#define OPAL_PMIX_PREFIX "pmix.prefix"  /* (char*) prefix to use for starting spawned procs */
#define OPAL_PMIX_WDIR "pmix.wdir"  /* (char*) working directory for spawned procs */
#define OPAL_PMIX_MAPPER "pmix.mapper"  /* (char*) mapper to use for placing spawned procs */
#define OPAL_PMIX_DISPLAY_MAP "pmix.dispmap"  /* (bool) display process map upon spawn */
#define OPAL_PMIX_PPR "pmix.ppr"  /* (char*) #procs to spawn on each identified resource */
#define OPAL_PMIX_MAPBY "pmix.mapby"  /* (char*) mapping policy */
#define OPAL_PMIX_RANKBY "pmix.rankby"  /* (char*) ranking policy */
#define OPAL_PMIX_BINDTO "pmix.bindto"  /* (char*) binding policy */
#define OPAL_PMIX_PRELOAD_BIN "pmix.preloadbin"  /* (bool) preload binaries */
#define OPAL_PMIX_PRELOAD_FILES "pmix.preloadfiles"  /* (char*) comma-delimited list of files to pre-position */
#define OPAL_PMIX_NON_PMI "pmix.nonpmi"  /* (bool) spawned procs will not call PMIx_Init */
/* (uint32_t) spawned proc rank that is to receive stdin
 * (PMIX_RANK_WILDCARD = all in given nspace) */
#define OPAL_PMIX_STDIN_TGT "pmix.stdin"
#define OPAL_PMIX_DEBUGGER_DAEMONS "pmix.debugger"  /* (bool) spawned app consists of debugger daemons */
/* (bool) designated app is to be spawned as a disconnected
 * job - i.e., not part of the "comm_world" of the job */
#define OPAL_PMIX_COSPAWN_APP "pmix.cospawn"
/* (bool) set the application's current working directory to
 * the session working directory assigned by the RM */
#define OPAL_PMIX_SET_SESSION_CWD "pmix.ssncwd"
#define OPAL_PMIX_TAG_OUTPUT "pmix.tagout"  /* (bool) tag application output with the ID of the source */
#define OPAL_PMIX_TIMESTAMP_OUTPUT "pmix.tsout"  /* (bool) timestamp output from applications */
#define OPAL_PMIX_MERGE_STDERR_STDOUT "pmix.mergeerrout"  /* (bool) merge stdout and stderr streams from application procs */
#define OPAL_PMIX_OUTPUT_TO_FILE "pmix.outfile"  /* (char*) output application output to given file */
#define OPAL_PMIX_INDEX_ARGV "pmix.indxargv"  /* (bool) mark the argv with the rank of the proc */
#define OPAL_PMIX_CPUS_PER_PROC "pmix.cpuperproc"  /* (uint32_t) #cpus to assign to each rank */
#define OPAL_PMIX_NO_PROCS_ON_HEAD "pmix.nolocal"  /* (bool) do not place procs on the head node */
#define OPAL_PMIX_NO_OVERSUBSCRIBE "pmix.noover"  /* (bool) do not oversubscribe the cpus */
#define OPAL_PMIX_REPORT_BINDINGS "pmix.repbind"  /* (bool) report bindings of the individual procs */
#define OPAL_PMIX_CPU_LIST "pmix.cpulist"  /* (char*) list of cpus to use for this job */
#define OPAL_PMIX_JOB_RECOVERABLE "pmix.recover"  /* (bool) application supports recoverable operations */
/* (bool) application is continuous, all failed procs should
 * be immediately restarted */
#define OPAL_PMIX_JOB_CONTINUOUS "pmix.continuous"
#define OPAL_PMIX_MAX_RESTARTS "pmix.maxrestarts"  /* (uint32_t) max number of times to restart a job */
#define OPAL_PMIX_FWD_STDIN "pmix.fwd.stdin"  /* (bool) forward the stdin from this process to the target processes */
#define OPAL_PMIX_FWD_STDOUT "pmix.fwd.stdout"  /* (bool) forward stdout from the spawned processes to this process (typically used by a tool) */
#define OPAL_PMIX_FWD_STDERR "pmix.fwd.stderr"  /* (bool) forward stderr from the spawned processes to this process (typically used by a tool) */
/* (bool) if a diagnostic channel exists, forward any output on it
 * from the spawned processes to this process (typically used by a tool) */
#define OPAL_PMIX_FWD_STDDIAG "pmix.fwd.stddiag"
322
/* attributes qualifying connect operations */
/* (bool) notify the other participants of the connection by event
 * each time a process connects */
#define OPAL_PMIX_CONNECT_NOTIFY_EACH "pmix.cnct.each"
/* (bool) notify all other participants that they are requested to
 * connect */
#define OPAL_PMIX_CONNECT_NOTIFY_REQ "pmix.cnct.req"
/* (bool) participation is optional - do not return error if procs
 * terminate without having connected */
#define OPAL_PMIX_CONNECT_OPTIONAL "pmix.cnt.opt"
/* (bool) provide participants with job-level info for all participating
 * nspaces, but do not assign a new nspace or rank */
#define OPAL_PMIX_CONNECT_XCHG_ONLY "pmix.cnt.xchg"
#define OPAL_PMIX_CONNECT_ID "pmix.cnt.id"  /* (char*) an application-provided string identifier for a PMIx_Connect operation. */
333
334
335
/* attributes describing operations on environmental variables */
/* (pmix_envar_t*) set the envar to the given value,
 * overwriting any pre-existing one */
#define OPAL_PMIX_SET_ENVAR "pmix.envar.set"
#define OPAL_PMIX_ADD_ENVAR "pmix.envar.add"  /* (pmix_envar_t*) add envar, but do not overwrite any existing one */
#define OPAL_PMIX_UNSET_ENVAR "pmix.envar.unset"  /* (char*) unset the envar, if present */
/* (pmix_envar_t*) prepend the given value to the
 * specified envar using the separator
 * character, creating the envar if it doesn't already exist */
#define OPAL_PMIX_PREPEND_ENVAR "pmix.envar.prepnd"
/* (pmix_envar_t*) append the given value to the specified
 * envar using the separator character,
 * creating the envar if it doesn't already exist */
#define OPAL_PMIX_APPEND_ENVAR "pmix.envar.appnd"
347
/* attributes used in queries */
#define OPAL_PMIX_QUERY_NAMESPACES "pmix.qry.ns"  /* (char*) request a comma-delimited list of active nspaces */
#define OPAL_PMIX_QUERY_JOB_STATUS "pmix.qry.jst"  /* (pmix_status_t) status of a specified currently executing job */
#define OPAL_PMIX_QUERY_QUEUE_LIST "pmix.qry.qlst"  /* (char*) request a comma-delimited list of scheduler queues */
#define OPAL_PMIX_QUERY_QUEUE_STATUS "pmix.qry.qst"  /* (TBD) status of a specified scheduler queue */
/* (char*) input nspace of job whose info is being requested
 * returns (pmix_data_array_t) an array of pmix_proc_info_t */
#define OPAL_PMIX_QUERY_PROC_TABLE "pmix.qry.ptable"
/* (char*) input nspace of job whose info is being requested
 * returns (pmix_data_array_t) an array of pmix_proc_info_t for
 * procs in job on same node */
#define OPAL_PMIX_QUERY_LOCAL_PROC_TABLE "pmix.qry.lptable"
#define OPAL_PMIX_QUERY_AUTHORIZATIONS "pmix.qry.auths"  /* return operations tool is authorized to perform */
#define OPAL_PMIX_QUERY_SPAWN_SUPPORT "pmix.qry.spawn"  /* return a comma-delimited list of supported spawn attributes */
#define OPAL_PMIX_QUERY_DEBUG_SUPPORT "pmix.qry.debug"  /* return a comma-delimited list of supported debug attributes */
#define OPAL_PMIX_QUERY_MEMORY_USAGE "pmix.qry.mem"  /* return info on memory usage for the procs indicated in the qualifiers */
#define OPAL_PMIX_QUERY_LOCAL_ONLY "pmix.qry.local"  /* constrain the query to local information only */
#define OPAL_PMIX_QUERY_REPORT_AVG "pmix.qry.avg"  /* report average values */
#define OPAL_PMIX_QUERY_REPORT_MINMAX "pmix.qry.minmax"  /* report minimum and maximum value */
/* (char*) string identifier of the allocation whose status
 * is being requested */
#define OPAL_PMIX_QUERY_ALLOC_STATUS "pmix.query.alloc"
/* (char*) query number of seconds (uint32_t) remaining in allocation
 * for the specified nspace */
#define OPAL_PMIX_TIME_REMAINING "pmix.time.remaining"
369
370
/* attributes used for logging */
#define OPAL_PMIX_LOG_SOURCE "pmix.log.source"  /* (pmix_proc_t*) ID of source of the log request */
#define OPAL_PMIX_LOG_STDERR "pmix.log.stderr"  /* (char*) log string to stderr */
#define OPAL_PMIX_LOG_STDOUT "pmix.log.stdout"  /* (char*) log string to stdout */
/* (char*) log data to syslog - defaults to ERROR priority; logs
 * to global syslog if available, otherwise to local syslog
 * NOTE(review): the original comment ended its first line with a dangling
 * "unless", so a qualifying clause appears to have been lost */
#define OPAL_PMIX_LOG_SYSLOG "pmix.log.syslog"
#define OPAL_PMIX_LOG_LOCAL_SYSLOG "pmix.log.lsys"  /* (char*) log msg to local syslog - defaults to ERROR priority */
#define OPAL_PMIX_LOG_GLOBAL_SYSLOG "pmix.log.gsys"  /* (char*) forward data to system "master" and log msg to that syslog */
#define OPAL_PMIX_LOG_SYSLOG_PRI "pmix.log.syspri"  /* (int) syslog priority level */

#define OPAL_PMIX_LOG_TIMESTAMP "pmix.log.tstmp"  /* (time_t) timestamp for log report */
#define OPAL_PMIX_LOG_GENERATE_TIMESTAMP "pmix.log.gtstmp"  /* (bool) generate timestamp for log */
#define OPAL_PMIX_LOG_TAG_OUTPUT "pmix.log.tag"  /* (bool) label the output stream with the channel name (e.g., "stdout") */
#define OPAL_PMIX_LOG_TIMESTAMP_OUTPUT "pmix.log.tsout"  /* (bool) print timestamp in output string */
#define OPAL_PMIX_LOG_XML_OUTPUT "pmix.log.xml"  /* (bool) print the output stream in xml format */
#define OPAL_PMIX_LOG_ONCE "pmix.log.once"  /* (bool) only log this once with whichever channel can first support it */
#define OPAL_PMIX_LOG_MSG "pmix.log.msg"  /* (pmix_byte_object_t) message blob to be sent somewhere */

/* (pmix_data_array_t*) log via email based on array of pmix_info_t
 * containing directives */
#define OPAL_PMIX_LOG_EMAIL "pmix.log.email"
#define OPAL_PMIX_LOG_EMAIL_ADDR "pmix.log.emaddr"  /* (char*) comma-delimited list of email addresses that are to recv msg */
#define OPAL_PMIX_LOG_EMAIL_SENDER_ADDR "pmix.log.emfaddr"  /* (char*) return email address of sender */
#define OPAL_PMIX_LOG_EMAIL_SUBJECT "pmix.log.emsub"  /* (char*) subject line for email */
#define OPAL_PMIX_LOG_EMAIL_MSG "pmix.log.emmsg"  /* (char*) msg to be included in email */
#define OPAL_PMIX_LOG_EMAIL_SERVER "pmix.log.esrvr"  /* (char*) hostname (or IP addr) of email server */
#define OPAL_PMIX_LOG_EMAIL_SRVR_PORT "pmix.log.esrvrprt"  /* (int32_t) port the email server is listening to */

#define OPAL_PMIX_LOG_GLOBAL_DATASTORE "pmix.log.gstore"  /* (bool) */
#define OPAL_PMIX_LOG_JOB_RECORD "pmix.log.jrec"  /* (bool) log the provided information to the RM's job record */
400
401
402
/* attributes used by debuggers */
#define OPAL_PMIX_DEBUG_STOP_ON_EXEC "pmix.dbg.exec"  /* (bool) job is being spawned under debugger - instruct it to pause on start */
#define OPAL_PMIX_DEBUG_STOP_IN_INIT "pmix.dbg.init"  /* (bool) instruct job to stop during PMIx init */
#define OPAL_PMIX_DEBUG_WAIT_FOR_NOTIFY "pmix.dbg.notify"  /* (bool) block at desired point until receiving debugger release notification */
/* (char*) nspace of the job to be debugged - Note
 * that id's, pids, and other info on the procs is available
 * via a query for the nspace's local or global proctable */
#define OPAL_PMIX_DEBUG_JOB "pmix.dbg.job"
#define OPAL_PMIX_DEBUG_WAITING_FOR_NOTIFY "pmix.dbg.waiting"  /* (bool) job to be debugged is waiting for a release */
#define OPAL_PMIX_DEBUG_JOB_DIRECTIVES "pmix.dbg.jdirs"  /* (opal_list_t*) list of job-level directives */
#define OPAL_PMIX_DEBUG_APP_DIRECTIVES "pmix.dbg.adirs"  /* (opal_list_t*) list of app-level directives */
413
414
/* attributes identifying the Resource Manager (RM) */
#define OPAL_PMIX_RM_NAME "pmix.rm.name"  /* (char*) string name of the resource manager */
#define OPAL_PMIX_RM_VERSION "pmix.rm.version"  /* (char*) RM version string */
418
419
/* attributes describing allocation requests */
/* (char*) provide a string identifier for this allocation request
 * which can later be used to query status of the request */
#define OPAL_PMIX_ALLOC_ID "pmix.alloc.id"
#define OPAL_PMIX_ALLOC_NUM_NODES "pmix.alloc.nnodes"  /* (uint64_t) number of nodes */
#define OPAL_PMIX_ALLOC_NODE_LIST "pmix.alloc.nlist"  /* (char*) regex of specific nodes */
#define OPAL_PMIX_ALLOC_NUM_CPUS "pmix.alloc.ncpus"  /* (uint64_t) number of cpus */
#define OPAL_PMIX_ALLOC_NUM_CPU_LIST "pmix.alloc.ncpulist"  /* (char*) regex of #cpus for each node */
#define OPAL_PMIX_ALLOC_CPU_LIST "pmix.alloc.cpulist"  /* (char*) regex of specific cpus indicating the cpus involved. */
#define OPAL_PMIX_ALLOC_MEM_SIZE "pmix.alloc.msize"  /* (float) number of Mbytes */
/* (pmix_data_array_t*) Array of pmix_info_t describing
 * network resource request. This must include at least:
 *   * PMIX_ALLOC_NETWORK_ID
 *   * PMIX_ALLOC_NETWORK_TYPE
 *   * PMIX_ALLOC_NETWORK_ENDPTS
 * plus whatever other descriptors are desired */
#define OPAL_PMIX_ALLOC_NETWORK "pmix.alloc.net"
435 #define OPAL_PMIX_ALLOC_NETWORK_ID "pmix.alloc.netid" // (char*) key to be used when accessing this requested network allocation. The
436 // allocation will be returned/stored as a pmix_data_array_t of
437 // pmix_info_t indexed by this key and containing at least one
438 // entry with the same key and the allocated resource description.
439 // The type of the included value depends upon the network
440 // support. For example, a TCP allocation might consist of a
441 // comma-delimited string of socket ranges such as
442 // "32000-32100,33005,38123-38146". Additional entries will consist
443 // of any provided resource request directives, along with their
444 // assigned values. Examples include:
445 // * PMIX_ALLOC_NETWORK_TYPE - the type of resources provided
446 // * PMIX_ALLOC_NETWORK_PLANE - if applicable, what plane the
447 // resources were assigned from
448 // * PMIX_ALLOC_NETWORK_QOS - the assigned QoS
449 // * PMIX_ALLOC_BANDWIDTH - the allocated bandwidth
450 // * PMIX_ALLOC_NETWORK_SEC_KEY - a security key for the requested
451 // network allocation
452 // NOTE: the assigned values may differ from those requested,
453 // especially if the "required" flag was not set in the request
454 #define OPAL_PMIX_ALLOC_BANDWIDTH "pmix.alloc.bw" // (float) Mbits/sec
455 #define OPAL_PMIX_ALLOC_NETWORK_QOS "pmix.alloc.netqos" // (char*) quality of service level
456 #define OPAL_PMIX_ALLOC_TIME "pmix.alloc.time" // (uint32_t) time in seconds that the allocation shall remain valid
457 #define OPAL_PMIX_ALLOC_NETWORK_TYPE "pmix.alloc.nettype" // (char*) type of desired transport (e.g., tcp, udp)
458 #define OPAL_PMIX_ALLOC_NETWORK_PLANE "pmix.alloc.netplane" // (char*) id string for the NIC (aka plane) to be used for this allocation
459 // (e.g., CIDR for Ethernet)
460 #define OPAL_PMIX_ALLOC_NETWORK_ENDPTS "pmix.alloc.endpts" // (size_t) number of endpoints to allocate per process
461 #define OPAL_PMIX_ALLOC_NETWORK_ENDPTS_NODE "pmix.alloc.endpts.nd" // (size_t) number of endpoints to allocate per node
462 #define OPAL_PMIX_ALLOC_NETWORK_SEC_KEY "pmix.alloc.nsec" // (pmix_byte_object_t) network security key
463
464
/* Job control attributes: keys used in job-control and cleanup requests */
#define OPAL_PMIX_JOB_CTRL_ID                   "pmix.jctrl.id"         // (char*) string identifier for this request
#define OPAL_PMIX_JOB_CTRL_PAUSE                "pmix.jctrl.pause"      // (bool) pause the specified processes
#define OPAL_PMIX_JOB_CTRL_RESUME               "pmix.jctrl.resume"     // (bool) "un-pause" the specified processes
#define OPAL_PMIX_JOB_CTRL_CANCEL               "pmix.jctrl.cancel"     // (char*) cancel the specified request
                                                                        //         (NULL => cancel all requests from this requestor)
#define OPAL_PMIX_JOB_CTRL_KILL                 "pmix.jctrl.kill"       // (bool) forcibly terminate the specified processes and cleanup
#define OPAL_PMIX_JOB_CTRL_RESTART              "pmix.jctrl.restart"    // (char*) restart the specified processes using the given
                                                                        //         checkpoint ID
#define OPAL_PMIX_JOB_CTRL_CHECKPOINT           "pmix.jctrl.ckpt"       // (char*) checkpoint the specified processes and assign the
                                                                        //         given ID to it
#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_EVENT     "pmix.jctrl.ckptev"     // (bool) use event notification to trigger process checkpoint
#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_SIGNAL    "pmix.jctrl.ckptsig"    // (int) use the given signal to trigger process checkpoint
/* NOTE(review): the TIMEOUT key below has the same string as
 * OPAL_PMIX_JOB_CTRL_CHECKPOINT_SIGNAL, so the two attributes cannot be
 * told apart by key. This looks like a copy/paste slip, but these strings
 * presumably have to match the PMIx library's own definitions - confirm
 * against the PMIx headers before changing the value. */
#define OPAL_PMIX_JOB_CTRL_CHECKPOINT_TIMEOUT   "pmix.jctrl.ckptsig"    // (int) time in seconds to wait for checkpoint to complete
#define OPAL_PMIX_JOB_CTRL_SIGNAL               "pmix.jctrl.sig"        // (int) send given signal to specified processes
#define OPAL_PMIX_JOB_CTRL_PROVISION            "pmix.jctrl.pvn"        // (char*) regex identifying nodes that are to be provisioned
#define OPAL_PMIX_JOB_CTRL_PROVISION_IMAGE      "pmix.jctrl.pvnimg"     // (char*) name of the image that is to be provisioned
#define OPAL_PMIX_JOB_CTRL_PREEMPTIBLE          "pmix.jctrl.preempt"    // (bool) job can be pre-empted
#define OPAL_PMIX_JOB_CTRL_TERMINATE            "pmix.jctrl.term"       // (bool) politely terminate the specified procs
#define OPAL_PMIX_REGISTER_CLEANUP              "pmix.reg.cleanup"      // (char*) comma-delimited list of files/directories to
                                                                        //         be removed upon process termination
#define OPAL_PMIX_REGISTER_CLEANUP_DIR          "pmix.reg.cleanupdir"   // (char*) comma-delimited list of directories to
                                                                        //         be removed upon process termination
#define OPAL_PMIX_CLEANUP_RECURSIVE             "pmix.clnup.recurse"    // (bool) recursively cleanup all subdirectories under the
                                                                        //        specified one(s)
#define OPAL_PMIX_CLEANUP_EMPTY                 "pmix.clnup.empty"      // (bool) only remove empty subdirectories
#define OPAL_PMIX_CLEANUP_IGNORE                "pmix.clnup.ignore"     // (char*) comma-delimited list of filenames that are not
                                                                        //         to be removed
#define OPAL_PMIX_CLEANUP_LEAVE_TOPDIR          "pmix.clnup.lvtop"      // (bool) when recursively cleaning subdirs, do not remove
                                                                        //        the top-level directory (the one given in the
                                                                        //        cleanup request)
494
495
/* Monitoring attributes: keys used in heartbeat and file monitoring requests */
#define OPAL_PMIX_MONITOR_ID                "pmix.monitor.id"       // (char*) string identifier for this request
#define OPAL_PMIX_MONITOR_CANCEL            "pmix.monitor.cancel"   // (char*) identifier to be canceled (NULL = cancel all
                                                                    //         monitoring for this process)
#define OPAL_PMIX_MONITOR_APP_CONTROL       "pmix.monitor.appctrl"  // (bool) the application desires to control the response
                                                                    //        to a monitoring event
#define OPAL_PMIX_MONITOR_HEARTBEAT         "pmix.monitor.mbeat"    // (void) register to have the server monitor the requestor
                                                                    //        for heartbeats
#define OPAL_PMIX_SEND_HEARTBEAT            "pmix.monitor.beat"     // (void) send heartbeat to local server
#define OPAL_PMIX_MONITOR_HEARTBEAT_TIME    "pmix.monitor.btime"    // (uint32_t) time in seconds before declaring heartbeat missed
#define OPAL_PMIX_MONITOR_HEARTBEAT_DROPS   "pmix.monitor.bdrop"    // (uint32_t) number of heartbeats that can be missed before
                                                                    //            taking specified action
#define OPAL_PMIX_MONITOR_FILE              "pmix.monitor.fmon"     // (char*) register to monitor file for signs of life
#define OPAL_PMIX_MONITOR_FILE_SIZE         "pmix.monitor.fsize"    // (bool) monitor whether the size of the given file is
                                                                    //        growing to determine that the app is running
#define OPAL_PMIX_MONITOR_FILE_ACCESS       "pmix.monitor.faccess"  // (char*) monitor the time since the given file was last
                                                                    //         accessed to determine that the app is running
#define OPAL_PMIX_MONITOR_FILE_MODIFY       "pmix.monitor.fmod"     // (char*) monitor the time since the given file was last
                                                                    //         modified to determine that the app is running
#define OPAL_PMIX_MONITOR_FILE_CHECK_TIME   "pmix.monitor.ftime"    // (uint32_t) time in seconds between checking file
#define OPAL_PMIX_MONITOR_FILE_DROPS        "pmix.monitor.fdrop"    // (uint32_t) number of file checks that can be missed before
                                                                    //            taking specified action
514
/* Security attributes: keys used with credentials and crypto material */
#define OPAL_PMIX_CRED_TYPE     "pmix.sec.ctype"    // (char*) when passed in PMIx_Get_credential: a prioritized,
                                                    //         comma-delimited list of desired credential types,
                                                    //         for use in environments where multiple authentication
                                                    //         mechanisms may be available.
                                                    //         When returned in a callback function: a string
                                                    //         identifier of the credential type
#define OPAL_PMIX_CRYPTO_KEY    "pmix.sec.key"      // (pmix_byte_object_t) blob containing crypto key
522
/* IO forwarding (IOF) attributes: keys used to control caching, buffering and output formatting */
#define OPAL_PMIX_IOF_CACHE_SIZE        "pmix.iof.csize"    // (uint32_t) requested size of the server cache in bytes for each
                                                            //            specified channel. By default, the server is allowed
                                                            //            (but not required) to drop all bytes received beyond
                                                            //            the max size
#define OPAL_PMIX_IOF_DROP_OLDEST       "pmix.iof.old"      // (bool) in an overflow situation, drop the oldest bytes to make
                                                            //        room in the cache
#define OPAL_PMIX_IOF_DROP_NEWEST       "pmix.iof.new"      // (bool) in an overflow situation, drop any new bytes received
                                                            //        until room becomes available in the cache (default)
#define OPAL_PMIX_IOF_BUFFERING_SIZE    "pmix.iof.bsize"    // (uint32_t) basically controls grouping of IO on the specified
                                                            //            channel(s) to avoid being called every time a bit of
                                                            //            IO arrives. The library will execute the callback
                                                            //            whenever the specified number of bytes becomes
                                                            //            available. Any remaining buffered data will be
                                                            //            "flushed" upon call to deregister the respective channel
#define OPAL_PMIX_IOF_BUFFERING_TIME    "pmix.iof.btime"    // (uint32_t) max time in seconds to buffer IO before delivering
                                                            //            it. Used in conjunction with buffering size, this
                                                            //            prevents IO from being held indefinitely while
                                                            //            waiting for another payload to arrive
#define OPAL_PMIX_IOF_COMPLETE          "pmix.iof.cmp"      // (bool) indicates whether or not the specified IO channel has
                                                            //        been closed by the source
#define OPAL_PMIX_IOF_PUSH_STDIN        "pmix.iof.stdin"    // (bool) used by a tool to request that the PMIx library collect
                                                            //        the tool's stdin and forward it to the procs specified
                                                            //        in the PMIx_IOF_push call
#define OPAL_PMIX_IOF_TAG_OUTPUT        "pmix.iof.tag"      // (bool) tag output with the channel it comes from
#define OPAL_PMIX_IOF_TIMESTAMP_OUTPUT  "pmix.iof.ts"       // (bool) timestamp output
#define OPAL_PMIX_IOF_XML_OUTPUT        "pmix.iof.xml"      // (bool) format output in XML
546
/* Keys that select what is included in the application setup data */
#define OPAL_PMIX_SETUP_APP_ENVARS      "pmix.setup.env"    // (bool) harvest and include the relevant envars
#define OPAL_PMIX_SETUP_APP_NONENVARS   "pmix.setup.nenv"   // (bool) include all data that is not an envar
#define OPAL_PMIX_SETUP_APP_ALL         "pmix.setup.all"    // (bool) include all relevant data
551
552
/* define a scope for data "put" by PMIx per the following:
 *
 * OPAL_PMIX_LOCAL  - the data is intended only for other application
 *                    processes on the same node. Data marked in this way
 *                    will not be included in data packages sent to remote
 *                    requestors
 * OPAL_PMIX_REMOTE - the data is intended solely for application processes
 *                    on remote nodes. Data marked in this way will not be
 *                    shared with other processes on the same node
 * OPAL_PMIX_GLOBAL - the data is to be shared with all other requesting
 *                    processes, regardless of location
 *
 * OPAL_PMIX_SCOPE is the data-type tag for a scope value. It uses the
 * OPAL_-prefixed tag, in line with OPAL_PMIX_DATA_RANGE (OPAL_UINT8) in
 * this file, rather than a PMIX_-prefixed name that belongs to the PMIx
 * library headers, which this header does not include.
 */
#define OPAL_PMIX_SCOPE OPAL_UINT
typedef enum {
    OPAL_PMIX_SCOPE_UNDEF = 0,  // no scope specified
    OPAL_PMIX_LOCAL,            // share to procs also on this node
    OPAL_PMIX_REMOTE,           // share with procs not on this node
    OPAL_PMIX_GLOBAL            // share with all procs, regardless of location
} opal_pmix_scope_t;
571
/* Range of visibility for data "published" by PMI.
 * OPAL_PMIX_DATA_RANGE is the data-type tag for a range value, and
 * opal_pmix_data_range_t is the 8-bit type that holds one of the
 * OPAL_PMIX_RANGE_* values below. */
#define OPAL_PMIX_DATA_RANGE OPAL_UINT8
typedef uint8_t opal_pmix_data_range_t;
#define OPAL_PMIX_RANGE_UNDEF       0
#define OPAL_PMIX_RANGE_RM          1   // data is intended for the host resource manager
#define OPAL_PMIX_RANGE_LOCAL       2   // data is available on the local node only
#define OPAL_PMIX_RANGE_NAMESPACE   3   // data is available to procs in the same nspace only
#define OPAL_PMIX_RANGE_SESSION     4   // data is available to all procs in the session
#define OPAL_PMIX_RANGE_GLOBAL      5   // data is available to all procs
#define OPAL_PMIX_RANGE_CUSTOM      6   // range is specified in a pmix_info_t
#define OPAL_PMIX_RANGE_PROC_LOCAL  7   // restrict range to the local proc
583
/* "Persistence" policy for data published by clients: states how long
 * the published data is retained. */
typedef enum {
    OPAL_PMIX_PERSIST_INDEF      = 0,   // retain until specifically deleted
    OPAL_PMIX_PERSIST_FIRST_READ = 1,   // delete upon first access
    OPAL_PMIX_PERSIST_PROC       = 2,   // retain until publishing process terminates
    OPAL_PMIX_PERSIST_APP        = 3,   // retain until application terminates
    OPAL_PMIX_PERSIST_SESSION    = 4    // retain until session/allocation terminates
} opal_pmix_persistence_t;
592
593
/* Allocation request flags: the kind of operation an allocation request
 * asks for. The per-value meanings are not spelled out in this header. */
typedef enum {
    OPAL_PMIX_ALLOC_UNDEF = 0,
    OPAL_PMIX_ALLOC_NEW,
    OPAL_PMIX_ALLOC_EXTEND,
    OPAL_PMIX_ALLOC_RELEASE,
    OPAL_PMIX_ALLOC_REACQUIRE,
    /* misspelled original name, kept with the same value so that existing
     * callers continue to compile; new code should use
     * OPAL_PMIX_ALLOC_REACQUIRE */
    OPAL_PMIX_ALLOC_REAQCUIRE = OPAL_PMIX_ALLOC_REACQUIRE
} opal_pmix_alloc_directive_t;
602
/* Bit-mask flags that identify IO forwarding channels. Each flag is a
 * distinct bit, so several channels can be referenced at once by OR'ing
 * the flags together into an opal_pmix_iof_channel_t. */
typedef uint16_t opal_pmix_iof_channel_t;
#define OPAL_PMIX_FWD_STDIN_CHANNEL     0x0001
#define OPAL_PMIX_FWD_STDOUT_CHANNEL    0x0002
#define OPAL_PMIX_FWD_STDERR_CHANNEL    0x0004
#define OPAL_PMIX_FWD_STDDIAG_CHANNEL   0x0008
611
612
613 /**** PMIX INFO STRUCT ****/
614
/* NOTE: the pmix_info_t is essentially equivalent to the opal_value_t.
 * Hence, we do not define a separate OPAL info struct here - the
 * opal_value_t is used in its place */
617
618
619 /**** PMIX LOOKUP RETURN STRUCT ****/
620 typedef struct {
621 opal_list_item_t super;
622 opal_process_name_t proc;
623 opal_value_t value;
624 } opal_pmix_pdata_t;
625 OBJ_CLASS_DECLARATION(opal_pmix_pdata_t);
626
627
628 /**** PMIX APP STRUCT ****/
629 typedef struct {
630 opal_list_item_t super;
631 char *cmd;
632 char **argv;
633 char **env;
634 char *cwd;
635 int maxprocs;
636 opal_list_t info;
637 } opal_pmix_app_t;
638 /* utility macros for working with pmix_app_t structs */
639 OBJ_CLASS_DECLARATION(opal_pmix_app_t);
640
641
642 /**** PMIX MODEX STRUCT ****/
643 typedef struct {
644 opal_object_t super;
645 opal_process_name_t proc;
646 uint8_t *blob;
647 size_t size;
648 } opal_pmix_modex_data_t;
649 OBJ_CLASS_DECLARATION(opal_pmix_modex_data_t);
650
651 /**** PMIX QUERY STRUCT ****/
652 typedef struct {
653 opal_list_item_t super;
654 char **keys;
655 opal_list_t qualifiers; // list of opal_value_t
656 } opal_pmix_query_t;
657 OBJ_CLASS_DECLARATION(opal_pmix_query_t);
658
659 /**** CALLBACK FUNCTIONS FOR NON-BLOCKING OPERATIONS ****/
660
/* callback that receives only a cbdata pointer. It accompanies returned
 * data in the modex and info callbacks of this file (together with its
 * own cbdata), presumably so that the recipient can signal when the
 * returned data may be released */
typedef void (*opal_pmix_release_cbfunc_t)(void *cbdata);
662
663 /* define a callback function that is solely used by servers, and
664 * not clients, to return modex data in response to "fence" and "get"
665 * operations. The returned blob contains the data collected from each
666 * server participating in the operation. */
667 typedef void (*opal_pmix_modex_cbfunc_t)(int status,
668 const char *data, size_t ndata, void *cbdata,
669 opal_pmix_release_cbfunc_t relcbfunc, void *relcbdata);
670
671 /* define a callback function for calls to spawn_nb - the function
672 * will be called upon completion of the spawn command. The status
673 * will indicate whether or not the spawn succeeded. The jobid
674 * of the spawned processes will be returned, along with any provided
675 * callback data. */
676 typedef void (*opal_pmix_spawn_cbfunc_t)(int status, opal_jobid_t jobid, void *cbdata);
677
/* Callback for common operations that simply return a status, such as
 * the non-blocking versions of Fence, Connect, and Disconnect. */
typedef void (*opal_pmix_op_cbfunc_t)(int status,
                                      void *cbdata);
682
683 /* define a callback function for calls to lookup_nb - the
684 * function will be called upon completion of the command with the
685 * status indicating the success of failure of the request. Any
686 * retrieved data will be returned in a list of opal_pmix_pdata_t's.
687 * The nspace/rank of the process that provided each data element is
688 * also returned.
689 *
690 * Note that these structures will be released upon return from
691 * the callback function, so the receiver must copy/protect the
692 * data prior to returning if it needs to be retained */
693
694 typedef void (*opal_pmix_lookup_cbfunc_t)(int status,
695 opal_list_t *data,
696 void *cbdata);
697
698 /* define a callback function by which event handlers can notify
699 * us that they have completed their action, and pass along any
700 * further information for subsequent handlers */
701 typedef void (*opal_pmix_notification_complete_fn_t)(int status, opal_list_t *results,
702 opal_pmix_op_cbfunc_t cbfunc, void *thiscbdata,
703 void *notification_cbdata);
704
705 /* define a callback function for the evhandler. Upon receipt of an
706 * event notification, the active module will execute the specified notification
707 * callback function, providing:
708 *
709 * status - the error that occurred
710 * source - identity of the proc that generated the event
711 * info - any additional info provided regarding the error.
712 * results - any info from prior event handlers
713 * cbfunc - callback function to execute when the evhandler is
714 * finished with the provided data so it can be released
715 * cbdata - pointer to be returned in cbfunc
716 *
717 * Note that different resource managers may provide differing levels
718 * of support for event notification to application processes. Thus, the
719 * info list may be NULL or may contain detailed information of the event.
720 * It is the responsibility of the application to parse any provided info array
721 * for defined key-values if it so desires.
722 *
723 * Possible uses of the opal_value_t list include:
724 *
725 * - for the RM to alert the process as to planned actions, such as
726 * to abort the session, in response to the reported event
727 *
728 * - provide a timeout for alternative action to occur, such as for
729 * the application to request an alternate response to the event
730 *
731 * For example, the RM might alert the application to the failure of
732 * a node that resulted in termination of several processes, and indicate
733 * that the overall session will be aborted unless the application
734 * requests an alternative behavior in the next 5 seconds. The application
735 * then has time to respond with a checkpoint request, or a request to
736 * recover from the failure by obtaining replacement nodes and restarting
737 * from some earlier checkpoint.
738 *
739 * Support for these options is left to the discretion of the host RM. Info
740 * keys are included in the common definions above, but also may be augmented
741 * on a per-RM basis.
742 *
743 * On the server side, the notification function is used to inform the host
744 * server of a detected error in the PMIx subsystem and/or client */
745 typedef void (*opal_pmix_notification_fn_t)(int status,
746 const opal_process_name_t *source,
747 opal_list_t *info, opal_list_t *results,
748 opal_pmix_notification_complete_fn_t cbfunc,
749 void *cbdata);
750
/* Callback for calls to register_evhandler:
 *
 * status        - whether or not the request was successful
 * evhandler_ref - size_t reference assigned to the evhandler by PMIx;
 *                 this reference must be used to deregister the
 *                 event handler
 * cbdata        - ptr to the original cbdata */
typedef void (*opal_pmix_evhandler_reg_cbfunc_t)(int status, size_t evhandler_ref, void *cbdata);
759
760 /* define a callback function for calls to get_nb. The status
761 * indicates if the requested data was found or not - a pointer to the
762 * opal_value_t structure containing the found data is returned. The
763 * pointer will be NULL if the requested data was not found. */
764 typedef void (*opal_pmix_value_cbfunc_t)(int status,
765 opal_value_t *kv, void *cbdata);
766
767
768 /* define a callback function for calls to PMIx_Query. The status
769 * indicates if requested data was found or not - a list of
770 * opal_value_t will contain the key/value pairs. */
771 typedef void (*opal_pmix_info_cbfunc_t)(int status,
772 opal_list_t *info,
773 void *cbdata,
774 opal_pmix_release_cbfunc_t release_fn,
775 void *release_cbdata);
776
777 /* Callback function for incoming tool connections - the host
778 * RTE shall provide a jobid/rank for the connecting tool. We
779 * assume that a rank=0 will be the normal assignment, but allow
780 * for the future possibility of a parallel set of tools
781 * connecting, and thus each proc requiring a rank */
782 typedef void (*opal_pmix_tool_connection_cbfunc_t)(int status,
783 opal_process_name_t proc,
784 void *cbdata);
785
786
787 END_C_DECLS
788
789 #endif