1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */
2 /*
3 * (C) 2007 by Argonne National Laboratory.
4 * See COPYRIGHT in top-level directory.
5 */
6
7 #ifndef PMI2_H_INCLUDED
8 #define PMI2_H_INCLUDED
9
10 /* Structure and constant definitions */
11 #include <pmix_common.h>
12
13 #define PMI2_MAX_KEYLEN 64 /* presumably the maximum key length - TODO confirm units and whether the terminator counts */
14 #define PMI2_MAX_VALLEN 1024 /* presumably the maximum value length - TODO confirm */
15 #define PMI2_MAX_ATTRVALUE 1024 /* presumably the maximum attribute value length - TODO confirm */
16 #define PMI2_ID_NULL -1 /* pass as src_pmi_id to PMI2_KVS_Get when no source hint is provided */
17
18 #if defined(__cplusplus)
19 extern "C" {
20 #endif
21
22 /*D
23 PMI2_CONSTANTS - PMI2 definitions
24
25 Error Codes (listed in the same order as the definitions below):
26 + PMI2_SUCCESS - operation completed successfully
27 . PMI2_FAIL - operation failed
28 . PMI2_ERR_INIT - PMI not initialized
29 . PMI2_ERR_NOMEM - input buffer not large enough
30 . PMI2_ERR_INVALID_ARG - invalid argument
31 . PMI2_ERR_INVALID_KEY - invalid key argument
32 . PMI2_ERR_INVALID_KEY_LENGTH - invalid key length argument
33 . PMI2_ERR_INVALID_VAL - invalid val argument
34 . PMI2_ERR_INVALID_VAL_LENGTH - invalid val length argument
35 . PMI2_ERR_INVALID_LENGTH - invalid length argument
36 . PMI2_ERR_INVALID_NUM_ARGS - invalid number of arguments
37 . PMI2_ERR_INVALID_ARGS - invalid args argument
38 . PMI2_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument
39 . PMI2_ERR_INVALID_KEYVALP - invalid keyvalp argument
40 . PMI2_ERR_INVALID_SIZE - invalid size argument
41 - PMI2_ERR_OTHER - other unspecified error
42 Values: PMI2_SUCCESS is 0, PMI2_FAIL is -1, and the PMI2_ERR_ codes are the integers 1 through 14.
43 D*/
44 #define PMI2_SUCCESS 0
45 #define PMI2_FAIL -1
46 #define PMI2_ERR_INIT 1
47 #define PMI2_ERR_NOMEM 2
48 #define PMI2_ERR_INVALID_ARG 3
49 #define PMI2_ERR_INVALID_KEY 4
50 #define PMI2_ERR_INVALID_KEY_LENGTH 5
51 #define PMI2_ERR_INVALID_VAL 6
52 #define PMI2_ERR_INVALID_VAL_LENGTH 7
53 #define PMI2_ERR_INVALID_LENGTH 8
54 #define PMI2_ERR_INVALID_NUM_ARGS 9
55 #define PMI2_ERR_INVALID_ARGS 10
56 #define PMI2_ERR_INVALID_NUM_PARSED 11
57 #define PMI2_ERR_INVALID_KEYVALP 12
58 #define PMI2_ERR_INVALID_SIZE 13
59 #define PMI2_ERR_OTHER 14
60
61 /* Key/value string pair taken by PMI2_Job_Spawn and the PMI2_Nameserv_ routines. This is here to allow
62 the spawn multiple functions to compile, and needs to be removed once those functions are fixed for pmi2. */
63 typedef struct PMI_keyval_t
64 {
65 char * key; /* key string */
66 char * val; /* value string paired with key */
67 } PMI_keyval_t;
68
69
70 /*@
71 PMI2_Connect_comm_t - connection structure used when connecting to other jobs
72
73 Fields:
74 + read - Read from a connection to the leader of the job to which
75 this process will be connecting. Returns 0 on success or an MPI
76 error code on failure.
77 . write - Write to a connection to the leader of the job to which
78 this process will be connecting. Returns 0 on success or an MPI
79 error code on failure.
80 . ctx - An anonymous pointer to data that may be used by the read
81 and write members.
82 - isMaster - Indicates which process is the "master"; may have the
83 values 1 (is the master), 0 (is not the master), or -1 (neither is
84 designated as the master). The two processes must agree on which
85 process is the master, or both must select -1 (neither is the
86 master).
87
88 Notes:
89 A typical implementation of these functions will use the read and
90 write calls on a pre-established file descriptor (fd) between the
91 two leading processes. This will be needed only if the PMI server
92 cannot access the KVS spaces of another job (this may happen, for
93 example, if each mpiexec creates the KVS spaces for the processes
94 that it manages).
95
96 @*/
97 typedef struct PMI2_Connect_comm {
98 int (*read)( void *buf, int maxlen, void *ctx ); /* read callback; 0 on success */
99 int (*write)( const void *buf, int len, void *ctx ); /* write callback; 0 on success */
100 void *ctx; /* opaque data passed to read and write */
101 int isMaster; /* 1, 0, or -1 (no master designated) */
102 } PMI2_Connect_comm_t;
103
104
105 /*@
106 PMI2_Init - initialize the Process Manager Interface
107
108 Output Parameters:
109 + spawned - spawned flag
110 . size - number of processes in the job
111 . rank - rank of this process in the job
112 - appnum - which executable is this on the mpiexec commandline
113
114 Return values:
115 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
116
117 Notes:
118 Initialize PMI for this process group. The value of spawned indicates whether
119 this process was created by a spawn call (declared in this header as 'PMI2_Job_Spawn'). 'spawned' will be non-zero
120 iff this process group has a parent.
121
122 @*/
123 PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum);
124
125 /*@
126 PMI2_Finalize - finalize the Process Manager Interface
127
128 Return values:
129 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
130
131 Notes:
132 Finalize PMI for this job.
133
134 @*/
135 PMIX_EXPORT int PMI2_Finalize(void);
136
137 /*@
138 PMI2_Initialized - check if PMI has been initialized
139
140 Return values:
141 Non-zero if PMI2_Init has been called successfully, zero otherwise.
142
143 @*/
144 PMIX_EXPORT int PMI2_Initialized(void);
145
146 /*@
147 PMI2_Abort - abort the process group associated with this process
148
149 Input Parameters:
150 + flag - non-zero if all processes in this job should abort, zero otherwise
151 - msg - error message to be printed
152
153 Return values:
154 If the abort succeeds this function will not return. Returns an MPI
155 error code otherwise.
156
157 @*/
158 PMIX_EXPORT int PMI2_Abort(int flag, const char msg[]);
159
160 /*@
161 PMI2_Job_Spawn - spawn a new set of processes
162
163 Input Parameters:
164 + count - count of commands
165 . cmds - array of command strings
166 . argcs - size of argv arrays for each command string
167 . argvs - array of argv arrays for each command string
168 . maxprocs - array of maximum processes to spawn for each command string
169 . info_keyval_sizes - array giving the number of elements in each of the
170 'info_keyval_vectors'
171 . info_keyval_vectors - array of keyval vector arrays
172 . preput_keyval_size - Number of elements in 'preput_keyval_vector'
173 . preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space
174 - jobIdSize - size of the buffer provided in jobId
175
176 Output Parameters:
177 + jobId - job id of the spawned processes
178 - errors - array of errors for each command
179
180 Return values:
181 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
182
183 Notes:
184 This function spawns a set of processes into a new job. The 'count'
185 field refers to the size of the array parameters - 'cmds', 'argcs', 'argvs', 'maxprocs',
186 'info_keyval_sizes' and 'info_keyval_vectors'. The 'preput_keyval_size' refers
187 to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector'
188 contains keyval pairs that will be put in the keyval space of the newly
189 created job before the processes are started. The 'maxprocs' array
190 specifies the desired number of processes to create for each command string.
191 The actual number of processes may be less than the numbers specified in
192 maxprocs. The acceptable number of processes spawned may be controlled by
193 ``soft'' keyvals in the info arrays. The ``soft'' option is specified by
194 mpiexec in the MPI-2 standard. Environment variables may be passed to the
195 spawned processes through PMI implementation specific 'info_keyval' parameters.
196 @*/
197 PMIX_EXPORT int PMI2_Job_Spawn(int count, const char * cmds[],
198 int argcs[], const char ** argvs[],
199 const int maxprocs[],
200 const int info_keyval_sizes[],
201 const PMI_keyval_t *info_keyval_vectors[],
202 int preput_keyval_size,
203 const PMI_keyval_t *preput_keyval_vector[],
204 char jobId[], int jobIdSize,
205 int errors[]);
206
207 /*@
208 PMI2_Job_GetId - get job id of this job
209
210 Input parameters:
211 . jobid_size - size of buffer provided in jobid
212
213 Output parameters:
214 . jobid - the job id of this job
215
216 Return values:
217 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
218
219 @*/
220 PMIX_EXPORT int PMI2_Job_GetId(char jobid[], int jobid_size);
221
222 /*@
223 PMI2_Job_GetRank - get rank of this job
224 Output parameters:
225 . rank - the rank (NOTE(review): presumably of this process within the job, as for the 'rank' output of PMI2_Init - confirm)
226 Return values:
227 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
228 @*/
229 PMIX_EXPORT int PMI2_Job_GetRank(int* rank);
230
231 /*@
232 PMI2_Info_GetSize - get the number of processes on the node
233 Output parameters:
234 . size - the number of processes on the node
235 Return values:
236 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
237 @*/
238 PMIX_EXPORT int PMI2_Info_GetSize(int* size);
239
240 /*@
241 PMI2_Job_Connect - connect to the parallel job with ID jobid
242
243 Input parameters:
244 . jobid - job id of the job to connect to
245
246 Output parameters:
247 . conn - connection structure used to establish communication with
248 the remote job
249
250 Return values:
251 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
252
253 Notes:
254 This just "registers" the other parallel job as part of a parallel
255 program, and is used in the PMI2_KVS_xxx routines (see below). This
256 is not a collective call and establishes a connection between all
257 processes that are connected to the calling processes (on the one
258 side) and that are connected to the named jobId on the other
259 side. Processes that are already connected may call this routine.
260
261 @*/
262 PMIX_EXPORT int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn);
263
264 /*@
265 PMI2_Job_Disconnect - disconnects from the job with ID jobid
266
267 Input parameters:
268 . jobid - job id of the job to disconnect from
269
270 Return values:
271 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
272
273 @*/
274 PMIX_EXPORT int PMI2_Job_Disconnect(const char jobid[]);
275
276 /*@
277 PMI2_KVS_Put - put a key/value pair in the keyval space for this job
278
279 Input Parameters:
280 + key - key
281 - value - value
282
283 Return values:
284 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
285
286 Notes:
287 If multiple PMI2_KVS_Put calls are made with the same key between
288 calls to PMI2_KVS_Fence, the behavior is undefined. That is, the
289 value returned by PMI2_KVS_Get for that key after the PMI2_KVS_Fence
290 is not defined.
291
292 @*/
293 PMIX_EXPORT int PMI2_KVS_Put(const char key[], const char value[]);
294 /*@
295 PMI2_KVS_Fence - commit all PMI2_KVS_Put calls made before this fence
296
297 Return values:
298 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
299
300 Notes:
301 This is a collective call across the job. It has semantics that are
302 similar to those for MPI_Win_fence and hence is most easily
303 implemented as a barrier across all of the processes in the job.
304 Specifically, all PMI2_KVS_Put operations performed by any process in
305 the same job must be visible to all processes (by using PMI2_KVS_Get)
306 after PMI2_KVS_Fence completes. However, a PMI implementation could
307 make this a lazy operation by not waiting for all processes to enter
308 their corresponding PMI2_KVS_Fence until some process issues a
309 PMI2_KVS_Get. This might be appropriate for some wide-area
310 implementations.
311
312 @*/
313 PMIX_EXPORT int PMI2_KVS_Fence(void);
314
315 /*@
316 PMI2_KVS_Get - returns the value associated with key in the key-value
317 space associated with the job ID jobid
318
319 Input Parameters:
320 + jobid - the job id identifying the key-value space in which to look
321 for key. If jobid is NULL, look in the key-value space of this job.
322 . src_pmi_id - the pmi id of the process which put this key/value pair. This
323 is just a hint to the server. PMI2_ID_NULL should be passed if no
324 hint is provided.
325 . key - key
326 - maxvalue - size of the buffer provided in value
327
328 Output Parameters:
329 + value - value associated with key
330 - vallen - length of the returned value, or, if the length is longer
331 than maxvalue, the negative of the required length is returned
332
333 Return values:
334 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
335
336 @*/
337 PMIX_EXPORT int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxvalue, int *vallen);
338
339 /*@
340 PMI2_Info_GetNodeAttr - returns the value of the attribute associated
341 with this node
342
343 Input Parameters:
344 + name - name of the node attribute
345 . valuelen - size of the buffer provided in value
346 - waitfor - if non-zero, the function will not return until the
347 attribute is available
348
349 Output Parameters:
350 + value - value of the attribute
351 - found - non-zero indicates that the attribute was found
352
353 Return values:
354 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
355
356 Notes:
357 This provides a way, when combined with PMI2_Info_PutNodeAttr, for
358 processes on the same node to share information without requiring a
359 more general barrier across the entire job.
360
361 If waitfor is non-zero, the function will never return with found
362 set to zero.
363
364 Predefined attributes:
365 + memPoolType - If the process manager allocated a shared memory
366 pool for the MPI processes in this job and on this node, return
367 the type of that pool. Types include sysv, anonmmap and ntshm.
368 . memSYSVid - Return the SYSV memory segment id if the memory pool
369 type is sysv. Returned as a string.
370 . memAnonMMAPfd - Return the FD of the anonymous mmap segment. The
371 FD is returned as a string.
372 - memNTName - Return the name of the Windows NT shared memory
373 segment, file mapping object backed by system paging
374 file. Returned as a string.
375
376 @*/
377 PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor);
378
379 /*@
380 PMI2_Info_GetNodeAttrIntArray - returns the value of the attribute associated
381 with this node. The value must be an array of integers.
382
383 Input Parameters:
384 + name - name of the node attribute
385 - arraylen - number of elements in array
386
387 Output Parameters:
388 + array - value of attribute
389 . outlen - number of elements returned
390 - found - non-zero if attribute was found
391
392 Return values:
393 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
394
395 Notes:
396 Notice that, unlike PMI2_Info_GetNodeAttr, this function does not
397 have a waitfor parameter, and will return immediately with found=0
398 if the attribute was not found.
399
400 Predefined array attribute names:
401 + localRanksCount - Return the number of local ranks that will be
402 returned by the key localRanks.
403 . localRanks - Return the ranks in MPI_COMM_WORLD of the processes
404 that are running on this node.
405 - cartCoords - Return the Cartesian coordinates of this process in
406 the underlying network topology. The coordinates are indexed from
407 zero. Valid only if the Job attribute for physTopology includes
408 cartesian.
409
410 @*/
411 PMIX_EXPORT int PMI2_Info_GetNodeAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found);
412
413 /*@
414 PMI2_Info_PutNodeAttr - stores the value of the named attribute
415 associated with this node
416
417 Input Parameters:
418 + name - name of the node attribute
419 - value - the value of the attribute
420
421 Return values:
422 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
423
424 Notes:
425 For example, it might be used to share segment ids with other
426 processes on the same SMP node.
427
428 @*/
429 PMIX_EXPORT int PMI2_Info_PutNodeAttr(const char name[], const char value[]);
430
431 /*@
432 PMI2_Info_GetJobAttr - returns the value of the attribute associated
433 with this job
434
435 Input Parameters:
436 + name - name of the job attribute
437 - valuelen - size of the buffer provided in value
438
439 Output Parameters:
440 + value - value of the attribute
441 - found - non-zero indicates that the attribute was found
442
443 Return values:
444 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
445
446 @*/
447 PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found);
448
449 /*@
450 PMI2_Info_GetJobAttrIntArray - returns the value of the attribute associated
451 with this job. The value must be an array of integers.
452
453 Input Parameters:
454 + name - name of the job attribute
455 - arraylen - number of elements in array
456
457 Output Parameters:
458 + array - value of attribute
459 . outlen - number of elements returned
460 - found - non-zero if attribute was found
461
462 Return values:
463 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
464
465 Predefined array attribute names:
466
467 + universeSize - The size of the "universe" (defined for the MPI
468 attribute MPI_UNIVERSE_SIZE)
469
470 . hasNameServ - The value hasNameServ is true if the PMI2 environment
471 supports the name service operations (publish, lookup, and
472 unpublish).
473
474 . physTopology - Return the topology of the underlying network. The
475 valid topology types include cartesian, hierarchical, complete,
476 kautz, hypercube; additional types may be added as necessary. If
477 the type is hierarchical, then additional attributes may be
478 queried to determine the details of the topology. For example, a
479 typical cluster has a hierarchical physical topology, consisting
480 of two levels of complete networks - the switched Ethernet or
481 Infiniband and the SMP nodes. Other systems, such as IBM BlueGene,
482 have one level that is cartesian (and in virtual node mode, have a
483 single-level physical topology).
484
485 . physTopologyLevels - Return a string describing the topology type
486 for each level of the underlying network. Only valid if the
487 physTopology is hierarchical. The value is a comma-separated list
488 of physical topology types (except for hierarchical). The levels
489 are ordered starting at the top, with the network closest to the
490 processes last. The lower level networks may connect only a subset
491 of processes. For example, for a cartesian mesh of SMPs, the value
492 is cartesian,complete. All processes are connected by the
493 cartesian part of this, but for each complete network, only the
494 processes on the same node are connected.
495
496 . cartDims - Return a string of comma-separated values describing
497 the dimensions of the Cartesian topology. This must be consistent
498 with the value of cartCoords that may be returned by
499 PMI2_Info_GetNodeAttrIntArray.
500
501 - isHeterogeneous - The value isHeterogeneous is true if the
502 processes belonging to the job are running on nodes with different
503 underlying data models.
504
505 These job attributes are just a start, but they provide both an
506 example of the sort of external data that is available through the
507 PMI interface and how extensions can be added within the same API
508 and wire protocol. For example, adding more complex network
509 topologies requires only adding new keys, not new routines.
510
511 @*/
512 PMIX_EXPORT int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found);
513
514 /*@
515 PMI2_Nameserv_publish - publish a name
516
517 Input parameters:
518 + service_name - string representing the service being published
519 . info_ptr - pointer to a PMI_keyval_t (NOTE(review): meaning not documented here - confirm)
520 - port - string representing the port on which to contact the service
521
522 Return values:
523 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
524
525 @*/
526 PMIX_EXPORT int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_ptr, const char port[]);
527
528 /*@
529 PMI2_Nameserv_lookup - lookup a service by name
530
531 Input parameters:
532 + service_name - string representing the service being looked up
533 . info_ptr - pointer to a PMI_keyval_t (NOTE(review): meaning not documented here - confirm)
534 - portLen - size of buffer provided in port
535
536 Output parameters:
537 . port - string representing the port on which to contact the service
538
539 Return values:
540 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
541
542 @*/
543 PMIX_EXPORT int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr,
544 char port[], int portLen);
545 /*@
546 PMI2_Nameserv_unpublish - unpublish a name
547
548 Input parameters:
549 + service_name - string representing the service being unpublished
550 - info_ptr - pointer to a PMI_keyval_t (NOTE(review): meaning not documented here - confirm)
551
552 Return values:
553 Returns 'PMI2_SUCCESS' on success and a PMI error code on failure.
554
555 @*/
556 PMIX_EXPORT int PMI2_Nameserv_unpublish(const char service_name[],
557 const PMI_keyval_t *info_ptr);
558
559
560
561 #if defined(__cplusplus)
562 }
563 #endif
564
565 #endif /* PMI2_H_INCLUDED */