1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil ; -*- */ 2 /* 3 * (C) 2007 by Argonne National Laboratory. 4 * See COPYRIGHT in top-level directory. 5 */ 6 7 #ifndef PMI2_H_INCLUDED 8 #define PMI2_H_INCLUDED 9 10 /* Structure and constant definitions */ 11 #include <pmix_common.h> 12 13 #define PMI2_MAX_KEYLEN 64 14 #define PMI2_MAX_VALLEN 1024 15 #define PMI2_MAX_ATTRVALUE 1024 16 #define PMI2_ID_NULL -1 17 18 #if defined(__cplusplus) 19 extern "C" { 20 #endif 21 22 /*D 23 PMI2_CONSTANTS - PMI2 definitions 24 25 Error Codes: 26 + PMI2_SUCCESS - operation completed successfully 27 . PMI2_FAIL - operation failed 28 . PMI2_ERR_NOMEM - input buffer not large enough 29 . PMI2_ERR_INIT - PMI not initialized 30 . PMI2_ERR_INVALID_ARG - invalid argument 31 . PMI2_ERR_INVALID_KEY - invalid key argument 32 . PMI2_ERR_INVALID_KEY_LENGTH - invalid key length argument 33 . PMI2_ERR_INVALID_VAL - invalid val argument 34 . PMI2_ERR_INVALID_VAL_LENGTH - invalid val length argument 35 . PMI2_ERR_INVALID_LENGTH - invalid length argument 36 . PMI2_ERR_INVALID_NUM_ARGS - invalid number of arguments 37 . PMI2_ERR_INVALID_ARGS - invalid args argument 38 . PMI2_ERR_INVALID_NUM_PARSED - invalid num_parsed length argument 39 . PMI2_ERR_INVALID_KEYVALP - invalid keyvalp argument 40 . PMI2_ERR_INVALID_SIZE - invalid size argument 41 - PMI2_ERR_OTHER - other unspecified error 42 43 D*/ 44 #define PMI2_SUCCESS 0 45 #define PMI2_FAIL -1 46 #define PMI2_ERR_INIT 1 47 #define PMI2_ERR_NOMEM 2 48 #define PMI2_ERR_INVALID_ARG 3 49 #define PMI2_ERR_INVALID_KEY 4 50 #define PMI2_ERR_INVALID_KEY_LENGTH 5 51 #define PMI2_ERR_INVALID_VAL 6 52 #define PMI2_ERR_INVALID_VAL_LENGTH 7 53 #define PMI2_ERR_INVALID_LENGTH 8 54 #define PMI2_ERR_INVALID_NUM_ARGS 9 55 #define PMI2_ERR_INVALID_ARGS 10 56 #define PMI2_ERR_INVALID_NUM_PARSED 11 57 #define PMI2_ERR_INVALID_KEYVALP 12 58 #define PMI2_ERR_INVALID_SIZE 13 59 #define PMI2_ERR_OTHER 14 60 61 /* This is here to allow spawn multiple functions to compile. This 62 needs to be removed once those functions are fixed for pmi2 */ 63 typedef struct PMI_keyval_t 64 { 65 char * key; 66 char * val; 67 } PMI_keyval_t; 68 69 70 /*@ 71 PMI2_Connect_comm_t - connection structure used when connecting to other jobs 72 73 Fields: 74 + read - Read from a connection to the leader of the job to which 75 this process will be connecting. Returns 0 on success or an MPI 76 error code on failure. 77 . write - Write to a connection to the leader of the job to which 78 this process will be connecting. Returns 0 on success or an MPI 79 error code on failure. 80 . ctx - An anonymous pointer to data that may be used by the read 81 and write members. 82 - isMaster - Indicates which process is the "master"; may have the 83 values 1 (is the master), 0 (is not the master), or -1 (neither is 84 designated as the master). The two processes must agree on which 85 process is the master, or both must select -1 (neither is the 86 master). 87 88 Notes: 89 A typical implementation of these functions will use the read and 90 write calls on a pre-established file descriptor (fd) between the 91 two leading processes. This will be needed only if the PMI server 92 cannot access the KVS spaces of another job (this may happen, for 93 example, if each mpiexec creates the KVS spaces for the processes 94 that it manages). 95 96 @*/ 97 typedef struct PMI2_Connect_comm { 98 int (*read)( void *buf, int maxlen, void *ctx ); 99 int (*write)( const void *buf, int len, void *ctx ); 100 void *ctx; 101 int isMaster; 102 } PMI2_Connect_comm_t; 103 104 105 /*@ 106 PMI2_Init - initialize the Process Manager Interface 107 108 Output Parameter: 109 + spawned - spawned flag 110 . size - number of processes in the job 111 . rank - rank of this process in the job 112 - appnum - which executable is this on the mpiexec commandline 113 114 Return values: 115 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 116 117 Notes: 118 Initialize PMI for this process group. The value of spawned indicates whether 119 this process was created by 'PMI2_Spawn_multiple'. 'spawned' will be non-zero 120 iff this process group has a parent. 121 122 @*/ 123 PMIX_EXPORT int PMI2_Init(int *spawned, int *size, int *rank, int *appnum); 124 125 /*@ 126 PMI2_Finalize - finalize the Process Manager Interface 127 128 Return values: 129 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 130 131 Notes: 132 Finalize PMI for this job. 133 134 @*/ 135 PMIX_EXPORT int PMI2_Finalize(void); 136 137 /*@ 138 PMI2_Initialized - check if PMI has been initialized 139 140 Return values: 141 Non-zero if PMI2_Initialize has been called successfully, zero otherwise. 142 143 @*/ 144 PMIX_EXPORT int PMI2_Initialized(void); 145 146 /*@ 147 PMI2_Abort - abort the process group associated with this process 148 149 Input Parameters: 150 + flag - non-zero if all processes in this job should abort, zero otherwise 151 - error_msg - error message to be printed 152 153 Return values: 154 If the abort succeeds this function will not return. Returns an MPI 155 error code otherwise. 156 157 @*/ 158 PMIX_EXPORT int PMI2_Abort(int flag, const char msg[]); 159 160 /*@ 161 PMI2_Spawn - spawn a new set of processes 162 163 Input Parameters: 164 + count - count of commands 165 . cmds - array of command strings 166 . argcs - size of argv arrays for each command string 167 . argvs - array of argv arrays for each command string 168 . maxprocs - array of maximum processes to spawn for each command string 169 . info_keyval_sizes - array giving the number of elements in each of the 170 'info_keyval_vectors' 171 . info_keyval_vectors - array of keyval vector arrays 172 . preput_keyval_size - Number of elements in 'preput_keyval_vector' 173 . preput_keyval_vector - array of keyvals to be pre-put in the spawned keyval space 174 - jobIdSize - size of the buffer provided in jobId 175 176 Output Parameter: 177 + jobId - job id of the spawned processes 178 - errors - array of errors for each command 179 180 Return values: 181 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 182 183 Notes: 184 This function spawns a set of processes into a new job. The 'count' 185 field refers to the size of the array parameters - 'cmd', 'argvs', 'maxprocs', 186 'info_keyval_sizes' and 'info_keyval_vectors'. The 'preput_keyval_size' refers 187 to the size of the 'preput_keyval_vector' array. The 'preput_keyval_vector' 188 contains keyval pairs that will be put in the keyval space of the newly 189 created job before the processes are started. The 'maxprocs' array 190 specifies the desired number of processes to create for each 'cmd' string. 191 The actual number of processes may be less than the numbers specified in 192 maxprocs. The acceptable number of processes spawned may be controlled by 193 ``soft'' keyvals in the info arrays. The ``soft'' option is specified by 194 mpiexec in the MPI-2 standard. Environment variables may be passed to the 195 spawned processes through PMI implementation specific 'info_keyval' parameters. 196 @*/ 197 PMIX_EXPORT int PMI2_Job_Spawn(int count, const char * cmds[], 198 int argcs[], const char ** argvs[], 199 const int maxprocs[], 200 const int info_keyval_sizes[], 201 const PMI_keyval_t *info_keyval_vectors[], 202 int preput_keyval_size, 203 const PMI_keyval_t *preput_keyval_vector[], 204 char jobId[], int jobIdSize, 205 int errors[]); 206 207 /*@ 208 PMI2_Job_GetId - get job id of this job 209 210 Input parameters: 211 . jobid_size - size of buffer provided in jobid 212 213 Output parameters: 214 . jobid - the job id of this job 215 216 Return values: 217 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 218 219 @*/ 220 PMIX_EXPORT int PMI2_Job_GetId(char jobid[], int jobid_size); 221 222 /*@ 223 PMI2_Job_GetRank - get rank of this job 224 Output parameters: 225 . rank - the rank of this job 226 Return values: 227 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 228 @*/ 229 PMIX_EXPORT int PMI2_Job_GetRank(int* rank); 230 231 /*@ 232 PMI2_Info_GetSize - get the number of processes on the node 233 Output parameters: 234 . rank - the rank of this job 235 Return values: 236 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 237 @*/ 238 PMIX_EXPORT int PMI2_Info_GetSize(int* size); 239 240 /*@ 241 PMI2_Job_Connect - connect to the parallel job with ID jobid 242 243 Input parameters: 244 . jobid - job id of the job to connect to 245 246 Output parameters: 247 . conn - connection structure used to exteblish communication with 248 the remote job 249 250 Return values: 251 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 252 253 Notes: 254 This just "registers" the other parallel job as part of a parallel 255 program, and is used in the PMI2_KVS_xxx routines (see below). This 256 is not a collective call and establishes a connection between all 257 processes that are connected to the calling processes (on the one 258 side) and that are connected to the named jobId on the other 259 side. Processes that are already connected may call this routine. 260 261 @*/ 262 PMIX_EXPORT int PMI2_Job_Connect(const char jobid[], PMI2_Connect_comm_t *conn); 263 264 /*@ 265 PMI2_Job_Disconnect - disconnects from the job with ID jobid 266 267 Input parameters: 268 . jobid - job id of the job to connect to 269 270 Return values: 271 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 272 273 @*/ 274 PMIX_EXPORT int PMI2_Job_Disconnect(const char jobid[]); 275 276 /*@ 277 PMI2_KVS_Put - put a key/value pair in the keyval space for this job 278 279 Input Parameters: 280 + key - key 281 - value - value 282 283 Return values: 284 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 285 286 Notes: 287 If multiple PMI2_KVS_Put calls are made with the same key between 288 calls to PMI2_KVS_Fence, the behavior is undefined. That is, the 289 value returned by PMI2_KVS_Get for that key after the PMI2_KVS_Fence 290 is not defined. 291 292 @*/ 293 PMIX_EXPORT int PMI2_KVS_Put(const char key[], const char value[]); 294 /*@ 295 PMI2_KVS_Fence - commit all PMI2_KVS_Put calls made before this fence 296 297 Return values: 298 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 299 300 Notes: 301 This is a collective call across the job. It has semantics that are 302 similar to those for MPI_Win_fence and hence is most easily 303 implemented as a barrier across all of the processes in the job. 304 Specifically, all PMI2_KVS_Put operations performed by any process in 305 the same job must be visible to all processes (by using PMI2_KVS_Get) 306 after PMI2_KVS_Fence completes. However, a PMI implementation could 307 make this a lazy operation by not waiting for all processes to enter 308 their corresponding PMI2_KVS_Fence until some process issues a 309 PMI2_KVS_Get. This might be appropriate for some wide-area 310 implementations. 311 312 @*/ 313 PMIX_EXPORT int PMI2_KVS_Fence(void); 314 315 /*@ 316 PMI2_KVS_Get - returns the value associated with key in the key-value 317 space associated with the job ID jobid 318 319 Input Parameters: 320 + jobid - the job id identifying the key-value space in which to look 321 for key. If jobid is NULL, look in the key-value space of this job. 322 . src_pmi_id - the pmi id of the process which put this keypair. This 323 is just a hint to the server. PMI2_ID_NULL should be passed if no 324 hint is provided. 325 . key - key 326 - maxvalue - size of the buffer provided in value 327 328 Output Parameters: 329 + value - value associated with key 330 - vallen - length of the returned value, or, if the length is longer 331 than maxvalue, the negative of the required length is returned 332 333 Return values: 334 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 335 336 @*/ 337 PMIX_EXPORT int PMI2_KVS_Get(const char *jobid, int src_pmi_id, const char key[], char value [], int maxvalue, int *vallen); 338 339 /*@ 340 PMI2_Info_GetNodeAttr - returns the value of the attribute associated 341 with this node 342 343 Input Parameters: 344 + name - name of the node attribute 345 . valuelen - size of the buffer provided in value 346 - waitfor - if non-zero, the function will not return until the 347 attribute is available 348 349 Output Parameters: 350 + value - value of the attribute 351 - found - non-zero indicates that the attribute was found 352 353 Return values: 354 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 355 356 Notes: 357 This provides a way, when combined with PMI2_Info_PutNodeAttr, for 358 processes on the same node to share information without requiring a 359 more general barrier across the entire job. 360 361 If waitfor is non-zero, the function will never return with found 362 set to zero. 363 364 Predefined attributes: 365 + memPoolType - If the process manager allocated a shared memory 366 pool for the MPI processes in this job and on this node, return 367 the type of that pool. Types include sysv, anonmmap and ntshm. 368 . memSYSVid - Return the SYSV memory segment id if the memory pool 369 type is sysv. Returned as a string. 370 . memAnonMMAPfd - Return the FD of the anonymous mmap segment. The 371 FD is returned as a string. 372 - memNTName - Return the name of the Windows NT shared memory 373 segment, file mapping object backed by system paging 374 file. Returned as a string. 375 376 @*/ 377 PMIX_EXPORT int PMI2_Info_GetNodeAttr(const char name[], char value[], int valuelen, int *found, int waitfor); 378 379 /*@ 380 PMI2_Info_GetNodeAttrIntArray - returns the value of the attribute associated 381 with this node. The value must be an array of integers. 382 383 Input Parameters: 384 + name - name of the node attribute 385 - arraylen - number of elements in array 386 387 Output Parameters: 388 + array - value of attribute 389 . outlen - number of elements returned 390 - found - non-zero if attribute was found 391 392 Return values: 393 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 394 395 Notes: 396 Notice that, unlike PMI2_Info_GetNodeAttr, this function does not 397 have a waitfor parameter, and will return immediately with found=0 398 if the attribute was not found. 399 400 Predefined array attribute names: 401 + localRanksCount - Return the number of local ranks that will be 402 returned by the key localRanks. 403 . localRanks - Return the ranks in MPI_COMM_WORLD of the processes 404 that are running on this node. 405 - cartCoords - Return the Cartesian coordinates of this process in 406 the underlying network topology. The coordinates are indexed from 407 zero. Value only if the Job attribute for physTopology includes 408 cartesian. 409 410 @*/ 411 PMIX_EXPORT int PMI2_Info_GetNodeAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); 412 413 /*@ 414 PMI2_Info_PutNodeAttr - stores the value of the named attribute 415 associated with this node 416 417 Input Parameters: 418 + name - name of the node attribute 419 - value - the value of the attribute 420 421 Return values: 422 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 423 424 Notes: 425 For example, it might be used to share segment ids with other 426 processes on the same SMP node. 427 428 @*/ 429 PMIX_EXPORT int PMI2_Info_PutNodeAttr(const char name[], const char value[]); 430 431 /*@ 432 PMI2_Info_GetJobAttr - returns the value of the attribute associated 433 with this job 434 435 Input Parameters: 436 + name - name of the job attribute 437 - valuelen - size of the buffer provided in value 438 439 Output Parameters: 440 + value - value of the attribute 441 - found - non-zero indicates that the attribute was found 442 443 Return values: 444 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 445 446 @*/ 447 PMIX_EXPORT int PMI2_Info_GetJobAttr(const char name[], char value[], int valuelen, int *found); 448 449 /*@ 450 PMI2_Info_GetJobAttrIntArray - returns the value of the attribute associated 451 with this job. The value must be an array of integers. 452 453 Input Parameters: 454 + name - name of the job attribute 455 - arraylen - number of elements in array 456 457 Output Parameters: 458 + array - value of attribute 459 . outlen - number of elements returned 460 - found - non-zero if attribute was found 461 462 Return values: 463 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 464 465 Predefined array attribute names: 466 467 + universeSize - The size of the "universe" (defined for the MPI 468 attribute MPI_UNIVERSE_SIZE 469 470 . hasNameServ - The value hasNameServ is true if the PMI2 environment 471 supports the name service operations (publish, lookup, and 472 unpublish). 473 474 . physTopology - Return the topology of the underlying network. The 475 valid topology types include cartesian, hierarchical, complete, 476 kautz, hypercube; additional types may be added as necessary. If 477 the type is hierarchical, then additional attributes may be 478 queried to determine the details of the topology. For example, a 479 typical cluster has a hierarchical physical topology, consisting 480 of two levels of complete networks - the switched Ethernet or 481 Infiniband and the SMP nodes. Other systems, such as IBM BlueGene, 482 have one level that is cartesian (and in virtual node mode, have a 483 single-level physical topology). 484 485 . physTopologyLevels - Return a string describing the topology type 486 for each level of the underlying network. Only valid if the 487 physTopology is hierarchical. The value is a comma-separated list 488 of physical topology types (except for hierarchical). The levels 489 are ordered starting at the top, with the network closest to the 490 processes last. The lower level networks may connect only a subset 491 of processes. For example, for a cartesian mesh of SMPs, the value 492 is cartesian,complete. All processes are connected by the 493 cartesian part of this, but for each complete network, only the 494 processes on the same node are connected. 495 496 . cartDims - Return a string of comma-separated values describing 497 the dimensions of the Cartesian topology. This must be consistent 498 with the value of cartCoords that may be returned by 499 PMI2_Info_GetNodeAttrIntArray. 500 501 These job attributes are just a start, but they provide both an 502 example of the sort of external data that is available through the 503 PMI interface and how extensions can be added within the same API 504 and wire protocol. For example, adding more complex network 505 topologies requires only adding new keys, not new routines. 506 507 . isHeterogeneous - The value isHeterogeneous is true if the 508 processes belonging to the job are running on nodes with different 509 underlying data models. 510 511 @*/ 512 PMIX_EXPORT int PMI2_Info_GetJobAttrIntArray(const char name[], int array[], int arraylen, int *outlen, int *found); 513 514 /*@ 515 PMI2_Nameserv_publish - publish a name 516 517 Input parameters: 518 + service_name - string representing the service being published 519 . info_ptr - 520 - port - string representing the port on which to contact the service 521 522 Return values: 523 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 524 525 @*/ 526 PMIX_EXPORT int PMI2_Nameserv_publish(const char service_name[], const PMI_keyval_t *info_ptr, const char port[]); 527 528 /*@ 529 PMI2_Nameserv_lookup - lookup a service by name 530 531 Input parameters: 532 + service_name - string representing the service being published 533 . info_ptr - 534 - portLen - size of buffer provided in port 535 536 Output parameters: 537 . port - string representing the port on which to contact the service 538 539 Return values: 540 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 541 542 @*/ 543 PMIX_EXPORT int PMI2_Nameserv_lookup(const char service_name[], const PMI_keyval_t *info_ptr, 544 char port[], int portLen); 545 /*@ 546 PMI2_Nameserv_unpublish - unpublish a name 547 548 Input parameters: 549 + service_name - string representing the service being unpublished 550 - info_ptr - 551 552 Return values: 553 Returns 'PMI2_SUCCESS' on success and an PMI error code on failure. 554 555 @*/ 556 PMIX_EXPORT int PMI2_Nameserv_unpublish(const char service_name[], 557 const PMI_keyval_t *info_ptr); 558 559 560 561 #if defined(__cplusplus) 562 } 563 #endif 564 565 #endif /* PMI2_H_INCLUDED */