1 /* 2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana 3 * University Research and Technology 4 * Corporation. All rights reserved. 5 * Copyright (c) 2004-2011 The University of Tennessee and The University 6 * of Tennessee Research Foundation. All rights 7 * reserved. 8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 9 * University of Stuttgart. All rights reserved. 10 * Copyright (c) 2004-2005 The Regents of the University of California. 11 * All rights reserved. 12 * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved. 13 * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights 14 * reserved. 15 * Copyright (c) 2013-2014 Intel, Inc. All rights reserved 16 * Copyright (c) 2015-2016 Research Organization for Information Science 17 * and Technology (RIST). All rights reserved. 18 * $COPYRIGHT$ 19 * 20 * Additional copyrights may follow 21 * 22 * $HEADER$ 23 */ 24 25 26 /** @file 27 * Process identification structure interface 28 * 29 * Process identification structure interface. The ompi_proc_t 30 * structure contatins basic information about the remote (and local) 31 * processes. 32 */ 33 34 #ifndef OMPI_PROC_PROC_H 35 #define OMPI_PROC_PROC_H 36 37 #include "ompi_config.h" 38 #include "ompi/types.h" 39 40 #include "opal/util/proc.h" 41 42 #include "ompi/mca/rte/rte.h" 43 44 45 BEGIN_C_DECLS 46 47 /* ******************************************************************** */ 48 49 50 /** 51 * Remote Open MPI process structure 52 * 53 * Remote Open MPI process structure. Each process contains exactly 54 * one ompi_proc_t structure for each remote process it knows about. 55 * 56 * Each proc entry has an array of endpoint data associated with it. 57 * The size of this array, and its entries, is unique to a particular 58 * build of Open MPI. As the endpoint list (or index values) are 59 * local to a process, this does not negatively impact heterogeneous 60 * builds. If a component or framework requires a tag index, it 61 * should call OMPI_REQUIRE_ENDPOINT_TAG(<name>). Requests which 62 * share the same name will have the same value, allowing 63 * cross-component sharing of endpoint data. The tag may be referenced 64 * by the pre-processor define OMPI_PROC_ENDPOINT_TAG_<name>. Adding 65 * a tag increases the memory consumed by Open MPI, so should only be done 66 * if unavoidable. 67 */ 68 69 #define OMPI_PROC_PADDING_SIZE 16 70 71 struct ompi_proc_t { 72 opal_proc_t super; 73 74 /* endpoint data */ 75 void *proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MAX]; 76 77 char padding[OMPI_PROC_PADDING_SIZE]; /* for future extensions (OSHMEM uses this area also)*/ 78 }; 79 typedef struct ompi_proc_t ompi_proc_t; 80 OBJ_CLASS_DECLARATION(ompi_proc_t); 81 82 83 /** 84 * @private 85 * 86 * Pointer to the ompi_proc_t structure for the local process 87 * 88 * @note This pointer is declared here to allow inline functions 89 * within this header file to access the local process quickly. 90 * Please use ompi_proc_local() instead. 91 */ 92 OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc; 93 OMPI_DECLSPEC extern opal_list_t ompi_proc_list; 94 95 /* ******************************************************************** */ 96 97 98 /** 99 * Initialize the OMPI process subsystem 100 * 101 * Initialize the Open MPI process subsystem. This function will 102 * query the run-time environment and build a list of the proc 103 * instances in the current MPI_COMM_WORLD. The local information not 104 * easily determined by the run-time ahead of time (architecture and 105 * hostname) will be published during this call. 106 * 107 * @note While an ompi_proc_t will exist with mostly valid information 108 * for each process in the MPI_COMM_WORLD at the conclusion of this 109 * call, some information will not be immediately available. This 110 * includes the architecture and hostname, which will be available by 111 * the conclusion of the stage gate. 112 * 113 * @retval OMPI_SUCESS System successfully initialized 114 * @retval OMPI_ERROR Initialization failed due to unspecified error 115 */ 116 OMPI_DECLSPEC int ompi_proc_init(void); 117 118 /** 119 * Complete filling up the proc information (arch, name and locality) for all 120 * procs related to this job. This function is to be called only after 121 * the modex exchange has been completed. 122 * 123 * @retval OMPI_SUCCESS All information correctly set. 124 * @retval OMPI_ERROR Some info could not be initialized. 125 */ 126 OMPI_DECLSPEC int ompi_proc_complete_init(void); 127 128 /** 129 * Complete filling up the proc information (arch, name and locality) for 130 * a given proc. This function is to be called only after the modex exchange 131 * has been completed. 132 * 133 * @param[in] proc the proc whose information will be filled up 134 * 135 * @retval OMPI_SUCCESS All information correctly set. 136 * @retval OMPI_ERROR Some info could not be initialized. 137 */ 138 OMPI_DECLSPEC int ompi_proc_complete_init_single(ompi_proc_t* proc); 139 140 /** 141 * Finalize the OMPI Process subsystem 142 * 143 * Finalize the Open MPI process subsystem. This function will 144 * release all memory created during the life of the application, 145 * including all ompi_proc_t structures. 146 * 147 * @retval OMPI_SUCCESS System successfully finalized 148 */ 149 OMPI_DECLSPEC int ompi_proc_finalize(void); 150 151 152 /** 153 * Returns the list of proc instances associated with this job. 154 * 155 * Returns the list of proc instances associated with this job. Given 156 * the current association between a job and an MPI_COMM_WORLD, this 157 * function provides the process instances for the current 158 * MPI_COMM_WORLD. Use this function only if absolutely needed as it 159 * will cause ompi_proc_t objects to be allocated for every process in 160 * the job. If you only need the allocated ompi_proc_t objects call 161 * ompi_proc_get_allocated() instead. 162 * 163 * @note The reference count of each process in the array is 164 * NOT incremented - the caller is responsible for ensuring the 165 * correctness of the reference count once they are done with 166 * the array. 167 * 168 * @param[in] size Number of processes in the ompi_proc_t array 169 * 170 * @return Array of pointers to proc instances in the current 171 * MPI_COMM_WORLD, or NULL if there is an internal failure. 172 */ 173 OMPI_DECLSPEC ompi_proc_t** ompi_proc_world(size_t* size); 174 175 /** 176 * Returns the number of processes in the associated with this job. 177 * 178 * Returns the list of proc instances associated with this job. Given 179 * the current association between a job and an MPI_COMM_WORLD, this 180 * function provides the number of processes for the current 181 * MPI_COMM_WORLD. 182 */ 183 184 OMPI_DECLSPEC int ompi_proc_world_size (void); 185 186 /** 187 * Returns the list of proc instances associated with this job. 188 * 189 * Returns the list of proc instances associated with this job that have 190 * already been allocated. Given the current association between a job 191 * and an MPI_COMM_WORLD, this function provides the allocated process 192 * instances for the current MPI_COMM_WORLD. 193 * 194 * @note The reference count of each process in the array is 195 * NOT incremented - the caller is responsible for ensuring the 196 * correctness of the reference count once they are done with 197 * the array. 198 * 199 * @param[in] size Number of processes in the ompi_proc_t array 200 * 201 * @return Array of pointers to allocated proc instances in the current 202 * MPI_COMM_WORLD, or NULL if there is an internal failure. 203 */ 204 OMPI_DECLSPEC ompi_proc_t **ompi_proc_get_allocated (size_t *size); 205 206 /** 207 * Returns the list of all known proc instances. 208 * 209 * Returns the list of all known proc instances, including those in 210 * other MPI_COMM_WORLDs. It is possible that we may no longer be 211 * connected to some of the procs returned (in the MPI sense of the 212 * word connected). In a strictly MPI-1 application, this function 213 * will return the same information as ompi_proc_world(). 214 * 215 * @note The reference count of each process in the array is 216 * incremented and the caller is responsible for releasing each 217 * process in the array, as well as freeing the array. 218 * 219 * @param[in] size Number of processes in the ompi_proc_t array 220 * 221 * @return Array of pointers to proc instances in the current 222 * known universe, or NULL if there is an internal failure. 223 */ 224 OMPI_DECLSPEC ompi_proc_t** ompi_proc_all(size_t* size); 225 226 227 /** 228 * Returns a list of the local process 229 * 230 * Returns a list containing the local process (and only the local 231 * process). Has calling semantics similar to ompi_proc_world() and 232 * ompi_proc_all(). 233 * 234 * @note The reference count of each process in the array is 235 * incremented and the caller is responsible for releasing each 236 * process in the array, as well as freeing the array. 237 * 238 * @param[in] size Number of processes in the ompi_proc_t array 239 * 240 * @return Array of pointers to proc instances in the current 241 * known universe, or NULL if there is an internal failure. 242 */ 243 OMPI_DECLSPEC ompi_proc_t** ompi_proc_self(size_t* size); 244 245 246 /** 247 * Returns a pointer to the local process 248 * 249 * Returns a pointer to the local process. Unlike ompi_proc_self(), 250 * the reference count on the local proc instance is not modified by 251 * this function. 252 * 253 * @return Pointer to the local process structure 254 */ 255 static inline ompi_proc_t* ompi_proc_local(void) 256 { 257 return ompi_proc_local_proc; 258 } 259 260 261 /** 262 * Returns the proc instance for a given name 263 * 264 * Returns the proc instance for the specified process name. The 265 * reference count for the proc instance is not incremented by this 266 * function. 267 * 268 * @param[in] name The process name to look for 269 * 270 * @return Pointer to the process instance for \c name 271 */ 272 OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const ompi_process_name_t* name ); 273 274 OMPI_DECLSPEC ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew); 275 276 /** 277 * Pack proc list into portable buffer 278 * 279 * This function takes a list of ompi_proc_t pointers (e.g. as given 280 * in groups) and returns a orte buffer containing all information 281 * needed to add the proc to a remote list. This includes the ORTE 282 * process name, the architecture, and the hostname. Ordering is 283 * maintained. The buffer is packed to be sent to a remote node with 284 * different architecture (endian or word size). 285 * 286 * @param[in] proclist List of process pointers 287 * @param[in] proclistsize Length of the proclist array 288 * @param[in,out] buf An opal_buffer containing the packed names. 289 * The buffer must be constructed but empty when 290 * passed to this function 291 * @retval OMPI_SUCCESS Success 292 * @retval OMPI_ERROR Unspecified error 293 */ 294 OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist, 295 int proclistsize, 296 opal_buffer_t *buf); 297 298 299 /** 300 * Unpack a portable buffer of procs 301 * 302 * This function unpacks a packed list of ompi_proc_t structures and 303 * returns the ordered list of proc structures. If the given proc is 304 * already "known", the architecture and hostname information in the 305 * buffer is ignored. If the proc is "new" to this process, it will 306 * be added to the global list of known procs, with information 307 * provided in the buffer. The lookup actions are always entirely 308 * local. The proclist returned is a list of pointers to all procs in 309 * the buffer, whether they were previously known or are new to this 310 * process. 311 * 312 * @note In previous versions of this function, The PML's add_procs() 313 * function was called for any new processes discovered as a result of 314 * this operation. That is no longer the case -- the caller must use 315 * the newproclist information to call add_procs() if necessary. 316 * 317 * @note The reference count for procs created as a result of this 318 * operation will be set to 1. Existing procs will not have their 319 * reference count changed. The reference count of a proc at the 320 * return of this function is the same regardless of whether NULL is 321 * provided for newproclist. The user is responsible for freeing the 322 * newproclist array. 323 * 324 * @param[in] buf opal_buffer containing the packed names 325 * @param[in] proclistsize number of expected proc-pointres 326 * @param[out] proclist list of process pointers 327 * @param[out] newproclistsize Number of new procs added as a result 328 * of the unpack operation. NULL may be 329 * provided if information is not needed. 330 * @param[out] newproclist List of new procs added as a result of 331 * the unpack operation. NULL may be 332 * provided if informationis not needed. 333 * 334 * Return value: 335 * OMPI_SUCCESS on success 336 * OMPI_ERROR else 337 */ 338 OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf, 339 int proclistsize, 340 ompi_proc_t ***proclist, 341 int *newproclistsize, 342 ompi_proc_t ***newproclist); 343 344 /** 345 * Refresh the OMPI process subsystem 346 * 347 * Refresh the Open MPI process subsystem. This function will update 348 * the list of proc instances in the current MPI_COMM_WORLD with 349 * data from the run-time environemnt. 350 * 351 * @note This is primarily used when restarting a process and thus 352 * need to update the jobid and node name. 353 * 354 * @retval OMPI_SUCESS System successfully refreshed 355 * @retval OMPI_ERROR Refresh failed due to unspecified error 356 */ 357 OMPI_DECLSPEC int ompi_proc_refresh(void); 358 359 /** 360 * Get the ompi_proc_t for a given process name 361 * 362 * @param[in] proc_name opal process name 363 * 364 * @returns cached or new ompi_proc_t for the given process name 365 * 366 * This function looks up the given process name in the hash of existing 367 * ompi_proc_t structures. If no ompi_proc_t structure exists matching the 368 * given name a new ompi_proc_t is allocated, initialized, and returned. 369 * 370 * @note The ompi_proc_t is added to the local list of processes but is not 371 * added to any communicator. ompi_comm_peer_lookup is responsible for caching 372 * the ompi_proc_t on a communicator. 373 */ 374 OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name); 375 376 377 OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name); 378 379 /** 380 * Check if an ompi_proc_t is a sentinel 381 */ 382 static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc) 383 { 384 return (intptr_t) proc & 0x1; 385 } 386 387 #if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P 388 /* 389 * we assume an ompi_proc_t is at least aligned on two bytes, 390 * so if the LSB of a pointer to an ompi_proc_t is 1, we have to handle 391 * this pointer as a sentinel instead of a pointer. 392 * a sentinel can be seen as an uint64_t with the following format : 393 * - bit 0 : 1 394 * - bits 1-15 : local jobid 395 * - bits 16-31 : job family 396 * - bits 32-63 : vpid 397 */ 398 static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) 399 { 400 uintptr_t tmp, sentinel = 0; 401 /* local jobid must fit in 15 bits */ 402 assert(! (OMPI_LOCAL_JOBID(name.jobid) & 0x8000)); 403 sentinel |= 0x1; 404 tmp = (uintptr_t)OMPI_LOCAL_JOBID(name.jobid); 405 sentinel |= ((tmp << 1) & 0xfffe); 406 tmp = (uintptr_t)OMPI_JOB_FAMILY(name.jobid); 407 sentinel |= ((tmp << 16) & 0xffff0000); 408 tmp = (uintptr_t)name.vpid; 409 sentinel |= ((tmp << 32) & 0xffffffff00000000); 410 return sentinel; 411 } 412 413 static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel) 414 { 415 opal_process_name_t name; 416 uint32_t local, family; 417 uint32_t vpid; 418 assert(sentinel & 0x1); 419 local = (sentinel >> 1) & 0x7fff; 420 family = (sentinel >> 16) & 0xffff; 421 vpid = (sentinel >> 32) & 0xffffffff; 422 name.jobid = OMPI_CONSTRUCT_JOBID(family,local); 423 name.vpid = vpid; 424 return name; 425 } 426 #elif 4 == SIZEOF_VOID_P 427 /* 428 * currently, a sentinel is only made from the current jobid aka OMPI_PROC_MY_NAME->jobid 429 * so we only store the first 31 bits of the vpid 430 */ 431 static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name) 432 { 433 assert(OMPI_PROC_MY_NAME->jobid == name.jobid); 434 return (uintptr_t)((name.vpid <<1) | 0x1); 435 } 436 437 static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel) 438 { 439 opal_process_name_t name; 440 name.jobid = OMPI_PROC_MY_NAME->jobid; 441 name.vpid = sentinel >> 1; 442 return name; 443 } 444 #else 445 #error unsupported pointer size 446 #endif 447 448 END_C_DECLS 449 450 #endif /* OMPI_PROC_PROC_H */