1 /*
2 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
3 * University Research and Technology
4 * Corporation. All rights reserved.
5 * Copyright (c) 2004-2011 The University of Tennessee and The University
6 * of Tennessee Research Foundation. All rights
7 * reserved.
8 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
9 * University of Stuttgart. All rights reserved.
10 * Copyright (c) 2004-2005 The Regents of the University of California.
11 * All rights reserved.
12 * Copyright (c) 2006-2012 Cisco Systems, Inc. All rights reserved.
13 * Copyright (c) 2007-2012 Los Alamos National Security, LLC. All rights
14 * reserved.
15 * Copyright (c) 2013-2014 Intel, Inc. All rights reserved
16 * Copyright (c) 2015-2016 Research Organization for Information Science
17 * and Technology (RIST). All rights reserved.
18 * $COPYRIGHT$
19 *
20 * Additional copyrights may follow
21 *
22 * $HEADER$
23 */
24
25
26 /** @file
27 * Process identification structure interface
28 *
29 * Process identification structure interface. The ompi_proc_t
 * structure contains basic information about the remote (and local)
31 * processes.
32 */
33
34 #ifndef OMPI_PROC_PROC_H
35 #define OMPI_PROC_PROC_H
36
37 #include "ompi_config.h"
38 #include "ompi/types.h"
39
40 #include "opal/util/proc.h"
41
42 #include "ompi/mca/rte/rte.h"
43
44
45 BEGIN_C_DECLS
46
47 /* ******************************************************************** */
48
49
50 /**
51 * Remote Open MPI process structure
52 *
53 * Remote Open MPI process structure. Each process contains exactly
54 * one ompi_proc_t structure for each remote process it knows about.
55 *
56 * Each proc entry has an array of endpoint data associated with it.
57 * The size of this array, and its entries, is unique to a particular
58 * build of Open MPI. As the endpoint list (or index values) are
59 * local to a process, this does not negatively impact heterogeneous
60 * builds. If a component or framework requires a tag index, it
61 * should call OMPI_REQUIRE_ENDPOINT_TAG(<name>). Requests which
62 * share the same name will have the same value, allowing
63 * cross-component sharing of endpoint data. The tag may be referenced
64 * by the pre-processor define OMPI_PROC_ENDPOINT_TAG_<name>. Adding
65 * a tag increases the memory consumed by Open MPI, so should only be done
66 * if unavoidable.
67 */
68
69 #define OMPI_PROC_PADDING_SIZE 16
70
71 struct ompi_proc_t {
72 opal_proc_t super;
73
74 /* endpoint data */
75 void *proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MAX];
76
77 char padding[OMPI_PROC_PADDING_SIZE]; /* for future extensions (OSHMEM uses this area also)*/
78 };
79 typedef struct ompi_proc_t ompi_proc_t;
80 OBJ_CLASS_DECLARATION(ompi_proc_t);
81
82
83 /**
84 * @private
85 *
86 * Pointer to the ompi_proc_t structure for the local process
87 *
88 * @note This pointer is declared here to allow inline functions
89 * within this header file to access the local process quickly.
90 * Please use ompi_proc_local() instead.
91 */
92 OMPI_DECLSPEC extern ompi_proc_t* ompi_proc_local_proc;
93 OMPI_DECLSPEC extern opal_list_t ompi_proc_list;
94
95 /* ******************************************************************** */
96
97
98 /**
99 * Initialize the OMPI process subsystem
100 *
101 * Initialize the Open MPI process subsystem. This function will
102 * query the run-time environment and build a list of the proc
103 * instances in the current MPI_COMM_WORLD. The local information not
104 * easily determined by the run-time ahead of time (architecture and
105 * hostname) will be published during this call.
106 *
107 * @note While an ompi_proc_t will exist with mostly valid information
108 * for each process in the MPI_COMM_WORLD at the conclusion of this
109 * call, some information will not be immediately available. This
110 * includes the architecture and hostname, which will be available by
111 * the conclusion of the stage gate.
112 *
 * @retval OMPI_SUCCESS System successfully initialized
114 * @retval OMPI_ERROR Initialization failed due to unspecified error
115 */
116 OMPI_DECLSPEC int ompi_proc_init(void);
117
118 /**
119 * Complete filling up the proc information (arch, name and locality) for all
120 * procs related to this job. This function is to be called only after
121 * the modex exchange has been completed.
122 *
123 * @retval OMPI_SUCCESS All information correctly set.
124 * @retval OMPI_ERROR Some info could not be initialized.
125 */
126 OMPI_DECLSPEC int ompi_proc_complete_init(void);
127
128 /**
129 * Complete filling up the proc information (arch, name and locality) for
130 * a given proc. This function is to be called only after the modex exchange
131 * has been completed.
132 *
133 * @param[in] proc the proc whose information will be filled up
134 *
135 * @retval OMPI_SUCCESS All information correctly set.
136 * @retval OMPI_ERROR Some info could not be initialized.
137 */
138 OMPI_DECLSPEC int ompi_proc_complete_init_single(ompi_proc_t* proc);
139
140 /**
141 * Finalize the OMPI Process subsystem
142 *
143 * Finalize the Open MPI process subsystem. This function will
144 * release all memory created during the life of the application,
145 * including all ompi_proc_t structures.
146 *
147 * @retval OMPI_SUCCESS System successfully finalized
148 */
149 OMPI_DECLSPEC int ompi_proc_finalize(void);
150
151
152 /**
153 * Returns the list of proc instances associated with this job.
154 *
155 * Returns the list of proc instances associated with this job. Given
156 * the current association between a job and an MPI_COMM_WORLD, this
157 * function provides the process instances for the current
158 * MPI_COMM_WORLD. Use this function only if absolutely needed as it
159 * will cause ompi_proc_t objects to be allocated for every process in
160 * the job. If you only need the allocated ompi_proc_t objects call
161 * ompi_proc_get_allocated() instead.
162 *
163 * @note The reference count of each process in the array is
164 * NOT incremented - the caller is responsible for ensuring the
165 * correctness of the reference count once they are done with
166 * the array.
167 *
168 * @param[in] size Number of processes in the ompi_proc_t array
169 *
170 * @return Array of pointers to proc instances in the current
171 * MPI_COMM_WORLD, or NULL if there is an internal failure.
172 */
173 OMPI_DECLSPEC ompi_proc_t** ompi_proc_world(size_t* size);
174
175 /**
 * Returns the number of processes associated with this job.
 *
 * Given the current association between a job and an MPI_COMM_WORLD,
 * this function provides the number of processes for the current
 * MPI_COMM_WORLD.
182 */
183
184 OMPI_DECLSPEC int ompi_proc_world_size (void);
185
186 /**
187 * Returns the list of proc instances associated with this job.
188 *
189 * Returns the list of proc instances associated with this job that have
190 * already been allocated. Given the current association between a job
191 * and an MPI_COMM_WORLD, this function provides the allocated process
192 * instances for the current MPI_COMM_WORLD.
193 *
194 * @note The reference count of each process in the array is
195 * NOT incremented - the caller is responsible for ensuring the
196 * correctness of the reference count once they are done with
197 * the array.
198 *
199 * @param[in] size Number of processes in the ompi_proc_t array
200 *
201 * @return Array of pointers to allocated proc instances in the current
202 * MPI_COMM_WORLD, or NULL if there is an internal failure.
203 */
204 OMPI_DECLSPEC ompi_proc_t **ompi_proc_get_allocated (size_t *size);
205
206 /**
207 * Returns the list of all known proc instances.
208 *
209 * Returns the list of all known proc instances, including those in
210 * other MPI_COMM_WORLDs. It is possible that we may no longer be
211 * connected to some of the procs returned (in the MPI sense of the
212 * word connected). In a strictly MPI-1 application, this function
213 * will return the same information as ompi_proc_world().
214 *
215 * @note The reference count of each process in the array is
216 * incremented and the caller is responsible for releasing each
217 * process in the array, as well as freeing the array.
218 *
219 * @param[in] size Number of processes in the ompi_proc_t array
220 *
221 * @return Array of pointers to proc instances in the current
222 * known universe, or NULL if there is an internal failure.
223 */
224 OMPI_DECLSPEC ompi_proc_t** ompi_proc_all(size_t* size);
225
226
227 /**
228 * Returns a list of the local process
229 *
230 * Returns a list containing the local process (and only the local
231 * process). Has calling semantics similar to ompi_proc_world() and
232 * ompi_proc_all().
233 *
234 * @note The reference count of each process in the array is
235 * incremented and the caller is responsible for releasing each
236 * process in the array, as well as freeing the array.
237 *
238 * @param[in] size Number of processes in the ompi_proc_t array
239 *
240 * @return Array of pointers to proc instances in the current
241 * known universe, or NULL if there is an internal failure.
242 */
243 OMPI_DECLSPEC ompi_proc_t** ompi_proc_self(size_t* size);
244
245
246 /**
247 * Returns a pointer to the local process
248 *
249 * Returns a pointer to the local process. Unlike ompi_proc_self(),
250 * the reference count on the local proc instance is not modified by
251 * this function.
252 *
253 * @return Pointer to the local process structure
254 */
255 static inline ompi_proc_t* ompi_proc_local(void)
256 {
257 return ompi_proc_local_proc;
258 }
259
260
261 /**
262 * Returns the proc instance for a given name
263 *
264 * Returns the proc instance for the specified process name. The
265 * reference count for the proc instance is not incremented by this
266 * function.
267 *
268 * @param[in] name The process name to look for
269 *
270 * @return Pointer to the process instance for \c name
271 */
272 OMPI_DECLSPEC ompi_proc_t * ompi_proc_find ( const ompi_process_name_t* name );
273
274 OMPI_DECLSPEC ompi_proc_t * ompi_proc_find_and_add(const ompi_process_name_t * name, bool* isnew);
275
276 /**
277 * Pack proc list into portable buffer
278 *
279 * This function takes a list of ompi_proc_t pointers (e.g. as given
 * in groups) and returns an orte buffer containing all information
281 * needed to add the proc to a remote list. This includes the ORTE
282 * process name, the architecture, and the hostname. Ordering is
283 * maintained. The buffer is packed to be sent to a remote node with
284 * different architecture (endian or word size).
285 *
286 * @param[in] proclist List of process pointers
287 * @param[in] proclistsize Length of the proclist array
288 * @param[in,out] buf An opal_buffer containing the packed names.
289 * The buffer must be constructed but empty when
290 * passed to this function
291 * @retval OMPI_SUCCESS Success
292 * @retval OMPI_ERROR Unspecified error
293 */
294 OMPI_DECLSPEC int ompi_proc_pack(ompi_proc_t **proclist,
295 int proclistsize,
296 opal_buffer_t *buf);
297
298
299 /**
300 * Unpack a portable buffer of procs
301 *
302 * This function unpacks a packed list of ompi_proc_t structures and
303 * returns the ordered list of proc structures. If the given proc is
304 * already "known", the architecture and hostname information in the
305 * buffer is ignored. If the proc is "new" to this process, it will
306 * be added to the global list of known procs, with information
307 * provided in the buffer. The lookup actions are always entirely
308 * local. The proclist returned is a list of pointers to all procs in
309 * the buffer, whether they were previously known or are new to this
310 * process.
311 *
312 * @note In previous versions of this function, The PML's add_procs()
313 * function was called for any new processes discovered as a result of
314 * this operation. That is no longer the case -- the caller must use
315 * the newproclist information to call add_procs() if necessary.
316 *
317 * @note The reference count for procs created as a result of this
318 * operation will be set to 1. Existing procs will not have their
319 * reference count changed. The reference count of a proc at the
320 * return of this function is the same regardless of whether NULL is
321 * provided for newproclist. The user is responsible for freeing the
322 * newproclist array.
323 *
324 * @param[in] buf opal_buffer containing the packed names
 * @param[in] proclistsize number of expected proc-pointers
326 * @param[out] proclist list of process pointers
327 * @param[out] newproclistsize Number of new procs added as a result
328 * of the unpack operation. NULL may be
329 * provided if information is not needed.
330 * @param[out] newproclist List of new procs added as a result of
331 * the unpack operation. NULL may be
 * provided if information is not needed.
333 *
334 * Return value:
335 * OMPI_SUCCESS on success
336 * OMPI_ERROR else
337 */
338 OMPI_DECLSPEC int ompi_proc_unpack(opal_buffer_t *buf,
339 int proclistsize,
340 ompi_proc_t ***proclist,
341 int *newproclistsize,
342 ompi_proc_t ***newproclist);
343
344 /**
345 * Refresh the OMPI process subsystem
346 *
347 * Refresh the Open MPI process subsystem. This function will update
348 * the list of proc instances in the current MPI_COMM_WORLD with
 * data from the run-time environment.
350 *
 * @note This is primarily used when restarting a process, which
 * needs to update the jobid and node name.
353 *
 * @retval OMPI_SUCCESS System successfully refreshed
355 * @retval OMPI_ERROR Refresh failed due to unspecified error
356 */
357 OMPI_DECLSPEC int ompi_proc_refresh(void);
358
359 /**
360 * Get the ompi_proc_t for a given process name
361 *
362 * @param[in] proc_name opal process name
363 *
364 * @returns cached or new ompi_proc_t for the given process name
365 *
366 * This function looks up the given process name in the hash of existing
367 * ompi_proc_t structures. If no ompi_proc_t structure exists matching the
368 * given name a new ompi_proc_t is allocated, initialized, and returned.
369 *
370 * @note The ompi_proc_t is added to the local list of processes but is not
371 * added to any communicator. ompi_comm_peer_lookup is responsible for caching
372 * the ompi_proc_t on a communicator.
373 */
374 OMPI_DECLSPEC opal_proc_t *ompi_proc_for_name (const opal_process_name_t proc_name);
375
376
377 OMPI_DECLSPEC opal_proc_t *ompi_proc_lookup (const opal_process_name_t proc_name);
378
379 /**
380 * Check if an ompi_proc_t is a sentinel
381 */
382 static inline bool ompi_proc_is_sentinel (ompi_proc_t *proc)
383 {
384 return (intptr_t) proc & 0x1;
385 }
386
387 #if OPAL_SIZEOF_PROCESS_NAME_T == SIZEOF_VOID_P
388 /*
389 * we assume an ompi_proc_t is at least aligned on two bytes,
390 * so if the LSB of a pointer to an ompi_proc_t is 1, we have to handle
391 * this pointer as a sentinel instead of a pointer.
392 * a sentinel can be seen as an uint64_t with the following format :
393 * - bit 0 : 1
394 * - bits 1-15 : local jobid
395 * - bits 16-31 : job family
396 * - bits 32-63 : vpid
397 */
398 static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
399 {
400 uintptr_t tmp, sentinel = 0;
401 /* local jobid must fit in 15 bits */
402 assert(! (OMPI_LOCAL_JOBID(name.jobid) & 0x8000));
403 sentinel |= 0x1;
404 tmp = (uintptr_t)OMPI_LOCAL_JOBID(name.jobid);
405 sentinel |= ((tmp << 1) & 0xfffe);
406 tmp = (uintptr_t)OMPI_JOB_FAMILY(name.jobid);
407 sentinel |= ((tmp << 16) & 0xffff0000);
408 tmp = (uintptr_t)name.vpid;
409 sentinel |= ((tmp << 32) & 0xffffffff00000000);
410 return sentinel;
411 }
412
413 static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
414 {
415 opal_process_name_t name;
416 uint32_t local, family;
417 uint32_t vpid;
418 assert(sentinel & 0x1);
419 local = (sentinel >> 1) & 0x7fff;
420 family = (sentinel >> 16) & 0xffff;
421 vpid = (sentinel >> 32) & 0xffffffff;
422 name.jobid = OMPI_CONSTRUCT_JOBID(family,local);
423 name.vpid = vpid;
424 return name;
425 }
426 #elif 4 == SIZEOF_VOID_P
427 /*
428 * currently, a sentinel is only made from the current jobid aka OMPI_PROC_MY_NAME->jobid
429 * so we only store the first 31 bits of the vpid
430 */
431 static inline uintptr_t ompi_proc_name_to_sentinel (opal_process_name_t name)
432 {
433 assert(OMPI_PROC_MY_NAME->jobid == name.jobid);
434 return (uintptr_t)((name.vpid <<1) | 0x1);
435 }
436
437 static inline opal_process_name_t ompi_proc_sentinel_to_name (uintptr_t sentinel)
438 {
439 opal_process_name_t name;
440 name.jobid = OMPI_PROC_MY_NAME->jobid;
441 name.vpid = sentinel >> 1;
442 return name;
443 }
444 #else
445 #error unsupported pointer size
446 #endif
447
448 END_C_DECLS
449
450 #endif /* OMPI_PROC_PROC_H */