1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2014 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2009 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved.
14 * Copyright (c) 2008-2010 Cisco Systems, Inc. All rights reserved.
15 * Copyright (c) 2010-2015 Los Alamos National Security, LLC.
16 * All rights reserved.
17 * Copyright (c) 2014 Intel, Inc. All rights reserved
18 * $COPYRIGHT$
19 *
20 * Additional copyrights may follow
21 *
22 * $HEADER$
23 */
24
25 /* ASSUMING local process homogeneity with respect to all utilized shared memory
26 * facilities. that is, if one local process deems a particular shared memory
27 * facility acceptable, then ALL local processes should be able to utilize that
28 * facility. as it stands, this is an important point because one process
29 * dictates to all other local processes which common sm component will be
30 * selected based on its own, local run-time test.
31 */
32
33 #include "opal_config.h"
34
35 #include "opal/align.h"
36 #include "opal/util/argv.h"
37 #include "opal/util/show_help.h"
38 #include "opal/util/error.h"
39 #include "opal/mca/shmem/base/base.h"
40 #if OPAL_ENABLE_FT_CR == 1
41 #include "opal/runtime/opal_cr.h"
42 #endif
43 #include "common_sm.h"
44 #include "opal/constants.h"
45
46
47 OBJ_CLASS_INSTANCE(mca_common_sm_module_t,opal_list_item_t,
48 NULL, NULL);
49
50
51 /* ////////////////////////////////////////////////////////////////////////// */
52 /* static utility functions */
53 /* ////////////////////////////////////////////////////////////////////////// */
54
55 /* ////////////////////////////////////////////////////////////////////////// */
56 static mca_common_sm_module_t *
57 attach_and_init(opal_shmem_ds_t *shmem_bufp,
58 size_t size,
59 size_t size_ctl_structure,
60 size_t data_seg_alignment,
61 bool first_call)
62 {
63 mca_common_sm_module_t *map = NULL;
64 mca_common_sm_seg_header_t *seg = NULL;
65 unsigned char *addr = NULL;
66
67 /* attach to the specified segment. note that at this point, the contents of
68 * *shmem_bufp have already been initialized via opal_shmem_segment_create.
69 */
70 if (NULL == (seg = (mca_common_sm_seg_header_t *)
71 opal_shmem_segment_attach(shmem_bufp))) {
72 return NULL;
73 }
74 opal_atomic_rmb();
75
76 if (NULL == (map = OBJ_NEW(mca_common_sm_module_t))) {
77 OPAL_ERROR_LOG(OPAL_ERR_OUT_OF_RESOURCE);
78 (void)opal_shmem_segment_detach(shmem_bufp);
79 return NULL;
80 }
81
82 /* copy meta information into common sm module
83 * from ====> to */
84 if (OPAL_SUCCESS != opal_shmem_ds_copy(shmem_bufp, &map->shmem_ds)) {
85 (void)opal_shmem_segment_detach(shmem_bufp);
86 free(map);
87 return NULL;
88 }
89
90 /* the first entry in the file is the control structure. the first
91 * entry in the control structure is an mca_common_sm_seg_header_t
92 * element.
93 */
94 map->module_seg = seg;
95
96 addr = ((unsigned char *)seg) + size_ctl_structure;
97 /* if we have a data segment (i.e., if 0 != data_seg_alignment),
98 * then make it the first aligned address after the control
99 * structure. IF THIS HAPPENS, THIS IS A PROGRAMMING ERROR IN
100 * OPEN MPI!
101 */
102 if (0 != data_seg_alignment) {
103 addr = OPAL_ALIGN_PTR(addr, data_seg_alignment, unsigned char *);
104 /* is addr past end of the shared memory segment? */
105 if ((unsigned char *)seg + shmem_bufp->seg_size < addr) {
106 opal_show_help("help-mpi-common-sm.txt", "mmap too small", 1,
107 opal_proc_local_get()->proc_hostname,
108 (unsigned long)shmem_bufp->seg_size,
109 (unsigned long)size_ctl_structure,
110 (unsigned long)data_seg_alignment);
111 (void)opal_shmem_segment_detach(shmem_bufp);
112 free(map);
113 return NULL;
114 }
115 }
116
117 map->module_data_addr = addr;
118 map->module_seg_addr = (unsigned char *)seg;
119
120 /* note that size is only used during the first call */
121 if (first_call) {
122 /* initialize some segment information */
123 size_t mem_offset = map->module_data_addr -
124 (unsigned char *)map->module_seg;
125 opal_atomic_lock_init(&map->module_seg->seg_lock, OPAL_ATOMIC_LOCK_UNLOCKED);
126 map->module_seg->seg_inited = 0;
127 map->module_seg->seg_num_procs_inited = 0;
128 map->module_seg->seg_offset = mem_offset;
129 map->module_seg->seg_size = size - mem_offset;
130 opal_atomic_wmb();
131 }
132
133 /* increment the number of processes that are attached to the segment. */
134 (void)opal_atomic_add_fetch_size_t(&map->module_seg->seg_num_procs_inited, 1);
135
136 /* commit the changes before we return */
137 opal_atomic_wmb();
138
139 return map;
140 }
141
142 /* ////////////////////////////////////////////////////////////////////////// */
143 /* api implementation */
144 /* ////////////////////////////////////////////////////////////////////////// */
145
146 /* ////////////////////////////////////////////////////////////////////////// */
147 mca_common_sm_module_t *
148 mca_common_sm_module_create_and_attach(size_t size,
149 char *file_name,
150 size_t size_ctl_structure,
151 size_t data_seg_alignment)
152 {
153 mca_common_sm_module_t *map = NULL;
154 opal_shmem_ds_t *seg_meta = NULL;
155
156 if (NULL == (seg_meta = calloc(1, sizeof(*seg_meta)))) {
157 /* out of resources */
158 return NULL;
159 }
160 if (OPAL_SUCCESS == opal_shmem_segment_create(seg_meta, file_name, size)) {
161 map = attach_and_init(seg_meta, size, size_ctl_structure,
162 data_seg_alignment, true);
163 }
164 /* at this point, seg_meta has been copied to the newly created
165 * shared memory segment, so we can free it */
166 if (seg_meta) {
167 free(seg_meta);
168 }
169
170 return map;
171 }
172
173 /* ////////////////////////////////////////////////////////////////////////// */
174 /**
175 * @return a pointer to the mca_common_sm_module_t associated with seg_meta if
176 * everything was okay, otherwise returns NULL.
177 */
178 mca_common_sm_module_t *
179 mca_common_sm_module_attach(opal_shmem_ds_t *seg_meta,
180 size_t size_ctl_structure,
181 size_t data_seg_alignment)
182 {
183 /* notice that size is 0 here. it really doesn't matter because size WILL
184 * NOT be used because this is an attach (first_call is false). */
185 return attach_and_init(seg_meta, 0, size_ctl_structure,
186 data_seg_alignment, false);
187 }
188
189 /* ////////////////////////////////////////////////////////////////////////// */
190 int
191 mca_common_sm_module_unlink(mca_common_sm_module_t *modp)
192 {
193 if (NULL == modp) {
194 return OPAL_ERROR;
195 }
196 if (OPAL_SUCCESS != opal_shmem_unlink(&modp->shmem_ds)) {
197 return OPAL_ERROR;
198 }
199 return OPAL_SUCCESS;
200 }
201
202 /* ////////////////////////////////////////////////////////////////////////// */
203 int
204 mca_common_sm_local_proc_reorder(opal_proc_t **procs,
205 size_t num_procs,
206 size_t *out_num_local_procs)
207 {
208 size_t num_local_procs = 0;
209 bool found_lowest = false;
210 opal_proc_t *temp_proc = NULL;
211 size_t p;
212
213 if (NULL == out_num_local_procs || NULL == procs) {
214 return OPAL_ERR_BAD_PARAM;
215 }
216 /* o reorder procs array to have all the local procs at the beginning.
217 * o look for the local proc with the lowest name.
218 * o determine the number of local procs.
219 * o ensure that procs[0] is the lowest named process.
220 */
221 for (p = 0; p < num_procs; ++p) {
222 if (OPAL_PROC_ON_LOCAL_NODE(procs[p]->proc_flags)) {
223 /* if we don't have a lowest, save the first one */
224 if (!found_lowest) {
225 procs[0] = procs[p];
226 found_lowest = true;
227 }
228 else {
229 /* save this proc */
230 procs[num_local_procs] = procs[p];
231 /* if we have a new lowest, swap it with position 0
232 * so that procs[0] is always the lowest named proc */
233 if( 0 > opal_compare_proc(procs[p]->proc_name, procs[0]->proc_name) ) {
234 temp_proc = procs[0];
235 procs[0] = procs[p];
236 procs[num_local_procs] = temp_proc;
237 }
238 }
239 /* regardless of the comparisons above, we found
240 * another proc on the local node, so increment
241 */
242 ++num_local_procs;
243 }
244 }
245 *out_num_local_procs = num_local_procs;
246
247 return OPAL_SUCCESS;
248 }
249
250 /* ////////////////////////////////////////////////////////////////////////// */
251 /**
252 * allocate memory from a previously allocated shared memory
253 * block.
254 *
255 * @param size size of request, in bytes (IN)
256 *
257 * @retval addr virtual address
258 */
259 void *mca_common_sm_seg_alloc (void *ctx, size_t *size)
260 {
261 mca_common_sm_module_t *sm_module = (mca_common_sm_module_t *) ctx;
262 mca_common_sm_seg_header_t *seg = sm_module->module_seg;
263 void *addr;
264
265 opal_atomic_lock(&seg->seg_lock);
266 if (seg->seg_offset + *size > seg->seg_size) {
267 addr = NULL;
268 }
269 else {
270 size_t fixup;
271
272 /* add base address to segment offset */
273 addr = sm_module->module_data_addr + seg->seg_offset;
274 seg->seg_offset += *size;
275
276 /* fix up seg_offset so next allocation is aligned on a
277 * sizeof(long) boundry. Do it here so that we don't have to
278 * check before checking remaining size in buffer
279 */
280 if ((fixup = (seg->seg_offset & (sizeof(long) - 1))) > 0) {
281 seg->seg_offset += sizeof(long) - fixup;
282 }
283 }
284
285 opal_atomic_unlock(&seg->seg_lock);
286 return addr;
287 }
288
289 /* ////////////////////////////////////////////////////////////////////////// */
290 int
291 mca_common_sm_fini(mca_common_sm_module_t *mca_common_sm_module)
292 {
293 int rc = OPAL_SUCCESS;
294
295 if (NULL != mca_common_sm_module->module_seg) {
296 if (OPAL_SUCCESS !=
297 opal_shmem_segment_detach(&mca_common_sm_module->shmem_ds)) {
298 rc = OPAL_ERROR;
299 }
300 }
301 return rc;
302 }