This source file includes the following definitions.
- ompi_osc_rdma_peer_btl_endpoint
- ompi_osc_rdma_new_peer
- ompi_osc_rdma_peer_setup
- ompi_osc_rdma_peer_lookup_internal
- ompi_osc_rdma_peer_lookup
- ompi_osc_rdma_peer_construct
- ompi_osc_rdma_peer_destruct
- ompi_osc_rdma_peer_basic_construct
- ompi_osc_rdma_peer_basic_destruct
- ompi_osc_rdma_peer_dynamic_construct
- ompi_osc_rdma_peer_dynamic_destruct
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #include "ompi_config.h"
16
17 #ifdef HAVE_ALLOCA_H
18 #include <alloca.h>
19 #endif
20
21 #include "osc_rdma_comm.h"
22
23 #include "ompi/mca/bml/base/base.h"
24
/*
 * Yield, as an int, the value stored in the len field of a region descriptor.
 * Callers in this file use the result as the rank passed to
 * ompi_osc_rdma_peer_btl_endpoint().  The module and node_id arguments are
 * accepted but not used by the expansion.
 */
#define NODE_ID_TO_RANK(module, peer_data, node_id) \
    ((int) ((peer_data)->len))
26
27
28
29
30
31
32
33
34
35
36 struct mca_btl_base_endpoint_t *ompi_osc_rdma_peer_btl_endpoint (struct ompi_osc_rdma_module_t *module, int peer_id)
37 {
38 ompi_proc_t *proc = ompi_comm_peer_lookup (module->comm, peer_id);
39 mca_bml_base_endpoint_t *bml_endpoint;
40 int num_btls;
41
42
43 bml_endpoint = mca_bml_base_get_endpoint (proc);
44
45 num_btls = mca_bml_base_btl_array_get_size (&bml_endpoint->btl_rdma);
46
47 for (int btl_index = 0 ; btl_index < num_btls ; ++btl_index) {
48 if (bml_endpoint->btl_rdma.bml_btls[btl_index].btl == module->selected_btl) {
49 return bml_endpoint->btl_rdma.bml_btls[btl_index].btl_endpoint;
50 }
51 }
52
53
54 return NULL;
55 }
56
57 int ompi_osc_rdma_new_peer (struct ompi_osc_rdma_module_t *module, int peer_id, ompi_osc_rdma_peer_t **peer_out) {
58 struct mca_btl_base_endpoint_t *endpoint;
59 ompi_osc_rdma_peer_t *peer;
60
61 *peer_out = NULL;
62
63 endpoint = ompi_osc_rdma_peer_btl_endpoint (module, peer_id);
64 if (OPAL_UNLIKELY(NULL == endpoint && !((module->selected_btl->btl_atomic_flags & MCA_BTL_ATOMIC_SUPPORTS_GLOB) &&
65 peer_id == ompi_comm_rank (module->comm)))) {
66 return OMPI_ERR_UNREACH;
67 }
68
69 if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) {
70 peer = (ompi_osc_rdma_peer_t *) OBJ_NEW(ompi_osc_rdma_peer_dynamic_t);
71 } else if (module->same_size && module->same_disp_unit) {
72
73 peer = (ompi_osc_rdma_peer_t *) OBJ_NEW(ompi_osc_rdma_peer_basic_t);
74 } else {
75 peer = (ompi_osc_rdma_peer_t *) OBJ_NEW(ompi_osc_rdma_peer_extended_t);
76 }
77
78 peer->data_endpoint = endpoint;
79 peer->rank = peer_id;
80
81 *peer_out = peer;
82
83 return OMPI_SUCCESS;
84 }
85
86
87
88
89
90
91
92
93
94
95
96 static int ompi_osc_rdma_peer_setup (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer)
97 {
98 ompi_osc_rdma_peer_extended_t *ex_peer = (ompi_osc_rdma_peer_extended_t *) peer;
99 uint64_t peer_data_size;
100 uint64_t peer_data_offset, array_pointer;
101 struct mca_btl_base_endpoint_t *array_endpoint;
102 ompi_osc_rdma_region_t *array_peer_data, *node_peer_data;
103 ompi_osc_rdma_rank_data_t rank_data;
104 int registration_handle_size = 0;
105 int node_id, node_rank, array_index;
106 int ret, disp_unit;
107 char *peer_data;
108
109 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "configuring peer for rank %d", peer->rank);
110
111 if (module->selected_btl->btl_register_mem) {
112 registration_handle_size = module->selected_btl->btl_registration_handle_size;
113 }
114
115
116
117
118 node_id = peer->rank / RANK_ARRAY_COUNT(module);
119 array_peer_data = (ompi_osc_rdma_region_t *) ((intptr_t) module->node_comm_info + node_id * module->region_size);
120
121
122 node_rank = NODE_ID_TO_RANK(module, array_peer_data, node_id);
123 array_index = peer->rank % RANK_ARRAY_COUNT(module);
124
125 array_pointer = array_peer_data->base + array_index * sizeof (rank_data);
126
127
128 array_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, node_rank);
129 if (OPAL_UNLIKELY(NULL == array_endpoint)) {
130 return OMPI_ERR_UNREACH;
131 }
132
133 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "reading region data for %d from rank: %d, index: %d, pointer: 0x%" PRIx64
134 ", size: %lu", peer->rank, node_rank, array_index, array_pointer, sizeof (rank_data));
135
136 ret = ompi_osc_get_data_blocking (module, array_endpoint, array_pointer, (mca_btl_base_registration_handle_t *) array_peer_data->btl_handle_data,
137 &rank_data, sizeof (rank_data));
138 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
139 return ret;
140 }
141
142
143
144
145 node_peer_data = (ompi_osc_rdma_region_t *) ((intptr_t) module->node_comm_info + rank_data.node_id * module->region_size);
146
147 peer->state = node_peer_data->base + module->state_offset + module->state_size * rank_data.rank;
148
149 if (registration_handle_size) {
150 peer->state_handle = (mca_btl_base_registration_handle_t *) node_peer_data->btl_handle_data;
151 }
152
153 peer->state_endpoint = ompi_osc_rdma_peer_btl_endpoint (module, NODE_ID_TO_RANK(module, node_peer_data, rank_data.node_id));
154 if (OPAL_UNLIKELY(NULL == peer->state_endpoint)) {
155 return OPAL_ERR_UNREACH;
156 }
157
158
159 if (MPI_WIN_FLAVOR_DYNAMIC == module->flavor) {
160 return OMPI_SUCCESS;
161 }
162
163
164 if (module->same_disp_unit) {
165
166 peer_data_offset = offsetof (ompi_osc_rdma_state_t, regions);
167 } else {
168 peer_data_offset = offsetof (ompi_osc_rdma_state_t, disp_unit);
169 }
170
171 peer_data_size = module->state_size - peer_data_offset;
172 peer_data = alloca (peer_data_size);
173
174
175 ret = ompi_osc_get_data_blocking (module, peer->state_endpoint, peer->state + peer_data_offset, peer->state_handle,
176 peer_data, peer_data_size);
177 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
178 return ret;
179 }
180
181 if (!module->same_disp_unit) {
182
183 memcpy (&ex_peer->disp_unit, peer_data, sizeof (ex_peer->disp_unit));
184 peer_data += offsetof (ompi_osc_rdma_state_t, regions) - offsetof (ompi_osc_rdma_state_t, disp_unit);
185 disp_unit = ex_peer->disp_unit;
186 } else {
187 disp_unit = module->disp_unit;
188 }
189
190 ompi_osc_rdma_region_t *base_region = (ompi_osc_rdma_region_t *) peer_data;
191
192 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "peer %d: remote base region: 0x%" PRIx64 ", size: %" PRId64
193 ", flags: 0x%x, disp_unit: %d", peer->rank, base_region->base, base_region->len,
194 peer->flags, disp_unit);
195 (void)disp_unit;
196
197 if (ompi_osc_rdma_peer_local_base (peer)) {
198
199 return OMPI_SUCCESS;
200 }
201
202 ex_peer->super.base = base_region->base;
203
204
205 if (!module->same_size) {
206 ex_peer->size = base_region->len;
207 }
208
209 if (base_region->len) {
210 if (registration_handle_size) {
211 ex_peer->super.base_handle = malloc (registration_handle_size);
212 if (OPAL_UNLIKELY(NULL == ex_peer->super.base_handle)) {
213 return OMPI_ERR_OUT_OF_RESOURCE;
214 }
215
216 peer->flags |= OMPI_OSC_RDMA_PEER_BASE_FREE;
217
218 memcpy (ex_peer->super.base_handle, base_region->btl_handle_data, registration_handle_size);
219 }
220
221 if (MPI_WIN_FLAVOR_ALLOCATE == module->flavor) {
222 ex_peer->super.super.data_endpoint = ex_peer->super.super.state_endpoint;
223 }
224 }
225
226 return OMPI_SUCCESS;
227 }
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242 static struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup_internal (struct ompi_osc_rdma_module_t *module, int peer_id)
243 {
244 ompi_osc_rdma_peer_t *peer;
245 int ret;
246
247 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "looking up peer data for rank %d", peer_id);
248
249 peer = ompi_osc_module_get_peer (module, peer_id);
250 if (NULL != peer) {
251 return peer;
252 }
253
254 ret = ompi_osc_rdma_new_peer (module, peer_id, &peer);
255 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
256 return NULL;
257 }
258
259 ret = ompi_osc_rdma_peer_setup (module, peer);
260 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
261 OBJ_RELEASE(peer);
262 return NULL;
263 }
264
265 ret = ompi_osc_module_add_peer (module, peer);
266 if (OPAL_SUCCESS != ret) {
267
268 OBJ_RELEASE(peer);
269 return NULL;
270 }
271
272
273 opal_atomic_wmb ();
274
275 return peer;
276 }
277
278 struct ompi_osc_rdma_peer_t *ompi_osc_rdma_peer_lookup (struct ompi_osc_rdma_module_t *module, int peer_id)
279 {
280 struct ompi_osc_rdma_peer_t *peer;
281
282 opal_mutex_lock (&module->peer_lock);
283 peer = ompi_osc_rdma_peer_lookup_internal (module, peer_id);
284 opal_mutex_unlock (&module->peer_lock);
285
286 return peer;
287 }
288
289
290
291
292 static void ompi_osc_rdma_peer_construct (ompi_osc_rdma_peer_t *peer)
293 {
294 memset ((char *) peer + sizeof (peer->super), 0, sizeof (*peer) - sizeof (peer->super));
295 }
296
297 static void ompi_osc_rdma_peer_destruct (ompi_osc_rdma_peer_t *peer)
298 {
299 if (peer->state_handle && (peer->flags & OMPI_OSC_RDMA_PEER_STATE_FREE)) {
300 free (peer->state_handle);
301 }
302 }
303
/* class instance for ompi_osc_rdma_peer_t: parent opal_list_item_t, with the
 * peer constructor and destructor defined in this file */
OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_t, opal_list_item_t, ompi_osc_rdma_peer_construct, ompi_osc_rdma_peer_destruct);
307
308 static void ompi_osc_rdma_peer_basic_construct (ompi_osc_rdma_peer_basic_t *peer)
309 {
310 memset ((char *) peer + sizeof (peer->super), 0, sizeof (*peer) - sizeof (peer->super));
311 }
312
313 static void ompi_osc_rdma_peer_basic_destruct (ompi_osc_rdma_peer_basic_t *peer)
314 {
315 if (peer->base_handle && (peer->super.flags & OMPI_OSC_RDMA_PEER_BASE_FREE)) {
316 free (peer->base_handle);
317 }
318 }
319
/* class instance for ompi_osc_rdma_peer_basic_t: parent ompi_osc_rdma_peer_t,
 * with the basic constructor and destructor defined in this file */
OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_basic_t, ompi_osc_rdma_peer_t, ompi_osc_rdma_peer_basic_construct, ompi_osc_rdma_peer_basic_destruct);
323
324 OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_extended_t, ompi_osc_rdma_peer_basic_t,
325 NULL, NULL);
326
327 static void ompi_osc_rdma_peer_dynamic_construct (ompi_osc_rdma_peer_dynamic_t *peer)
328 {
329 memset ((char *) peer + sizeof (peer->super), 0, sizeof (*peer) - sizeof (peer->super));
330 }
331
332 static void ompi_osc_rdma_peer_dynamic_destruct (ompi_osc_rdma_peer_dynamic_t *peer)
333 {
334 if (peer->regions) {
335 free (peer->regions);
336 }
337 }
338
/* class instance for ompi_osc_rdma_peer_dynamic_t: parent ompi_osc_rdma_peer_t,
 * with the dynamic constructor and destructor defined in this file */
OBJ_CLASS_INSTANCE(ompi_osc_rdma_peer_dynamic_t, ompi_osc_rdma_peer_t, ompi_osc_rdma_peer_dynamic_construct, ompi_osc_rdma_peer_dynamic_destruct);