This source file includes the following definitions.
- ompi_osc_rdma_find_region_containing
- find_insertion_point
- ompi_osc_rdma_attach
- ompi_osc_rdma_detach
- ompi_osc_rdma_refresh_dynamic_region
- ompi_osc_rdma_find_dynamic_region
1
2
3
4
5
6
7
8
9
10
11
12 #include "osc_rdma_comm.h"
13 #include "osc_rdma_lock.h"
14
15 #include "mpi.h"
16
17 #include "opal/util/sys_limits.h"
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37 static inline ompi_osc_rdma_region_t *ompi_osc_rdma_find_region_containing (ompi_osc_rdma_region_t *regions, int min_index,
38 int max_index, intptr_t base, intptr_t bound,
39 size_t region_size, int *region_index)
40 {
41 int mid_index = (max_index + min_index) >> 1;
42 ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *)((intptr_t) regions + mid_index * region_size);
43 intptr_t region_bound;
44
45 if (min_index > max_index) {
46 return NULL;
47 }
48
49 region_bound = (intptr_t) (region->base + region->len);
50
51 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "checking memory region %p-%p against %p-%p (index %d) (min_index = %d, max_index = %d)",
52 (void *) base, (void *) bound, (void *) region->base, (void *)(region->base + region->len), mid_index,
53 min_index, max_index);
54
55 if (region->base > base) {
56 return ompi_osc_rdma_find_region_containing (regions, min_index, mid_index-1, base, bound, region_size, region_index);
57 } else if (bound <= region_bound) {
58 if (region_index) {
59 *region_index = mid_index;
60 }
61
62 return region;
63 }
64
65 return ompi_osc_rdma_find_region_containing (regions, mid_index+1, max_index, base, bound, region_size, region_index);
66 }
67
68
69 static ompi_osc_rdma_region_t *find_insertion_point (ompi_osc_rdma_region_t *regions, int min_index, int max_index, intptr_t base,
70 size_t region_size, int *region_index)
71 {
72 int mid_index = (max_index + min_index) >> 1;
73 ompi_osc_rdma_region_t *region = (ompi_osc_rdma_region_t *)((intptr_t) regions + mid_index * region_size);
74
75 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "find_insertion_point (%d, %d, %lx, %lu)\n", min_index, max_index, base, region_size);
76
77 if (max_index < min_index) {
78 *region_index = min_index;
79 return (ompi_osc_rdma_region_t *)((intptr_t) regions + min_index * region_size);
80 }
81
82 if (region->base > base) {
83 return find_insertion_point (regions, min_index, mid_index-1, base, region_size, region_index);
84 } else {
85 return find_insertion_point (regions, mid_index+1, max_index, base, region_size, region_index);
86 }
87 }
88
89 int ompi_osc_rdma_attach (struct ompi_win_t *win, void *base, size_t len)
90 {
91 ompi_osc_rdma_module_t *module = GET_MODULE(win);
92 const int my_rank = ompi_comm_rank (module->comm);
93 ompi_osc_rdma_peer_t *my_peer = ompi_osc_rdma_module_peer (module, my_rank);
94 ompi_osc_rdma_region_t *region;
95 osc_rdma_counter_t region_count;
96 osc_rdma_counter_t region_id;
97 void *bound;
98 intptr_t page_size = opal_getpagesize ();
99 int region_index;
100 int ret;
101
102 if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
103 return OMPI_ERR_RMA_FLAVOR;
104 }
105
106 if (0 == len) {
107
108 return OMPI_SUCCESS;
109 }
110
111 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach: %s, %p, %lu", win->w_name, base, (unsigned long) len);
112
113 OPAL_THREAD_LOCK(&module->lock);
114
115 region_count = module->state->region_count & 0xffffffffL;
116 region_id = module->state->region_count >> 32;
117
118 if (region_count == mca_osc_rdma_component.max_attach) {
119 OPAL_THREAD_UNLOCK(&module->lock);
120 return OMPI_ERR_RMA_ATTACH;
121 }
122
123
124
125 bound = (void *)OPAL_ALIGN((intptr_t) base + len, page_size, intptr_t);
126 base = (void *)((intptr_t) base & ~(page_size - 1));
127 len = (size_t)((intptr_t) bound - (intptr_t) base);
128
129
130 region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
131 (intptr_t) bound, module->region_size, ®ion_index);
132 if (NULL != region) {
133 ++module->dynamic_handles[region_index].refcnt;
134 OPAL_THREAD_UNLOCK(&module->lock);
135
136 return OMPI_SUCCESS;
137 }
138
139
140 module->state->region_count = -1;
141 opal_atomic_wmb ();
142
143 ompi_osc_rdma_lock_acquire_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
144
145
146 if (region_count) {
147 region = find_insertion_point ((ompi_osc_rdma_region_t *) module->state->regions, 0, region_count - 1, (intptr_t) base,
148 module->region_size, ®ion_index);
149
150 if (region_index < region_count) {
151 memmove ((void *) ((intptr_t) region + module->region_size), region, (region_count - region_index) * module->region_size);
152
153 if (module->selected_btl->btl_register_mem) {
154 memmove (module->dynamic_handles + region_index + 1, module->dynamic_handles + region_index,
155 (region_count - region_index) * sizeof (module->dynamic_handles[0]));
156 }
157 }
158 } else {
159 region_index = 0;
160 region = (ompi_osc_rdma_region_t *) module->state->regions;
161 }
162
163 region->base = (intptr_t) base;
164 region->len = len;
165
166 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "attaching dynamic memory region {%p, %p} at index %d",
167 base, (void *)((intptr_t) base + len), region_index);
168
169 if (module->selected_btl->btl_register_mem) {
170 mca_btl_base_registration_handle_t *handle;
171
172 ret = ompi_osc_rdma_register (module, MCA_BTL_ENDPOINT_ANY, (void *) region->base, region->len, MCA_BTL_REG_FLAG_ACCESS_ANY,
173 &handle);
174 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
175 OPAL_THREAD_UNLOCK(&module->lock);
176 return OMPI_ERR_RMA_ATTACH;
177 }
178
179 memcpy (region->btl_handle_data, handle, module->selected_btl->btl_registration_handle_size);
180 module->dynamic_handles[region_index].btl_handle = handle;
181 } else {
182 module->dynamic_handles[region_index].btl_handle = NULL;
183 }
184
185 module->dynamic_handles[region_index].refcnt = 1;
186
187 #if OPAL_ENABLE_DEBUG
188 for (int i = 0 ; i < region_count + 1 ; ++i) {
189 region = (ompi_osc_rdma_region_t *) ((intptr_t) module->state->regions + i * module->region_size);
190
191 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, " dynamic region %d: {%p, %lu}", i,
192 (void *) region->base, (unsigned long) region->len);
193 }
194 #endif
195
196 opal_atomic_mb ();
197
198 module->state->region_count = ((region_id + 1) << 32) | (region_count + 1);
199
200 ompi_osc_rdma_lock_release_exclusive (module, my_peer, offsetof (ompi_osc_rdma_state_t, regions_lock));
201 OPAL_THREAD_UNLOCK(&module->lock);
202
203 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "attach complete");
204
205 return OMPI_SUCCESS;
206 }
207
208
209 int ompi_osc_rdma_detach (struct ompi_win_t *win, const void *base)
210 {
211 ompi_osc_rdma_module_t *module = GET_MODULE(win);
212 const int my_rank = ompi_comm_rank (module->comm);
213 ompi_osc_rdma_peer_dynamic_t *my_peer = (ompi_osc_rdma_peer_dynamic_t *) ompi_osc_rdma_module_peer (module, my_rank);
214 osc_rdma_counter_t region_count, region_id;
215 ompi_osc_rdma_region_t *region;
216 int region_index;
217
218 if (module->flavor != MPI_WIN_FLAVOR_DYNAMIC) {
219 return OMPI_ERR_WIN;
220 }
221
222 OPAL_THREAD_LOCK(&module->lock);
223
224 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach: %s, %p", win->w_name, base);
225
226
227 region_count = module->state->region_count & 0xffffffffL;
228 region_id = module->state->region_count >> 32;
229
230 region = ompi_osc_rdma_find_region_containing ((ompi_osc_rdma_region_t *) module->state->regions, 0,
231 region_count - 1, (intptr_t) base, (intptr_t) base + 1,
232 module->region_size, ®ion_index);
233 if (NULL == region) {
234 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "could not find dynamic memory region starting at %p", base);
235 OPAL_THREAD_UNLOCK(&module->lock);
236 return OMPI_ERROR;
237 }
238
239 if (--module->dynamic_handles[region_index].refcnt > 0) {
240 OPAL_THREAD_UNLOCK(&module->lock);
241 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");
242 return OMPI_SUCCESS;
243 }
244
245
246 ompi_osc_rdma_lock_acquire_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
247
248 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "detaching dynamic memory region {%p, %p} from index %d",
249 base, (void *)((intptr_t) base + region->len), region_index);
250
251 if (module->selected_btl->btl_register_mem) {
252 ompi_osc_rdma_deregister (module, module->dynamic_handles[region_index].btl_handle);
253
254 if (region_index < region_count - 1) {
255 memmove (module->dynamic_handles + region_index, module->dynamic_handles + region_index + 1,
256 (region_count - region_index - 1) * sizeof (void *));
257 }
258
259 memset (module->dynamic_handles + region_count - 1, 0, sizeof (module->dynamic_handles[0]));
260 }
261
262 if (region_index < region_count - 1) {
263 memmove (region, (void *)((intptr_t) region + module->region_size),
264 (region_count - region_index - 1) * module->region_size);;
265 }
266
267 module->state->region_count = ((region_id + 1) << 32) | (region_count - 1);
268
269 ompi_osc_rdma_lock_release_exclusive (module, &my_peer->super, offsetof (ompi_osc_rdma_state_t, regions_lock));
270
271 OPAL_THREAD_UNLOCK(&module->lock);
272
273 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "detach complete");
274
275 return OMPI_SUCCESS;
276 }
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291 static int ompi_osc_rdma_refresh_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_dynamic_t *peer) {
292 osc_rdma_counter_t region_count, region_id;
293 uint64_t source_address;
294 int ret;
295
296 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "refreshing dynamic memory regions for target %d", peer->super.rank);
297
298
299 do {
300 osc_rdma_counter_t remote_value;
301
302 source_address = (uint64_t)(intptr_t) peer->super.state + offsetof (ompi_osc_rdma_state_t, region_count);
303 ret = ompi_osc_get_data_blocking (module, peer->super.state_endpoint, source_address, peer->super.state_handle,
304 &remote_value, sizeof (remote_value));
305 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
306 return ret;
307 }
308
309 region_id = remote_value >> 32;
310 region_count = remote_value & 0xffffffffl;
311
312 } while (0xffffffffl == region_count);
313
314 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "target region: id 0x%lx, count 0x%lx (cached: 0x%x, 0x%x)",
315 (unsigned long) region_id, (unsigned long) region_count, peer->region_id, peer->region_count);
316
317 if (0 == region_count) {
318 return OMPI_ERR_RMA_RANGE;
319 }
320
321
322 OPAL_THREAD_LOCK(&module->lock);
323
324 if (peer->region_id != region_id) {
325 unsigned region_len = module->region_size * region_count;
326 void *temp;
327
328 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "dynamic memory cache is out of data. reloading from peer");
329
330
331 temp = realloc (peer->regions, region_len);
332 if (NULL == temp) {
333 OPAL_THREAD_UNLOCK(&module->lock);
334 return OMPI_ERR_OUT_OF_RESOURCE;
335 }
336 peer->regions = temp;
337
338
339 ompi_osc_rdma_lock_acquire_shared (module, &peer->super, 1, offsetof (ompi_osc_rdma_state_t, regions_lock),
340 OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
341
342 source_address = (uint64_t)(intptr_t) peer->super.state + offsetof (ompi_osc_rdma_state_t, regions);
343 ret = ompi_osc_get_data_blocking (module, peer->super.state_endpoint, source_address, peer->super.state_handle,
344 peer->regions, region_len);
345 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
346 OPAL_THREAD_UNLOCK(&module->lock);
347 return ret;
348 }
349
350
351 ompi_osc_rdma_lock_release_shared (module, &peer->super, -1, offsetof (ompi_osc_rdma_state_t, regions_lock));
352
353
354 peer->region_id = region_id;
355 peer->region_count = region_count;
356 }
357
358 OPAL_THREAD_UNLOCK(&module->lock);
359
360 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "finished refreshing dynamic memory regions for target %d", peer->super.rank);
361
362 return OMPI_SUCCESS;
363 }
364
365 int ompi_osc_rdma_find_dynamic_region (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer, uint64_t base, size_t len,
366 ompi_osc_rdma_region_t **region)
367 {
368 ompi_osc_rdma_peer_dynamic_t *dy_peer = (ompi_osc_rdma_peer_dynamic_t *) peer;
369 intptr_t bound = (intptr_t) base + len;
370 ompi_osc_rdma_region_t *regions;
371 int ret, region_count;
372
373 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "locating dynamic memory region matching: {%" PRIx64 ", %" PRIx64 "}"
374 " (len %lu)", base, base + len, (unsigned long) len);
375
376 if (!ompi_osc_rdma_peer_local_state (peer)) {
377 ret = ompi_osc_rdma_refresh_dynamic_region (module, dy_peer);
378 if (OMPI_SUCCESS != ret) {
379 return ret;
380 }
381
382 regions = dy_peer->regions;
383 region_count = dy_peer->region_count;
384 } else {
385 ompi_osc_rdma_state_t *peer_state = (ompi_osc_rdma_state_t *) peer->state;
386 regions = (ompi_osc_rdma_region_t *) peer_state->regions;
387 region_count = peer_state->region_count;
388 }
389
390 *region = ompi_osc_rdma_find_region_containing (regions, 0, region_count - 1, (intptr_t) base, bound, module->region_size, NULL);
391 if (!*region) {
392 return OMPI_ERR_RMA_RANGE;
393 }
394
395
396 return OMPI_SUCCESS;
397 }