This source file includes the following definitions.
- ompi_osc_module_get_peer
- ompi_osc_rdma_module_peer
- ompi_osc_rdma_in_passive_epoch
- _ompi_osc_rdma_register
- _ompi_osc_rdma_deregister
- ompi_osc_rdma_progress
- ompi_osc_rdma_module_lock_find
- ompi_osc_rdma_module_lock_insert
- ompi_osc_rdma_module_lock_remove
- ompi_osc_rdma_module_sync_lookup
- ompi_osc_rdma_use_btl_flush
- ompi_osc_rdma_sync_rdma_inc_always
- ompi_osc_rdma_sync_rdma_inc
- ompi_osc_rdma_sync_rdma_dec_always
- ompi_osc_rdma_sync_rdma_dec
- ompi_osc_rdma_sync_rdma_complete
- ompi_osc_rdma_access_epoch_active
- ompi_osc_rdma_oor
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 #ifndef OMPI_OSC_RDMA_H
24 #define OMPI_OSC_RDMA_H
25
26 #include "ompi_config.h"
27 #include "opal/class/opal_free_list.h"
28 #include "opal/class/opal_hash_table.h"
29 #include "opal/threads/threads.h"
30 #include "opal/util/output.h"
31
32 #include "opal/mca/shmem/shmem.h"
33 #include "opal/mca/shmem/base/base.h"
34
35 #include "ompi/win/win.h"
36 #include "ompi/communicator/communicator.h"
37 #include "ompi/datatype/ompi_datatype.h"
38 #include "ompi/request/request.h"
39 #include "ompi/mca/osc/osc.h"
40 #include "ompi/mca/osc/base/base.h"
41 #include "opal/mca/btl/btl.h"
42 #include "ompi/memchecker.h"
43 #include "ompi/op/op.h"
44 #include "opal/align.h"
45
46 #include "osc_rdma_types.h"
47 #include "osc_rdma_sync.h"
48
49 #include "osc_rdma_peer.h"
50
51 #include "opal_stdint.h"
52
/**
 * Ceiling of (communicator size / node count) for a module.
 *
 * The argument is evaluated more than once, so it must be free of side
 * effects. The caller must ensure (module)->node_count is non-zero.
 */
#define RANK_ARRAY_COUNT(module) \
    ((ompi_comm_size ((module)->comm) + (module)->node_count - 1) / (module)->node_count)
54
/**
 * Locking modes; a module's locking_mode field is compared against these
 * values.
 */
enum {
    /* NOTE(review): the two-level scheme is implemented outside this header;
     * confirm its exact semantics there. */
    OMPI_OSC_RDMA_LOCKING_TWO_LEVEL = 0,
    /* During a lock-all epoch, a sync lookup that finds the target peer not
     * yet demand-locked calls ompi_osc_rdma_demand_lock_peer() on it. */
    OMPI_OSC_RDMA_LOCKING_ON_DEMAND = 1,
};
59
60
61
62
63 struct ompi_osc_rdma_component_t {
64
65 ompi_osc_base_component_t super;
66
67
68 opal_mutex_t lock;
69
70
71 opal_hash_table_t modules;
72
73
74 opal_free_list_t frags;
75
76
77 opal_free_list_t requests;
78
79
80 unsigned int buffer_size;
81
82
83 opal_list_t request_gc;
84
85
86 opal_list_t buffer_gc;
87
88
89 unsigned int max_attach;
90
91
92 bool no_locks;
93
94
95 int locking_mode;
96
97
98 bool acc_single_intrinsic;
99
100
101 bool acc_use_amo;
102
103
104 unsigned int priority;
105
106
107 char *backing_directory;
108 };
109 typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
110
111 struct ompi_osc_rdma_frag_t;
112
113
114
115
116
117
118
119 struct ompi_osc_rdma_module_t {
120
121 ompi_osc_base_module_t super;
122
123
124 struct ompi_win_t *win;
125
126
127 opal_mutex_t lock;
128
129
130 int locking_mode;
131
132
133
134
135 bool same_disp_unit;
136
137
138 bool same_size;
139
140
141 bool use_cpu_atomics;
142
143
144 bool no_locks;
145
146 bool acc_single_intrinsic;
147
148 bool acc_use_amo;
149
150
151 int flavor;
152
153
154 size_t size;
155
156
157 int disp_unit;
158
159
160 ompi_osc_rdma_peer_t *leader;
161
162
163 ompi_osc_rdma_peer_t *my_peer;
164
165
166 void *free_after;
167
168
169 ompi_osc_rdma_state_t *state;
170
171
172 unsigned char *node_comm_info;
173
174
175 ompi_osc_rdma_rank_data_t *rank_array;
176
177
178
179
180 ompi_communicator_t *comm;
181
182
183 ompi_communicator_t *local_leaders;
184 ompi_communicator_t *shared_comm;
185
186
187 int node_id;
188
189
190 int node_count;
191
192
193 mca_btl_base_registration_handle_t *state_handle;
194
195
196 mca_btl_base_registration_handle_t *base_handle;
197
198
199 size_t region_size;
200
201
202 size_t state_size;
203
204
205 size_t state_offset;
206
207
208
209
210 ompi_osc_rdma_sync_t all_sync;
211
212
213 struct ompi_group_t *pw_group;
214
215
216 opal_list_t pending_posts;
217
218
219
220
221 osc_rdma_counter_t passive_target_access_epoch;
222
223
224 opal_hash_table_t outstanding_locks;
225
226
227 ompi_osc_rdma_sync_t **outstanding_lock_array;
228
229
230
231
232
233 opal_hash_table_t peer_hash;
234
235
236 ompi_osc_rdma_peer_t **peer_array;
237
238
239 opal_mutex_t peer_lock;
240
241
242
243 struct mca_btl_base_module_t *selected_btl;
244
245
246 struct ompi_osc_rdma_frag_t *rdma_frag;
247
248
249
250 ompi_osc_rdma_handle_t *dynamic_handles;
251
252
253
254
255 void *segment_base;
256
257
258 opal_shmem_ds_t seg_ds;
259
260
261
262
263
264 unsigned long put_retry_count;
265
266
267 unsigned long get_retry_count;
268
269
270 opal_atomic_int32_t pending_ops;
271 };
272 typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t;
273 OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component;
274
/**
 * Fetch the osc/rdma module stored in a window's w_osc_module pointer.
 *
 * The argument is parenthesized so that any pointer expression (for example
 * `base + 1` or `&wins[i]`) expands correctly.
 */
#define GET_MODULE(win) ((ompi_osc_rdma_module_t *) (win)->w_osc_module)
276
277 int ompi_osc_rdma_free (struct ompi_win_t *win);
278
279
280
281
282
283
284
285
286
287
288
289
290
291 int ompi_osc_module_add_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer);
292
293
294
295
296
297
298
299
300
301 int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer);
302
303
304
305
306
307
308
309
310
311
312 static inline ompi_osc_rdma_peer_t *ompi_osc_module_get_peer (ompi_osc_rdma_module_t *module, int peer_id)
313 {
314 if (NULL == module->peer_array) {
315 ompi_osc_rdma_peer_t *peer = NULL;
316 (void) opal_hash_table_get_value_uint32 (&module->peer_hash, peer_id, (void **) &peer);
317 return peer;
318 }
319
320 return module->peer_array[peer_id];
321 }
322
323
324
325
326
327
328
329 static inline ompi_osc_rdma_peer_t *ompi_osc_rdma_module_peer (ompi_osc_rdma_module_t *module, int peer_id)
330 {
331 ompi_osc_rdma_peer_t *peer;
332
333 peer = ompi_osc_module_get_peer (module, peer_id);
334 if (NULL != peer) {
335 return peer;
336 }
337
338 return ompi_osc_rdma_peer_lookup (module, peer_id);
339 }
340
341
342
343
344
345
346 static inline bool ompi_osc_rdma_in_passive_epoch (ompi_osc_rdma_module_t *module)
347 {
348 return 0 != module->passive_target_access_epoch;
349 }
350
351 static inline int _ompi_osc_rdma_register (ompi_osc_rdma_module_t *module, struct mca_btl_base_endpoint_t *endpoint, void *ptr,
352 size_t size, uint32_t flags, mca_btl_base_registration_handle_t **handle, int line, const char *file)
353 {
354 if (module->selected_btl->btl_register_mem) {
355 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "registering segment with btl. range: %p - %p (%lu bytes)",
356 ptr, (void*)((char *) ptr + size), size);
357
358 *handle = module->selected_btl->btl_register_mem (module->selected_btl, endpoint, ptr, size, flags);
359 if (OPAL_UNLIKELY(NULL == *handle)) {
360 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to register pointer with selected BTL. base: %p, "
361 "size: %lu. file: %s, line: %d", ptr, (unsigned long) size, file, line);
362 return OMPI_ERR_OUT_OF_RESOURCE;
363 }
364 } else {
365 *handle = NULL;
366 }
367
368 return OMPI_SUCCESS;
369 }
370
371 #define ompi_osc_rdma_register(...) _ompi_osc_rdma_register(__VA_ARGS__, __LINE__, __FILE__)
372
373 static inline void _ompi_osc_rdma_deregister (ompi_osc_rdma_module_t *module, mca_btl_base_registration_handle_t *handle, int line, const char *file)
374 {
375 if (handle) {
376 module->selected_btl->btl_deregister_mem (module->selected_btl, handle);
377 }
378 }
379
380 #define ompi_osc_rdma_deregister(...) _ompi_osc_rdma_deregister(__VA_ARGS__, __LINE__, __FILE__)
381
382 static inline void ompi_osc_rdma_progress (ompi_osc_rdma_module_t *module) {
383 opal_progress ();
384 }
385
386
387
388
389
390
391
392
393
394
395
396
397 static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_lock_find (ompi_osc_rdma_module_t *module, int target,
398 ompi_osc_rdma_peer_t **peer)
399 {
400 ompi_osc_rdma_sync_t *outstanding_lock = NULL;
401
402 if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) {
403 outstanding_lock = module->outstanding_lock_array[target];
404 } else {
405 (void) opal_hash_table_get_value_uint32 (&module->outstanding_locks, (uint32_t) target, (void **) &outstanding_lock);
406 }
407
408 if (NULL != outstanding_lock && peer) {
409 *peer = outstanding_lock->peer_list.peer;
410 }
411
412 return outstanding_lock;
413 }
414
415
416
417
418
419
420
421
422
423
424 static inline void ompi_osc_rdma_module_lock_insert (struct ompi_osc_rdma_module_t *module, ompi_osc_rdma_sync_t *lock)
425 {
426 if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) {
427 module->outstanding_lock_array[lock->sync.lock.target] = lock;
428 } else {
429 (void) opal_hash_table_set_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target, (void *) lock);
430 }
431 }
432
433
434
435
436
437
438
439
440
441
442
443 static inline void ompi_osc_rdma_module_lock_remove (struct ompi_osc_rdma_module_t *module, ompi_osc_rdma_sync_t *lock)
444 {
445 if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) {
446 module->outstanding_lock_array[lock->sync.lock.target] = NULL;
447 } else {
448 (void) opal_hash_table_remove_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target);
449 }
450 }
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465 static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_rdma_module_t *module, int target, struct ompi_osc_rdma_peer_t **peer)
466 {
467 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "looking for synchronization object for target %d", target);
468
469 switch (module->all_sync.type) {
470 case OMPI_OSC_RDMA_SYNC_TYPE_NONE:
471 if (!module->no_locks) {
472 return ompi_osc_rdma_module_lock_find (module, target, peer);
473 }
474
475 return NULL;
476 case OMPI_OSC_RDMA_SYNC_TYPE_LOCK:
477 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found lock_all access epoch for target %d", target);
478
479 *peer = ompi_osc_rdma_module_peer (module, target);
480 if (OPAL_UNLIKELY(OMPI_OSC_RDMA_LOCKING_ON_DEMAND == module->locking_mode &&
481 !ompi_osc_rdma_peer_is_demand_locked (*peer))) {
482 ompi_osc_rdma_demand_lock_peer (module, *peer);
483 }
484
485 return &module->all_sync;
486 case OMPI_OSC_RDMA_SYNC_TYPE_FENCE:
487 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found fence access epoch for target %d", target);
488
489 module->all_sync.epoch_active = true;
490 *peer = ompi_osc_rdma_module_peer (module, target);
491
492 return &module->all_sync;
493 case OMPI_OSC_RDMA_SYNC_TYPE_PSCW:
494 if (ompi_osc_rdma_sync_pscw_peer (module, target, peer)) {
495 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found PSCW access epoch target for %d", target);
496 return &module->all_sync;
497 }
498 }
499
500 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no access epoch found for target %d", target);
501
502 return NULL;
503 }
504
505 static bool ompi_osc_rdma_use_btl_flush (ompi_osc_rdma_module_t *module)
506 {
507 #if defined(BTL_VERSION) && (BTL_VERSION >= 310)
508 return !!(module->selected_btl->btl_flush);
509 #else
510 return false;
511 #endif
512 }
513
514
515
516
517
518
519 static inline void ompi_osc_rdma_sync_rdma_inc_always (ompi_osc_rdma_sync_t *rdma_sync)
520 {
521 ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma.counter, 1);
522
523 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "inc: there are %ld outstanding rdma operations",
524 (unsigned long) rdma_sync->outstanding_rdma.counter);
525 }
526
527 static inline void ompi_osc_rdma_sync_rdma_inc (ompi_osc_rdma_sync_t *rdma_sync)
528 {
529 #if defined(BTL_VERSION) && (BTL_VERSION >= 310)
530 if (ompi_osc_rdma_use_btl_flush (rdma_sync->module)) {
531 return;
532 }
533 #endif
534 ompi_osc_rdma_sync_rdma_inc_always (rdma_sync);
535 }
536
537
538
539
540
541
542 static inline void ompi_osc_rdma_sync_rdma_dec_always (ompi_osc_rdma_sync_t *rdma_sync)
543 {
544 opal_atomic_wmb ();
545 ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma.counter, -1);
546
547 OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "dec: there are %ld outstanding rdma operations",
548 (unsigned long) rdma_sync->outstanding_rdma.counter);
549 }
550
551 static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync)
552 {
553 #if defined(BTL_VERSION) && (BTL_VERSION >= 310)
554 if (ompi_osc_rdma_use_btl_flush (rdma_sync->module)) {
555 return;
556 }
557 #endif
558 ompi_osc_rdma_sync_rdma_dec_always (rdma_sync);
559 }
560
561
562
563
564
565
566 static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)
567 {
568 #if !defined(BTL_VERSION) || (BTL_VERSION < 310)
569 do {
570 opal_progress ();
571 } while (ompi_osc_rdma_sync_get_count (sync));
572 #else
573 mca_btl_base_module_t *btl_module = sync->module->selected_btl;
574
575 do {
576 if (!ompi_osc_rdma_use_btl_flush (sync->module)) {
577 opal_progress ();
578 } else {
579 btl_module->btl_flush (btl_module, NULL);
580 }
581 } while (ompi_osc_rdma_sync_get_count (sync) || (sync->module->rdma_frag && (sync->module->rdma_frag->pending > 1)));
582 #endif
583 }
584
585
586
587
588
589
590
591
592
593
594
595 static inline bool ompi_osc_rdma_access_epoch_active (ompi_osc_rdma_module_t *module)
596 {
597 return (module->all_sync.epoch_active || ompi_osc_rdma_in_passive_epoch (module));
598 }
599
600 __opal_attribute_always_inline__
601 static inline bool ompi_osc_rdma_oor (int rc)
602 {
603
604 return (OPAL_SUCCESS != rc && (OPAL_ERR_OUT_OF_RESOURCE == rc || OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc));
605 }
606
607 #endif