This source file includes the following definitions.
- ompi_osc_module_get_peer
- ompi_osc_rdma_module_peer
- ompi_osc_rdma_in_passive_epoch
- _ompi_osc_rdma_register
- _ompi_osc_rdma_deregister
- ompi_osc_rdma_progress
- ompi_osc_rdma_module_lock_find
- ompi_osc_rdma_module_lock_insert
- ompi_osc_rdma_module_lock_remove
- ompi_osc_rdma_module_sync_lookup
- ompi_osc_rdma_use_btl_flush
- ompi_osc_rdma_sync_rdma_inc_always
- ompi_osc_rdma_sync_rdma_inc
- ompi_osc_rdma_sync_rdma_dec_always
- ompi_osc_rdma_sync_rdma_dec
- ompi_osc_rdma_sync_rdma_complete
- ompi_osc_rdma_access_epoch_active
- ompi_osc_rdma_oor
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 #ifndef OMPI_OSC_RDMA_H
  24 #define OMPI_OSC_RDMA_H
  25 
  26 #include "ompi_config.h"
  27 #include "opal/class/opal_free_list.h"
  28 #include "opal/class/opal_hash_table.h"
  29 #include "opal/threads/threads.h"
  30 #include "opal/util/output.h"
  31 
  32 #include "opal/mca/shmem/shmem.h"
  33 #include "opal/mca/shmem/base/base.h"
  34 
  35 #include "ompi/win/win.h"
  36 #include "ompi/communicator/communicator.h"
  37 #include "ompi/datatype/ompi_datatype.h"
  38 #include "ompi/request/request.h"
  39 #include "ompi/mca/osc/osc.h"
  40 #include "ompi/mca/osc/base/base.h"
  41 #include "opal/mca/btl/btl.h"
  42 #include "ompi/memchecker.h"
  43 #include "ompi/op/op.h"
  44 #include "opal/align.h"
  45 
  46 #include "osc_rdma_types.h"
  47 #include "osc_rdma_sync.h"
  48 
  49 #include "osc_rdma_peer.h"
  50 
  51 #include "opal_stdint.h"
  52 
/* Ceiling division of the communicator size by the module's node count:
 * (comm_size + node_count - 1) / node_count.
 * NOTE(review): presumably the number of rank_array entries kept per node --
 * confirm against the code that allocates rank_array.
 * The argument is expanded three times, so pass an expression without side
 * effects. */
#define RANK_ARRAY_COUNT(module) ((ompi_comm_size ((module)->comm) + (module)->node_count - 1) / (module)->node_count)
  54 
/* Locking-mode selectors. The module's locking_mode field is compared against
 * OMPI_OSC_RDMA_LOCKING_ON_DEMAND in ompi_osc_rdma_module_sync_lookup(). */
enum {
    /* NOTE(review): name suggests a two-level locking scheme; semantics are not visible in this header */
    OMPI_OSC_RDMA_LOCKING_TWO_LEVEL,
    /* peers are locked lazily: during a lock-all epoch the sync lookup calls
     * ompi_osc_rdma_demand_lock_peer() for peers not yet demand-locked */
    OMPI_OSC_RDMA_LOCKING_ON_DEMAND,
};
  59 
  60 
  61 
  62 
/**
 * Component-level state of the osc/rdma component. An instance named
 * mca_osc_rdma_component is declared further down in this header.
 *
 * NOTE(review): none of these fields is accessed in this header, so the
 * per-field notes are inferred from names and types only -- confirm against
 * the component implementation.
 */
struct ompi_osc_rdma_component_t {
    /** base osc component structure this component extends */
    ompi_osc_base_component_t super;

    /** component-wide mutex (presumably guards the members below) */
    opal_mutex_t lock;

    /** hash table of modules (key type not visible here) */
    opal_hash_table_t modules;

    /** free list named for fragments (presumably ompi_osc_rdma_frag_t items) */
    opal_free_list_t frags;

    /** free list named for requests */
    opal_free_list_t requests;

    /** buffer size setting (units presumably bytes -- TODO confirm) */
    unsigned int buffer_size;

    /** list named request_gc; presumably requests awaiting deferred release */
    opal_list_t request_gc;

    /** list named buffer_gc; presumably buffers awaiting deferred release */
    opal_list_t buffer_gc;

    /** attach limit (presumably regions attachable to a dynamic window -- TODO confirm) */
    unsigned int max_attach;

    /** component-level no_locks setting; the module has a field of the same name */
    bool no_locks;

    /** component-level locking mode; presumably one of the OMPI_OSC_RDMA_LOCKING_* values */
    int locking_mode;

    /** component-level accumulate setting; the module has a field of the same name */
    bool acc_single_intrinsic;

    /** component-level accumulate setting; the module has a field of the same name */
    bool acc_use_amo;

    /** component priority (presumably consulted during osc component selection) */
    unsigned int priority;

    /** directory path string (presumably where backing files are placed -- TODO confirm) */
    char *backing_directory;
};
typedef struct ompi_osc_rdma_component_t ompi_osc_rdma_component_t;
 110 
/* Forward declaration: the module structure below stores a pointer to this type (rdma_frag). */
struct ompi_osc_rdma_frag_t;
 112 
 113 
 114 
 115 
 116 
 117 
 118 
/**
 * Per-window module state for the osc/rdma component. GET_MODULE() casts a
 * window's w_osc_module pointer to this type.
 *
 * Notes on fields that the inline helpers in this header touch are grounded
 * in that code. All other notes are inferred from names and types only and
 * are marked as such -- confirm against the implementation.
 */
struct ompi_osc_rdma_module_t {
    /** base osc module structure this module extends */
    ompi_osc_base_module_t super;

    /** window this module belongs to (presumably the owning window) */
    struct ompi_win_t *win;

    /** module mutex (what it guards is not visible in this header) */
    opal_mutex_t lock;

    /** locking mode; compared against OMPI_OSC_RDMA_LOCKING_ON_DEMAND in the sync lookup */
    int locking_mode;

    /* window configuration flags */

    /** NOTE(review): presumably set when all ranks use the same displacement unit */
    bool same_disp_unit;

    /** NOTE(review): presumably set when all ranks expose the same window size */
    bool same_size;

    /** NOTE(review): presumably selects CPU atomics over network atomics where possible */
    bool use_cpu_atomics;

    /** when true, ompi_osc_rdma_module_sync_lookup() skips the per-target lock search */
    bool no_locks;

    /** accumulate setting; same name as the component-level field */
    bool acc_single_intrinsic;

    /** accumulate setting; same name as the component-level field */
    bool acc_use_amo;

    /** window flavor (presumably an MPI_WIN_FLAVOR_* value -- TODO confirm) */
    int flavor;

    /** size value (presumably the local window size in bytes -- TODO confirm) */
    size_t size;

    /** displacement unit (presumably valid when same_disp_unit is set) */
    int disp_unit;

    /** peer object named leader (presumably the node leader) */
    ompi_osc_rdma_peer_t *leader;

    /** peer object named my_peer (presumably the calling process) */
    ompi_osc_rdma_peer_t *my_peer;

    /** untyped pointer (presumably memory to release when the window is freed) */
    void *free_after;

    /** pointer to a state structure (presumably this rank's window state) */
    ompi_osc_rdma_state_t *state;

    /** byte array of node communication info (contents not visible here) */
    unsigned char *node_comm_info;

    /** rank data array; see RANK_ARRAY_COUNT() for a related count */
    ompi_osc_rdma_rank_data_t *rank_array;


    /* communicators and node layout */

    /** communicator whose size RANK_ARRAY_COUNT() reads */
    ompi_communicator_t *comm;

    /** NOTE(review): names suggest a leaders-only and a node-local communicator */
    ompi_communicator_t *local_leaders;
    ompi_communicator_t *shared_comm;

    /** node identifier (presumably this process's node index) */
    int node_id;

    /** node count; divisor in RANK_ARRAY_COUNT() */
    int node_count;

    /** BTL registration handle (presumably for the state region) */
    mca_btl_base_registration_handle_t *state_handle;

    /** BTL registration handle (presumably for the window base) */
    mca_btl_base_registration_handle_t *base_handle;

    /** size of a region (which region is not visible here) */
    size_t region_size;

    /** size of the state data (presumably bytes) */
    size_t state_size;

    /** offset of the state data (reference point not visible here) */
    size_t state_offset;

    /* active-target synchronization */

    /** sync object returned by the sync lookup for lock-all, fence and PSCW epochs */
    ompi_osc_rdma_sync_t all_sync;

    /** group pointer (presumably the post/wait group) */
    struct ompi_group_t *pw_group;

    /** list of pending posts (element type not visible here) */
    opal_list_t        pending_posts;

    /* passive-target synchronization */

    /** non-zero while in a passive epoch; see ompi_osc_rdma_in_passive_epoch() */
    osc_rdma_counter_t passive_target_access_epoch;

    /** target -> lock sync table, used when outstanding_lock_array is NULL */
    opal_hash_table_t outstanding_locks;

    /** lock sync objects indexed by target; preferred over the hash table when non-NULL */
    ompi_osc_rdma_sync_t **outstanding_lock_array;


    /* peer cache */

    /** rank -> peer table, used when peer_array is NULL */
    opal_hash_table_t peer_hash;

    /** peer objects indexed by rank; preferred over the hash table when non-NULL */
    ompi_osc_rdma_peer_t **peer_array;

    /** mutex named for the peer structures (not taken by the helpers in this header) */
    opal_mutex_t peer_lock;


    /** BTL module used for memory registration and flush by the helpers below */
    struct mca_btl_base_module_t *selected_btl;

    /** fragment whose pending count is polled in ompi_osc_rdma_sync_rdma_complete() */
    struct ompi_osc_rdma_frag_t *rdma_frag;

    /** handle array (presumably registrations for dynamically attached regions) */

    ompi_osc_rdma_handle_t *dynamic_handles;

    /** base address of a segment (presumably the shared-memory segment) */


    void *segment_base;

    /** opal shmem descriptor (presumably describes the segment above) */
    opal_shmem_ds_t seg_ds;


    /* counters */

    /** counter named for put retries */
    unsigned long put_retry_count;

    /** counter named for get retries */
    unsigned long get_retry_count;

    /** atomic count of pending operations (which operations is not visible here) */
    opal_atomic_int32_t pending_ops;
};
typedef struct ompi_osc_rdma_module_t ompi_osc_rdma_module_t;
/* osc/rdma component instance; only declared here, defined in another translation unit. */
OMPI_MODULE_DECLSPEC extern ompi_osc_rdma_component_t mca_osc_rdma_component;
 274 
 275 #define GET_MODULE(win) ((ompi_osc_rdma_module_t*) win->w_osc_module)
 276 
/**
 * Window free entry point of this component (defined elsewhere).
 *
 * @param[in] win  window to operate on
 *
 * @returns an int status code. NOTE(review): presumably OMPI_SUCCESS or an
 *          OMPI error code, and presumably releases the module attached to
 *          win -- confirm against the definition.
 */
int ompi_osc_rdma_free (struct ompi_win_t *win);
 278 
 279 
 280 
 281 
 282 
 283 
 284 
 285 
 286 
 287 
 288 
 289 
 290 
/**
 * Add a peer object to a module (defined elsewhere).
 *
 * @param[in] module  osc rdma module
 * @param[in] peer    peer object to add
 *
 * @returns an int status code. NOTE(review): presumably stores the peer in
 *          the structure ompi_osc_module_get_peer() reads (peer_array or
 *          peer_hash) -- confirm against the definition.
 */
int ompi_osc_module_add_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer);
 292 
 293 
 294 
 295 
 296 
 297 
 298 
 299 
 300 
/**
 * Demand-lock a peer (defined elsewhere).
 *
 * ompi_osc_rdma_module_sync_lookup() calls this during a lock-all epoch when
 * the module's locking mode is OMPI_OSC_RDMA_LOCKING_ON_DEMAND and the peer is
 * not yet demand-locked.
 *
 * @param[in] module  osc rdma module
 * @param[in] peer    peer to lock
 *
 * @returns an int status code (semantics not visible in this header; the
 *          in-header caller ignores it)
 */
int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer);
 302 
 303 
 304 
 305 
 306 
 307 
 308 
 309 
 310 
 311 
 312 static inline ompi_osc_rdma_peer_t *ompi_osc_module_get_peer (ompi_osc_rdma_module_t *module, int peer_id)
 313 {
 314     if (NULL == module->peer_array) {
 315         ompi_osc_rdma_peer_t *peer = NULL;
 316         (void) opal_hash_table_get_value_uint32 (&module->peer_hash, peer_id, (void **) &peer);
 317         return peer;
 318     }
 319 
 320     return module->peer_array[peer_id];
 321 }
 322 
 323 
 324 
 325 
 326 
 327 
 328 
 329 static inline ompi_osc_rdma_peer_t *ompi_osc_rdma_module_peer (ompi_osc_rdma_module_t *module, int peer_id)
 330 {
 331     ompi_osc_rdma_peer_t *peer;
 332 
 333     peer = ompi_osc_module_get_peer (module, peer_id);
 334     if (NULL != peer) {
 335         return peer;
 336     }
 337 
 338     return ompi_osc_rdma_peer_lookup (module, peer_id);
 339 }
 340 
 341 
 342 
 343 
 344 
 345 
 346 static inline bool ompi_osc_rdma_in_passive_epoch (ompi_osc_rdma_module_t *module)
 347 {
 348     return 0 != module->passive_target_access_epoch;
 349 }
 350 
 351 static inline int _ompi_osc_rdma_register (ompi_osc_rdma_module_t *module, struct mca_btl_base_endpoint_t *endpoint, void *ptr,
 352                                            size_t size, uint32_t flags, mca_btl_base_registration_handle_t **handle, int line, const char *file)
 353 {
 354     if (module->selected_btl->btl_register_mem) {
 355         OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "registering segment with btl. range: %p - %p (%lu bytes)",
 356                          ptr, (void*)((char *) ptr + size), size);
 357 
 358         *handle = module->selected_btl->btl_register_mem (module->selected_btl, endpoint, ptr, size, flags);
 359         if (OPAL_UNLIKELY(NULL == *handle)) {
 360             OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "failed to register pointer with selected BTL. base: %p, "
 361                              "size: %lu. file: %s, line: %d", ptr, (unsigned long) size, file, line);
 362             return OMPI_ERR_OUT_OF_RESOURCE;
 363         }
 364     } else {
 365         *handle = NULL;
 366     }
 367 
 368     return OMPI_SUCCESS;
 369 }
 370 
/* Convenience wrapper: forwards its arguments to _ompi_osc_rdma_register() and
 * appends the call site's __LINE__ and __FILE__. */
#define ompi_osc_rdma_register(...) _ompi_osc_rdma_register(__VA_ARGS__, __LINE__, __FILE__)
 372 
 373 static inline void _ompi_osc_rdma_deregister (ompi_osc_rdma_module_t *module, mca_btl_base_registration_handle_t *handle, int line, const char *file)
 374 {
 375     if (handle) {
 376         module->selected_btl->btl_deregister_mem (module->selected_btl, handle);
 377     }
 378 }
 379 
/* Convenience wrapper: forwards its arguments to _ompi_osc_rdma_deregister()
 * and appends the call site's __LINE__ and __FILE__. */
#define ompi_osc_rdma_deregister(...) _ompi_osc_rdma_deregister(__VA_ARGS__, __LINE__, __FILE__)
 381 
 382 static inline void ompi_osc_rdma_progress (ompi_osc_rdma_module_t *module) {
 383     opal_progress ();
 384 }
 385 
 386 
 387 
 388 
 389 
 390 
 391 
 392 
 393 
 394 
 395 
 396 
 397 static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_lock_find (ompi_osc_rdma_module_t *module, int target,
 398                                                                     ompi_osc_rdma_peer_t **peer)
 399 {
 400     ompi_osc_rdma_sync_t *outstanding_lock = NULL;
 401 
 402     if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) {
 403         outstanding_lock = module->outstanding_lock_array[target];
 404     } else {
 405         (void) opal_hash_table_get_value_uint32 (&module->outstanding_locks, (uint32_t) target, (void **) &outstanding_lock);
 406     }
 407 
 408     if (NULL != outstanding_lock && peer) {
 409         *peer = outstanding_lock->peer_list.peer;
 410     }
 411 
 412     return outstanding_lock;
 413 }
 414 
 415 
 416 
 417 
 418 
 419 
 420 
 421 
 422 
 423 
 424 static inline void ompi_osc_rdma_module_lock_insert (struct ompi_osc_rdma_module_t *module, ompi_osc_rdma_sync_t *lock)
 425 {
 426     if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) {
 427         module->outstanding_lock_array[lock->sync.lock.target] = lock;
 428     } else {
 429         (void) opal_hash_table_set_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target, (void *) lock);
 430     }
 431 }
 432 
 433 
 434 
 435 
 436 
 437 
 438 
 439 
 440 
 441 
 442 
 443 static inline void ompi_osc_rdma_module_lock_remove (struct ompi_osc_rdma_module_t *module, ompi_osc_rdma_sync_t *lock)
 444 {
 445     if (OPAL_LIKELY(NULL != module->outstanding_lock_array)) {
 446         module->outstanding_lock_array[lock->sync.lock.target] = NULL;
 447     } else {
 448         (void) opal_hash_table_remove_value_uint32 (&module->outstanding_locks, (uint32_t) lock->sync.lock.target);
 449     }
 450 }
 451 
 452 
 453 
 454 
 455 
 456 
 457 
 458 
 459 
 460 
 461 
 462 
 463 
 464 
 465 static inline ompi_osc_rdma_sync_t *ompi_osc_rdma_module_sync_lookup (ompi_osc_rdma_module_t *module, int target, struct ompi_osc_rdma_peer_t **peer)
 466 {
 467     OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "looking for synchronization object for target %d", target);
 468 
 469     switch (module->all_sync.type) {
 470     case OMPI_OSC_RDMA_SYNC_TYPE_NONE:
 471         if (!module->no_locks) {
 472             return ompi_osc_rdma_module_lock_find (module, target, peer);
 473         }
 474 
 475         return NULL;
 476     case OMPI_OSC_RDMA_SYNC_TYPE_LOCK:
 477         OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found lock_all access epoch for target %d", target);
 478 
 479         *peer = ompi_osc_rdma_module_peer (module, target);
 480         if (OPAL_UNLIKELY(OMPI_OSC_RDMA_LOCKING_ON_DEMAND == module->locking_mode &&
 481                           !ompi_osc_rdma_peer_is_demand_locked (*peer))) {
 482             ompi_osc_rdma_demand_lock_peer (module, *peer);
 483         }
 484 
 485         return &module->all_sync;
 486     case OMPI_OSC_RDMA_SYNC_TYPE_FENCE:
 487         OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found fence access epoch for target %d", target);
 488         
 489         module->all_sync.epoch_active = true;
 490         *peer = ompi_osc_rdma_module_peer (module, target);
 491 
 492         return &module->all_sync;
 493     case OMPI_OSC_RDMA_SYNC_TYPE_PSCW:
 494         if (ompi_osc_rdma_sync_pscw_peer (module, target, peer)) {
 495             OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "found PSCW access epoch target for %d", target);
 496             return &module->all_sync;
 497         }
 498     }
 499 
 500     OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "no access epoch found for target %d", target);
 501 
 502     return NULL;
 503 }
 504 
 505 static bool ompi_osc_rdma_use_btl_flush (ompi_osc_rdma_module_t *module)
 506 {
 507 #if defined(BTL_VERSION) && (BTL_VERSION >= 310)
 508     return !!(module->selected_btl->btl_flush);
 509 #else
 510     return false;
 511 #endif
 512 }
 513 
 514 
 515 
 516 
 517 
 518 
 519 static inline void ompi_osc_rdma_sync_rdma_inc_always (ompi_osc_rdma_sync_t *rdma_sync)
 520 {
 521     ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma.counter, 1);
 522 
 523     OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "inc: there are %ld outstanding rdma operations",
 524                      (unsigned long) rdma_sync->outstanding_rdma.counter);
 525 }
 526 
 527 static inline void ompi_osc_rdma_sync_rdma_inc (ompi_osc_rdma_sync_t *rdma_sync)
 528 {
 529 #if defined(BTL_VERSION) && (BTL_VERSION >= 310)
 530     if (ompi_osc_rdma_use_btl_flush (rdma_sync->module)) {
 531         return;
 532     }
 533 #endif
 534     ompi_osc_rdma_sync_rdma_inc_always (rdma_sync);
 535 }
 536 
 537 
 538 
 539 
 540 
 541 
 542 static inline void ompi_osc_rdma_sync_rdma_dec_always (ompi_osc_rdma_sync_t *rdma_sync)
 543 {
 544     opal_atomic_wmb ();
 545     ompi_osc_rdma_counter_add (&rdma_sync->outstanding_rdma.counter, -1);
 546 
 547     OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "dec: there are %ld outstanding rdma operations",
 548                      (unsigned long) rdma_sync->outstanding_rdma.counter);
 549 }
 550 
 551 static inline void ompi_osc_rdma_sync_rdma_dec (ompi_osc_rdma_sync_t *rdma_sync)
 552 {
 553 #if defined(BTL_VERSION) && (BTL_VERSION >= 310)
 554     if (ompi_osc_rdma_use_btl_flush (rdma_sync->module)) {
 555         return;
 556     }
 557 #endif
 558     ompi_osc_rdma_sync_rdma_dec_always (rdma_sync);
 559 }
 560 
 561 
 562 
 563 
 564 
 565 
 566 static inline void ompi_osc_rdma_sync_rdma_complete (ompi_osc_rdma_sync_t *sync)
 567 {
 568 #if !defined(BTL_VERSION) || (BTL_VERSION < 310)
 569     do {
 570         opal_progress ();
 571     }  while (ompi_osc_rdma_sync_get_count (sync));
 572 #else
 573     mca_btl_base_module_t *btl_module = sync->module->selected_btl;
 574 
 575     do {
 576         if (!ompi_osc_rdma_use_btl_flush (sync->module)) {
 577             opal_progress ();
 578         } else {
 579             btl_module->btl_flush (btl_module, NULL);
 580         }
 581     }  while (ompi_osc_rdma_sync_get_count (sync) || (sync->module->rdma_frag && (sync->module->rdma_frag->pending > 1)));
 582 #endif
 583 }
 584 
 585 
 586 
 587 
 588 
 589 
 590 
 591 
 592 
 593 
 594 
 595 static inline bool ompi_osc_rdma_access_epoch_active (ompi_osc_rdma_module_t *module)
 596 {
 597     return (module->all_sync.epoch_active || ompi_osc_rdma_in_passive_epoch (module));
 598 }
 599 
 600 __opal_attribute_always_inline__
 601 static inline bool ompi_osc_rdma_oor (int rc)
 602 {
 603     
 604     return (OPAL_SUCCESS != rc && (OPAL_ERR_OUT_OF_RESOURCE == rc || OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc));
 605 }
 606 
 607 #endif