root/ompi/mca/osc/rdma/osc_rdma_passive_target.c


DEFINITIONS

This source file includes the following definitions:
  1. ompi_osc_rdma_sync
  2. ompi_osc_rdma_flush
  3. ompi_osc_rdma_flush_all
  4. ompi_osc_rdma_flush_local
  5. ompi_osc_rdma_flush_local_all
  6. ompi_osc_rdma_lock_atomic_internal
  7. ompi_osc_rdma_unlock_atomic_internal
  8. ompi_osc_rdma_demand_lock_peer
  9. ompi_osc_rdma_lock_atomic
  10. ompi_osc_rdma_unlock_atomic
  11. ompi_osc_rdma_lock_all_atomic
  12. ompi_osc_rdma_unlock_all_atomic

/* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
/*
 * Copyright (c) 2004-2005 The Trustees of Indiana University.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 The Trustees of the University of Tennessee.
 *                         All rights reserved.
 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
 *                         University of Stuttgart.  All rights reserved.
 * Copyright (c) 2004-2005 The Regents of the University of California.
 *                         All rights reserved.
 * Copyright (c) 2007-2018 Los Alamos National Security, LLC.  All rights
 *                         reserved.
 * Copyright (c) 2010      IBM Corporation.  All rights reserved.
 * Copyright (c) 2012-2013 Sandia National Laboratories.  All rights reserved.
 * Copyright (c) 2018      Intel, Inc. All rights reserved.
 * $COPYRIGHT$
 *
 * Additional copyrights may follow
 *
 * $HEADER$
 */

#include "ompi_config.h"

#include "osc_rdma_passive_target.h"
#include "osc_rdma_comm.h"

#include "mpi.h"

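/* MPI_Win_sync synchronizes the public and private copies of the window.
 * Presumably the BTLs used by this component keep the two copies coherent,
 * so a single progress call is sufficient here. */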
int ompi_osc_rdma_sync (struct ompi_win_t *win)
{
    ompi_osc_rdma_progress (GET_MODULE(win));
    return OMPI_SUCCESS;
}

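/* MPI_Win_flush: complete all outstanding RMA operations issued by this
 * process to the given target, at both the origin and the target. The
 * target must be within a passive-target (lock) epoch when this is called. */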
int ompi_osc_rdma_flush (int target, struct ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_sync_t *lock;
    ompi_osc_rdma_peer_t *peer;

    assert (0 <= target);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush: %d, %s", target, win->w_name);

    OPAL_THREAD_LOCK(&module->lock);

    lock = ompi_osc_rdma_module_sync_lookup (module, target, &peer);
    if (OPAL_UNLIKELY(NULL == lock || OMPI_OSC_RDMA_SYNC_TYPE_LOCK != lock->type)) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "flush: target %d is not locked in window %s",
                         target, win->w_name);
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_SYNC;
    }
    OPAL_THREAD_UNLOCK(&module->lock);

    /* finish all outstanding fragments */
    ompi_osc_rdma_sync_rdma_complete (lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush on target %d complete", target);

    return OMPI_SUCCESS;
}


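/* MPI_Win_flush_all: complete outstanding RMA operations to every target.
 * This completes the global (lock_all) sync object, if active, and then
 * walks every per-target lock registered in the outstanding_locks table. */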
int ompi_osc_rdma_flush_all (struct ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_sync_t *lock;
    int ret = OMPI_SUCCESS;
    uint32_t key;
    void *node;

    /* flush is only allowed from within a passive target epoch */
    if (!ompi_osc_rdma_in_passive_epoch (module)) {
        return OMPI_ERR_RMA_SYNC;
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all: %s", win->w_name);

    /* globally complete all outstanding rdma requests */
    if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) {
        ompi_osc_rdma_sync_rdma_complete (&module->all_sync);
    }

    /* flush all locks */
    ret = opal_hash_table_get_first_key_uint32 (&module->outstanding_locks, &key, (void **) &lock, &node);
    while (OPAL_SUCCESS == ret) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "flushing lock %p", (void *) lock);
        ompi_osc_rdma_sync_rdma_complete (lock);
        ret = opal_hash_table_get_next_key_uint32 (&module->outstanding_locks, &key, (void **) &lock,
                                                   node, &node);
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "flush_all complete");

    return OMPI_SUCCESS;
}


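/* The local flush variants only require local completion, but the remote
 * flush implementations above already wait for full completion of all
 * outstanding RDMA, which subsumes local completion, so they are aliased. */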
int ompi_osc_rdma_flush_local (int target, struct ompi_win_t *win)
{
    return ompi_osc_rdma_flush (target, win);
}


int ompi_osc_rdma_flush_local_all (struct ompi_win_t *win)
{
    return ompi_osc_rdma_flush_all (win);
}

/* locking via atomics */
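/* A sketch of the lock layout, as inferred from the masks used below: each
 * peer's state holds a 64-bit local_lock word combining an exclusive flag
 * (OMPI_OSC_RDMA_LOCK_EXCLUSIVE) with a shared-reader count, and the leader
 * holds a 64-bit global_lock word whose low 32 bits count ranks holding
 * per-peer exclusive locks and whose high 32 bits count ranks holding the
 * global shared (lock_all) lock. ompi_osc_rdma_lock_acquire_shared ()
 * atomically adds its value argument to the target word and fails if any
 * bit in the check mask was set, making exclusive lockers and lock_all
 * holders mutually exclusive in two-level mode. */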
static inline int ompi_osc_rdma_lock_atomic_internal (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer,
                                                      ompi_osc_rdma_sync_t *lock)
{
    const int locking_mode = module->locking_mode;
    int ret;

    if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) {
        do {
            OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "incrementing global exclusive lock");
            if (OMPI_OSC_RDMA_LOCKING_TWO_LEVEL == locking_mode) {
                /* lock the master lock. this requires that no rank holds the global shared lock */
                ret = ompi_osc_rdma_lock_acquire_shared (module, module->leader, 1, offsetof (ompi_osc_rdma_state_t, global_lock),
                                                         0xffffffff00000000L);
                if (OMPI_SUCCESS != ret) {
                    ompi_osc_rdma_progress (module);
                    continue;
                }
            }

            OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "acquiring exclusive lock on peer");
            ret = ompi_osc_rdma_lock_try_acquire_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock));
            if (ret) {
                /* release the global lock */
                if (OMPI_OSC_RDMA_LOCKING_TWO_LEVEL == locking_mode) {
                    ompi_osc_rdma_lock_release_shared (module, module->leader, -1, offsetof (ompi_osc_rdma_state_t, global_lock));
                }
                ompi_osc_rdma_progress (module);
                continue;
            }

            peer->flags |= OMPI_OSC_RDMA_PEER_EXCLUSIVE;
            break;
        } while (1);
    } else {
        do {
            /* go right to the target to acquire a shared lock */
            OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "incrementing global shared lock");
            ret = ompi_osc_rdma_lock_acquire_shared (module, peer, 1, offsetof (ompi_osc_rdma_state_t, local_lock),
                                                     OMPI_OSC_RDMA_LOCK_EXCLUSIVE);
            if (OMPI_SUCCESS == ret) {
                return OMPI_SUCCESS;
            }

            ompi_osc_rdma_progress (module);
        } while (1);
    }

    return OMPI_SUCCESS;
}

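/* Undo ompi_osc_rdma_lock_atomic_internal (): drop the per-peer lock and,
 * in two-level mode, the matching reference on the leader's global lock. */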
static inline int ompi_osc_rdma_unlock_atomic_internal (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer,
                                                        ompi_osc_rdma_sync_t *lock)
{
    const int locking_mode = module->locking_mode;

    if (MPI_LOCK_EXCLUSIVE == lock->sync.lock.type) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "releasing exclusive lock on peer");
        ompi_osc_rdma_lock_release_exclusive (module, peer, offsetof (ompi_osc_rdma_state_t, local_lock));

        if (OMPI_OSC_RDMA_LOCKING_TWO_LEVEL == locking_mode) {
            OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global exclusive lock");
            ompi_osc_rdma_lock_release_shared (module, module->leader, -1, offsetof (ompi_osc_rdma_state_t, global_lock));
        }

        peer->flags &= ~OMPI_OSC_RDMA_PEER_EXCLUSIVE;
    } else {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_DEBUG, "decrementing global shared lock");
        ompi_osc_rdma_lock_release_shared (module, peer, -1, offsetof (ompi_osc_rdma_state_t, local_lock));
        peer->flags &= ~OMPI_OSC_RDMA_PEER_DEMAND_LOCKED;
    }

    return OMPI_SUCCESS;
}

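/* In on-demand locking mode lock_all does not acquire every peer lock up
 * front; a peer is locked the first time it is actually targeted. Peers
 * locked this way are queued on the sync object so unlock_all can release
 * them later. */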
int ompi_osc_rdma_demand_lock_peer (ompi_osc_rdma_module_t *module, ompi_osc_rdma_peer_t *peer)
{
    ompi_osc_rdma_sync_t *lock = &module->all_sync;
    int ret = OMPI_SUCCESS;

    /* check for bad usage */
    assert (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == lock->type);

    OPAL_THREAD_SCOPED_LOCK(&peer->lock,
    do {
        if (!ompi_osc_rdma_peer_is_demand_locked (peer)) {
            ret = ompi_osc_rdma_lock_atomic_internal (module, peer, lock);
            OPAL_THREAD_SCOPED_LOCK(&lock->lock, opal_list_append (&lock->demand_locked_peers, &peer->super));
            peer->flags |= OMPI_OSC_RDMA_PEER_DEMAND_LOCKED;
        }
    } while (0);
    );

    return ret;
}

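/* MPI_Win_lock: start a passive-target access epoch for a single target.
 * A minimal user-level sketch of the calls this implements (illustrative
 * only, not taken from this file):
 *
 *     MPI_Win_lock (MPI_LOCK_EXCLUSIVE, 1, 0, win);
 *     MPI_Put (buf, 1, MPI_INT, 1, 0, 1, MPI_INT, win);
 *     MPI_Win_unlock (1, win);
 */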
int ompi_osc_rdma_lock_atomic (int lock_type, int target, int assert, ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_peer_t *peer = ompi_osc_rdma_module_peer (module, target);
    ompi_osc_rdma_sync_t *lock;
    int ret = OMPI_SUCCESS;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock: %d, %d, %d, %s", lock_type, target, assert, win->w_name);

    if (module->no_locks) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted to lock with no_locks set");
        return OMPI_ERR_RMA_SYNC;
    }

    if (module->all_sync.epoch_active && (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type || MPI_LOCK_EXCLUSIVE == lock_type)) {
        /* it is impossible to get an exclusive lock while holding a global shared lock or while
         * in an active target access epoch */
        return OMPI_ERR_RMA_SYNC;
    }

    /* clear the global sync object (in case MPI_Win_fence was called) */
    module->all_sync.type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;

    /* create lock item */
    lock = ompi_osc_rdma_sync_allocate (module);
    if (OPAL_UNLIKELY(NULL == lock)) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    lock->type = OMPI_OSC_RDMA_SYNC_TYPE_LOCK;
    lock->sync.lock.target = target;
    lock->sync.lock.type = lock_type;
    lock->sync.lock.assert = assert;

    lock->peer_list.peer = peer;
    lock->num_peers = 1;
    OBJ_RETAIN(peer);

    if (0 == (assert & MPI_MODE_NOCHECK)) {
        ret = ompi_osc_rdma_lock_atomic_internal (module, peer, lock);
    }

    if (OPAL_LIKELY(OMPI_SUCCESS == ret)) {
        ++module->passive_target_access_epoch;

        opal_atomic_wmb ();

        OPAL_THREAD_SCOPED_LOCK(&module->lock, ompi_osc_rdma_module_lock_insert (module, lock));
    } else {
        OBJ_RELEASE(lock);
    }

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock %d complete", target);

    return ret;
}


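/* MPI_Win_unlock: end the passive-target access epoch for a single target.
 * All outstanding RDMA to the target is completed before the remote lock
 * is released. */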
int ompi_osc_rdma_unlock_atomic (int target, ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_peer_t *peer;
    ompi_osc_rdma_sync_t *lock;
    int ret = OMPI_SUCCESS;

    OPAL_THREAD_LOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock: %d, %s", target, win->w_name);

    lock = ompi_osc_rdma_module_lock_find (module, target, &peer);
    if (OPAL_UNLIKELY(NULL == lock)) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "target %d is not locked in window %s",
                         target, win->w_name);
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_SYNC;
    }

    ompi_osc_rdma_module_lock_remove (module, lock);

    /* finish all outstanding fragments */
    ompi_osc_rdma_sync_rdma_complete (lock);

    if (!(lock->sync.lock.assert & MPI_MODE_NOCHECK)) {
        ret = ompi_osc_rdma_unlock_atomic_internal (module, peer, lock);
    }

    /* release our reference to this peer */
    OBJ_RELEASE(peer);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock %d complete", target);

    --module->passive_target_access_epoch;

    opal_atomic_wmb ();

    OPAL_THREAD_UNLOCK(&module->lock);

    /* delete the lock */
    ompi_osc_rdma_sync_return (lock);

    return ret;
}

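/* MPI_Win_lock_all: start a shared passive-target access epoch covering
 * every rank in the window. In two-level mode this takes the global shared
 * lock at the leader; in on-demand mode peers are locked lazily via
 * ompi_osc_rdma_demand_lock_peer (), except for the local rank, which is
 * locked eagerly. */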
int ompi_osc_rdma_lock_all_atomic (int assert, struct ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_sync_t *lock;
    int ret = OMPI_SUCCESS;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock_all: %d, %s", assert, win->w_name);

    if (module->no_locks) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted to lock with no_locks set");
        return OMPI_ERR_RMA_SYNC;
    }

    OPAL_THREAD_LOCK(&module->lock);
    if (module->all_sync.epoch_active) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "attempted lock_all when active target epoch is %s "
                         "and lock all epoch is %s",
                         (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != module->all_sync.type && module->all_sync.epoch_active) ?
                         "active" : "inactive",
                         (OMPI_OSC_RDMA_SYNC_TYPE_LOCK == module->all_sync.type) ? "active" : "inactive");
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_SYNC;
    }

    /* set up lock */
    lock = &module->all_sync;

    lock->type = OMPI_OSC_RDMA_SYNC_TYPE_LOCK;
    lock->sync.lock.target = -1;
    lock->sync.lock.type   = MPI_LOCK_SHARED;
    lock->sync.lock.assert = assert;
    lock->num_peers = ompi_comm_size (module->comm);

    lock->epoch_active = true;
    /* NTH: TODO -- as with fence it might be a good idea to create an array for accessing all peers
     * without having to access the hash table. Such a change would likely increase performance
     * at the expense of memory usage. Ex. if a window has 1M peers then 8MB per process would
     * be needed for this array. */

    if (0 == (assert & MPI_MODE_NOCHECK)) {
        /* increment the global shared lock */
        if (OMPI_OSC_RDMA_LOCKING_TWO_LEVEL == module->locking_mode) {
            ret = ompi_osc_rdma_lock_acquire_shared (module, module->leader, 0x0000000100000000UL,
                                                     offsetof(ompi_osc_rdma_state_t, global_lock),
                                                     0x00000000ffffffffUL);
        } else {
            /* always lock myself */
            ret = ompi_osc_rdma_demand_lock_peer (module, module->my_peer);
        }
    }

    if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
        lock->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;
        lock->num_peers = 0;
        lock->epoch_active = false;
    } else {
        ++module->passive_target_access_epoch;
    }

    opal_atomic_wmb ();

    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "lock_all complete");

    return ret;
}

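/* MPI_Win_unlock_all: end a lock_all epoch, completing all outstanding RDMA
 * and then releasing either the on-demand peer locks or the leader's global
 * shared lock, depending on the locking mode. */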
int ompi_osc_rdma_unlock_all_atomic (struct ompi_win_t *win)
{
    ompi_osc_rdma_module_t *module = GET_MODULE(win);
    ompi_osc_rdma_sync_t *lock;

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock_all: %s", win->w_name);

    OPAL_THREAD_LOCK(&module->lock);

    lock = &module->all_sync;
    if (OMPI_OSC_RDMA_SYNC_TYPE_LOCK != lock->type) {
        OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_INFO, "not locked in window %s", win->w_name);
        OPAL_THREAD_UNLOCK(&module->lock);
        return OMPI_ERR_RMA_SYNC;
    }

    /* finish all outstanding fragments */
    ompi_osc_rdma_sync_rdma_complete (lock);

    if (0 == (lock->sync.lock.assert & MPI_MODE_NOCHECK)) {
        if (OMPI_OSC_RDMA_LOCKING_ON_DEMAND == module->locking_mode) {
            ompi_osc_rdma_peer_t *peer, *next;

            /* drop all on-demand locks */
            OPAL_LIST_FOREACH_SAFE(peer, next, &lock->demand_locked_peers, ompi_osc_rdma_peer_t) {
                (void) ompi_osc_rdma_unlock_atomic_internal (module, peer, lock);
                opal_list_remove_item (&lock->demand_locked_peers, &peer->super);
            }
        } else {
            /* decrement the master lock shared count */
            (void) ompi_osc_rdma_lock_release_shared (module, module->leader, -0x0000000100000000UL,
                                                      offsetof (ompi_osc_rdma_state_t, global_lock));
        }
    }

    lock->type = OMPI_OSC_RDMA_SYNC_TYPE_NONE;
    lock->num_peers = 0;
    lock->epoch_active = false;

    --module->passive_target_access_epoch;

    opal_atomic_wmb ();

    OPAL_THREAD_UNLOCK(&module->lock);

    OSC_RDMA_VERBOSE(MCA_BASE_VERBOSE_TRACE, "unlock_all complete");

    return OMPI_SUCCESS;
}
