root/opal/mca/rcache/rgpusm/rcache_rgpusm_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. mca_rcache_rgpusm_deregister_lru
  2. mca_rcache_rgpusm_module_init
  3. mca_rcache_rgpusm_register
  4. mca_rcache_rgpusm_find
  5. registration_is_cachebale
  6. mca_rcache_rgpusm_deregister
  7. mca_rcache_rgpusm_deregister_no_lock
  8. mca_rcache_rgpusm_finalize

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2013 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006-2009 Cisco Systems, Inc.  All rights reserved.
  14  * Copyright (c) 2006      Voltaire. All rights reserved.
  15  * Copyright (c) 2007      Mellanox Technologies. All rights reserved.
  16  * Copyright (c) 2010      IBM Corporation.  All rights reserved.
  17  * Copyright (c) 2012-2015 NVIDIA Corporation.  All rights reserved.
  18  * Copyright (c) 2015      Los Alamos National Security, LLC.  All rights
  19  *                         reserved.
  20  *
  21  * $COPYRIGHT$
  22  *
  23  * Additional copyrights may follow
  24  *
  25  * $HEADER$
  26  */
  27 
  28 /**
  29  * @file:
  30  *
  31  * This memory pool is used for getting the memory handle of remote
  32  * GPU memory when using CUDA.  Hence, the name is "rgpusm" for "remote
  33  * CUDA" GPU memory.  There is a cache that can be used to store the
  34  * remote handles in case they are reused to save on the registration
  35  * cost as that can be expensive, on the order of 100 usecs.  The
  36  * cache can also be used just to track how many handles are in use at
   37  * a time.  It is best to look at this with the four different
   38  * scenarios that are possible.
  39  * 1. rcache_rgpusm_leave_pinned=0, cache_size=unlimited
  40  * 2. rcache_rgpusm_leave_pinned=0, cache_size=limited
  41  * 3. rcache_rgpusm_leave_pinned=1, cache_size=unlimited (default)
  42  * 4. rcache_rgpusm_leave_pinned=1, cache_size=limited.
  43  *
  44  * Case 1: The cache is unused and remote memory is registered and
  45  * unregistered for each transaction.  The amount of outstanding
  46  * registered memory is unlimited.
  47  * Case 2: The cache keeps track of how much memory is registered at a
  48  * time.  Since leave pinned is 0, any memory that is registered is in
   49  * use.  If the amount to register exceeds the cache size limit, we
   50  * will error out.  This could be handled more gracefully, but this
   51  * is not a common way to run, so we will leave as is.
  52  * Case 3: The cache is needed to track current and past transactions.
  53  * However, there is no limit on the number that can be stored.
  54  * Therefore, once memory enters the cache, and gets registered, it
  55  * stays that way forever.
  56  * Case 4: The cache is needed to track current and past transactions.
  57  * In addition, a list of most recently used (but no longer in use)
  58  * registrations is stored so that it can be used to evict
  59  * registrations from the cache.  In addition, these registrations are
  60  * deregistered.
  61  *
  62  * I also want to capture how we can run into the case where we do not
  63  * find something in the cache, but when we try to register it, we get
  64  * an error back from the CUDA library saying the memory is in use.
  65  * This can happen in the following scenario.  The application mallocs
  66  * a buffer of size 32K.  The library loads this in the cache and
  67  * registers it.  The application then frees the buffer.  It then
  68  * mallocs a buffer of size 64K.  This malloc returns the same base
  69  * address as the first 32K allocation.  The library searches the
  70  * cache, but since the size is larger than the original allocation it
  71  * does not find the registration.  It then attempts to register this.
  72  * The CUDA library returns an error saying it is already mapped.  To
  73  * handle this, we return an error of OPAL_ERR_WOULD_BLOCK to the
  74  * memory pool.  The memory pool then looks for the registration based
  75  * on the base address and a size of 4.  We use the small size to make
  76  * sure that we find the registration.  This registration is evicted,
  77  * and we try to register again.
  78  */
  79 
  80 #define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
  81 #include "opal_config.h"
  82 #include "opal/align.h"
  83 #include "opal/mca/rcache/rgpusm/rcache_rgpusm.h"
  84 #include <errno.h>
  85 #include <string.h>
  86 #ifdef HAVE_MALLOC_H
  87 #include <malloc.h>
  88 #endif
  89 #include "opal/util/proc.h"
  90 #include "opal/mca/rcache/rcache.h"
  91 #include "opal/mca/rcache/base/base.h"
  92 #include "opal/mca/rcache/base/base.h"
  93 #include "opal/mca/common/cuda/common_cuda.h"
  94 
  95 
  96 static int mca_rcache_rgpusm_deregister_no_lock(struct mca_rcache_base_module_t *,
  97                                                mca_rcache_base_registration_t *);
  98 static inline bool mca_rcache_rgpusm_deregister_lru (mca_rcache_base_module_t *rcache) {
  99     mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t *) rcache;
 100     mca_rcache_base_registration_t *old_reg;
 101     int rc;
 102 
 103     /* Remove the registration from the cache and list before
 104        deregistering the memory */
 105     old_reg = (mca_rcache_base_registration_t*)
 106         opal_list_remove_first (&rcache_rgpusm->lru_list);
 107     if (NULL == old_reg) {
 108         opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 109                             "RGPUSM: The LRU list is empty. There is nothing to deregister");
 110         return false;
 111     }
 112 
 113     mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, old_reg);
 114 
 115     /* Drop the rcache lock while we deregister the memory */
 116     OPAL_THREAD_UNLOCK(&rcache->lock);
 117     assert(old_reg->ref_count == 0);
 118     rc = cuda_closememhandle (NULL, old_reg);
 119     OPAL_THREAD_LOCK(&rcache->lock);
 120 
 121     /* This introduces a potential leak of registrations if
 122        the deregistration fails to occur as we no longer have
 123        a reference to it. Is this possible? */
 124     if (OPAL_SUCCESS != rc) {
 125         opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 126                             "RGPUSM: Failed to deregister the memory addr=%p, size=%d",
 127                             old_reg->base, (int)(old_reg->bound - old_reg->base + 1));
 128         return false;
 129     }
 130 
 131     opal_free_list_return (&rcache_rgpusm->reg_list,
 132                            (opal_free_list_item_t*)old_reg);
 133     rcache_rgpusm->stat_evicted++;
 134 
 135     return true;
 136 }
 137 
 138 
 139 /*
 140  *  Initializes the rcache module.
 141  */
 142 void mca_rcache_rgpusm_module_init(mca_rcache_rgpusm_module_t* rcache)
 143 {
 144     rcache->super.rcache_component = &mca_rcache_rgpusm_component.super;
 145     rcache->super.rcache_register = mca_rcache_rgpusm_register;
 146     rcache->super.rcache_find = mca_rcache_rgpusm_find;
 147     rcache->super.rcache_deregister = mca_rcache_rgpusm_deregister;
 148     rcache->super.rcache_finalize = mca_rcache_rgpusm_finalize;
 149     rcache->vma_module = mca_rcache_base_vma_module_alloc ();
 150 
 151     OBJ_CONSTRUCT(&rcache->reg_list, opal_free_list_t);
 152     opal_free_list_init (&rcache->reg_list, sizeof(struct mca_rcache_common_cuda_reg_t),
 153             opal_cache_line_size,
 154             OBJ_CLASS(mca_rcache_base_registration_t),
 155             0,opal_cache_line_size,
 156             0, -1, 32, NULL, 0, NULL, NULL, NULL);
 157     OBJ_CONSTRUCT(&rcache->lru_list, opal_list_t);
 158     rcache->stat_cache_hit = rcache->stat_cache_miss = rcache->stat_evicted = 0;
 159     rcache->stat_cache_found = rcache->stat_cache_notfound = 0;
 160     rcache->stat_cache_valid = rcache->stat_cache_invalid = 0;
 161 
 162 }
 163 
 164 /*
 165  * This function opens and handle using the handle that was received
 166  * from the remote memory.  It uses the addr and size of the remote
 167  * memory for caching the registration.
 168  */
 169 int mca_rcache_rgpusm_register (mca_rcache_base_module_t *rcache, void *addr,
 170                                size_t size, uint32_t flags, int32_t access_flags,
 171                                mca_rcache_base_registration_t **reg)
 172 {
 173     mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
 174     mca_rcache_common_cuda_reg_t *rgpusm_reg;
 175     mca_rcache_common_cuda_reg_t *rget_reg;
 176     opal_free_list_item_t *item;
 177     int rc;
 178     int mypeer;  /* just for debugging */
 179 
 180     /* In order to preserve the signature of the mca_rcache_rgpusm_register
 181      * function, we are using the **reg variable to not only get back the
 182      * registration information, but to hand in the memory handle received
 183      * from the remote side. */
 184     rget_reg = (mca_rcache_common_cuda_reg_t *)*reg;
 185 
 186     mypeer = flags;
 187     flags = 0;
 188     /* No need to support MCA_RCACHE_FLAGS_CACHE_BYPASS in here. It is not used. */
 189     assert(0 == (flags & MCA_RCACHE_FLAGS_CACHE_BYPASS));
 190 
 191     /* This chunk of code handles the case where leave pinned is not
 192      * set and we do not use the cache.  This is not typically how we
 193      * will be running.  This means that one can have an unlimited
 194      * number of registrations occuring at the same time.  Since we
 195      * are not leaving the registrations pinned, the number of
 196      * registrations is unlimited and there is no need for a cache. */
 197     if(!mca_rcache_rgpusm_component.leave_pinned && 0 == mca_rcache_rgpusm_component.rcache_size_limit) {
 198         item = opal_free_list_get (&rcache_rgpusm->reg_list);
 199         if(NULL == item) {
 200             return OPAL_ERR_OUT_OF_RESOURCE;
 201         }
 202         rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;
 203         rgpusm_reg->base.rcache = rcache;
 204         rgpusm_reg->base.base = addr;
 205         rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;;
 206         rgpusm_reg->base.flags = flags;
 207 
 208         /* Copy the memory handle received into the registration */
 209         memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle));
 210 
 211         /* The rget_reg registration is holding the memory handle needed
 212          * to register the remote memory.  This was received from the remote
 213          * process.  A pointer to the memory is returned in the alloc_base field. */
 214         rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
 215                                  (mca_rcache_base_registration_t *)rget_reg);
 216 
 217         /* This error should not happen with no cache in use. */
 218         assert(OPAL_ERR_WOULD_BLOCK != rc);
 219 
 220         if(rc != OPAL_SUCCESS) {
 221             opal_free_list_return (&rcache_rgpusm->reg_list, item);
 222             return rc;
 223         }
 224         rgpusm_reg->base.ref_count++;
 225         *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
 226         return OPAL_SUCCESS;
 227     }
 228 
 229     /* Check to see if memory is registered and stored in the cache. */
 230     OPAL_THREAD_LOCK(&rcache->lock);
 231     mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, size, reg);
 232 
 233     /* If *reg is not NULL, we have a registration.  Let us see if the
 234      * memory handle matches the one we were looking for.  If not, the
 235      * registration is invalid and needs to be removed. This happens
 236      * if memory was allocated, freed, and allocated again and ends up
 237      * with the same virtual address and within the limits of the
 238      * previous registration.  The memory handle check will catch that
 239      * scenario as the handles have unique serial numbers.  */
 240     if (*reg != NULL) {
 241         rcache_rgpusm->stat_cache_hit++;
 242         opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 243                             "RGPUSM: Found addr=%p,size=%d (base=%p,size=%d) in cache",
 244                             addr, (int)size, (*reg)->base,
 245                             (int)((*reg)->bound - (*reg)->base));
 246 
 247         if (mca_common_cuda_memhandle_matches((mca_rcache_common_cuda_reg_t *)*reg, rget_reg)) {
 248             /* Registration matches what was requested.  All is good. */
 249             rcache_rgpusm->stat_cache_valid++;
 250         } else {
 251             /* This is an old registration.  Need to boot it. */
 252             opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 253                                 "RGPUSM: Mismatched Handle: Evicting/unregistering "
 254                                 "addr=%p,size=%d (base=%p,size=%d) from cache",
 255                                 addr, (int)size, (*reg)->base,
 256                                 (int)((*reg)->bound - (*reg)->base));
 257 
 258             /* The ref_count has to be zero as this memory cannot possibly
 259              * be in use.  Assert on that just to make sure. */
 260             assert(0 == (*reg)->ref_count);
 261             if (mca_rcache_rgpusm_component.leave_pinned) {
 262                 opal_list_remove_item(&rcache_rgpusm->lru_list,
 263                                       (opal_list_item_t*)(*reg));
 264             }
 265 
 266             /* Bump the reference count to keep things copacetic in deregister */
 267             (*reg)->ref_count++;
 268             /* Invalidate the registration so it will get booted out. */
 269             (*reg)->flags |= MCA_RCACHE_FLAGS_INVALID;
 270             mca_rcache_rgpusm_deregister_no_lock(rcache, *reg);
 271             *reg = NULL;
 272             rcache_rgpusm->stat_cache_invalid++;
 273         }
 274     } else {
 275         /* Nothing was found in the cache. */
 276         rcache_rgpusm->stat_cache_miss++;
 277     }
 278 
 279     /* If we have a registration here, then we know it is valid. */
 280     if (*reg != NULL) {
 281         opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 282                             "RGPUSM: CACHE HIT is good: ep=%d, addr=%p, size=%d in cache",
 283                             mypeer, addr, (int)size);
 284 
 285         /* When using leave pinned, we keep an LRU list. */
 286         if ((0 == (*reg)->ref_count) && mca_rcache_rgpusm_component.leave_pinned) {
 287             opal_output_verbose(20, mca_rcache_rgpusm_component.output,
 288                                 "RGPUSM: POP OFF LRU: ep=%d, addr=%p, size=%d in cache",
 289                                 mypeer, addr, (int)size);
 290             opal_list_remove_item(&rcache_rgpusm->lru_list,
 291                                   (opal_list_item_t*)(*reg));
 292         }
 293         (*reg)->ref_count++;
 294         OPAL_THREAD_UNLOCK(&rcache->lock);
 295         opal_output(-1, "reg->ref_count=%d", (int)(*reg)->ref_count);
 296         opal_output_verbose(80, mca_rcache_rgpusm_component.output,
 297                            "RGPUSM: Found entry in cache addr=%p, size=%d", addr, (int)size);
 298         return OPAL_SUCCESS;
 299     }
 300 
 301     /* If we are here, then we did not find a registration, or it was invalid,
 302      * so this is a new one, and we are going to use the cache. */
 303     assert(NULL == *reg);
 304     opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 305                         "RGPUSM: New registration ep=%d, addr=%p, size=%d. Need to register and insert in cache",
 306                          mypeer, addr, (int)size);
 307 
 308     item = opal_free_list_get (&rcache_rgpusm->reg_list);
 309     if(NULL == item) {
 310         OPAL_THREAD_UNLOCK(&rcache->lock);
 311         return OPAL_ERR_OUT_OF_RESOURCE;
 312     }
 313     rgpusm_reg = (mca_rcache_common_cuda_reg_t*)item;
 314 
 315     rgpusm_reg->base.rcache = rcache;
 316     rgpusm_reg->base.base = addr;
 317     rgpusm_reg->base.bound = (unsigned char *)addr + size - 1;
 318     rgpusm_reg->base.flags = flags;
 319 
 320     /* Need the memory handle saved in the registration */
 321     memcpy(rgpusm_reg->data.memHandle, rget_reg->data.memHandle, sizeof(rget_reg->data.memHandle));
 322 
 323     /* Actually register the memory, which opens the memory handle.
 324      * Need to do this prior to putting in the cache as the base and
 325      * bound values may be changed by the registration.  The memory
 326      * associated with the handle comes back in the alloc_base
 327      * value. */
 328     rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
 329                              (mca_rcache_base_registration_t *)rget_reg);
 330     /* There is a chance we can get the OPAL_ERR_WOULD_BLOCK from the
 331      * CUDA codes attempt to register the memory.  The case that this
 332      * can happen is as follows.  A block of memory is registered.
 333      * Then the sending side frees the memory.  The sending side then
 334      * cuMemAllocs memory again and gets the same base
 335      * address. However, it cuMemAllocs a block that is larger than
 336      * the one in the cache.  The cache will return that memory is not
 337      * registered and call into CUDA to register it.  However, that
 338      * will fail with CUDA_ERROR_ALREADY_MAPPED.  Therefore we need to
 339      * boot that previous allocation out and deregister it first.
 340      */
 341     if (OPAL_ERR_WOULD_BLOCK == rc) {
 342         mca_rcache_base_registration_t *oldreg;
 343 
 344         /* Need to make sure it is at least 4 bytes in size  This will
 345          * ensure we get the hit in the cache. */
 346         mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, 4, &oldreg);
 347 
 348         /* For most cases, we will find a registration that overlaps.
 349          * Removal of it should allow the registration we are
 350          * attempting to succeed. */
 351         if (NULL != oldreg) {
 352             /* The ref_count has to be zero as this memory cannot
 353              * possibly be in use.  Assert on that just to make sure. */
 354             assert(0 == oldreg->ref_count);
 355             if (mca_rcache_rgpusm_component.leave_pinned) {
 356                 opal_list_remove_item(&rcache_rgpusm->lru_list,
 357                                       (opal_list_item_t*)oldreg);
 358             }
 359 
 360             /* Bump the reference count to keep things copacetic in deregister */
 361             oldreg->ref_count++;
 362             /* Invalidate the registration so it will get booted out. */
 363             oldreg->flags |= MCA_RCACHE_FLAGS_INVALID;
 364             mca_rcache_rgpusm_deregister_no_lock(rcache, oldreg);
 365             rcache_rgpusm->stat_evicted++;
 366 
 367             /* And try again.  This one usually works. */
 368             rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
 369                                      (mca_rcache_base_registration_t *)rget_reg);
 370         }
 371 
 372         /* There is a chance that another registration is blocking our
 373          * ability to register.  Check the rc to see if we still need
 374          * to try and clear out registrations. */
 375         while (OPAL_SUCCESS != rc) {
 376             if (true != mca_rcache_rgpusm_deregister_lru(rcache)) {
 377                 rc = OPAL_ERROR;
 378                 break;
 379             }
 380             /* Clear out one registration. */
 381             rc = cuda_openmemhandle (addr, size, (mca_rcache_base_registration_t *)rgpusm_reg,
 382                                      (mca_rcache_base_registration_t *)rget_reg);
 383         }
 384     }
 385 
 386     if(rc != OPAL_SUCCESS) {
 387         OPAL_THREAD_UNLOCK(&rcache->lock);
 388         opal_free_list_return (&rcache_rgpusm->reg_list, item);
 389         return rc;
 390     }
 391 
 392     opal_output_verbose(80, mca_rcache_rgpusm_component.output,
 393                         "RGPUSM: About to insert in rgpusm cache addr=%p, size=%d", addr, (int)size);
 394     rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
 395                                       mca_rcache_rgpusm_component.rcache_size_limit);
 396     if (OPAL_ERR_TEMP_OUT_OF_RESOURCE == rc) {
 397         opal_output_verbose(40, mca_rcache_rgpusm_component.output,
 398                             "RGPUSM: No room in the cache - boot the first one out");
 399         (void)mca_rcache_rgpusm_deregister_lru(rcache);
 400         if (mca_rcache_rgpusm_component.empty_cache) {
 401             int remNum = 1;
 402             /* Empty out every registration from LRU until it is empty */
 403             opal_output_verbose(40, mca_rcache_rgpusm_component.output,
 404                                 "RGPUSM: About to delete all the unused entries in the cache");
 405             while (mca_rcache_rgpusm_deregister_lru(rcache)) {
 406                 remNum++;
 407             }
 408             opal_output_verbose(40, mca_rcache_rgpusm_component.output,
 409                                 "RGPUSM: Deleted and deregistered %d entries", remNum);
 410             rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
 411                                              mca_rcache_rgpusm_component.rcache_size_limit);
 412         } else {
 413             /* Check for room after one removal. If not, remove another one until there is space */
 414             while((rc = mca_rcache_base_vma_insert (rcache_rgpusm->vma_module, (mca_rcache_base_registration_t *)rgpusm_reg,
 415                                                     mca_rcache_rgpusm_component.rcache_size_limit)) ==
 416                   OPAL_ERR_TEMP_OUT_OF_RESOURCE) {
 417                 opal_output_verbose(40, mca_rcache_rgpusm_component.output,
 418                                     "RGPUSM: No room in the cache - boot one out");
 419                 if (!mca_rcache_rgpusm_deregister_lru(rcache)) {
 420                     break;
 421                 }
 422             }
 423         }
 424     }
 425 
 426     if(rc != OPAL_SUCCESS) {
 427         OPAL_THREAD_UNLOCK(&rcache->lock);
 428         opal_free_list_return (&rcache_rgpusm->reg_list, item);
 429         /* We cannot recover from this.  We can be here if the size of
 430          * the cache is smaller than the amount of memory we are
 431          * trying to register in a single transfer.  In that case, rc
 432          * is MPI_ERR_OUT_OF_RESOURCES, but everything is stuck at
 433          * that point.  Therefore, just error out completely.
 434          */
 435         opal_output_verbose(10, mca_rcache_rgpusm_component.output,
 436                             "RGPUSM: Failed to register addr=%p, size=%d", addr, (int)size);
 437         return OPAL_ERROR;
 438     }
 439 
 440     rgpusm_reg->base.ref_count++;
 441     *reg = (mca_rcache_base_registration_t *)rgpusm_reg;
 442     OPAL_THREAD_UNLOCK(&rcache->lock);
 443 
 444     return OPAL_SUCCESS;
 445 }
 446 
 447 int mca_rcache_rgpusm_find(struct mca_rcache_base_module_t *rcache, void *addr,
 448         size_t size, mca_rcache_base_registration_t **reg)
 449 {
 450     mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
 451     int rc;
 452     unsigned char *base, *bound;
 453 
 454     base = addr;
 455     bound = base + size - 1; /* To keep cache hits working correctly */
 456 
 457     OPAL_THREAD_LOCK(&rcache->lock);
 458     opal_output(-1, "Looking for addr=%p, size=%d", addr, (int)size);
 459     rc = mca_rcache_base_vma_find (rcache_rgpusm->vma_module, addr, size, reg);
 460     if(*reg != NULL && mca_rcache_rgpusm_component.leave_pinned) {
 461         if(0 == (*reg)->ref_count && mca_rcache_rgpusm_component.leave_pinned) {
 462             opal_list_remove_item(&rcache_rgpusm->lru_list, (opal_list_item_t*)(*reg));
 463         }
 464         rcache_rgpusm->stat_cache_found++;
 465         (*reg)->ref_count++;
 466     } else {
 467         rcache_rgpusm->stat_cache_notfound++;
 468     }
 469     OPAL_THREAD_UNLOCK(&rcache->lock);
 470 
 471     return rc;
 472 }
 473 
 474 static inline bool registration_is_cachebale(mca_rcache_base_registration_t *reg)
 475 {
 476      return !(reg->flags &
 477              (MCA_RCACHE_FLAGS_CACHE_BYPASS |
 478               MCA_RCACHE_FLAGS_INVALID));
 479 }
 480 
 481 int mca_rcache_rgpusm_deregister(struct mca_rcache_base_module_t *rcache,
 482                             mca_rcache_base_registration_t *reg)
 483 {
 484     mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
 485     int rc = OPAL_SUCCESS;
 486     assert(reg->ref_count > 0);
 487 
 488     OPAL_THREAD_LOCK(&rcache->lock);
 489     reg->ref_count--;
 490     opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
 491     if(reg->ref_count > 0) {
 492         OPAL_THREAD_UNLOCK(&rcache->lock);
 493         return OPAL_SUCCESS;
 494     }
 495     if(mca_rcache_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
 496     {
 497         /* if leave_pinned is set don't deregister memory, but put it
 498          * on LRU list for future use */
 499         opal_output_verbose(20, mca_rcache_rgpusm_component.output,
 500                             "RGPUSM: Deregister: addr=%p, size=%d: cacheable and pinned, leave in cache, PUSH IN LRU",
 501                             reg->base, (int)(reg->bound - reg->base + 1));
 502         opal_list_prepend(&rcache_rgpusm->lru_list, (opal_list_item_t*)reg);
 503     } else {
 504         /* Remove from rcache first */
 505         if(!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS))
 506             mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, reg);
 507 
 508         /* Drop the rcache lock before deregistring the memory */
 509         OPAL_THREAD_UNLOCK(&rcache->lock);
 510 
 511         {
 512              assert(reg->ref_count == 0);
 513              rc = cuda_closememhandle (NULL, reg);
 514          }
 515 
 516         OPAL_THREAD_LOCK(&rcache->lock);
 517 
 518         if(OPAL_SUCCESS == rc) {
 519             opal_free_list_return (&rcache_rgpusm->reg_list,
 520                                    (opal_free_list_item_t*)reg);
 521         }
 522     }
 523     OPAL_THREAD_UNLOCK(&rcache->lock);
 524 
 525     return rc;
 526 }
 527 
 528 int mca_rcache_rgpusm_deregister_no_lock(struct mca_rcache_base_module_t *rcache,
 529                             mca_rcache_base_registration_t *reg)
 530 {
 531     mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
 532     int rc = OPAL_SUCCESS;
 533     assert(reg->ref_count > 0);
 534 
 535     reg->ref_count--;
 536     opal_output(-1, "Deregister: reg->ref_count=%d", (int)reg->ref_count);
 537     if(reg->ref_count > 0) {
 538         return OPAL_SUCCESS;
 539     }
 540     if(mca_rcache_rgpusm_component.leave_pinned && registration_is_cachebale(reg))
 541     {
 542         /* if leave_pinned is set don't deregister memory, but put it
 543          * on LRU list for future use */
 544         opal_list_prepend(&rcache_rgpusm->lru_list, (opal_list_item_t*)reg);
 545     } else {
 546         /* Remove from rcache first */
 547         if(!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS))
 548             mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, reg);
 549 
 550         assert(reg->ref_count == 0);
 551         rc = cuda_closememhandle (NULL, reg);
 552 
 553         if(OPAL_SUCCESS == rc) {
 554             opal_free_list_return (&rcache_rgpusm->reg_list,
 555                                    (opal_free_list_item_t*)reg);
 556         }
 557     }
 558 
 559     return rc;
 560 }
 561 
 562 #define RGPUSM_RCACHE_NREGS 100
 563 
 564 void mca_rcache_rgpusm_finalize(struct mca_rcache_base_module_t *rcache)
 565 {
 566     mca_rcache_rgpusm_module_t *rcache_rgpusm = (mca_rcache_rgpusm_module_t*)rcache;
 567     mca_rcache_base_registration_t *reg;
 568     mca_rcache_base_registration_t *regs[RGPUSM_RCACHE_NREGS];
 569     int reg_cnt, i;
 570     int rc;
 571 
 572     /* Statistic */
 573     if(true == mca_rcache_rgpusm_component.print_stats) {
 574         opal_output(0, "%s rgpusm: stats "
 575                 "(hit/valid/invalid/miss/evicted): %d/%d/%d/%d/%d\n",
 576                 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
 577                 rcache_rgpusm->stat_cache_hit, rcache_rgpusm->stat_cache_valid,
 578                 rcache_rgpusm->stat_cache_invalid, rcache_rgpusm->stat_cache_miss,
 579                 rcache_rgpusm->stat_evicted);
 580     }
 581 
 582     OPAL_THREAD_LOCK(&rcache->lock);
 583     do {
 584         reg_cnt = mca_rcache_base_vma_find_all (rcache_rgpusm->vma_module, 0, (size_t)-1,
 585                 regs, RGPUSM_RCACHE_NREGS);
 586         opal_output(-1, "Registration size at finalize = %d", reg_cnt);
 587 
 588         for(i = 0; i < reg_cnt; i++) {
 589             reg = regs[i];
 590 
 591             if(reg->ref_count) {
 592                 reg->ref_count = 0; /* otherway dereg will fail on assert */
 593             } else if (mca_rcache_rgpusm_component.leave_pinned) {
 594                 opal_list_remove_item(&rcache_rgpusm->lru_list,
 595                         (opal_list_item_t*)reg);
 596             }
 597 
 598             /* Remove from rcache first */
 599             mca_rcache_base_vma_delete (rcache_rgpusm->vma_module, reg);
 600 
 601             /* Drop lock before deregistering memory */
 602             OPAL_THREAD_UNLOCK(&rcache->lock);
 603             assert(reg->ref_count == 0);
 604             rc = cuda_closememhandle (NULL, reg);
 605             OPAL_THREAD_LOCK(&rcache->lock);
 606 
 607             if(rc != OPAL_SUCCESS) {
 608                 /* Potentially lose track of registrations
 609                    do we have to put it back? */
 610                 continue;
 611             }
 612 
 613             opal_free_list_return (&rcache_rgpusm->reg_list,
 614                                    (opal_free_list_item_t *) reg);
 615         }
 616     } while(reg_cnt == RGPUSM_RCACHE_NREGS);
 617 
 618     OBJ_DESTRUCT(&rcache_rgpusm->lru_list);
 619     OBJ_DESTRUCT(&rcache_rgpusm->reg_list);
 620     OPAL_THREAD_UNLOCK(&rcache->lock);
 621 }

/* [<][>][^][v][top][bottom][index][help] */