root/opal/mca/rcache/udreg/rcache_udreg_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_rcache_udreg_module_init
  2. mca_rcache_udreg_reg_func
  3. mca_rcache_udreg_dereg_func
  4. mca_rcache_udreg_evict
  5. mca_rcache_udreg_register
  6. mca_rcache_udreg_find
  7. mca_rcache_udreg_deregister
  8. mca_rcache_udreg_finalize

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2013 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006-2009 Cisco Systems, Inc.  All rights reserved.
  14  * Copyright (c) 2006      Voltaire. All rights reserved.
  15  * Copyright (c) 2007      Mellanox Technologies. All rights reserved.
  16  * Copyright (c) 2010      IBM Corporation.  All rights reserved.
  17  * Copyright (c) 2011-2016 Los Alamos National Security, LLC. All rights
  18  *                         reserved.
  19  *
  20  * $COPYRIGHT$
  21  *
  22  * Additional copyrights may follow
  23  *
  24  * $HEADER$
  25  */
  26 
  27 #define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
  28 #include "opal_config.h"
  29 #include "opal/align.h"
  30 #include "rcache_udreg.h"
  31 #include <errno.h>
  32 #include <string.h>
  33 #ifdef HAVE_MALLOC_H
  34 #include <malloc.h>
  35 #endif
  36 #include "opal/mca/rcache/base/base.h"
  37 #include "opal/runtime/opal_params.h"
  38 #include "opal/include/opal_stdint.h"
  39 #include "opal/util/sys_limits.h"
  40 #include "opal/util/string_copy.h"
  41 
  42 #include <fcntl.h>
  43 
  44 #include <udreg_pub.h>
  45 
  46 #include <sys/mman.h>
  47 
  48 
  49 static int mca_rcache_udreg_register (mca_rcache_base_module_t* rcache, void *addr,
  50                                       size_t size, uint32_t flags, int32_t access_flags,
  51                                       mca_rcache_base_registration_t **reg);
  52 static int mca_rcache_udreg_deregister (mca_rcache_base_module_t *rcache,
  53                                         mca_rcache_base_registration_t *reg);
  54 static int mca_rcache_udreg_find (mca_rcache_base_module_t* rcache, void* addr,
  55                                   size_t size, mca_rcache_base_registration_t **reg);
  56 static void mca_rcache_udreg_finalize (mca_rcache_base_module_t *rcache);
  57 static bool mca_rcache_udreg_evict (mca_rcache_base_module_t *rcache);
  58 
  59 static void *mca_rcache_udreg_reg_func (void *addr, uint64_t len, void *reg_context);
  60 static uint32_t mca_rcache_udreg_dereg_func (void *device_data, void *dreg_context);
  61 
  62 
  63 /*
  64  *  Initializes the rcache module.
  65  */
  66 int mca_rcache_udreg_module_init (mca_rcache_udreg_module_t *rcache)
  67 {
  68     struct udreg_cache_attr cache_attr;
  69     int urc;
  70 
  71     rcache->super.rcache_component = &mca_rcache_udreg_component.super;
  72     rcache->super.rcache_register = mca_rcache_udreg_register;
  73     rcache->super.rcache_find = mca_rcache_udreg_find;
  74     rcache->super.rcache_deregister = mca_rcache_udreg_deregister;
  75     /* This module relies on udreg for notification of memory release */
  76     rcache->super.rcache_invalidate_range = NULL;
  77     rcache->super.rcache_finalize = mca_rcache_udreg_finalize;
  78 
  79     cache_attr.modes = 0;
  80 
  81     /* Create udreg cache */
  82     if (rcache->resources.use_kernel_cache) {
  83         cache_attr.modes |= UDREG_CC_MODE_USE_KERNEL_CACHE;
  84     }
  85 
  86     if (rcache->resources.use_evict_w_unreg) {
  87         cache_attr.modes |= UDREG_CC_MODE_USE_EVICT_W_UNREG;
  88     }
  89 
  90     if (mca_rcache_udreg_component.leave_pinned) {
  91         cache_attr.modes |= UDREG_CC_MODE_USE_LAZY_DEREG;
  92     }
  93 
  94     OBJ_CONSTRUCT(&rcache->lock, opal_mutex_t);
  95 
  96     opal_string_copy (cache_attr.cache_name, rcache->resources.base.cache_name, UDREG_MAX_CACHENAME_LEN);
  97     cache_attr.max_entries         = rcache->resources.max_entries;
  98     cache_attr.debug_mode          = 0;
  99     cache_attr.debug_rank          = 0;
 100     cache_attr.reg_context         = rcache;
 101     cache_attr.dreg_context        = rcache;
 102     cache_attr.destructor_context  = rcache;
 103     cache_attr.device_reg_func     = mca_rcache_udreg_reg_func;
 104     cache_attr.device_dereg_func   = mca_rcache_udreg_dereg_func;
 105     cache_attr.destructor_callback = NULL;
 106 
 107     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 108                          "rcache/udreg: creating udreg cache with name %s", cache_attr.cache_name);
 109 
 110     /* attempt to create the udreg cache. this will fail if one already exists */
 111     (void) UDREG_CacheCreate (&cache_attr);
 112 
 113     urc = UDREG_CacheAccess (rcache->resources.base.cache_name, (udreg_cache_handle_t *) &rcache->udreg_handle);
 114     if (UDREG_RC_SUCCESS != urc) {
 115         opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_rcache_base_framework.framework_output,
 116                              "rcache/udreg: call to UDREG_CacheAccess failed with rc: %d", urc);
 117         return OPAL_ERROR;
 118     }
 119 
 120     OBJ_CONSTRUCT(&rcache->reg_list, opal_free_list_t);
 121     opal_free_list_init (&rcache->reg_list, rcache->resources.base.sizeof_reg,
 122                          opal_cache_line_size, OBJ_CLASS(mca_rcache_base_registration_t),
 123                          0, opal_cache_line_size, 0, -1, 32, NULL, 0,
 124                          NULL, NULL, NULL);
 125 
 126     return OPAL_SUCCESS;
 127 }
 128 
 129 /* udreg callback functions */
 130 static void *mca_rcache_udreg_reg_func (void *addr, uint64_t size, void *reg_context)
 131 {
 132     mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) reg_context;
 133     unsigned int page_size = opal_getpagesize ();
 134     mca_rcache_base_registration_t *udreg_reg;
 135     opal_free_list_item_t *item;
 136     int rc;
 137 
 138     item = opal_free_list_get (&rcache_udreg->reg_list);
 139     if (NULL == item) {
 140         return NULL;
 141     }
 142 
 143     udreg_reg = (mca_rcache_base_registration_t *) item;
 144 
 145     udreg_reg->rcache = reg_context;
 146     udreg_reg->base  = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
 147     udreg_reg->bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
 148     udreg_reg->ref_count = 0;
 149 
 150     addr = (void *) udreg_reg->base;
 151     size = (uint64_t) (udreg_reg->bound - udreg_reg->base + 1);
 152 
 153     /* pull the flags and access flags out of the rcache module */
 154     udreg_reg->access_flags = rcache_udreg->requested_access_flags;
 155     udreg_reg->flags = rcache_udreg->requested_flags;
 156 
 157     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 158                          "rcache/udreg: calling underlying register function for address range {%p, %p}",
 159                          addr, (void *)((intptr_t) addr + size));
 160     rc = rcache_udreg->resources.base.register_mem (rcache_udreg->resources.base.reg_data, udreg_reg->base, size,
 161                                                     udreg_reg);
 162     if (OPAL_SUCCESS != rc) {
 163         opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_rcache_base_framework.framework_output,
 164                              "rcache/udreg: could not register memory. rc: %d", rc);
 165         opal_free_list_return (&rcache_udreg->reg_list, item);
 166         /* NTH: this is the only way to get UDReg_Register to recognize a failure */
 167         udreg_reg = UDREG_DEVICE_REG_FAILED;
 168     }
 169 
 170     return udreg_reg;
 171 }
 172 
 173 static uint32_t mca_rcache_udreg_dereg_func (void *device_data, void *dreg_context)
 174 {
 175     mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) dreg_context;
 176     mca_rcache_base_registration_t *udreg_reg = (mca_rcache_base_registration_t *) device_data;
 177     int rc;
 178 
 179     assert (udreg_reg->ref_count == 0);
 180 
 181     rc = rcache_udreg->resources.base.deregister_mem (rcache_udreg->resources.base.reg_data, udreg_reg);
 182     if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
 183         opal_free_list_return (&rcache_udreg->reg_list,
 184                                (opal_free_list_item_t *) udreg_reg);
 185     }
 186     /* might be worth printing out a warning if an error occurs here */
 187 
 188     return 0;
 189 }
 190 
 191 static bool mca_rcache_udreg_evict (mca_rcache_base_module_t *rcache)
 192 {
 193     mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) rcache;
 194     udreg_return_t urc;
 195 
 196     urc = UDREG_Evict (rcache_udreg->udreg_handle);
 197     return (UDREG_RC_SUCCESS == urc);
 198 }
 199 
 200 /*
 201  * register memory
 202  */
 203 static int mca_rcache_udreg_register(mca_rcache_base_module_t *rcache, void *addr,
 204                                     size_t size, uint32_t flags, int32_t access_flags,
 205                                     mca_rcache_base_registration_t **reg)
 206 {
 207     mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) rcache;
 208     mca_rcache_base_registration_t *udreg_reg, *old_reg;
 209     bool bypass_cache = !!(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS);
 210     const unsigned int page_size = opal_getpagesize ();
 211     unsigned char *base, *bound;
 212     udreg_entry_t *udreg_entry = NULL;
 213 
 214     *reg = NULL;
 215 
 216     OPAL_THREAD_LOCK(&rcache_udreg->lock);
 217 
 218     /* we hold the lock so no other thread can modify these flags until the registration is complete */
 219     rcache_udreg->requested_access_flags = access_flags;
 220     rcache_udreg->requested_flags = flags;
 221 
 222     base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
 223     bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
 224 
 225     addr = base;
 226     size = (size_t) (uintptr_t) (bound - base) + 1;
 227 
 228     if (false == bypass_cache) {
 229         /* Get a udreg entry for this region */
 230         do {
 231             opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 232                                  "rcache/udreg: XXX registering region {%p, %p} with udreg", addr, (void *)((intptr_t) addr + size));
 233             while (UDREG_RC_SUCCESS != UDREG_Register (rcache_udreg->udreg_handle, addr, size, &udreg_entry)) {
 234                 /* try to remove one unused reg and retry */
 235                 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 236                                      "calling evict!");
 237                 if (!mca_rcache_udreg_evict (rcache)) {
 238                     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 239                                          "rcache/udreg: could not register memory with udreg");
 240                     OPAL_THREAD_UNLOCK(&rcache_udreg->lock);
 241                     return OPAL_ERR_OUT_OF_RESOURCE;
 242                 }
 243             }
 244 
 245             udreg_reg = (mca_rcache_base_registration_t *) udreg_entry->device_data;
 246             if (NULL != udreg_reg && (udreg_reg->access_flags & access_flags) == access_flags) {
 247                 /* sufficient access */
 248                 break;
 249             }
 250 
 251             old_reg = udreg_reg;
 252 
 253             if (old_reg) {
 254                 /* to not confuse udreg make sure the new registration covers the same address
 255                  * range as the old one. */
 256                 addr = old_reg->base;
 257                 size = (size_t)((intptr_t) old_reg->bound - (intptr_t) old_reg->base);
 258 
 259                 /* make the new access flags more permissive */
 260                 access_flags |= old_reg->access_flags;
 261 
 262                 if (!old_reg->ref_count) {
 263                     /* deregister the region before attempting to re-register */
 264                     mca_rcache_udreg_dereg_func (old_reg, rcache);
 265                     udreg_entry->device_data = NULL;
 266                     old_reg = NULL;
 267                 } else {
 268                     /* ensure that mca_rcache_udreg_deregister does not call into udreg since
 269                      * we are forcefully evicting the registration here */
 270                     old_reg->flags |= MCA_RCACHE_FLAGS_CACHE_BYPASS | MCA_RCACHE_FLAGS_INVALID;
 271                 }
 272             }
 273 
 274             rcache_udreg->requested_access_flags = access_flags;
 275 
 276             /* get a new registration */
 277             while (UDREG_DEVICE_REG_FAILED == (udreg_reg = mca_rcache_udreg_reg_func (addr, size, rcache))) {
 278                 if (!mca_rcache_udreg_evict (rcache)) {
 279                     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 280                                          "rcache/udreg: could not register memory with udreg");
 281                     OPAL_THREAD_UNLOCK(&rcache_udreg->lock);
 282                     return OPAL_ERR_OUT_OF_RESOURCE;
 283                 }
 284             }
 285 
 286             /* update the device data with the new registration */
 287             udreg_entry->device_data = udreg_reg;
 288         } while (0);
 289     } else {
 290         /* if cache bypass is requested don't use the udreg cache */
 291         while (UDREG_DEVICE_REG_FAILED == (udreg_reg = mca_rcache_udreg_reg_func (addr, size, rcache))) {
 292             /* try to remove one unused reg and retry */
 293             if (!mca_rcache_udreg_evict (rcache)) {
 294                 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_rcache_base_framework.framework_output,
 295                                      "rcache/udreg: could not register memory");
 296                 OPAL_THREAD_UNLOCK(&rcache_udreg->lock);
 297                 return OPAL_ERR_OUT_OF_RESOURCE;
 298             }
 299         }
 300     }
 301 
 302     OPAL_THREAD_UNLOCK(&rcache_udreg->lock);
 303 
 304     *reg = udreg_reg;
 305     (void) OPAL_THREAD_ADD_FETCH32(&udreg_reg->ref_count, 1);
 306     udreg_reg->rcache_context = udreg_entry;
 307 
 308     return OPAL_SUCCESS;
 309 }
 310 
 311 static int mca_rcache_udreg_find (mca_rcache_base_module_t *rcache, void *addr,
 312                                  size_t size, mca_rcache_base_registration_t **reg)
 313 {
 314     *reg = NULL;
 315     return OPAL_ERR_NOT_FOUND;
 316 }
 317 
 318 static int mca_rcache_udreg_deregister(mca_rcache_base_module_t *rcache,
 319                                       mca_rcache_base_registration_t *reg)
 320 {
 321     mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t *) rcache;
 322     int32_t ref_count = OPAL_THREAD_ADD_FETCH32 (&reg->ref_count, -1);
 323 
 324     assert(ref_count >= 0);
 325 
 326     if (!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
 327         OPAL_THREAD_LOCK(&rcache_udreg->lock);
 328         UDREG_DecrRefcount (rcache_udreg->udreg_handle, reg->rcache_context);
 329         OPAL_THREAD_UNLOCK(&rcache_udreg->lock);
 330     } else if (!ref_count) {
 331         mca_rcache_udreg_dereg_func (reg, rcache);
 332     }
 333 
 334     return OPAL_SUCCESS;
 335 }
 336 
 337 static void mca_rcache_udreg_finalize (mca_rcache_base_module_t *rcache)
 338 {
 339     mca_rcache_udreg_module_t *rcache_udreg = (mca_rcache_udreg_module_t*)rcache;
 340 
 341     /* Statistic */
 342     if (true == mca_rcache_udreg_component.print_stats) {
 343         uint64_t hit = 0, miss = 0, evicted = 0;
 344 
 345         (void) UDREG_GetStat (rcache_udreg->udreg_handle,
 346                               UDREG_STAT_CACHE_HIT, &hit);
 347 
 348         (void) UDREG_GetStat (rcache_udreg->udreg_handle,
 349                               UDREG_STAT_CACHE_MISS, &miss);
 350 
 351         (void) UDREG_GetStat (rcache_udreg->udreg_handle,
 352                               UDREG_STAT_CACHE_EVICTED, &evicted);
 353 
 354         opal_output(0, "%s udreg: stats (hit/miss/evicted): %" PRIu64 "/%" PRIu64 "/%" PRIu64 "\n",
 355                     OPAL_NAME_PRINT(OPAL_PROC_MY_NAME), hit, miss, evicted);
 356     }
 357 
 358     UDREG_CacheRelease (rcache_udreg->udreg_handle);
 359     OBJ_DESTRUCT(&rcache_udreg->reg_list);
 360     OBJ_DESTRUCT(&rcache_udreg->lock);
 361 }

/* [<][>][^][v][top][bottom][index][help] */