root/opal/mca/mpool/hugepage/mpool_hugepage_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_mpool_hugepage_register
  2. mca_mpool_hugepage_open
  3. mca_mpool_hugepage_close
  4. page_compare
  5. mca_mpool_hugepage_find_hugepages
  6. mca_mpool_hugepage_query

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2013 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006      Voltaire. All rights reserved.
  14  * Copyright (c) 2007-2009 Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2012-2016 Los Alamos National Security, LLC. All rights
  16  *                         reserved.
  17  * Copyright (c) 2016      Intel, Inc. All rights reserved.
  18  * Copyright (c) 2016-2019 Research Organization for Information Science
  19  *                         and Technology (RIST).  All rights reserved.
  20  *
  21  * $COPYRIGHT$
  22  *
  23  * Additional copyrights may follow
  24  *
  25  * $HEADER$
  26  */
  27 
  28 #define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
  29 #include "opal_config.h"
  30 #include "opal/mca/base/base.h"
  31 #include "opal/runtime/opal_params.h"
  32 #include "opal/mca/base/mca_base_pvar.h"
  33 #include "opal/mca/mpool/base/base.h"
  34 #include "opal/mca/allocator/base/base.h"
  35 
  36 #include "opal/util/argv.h"
  37 
  38 #include "mpool_hugepage.h"
  39 
  40 #ifdef HAVE_UNISTD_H
  41 #include <unistd.h>
  42 #endif
  43 #ifdef HAVE_MALLOC_H
  44 #include <malloc.h>
  45 #endif
  46 #ifdef HAVE_SYS_VFS_H
  47 #include <sys/vfs.h>
  48 #endif
  49 #ifdef HAVE_SYS_MOUNT_H
  50 #include <sys/mount.h>
  51 #endif
  52 #ifdef HAVE_SYS_PARAM_H
  53 #include <sys/param.h>
  54 #endif
  55 #ifdef HAVE_SYS_MMAN_H
  56 #include <sys/mman.h>
  57 #endif
  58 #ifdef HAVE_MNTENT_H
  59 #include <mntent.h>
  60 #endif
  61 
  62 #include <fcntl.h>
  63 
  64 /*
  65  * Note that some OS's (e.g., NetBSD and Solaris) have statfs(), but
  66  * no struct statfs (!).  So check to make sure we have struct statfs
  67  * before allowing the use of statfs().
  68  */
  69 #if defined(HAVE_STATFS) && (defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) || \
  70                              defined(HAVE_STRUCT_STATFS_F_TYPE))
  71 #define USE_STATFS 1
  72 #endif
  73 
  74 
  75 /*
  76  * Local functions
  77  */
  78 static int mca_mpool_hugepage_open (void);
  79 static int mca_mpool_hugepage_close (void);
  80 static int mca_mpool_hugepage_register (void);
  81 static int mca_mpool_hugepage_query (const char *hints, int *priority,
  82                                      mca_mpool_base_module_t **module);
  83 static void mca_mpool_hugepage_find_hugepages (void);
  84 
  85 static int mca_mpool_hugepage_priority;
  86 static unsigned long mca_mpool_hugepage_page_size;
  87 
  88 mca_mpool_hugepage_component_t mca_mpool_hugepage_component = {
  89     {
  90         /* First, the mca_base_component_t struct containing meta
  91            information about the component itself */
  92 
  93         .mpool_version ={
  94             MCA_MPOOL_BASE_VERSION_3_0_0,
  95 
  96             .mca_component_name = "hugepage",
  97             MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
  98                                   OPAL_RELEASE_VERSION),
  99             .mca_open_component = mca_mpool_hugepage_open,
 100             .mca_close_component = mca_mpool_hugepage_close,
 101             .mca_register_component_params = mca_mpool_hugepage_register,
 102         },
 103         .mpool_data = {
 104             /* The component is checkpoint ready */
 105             MCA_BASE_METADATA_PARAM_CHECKPOINT
 106         },
 107 
 108         .mpool_query = mca_mpool_hugepage_query,
 109     },
 110 };
 111 
 112 /**
 113   * component open/close/init function
 114   */
 115 
 116 static int mca_mpool_hugepage_register(void)
 117 {
 118     mca_mpool_hugepage_priority = 50;
 119     (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
 120                                             "priority", "Default priority of the hugepage mpool component "
 121                                             "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 122                                             OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
 123                                             &mca_mpool_hugepage_priority);
 124 
 125     mca_mpool_hugepage_page_size = 1 << 21;
 126     (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
 127                                             "page_size", "Default huge page size of the hugepage mpool component "
 128                                             "(default: 2M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 129                                             OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
 130                                             &mca_mpool_hugepage_page_size);
 131 
 132     mca_mpool_hugepage_component.bytes_allocated = 0;
 133     (void) mca_base_component_pvar_register (&mca_mpool_hugepage_component.super.mpool_version,
 134                                              "bytes_allocated", "Number of bytes currently allocated in the mpool "
 135                                              "hugepage component", OPAL_INFO_LVL_3, MCA_BASE_PVAR_CLASS_SIZE,
 136                                              MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
 137                                              MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
 138                                              NULL, NULL, NULL, (void *) &mca_mpool_hugepage_component.bytes_allocated);
 139 
 140     return OPAL_SUCCESS;
 141 }
 142 
 143 static int mca_mpool_hugepage_open (void)
 144 {
 145     mca_mpool_hugepage_module_t *hugepage_module;
 146     mca_mpool_hugepage_hugepage_t *hp;
 147     int module_index, rc;
 148 
 149     OBJ_CONSTRUCT(&mca_mpool_hugepage_component.huge_pages, opal_list_t);
 150     mca_mpool_hugepage_find_hugepages ();
 151 
 152     if (0 == opal_list_get_size (&mca_mpool_hugepage_component.huge_pages)) {
 153         return OPAL_SUCCESS;
 154     }
 155 
 156     mca_mpool_hugepage_component.modules = (mca_mpool_hugepage_module_t *)
 157         calloc (opal_list_get_size (&mca_mpool_hugepage_component.huge_pages),
 158                 sizeof (mca_mpool_hugepage_module_t));
 159     if (NULL == mca_mpool_hugepage_component.modules) {
 160         return OPAL_ERR_OUT_OF_RESOURCE;
 161     }
 162 
 163     module_index = 0;
 164     OPAL_LIST_FOREACH(hp, &mca_mpool_hugepage_component.huge_pages, mca_mpool_hugepage_hugepage_t) {
 165         hugepage_module = mca_mpool_hugepage_component.modules + module_index;
 166         rc = mca_mpool_hugepage_module_init (hugepage_module, hp);
 167         if (OPAL_SUCCESS != rc) {
 168             continue;
 169         }
 170         module_index++;
 171     }
 172 
 173     mca_mpool_hugepage_component.module_count = module_index;
 174 
 175     return OPAL_SUCCESS;
 176 }
 177 
 178 static int mca_mpool_hugepage_close (void)
 179 {
 180     OPAL_LIST_DESTRUCT(&mca_mpool_hugepage_component.huge_pages);
 181 
 182     for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
 183         mca_mpool_hugepage_module_t *module =  mca_mpool_hugepage_component.modules + i;
 184         module->super.mpool_finalize (&module->super);
 185     }
 186 
 187     free (mca_mpool_hugepage_component.modules);
 188     mca_mpool_hugepage_component.modules = NULL;
 189 
 190     return OPAL_SUCCESS;
 191 }
 192 
 193 #ifdef HAVE_MNTENT_H
 194 static int page_compare (opal_list_item_t **a, opal_list_item_t **b) {
 195     mca_mpool_hugepage_hugepage_t *pagea = (mca_mpool_hugepage_hugepage_t *) *a;
 196     mca_mpool_hugepage_hugepage_t *pageb = (mca_mpool_hugepage_hugepage_t *) *b;
 197     if (pagea->page_size > pageb->page_size) {
 198         return 1;
 199     } else if (pagea->page_size < pageb->page_size) {
 200         return -1;
 201     }
 202 
 203     return 0;
 204 }
 205 #endif
 206 
 207 static void mca_mpool_hugepage_find_hugepages (void) {
 208 #ifdef HAVE_MNTENT_H
 209     mca_mpool_hugepage_hugepage_t *hp;
 210     FILE *fh;
 211     struct mntent *mntent;
 212     char *opts, *tok, *ctx;
 213 
 214     fh = setmntent ("/proc/mounts", "r");
 215     if (NULL == fh) {
 216         return;
 217     }
 218 
 219     while (NULL != (mntent = getmntent(fh))) {
 220         unsigned long page_size = 0;
 221 
 222         if (0 != strcmp(mntent->mnt_type, "hugetlbfs")) {
 223             continue;
 224         }
 225 
 226         opts = strdup(mntent->mnt_opts);
 227         if (NULL == opts) {
 228             break;
 229         }
 230 
 231         tok = strtok_r (opts, ",", &ctx);
 232 
 233         do {
 234             if (0 == strncmp (tok, "pagesize", 8)) {
 235                 break;
 236             }
 237             tok = strtok_r (NULL, ",", &ctx);
 238         } while (tok);
 239 
 240         if (!tok) {
 241 #if defined(USE_STATFS)
 242             struct statfs info;
 243 
 244             statfs (mntent->mnt_dir, &info);
 245 #elif defined(HAVE_STATVFS)
 246             struct statvfs info;
 247             statvfs (mntent->mnt_dir, &info);
 248 #endif
 249             page_size = info.f_bsize;
 250         } else {
 251             (void) sscanf (tok, "pagesize=%lu", &page_size);
 252         }
 253         free(opts);
 254 
 255         if (0 == page_size) {
 256             /* could not get page size */
 257             continue;
 258         }
 259 
 260         hp = OBJ_NEW(mca_mpool_hugepage_hugepage_t);
 261         if (NULL == hp) {
 262             break;
 263         }
 264 
 265         hp->path = strdup (mntent->mnt_dir);
 266         hp->page_size = page_size;
 267         
 268         if(0 == access (hp->path, R_OK | W_OK)){        
 269             opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
 270                                  "found huge page with size = %lu, path = %s, mmap flags = 0x%x, adding to list",
 271                                  hp->page_size, hp->path, hp->mmap_flags);
 272             opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super);
 273         } else {
 274             opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
 275                                  "found huge page with size = %lu, path = %s, mmap flags = 0x%x, with invalid " 
 276                                  "permissions, skipping", hp->page_size, hp->path, hp->mmap_flags);
 277             OBJ_RELEASE(hp);
 278         }        
 279     }
 280 
 281     opal_list_sort (&mca_mpool_hugepage_component.huge_pages, page_compare);
 282 
 283     endmntent (fh);
 284 #endif
 285 }
 286 
 287 static int mca_mpool_hugepage_query (const char *hints, int *priority_out,
 288                                      mca_mpool_base_module_t **module)
 289 {
 290     unsigned long page_size = 0;
 291     char **hints_array;
 292     int my_priority = mca_mpool_hugepage_priority;
 293     char *tmp;
 294     bool found = false;
 295 
 296     if (0 == mca_mpool_hugepage_component.module_count) {
 297         return OPAL_ERR_NOT_AVAILABLE;
 298     }
 299 
 300     if (hints) {
 301         hints_array = opal_argv_split (hints, ',');
 302         if (NULL == hints_array) {
 303             return OPAL_ERR_OUT_OF_RESOURCE;
 304         }
 305 
 306         for (int i = 0 ; hints_array[i] ; ++i) {
 307             char *key = hints_array[i];
 308             char *value = NULL;
 309 
 310             if (NULL != (tmp = strchr (key, '='))) {
 311                 value = tmp + 1;
 312                 *tmp = '\0';
 313             }
 314 
 315             if (0 == strcasecmp ("mpool", key)) {
 316                 if (value && 0 == strcasecmp ("hugepage", value)) {
 317                     /* this mpool was requested by name */
 318                     my_priority = 100;
 319                     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
 320                                          "hugepage mpool matches hint: %s=%s", key, value);
 321                 } else {
 322                     /* different mpool requested */
 323                     my_priority = 0;
 324                     opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
 325                                          "hugepage mpool does not match hint: %s=%s", key, value);
 326                     opal_argv_free (hints_array);
 327                     return OPAL_ERR_NOT_FOUND;
 328                 }
 329             }
 330 
 331             if (0 == strcasecmp ("page_size", key) && value) {
 332                 page_size = strtoul (value, &tmp, 0);
 333                 if (*tmp) {
 334                     switch (*tmp) {
 335                     case 'g':
 336                     case 'G':
 337                         page_size *= 1024;
 338                         /* fall through */
 339                     case 'm':
 340                     case 'M':
 341                         page_size *= 1024;
 342                         /* fall through */
 343                     case 'k':
 344                     case 'K':
 345                         page_size *= 1024;
 346                         break;
 347                     default:
 348                         page_size = -1;
 349                     }
 350                 }
 351                 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
 352                                      "hugepage mpool requested page size: %lu", page_size);
 353             }
 354         }
 355 
 356         opal_argv_free (hints_array);
 357     }
 358 
 359     if (0 == page_size) {
 360         /* use default huge page size */
 361         page_size = mca_mpool_hugepage_page_size;
 362         if (my_priority < 100) {
 363             /* take a priority hit if this mpool was not asked for by name */
 364             my_priority = 0;
 365         }
 366         opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
 367                              "hugepage mpool did not match any hints: %s", hints);
 368     }
 369 
 370     for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
 371         mca_mpool_hugepage_module_t *hugepage_module = mca_mpool_hugepage_component.modules + i;
 372 
 373         if (hugepage_module->huge_page->page_size != page_size) {
 374             continue;
 375         }
 376 
 377         my_priority = (my_priority < 80) ? my_priority + 20 : 100;
 378 
 379         if (module) {
 380             *module = &hugepage_module->super;
 381         }
 382 
 383         opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
 384                              "matches page size hint. page size: %lu, path: %s, mmap flags: "
 385                              "0x%x", page_size, hugepage_module->huge_page->path,
 386                              hugepage_module->huge_page->mmap_flags);
 387         found = true;
 388         break;
 389     }
 390 
 391     if (!found) {
 392         opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
 393                              "could not find page matching page request: %lu", page_size);
 394         return OPAL_ERR_NOT_FOUND;
 395     }
 396 
 397     if (priority_out) {
 398         *priority_out = my_priority;
 399     }
 400 
 401     return OPAL_SUCCESS;
 402 }

/* [<][>][^][v][top][bottom][index][help] */