root/opal/mca/common/sm/common_sm_mpool.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. mca_common_sm_mpool_module_init
  2. common_sm_mpool_create
  3. mca_common_sm_mpool_base
  4. mca_common_sm_mpool_alloc
  5. mca_common_sm_mpool_free
  6. sm_module_finalize
  7. mca_common_sm_mpool_ft_event
  8. mca_common_sm_mpool_ft_event

   1 /*
   2  * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
   3  *                         University Research and Technology
   4  *                         Corporation.  All rights reserved.
   5  * Copyright (c) 2004-2005 The University of Tennessee and The University
   6  *                         of Tennessee Research Foundation.  All rights
   7  *                         reserved.
   8  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
   9  *                         University of Stuttgart.  All rights reserved.
  10  * Copyright (c) 2004-2005 The Regents of the University of California.
  11  *                         All rights reserved.
  12  * Copyright (c) 2009-2012 Cisco Systems, Inc.  All rights reserved.
  13  * Copyright (c) 2011-2012 Los Alamos National Security, LLC.
  14  *                         All rights reserved.
  15  * Copyright (c) 2011-2014 NVIDIA Corporation.  All rights reserved.
  16  * Copyright (c) 2015      Intel, Inc. All rights reserved
  17  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 #include "opal_config.h"
  26 #include <string.h>
  27 #include "opal/util/printf.h"
  28 #include "common_sm_mpool.h"
  29 #include "opal/mca/common/sm/common_sm.h"
  30 #include "opal/mca/common/cuda/common_cuda.h"
  31 #include "opal/mca/allocator/base/base.h"
  32 #ifdef HAVE_UNISTD_H
  33 #include <unistd.h>
  34 #endif
  35 #include "opal/mca/hwloc/base/base.h"
  36 
  37 #if OPAL_ENABLE_FT_CR    == 1
  38 #include "orte/mca/sstore/sstore.h"
  39 #include "opal/mca/mpool/base/base.h"
  40 #include "ompi/runtime/ompi_cr.h" /* TODO */
  41 #endif
  42 
  43 static void sm_module_finalize(mca_mpool_base_module_t* module);
  44 
  45 /*
  46  *  Returns base address of shared memory mapping.
  47  */
  48 static void *mca_common_sm_mpool_base (mca_mpool_base_module_t *mpool);
  49 
  50 /**
  51   *  Allocate block of shared memory.
  52   */
  53 static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t *mpool,
  54                                         size_t size, size_t align,
  55                                         uint32_t flags);
  56 
  57 /**
  58   * Return a block to the shared memory allocator.
  59   */
  60 static void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool,
  61                                      void *addr);
  62 
  63 /**
  64  * Fault Tolerance Event Notification Function
  65  * @param state Checkpoint State
  66  * @return OPAL_SUCCESS or failure status
  67  */
  68 static int mca_common_sm_mpool_ft_event (int state);
  69 
  70 
  71 /*
  72  *  Initializes the mpool module.
  73  */
  74 static void mca_common_sm_mpool_module_init(mca_common_sm_mpool_module_t* mpool)
  75 {
  76     mpool->super.mpool_base = mca_common_sm_mpool_base;
  77     mpool->super.mpool_alloc = mca_common_sm_mpool_alloc;
  78     mpool->super.mpool_free = mca_common_sm_mpool_free;
  79     mpool->super.mpool_finalize = sm_module_finalize;
  80     mpool->super.mpool_ft_event = mca_common_sm_mpool_ft_event;
  81     mpool->super.flags = 0;
  82 
  83     mpool->sm_size = 0;
  84     mpool->sm_allocator = NULL;
  85     mpool->sm_mmap = NULL;
  86     mpool->sm_common_module = NULL;
  87     mpool->mem_node = -1;
  88 }
  89 
  90 mca_mpool_base_module_t *common_sm_mpool_create (mca_common_sm_mpool_resources_t *resources)
  91 {
  92     mca_common_sm_mpool_module_t *mpool_module;
  93     mca_allocator_base_component_t* allocator_component;
  94 
  95     /* Make a new mpool module */
  96     mpool_module = (mca_common_sm_mpool_module_t *) malloc (sizeof (*mpool_module));
  97     mca_common_sm_mpool_module_init(mpool_module);
  98 
  99     /* set sm_size */
 100     mpool_module->sm_size = resources->size;
 101 
 102     allocator_component = mca_allocator_component_lookup(resources->allocator);
 103 
 104     /* if specified allocator cannot be loaded - look for an alternative */
 105     if (NULL == allocator_component) {
 106         if (opal_list_get_size(&opal_allocator_base_framework.framework_components) == 0) {
 107             mca_base_component_list_item_t *item =
 108                 (mca_base_component_list_item_t *)
 109                 opal_list_get_first(&opal_allocator_base_framework.framework_components);
 110             allocator_component =
 111                 (mca_allocator_base_component_t *)item->cli_component;
 112             opal_output(
 113                 0, "mca_common_sm_mpool_init: "
 114                 "unable to locate allocator: %s - using %s\n",
 115                 resources->allocator,
 116                 allocator_component->allocator_version.mca_component_name);
 117         } else {
 118             opal_output(0, "mca_common_sm_mpool_init: "
 119                         "unable to locate allocator: %s\n",
 120                         resources->allocator);
 121             free(mpool_module);
 122             return NULL;
 123         }
 124     }
 125 
 126     mpool_module->mem_node = resources->mem_node;
 127 
 128     if (NULL == (mpool_module->sm_common_module =
 129         mca_common_sm_module_attach(&resources->bs_meta_buf,
 130                                     sizeof(mca_common_sm_module_t), 8))) {
 131         opal_output(0, "mca_common_sm_mpool_init: "
 132                     "unable to create shared memory mapping (%s)",
 133                     resources->bs_meta_buf.seg_name);
 134         free(mpool_module);
 135         return NULL;
 136     }
 137 
 138     /* setup allocator */
 139     mpool_module->sm_allocator =
 140       allocator_component->allocator_init (true, mca_common_sm_seg_alloc,
 141                                            NULL, mpool_module->sm_common_module);
 142     if (NULL == mpool_module->sm_allocator) {
 143         opal_output(0, "mca_common_sm_mpool_init: unable to initialize allocator");
 144         free(mpool_module);
 145         return NULL;
 146     }
 147 
 148     return &mpool_module->super;
 149 }
 150 
 151 
 152 /*
 153  * base address of shared memory mapping
 154  */
 155 static void *mca_common_sm_mpool_base(mca_mpool_base_module_t *mpool)
 156 {
 157     mca_common_sm_mpool_module_t *sm_mpool = (mca_common_sm_mpool_module_t *) mpool;
 158     return (NULL != sm_mpool->sm_common_module) ?
 159         sm_mpool->sm_common_module->module_seg_addr : NULL;
 160 }
 161 
 162 /**
 163   * allocate function
 164   */
 165 static void *mca_common_sm_mpool_alloc (mca_mpool_base_module_t* mpool,
 166                                         size_t size, size_t align, uint32_t flags)
 167 {
 168     mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool;
 169     opal_hwloc_base_memory_segment_t mseg;
 170 
 171     mseg.mbs_start_addr =
 172         mpool_sm->sm_allocator->alc_alloc(mpool_sm->sm_allocator, size, align);
 173 
 174     if (mpool_sm->mem_node >= 0) {
 175         mseg.mbs_len = size;
 176         opal_hwloc_base_membind(&mseg, 1, mpool_sm->mem_node);
 177     }
 178 
 179     return mseg.mbs_start_addr;
 180 }
 181 
 182 /**
 183   * free function
 184   */
 185 void mca_common_sm_mpool_free(mca_mpool_base_module_t *mpool, void *addr)
 186 {
 187     mca_common_sm_mpool_module_t* mpool_sm = (mca_common_sm_mpool_module_t*)mpool;
 188     mpool_sm->sm_allocator->alc_free(mpool_sm->sm_allocator, addr);
 189 }
 190 
 191 static void sm_module_finalize(mca_mpool_base_module_t* module)
 192 {
 193     mca_common_sm_mpool_module_t *sm_module = (mca_common_sm_mpool_module_t*) module;
 194 
 195     if (NULL != sm_module->sm_common_module) {
 196         if (OPAL_SUCCESS ==
 197             mca_common_sm_fini(sm_module->sm_common_module)) {
 198 #if OPAL_ENABLE_FT_CR == 1
 199             /* Only unlink the file if we are *not* restarting.  If we
 200                are restarting the file will be unlinked at a later
 201                time. */
 202             if (OPAL_CR_STATUS_RESTART_PRE  != opal_cr_checkpointing_state &&
 203                 OPAL_CR_STATUS_RESTART_POST != opal_cr_checkpointing_state ) {
 204                 unlink(sm_module->sm_common_module->shmem_ds.seg_name);
 205             }
 206 #else
 207             unlink(sm_module->sm_common_module->shmem_ds.seg_name);
 208 #endif
 209         }
 210         OBJ_RELEASE(sm_module->sm_common_module);
 211         sm_module->sm_common_module = NULL;
 212     }
 213 }
 214 
 215 #if OPAL_ENABLE_FT_CR    == 0
 216 int mca_common_sm_mpool_ft_event(int state) {
 217     return OPAL_SUCCESS;
 218 }
 219 #else
 220 int mca_common_sm_mpool_ft_event(int state) {
 221     mca_mpool_base_module_t *self_module = NULL;
 222     mca_common_sm_mpool_module_t   *self_sm_module = NULL;
 223     char * file_name = NULL;
 224 
 225     if(OPAL_CRS_CHECKPOINT == state) {
 226         /* Record the shared memory filename */
 227         opal_asprintf( &file_name, "%s"OPAL_PATH_SEP"shared_mem_pool.%s",
 228                   opal_process_info.job_session_dir,
 229                   opal_proc_local_get()->proc_hostname );
 230         /* Disabled to get FT code compiled again
 231          * TODO: FIXIT soon
 232         orte_sstore.set_attr(orte_sstore_handle_current, SSTORE_METADATA_LOCAL_TOUCH, file_name);
 233          */
 234         free(file_name);
 235         file_name = NULL;
 236     }
 237     else if(OPAL_CRS_CONTINUE == state) {
 238         if (opal_cr_continue_like_restart) {
 239             /* Find the sm module */
 240             self_module = mca_mpool_base_module_lookup("sm");
 241             self_sm_module = (mca_common_sm_mpool_module_t*) self_module;
 242 
 243             /* Mark the old sm file for eventual removal via CRS */
 244             if (NULL != self_sm_module->sm_common_module) {
 245                 opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false);
 246             }
 247 
 248             /* Remove self from the list of all modules */
 249             mca_mpool_base_module_destroy(self_module);
 250         }
 251     }
 252     else if(OPAL_CRS_RESTART == state ||
 253             OPAL_CRS_RESTART_PRE == state) {
 254         /* Find the sm module */
 255         self_module = mca_mpool_base_module_lookup("sm");
 256         self_sm_module = (mca_common_sm_mpool_module_t*) self_module;
 257 
 258         /* Mark the old sm file for eventual removal via CRS */
 259         if (NULL != self_sm_module->sm_common_module) {
 260             opal_crs_base_cleanup_append(self_sm_module->sm_common_module->shmem_ds.seg_name, false);
 261         }
 262 
 263         /* Remove self from the list of all modules */
 264         mca_mpool_base_module_destroy(self_module);
 265     }
 266     else if(OPAL_CRS_TERM == state ) {
 267         ;
 268     }
 269     else {
 270         ;
 271     }
 272 
 273     return OPAL_SUCCESS;
 274 }
 275 #endif /* OPAL_ENABLE_FT_CR */

/* [<][>][^][v][top][bottom][index][help] */