root/oshmem/mca/sshmem/ucx/sshmem_ucx_module.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. module_init
  2. module_finalize
  3. segment_create_internal
  4. alloc_device_mem
  5. segment_create
  6. segment_attach
  7. segment_detach
  8. segment_unlink
  9. sshmem_ucx_memheap_index2ptr
  10. sshmem_ucx_memheap_ptr2index
  11. sshmem_ucx_memheap_wordcopy
  12. sshmem_ucx_memheap_realloc
  13. sshmem_ucx_memheap_free

   1 /*
   2  * Copyright (c) 2017      Mellanox Technologies, Inc.
   3  *                         All rights reserved.
   4  * Copyright (c) 2019      Research Organization for Information Science
   5  *                         and Technology (RIST).  All rights reserved.
   6  * $COPYRIGHT$
   7  *
   8  * Additional copyrights may follow
   9  *
  10  * $HEADER$
  11  */
  12 
  13 #include "oshmem_config.h"
  14 
  15 #include "opal/constants.h"
  16 #include "opal/util/output.h"
  17 #include "opal/util/path.h"
  18 #include "opal/util/show_help.h"
  19 
  20 #include "oshmem/proc/proc.h"
  21 #include "oshmem/mca/sshmem/sshmem.h"
  22 #include "oshmem/include/shmemx.h"
  23 #include "oshmem/mca/sshmem/base/base.h"
  24 #include "oshmem/util/oshmem_util.h"
  25 #include "oshmem/mca/spml/ucx/spml_ucx.h"
  26 
  27 #include "sshmem_ucx.h"
  28 
  29 //#include <ucs/sys/math.h>
  30 
  31 #if HAVE_UCX_DEVICE_MEM
  32 #include <ucp/core/ucp_resource.h>
  33 #include <uct/ib/base/ib_alloc.h>
  34 #endif
  35 
  36 #define ALLOC_ELEM_SIZE sizeof(uint64_t)
  37 #define min(a,b) ((a) < (b) ? (a) : (b))
  38 #define max(a,b) ((a) > (b) ? (a) : (b))
  39 
  40 /* ////////////////////////////////////////////////////////////////////////// */
  41 /*local functions */
  42 /* local functions */
  43 static int
  44 module_init(void);
  45 
  46 static int
  47 segment_create(map_segment_t *ds_buf,
  48                const char *file_name,
  49                size_t size, long hint);
  50 
  51 static void *
  52 segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
  53 
  54 static int
  55 segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey);
  56 
  57 static int
  58 segment_unlink(map_segment_t *ds_buf);
  59 
  60 static int
  61 module_finalize(void);
  62 
  63 static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
  64                                       void* old_ptr, void** new_ptr);
  65 
  66 static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr);
  67 
  68 /*
  69  * ucx shmem module
  70  */
  71 mca_sshmem_ucx_module_t mca_sshmem_ucx_module = {
  72     /* super */
  73     {
  74         module_init,
  75         segment_create,
  76         segment_attach,
  77         segment_detach,
  78         segment_unlink,
  79         module_finalize
  80     }
  81 };
  82 
  83 static int
  84 module_init(void)
  85 {
  86     /* nothing to do */
  87     return OSHMEM_SUCCESS;
  88 }
  89 
  90 /* ////////////////////////////////////////////////////////////////////////// */
  91 static int
  92 module_finalize(void)
  93 {
  94     /* nothing to do */
  95     return OSHMEM_SUCCESS;
  96 }
  97 
  98 /* ////////////////////////////////////////////////////////////////////////// */
  99 
 100 static segment_allocator_t sshmem_ucx_allocator = {
 101     .realloc = sshmem_ucx_memheap_realloc,
 102     .free    = sshmem_ucx_memheap_free
 103 };
 104 
 105 static int
 106 segment_create_internal(map_segment_t *ds_buf, void *address, size_t size,
 107                         unsigned flags, long hint, void *dev_mem)
 108 {
 109     mca_sshmem_ucx_segment_context_t *ctx;
 110     int rc = OSHMEM_SUCCESS;
 111     mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
 112     ucp_mem_map_params_t mem_map_params;
 113     ucp_mem_h mem_h;
 114     ucs_status_t status;
 115 
 116     assert(ds_buf);
 117 
 118     /* init the contents of map_segment_t */
 119     shmem_ds_reset(ds_buf);
 120 
 121     mem_map_params.field_mask = UCP_MEM_MAP_PARAM_FIELD_ADDRESS |
 122                                 UCP_MEM_MAP_PARAM_FIELD_LENGTH |
 123                                 UCP_MEM_MAP_PARAM_FIELD_FLAGS;
 124 
 125     mem_map_params.address    = address;
 126     mem_map_params.length     = size;
 127     mem_map_params.flags      = flags;
 128 
 129     status = ucp_mem_map(spml->ucp_context, &mem_map_params, &mem_h);
 130     if (UCS_OK != status) {
 131         SSHMEM_ERROR("ucp_mem_map() failed: %s\n", ucs_status_string(status));
 132         rc = OSHMEM_ERROR;
 133         goto out;
 134     }
 135 
 136     if (!(flags & UCP_MEM_MAP_FIXED)) {
 137         /* Memory was allocated at an arbitrary address; obtain it */
 138         ucp_mem_attr_t mem_attr;
 139         mem_attr.field_mask = UCP_MEM_ATTR_FIELD_ADDRESS;
 140         status = ucp_mem_query(mem_h, &mem_attr);
 141         if (status != UCS_OK) {
 142             SSHMEM_ERROR("ucp_mem_query() failed: %s\n", ucs_status_string(status));
 143             ucp_mem_unmap(spml->ucp_context, mem_h);
 144             rc = OSHMEM_ERROR;
 145             goto out;
 146         }
 147 
 148         ds_buf->super.va_base = mem_attr.address;
 149     } else {
 150         ds_buf->super.va_base = mem_map_params.address;
 151     }
 152 
 153     ctx = calloc(1, sizeof(*ctx));
 154     if (!ctx) {
 155         ucp_mem_unmap(spml->ucp_context, mem_h);
 156         rc = OSHMEM_ERR_OUT_OF_RESOURCE;
 157         goto out;
 158     }
 159 
 160     ds_buf->seg_size      = size;
 161     ds_buf->super.va_end  = (void*)((uintptr_t)ds_buf->super.va_base + ds_buf->seg_size);
 162     ds_buf->context       = ctx;
 163     ds_buf->type          = MAP_SEGMENT_ALLOC_UCX;
 164     ds_buf->alloc_hints   = hint;
 165     ctx->ucp_memh         = mem_h;
 166     ctx->dev_mem          = dev_mem;
 167     if (hint) {
 168         ds_buf->allocator = &sshmem_ucx_allocator;
 169     }
 170 
 171 out:
 172     OPAL_OUTPUT_VERBOSE(
 173           (70, oshmem_sshmem_base_framework.framework_output,
 174            "%s: %s: create %s "
 175            "(id: %d, addr: %p size: %lu)\n",
 176            mca_sshmem_ucx_component.super.base_version.mca_type_name,
 177            mca_sshmem_ucx_component.super.base_version.mca_component_name,
 178            (rc ? "failure" : "successful"),
 179            ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
 180       );
 181     return rc;
 182 }
 183 
 184 #if HAVE_UCX_DEVICE_MEM
 185 static uct_ib_device_mem_h alloc_device_mem(mca_spml_ucx_t *spml, size_t size,
 186                                             void **address_p)
 187 {
 188     uct_ib_device_mem_h dev_mem = NULL;
 189     ucs_status_t status;
 190     uct_md_h uct_md;
 191     void *address;
 192     size_t length;
 193 
 194     uct_md = ucp_context_find_tl_md(spml->ucp_context, "mlx5");
 195     if (uct_md == NULL) {
 196         SSHMEM_VERBOSE(1, "ucp_context_find_tl_md() returned NULL\n");
 197         return NULL;
 198     }
 199 
 200     /* If found a matching memory domain, allocate device memory on it */
 201     length  = size;
 202     address = NULL;
 203     status = uct_ib_md_alloc_device_mem(uct_md, &length, &address,
 204                                         UCT_MD_MEM_ACCESS_ALL, "sshmem_seg",
 205                                         &dev_mem);
 206     if (status != UCS_OK) {
 207         /* If could not allocate device memory - fallback to mmap (since some
 208          * PEs in the job may succeed and while others failed */
 209         SSHMEM_VERBOSE(1, "uct_ib_md_alloc_dm() failed: %s\n",
 210                        ucs_status_string(status));
 211         return NULL;
 212     }
 213 
 214     SSHMEM_VERBOSE(3, "uct_ib_md_alloc_dm() returned address %p\n", address);
 215     *address_p = address;
 216     return dev_mem;
 217 }
 218 #endif
 219 
 220 static int
 221 segment_create(map_segment_t *ds_buf,
 222                const char *file_name,
 223                size_t size, long hint)
 224 {
 225     mca_spml_ucx_t *spml = (mca_spml_ucx_t*)mca_spml.self;
 226     unsigned flags;
 227 
 228 #if HAVE_UCX_DEVICE_MEM
 229     if (hint & SHMEM_HINT_DEVICE_NIC_MEM) {
 230         if (size > UINT_MAX) {
 231             return OSHMEM_ERR_BAD_PARAM;
 232         }
 233 
 234         void *dev_mem_address;
 235         uct_ib_device_mem_h dev_mem = alloc_device_mem(spml, size,
 236                                                        &dev_mem_address);
 237         if (dev_mem != NULL) {
 238             ret = segment_create_internal(ds_buf, dev_mem_address, size, 0,
 239                                           hint, dev_mem);
 240             if (ret == OSHMEM_SUCCESS) {
 241                 return OSHMEM_SUCCESS;
 242             } else if (dev_mem != NULL) {
 243                 uct_ib_md_release_device_mem(dev_mem);
 244                 /* fallback to regular allocation */
 245             }
 246         }
 247     }
 248 #endif
 249 
 250     flags = UCP_MEM_MAP_ALLOCATE | (spml->heap_reg_nb ? UCP_MEM_MAP_NONBLOCK : 0);
 251     if (hint) {
 252         return segment_create_internal(ds_buf, NULL, size, flags, hint, NULL);
 253     } else {
 254         return segment_create_internal(ds_buf, mca_sshmem_base_start_address,
 255                                        size, flags | UCP_MEM_MAP_FIXED, hint,
 256                                        NULL);
 257     }
 258 }
 259 
 260 static void *
 261 segment_attach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
 262 {
 263     assert(ds_buf);
 264     assert(mkey->va_base == 0);
 265 
 266     OPAL_OUTPUT((oshmem_sshmem_base_framework.framework_output,
 267                 "can not attach to ucx segment"));
 268     oshmem_shmem_abort(-1);
 269     return NULL;
 270 }
 271 
 272 static int
 273 segment_detach(map_segment_t *ds_buf, sshmem_mkey_t *mkey)
 274 {
 275     OPAL_OUTPUT_VERBOSE(
 276         (70, oshmem_sshmem_base_framework.framework_output,
 277          "%s: %s: detaching "
 278             "(id: %d, addr: %p size: %lu)\n",
 279             mca_sshmem_ucx_component.super.base_version.mca_type_name,
 280             mca_sshmem_ucx_component.super.base_version.mca_component_name,
 281             ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
 282     );
 283 
 284     /* reset the contents of the map_segment_t associated with this
 285      * shared memory segment.
 286      */
 287     shmem_ds_reset(ds_buf);
 288 
 289     return OSHMEM_SUCCESS;
 290 }
 291 
 292 static int
 293 segment_unlink(map_segment_t *ds_buf)
 294 {
 295     mca_spml_ucx_t *spml = (mca_spml_ucx_t *)mca_spml.self;
 296     mca_sshmem_ucx_segment_context_t *ctx = ds_buf->context;
 297 
 298     if (ctx->shadow_allocator) {
 299         sshmem_ucx_shadow_destroy(ctx->shadow_allocator);
 300     }
 301 
 302     ucp_mem_unmap(spml->ucp_context, ctx->ucp_memh);
 303 
 304 #if HAVE_UCX_DEVICE_MEM
 305     if (ctx->dev_mem) {
 306         uct_ib_md_release_device_mem(ctx->dev_mem);
 307     }
 308 #endif
 309 
 310     ds_buf->context = NULL;
 311     free(ctx);
 312 
 313     OPAL_OUTPUT_VERBOSE(
 314         (70, oshmem_sshmem_base_framework.framework_output,
 315          "%s: %s: unlinking "
 316             "(id: %d, addr: %p size: %lu)\n",
 317             mca_sshmem_ucx_component.super.base_version.mca_type_name,
 318             mca_sshmem_ucx_component.super.base_version.mca_component_name,
 319             ds_buf->seg_id, ds_buf->super.va_base, (unsigned long)ds_buf->seg_size)
 320     );
 321 
 322     ds_buf->seg_id = MAP_SEGMENT_SHM_INVALID;
 323     MAP_SEGMENT_INVALIDATE(ds_buf);
 324 
 325     return OSHMEM_SUCCESS;
 326 }
 327 
 328 static void *sshmem_ucx_memheap_index2ptr(map_segment_t *s, unsigned index)
 329 {
 330     return (char*)s->super.va_base + (index * ALLOC_ELEM_SIZE);
 331 }
 332 
 333 static unsigned sshmem_ucx_memheap_ptr2index(map_segment_t *s, void *ptr)
 334 {
 335     return ((char*)ptr - (char*)s->super.va_base) / ALLOC_ELEM_SIZE;
 336 }
 337 
 338 static void sshmem_ucx_memheap_wordcopy(void *dst, void *src, size_t size)
 339 {
 340     const size_t count = (size + sizeof(uint64_t) - 1) / sizeof(uint64_t);
 341     uint64_t *dst64 = (uint64_t*)dst;
 342     uint64_t *src64 = (uint64_t*)src;
 343     size_t i;
 344 
 345     for (i = 0; i < count; ++i) {
 346         *(dst64++) = *(src64++);
 347     }
 348     opal_atomic_wmb();
 349 }
 350 
 351 static int sshmem_ucx_memheap_realloc(map_segment_t *s, size_t size,
 352                                       void* old_ptr, void** new_ptr)
 353 {
 354     mca_sshmem_ucx_segment_context_t *ctx = s->context;
 355     unsigned alloc_count, index, old_index, old_alloc_count;
 356     int res;
 357     int inplace;
 358 
 359     if (size > s->seg_size) {
 360         return OSHMEM_ERR_OUT_OF_RESOURCE;
 361     }
 362 
 363     /* create allocator on demand */
 364     if (!ctx->shadow_allocator) {
 365         ctx->shadow_allocator = sshmem_ucx_shadow_create(s->seg_size);
 366         if (!ctx->shadow_allocator) {
 367             return OSHMEM_ERR_OUT_OF_RESOURCE;
 368         }
 369     }
 370 
 371     /* Allocate new element. Zero-size allocation should still return a unique
 372      * pointer, so allocate 1 byte */
 373     alloc_count = max((size + ALLOC_ELEM_SIZE - 1) / ALLOC_ELEM_SIZE, 1);
 374 
 375     if (!old_ptr) {
 376         res = sshmem_ucx_shadow_alloc(ctx->shadow_allocator, alloc_count, &index);
 377     } else {
 378         old_index = sshmem_ucx_memheap_ptr2index(s, old_ptr);
 379         res       = sshmem_ucx_shadow_realloc(ctx->shadow_allocator, alloc_count,
 380                                               old_index, &index, &inplace);
 381     }
 382 
 383     if (res != OSHMEM_SUCCESS) {
 384         return res;
 385     }
 386 
 387     *new_ptr = sshmem_ucx_memheap_index2ptr(s, index);
 388 
 389     /* Copy to new segment and release old*/
 390     if (old_ptr && !inplace) {
 391         old_alloc_count = sshmem_ucx_shadow_size(ctx->shadow_allocator, old_index);
 392         sshmem_ucx_memheap_wordcopy(*new_ptr, old_ptr,
 393                                     min(size, old_alloc_count * ALLOC_ELEM_SIZE));
 394         sshmem_ucx_shadow_free(ctx->shadow_allocator, old_index);
 395     }
 396 
 397     return OSHMEM_SUCCESS;
 398 }
 399 
 400 static int sshmem_ucx_memheap_free(map_segment_t *s, void* ptr)
 401 {
 402     mca_sshmem_ucx_segment_context_t *ctx = s->context;
 403 
 404     if (!ptr) {
 405         return OSHMEM_SUCCESS;
 406     }
 407 
 408     return sshmem_ucx_shadow_free(ctx->shadow_allocator,
 409                                   sshmem_ucx_memheap_ptr2index(s, ptr));
 410 }

/* [<][>][^][v][top][bottom][index][help] */