root/opal/mca/btl/vader/btl_vader_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_vader_component_register
  2. mca_btl_vader_component_open
  3. mca_btl_vader_component_close
  4. mca_btl_base_vader_modex_send
  5. mca_btl_vader_select_next_single_copy_mechanism
  6. mca_btl_vader_check_single_copy
  7. mca_btl_vader_component_init
  8. mca_btl_vader_poll_handle_frag
  9. mca_btl_vader_poll_fifo
  10. mca_btl_vader_progress_waiting
  11. mca_btl_vader_progress_endpoints
  12. mca_btl_vader_component_progress

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2011 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2009 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2006-2007 Voltaire. All rights reserved.
  14  * Copyright (c) 2009-2010 Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2010-2018 Los Alamos National Security, LLC.
  16  *                         All rights reserved.
  17  * Copyright (c) 2011      NVIDIA Corporation.  All rights reserved.
  18  * Copyright (c) 2014-2018 Intel, Inc. All rights reserved.
  19  * Copyright (c) 2014-2019 Research Organization for Information Science
  20  *                         and Technology (RIST).  All rights reserved.
  21  * Copyright (c) 2018      Amazon.com, Inc. or its affiliates.  All Rights reserved.
  22  * Copyright (c) 2018      Triad National Security, LLC. All rights
  23  *                         reserved.
  24  * $COPYRIGHT$
  25  *
  26  * Additional copyrights may follow
  27  *
  28  * $HEADER$
  29  */
  30 #include "opal_config.h"
  31 
  32 #include "opal/util/output.h"
  33 #include "opal/util/show_help.h"
  34 #include "opal/util/printf.h"
  35 #include "opal/threads/mutex.h"
  36 #include "opal/mca/btl/base/btl_base_error.h"
  37 
  38 #include "btl_vader.h"
  39 #include "btl_vader_frag.h"
  40 #include "btl_vader_fifo.h"
  41 #include "btl_vader_fbox.h"
  42 #include "btl_vader_xpmem.h"
  43 
  44 #include <sys/mman.h>
  45 #include <fcntl.h>
  46 
  47 #ifdef HAVE_SYS_PRCTL_H
  48 #include <sys/prctl.h>
  49 #endif
  50 
  51 /* NTH: OS X does not define MAP_ANONYMOUS */
  52 #if !defined(MAP_ANONYMOUS)
  53 #define MAP_ANONYMOUS MAP_ANON
  54 #endif
  55 
  56 static int mca_btl_vader_component_progress (void);
  57 static int mca_btl_vader_component_open(void);
  58 static int mca_btl_vader_component_close(void);
  59 static int mca_btl_vader_component_register(void);
  60 static mca_btl_base_module_t** mca_btl_vader_component_init(int *num_btls,
  61                                                             bool enable_progress_threads,
  62                                                             bool enable_mpi_threads);
  63 
  64 /* This enumeration is in order of preference */
  65 static mca_base_var_enum_value_t single_copy_mechanisms[] = {
  66 #if OPAL_BTL_VADER_HAVE_XPMEM
  67     {.value = MCA_BTL_VADER_XPMEM, .string = "xpmem"},
  68 #endif
  69 #if OPAL_BTL_VADER_HAVE_CMA
  70     {.value = MCA_BTL_VADER_CMA, .string = "cma"},
  71 #endif
  72 #if OPAL_BTL_VADER_HAVE_KNEM
  73     {.value = MCA_BTL_VADER_KNEM, .string = "knem"},
  74 #endif
  75     {.value = MCA_BTL_VADER_EMUL, .string = "emulated"},
  76     {.value = MCA_BTL_VADER_NONE, .string = "none"},
  77     {.value = 0, .string = NULL}
  78 };
  79 
  80 /*
  81  * Shared Memory (VADER) component instance.
  82  */
  83 mca_btl_vader_component_t mca_btl_vader_component = {
  84     .super = {
  85         /* First, the mca_base_component_t struct containing meta information
  86            about the component itself */
  87         .btl_version = {
  88             MCA_BTL_DEFAULT_VERSION("vader"),
  89             .mca_open_component = mca_btl_vader_component_open,
  90             .mca_close_component = mca_btl_vader_component_close,
  91             .mca_register_component_params = mca_btl_vader_component_register,
  92         },
  93         .btl_data = {
  94             /* The component is checkpoint ready */
  95             .param_field = MCA_BASE_METADATA_PARAM_CHECKPOINT
  96         },
  97 
  98         .btl_init = mca_btl_vader_component_init,
  99         .btl_progress = mca_btl_vader_component_progress,
 100     }  /* end super */
 101 };
 102 
 103 static int mca_btl_vader_component_register (void)
 104 {
 105     mca_base_var_enum_t *new_enum;
 106 
 107     (void) mca_base_var_group_component_register(&mca_btl_vader_component.super.btl_version,
 108                                                  "Enhanced shared memory byte transport later");
 109 
 110     /* register VADER component variables */
 111     mca_btl_vader_component.vader_free_list_num = 8;
 112     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 113                                            "free_list_num", "Initial number of fragments "
 114                                            "to allocate for shared memory communication.",
 115                                            MCA_BASE_VAR_TYPE_INT, NULL, 0,
 116                                            MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
 117                                            MCA_BASE_VAR_SCOPE_LOCAL,
 118                                            &mca_btl_vader_component.vader_free_list_num);
 119     mca_btl_vader_component.vader_free_list_max = 512;
 120     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 121                                            "free_list_max", "Maximum number of fragments "
 122                                            "to allocate for shared memory communication.",
 123                                            MCA_BASE_VAR_TYPE_INT, NULL, 0,
 124                                            MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
 125                                            MCA_BASE_VAR_SCOPE_LOCAL,
 126                                            &mca_btl_vader_component.vader_free_list_max);
 127     mca_btl_vader_component.vader_free_list_inc = 64;
 128     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 129                                            "free_list_inc", "Number of fragments to create "
 130                                            "on each allocation.", MCA_BASE_VAR_TYPE_INT, NULL, 0,
 131                                            MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_9,
 132                                            MCA_BASE_VAR_SCOPE_LOCAL,
 133                                            &mca_btl_vader_component.vader_free_list_inc);
 134 
 135     mca_btl_vader_component.memcpy_limit = 524288;
 136     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 137                                            "memcpy_limit", "Message size to switch from using "
 138                                            "memove to memcpy. The relative speed of these two "
 139                                            "routines can vary by size.", MCA_BASE_VAR_TYPE_INT,
 140                                            NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
 141                                            MCA_BASE_VAR_SCOPE_LOCAL,
 142                                            &mca_btl_vader_component.memcpy_limit);
 143 #if OPAL_BTL_VADER_HAVE_XPMEM
 144     mca_btl_vader_component.log_attach_align = 21;
 145     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 146                                            "log_align", "Log base 2 of the alignment to use for xpmem "
 147                                            "segments (default: 21, minimum: 12, maximum: 25)",
 148                                            MCA_BASE_VAR_TYPE_INT, NULL, 0,
 149                                            MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
 150                                            MCA_BASE_VAR_SCOPE_LOCAL,
 151                                            &mca_btl_vader_component.log_attach_align);
 152 #endif
 153 
 154 #if OPAL_BTL_VADER_HAVE_XPMEM && 64 == MCA_BTL_VADER_BITNESS
 155     mca_btl_vader_component.segment_size = 1 << 24;
 156 #else
 157     mca_btl_vader_component.segment_size = 1 << 22;
 158 #endif
 159     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 160                                            "segment_size", "Maximum size of all shared "
 161 #if OPAL_BTL_VADER_HAVE_XPMEM && 64 == MCA_BTL_VADER_BITNESS
 162                                            "memory buffers (default: 16M)",
 163 #else
 164                                            "memory buffers (default: 4M)",
 165 #endif
 166                                            MCA_BASE_VAR_TYPE_INT, NULL, 0,
 167                                            MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
 168                                            MCA_BASE_VAR_SCOPE_LOCAL,
 169                                            &mca_btl_vader_component.segment_size);
 170 
 171     mca_btl_vader_component.max_inline_send = 256;
 172     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 173                                            "max_inline_send", "Maximum size to transfer "
 174                                            "using copy-in copy-out semantics",
 175                                            MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0,
 176                                            MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
 177                                            MCA_BASE_VAR_SCOPE_LOCAL,
 178                                            &mca_btl_vader_component.max_inline_send);
 179 
 180     mca_btl_vader_component.fbox_threshold = 16;
 181     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 182                                            "fbox_threshold", "Number of sends required "
 183                                            "before an eager send buffer is setup for a peer "
 184                                            "(default: 16)", MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL,
 185                                            0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
 186                                            MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.fbox_threshold);
 187 
 188     mca_btl_vader_component.fbox_max = 32;
 189     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 190                                            "fbox_max", "Maximum number of eager send buffers "
 191                                            "to allocate (default: 32)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
 192                                            NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE, OPAL_INFO_LVL_5,
 193                                            MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.fbox_max);
 194 
 195     mca_btl_vader_component.fbox_size = 4096;
 196     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 197                                            "fbox_size", "Size of per-peer fast transfer buffers (default: 4k)",
 198                                            MCA_BASE_VAR_TYPE_UNSIGNED_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 199                                            OPAL_INFO_LVL_5, MCA_BASE_VAR_SCOPE_LOCAL, &mca_btl_vader_component.fbox_size);
 200 
 201     (void) mca_base_var_enum_create ("btl_vader_single_copy_mechanisms", single_copy_mechanisms, &new_enum);
 202 
 203     /* Default to the best available mechanism (see the enumerator for ordering) */
 204     mca_btl_vader_component.single_copy_mechanism = single_copy_mechanisms[0].value;
 205     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version,
 206                                            "single_copy_mechanism", "Single copy mechanism to use (defaults to best available)",
 207                                            MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 208                                            OPAL_INFO_LVL_3, MCA_BASE_VAR_SCOPE_GROUP, &mca_btl_vader_component.single_copy_mechanism);
 209     OBJ_RELEASE(new_enum);
 210 
 211     if (0 == access ("/dev/shm", W_OK)) {
 212         mca_btl_vader_component.backing_directory = "/dev/shm";
 213     } else {
 214         mca_btl_vader_component.backing_directory = opal_process_info.job_session_dir;
 215     }
 216     (void) mca_base_component_var_register (&mca_btl_vader_component.super.btl_version, "backing_directory",
 217                                             "Directory to place backing files for shared memory communication. "
 218                                             "This directory should be on a local filesystem such as /tmp or "
 219                                             "/dev/shm (default: (linux) /dev/shm, (others) session directory)",
 220                                             MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0, OPAL_INFO_LVL_3,
 221                                             MCA_BASE_VAR_SCOPE_READONLY, &mca_btl_vader_component.backing_directory);
 222 
 223 
 224 #if OPAL_BTL_VADER_HAVE_KNEM
 225     /* Currently disabling DMA mode by default; it's not clear that this is useful in all applications and architectures. */
 226     mca_btl_vader_component.knem_dma_min = 0;
 227     (void) mca_base_component_var_register(&mca_btl_vader_component.super.btl_version, "knem_dma_min",
 228                                            "Minimum message size (in bytes) to use the knem DMA mode; "
 229                                            "ignored if knem does not support DMA mode (0 = do not use the "
 230                                            "knem DMA mode, default: 0)", MCA_BASE_VAR_TYPE_UNSIGNED_INT,
 231                                            NULL, 0, 0, OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_READONLY,
 232                                            &mca_btl_vader_component.knem_dma_min);
 233 #endif
 234 
 235     mca_btl_vader.super.btl_exclusivity               = MCA_BTL_EXCLUSIVITY_HIGH;
 236 
 237     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
 238         mca_btl_vader.super.btl_eager_limit               = 32 * 1024;
 239         mca_btl_vader.super.btl_rndv_eager_limit          = mca_btl_vader.super.btl_eager_limit;
 240         mca_btl_vader.super.btl_max_send_size             = mca_btl_vader.super.btl_eager_limit;
 241         mca_btl_vader.super.btl_min_rdma_pipeline_size    = INT_MAX;
 242     } else {
 243         mca_btl_vader.super.btl_eager_limit               = 4 * 1024;
 244         mca_btl_vader.super.btl_rndv_eager_limit          = 32 * 1024;
 245         mca_btl_vader.super.btl_max_send_size             = 32 * 1024;
 246         mca_btl_vader.super.btl_min_rdma_pipeline_size    = INT_MAX;
 247     }
 248 
 249     mca_btl_vader.super.btl_rdma_pipeline_send_length = mca_btl_vader.super.btl_eager_limit;
 250     mca_btl_vader.super.btl_rdma_pipeline_frag_size   = mca_btl_vader.super.btl_eager_limit;
 251 
 252 #if OPAL_HAVE_ATOMIC_MATH_64
 253     mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA |
 254         MCA_BTL_FLAGS_ATOMIC_OPS | MCA_BTL_FLAGS_ATOMIC_FOPS;
 255 
 256     mca_btl_vader.super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD | MCA_BTL_ATOMIC_SUPPORTS_AND |
 257         MCA_BTL_ATOMIC_SUPPORTS_OR | MCA_BTL_ATOMIC_SUPPORTS_XOR | MCA_BTL_ATOMIC_SUPPORTS_CSWAP |
 258         MCA_BTL_ATOMIC_SUPPORTS_GLOB | MCA_BTL_ATOMIC_SUPPORTS_SWAP;
 259 #if OPAL_HAVE_ATOMIC_MATH_32
 260     mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_32BIT;
 261 #endif /* OPAL_HAVE_ATOMIC_MATH_32 */
 262 
 263 #if OPAL_HAVE_ATOMIC_MIN_64
 264     mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MIN;
 265 #endif /* OPAL_HAVE_ATOMIC_MIN_64 */
 266 
 267 #if OPAL_HAVE_ATOMIC_MAX_64
 268     mca_btl_vader.super.btl_atomic_flags |= MCA_BTL_ATOMIC_SUPPORTS_MAX;
 269 #endif /* OPAL_HAVE_ATOMIC_MAX_64 */
 270 
 271 #else
 272     mca_btl_vader.super.btl_flags = MCA_BTL_FLAGS_SEND_INPLACE | MCA_BTL_FLAGS_SEND | MCA_BTL_FLAGS_RDMA;
 273 #endif /* OPAL_HAVE_ATOMIC_MATH_64 */
 274 
 275     if (MCA_BTL_VADER_NONE != mca_btl_vader_component.single_copy_mechanism) {
 276         /* True single copy mechanisms should provide better bandwidth */
 277         mca_btl_vader.super.btl_bandwidth = 40000; /* Mbs */
 278     } else {
 279         mca_btl_vader.super.btl_bandwidth = 10000; /* Mbs */
 280     }
 281 
 282     mca_btl_vader.super.btl_get = mca_btl_vader_get_sc_emu;
 283     mca_btl_vader.super.btl_put = mca_btl_vader_put_sc_emu;
 284     mca_btl_vader.super.btl_atomic_op = mca_btl_vader_emu_aop;
 285     mca_btl_vader.super.btl_atomic_fop = mca_btl_vader_emu_afop;
 286     mca_btl_vader.super.btl_atomic_cswap = mca_btl_vader_emu_acswap;
 287 
 288     mca_btl_vader.super.btl_latency   = 1;     /* Microsecs */
 289 
 290     /* Call the BTL based to register its MCA params */
 291     mca_btl_base_param_register(&mca_btl_vader_component.super.btl_version,
 292                                 &mca_btl_vader.super);
 293 
 294     return OPAL_SUCCESS;
 295 }
 296 
 297 /*
 298  *  Called by MCA framework to open the component, registers
 299  *  component parameters.
 300  */
 301 
 302 static int mca_btl_vader_component_open(void)
 303 {
 304     /* initialize objects */
 305     OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_eager, opal_free_list_t);
 306     OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_user, opal_free_list_t);
 307     OBJ_CONSTRUCT(&mca_btl_vader_component.vader_frags_max_send, opal_free_list_t);
 308     OBJ_CONSTRUCT(&mca_btl_vader_component.vader_fboxes, opal_free_list_t);
 309     OBJ_CONSTRUCT(&mca_btl_vader_component.lock, opal_mutex_t);
 310     OBJ_CONSTRUCT(&mca_btl_vader_component.pending_endpoints, opal_list_t);
 311     OBJ_CONSTRUCT(&mca_btl_vader_component.pending_fragments, opal_list_t);
 312 #if OPAL_BTL_VADER_HAVE_KNEM
 313     mca_btl_vader.knem_fd = -1;
 314 #endif
 315 
 316     return OPAL_SUCCESS;
 317 }
 318 
 319 
 320 /*
 321  * component cleanup - sanity checking of queue lengths
 322  */
 323 
 324 static int mca_btl_vader_component_close(void)
 325 {
 326     OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_eager);
 327     OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_user);
 328     OBJ_DESTRUCT(&mca_btl_vader_component.vader_frags_max_send);
 329     OBJ_DESTRUCT(&mca_btl_vader_component.vader_fboxes);
 330     OBJ_DESTRUCT(&mca_btl_vader_component.lock);
 331     OBJ_DESTRUCT(&mca_btl_vader_component.pending_endpoints);
 332     OBJ_DESTRUCT(&mca_btl_vader_component.pending_fragments);
 333 
 334     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism &&
 335         NULL != mca_btl_vader_component.my_segment) {
 336         munmap (mca_btl_vader_component.my_segment, mca_btl_vader_component.segment_size);
 337     }
 338 
 339     mca_btl_vader_component.my_segment = NULL;
 340 
 341 #if OPAL_BTL_VADER_HAVE_KNEM
 342     mca_btl_vader_knem_fini ();
 343 #endif
 344 
 345     if (mca_btl_vader_component.mpool) {
 346         mca_btl_vader_component.mpool->mpool_finalize (mca_btl_vader_component.mpool);
 347         mca_btl_vader_component.mpool = NULL;
 348     }
 349 
 350     return OPAL_SUCCESS;
 351 }
 352 
 353 static int mca_btl_base_vader_modex_send (void)
 354 {
 355     union vader_modex_t modex;
 356     int modex_size, rc;
 357 
 358 #if OPAL_BTL_VADER_HAVE_XPMEM
 359     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
 360         modex.xpmem.seg_id = mca_btl_vader_component.my_seg_id;
 361         modex.xpmem.segment_base = mca_btl_vader_component.my_segment;
 362 
 363         modex_size = sizeof (modex.xpmem);
 364     } else {
 365 #endif
 366         modex_size = opal_shmem_sizeof_shmem_ds (&mca_btl_vader_component.seg_ds);
 367         memmove (&modex.seg_ds, &mca_btl_vader_component.seg_ds, modex_size);
 368 
 369 #if OPAL_BTL_VADER_HAVE_XPMEM
 370     }
 371 #endif
 372 
 373     OPAL_MODEX_SEND(rc, OPAL_PMIX_LOCAL,
 374                     &mca_btl_vader_component.super.btl_version, &modex, modex_size);
 375 
 376     return rc;
 377 }
 378 
 379 #if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM
 380 static void mca_btl_vader_select_next_single_copy_mechanism (void)
 381 {
 382     for (int i = 0 ; single_copy_mechanisms[i].value != MCA_BTL_VADER_NONE ; ++i) {
 383         if (single_copy_mechanisms[i].value == mca_btl_vader_component.single_copy_mechanism) {
 384             mca_btl_vader_component.single_copy_mechanism = single_copy_mechanisms[i+1].value;
 385             return;
 386         }
 387     }
 388 }
 389 #endif
 390 
 391 static void mca_btl_vader_check_single_copy (void)
 392 {
 393 #if OPAL_BTL_VADER_HAVE_XPMEM || OPAL_BTL_VADER_HAVE_CMA || OPAL_BTL_VADER_HAVE_KNEM
 394     int initial_mechanism = mca_btl_vader_component.single_copy_mechanism;
 395 #endif
 396 
 397     /* single-copy emulation is always used to support AMO's right now */
 398     mca_btl_vader_sc_emu_init ();
 399 
 400 #if OPAL_BTL_VADER_HAVE_XPMEM
 401     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
 402         /* try to create an xpmem segment for the entire address space */
 403         int rc = mca_btl_vader_xpmem_init ();
 404         if (OPAL_SUCCESS != rc) {
 405             if (MCA_BTL_VADER_XPMEM == initial_mechanism) {
 406                 opal_show_help("help-btl-vader.txt", "xpmem-make-failed",
 407                                true, opal_process_info.nodename, errno,
 408                                strerror(errno));
 409             }
 410 
 411             mca_btl_vader_select_next_single_copy_mechanism ();
 412         }
 413     }
 414 #endif
 415 
 416 #if OPAL_BTL_VADER_HAVE_CMA
 417     if (MCA_BTL_VADER_CMA == mca_btl_vader_component.single_copy_mechanism) {
 418         /* Check if we have the proper permissions for CMA */
 419         char buffer = '0';
 420         bool cma_happy = false;
 421         int fd;
 422 
 423         /* check system setting for current ptrace scope */
 424         fd = open ("/proc/sys/kernel/yama/ptrace_scope", O_RDONLY);
 425         if (0 <= fd) {
 426             read (fd, &buffer, 1);
 427             close (fd);
 428         }
 429 
 430         /* ptrace scope 0 will allow an attach from any of the process owner's
 431          * processes. ptrace scope 1 limits attachers to the process tree
 432          * starting at the parent of this process. */
 433         if ('0' != buffer) {
 434 #if defined PR_SET_PTRACER
 435             /* try setting the ptrace scope to allow attach */
 436             int ret = prctl (PR_SET_PTRACER, PR_SET_PTRACER_ANY, 0, 0, 0);
 437             if (0 == ret) {
 438                 cma_happy = true;
 439             }
 440 #endif
 441         } else {
 442             cma_happy = true;
 443         }
 444 
 445         if (!cma_happy) {
 446             mca_btl_vader_select_next_single_copy_mechanism ();
 447 
 448             if (MCA_BTL_VADER_CMA == initial_mechanism) {
 449                 opal_show_help("help-btl-vader.txt", "cma-permission-denied",
 450                                true, opal_process_info.nodename);
 451             }
 452         } else {
 453             /* ptrace_scope will allow CMA */
 454             mca_btl_vader.super.btl_get = mca_btl_vader_get_cma;
 455             mca_btl_vader.super.btl_put = mca_btl_vader_put_cma;
 456         }
 457     }
 458 #endif
 459 
 460 #if OPAL_BTL_VADER_HAVE_KNEM
 461     if (MCA_BTL_VADER_KNEM == mca_btl_vader_component.single_copy_mechanism) {
 462         /* mca_btl_vader_knem_init will set the appropriate get/put functions */
 463         int rc = mca_btl_vader_knem_init ();
 464         if (OPAL_SUCCESS != rc) {
 465             if (MCA_BTL_VADER_KNEM == initial_mechanism) {
 466                 opal_show_help("help-btl-vader.txt", "knem requested but not available",
 467                                true, opal_process_info.nodename);
 468             }
 469 
 470             /* disable single copy */
 471             mca_btl_vader_select_next_single_copy_mechanism ();
 472         }
 473     }
 474 #endif
 475 
 476     if (MCA_BTL_VADER_NONE == mca_btl_vader_component.single_copy_mechanism) {
 477         mca_btl_vader.super.btl_flags &= ~MCA_BTL_FLAGS_RDMA;
 478         mca_btl_vader.super.btl_get = NULL;
 479         mca_btl_vader.super.btl_put = NULL;
 480     }
 481 
 482     if (MCA_BTL_VADER_EMUL == mca_btl_vader_component.single_copy_mechanism) {
 483         /* limit to the maximum fragment size */
 484         mca_btl_vader.super.btl_put_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
 485         mca_btl_vader.super.btl_get_limit = mca_btl_vader.super.btl_max_send_size - sizeof (mca_btl_vader_sc_emu_hdr_t);
 486     }
 487 }
 488 
 489 /*
 490  *  VADER component initialization
 491  */
 492 static mca_btl_base_module_t **mca_btl_vader_component_init (int *num_btls,
 493                                                              bool enable_progress_threads,
 494                                                              bool enable_mpi_threads)
 495 {
 496     mca_btl_vader_component_t *component = &mca_btl_vader_component;
 497     mca_btl_base_module_t **btls = NULL;
 498     int rc;
 499 
 500     *num_btls = 0;
 501 
 502     /* disable if there are no local peers */
 503     if (0 == MCA_BTL_VADER_NUM_LOCAL_PEERS) {
 504         BTL_VERBOSE(("No peers to communicate with. Disabling vader."));
 505         return NULL;
 506     }
 507 
 508 #if OPAL_BTL_VADER_HAVE_XPMEM
 509     /* limit segment alignment to be between 4k and 16M */
 510     if (component->log_attach_align < 12) {
 511         component->log_attach_align = 12;
 512     } else if (component->log_attach_align > 25) {
 513         component->log_attach_align = 25;
 514     }
 515 #endif
 516 
 517     btls = (mca_btl_base_module_t **) calloc (1, sizeof (mca_btl_base_module_t *));
 518     if (NULL == btls) {
 519         return NULL;
 520     }
 521 
 522     /* ensure a sane segment size */
 523     if (component->segment_size < (2 << 20)) {
 524         component->segment_size = (2 << 20);
 525     }
 526 
 527     component->fbox_size = (component->fbox_size + MCA_BTL_VADER_FBOX_ALIGNMENT_MASK) & ~MCA_BTL_VADER_FBOX_ALIGNMENT_MASK;
 528 
 529     if (component->segment_size > (1ul << MCA_BTL_VADER_OFFSET_BITS)) {
 530         component->segment_size = 2ul << MCA_BTL_VADER_OFFSET_BITS;
 531     }
 532 
 533     /* no fast boxes allocated initially */
 534     component->num_fbox_in_endpoints = 0;
 535 
 536     mca_btl_vader_check_single_copy ();
 537 
 538     if (MCA_BTL_VADER_XPMEM != mca_btl_vader_component.single_copy_mechanism) {
 539         char *sm_file;
 540 
 541         rc = opal_asprintf(&sm_file, "%s" OPAL_PATH_SEP "vader_segment.%s.%x.%d", mca_btl_vader_component.backing_directory,
 542                       opal_process_info.nodename, OPAL_PROC_MY_NAME.jobid, MCA_BTL_VADER_LOCAL_RANK);
 543         if (0 > rc) {
 544             free (btls);
 545             return NULL;
 546         }
 547         if (NULL != opal_pmix.register_cleanup) {
 548             opal_pmix.register_cleanup (sm_file, false, false, false);
 549         }
 550 
 551         rc = opal_shmem_segment_create (&component->seg_ds, sm_file, component->segment_size);
 552         free (sm_file);
 553         if (OPAL_SUCCESS != rc) {
 554             BTL_VERBOSE(("Could not create shared memory segment"));
 555             free (btls);
 556             return NULL;
 557         }
 558 
 559         component->my_segment = opal_shmem_segment_attach (&component->seg_ds);
 560         if (NULL == component->my_segment) {
 561             BTL_VERBOSE(("Could not attach to just created shared memory segment"));
 562             goto failed;
 563         }
 564     } else {
 565         /* when using xpmem it is safe to use an anonymous segment */
 566         component->my_segment = mmap (NULL, component->segment_size, PROT_READ |
 567                                       PROT_WRITE, MAP_ANONYMOUS | MAP_SHARED, -1, 0);
 568         if ((void *)-1 == component->my_segment) {
 569             BTL_VERBOSE(("Could not create anonymous memory segment"));
 570             free (btls);
 571             return NULL;
 572         }
 573     }
 574 
 575     /* initialize my fifo */
 576     vader_fifo_init ((struct vader_fifo_t *) component->my_segment);
 577 
 578     rc = mca_btl_base_vader_modex_send ();
 579     if (OPAL_SUCCESS != rc) {
 580         BTL_VERBOSE(("Error sending modex"));
 581         goto failed;
 582     }
 583 
 584     *num_btls = 1;
 585 
 586     /* get pointer to the btls */
 587     btls[0] = (mca_btl_base_module_t *) &mca_btl_vader;
 588 
 589     /* set flag indicating btl not inited */
 590     mca_btl_vader.btl_inited = false;
 591 
 592     return btls;
 593 failed:
 594 #if OPAL_BTL_VADER_HAVE_XPMEM
 595     if (MCA_BTL_VADER_XPMEM == mca_btl_vader_component.single_copy_mechanism) {
 596         munmap (component->my_segment, component->segment_size);
 597     } else
 598 #endif
 599         opal_shmem_unlink (&component->seg_ds);
 600 
 601     if (btls) {
 602         free (btls);
 603     }
 604 
 605     return NULL;
 606 }
 607 
 608 void mca_btl_vader_poll_handle_frag (mca_btl_vader_hdr_t *hdr, struct mca_btl_base_endpoint_t *endpoint)
 609 {
 610     mca_btl_base_segment_t segments[2];
 611     mca_btl_base_descriptor_t frag = {.des_segments = segments, .des_segment_count = 1};
 612     const mca_btl_active_message_callback_t *reg;
 613 
 614     if (hdr->flags & MCA_BTL_VADER_FLAG_COMPLETE) {
 615         mca_btl_vader_frag_complete (hdr->frag);
 616         return;
 617     }
 618 
 619     reg = mca_btl_base_active_message_trigger + hdr->tag;
 620     segments[0].seg_addr.pval = (void *) (hdr + 1);
 621     segments[0].seg_len       = hdr->len;
 622 
 623     if (hdr->flags & MCA_BTL_VADER_FLAG_SINGLE_COPY) {
 624         mca_rcache_base_registration_t *xpmem_reg;
 625 
 626         xpmem_reg = vader_get_registation (endpoint, hdr->sc_iov.iov_base,
 627                                            hdr->sc_iov.iov_len, 0,
 628                                            &segments[1].seg_addr.pval);
 629         assert (NULL != xpmem_reg);
 630 
 631         segments[1].seg_len = hdr->sc_iov.iov_len;
 632         frag.des_segment_count = 2;
 633 
 634         /* recv upcall */
 635         reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata);
 636         vader_return_registration (xpmem_reg, endpoint);
 637     } else {
 638         reg->cbfunc(&mca_btl_vader.super, hdr->tag, &frag, reg->cbdata);
 639     }
 640 
 641     if (OPAL_UNLIKELY(MCA_BTL_VADER_FLAG_SETUP_FBOX & hdr->flags)) {
 642         mca_btl_vader_endpoint_setup_fbox_recv (endpoint, relative2virtual(hdr->fbox_base));
 643         mca_btl_vader_component.fbox_in_endpoints[mca_btl_vader_component.num_fbox_in_endpoints++] = endpoint;
 644     }
 645 
 646     hdr->flags = MCA_BTL_VADER_FLAG_COMPLETE;
 647     vader_fifo_write_back (hdr, endpoint);
 648 }
 649 
 650 static int mca_btl_vader_poll_fifo (void)
 651 {
 652     struct mca_btl_base_endpoint_t *endpoint;
 653     mca_btl_vader_hdr_t *hdr;
 654 
 655     /* poll the fifo until it is empty or a limit has been hit (8 is arbitrary) */
 656     for (int fifo_count = 0 ; fifo_count < 31 ; ++fifo_count) {
 657         hdr = vader_fifo_read (mca_btl_vader_component.my_fifo, &endpoint);
 658         if (NULL == hdr) {
 659             return fifo_count;
 660         }
 661 
 662         mca_btl_vader_poll_handle_frag (hdr, endpoint);
 663     }
 664 
 665     return 1;
 666 }
 667 
 668 /**
 669  * Progress pending messages on an endpoint
 670  *
 671  * @param ep (IN)       Vader BTL endpoint
 672  *
 673  * This is called with the component lock held so the component lock does
 674  * not need to be aquired before modifying the pending_endpoints list.
 675  */
 676 static void mca_btl_vader_progress_waiting (mca_btl_base_endpoint_t *ep)
 677 {
 678     mca_btl_vader_frag_t *frag, *next;
 679     int ret = 1;
 680 
 681     if (OPAL_UNLIKELY(NULL == ep)) {
 682         return;
 683     }
 684 
 685     OPAL_THREAD_LOCK(&ep->pending_frags_lock);
 686     OPAL_LIST_FOREACH_SAFE(frag, next, &ep->pending_frags, mca_btl_vader_frag_t) {
 687         ret = vader_fifo_write_ep (frag->hdr, ep);
 688         if (!ret) {
 689             OPAL_THREAD_UNLOCK(&ep->pending_frags_lock);
 690             return;
 691         }
 692 
 693         (void) opal_list_remove_first (&ep->pending_frags);
 694     }
 695 
 696     ep->waiting = false;
 697     opal_list_remove_item (&mca_btl_vader_component.pending_endpoints, &ep->super);
 698 
 699     OPAL_THREAD_UNLOCK(&ep->pending_frags_lock);
 700 }
 701 
 702 /**
 703  * Progress pending messages on all waiting endpoints
 704  *
 705  * @param ep (IN)       Vader BTL endpoint
 706  */
 707 static void mca_btl_vader_progress_endpoints (void)
 708 {
 709     mca_btl_base_endpoint_t *ep, *next;
 710     int count;
 711 
 712     count = opal_list_get_size (&mca_btl_vader_component.pending_endpoints);
 713     if (OPAL_LIKELY(0 == count)) {
 714         return;
 715     }
 716 
 717     OPAL_THREAD_LOCK(&mca_btl_vader_component.lock);
 718     OPAL_LIST_FOREACH_SAFE(ep, next, &mca_btl_vader_component.pending_endpoints, mca_btl_base_endpoint_t) {
 719         mca_btl_vader_progress_waiting (ep);
 720     }
 721     OPAL_THREAD_UNLOCK(&mca_btl_vader_component.lock);
 722 }
 723 
 724 static int mca_btl_vader_component_progress (void)
 725 {
 726     static opal_atomic_int32_t lock = 0;
 727     int count = 0;
 728 
 729     if (opal_using_threads()) {
 730         if (opal_atomic_swap_32 (&lock, 1)) {
 731             return 0;
 732         }
 733     }
 734 
 735     /* check for messages in fast boxes */
 736     if (mca_btl_vader_component.num_fbox_in_endpoints) {
 737         count = mca_btl_vader_check_fboxes ();
 738     }
 739 
 740     mca_btl_vader_progress_endpoints ();
 741 
 742     if (VADER_FIFO_FREE == mca_btl_vader_component.my_fifo->fifo_head) {
 743         lock = 0;
 744         return count;
 745     }
 746 
 747     count += mca_btl_vader_poll_fifo ();
 748     opal_atomic_mb ();
 749     lock = 0;
 750 
 751     return count;
 752 }

/* [<][>][^][v][top][bottom][index][help] */