root/opal/mca/btl/portals4/btl_portals4_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_btl_portals4_component_register
  2. mca_btl_portals4_component_open
  3. mca_btl_portals4_component_close
  4. mca_btl_portals4_component_init
  5. mca_btl_portals4_get_error
  6. mca_btl_portals4_component_progress

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2005 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2010-2012 Sandia National Laboratories.  All rights reserved.
  14  * Copyright (c) 2014-2015 Intel, Inc. All rights reserved.
  15  * Copyright (c) 2014      Bull SAS.  All rights reserved.
  16  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  17  *                         reserved.
  18  * $COPYRIGHT$
  19  *
  20  * Additional copyrights may follow
  21  *
  22  * $HEADER$
  23  */
  24 
  25 #include "opal_config.h"
  26 
  27 #include "opal/mca/event/event.h"
  28 #include "opal/util/output.h"
  29 #include "opal/mca/pmix/pmix.h"
  30 #include "opal/util/show_help.h"
  31 #include "opal/mca/btl/btl.h"
  32 #include "opal/mca/btl/base/base.h"
  33 #include "opal/mca/mpool/base/base.h"
  34 
  35 #include "portals4.h"
  36 #include "btl_portals4.h"
  37 #include "btl_portals4_frag.h"
  38 #include "btl_portals4_recv.h"
  39 
   40 static int mca_btl_portals4_component_register(void);  /* registers the component's MCA variables */
   41 static int mca_btl_portals4_component_open(void);      /* fills the default (template) module state */
   42 static int mca_btl_portals4_component_close(void);     /* frees component arrays and calls PtlFini() */
   43 static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
   44                                                        bool enable_progress_threads,
   45                                                        bool enable_mpi_threads);  /* opens the Portals4 NIs; returns one module per NI, or NULL */
   46 int mca_btl_portals4_component_progress(void);         /* polls the component's Portals4 event queues */
   47 
   48 OPAL_MODULE_DECLSPEC extern mca_btl_portals4_component_t mca_btl_portals4_component;  /* the single component instance, defined below */
  49 
   50 mca_btl_portals4_component_t mca_btl_portals4_component = {
   51     {
   52       /* Component version information ("portals4") together with the
   53          open, close and parameter-registration hooks defined in this file */
   54       .btl_version = {
   55         MCA_BTL_DEFAULT_VERSION("portals4"),
   56         .mca_open_component = mca_btl_portals4_component_open,
   57         .mca_close_component = mca_btl_portals4_component_close,
   58         .mca_register_component_params = mca_btl_portals4_component_register,
   59       },
   60       .btl_data = {
   61           /* The component is not checkpoint ready: no metadata flags are set */
   62           .param_field = MCA_BASE_METADATA_PARAM_NONE
   63       },
   64 
   65       .btl_init = mca_btl_portals4_component_init,         /* builds the per-interface modules */
   66       .btl_progress = mca_btl_portals4_component_progress, /* event-queue polling entry point */
   67     }
   68 };
  69 
  70 static int
  71 mca_btl_portals4_component_register(void)
  72 {
  73     mca_btl_portals4_component.use_logical = 0;
  74     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
  75                            "use_logical",
  76                            "Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false)",
  77                            MCA_BASE_VAR_TYPE_INT,
  78                            NULL,
  79                            0,
  80                            0,
  81                            OPAL_INFO_LVL_5,
  82                            MCA_BASE_VAR_SCOPE_READONLY,
  83                            &mca_btl_portals4_component.use_logical);
  84 
  85     mca_btl_portals4_component.max_btls = 1;
  86     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
  87                            "max_btls",
  88                            "Maximum number of accepted Portals4 cards",
  89                            MCA_BASE_VAR_TYPE_UNSIGNED_INT,
  90                            NULL,
  91                            0,
  92                            0,
  93                            OPAL_INFO_LVL_5,
  94                            MCA_BASE_VAR_SCOPE_READONLY,
  95                            &mca_btl_portals4_component.max_btls);
  96 
  97     mca_btl_portals4_component.portals_free_list_init_num = 16;
  98     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
  99                            "free_list_init_num",
 100                            "Initial number of elements to initialize in free lists",
 101                            MCA_BASE_VAR_TYPE_INT,
 102                            NULL,
 103                            0,
 104                            0,
 105                            OPAL_INFO_LVL_5,
 106                            MCA_BASE_VAR_SCOPE_READONLY,
 107                            &(mca_btl_portals4_component.portals_free_list_init_num));
 108 
 109     mca_btl_portals4_component.portals_free_list_max_num = 1024;
 110     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 111                            "free_list_max_num",
 112                            "Max number of elements to initialize in free lists",
 113                            MCA_BASE_VAR_TYPE_INT,
 114                            NULL,
 115                            0,
 116                            0,
 117                            OPAL_INFO_LVL_5,
 118                            MCA_BASE_VAR_SCOPE_READONLY,
 119                            &(mca_btl_portals4_component.portals_free_list_max_num));
 120 
 121     mca_btl_portals4_component.portals_free_list_inc_num = 16;
 122     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 123                            "free_list_inc_num",
 124                            "Increment count for free lists",
 125                            MCA_BASE_VAR_TYPE_INT,
 126                            NULL,
 127                            0,
 128                            0,
 129                            OPAL_INFO_LVL_5,
 130                            MCA_BASE_VAR_SCOPE_READONLY,
 131                            &(mca_btl_portals4_component.portals_free_list_inc_num));
 132 
 133     mca_btl_portals4_component.portals_free_list_eager_max_num = 32;
 134     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 135                            "eager_frag_limit",
 136                            "Maximum number of pre-pinned eager fragments",
 137                            MCA_BASE_VAR_TYPE_INT,
 138                            NULL,
 139                            0,
 140                            0,
 141                            OPAL_INFO_LVL_5,
 142                            MCA_BASE_VAR_SCOPE_READONLY,
 143                            &(mca_btl_portals4_component.portals_free_list_eager_max_num));
 144 
 145     mca_btl_portals4_component.portals_need_ack =  1; /* default to true.. */
 146     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 147                            "needs_ack",
 148                            "Require a portals level ACK",
 149                            MCA_BASE_VAR_TYPE_INT,
 150                            NULL,
 151                            0,
 152                            0,
 153                            OPAL_INFO_LVL_5,
 154                            MCA_BASE_VAR_SCOPE_READONLY,
 155                            &(mca_btl_portals4_component.portals_need_ack));
 156 
 157     mca_btl_portals4_component.recv_queue_size = 4 * 1024;
 158     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 159                            "eq_recv_size",
 160                            "Size of the receive event queue",
 161                            MCA_BASE_VAR_TYPE_INT,
 162                            NULL,
 163                            0,
 164                            0,
 165                            OPAL_INFO_LVL_5,
 166                            MCA_BASE_VAR_SCOPE_READONLY,
 167                            &(mca_btl_portals4_component.recv_queue_size));
 168 
 169     mca_btl_portals4_component.portals_max_outstanding_ops = 8 * 1024;
 170     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 171                            "max_pending_ops",
 172                            "Maximum number of pending send/rdma frags",
 173                            MCA_BASE_VAR_TYPE_INT,
 174                            NULL,
 175                            0,
 176                            0,
 177                            OPAL_INFO_LVL_5,
 178                            MCA_BASE_VAR_SCOPE_READONLY,
 179                            &(mca_btl_portals4_component.portals_max_outstanding_ops));
 180 
 181     mca_btl_portals4_component.portals_recv_mds_num = 8;
 182     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 183                            "recv_md_num",
 184                            "Number of send frag receive descriptors",
 185                            MCA_BASE_VAR_TYPE_INT,
 186                            NULL,
 187                            0,
 188                            0,
 189                            OPAL_INFO_LVL_5,
 190                            MCA_BASE_VAR_SCOPE_READONLY,
 191                            &(mca_btl_portals4_component.portals_recv_mds_num));
 192 
 193     mca_btl_portals4_component.portals_recv_mds_size = 256 * 1024;
 194     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 195                            "recv_md_size",
 196                            "Size of send frag receive descriptors",
 197                            MCA_BASE_VAR_TYPE_INT,
 198                            NULL,
 199                            0,
 200                            0,
 201                            OPAL_INFO_LVL_5,
 202                            MCA_BASE_VAR_SCOPE_READONLY,
 203                            &(mca_btl_portals4_component.portals_recv_mds_size));
 204 
 205     mca_btl_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
 206     (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
 207                            "max_msg_size",
 208                            "Max size supported by portals4 (above that, a message is cut into messages less than that size)",
 209                            MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
 210                            NULL,
 211                            0,
 212                            0,
 213                            OPAL_INFO_LVL_5,
 214                            MCA_BASE_VAR_SCOPE_READONLY,
 215                            &(mca_btl_portals4_component.portals_max_msg_size));
 216     return OPAL_SUCCESS;
 217 }
 218 
 219 static int
 220 mca_btl_portals4_component_open(void)
 221 {
 222     OPAL_OUTPUT_VERBOSE((1, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_open\n"));
 223 
 224     /*
 225      * fill default module state
 226      */
 227     mca_btl_portals4_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100;
 228     mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024;
 229     mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024;
 230     mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024;
 231     if (mca_btl_portals4_module.super.btl_max_send_size > mca_btl_portals4_component.portals_max_msg_size)
 232         mca_btl_portals4_module.super.btl_max_send_size = mca_btl_portals4_component.portals_max_msg_size;
 233     mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024;
 234     mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
 235     mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
 236     mca_btl_portals4_module.super.btl_flags =
 237         MCA_BTL_FLAGS_RDMA |
 238         MCA_BTL_FLAGS_RDMA_MATCHED |
 239         MCA_BTL_FLAGS_SEND;
 240 
 241     mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
 242 
 243     mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
 244     if (mca_btl_portals4_module.super.btl_get_limit > mca_btl_portals4_component.portals_max_msg_size)
 245          mca_btl_portals4_module.super.btl_get_limit = mca_btl_portals4_component.portals_max_msg_size;
 246     mca_btl_portals4_module.super.btl_put_limit = 0;        /* not implemented */
 247     mca_btl_portals4_module.super.btl_get_alignment = 0;
 248     mca_btl_portals4_module.super.btl_put_alignment = 0;
 249 
 250     mca_btl_portals4_module.super.btl_get_local_registration_threshold = 0;
 251     mca_btl_portals4_module.super.btl_put_local_registration_threshold = 0;
 252 
 253     mca_btl_portals4_module.super.btl_bandwidth = 1000;
 254     mca_btl_portals4_module.super.btl_latency = 0;
 255 
 256     mca_btl_base_param_register(&mca_btl_portals4_component.super.btl_version, &mca_btl_portals4_module.super);
 257 
 258     mca_btl_portals4_module.portals_num_procs = 0;
 259 
 260     mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE;
 261 
 262     mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE;
 263 
 264     mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
 265     mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;
 266 
 267     mca_btl_portals4_module.long_overflow_me_h = PTL_INVALID_HANDLE;
 268     mca_btl_portals4_module.portals_outstanding_ops = 0;
 269     mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL;
 270 
 271     if (1 == mca_btl_portals4_component.use_logical) {
 272         /*
 273          * set the MCA_BTL_FLAGS_SINGLE_ADD_PROCS flag here in the default
 274          * module, so it gets copied into the module for each Portals4
 275          * interface during init().
 276          */
 277         mca_btl_portals4_module.super.btl_flags |= MCA_BTL_FLAGS_SINGLE_ADD_PROCS;
 278     }
 279 
 280     return OPAL_SUCCESS;
 281 }
 282 
 283 
 284 static int
 285 mca_btl_portals4_component_close(void)
 286 {
 287     opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_close\n");
 288 
 289     /* release resources */
 290     /* close debugging stream */
 291     opal_output_close(opal_btl_base_framework.framework_output);
 292     opal_btl_base_framework.framework_output = -1;
 293 
 294     if (NULL != mca_btl_portals4_component.btls)  free(mca_btl_portals4_component.btls);
 295     if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
 296     mca_btl_portals4_component.btls = NULL;
 297     mca_btl_portals4_component.eqs_h = NULL;
 298 
 299     PtlFini();
 300 
 301     return OPAL_SUCCESS;
 302 }
 303 
 304 static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
 305                                                        bool enable_progress_threads,
 306                                                        bool enable_mpi_threads)
 307 {
 308     mca_btl_portals4_module_t *portals4_btl = NULL;
 309     mca_btl_base_module_t **btls = NULL;
 310     unsigned int ret, interface;
 311     ptl_handle_ni_t *portals4_nis_h = NULL;
 312     ptl_ni_limits_t portals4_ni_limits ;
 313     ptl_process_t *ptl_process_ids = NULL;
 314 
 315     opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n");
 316 
 317     if (enable_mpi_threads && !mca_btl_base_thread_multiple_override) {
 318         opal_output_verbose(1, opal_btl_base_framework.framework_output,
 319                             "btl portals4 disabled because threads enabled");
 320         return NULL;
 321     }
 322 
 323     /* Initialize Portals */
 324     ret = PtlInit();
 325     if (PTL_OK != ret) {
 326         opal_output_verbose(1, opal_btl_base_framework.framework_output,
 327                             "%s:%d: PtlInit failed: %d\n",
 328                             __FILE__, __LINE__, ret);
 329         goto error;
 330     }
 331     OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlInit OK\n"));
 332 
 333     /*
 334      * Initialize the network interfaces (try to open the interfaces 0 to (max_btls-1) )
 335      */
 336     *num_btls = 0;
 337     portals4_nis_h = malloc(mca_btl_portals4_component.max_btls * sizeof(ptl_handle_ni_t));
 338     for (interface=0; interface<mca_btl_portals4_component.max_btls; interface++) {
 339 
 340         if (mca_btl_portals4_component.use_logical)
 341             ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
 342                     PTL_NI_LOGICAL | PTL_NI_MATCHING,
 343                     PTL_PID_ANY,       /* let library assign our pid */
 344                     NULL,              /* no desired limits */
 345                     &portals4_ni_limits, /* actual limits */
 346                     &portals4_nis_h[*num_btls] /* our interface handle */
 347                     );
 348         else ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
 349                     PTL_NI_PHYSICAL | PTL_NI_MATCHING,
 350                     PTL_PID_ANY,       /* let library assign our pid */
 351                     NULL,              /* no desired limits */
 352                     &portals4_ni_limits, /* actual limits */
 353                     &portals4_nis_h[*num_btls] /* our interface handle */
 354                     );
 355         if (PTL_OK != ret) {
 356             opal_output_verbose(90, opal_btl_base_framework.framework_output,
 357                             "%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret);
 358         }
 359         else {
 360             if (mca_btl_portals4_component.portals_max_msg_size > portals4_ni_limits.max_msg_size)
 361                 mca_btl_portals4_component.portals_max_msg_size = portals4_ni_limits.max_msg_size;
 362             if (mca_btl_portals4_module.super.btl_max_send_size > portals4_ni_limits.max_msg_size)
 363                 mca_btl_portals4_module.super.btl_max_send_size = portals4_ni_limits.max_msg_size;
 364             if (mca_btl_portals4_module.super.btl_get_limit > portals4_ni_limits.max_msg_size)
 365                 mca_btl_portals4_module.super.btl_get_limit = portals4_ni_limits.max_msg_size;
 366             OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d max_msg_size=%ld",
 367                                  *num_btls, mca_btl_portals4_component.portals_max_msg_size));
 368 
 369             (*num_btls)++;
 370         }
 371     }
 372     if (0 == *num_btls) goto error;
 373 
 374     /*
 375      * Configure the different network interfaces and the associated btl modules
 376      */
 377     mca_btl_portals4_component.num_btls = *num_btls;
 378     mca_btl_portals4_component.btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) );
 379     mca_btl_portals4_component.eqs_h = malloc(mca_btl_portals4_component.num_btls * sizeof(ptl_handle_eq_t));
 380     ptl_process_ids = malloc(mca_btl_portals4_component.num_btls * sizeof(ptl_process_t) );
 381 
 382     for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
 383         mca_btl_portals4_component.btls[interface] = NULL;
 384         mca_btl_portals4_component.eqs_h[interface] = PTL_EQ_NONE;
 385     }
 386     for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
 387         portals4_btl = malloc(sizeof(mca_btl_portals4_module_t));
 388         mca_btl_portals4_component.btls[interface] = portals4_btl;
 389 
 390         /* Copy the default module */
 391         memcpy(portals4_btl, &mca_btl_portals4_module, sizeof(mca_btl_portals4_module_t));
 392 
 393         portals4_btl->interface_num = interface;
 394         portals4_btl->portals_ni_h = portals4_nis_h[interface];
 395         portals4_btl->portals_max_outstanding_ops = mca_btl_portals4_component.portals_max_outstanding_ops;
 396 
 397         OBJ_CONSTRUCT(&(portals4_btl->portals_frag_eager), opal_free_list_t);
 398         OBJ_CONSTRUCT(&(portals4_btl->portals_frag_max), opal_free_list_t);
 399         OBJ_CONSTRUCT(&(portals4_btl->portals_frag_user), opal_free_list_t);
 400 
 401         /* eager frags */
 402         opal_free_list_init (&(portals4_btl->portals_frag_eager),
 403                         sizeof(mca_btl_portals4_frag_eager_t) +
 404                         portals4_btl->super.btl_eager_limit,
 405                         opal_cache_line_size,
 406                         OBJ_CLASS(mca_btl_portals4_frag_eager_t),
 407                         0,opal_cache_line_size,
 408                         mca_btl_portals4_component.portals_free_list_init_num,
 409                         mca_btl_portals4_component.portals_free_list_eager_max_num,
 410                         mca_btl_portals4_component.portals_free_list_inc_num,
 411                         NULL, 0, NULL, NULL, NULL);
 412 
 413         /* send frags */
 414         opal_free_list_init (&(portals4_btl->portals_frag_max),
 415                         sizeof(mca_btl_portals4_frag_max_t) +
 416                         portals4_btl->super.btl_max_send_size,
 417                         opal_cache_line_size,
 418                         OBJ_CLASS(mca_btl_portals4_frag_max_t),
 419                         0,opal_cache_line_size,
 420                         mca_btl_portals4_component.portals_free_list_init_num,
 421                         mca_btl_portals4_component.portals_free_list_max_num,
 422                         mca_btl_portals4_component.portals_free_list_inc_num,
 423                         NULL, 0, NULL, NULL, NULL);
 424 
 425         /* user frags */
 426         opal_free_list_init (&(portals4_btl->portals_frag_user),
 427                         sizeof(mca_btl_portals4_frag_user_t),
 428                         opal_cache_line_size,
 429                         OBJ_CLASS(mca_btl_portals4_frag_user_t),
 430                         0,opal_cache_line_size,
 431                         mca_btl_portals4_component.portals_free_list_init_num,
 432                         mca_btl_portals4_component.portals_free_list_max_num,
 433                         mca_btl_portals4_component.portals_free_list_inc_num,
 434                         NULL, 0, NULL, NULL, NULL);
 435 
 436         /* receive block list */
 437         OBJ_CONSTRUCT(&(portals4_btl->portals_recv_blocks), opal_list_t);
 438     }
 439     free(portals4_nis_h);
 440     portals4_nis_h = NULL;
 441 
 442     /* Publish our NID(s)/PID(s) in the modex */
 443     for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
 444         portals4_btl = mca_btl_portals4_component.btls[interface];
 445 
 446         ret = PtlGetPhysId(portals4_btl->portals_ni_h ,&ptl_process_ids[interface]);
 447         if (PTL_OK != ret) {
 448             opal_output_verbose(1, opal_btl_base_framework.framework_output,
 449                             "%s:%d: PtlGetPhysId for NI %d failed: %d\n",
 450                             __FILE__, __LINE__, interface, ret);
 451             goto error;
 452         }
 453 
 454         OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 455                   "PtlGetPhysId NI number %d: ni_h=%d rank=%x nid=%x pid=%x\n",
 456                   interface, portals4_btl->portals_ni_h,
 457                   ptl_process_ids[interface].rank,
 458                   ptl_process_ids[interface].phys.nid, ptl_process_ids[interface].phys.pid));
 459     }
 460     OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL,
 461                     &mca_btl_portals4_component.super.btl_version,
 462                     ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t));
 463     if (OPAL_SUCCESS != ret) {
 464         opal_output_verbose(1, opal_btl_base_framework.framework_output,
 465                         "%s:%d: opal_modex_send failed: %d\n",
 466                         __FILE__, __LINE__, ret);
 467         goto error;
 468     }
 469     free(ptl_process_ids);
 470     ptl_process_ids = NULL;
 471 
 472     btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) );
 473     memcpy(btls , mca_btl_portals4_component.btls,
 474             mca_btl_portals4_component.num_btls*sizeof(mca_btl_portals4_module_t*) );
 475 
 476     opal_output_verbose(1, opal_btl_base_framework.framework_output, "The btl portals4 component has been initialized and uses %d NI(s)",
 477         mca_btl_portals4_component.num_btls);
 478 
 479     mca_btl_portals4_component.need_init = 1;
 480 
 481     return btls;
 482 
 483  error:
 484     opal_output_verbose(1, opal_btl_base_framework.framework_output, "Error in mca_btl_portals4_component_init\n");
 485 
 486     if (*num_btls) {
 487         if (NULL != portals4_nis_h) free(portals4_nis_h);
 488         if (NULL != ptl_process_ids) free(ptl_process_ids);
 489 
 490         for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
 491             portals4_btl = mca_btl_portals4_component.btls[interface];
 492             if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl);
 493         }
 494         mca_btl_portals4_component.num_btls = 0;
 495         *num_btls = 0;
 496         if (NULL != mca_btl_portals4_component.btls)  free(mca_btl_portals4_component.btls);
 497         if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
 498         mca_btl_portals4_component.btls = NULL;
 499         mca_btl_portals4_component.eqs_h = NULL;
 500 
 501     }
 502  return NULL;
 503 }
 504 
 505 int
 506 mca_btl_portals4_get_error(int ptl_error)
 507 {
 508     int ret;
 509 
 510     switch (ptl_error) {
 511     case PTL_OK:
 512         ret = OPAL_SUCCESS;
 513         break;
 514     case PTL_ARG_INVALID:
 515         ret = OPAL_ERR_BAD_PARAM;
 516         break;
 517     case PTL_CT_NONE_REACHED:
 518         ret = OPAL_ERR_TIMEOUT;
 519         break;
 520     case PTL_EQ_DROPPED:
 521         ret = OPAL_ERR_OUT_OF_RESOURCE;
 522         break;
 523     case PTL_EQ_EMPTY:
 524         ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
 525         break;
 526     case PTL_FAIL:
 527         ret = OPAL_ERROR;
 528         break;
 529     case PTL_IN_USE:
 530         ret = OPAL_ERR_RESOURCE_BUSY;
 531         break;
 532     case PTL_INTERRUPTED:
 533         ret = OPAL_ERR_RESOURCE_BUSY;
 534         break;
 535     case PTL_LIST_TOO_LONG:
 536         ret = OPAL_ERR_OUT_OF_RESOURCE;
 537         break;
 538     case PTL_NO_INIT:
 539         ret = OPAL_ERR_FATAL;
 540         break;
 541     case PTL_NO_SPACE:
 542         ret = OPAL_ERR_OUT_OF_RESOURCE;
 543         break;
 544     case PTL_PID_IN_USE:
 545         ret = OPAL_ERR_BAD_PARAM;
 546         break;
 547     case PTL_PT_FULL:
 548         ret = OPAL_ERR_OUT_OF_RESOURCE;
 549         break;
 550     case PTL_PT_EQ_NEEDED:
 551         ret = OPAL_ERR_FATAL;
 552         break;
 553     case PTL_PT_IN_USE:
 554         ret = OPAL_ERR_RESOURCE_BUSY;
 555         break;
 556 
 557     default:
 558         ret = OPAL_ERROR;
 559     }
 560 
 561     return ret;
 562 }
 563 
 564 int
 565 mca_btl_portals4_component_progress(void)
 566 {
 567     mca_btl_portals4_module_t *portals4_btl;
 568     int num_progressed = 0;
 569     int ret, btl_ownership;
 570     mca_btl_portals4_frag_t *frag = NULL;
 571     mca_btl_base_tag_t tag;
 572     static ptl_event_t ev;
 573     unsigned int which;
 574     mca_btl_active_message_callback_t* reg;
 575     mca_btl_base_segment_t seg[2];
 576     mca_btl_base_descriptor_t btl_base_descriptor;
 577 
 578     while (true) {
 579         ret = PtlEQPoll(mca_btl_portals4_component.eqs_h, mca_btl_portals4_component.num_btls, 0, &ev, &which);
 580 
 581         if (PTL_OK == ret) {
 582             OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlEQPoll Event received: %d (fail=%d) on NI %d\n",
 583                 ev.type, ev.ni_fail_type, which));
 584             num_progressed++;
 585             portals4_btl = mca_btl_portals4_component.btls[which];
 586 
 587             switch (ev.type) {
 588 
 589             case PTL_EVENT_SEND:   /* generated on source (origin) when put stops sending */
 590 
 591                 frag = ev.user_ptr;
 592                 if (NULL == frag) {
 593                     opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_SEND event with NULL user_ptr");
 594                     break;
 595                 }
 596                 btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
 597 
 598                 if (!mca_btl_portals4_component.portals_need_ack) {
 599                     /* my part's done, in portals we trust! */
 600                     if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ){
 601                         OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 602                             "PTL_EVENT_SEND: Direct call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
 603                         frag->base.des_cbfunc(&portals4_btl->super,
 604                                               frag->endpoint,
 605                                               &frag->base,
 606                                               OPAL_SUCCESS);
 607                     }
 608                     if (btl_ownership) {
 609                         mca_btl_portals4_free(&portals4_btl->super, &frag->base);
 610                     }
 611                     if (0 != frag->size) {
 612                         OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
 613                         OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 614                             "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
 615                             portals4_btl->portals_outstanding_ops));
 616                     }
 617                 }
 618 
 619                 goto done;
 620                 break;
 621 
 622             case PTL_EVENT_ACK:   /* Ack that a put as completed on other side. We just call the callback function */
 623 
 624                 frag = ev.user_ptr;
 625                 if (NULL == frag) {
 626                     opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_ACK event with NULL user_ptr");
 627                     break;
 628                 }
 629                 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 630                     "PTL_EVENT_ACK received rlength=%ld mlength=%ld des_flags=%d\n", ev.rlength, ev.mlength, frag->base.des_flags));
 631                 btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
 632 
 633                 /* other side received the message.  should have
 634                    received entire thing */
 635                 /* let the PML know we're done */
 636                 if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
 637                     OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 638                         "PTL_EVENT_ACK: Call to des_cbfunc %lx\n", (uint64_t)frag->base.des_cbfunc));
 639                     frag->base.des_cbfunc(&portals4_btl->super,
 640                                           frag->endpoint,
 641                                           &frag->base,
 642                                           OPAL_SUCCESS);
 643                 }
 644                 if (btl_ownership) {
 645                     mca_btl_portals4_free(&portals4_btl->super, &frag->base);
 646                 }
 647 
 648                 if (0 != frag->size) {
 649                     OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
 650                     OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 651                         "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
 652                 }
 653 
 654                 goto done;
 655                 break;
 656 
 657             case PTL_EVENT_PUT:   /* Generated on destination (target) when a put into memory ends */
 658 
 659                 tag = (unsigned char) (ev.hdr_data);
 660 
 661                 btl_base_descriptor.des_segments = seg;
 662                 btl_base_descriptor.des_segment_count = 1;
 663                 seg[0].seg_addr.pval = ev.start;
 664                 seg[0].seg_len = ev.mlength;
 665 
 666                 reg = mca_btl_base_active_message_trigger + tag;
 667                 OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output,
 668                     "PTL_EVENT_PUT: tag=%x base_descriptor=%p cbfunc: %lx\n", tag, (void*)&btl_base_descriptor, (uint64_t)reg->cbfunc));
 669                 reg->cbfunc(&portals4_btl->super, tag, &btl_base_descriptor, reg->cbdata);
 670 
 671                 goto done;
 672                 break;
 673 
 674             case PTL_EVENT_PUT_OVERFLOW:
 675                 /* */
 676                 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 677                     "PTL_EVENT_OVERFLOW received\n"));
 678                 goto done;
 679                 break;
 680 
 681             case PTL_EVENT_LINK:
 682                 /* */
 683                 frag = ev.user_ptr;
 684                 if (NULL == frag) {
 685                     opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_LINK event with NULL user_ptr");
 686                     break;
 687                 }
 688                 goto done;
 689                 break;
 690 
 691             case PTL_EVENT_AUTO_UNLINK:
 692                 /* */
 693                 /* The Priority List is used, so PTL_EVENT_AUTO_FREE will never be received. So, we have to reactivate the block here */
 694                 mca_btl_portals4_activate_block(ev.user_ptr);
 695                 goto done;
 696                 break;
 697 
 698             case PTL_EVENT_AUTO_FREE:
 699                 /* */
 700                 goto done;
 701                 break;
 702 
 703             case PTL_EVENT_GET:   /* Generated on source (target) when a get from memory ends */
 704                 /* */
 705                 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 706                     "PTL_EVENT_GET received at target rlength=%ld mlength=%ld\n", ev.rlength, ev.mlength));
 707                 goto done;
 708                 break;
 709 
 710             case PTL_EVENT_REPLY:
 711                 /* */
 712                 frag = ev.user_ptr;
 713 
 714                 if (PTL_NI_PERM_VIOLATION == ev.ni_fail_type) {
 715                         opal_output_verbose(1, opal_btl_base_framework.framework_output,
 716                             "Warning : PTL_EVENT_REPLY with PTL_NI_PERM_VIOLATION received, try to re-issue a PtlGet");
 717 
 718                     /* The distant PtlMEAppend is not finished (distant PTL_EVENT_LINK not received) */
 719                     /* Re-issue the PtlGet (see btl_portals4_rdma.c) */
 720                     ret = PtlGet(portals4_btl->send_md_h,
 721                                  (ptl_size_t) frag->addr,
 722                                  frag->length,
 723                                  frag->peer_proc,
 724                                  portals4_btl->recv_idx,
 725                                  frag->match_bits, /* match bits */
 726                                  0, // Warning : should be  ev.remote_offset but it is not defined,
 727                                  frag);
 728                     if (OPAL_UNLIKELY(PTL_OK != ret)) {
 729                         opal_output_verbose(1, opal_btl_base_framework.framework_output,
 730                                             "%s:%d: Re-issued PtlGet failed: %d",
 731                                             __FILE__, __LINE__, ret);
 732                         return OPAL_ERROR;
 733                     }
 734 
 735                     OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 736                         "Re-issued PtlGet length=%ld recv_idx=%d rank=%x pid=%x nid=%x match_bits=%lx\n",
 737                         frag->length, portals4_btl->recv_idx,
 738                         frag->peer_proc.rank, frag->peer_proc.phys.pid, frag->peer_proc.phys.nid, frag->match_bits));
 739                 }
 740                 else {
 741                     OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 742                         "PTL_EVENT_REPLY: Call to rdma_cbfunc=%lx\n", (uint64_t)frag->rdma_cb.func));
 743                     frag->rdma_cb.func(&portals4_btl->super,
 744                                  frag->endpoint,
 745                                  ev.start,
 746                                  frag->rdma_cb.local_handle,
 747                                  frag->rdma_cb.context,
 748                                  frag->rdma_cb.data,
 749                                  OPAL_SUCCESS);
 750 
 751                     OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
 752                     OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
 753                     OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
 754                         "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
 755                     goto done;
 756                 }
 757                 break;
 758 
 759             default:
 760                 /* */
 761                 goto done;
 762                 break;
 763             }
 764         } else if (PTL_EQ_EMPTY == ret) {
 765             /* there's nothing in the queue.  This is actually the
 766                common case, so the easiest way to make the compiler
 767                emit something that doesn't completely blow here is to
 768                just go back to a good old goto */
 769             goto done;
 770             break;
 771 
 772         } else if (PTL_EQ_DROPPED == ret) {
 773             opal_output(opal_btl_base_framework.framework_output,
 774                         "Flow control situation without recovery (EQ_DROPPED)");
 775             break;
 776         } else {
 777             opal_output(opal_btl_base_framework.framework_output,
 778                         "Error returned from PtlEQPoll: %d", ret);
 779             break;
 780         }
 781     }
 782  done:
 783     return num_progressed;
 784 }

/* [<][>][^][v][top][bottom][index][help] */