root/ompi/mca/pml/ob1/pml_ob1.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. mca_pml_ob1_enable
  2. mca_pml_ob1_add_comm
  3. mca_pml_ob1_del_comm
  4. mca_pml_ob1_add_procs
  5. mca_pml_ob1_del_procs
  6. mca_pml_ob1_dump_hdr
  7. mca_pml_ob1_dump_frag_list
  8. mca_pml_ob1_dump_cant_match
  9. mca_pml_ob1_dump
  10. mca_pml_ob1_fin_completion
  11. mca_pml_ob1_send_fin
  12. mca_pml_ob1_process_pending_packets
  13. mca_pml_ob1_process_pending_rdma
  14. mca_pml_ob1_error_handler
  15. mca_pml_ob1_ft_event
  16. mca_pml_ob1_ft_event
  17. mca_pml_ob1_com_btl_comp

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2018 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2008      UT-Battelle, LLC. All rights reserved.
  14  * Copyright (c) 2006-2008 University of Houston.  All rights reserved.
  15  * Copyright (c) 2009-2010 Oracle and/or its affiliates.  All rights reserved
  16  * Copyright (c) 2011      Sandia National Laboratories. All rights reserved.
  17  * Copyright (c) 2011-2015 Los Alamos National Security, LLC. All rights
  18  *                         reserved.
  19  * Copyright (c) 2012      Cisco Systems, Inc.  All rights reserved.
  20  * Copyright (c) 2015      FUJITSU LIMITED.  All rights reserved.
  21  * Copyright (c) 2018      Sandia National Laboratories
  22  *                         All rights reserved.
  23  * $COPYRIGHT$
  24  *
  25  * Additional copyrights may follow
  26  *
  27  * $HEADER$
  28  */
  29 
  30 #include "ompi_config.h"
  31 
  32 #include <stdlib.h>
  33 #include <string.h>
  34 
  35 #include "opal/class/opal_bitmap.h"
  36 #include "opal/util/output.h"
  37 #include "opal/util/show_help.h"
  38 #include "opal_stdint.h"
  39 #include "opal/mca/btl/btl.h"
  40 #include "opal/mca/btl/base/base.h"
  41 
  42 #include "ompi/mca/pml/pml.h"
  43 #include "ompi/mca/pml/base/base.h"
  44 #include "ompi/mca/pml/base/base.h"
  45 #include "ompi/mca/bml/base/base.h"
  46 #include "opal/mca/pmix/pmix.h"
  47 #include "ompi/runtime/ompi_cr.h"
  48 
  49 #include "pml_ob1.h"
  50 #include "pml_ob1_component.h"
  51 #include "pml_ob1_comm.h"
  52 #include "pml_ob1_hdr.h"
  53 #include "pml_ob1_recvfrag.h"
  54 #include "pml_ob1_sendreq.h"
  55 #include "pml_ob1_recvreq.h"
  56 #include "pml_ob1_rdmafrag.h"
  57 
/* Global ob1 PML module instance.  The inner braces initialize the
 * embedded base-module interface (super) with ob1's entry points in
 * the order declared by the PML framework. */
mca_pml_ob1_t mca_pml_ob1 = {
    {
        mca_pml_ob1_add_procs,
        mca_pml_ob1_del_procs,
        mca_pml_ob1_enable,
        NULL,  /* mca_pml_ob1_progress, */
        mca_pml_ob1_add_comm,
        mca_pml_ob1_del_comm,
        mca_pml_ob1_irecv_init,
        mca_pml_ob1_irecv,
        mca_pml_ob1_recv,
        mca_pml_ob1_isend_init,
        mca_pml_ob1_isend,
        mca_pml_ob1_send,
        mca_pml_ob1_iprobe,
        mca_pml_ob1_probe,
        mca_pml_ob1_start,
        mca_pml_ob1_improbe,
        mca_pml_ob1_mprobe,
        mca_pml_ob1_imrecv,
        mca_pml_ob1_mrecv,
        mca_pml_ob1_dump,
        mca_pml_ob1_ft_event,
        65535,   /* pml_max_contextid: checked against comm->c_contextid in mca_pml_ob1_add_comm() */
        INT_MAX  /* maximum tag value -- presumably pml_max_tag; confirm against pml.h */
    }
};
  85 
#if OPAL_CUDA_SUPPORT
/* Defined in the ob1 CUDA support code; declared here so CUDA-enabled
 * builds can reference it from this translation unit. */
extern void mca_pml_ob1_cuda_add_ipc_support(struct mca_btl_base_module_t* btl,
                                             int32_t flags, ompi_proc_t* errproc,
                                             char* btlinfo);
#endif /* OPAL_CUDA_SUPPORT */

/* Forward declaration: BTL error callback registered with the BML in
 * mca_pml_ob1_add_procs(). */
void mca_pml_ob1_error_handler( struct mca_btl_base_module_t* btl,
                                int32_t flags, opal_proc_t* errproc,
                                char* btlinfo );
  95 
  96 int mca_pml_ob1_enable(bool enable)
  97 {
  98     if( false == enable ) {
  99         return OMPI_SUCCESS;
 100     }
 101 
 102     OBJ_CONSTRUCT(&mca_pml_ob1.lock, opal_mutex_t);
 103 
 104     /* fragments */
 105     OBJ_CONSTRUCT(&mca_pml_ob1.rdma_frags, opal_free_list_t);
 106     opal_free_list_init ( &mca_pml_ob1.rdma_frags,
 107                           sizeof(mca_pml_ob1_rdma_frag_t),
 108                           opal_cache_line_size,
 109                           OBJ_CLASS(mca_pml_ob1_rdma_frag_t),
 110                           0,opal_cache_line_size,
 111                           mca_pml_ob1.free_list_num,
 112                           mca_pml_ob1.free_list_max,
 113                           mca_pml_ob1.free_list_inc,
 114                           NULL, 0, NULL, NULL, NULL);
 115 
 116     OBJ_CONSTRUCT(&mca_pml_ob1.recv_frags, opal_free_list_t);
 117 
 118     opal_free_list_init ( &mca_pml_ob1.recv_frags,
 119                           sizeof(mca_pml_ob1_recv_frag_t) + mca_pml_ob1.unexpected_limit,
 120                           opal_cache_line_size,
 121                           OBJ_CLASS(mca_pml_ob1_recv_frag_t),
 122                           0,opal_cache_line_size,
 123                           mca_pml_ob1.free_list_num,
 124                           mca_pml_ob1.free_list_max,
 125                           mca_pml_ob1.free_list_inc,
 126                           NULL, 0, NULL, NULL, NULL);
 127 
 128     OBJ_CONSTRUCT(&mca_pml_ob1.pending_pckts, opal_free_list_t);
 129     opal_free_list_init ( &mca_pml_ob1.pending_pckts,
 130                           sizeof(mca_pml_ob1_pckt_pending_t),
 131                           opal_cache_line_size,
 132                           OBJ_CLASS(mca_pml_ob1_pckt_pending_t),
 133                           0,opal_cache_line_size,
 134                           mca_pml_ob1.free_list_num,
 135                           mca_pml_ob1.free_list_max,
 136                           mca_pml_ob1.free_list_inc,
 137                           NULL, 0, NULL, NULL, NULL);
 138 
 139 
 140     OBJ_CONSTRUCT(&mca_pml_ob1.buffers, opal_free_list_t);
 141     OBJ_CONSTRUCT(&mca_pml_ob1.send_ranges, opal_free_list_t);
 142     opal_free_list_init ( &mca_pml_ob1.send_ranges,
 143                           sizeof(mca_pml_ob1_send_range_t) +
 144                           sizeof(mca_pml_ob1_com_btl_t[mca_pml_ob1.max_send_per_range]),
 145                           opal_cache_line_size,
 146                           OBJ_CLASS(mca_pml_ob1_send_range_t),
 147                           0,opal_cache_line_size,
 148                           mca_pml_ob1.free_list_num,
 149                           mca_pml_ob1.free_list_max,
 150                           mca_pml_ob1.free_list_inc,
 151                           NULL, 0, NULL, NULL, NULL);
 152 
 153     /* pending operations */
 154     OBJ_CONSTRUCT(&mca_pml_ob1.send_pending, opal_list_t);
 155     OBJ_CONSTRUCT(&mca_pml_ob1.recv_pending, opal_list_t);
 156     OBJ_CONSTRUCT(&mca_pml_ob1.pckt_pending, opal_list_t);
 157     OBJ_CONSTRUCT(&mca_pml_ob1.rdma_pending, opal_list_t);
 158 
 159     /* missing communicator pending list */
 160     OBJ_CONSTRUCT(&mca_pml_ob1.non_existing_communicator_pending, opal_list_t);
 161 
 162     /**
 163      * If we get here this is the PML who get selected for the run. We
 164      * should get ownership for the send and receive requests list, and
 165      * initialize them with the size of our own requests.
 166      */
 167     opal_free_list_init ( &mca_pml_base_send_requests,
 168                           sizeof(mca_pml_ob1_send_request_t) +
 169                           sizeof(mca_pml_ob1_com_btl_t[mca_pml_ob1.max_rdma_per_request]),
 170                           opal_cache_line_size,
 171                           OBJ_CLASS(mca_pml_ob1_send_request_t),
 172                           0,opal_cache_line_size,
 173                           mca_pml_ob1.free_list_num,
 174                           mca_pml_ob1.free_list_max,
 175                           mca_pml_ob1.free_list_inc,
 176                           NULL, 0, NULL, NULL, NULL);
 177 
 178     opal_free_list_init ( &mca_pml_base_recv_requests,
 179                           sizeof(mca_pml_ob1_recv_request_t) +
 180                           sizeof(mca_pml_ob1_com_btl_t[mca_pml_ob1.max_rdma_per_request]),
 181                           opal_cache_line_size,
 182                           OBJ_CLASS(mca_pml_ob1_recv_request_t),
 183                           0,opal_cache_line_size,
 184                           mca_pml_ob1.free_list_num,
 185                           mca_pml_ob1.free_list_max,
 186                           mca_pml_ob1.free_list_inc,
 187                           NULL, 0, NULL, NULL, NULL);
 188 
 189     mca_pml_ob1.enabled = true;
 190     return OMPI_SUCCESS;
 191 }
 192 
/**
 * Attach ob1-private state to a newly created communicator.
 *
 * Allocates and sizes the per-communicator matching structures, then
 * drains any fragments that arrived for this context id before the
 * communicator existed (parked on the
 * non_existing_communicator_pending list), replaying the normal
 * in-order / out-of-order matching logic for each of them.
 *
 * @param comm  the communicator being created (not yet visible to the
 *              user, so no matching lock is required here).
 * @return OMPI_SUCCESS, or OMPI_ERR_OUT_OF_RESOURCE if the pml comm
 *         structure cannot be allocated or the context id exceeds
 *         what ob1 supports.
 */
int mca_pml_ob1_add_comm(ompi_communicator_t* comm)
{
    /* allocate pml specific comm data */
    mca_pml_ob1_comm_t* pml_comm = OBJ_NEW(mca_pml_ob1_comm_t);
    mca_pml_ob1_recv_frag_t *frag, *next_frag;
    mca_pml_ob1_comm_proc_t* pml_proc;
    mca_pml_ob1_match_hdr_t* hdr;

    if (NULL == pml_comm) {
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    /* should never happen, but it was, so check */
    if (comm->c_contextid > mca_pml_ob1.super.pml_max_contextid) {
        OBJ_RELEASE(pml_comm);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }

    ompi_comm_assert_subscribe (comm, OMPI_COMM_ASSERT_NO_ANY_SOURCE);
    ompi_comm_assert_subscribe (comm, OMPI_COMM_ASSERT_ALLOW_OVERTAKE);

    mca_pml_ob1_comm_init_size(pml_comm, comm->c_remote_group->grp_proc_count);
    comm->c_pml_comm = pml_comm;

    /* Grab all related messages from the non_existing_communicator pending queue */
    OPAL_LIST_FOREACH_SAFE(frag, next_frag, &mca_pml_ob1.non_existing_communicator_pending, mca_pml_ob1_recv_frag_t) {
        hdr = &frag->hdr.hdr_match;

        /* Is this fragment for the current communicator ? */
        if( frag->hdr.hdr_match.hdr_ctx != comm->c_contextid )
            continue;

        /* As we now know we work on a fragment for this communicator
         * we should remove it from the
         * non_existing_communicator_pending list. */
        opal_list_remove_item (&mca_pml_ob1.non_existing_communicator_pending,
                               (opal_list_item_t *) frag);

        /* We generate the MSG_ARRIVED event as soon as the PML is aware
         * of a matching fragment arrival. Independing if it is received
         * on the correct order or not. This will allow the tools to
         * figure out if the messages are not received in the correct
         * order (if multiple network interfaces).
         */
        PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_ARRIVED, comm,
                               hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);

        /* There is no matching to be done, and no lock to be held on the communicator as
         * we know at this point that the communicator has not yet been returned to the user.
         * The only required protection is around the non_existing_communicator_pending queue.
         * We just have to push the fragment into the unexpected list of the corresponding
         * proc, or into the out-of-order (cant_match) list.
         */
        pml_proc = mca_pml_ob1_peer_lookup(comm, hdr->hdr_src);

        /* With ALLOW_OVERTAKE, sequence numbers are ignored: every
         * fragment goes straight to the unexpected queue. */
        if (OMPI_COMM_CHECK_ASSERT_ALLOW_OVERTAKE(comm)) {
#if !MCA_PML_OB1_CUSTOM_MATCH
            opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag );
#else
            custom_match_umq_append(pml_comm->umq, hdr->hdr_tag, hdr->hdr_src, frag);
#endif
            PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm,
                                   hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
            continue;
        }

        /* compare only the low 16 bits: hdr_seq wraps at 16 bits on the wire */
        if (((uint16_t)hdr->hdr_seq) == ((uint16_t)pml_proc->expected_sequence) ) {

        add_fragment_to_unexpected:
            /* We're now expecting the next sequence number. */
            pml_proc->expected_sequence++;
#if !MCA_PML_OB1_CUSTOM_MATCH
            opal_list_append( &pml_proc->unexpected_frags, (opal_list_item_t*)frag );
#else
            custom_match_umq_append(pml_comm->umq, hdr->hdr_tag, hdr->hdr_src, frag);
#endif
            PERUSE_TRACE_MSG_EVENT(PERUSE_COMM_MSG_INSERT_IN_UNEX_Q, comm,
                                   hdr->hdr_src, hdr->hdr_tag, PERUSE_RECV);
            /* And now the ugly part. As some fragments can be inserted in the cant_match list,
             * every time we succesfully add a fragment in the unexpected list we have to make
             * sure the next one is not in the cant_match. Otherwise, we will endup in a deadlock
             * situation as the cant_match is only checked when a new fragment is received from
             * the network.
             */
            /* NOTE: `frag` is reassigned here; iteration safety is
             * preserved because OPAL_LIST_FOREACH_SAFE already saved
             * next_frag at the top of this iteration. */
            if( NULL != pml_proc->frags_cant_match ) {
                frag = check_cantmatch_for_match(pml_proc);
                if( NULL != frag ) {
                    hdr = &frag->hdr.hdr_match;
                    goto add_fragment_to_unexpected;
                }
            }
        } else {
            /* out of order: park the fragment until its predecessors arrive */
            append_frag_to_ordered_list(&pml_proc->frags_cant_match, frag,
                                        pml_proc->expected_sequence);
        }
    }
    return OMPI_SUCCESS;
}
 291 
 292 int mca_pml_ob1_del_comm(ompi_communicator_t* comm)
 293 {
 294     OBJ_RELEASE(comm->c_pml_comm);
 295     comm->c_pml_comm = NULL;
 296     return OMPI_SUCCESS;
 297 }
 298 
 299 
 300 /*
 301  *   For each proc setup a datastructure that indicates the BTLs
 302  *   that can be used to reach the destination.
 303  *
 304  */
 305 
 306 int mca_pml_ob1_add_procs(ompi_proc_t** procs, size_t nprocs)
 307 {
 308     mca_btl_base_selected_module_t *sm;
 309     opal_bitmap_t reachable;
 310     int rc;
 311 
 312     if(nprocs == 0)
 313         return OMPI_SUCCESS;
 314 
 315     OBJ_CONSTRUCT(&reachable, opal_bitmap_t);
 316     rc = opal_bitmap_init(&reachable, (int)nprocs);
 317     if(OMPI_SUCCESS != rc)
 318         return rc;
 319 
 320     /*
 321      * JJH: Disable this in FT enabled builds since
 322      * we use a wrapper PML. It will cause this check to
 323      * return failure as all processes will return the wrapper PML
 324      * component in use instead of the wrapped PML component underneath.
 325      */
 326 #if OPAL_ENABLE_FT_CR == 0
 327     /* make sure remote procs are using the same PML as us */
 328     if (OMPI_SUCCESS != (rc = mca_pml_base_pml_check_selected("ob1",
 329                                                               procs,
 330                                                               nprocs))) {
 331         return rc;
 332     }
 333 #endif
 334 
 335     rc = mca_bml.bml_add_procs( nprocs,
 336                                 procs,
 337                                 &reachable );
 338     if(OMPI_SUCCESS != rc)
 339         goto cleanup_and_return;
 340 
 341     /* Check that values supplied by all initialized btls will work
 342        for us.  Note that this is the list of all initialized BTLs,
 343        not the ones used for the just added procs.  This is a little
 344        overkill and inaccurate, as we may end up not using the BTL in
 345        question and all add_procs calls after the first one are
 346        duplicating an already completed check.  But the final
 347        initialization of the PML occurs before the final
 348        initialization of the BTLs, and iterating through the in-use
 349        BTLs requires iterating over the procs, as the BML does not
 350        expose all currently in use btls. */
 351 
 352     OPAL_LIST_FOREACH(sm, &mca_btl_base_modules_initialized, mca_btl_base_selected_module_t) {
 353         if ((MCA_BTL_FLAGS_SEND & sm->btl_module->btl_flags) && sm->btl_module->btl_eager_limit < sizeof(mca_pml_ob1_hdr_t)) {
 354             opal_show_help("help-mpi-pml-ob1.txt", "eager_limit_too_small",
 355                            true,
 356                            sm->btl_component->btl_version.mca_component_name,
 357                            ompi_process_info.nodename,
 358                            sm->btl_component->btl_version.mca_component_name,
 359                            sm->btl_module->btl_eager_limit,
 360                            sm->btl_component->btl_version.mca_component_name,
 361                            sizeof(mca_pml_ob1_hdr_t),
 362                            sm->btl_component->btl_version.mca_component_name);
 363             rc = OMPI_ERR_BAD_PARAM;
 364             goto cleanup_and_return;
 365         }
 366 #if OPAL_CUDA_GDR_SUPPORT
 367         /* If size is SIZE_MAX, then we know we want to set this to the minimum possible
 368          * value which is the size of the PML header. */
 369         if (SIZE_MAX == sm->btl_module->btl_cuda_eager_limit) {
 370             sm->btl_module->btl_cuda_eager_limit = sizeof(mca_pml_ob1_hdr_t);
 371         }
 372         /* If size is 0, then this value is unused.  If it is non-zero then do some
 373          * extra checking of it. */
 374         if (0 != sm->btl_module->btl_cuda_eager_limit) {
 375             if (sm->btl_module->btl_cuda_eager_limit < sizeof(mca_pml_ob1_hdr_t)) {
 376                 opal_show_help("help-mpi-pml-ob1.txt", "cuda_eager_limit_too_small",
 377                                true,
 378                                sm->btl_component->btl_version.mca_component_name,
 379                                ompi_process_info.nodename,
 380                                sm->btl_component->btl_version.mca_component_name,
 381                                sm->btl_module->btl_cuda_eager_limit,
 382                                sm->btl_component->btl_version.mca_component_name,
 383                                sizeof(mca_pml_ob1_hdr_t),
 384                                sm->btl_component->btl_version.mca_component_name);
 385                 rc = OMPI_ERR_BAD_PARAM;
 386                 goto cleanup_and_return;
 387             }
 388         }
 389         if (0 == sm->btl_module->btl_cuda_rdma_limit) {
 390             /* All is fine.  0 means to ignore value so set to SIZE_MAX */
 391             sm->btl_module->btl_cuda_rdma_limit = SIZE_MAX;
 392         } else {
 393             if (sm->btl_module->btl_cuda_rdma_limit < sm->btl_module->btl_cuda_eager_limit) {
 394                 opal_show_help("help-mpi-pml-ob1.txt", "cuda_rdma_limit_too_small",
 395                                true,
 396                                sm->btl_component->btl_version.mca_component_name,
 397                                ompi_process_info.nodename,
 398                                sm->btl_component->btl_version.mca_component_name,
 399                                sm->btl_module->btl_cuda_rdma_limit,
 400                                sm->btl_component->btl_version.mca_component_name,
 401                                sm->btl_module->btl_cuda_eager_limit,
 402                                sm->btl_component->btl_version.mca_component_name);
 403                 rc = OMPI_ERR_BAD_PARAM;
 404                 goto cleanup_and_return;
 405             }
 406         }
 407 #endif /* OPAL_CUDA_GDR_SUPPORT */
 408     }
 409 
 410 
 411     /* TODO: Move these callback registration to another place */
 412     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_MATCH,
 413                                mca_pml_ob1_recv_frag_callback_match,
 414                                NULL );
 415     if(OMPI_SUCCESS != rc)
 416         goto cleanup_and_return;
 417 
 418     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RNDV,
 419                                mca_pml_ob1_recv_frag_callback_rndv,
 420                                NULL );
 421     if(OMPI_SUCCESS != rc)
 422         goto cleanup_and_return;
 423 
 424     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_RGET,
 425                                mca_pml_ob1_recv_frag_callback_rget,
 426                                NULL );
 427     if(OMPI_SUCCESS != rc)
 428         goto cleanup_and_return;
 429 
 430     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_ACK,
 431                                mca_pml_ob1_recv_frag_callback_ack,
 432                                NULL );
 433     if(OMPI_SUCCESS != rc)
 434         goto cleanup_and_return;
 435 
 436     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_FRAG,
 437                                mca_pml_ob1_recv_frag_callback_frag,
 438                                NULL );
 439     if(OMPI_SUCCESS != rc)
 440         goto cleanup_and_return;
 441 
 442     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_PUT,
 443                                mca_pml_ob1_recv_frag_callback_put,
 444                                NULL );
 445     if(OMPI_SUCCESS != rc)
 446         goto cleanup_and_return;
 447 
 448     rc = mca_bml.bml_register( MCA_PML_OB1_HDR_TYPE_FIN,
 449                                mca_pml_ob1_recv_frag_callback_fin,
 450                                NULL );
 451     if(OMPI_SUCCESS != rc)
 452         goto cleanup_and_return;
 453 
 454     /* register error handlers */
 455     rc = mca_bml.bml_register_error(mca_pml_ob1_error_handler);
 456     if(OMPI_SUCCESS != rc)
 457         goto cleanup_and_return;
 458 
 459   cleanup_and_return:
 460     OBJ_DESTRUCT(&reachable);
 461 
 462     return rc;
 463 }
 464 
 465 /*
 466  * iterate through each proc and notify any PTLs associated
 467  * with the proc that it is/has gone away
 468  */
 469 
/**
 * Remove procs: simply delegates to the BML, which notifies the BTLs
 * associated with each proc that it is going away.
 *
 * @param procs   array of procs being removed.
 * @param nprocs  number of entries in @procs.
 * @return the BML's status code.
 */
int mca_pml_ob1_del_procs(ompi_proc_t** procs, size_t nprocs)
{
    return mca_bml.bml_del_procs(nprocs, procs);
}
 474 
 475 /*
 476  * diagnostics
 477  */
 478 
 479 static void mca_pml_ob1_dump_hdr(mca_pml_ob1_hdr_t* hdr)
 480 {
 481     char *type, header[128];
 482 
 483     switch(hdr->hdr_common.hdr_type) {
 484     case MCA_PML_OB1_HDR_TYPE_MATCH:
 485         type = "MATCH";
 486         snprintf( header, 128, "ctx %5d src %d tag %d seq %d",
 487                   hdr->hdr_match.hdr_ctx, hdr->hdr_match.hdr_src,
 488                   hdr->hdr_match.hdr_tag, hdr->hdr_match.hdr_seq);
 489         break;
 490     case MCA_PML_OB1_HDR_TYPE_RNDV:
 491         type = "RNDV";
 492         snprintf( header, 128, "ctx %5d src %d tag %d seq %d msg_length %" PRIu64,
 493                   hdr->hdr_rndv.hdr_match.hdr_ctx, hdr->hdr_rndv.hdr_match.hdr_src,
 494                   hdr->hdr_rndv.hdr_match.hdr_tag, hdr->hdr_rndv.hdr_match.hdr_seq,
 495                   hdr->hdr_rndv.hdr_msg_length);
 496         break;
 497     case MCA_PML_OB1_HDR_TYPE_RGET:
 498         type = "RGET";
 499         snprintf( header, 128, "ctx %5d src %d tag %d seq %d msg_length %" PRIu64
 500                   "frag %" PRIu64 " src_ptr %" PRIu64,
 501                   hdr->hdr_rndv.hdr_match.hdr_ctx, hdr->hdr_rndv.hdr_match.hdr_src,
 502                   hdr->hdr_rndv.hdr_match.hdr_tag, hdr->hdr_rndv.hdr_match.hdr_seq,
 503                   hdr->hdr_rndv.hdr_msg_length, hdr->hdr_rget.hdr_frag.lval,
 504                   hdr->hdr_rget.hdr_src_ptr);
 505         break;
 506     case MCA_PML_OB1_HDR_TYPE_ACK:
 507         type = "ACK";
 508         snprintf( header, 128, "src_req %p dst_req %p offset %" PRIu64 " size %" PRIu64,
 509                   hdr->hdr_ack.hdr_src_req.pval, hdr->hdr_ack.hdr_dst_req.pval,
 510                   hdr->hdr_ack.hdr_send_offset, hdr->hdr_ack.hdr_send_size);
 511         break;
 512     case MCA_PML_OB1_HDR_TYPE_FRAG:
 513         type = "FRAG";
 514         snprintf( header, 128, "offset %" PRIu64 " src_req %p dst_req %p",
 515                   hdr->hdr_frag.hdr_frag_offset,
 516                   hdr->hdr_frag.hdr_src_req.pval, hdr->hdr_frag.hdr_dst_req.pval);
 517         break;
 518     case MCA_PML_OB1_HDR_TYPE_PUT:
 519         type = "PUT";
 520         snprintf( header, 128, "dst_req %p src_frag %p recv_req %p offset %" PRIu64
 521                   " dst_ptr %" PRIu64 " dst_size %" PRIu64,
 522                   hdr->hdr_rdma.hdr_req.pval, hdr->hdr_rdma.hdr_frag.pval,
 523                   hdr->hdr_rdma.hdr_recv_req.pval, hdr->hdr_rdma.hdr_rdma_offset,
 524                   hdr->hdr_rdma.hdr_dst_ptr, hdr->hdr_rdma.hdr_dst_size);
 525         break;
 526     case MCA_PML_OB1_HDR_TYPE_FIN:
 527         type = "FIN";
 528         header[0] = '\0';
 529         break;
 530     default:
 531         type = "UNKWN";
 532         header[0] = '\0';
 533         break;
 534     }
 535     opal_output(0,"hdr %s [%s] %s", type,
 536                 (hdr->hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NBO ? "nbo" : "   "),
 537                 header);
 538 }
 539 
 540 #if !MCA_PML_OB1_CUSTOM_MATCH
 541 static void mca_pml_ob1_dump_frag_list(opal_list_t* queue, bool is_req)
 542 {
 543     opal_list_item_t* item;
 544     char cpeer[64], ctag[64];
 545 
 546     for( item = opal_list_get_first(queue);
 547          item != opal_list_get_end(queue);
 548          item =  opal_list_get_next(item) ) {
 549 
 550         if( is_req ) {
 551             mca_pml_base_request_t *req = &(((mca_pml_ob1_recv_request_t*)item)->req_recv.req_base);
 552 
 553             if( OMPI_ANY_SOURCE == req->req_peer ) snprintf(cpeer, 64, "%s", "ANY_SOURCE");
 554             else snprintf(cpeer, 64, "%d", req->req_peer);
 555 
 556             if( OMPI_ANY_TAG == req->req_tag ) snprintf(ctag, 64, "%s", "ANY_TAG");
 557             else snprintf(ctag, 64, "%d", req->req_tag);
 558 
 559             opal_output(0, "req %p peer %s tag %s addr %p count %lu datatype %s [%p] [%s %s] req_seq %" PRIu64,
 560                         (void*) req, cpeer, ctag,
 561                         (void*) req->req_addr, req->req_count,
 562                         (0 != req->req_count ? req->req_datatype->name : "N/A"),
 563                         (void*) req->req_datatype,
 564                         (req->req_pml_complete ? "pml_complete" : ""),
 565                         (req->req_free_called ? "freed" : ""),
 566                         req->req_sequence);
 567         } else {
 568             mca_pml_ob1_recv_frag_t* frag = (mca_pml_ob1_recv_frag_t*)item;
 569             mca_pml_ob1_dump_hdr( &frag->hdr );
 570         }
 571     }
 572 }
 573 #endif
 574 
 575 void mca_pml_ob1_dump_cant_match(mca_pml_ob1_recv_frag_t* queue)
 576 {
 577     mca_pml_ob1_recv_frag_t* item = queue;
 578 
 579     do {
 580         mca_pml_ob1_dump_hdr( &item->hdr );
 581         if( NULL != item->range ) {
 582             mca_pml_ob1_recv_frag_t* frag = item->range;
 583             do {
 584                 mca_pml_ob1_dump_hdr( &frag->hdr );
 585                 frag = (mca_pml_ob1_recv_frag_t*)frag->super.super.opal_list_next;
 586             } while( frag != item->range );
 587         }
 588         item = (mca_pml_ob1_recv_frag_t*)item->super.super.opal_list_next;
 589     } while( item != queue );
 590 }
 591 
/**
 * Dump the complete matching state of a communicator: communicator
 * summary, posted wild/specific receives, out-of-sequence fragments,
 * unexpected fragments, and the eager BTLs of every known peer.
 *
 * @param comm     communicator whose pml state is dumped.
 * @param verbose  verbosity level forwarded to each BTL's btl_dump.
 * @return OMPI_SUCCESS always.
 */
int mca_pml_ob1_dump(struct ompi_communicator_t* comm, int verbose)
{
    struct mca_pml_comm_t* pml_comm = comm->c_pml_comm;
    int i;

    /* TODO: don't forget to dump mca_pml_ob1.non_existing_communicator_pending */

    /* NOTE(review): num_procs/last_probed are printed with %lu --
     * verify their declared types match unsigned long on all
     * supported platforms. */
    opal_output(0, "Communicator %s [%p](%d) rank %d recv_seq %d num_procs %lu last_probed %lu\n",
                comm->c_name, (void*) comm, comm->c_contextid, comm->c_my_rank,
                pml_comm->recv_sequence, pml_comm->num_procs, pml_comm->last_probed);

#if !MCA_PML_OB1_CUSTOM_MATCH
    if( opal_list_get_size(&pml_comm->wild_receives) ) {
        opal_output(0, "expected MPI_ANY_SOURCE fragments\n");
        mca_pml_ob1_dump_frag_list(&pml_comm->wild_receives, true);
    }
#endif

#if MCA_PML_OB1_CUSTOM_MATCH
     opal_output(0, "expected receives\n");
     custom_match_prq_dump(pml_comm->prq);
     opal_output(0, "unexpected frag\n");
     custom_match_umq_dump(pml_comm->umq);
#endif

    /* iterate through all procs on communicator */
    for( i = 0; i < (int)pml_comm->num_procs; i++ ) {
        mca_pml_ob1_comm_proc_t* proc = pml_comm->procs[i];

        /* procs are created lazily; skip peers we never talked to */
        if (NULL == proc) {
            continue;
        }

        mca_bml_base_endpoint_t* ep = mca_bml_base_get_endpoint(proc->ompi_proc);
        size_t n;

        opal_output(0, "[Rank %d] expected_seq %d ompi_proc %p send_seq %d\n",
                    i, proc->expected_sequence, (void*) proc->ompi_proc,
                    proc->send_sequence);

        /* dump all receive queues */
#if !MCA_PML_OB1_CUSTOM_MATCH
       if( opal_list_get_size(&proc->specific_receives) ) {
            opal_output(0, "expected specific receives\n");
            mca_pml_ob1_dump_frag_list(&proc->specific_receives, true);
        }
#endif
        if( NULL != proc->frags_cant_match ) {
            opal_output(0, "out of sequence\n");
            mca_pml_ob1_dump_cant_match(proc->frags_cant_match);
        }
#if !MCA_PML_OB1_CUSTOM_MATCH
        if( opal_list_get_size(&proc->unexpected_frags) ) {
            opal_output(0, "unexpected frag\n");
            mca_pml_ob1_dump_frag_list(&proc->unexpected_frags, false);
        }
#endif
        /* dump all btls used for eager messages */
        for( n = 0; n < ep->btl_eager.arr_size; n++ ) {
            mca_bml_base_btl_t* bml_btl = &ep->btl_eager.bml_btls[n];
            bml_btl->btl->btl_dump(bml_btl->btl, bml_btl->btl_endpoint, verbose);
        }
    }
    return OMPI_SUCCESS;
}
 657 
/* Completion callback installed on FIN descriptors (see
 * mca_pml_ob1_send_fin): once the BTL has consumed the FIN, use the
 * freed resources to make progress on any operations pending on the
 * same bml_btl.  The btl/ep/status arguments are required by the
 * descriptor callback signature but unused here. */
static void mca_pml_ob1_fin_completion( mca_btl_base_module_t* btl,
                                        struct mca_btl_base_endpoint_t* ep,
                                        struct mca_btl_base_descriptor_t* des,
                                        int status )
{

    mca_bml_base_btl_t* bml_btl = (mca_bml_base_btl_t*) des->des_context;

    /* check for pending requests */
    MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
}
 669 
/**
 * Send a FIN to the peer. If we fail to send this FIN (no more
 * available fragments, or the send failed) this function
 * automatically adds the FIN to the list of pending FINs, which
 * guarantees that the FIN will be sent later.
 *
 * @param proc      destination process.
 * @param bml_btl   BTL to send the FIN on.
 * @param hdr_frag  opaque fragment pointer echoed back to the peer.
 * @param rdma_size number of bytes transferred, reported on success.
 * @param order     BTL ordering tag for the descriptor.
 * @param status    0 on success; a (negative) error code otherwise.
 * @return OMPI_SUCCESS if the FIN was handed to the BTL,
 *         OMPI_ERR_OUT_OF_RESOURCE if it was queued for later.
 */
int mca_pml_ob1_send_fin( ompi_proc_t* proc,
                          mca_bml_base_btl_t* bml_btl,
                          opal_ptr_t hdr_frag,
                          uint64_t rdma_size,
                          uint8_t order,
                          int status )
{
    mca_btl_base_descriptor_t* fin;
    int rc;

    mca_bml_base_alloc(bml_btl, &fin, order, sizeof(mca_pml_ob1_fin_hdr_t),
                       MCA_BTL_DES_FLAGS_PRIORITY | MCA_BTL_DES_FLAGS_BTL_OWNERSHIP | MCA_BTL_DES_FLAGS_SIGNAL);

    if(NULL == fin) {
        /* no descriptor available: queue the FIN for a later retry */
        MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status);
        return OMPI_ERR_OUT_OF_RESOURCE;
    }
    fin->des_cbfunc = mca_pml_ob1_fin_completion;
    fin->des_cbdata = NULL;

    /* fill in header */
    /* on failure the (nonzero) status is sent instead of the size */
    mca_pml_ob1_fin_hdr_prepare ((mca_pml_ob1_fin_hdr_t *) fin->des_segments->seg_addr.pval,
                                 0, hdr_frag.lval, status ? status : (int64_t) rdma_size);

    ob1_hdr_hton((mca_pml_ob1_hdr_t *) fin->des_segments->seg_addr.pval, MCA_PML_OB1_HDR_TYPE_FIN, proc);

    /* queue request */
    rc = mca_bml_base_send( bml_btl, fin, MCA_PML_OB1_HDR_TYPE_FIN );
    if( OPAL_LIKELY( rc >= 0 ) ) {
        /* rc == 1 means the descriptor completed inline, so resources
         * may already have been freed: try to progress pending ops */
        if( OPAL_LIKELY( 1 == rc ) ) {
            MCA_PML_OB1_PROGRESS_PENDING(bml_btl);
        }
        return OMPI_SUCCESS;
    }
    /* send failed: return the descriptor and queue the FIN for retry */
    mca_bml_base_free(bml_btl, fin);
    MCA_PML_OB1_ADD_FIN_TO_PENDING(proc, hdr_frag, rdma_size, bml_btl, order, status);
    return OMPI_ERR_OUT_OF_RESOURCE;
}
 714 
/**
 * Retry control packets (ACKs and FINs) that previously could not be sent
 * because the given BTL was out of resources.  Each pending packet is
 * re-sent over the supplied BTL (or the matching eager BTL to the same
 * peer); packets that still cannot be sent are put back on the pending
 * list for a later attempt.
 */
void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl)
{
    mca_pml_ob1_pckt_pending_t *pckt;
    /* snapshot the list size on entry: entries that cannot be handled are
     * re-appended below, so bounding the loop by the initial size prevents
     * spinning forever on the same packets */
    int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_ob1.pckt_pending);

    for(i = 0; i < s; i++) {
        mca_bml_base_btl_t *send_dst = NULL;
        OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
        pckt = (mca_pml_ob1_pckt_pending_t*)
            opal_list_remove_first(&mca_pml_ob1.pckt_pending);
        OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
        if(NULL == pckt)
            break;
        /* prefer the BML/BTL the packet was originally bound to; otherwise
         * look for the same physical BTL among the peer's eager BTLs */
        if(pckt->bml_btl != NULL &&
                pckt->bml_btl->btl == bml_btl->btl) {
            send_dst = pckt->bml_btl;
        } else {
            mca_bml_base_endpoint_t* endpoint =
                (mca_bml_base_endpoint_t*) pckt->proc->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML];
            send_dst = mca_bml_base_btl_array_find(
                    &endpoint->btl_eager, bml_btl->btl);
        }
        if(NULL == send_dst) {
            /* this packet cannot use the BTL that just freed resources;
             * re-queue it and move on to the next one */
            OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
            opal_list_append(&mca_pml_ob1.pckt_pending,
                             (opal_list_item_t*)pckt);
            OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
            continue;
        }

        switch(pckt->hdr.hdr_common.hdr_type) {
            case MCA_PML_OB1_HDR_TYPE_ACK:
                rc = mca_pml_ob1_recv_request_ack_send_btl(pckt->proc,
                        send_dst,
                        pckt->hdr.hdr_ack.hdr_src_req.lval,
                        pckt->hdr.hdr_ack.hdr_dst_req.pval,
                        pckt->hdr.hdr_ack.hdr_send_offset,
                        pckt->hdr.hdr_ack.hdr_send_size,
                        pckt->hdr.hdr_common.hdr_flags & MCA_PML_OB1_HDR_FLAGS_NORDMA);
                if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
                    /* still out of resources: keep the packet queued and
                     * stop early -- later packets would fail the same way */
                    OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
                    opal_list_append(&mca_pml_ob1.pckt_pending,
                                     (opal_list_item_t*)pckt);
                    OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
                    return;
                }
                break;
            case MCA_PML_OB1_HDR_TYPE_FIN:
                rc = mca_pml_ob1_send_fin(pckt->proc, send_dst,
                                          pckt->hdr.hdr_fin.hdr_frag,
                                          pckt->hdr.hdr_fin.hdr_size,
                                          pckt->order,
                                          pckt->status);
                if( OPAL_UNLIKELY(OMPI_ERR_OUT_OF_RESOURCE == rc) ) {
                    /* send_fin already re-queued the FIN itself, so this
                     * pending entry can be released before stopping */
                    MCA_PML_OB1_PCKT_PENDING_RETURN(pckt);
                    return;
                }
                break;
            default:
                opal_output(0, "[%s:%d] wrong header type\n",
                            __FILE__, __LINE__);
                break;
        }
        /* We're done with this packet, return it back to the free list */
        MCA_PML_OB1_PCKT_PENDING_RETURN(pckt);
    }
}
 782 
 783 void mca_pml_ob1_process_pending_rdma(void)
 784 {
 785     mca_pml_ob1_rdma_frag_t* frag;
 786     int32_t i, rc, s = (int32_t)opal_list_get_size(&mca_pml_ob1.rdma_pending);
 787 
 788     for(i = 0; i < s; i++) {
 789         OPAL_THREAD_LOCK(&mca_pml_ob1.lock);
 790         frag = (mca_pml_ob1_rdma_frag_t*)
 791             opal_list_remove_first(&mca_pml_ob1.rdma_pending);
 792         OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);
 793         if(NULL == frag)
 794             break;
 795 
 796         frag->retries++;
 797 
 798         if(frag->rdma_state == MCA_PML_OB1_RDMA_PUT) {
 799             rc = mca_pml_ob1_send_request_put_frag(frag);
 800         } else {
 801             rc = mca_pml_ob1_recv_request_get_frag(frag);
 802         }
 803         if(OMPI_ERR_OUT_OF_RESOURCE == rc)
 804             break;
 805     }
 806 }
 807 
 808 
/**
 * BTL error callback registered by ob1.  In CUDA builds one flag value is
 * used to piggy-back a non-error notification (enable CUDA IPC support to
 * a peer) on this path; any other invocation is a fatal BTL error, and the
 * job is aborted since ob1 cannot recover a failed BTL.
 */
void mca_pml_ob1_error_handler(
        struct mca_btl_base_module_t* btl, int32_t flags,
        opal_proc_t* errproc, char* btlinfo ) {
#if OPAL_CUDA_SUPPORT
    if (flags & MCA_BTL_ERROR_FLAGS_ADD_CUDA_IPC) {
        /* not an error: the BTL is requesting CUDA IPC setup to this peer */
        mca_pml_ob1_cuda_add_ipc_support(btl, flags, (struct ompi_proc_t*)errproc, btlinfo);
        return;
    }
#endif /* OPAL_CUDA_SUPPORT */
    ompi_rte_abort(-1, btlinfo);
}
 820 
 821 #if OPAL_ENABLE_FT_CR    == 0
/* Checkpoint/restart support is compiled out: nothing to do for any state. */
int mca_pml_ob1_ft_event( int state ) {
    return OMPI_SUCCESS;
}
 825 #else
/**
 * Checkpoint/restart (C/R) event handler for the ob1 PML.
 *
 * Invoked by the fault-tolerance framework with an OPAL_CRS_* state.  The
 * function runs in two halves: pre-BML work, the BML's own ft_event, then
 * post-BML work keyed on the same state.  On restart (and on "continue
 * like restart") the proc list is refreshed and re-added to the PML so
 * the BTLs can redo their modex exchange.
 *
 * Returns OMPI_SUCCESS, or the first error encountered.
 */
int mca_pml_ob1_ft_event( int state )
{
    /* CONTINUE is delivered twice per checkpoint; this toggle tracks
     * whether we are in the first or second pass */
    static bool first_continue_pass = false;
    ompi_proc_t** procs = NULL;
    size_t num_procs;
    int ret, p;

    if(OPAL_CRS_CHECKPOINT == state) {
        if( opal_cr_timing_barrier_enabled ) {
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCPBR1);
            /* NOTE(review): the message says "Restart" but this is the
             * checkpoint path -- the label looks copy-pasted */
            if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
                return ret;
            }
        }

        OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P0);
    }
    else if(OPAL_CRS_CONTINUE == state) {
        first_continue_pass = !first_continue_pass;

        if( !first_continue_pass ) {
            if( opal_cr_timing_barrier_enabled ) {
                OPAL_CR_SET_TIMER(OPAL_CR_TIMER_COREBR0);
                if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                    opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
                    return ret;
                }
            }
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P2);
        }

        /* second CONTINUE pass behaving like a restart: rebuild the proc
         * structures; `procs` stays allocated for the post-BML half below */
        if (opal_cr_continue_like_restart && !first_continue_pass) {
            /*
             * Get a list of processes
             */
            procs = ompi_proc_all(&num_procs);
            if(NULL == procs) {
                return OMPI_ERR_OUT_OF_RESOURCE;
            }

            /*
             * Refresh the proc structure, and publish our proc info in the modex.
             * NOTE: Do *not* call ompi_proc_finalize as there are many places in
             *       the code that point to indv. procs in this strucutre. For our
             *       needs here we only need to fix up the modex, bml and pml
             *       references.
             */
            if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
                opal_output(0,
                            "pml:ob1: ft_event(Restart): proc_refresh Failed %d",
                            ret);
                for(p = 0; p < (int)num_procs; ++p) {
                    OBJ_RELEASE(procs[p]);
                }
                free (procs);
                return ret;
            }
        }
    }
    else if(OPAL_CRS_RESTART_PRE == state ) {
        /* Nothing here */
    }
    else if(OPAL_CRS_RESTART == state ) {
        /*
         * Get a list of processes
         */
        procs = ompi_proc_all(&num_procs);
        if(NULL == procs) {
            return OMPI_ERR_OUT_OF_RESOURCE;
        }

        /*
         * Clean out the modex information since it is invalid now.
         *    ompi_rte_purge_proc_attrs();
         * This happens at the ORTE level, so doing it again here will cause
         * some issues with socket caching.
         */


        /*
         * Refresh the proc structure, and publish our proc info in the modex.
         * NOTE: Do *not* call ompi_proc_finalize as there are many places in
         *       the code that point to indv. procs in this strucutre. For our
         *       needs here we only need to fix up the modex, bml and pml
         *       references.
         */
        if (OMPI_SUCCESS != (ret = ompi_proc_refresh())) {
            opal_output(0,
                        "pml:ob1: ft_event(Restart): proc_refresh Failed %d",
                        ret);
            for(p = 0; p < (int)num_procs; ++p) {
                OBJ_RELEASE(procs[p]);
            }
            free (procs);
            return ret;
        }
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    /* Call the BML
     * BML is expected to call ft_event in
     * - BTL(s)
     * - MPool(s)
     */
    if( OMPI_SUCCESS != (ret = mca_bml.bml_ft_event(state))) {
        /* deliberately non-fatal: log and continue with the second half */
        opal_output(0, "pml:base: ft_event: BML ft_event function failed: %d\n",
                    ret);
    }

    if(OPAL_CRS_CHECKPOINT == state) {
        OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P1);

        if( opal_cr_timing_barrier_enabled ) {
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR0);
            /* JJH Cannot barrier here due to progress engine -- ompi_rte_barrier();*/
        }
    }
    else if(OPAL_CRS_CONTINUE == state) {
        if( !first_continue_pass ) {
            if( opal_cr_timing_barrier_enabled ) {
                OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR1);
                /* NOTE(review): from here on `procs` (allocated in the
                 * pre-BML half) leaks on every early error return in this
                 * branch -- consider releasing it before returning */
                if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                    opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
                    return ret;
                }
            }
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2P3);
        }

        if (opal_cr_continue_like_restart && !first_continue_pass) {
            if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
                return ret;
            }

            /*
             * Startup the PML stack now that the modex is running again
             * Add the new procs (BTLs redo modex recv's)
             */
            if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) {
                opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret);
                return ret;
            }

            /* Is this barrier necessary ? JJH */
            if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
                return ret;
            }

            if( NULL != procs ) {
                for(p = 0; p < (int)num_procs; ++p) {
                    OBJ_RELEASE(procs[p]);
                }
                free(procs);
                procs = NULL;
            }
        }
        if( !first_continue_pass ) {
            if( opal_cr_timing_barrier_enabled ) {
                OPAL_CR_SET_TIMER(OPAL_CR_TIMER_P2PBR2);
                if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
                    opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
                    return ret;
                }
            }
            OPAL_CR_SET_TIMER(OPAL_CR_TIMER_CRCP1);
        }
    }
    else if(OPAL_CRS_RESTART_PRE == state ) {
        /* Nothing here */
    }
    else if(OPAL_CRS_RESTART == state  ) {
        /*
         * Exchange the modex information once again.
         * BTLs will have republished their modex information.
         */
        /* NOTE(review): `procs` (allocated in the pre-BML RESTART half)
         * leaks on each early error return below -- consider releasing it
         * before returning */
        if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
            opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
            return ret;
        }

        /*
         * Startup the PML stack now that the modex is running again
         * Add the new procs (BTLs redo modex recv's)
         */
        if( OMPI_SUCCESS != (ret = mca_pml_ob1_add_procs(procs, num_procs) ) ) {
            opal_output(0, "pml:ob1: ft_event(Restart): Failed in add_procs (%d)", ret);
            return ret;
        }

        /* Is this barrier necessary ? JJH */
        if (OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
            opal_output(0, "pml:ob1: ft_event(Restart): Failed to fence complete");
            return ret;
        }

        if( NULL != procs ) {
            for(p = 0; p < (int)num_procs; ++p) {
                OBJ_RELEASE(procs[p]);
            }
            free(procs);
            procs = NULL;
        }
    }
    else if(OPAL_CRS_TERM == state ) {
        ;
    }
    else {
        ;
    }

    return OMPI_SUCCESS;
}
1046 #endif /* OPAL_ENABLE_FT_CR */
1047 
1048 int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2)
1049 {
1050     const mca_pml_ob1_com_btl_t *b1 = (const mca_pml_ob1_com_btl_t *) v1;
1051     const mca_pml_ob1_com_btl_t *b2 = (const mca_pml_ob1_com_btl_t *) v2;
1052 
1053     if(b1->bml_btl->btl_weight < b2->bml_btl->btl_weight)
1054         return 1;
1055     if(b1->bml_btl->btl_weight > b2->bml_btl->btl_weight)
1056         return -1;
1057 
1058     return 0;
1059 }
1060 

/* [<][>][^][v][top][bottom][index][help] */