root/ompi/mca/pml/ob1/pml_ob1.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. mca_pml_ob1_compute_segment_length_base
  2. mca_pml_ob1_compute_segment_length_remote
  3. mca_pml_ob1_calc_weighted_length

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2018 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2010      Oracle and/or its affiliates.  All rights reserved
  14  * Copyright (c) 2011      Sandia National Laboratories. All rights reserved.
  15  * Copyright (c) 2012-2017 Los Alamos National Security, LLC. All rights
  16  *                         reserved.
  17  * Copyright (c) 2015      Research Organization for Information Science
  18  *                         and Technology (RIST). All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 /**
  26  *  @file
  27  */
  28 
  29 #ifndef MCA_PML_OB1_H
  30 #define MCA_PML_OB1_H
  31 
  32 #include "ompi_config.h"
  33 #include "opal/class/opal_free_list.h"
  34 #include "ompi/request/request.h"
  35 #include "ompi/mca/pml/pml.h"
  36 #include "ompi/mca/pml/base/pml_base_request.h"
  37 #include "ompi/mca/pml/base/pml_base_bsend.h"
  38 #include "ompi/mca/pml/base/pml_base_sendreq.h"
  39 #include "ompi/datatype/ompi_datatype.h"
  40 #include "pml_ob1_hdr.h"
  41 #include "ompi/mca/bml/base/base.h"
  42 #include "ompi/proc/proc.h"
  43 #include "opal/mca/allocator/base/base.h"
  44 
  45 BEGIN_C_DECLS
  46 
  47 /**
  48  * OB1 PML module state; a single instance, mca_pml_ob1, is declared below.
  49  */
  50 
  51 struct mca_pml_ob1_t {
  52     mca_pml_base_module_t super;  /* base PML module type, kept as the first member */
  53 
  54     int priority;           /* NOTE(review): presumably the component selection priority - confirm */
  55     int free_list_num;      /* initial size of free list */
  56     int free_list_max;      /* maximum size of free list */
  57     int free_list_inc;      /* number of elements to grow free list */
  58     int32_t send_pipeline_depth;
  59     int32_t recv_pipeline_depth;
  60     size_t rdma_retries_limit;
  61     int max_rdma_per_request;
  62     int max_send_per_range;
  63     bool use_all_rdma;
  64 
  65     /* lock queue access (taken by MCA_PML_OB1_ADD_FIN_TO_PENDING around its append) */
  66     opal_mutex_t lock;
  67 
  68     /* free lists */
  69     opal_free_list_t rdma_frags;
  70     opal_free_list_t recv_frags;
  71     opal_free_list_t pending_pckts;  /* source of mca_pml_ob1_pckt_pending_t items */
  72     opal_free_list_t buffers;
  73     opal_free_list_t send_ranges;
  74 
  75     /* lists of pending operations; the first four are checked by MCA_PML_OB1_PROGRESS_PENDING */
  76     opal_list_t pckt_pending;
  77     opal_list_t send_pending;
  78     opal_list_t recv_pending;
  79     opal_list_t rdma_pending;
  80     /* List of pending fragments without a matching communicator */
  81     opal_list_t non_existing_communicator_pending;
  82     bool enabled;
  83     char* allocator_name;
  84     mca_allocator_base_module_t* allocator;
  85     unsigned int unexpected_limit;
  86 };
  87 typedef struct mca_pml_ob1_t mca_pml_ob1_t;
  88 
  89 extern mca_pml_ob1_t mca_pml_ob1;  /* the module instance the macros in this header operate on */
  90 extern int mca_pml_ob1_output;  /* NOTE(review): presumably an output stream id - confirm */
  91 extern bool mca_pml_ob1_matching_protection;  /* selects opal_mutex_lock/unlock over OPAL_THREAD_LOCK/UNLOCK in OB1_MATCHING_* */
  92 /*
  93  * PML interface functions.  Every entry point declared here returns an int
  94  * (NOTE(review): presumably an OMPI status code - confirm). */
  95 
  96 extern int mca_pml_ob1_add_comm(
  97     struct ompi_communicator_t* comm
  98 );
  99 
 100 extern int mca_pml_ob1_del_comm(
 101     struct ompi_communicator_t* comm
 102 );
 103 
 104 extern int mca_pml_ob1_add_procs(
 105     struct ompi_proc_t **procs,
 106     size_t nprocs
 107 );
 108 
 109 extern int mca_pml_ob1_del_procs(
 110     struct ompi_proc_t **procs,
 111     size_t nprocs
 112 );
 113 
 114 extern int mca_pml_ob1_enable( bool enable );
 115 
 116 extern int mca_pml_ob1_progress(void);
 117 /* Probe family: the i-variants report through *matched, the m-variants hand back *message. */
 118 extern int mca_pml_ob1_iprobe( int dst,
 119                                int tag,
 120                                struct ompi_communicator_t* comm,
 121                                int *matched,
 122                                ompi_status_public_t* status );
 123 /* NOTE(review): the rank is named dst in this family but src in the recv family - presumably the source rank; confirm */
 124 extern int mca_pml_ob1_probe( int dst,
 125                               int tag,
 126                               struct ompi_communicator_t* comm,
 127                               ompi_status_public_t* status );
 128 
 129 extern int mca_pml_ob1_improbe( int dst,
 130                                int tag,
 131                                struct ompi_communicator_t* comm,
 132                                int *matched,
 133                                struct ompi_message_t **message,
 134                                ompi_status_public_t* status );
 135 
 136 extern int mca_pml_ob1_mprobe( int dst,
 137                               int tag,
 138                               struct ompi_communicator_t* comm,
 139                               struct ompi_message_t **message,
 140                               ompi_status_public_t* status );
 141 /* Send family: isend_init and isend hand back *request; send takes no request argument. */
 142 extern int mca_pml_ob1_isend_init( const void *buf,
 143                                    size_t count,
 144                                    ompi_datatype_t *datatype,
 145                                    int dst,
 146                                    int tag,
 147                                    mca_pml_base_send_mode_t mode,
 148                                    struct ompi_communicator_t* comm,
 149                                    struct ompi_request_t **request );
 150 
 151 extern int mca_pml_ob1_isend( const void *buf,
 152                               size_t count,
 153                               ompi_datatype_t *datatype,
 154                               int dst,
 155                               int tag,
 156                               mca_pml_base_send_mode_t mode,
 157                               struct ompi_communicator_t* comm,
 158                               struct ompi_request_t **request );
 159 
 160 extern int mca_pml_ob1_send( const void *buf,
 161                              size_t count,
 162                              ompi_datatype_t *datatype,
 163                              int dst,
 164                              int tag,
 165                              mca_pml_base_send_mode_t mode,
 166                              struct ompi_communicator_t* comm );
 167 /* Receive family: irecv_init, irecv and imrecv hand back *request; recv and mrecv fill *status. */
 168 extern int mca_pml_ob1_irecv_init( void *buf,
 169                                    size_t count,
 170                                    ompi_datatype_t *datatype,
 171                                    int src,
 172                                    int tag,
 173                                    struct ompi_communicator_t* comm,
 174                                    struct ompi_request_t **request );
 175 
 176 extern int mca_pml_ob1_irecv( void *buf,
 177                               size_t count,
 178                               ompi_datatype_t *datatype,
 179                               int src,
 180                               int tag,
 181                               struct ompi_communicator_t* comm,
 182                               struct ompi_request_t **request );
 183 
 184 extern int mca_pml_ob1_recv( void *buf,
 185                              size_t count,
 186                              ompi_datatype_t *datatype,
 187                              int src,
 188                              int tag,
 189                              struct ompi_communicator_t* comm,
 190                              ompi_status_public_t* status );
 191 /* The m-variants below take a message handle (**message) in place of src, tag and comm. */
 192 extern int mca_pml_ob1_imrecv( void *buf,
 193                                size_t count,
 194                                ompi_datatype_t *datatype,
 195                                struct ompi_message_t **message,
 196                                struct ompi_request_t **request );
 197 
 198 extern int mca_pml_ob1_mrecv( void *buf,
 199                               size_t count,
 200                               ompi_datatype_t *datatype,
 201                               struct ompi_message_t **message,
 202                               ompi_status_public_t* status );
 203 /* NOTE(review): roles inferred from names only (state dump, starting `count` requests, fault-tolerance event) - confirm. */
 204 extern int mca_pml_ob1_dump( struct ompi_communicator_t* comm,
 205                              int verbose );
 206 
 207 extern int mca_pml_ob1_start( size_t count,
 208                               ompi_request_t** requests );
 209 
 210 extern int mca_pml_ob1_ft_event( int state );
 211 
 212 /**
 213  * We will use these pointers to hold on to traditionally allocated
 214  * requests in order to allow the parallel debugger full access to the
 215  * message queues (instead of allocating the requests on the stack).
 216  */
 217 extern struct mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq;
 218 extern struct mca_pml_ob1_send_request_t *mca_pml_ob1_sendreq;
 219 
 220 END_C_DECLS
 221 /* A control packet queued on mca_pml_ob1.pckt_pending until it can be (re)sent. */
 222 struct mca_pml_ob1_pckt_pending_t {
 223     opal_free_list_item_t super;  /* items come from the mca_pml_ob1.pending_pckts free list */
 224     ompi_proc_t* proc;  /* NOTE(review): presumably the peer the packet is for - confirm */
 225     mca_pml_ob1_hdr_t hdr;  /* saved header; hdr.hdr_fin is filled by MCA_PML_OB1_ADD_FIN_TO_PENDING */
 226     struct mca_bml_base_btl_t *bml_btl;
 227     uint8_t order;
 228     int status;
 229 };
 230 typedef struct mca_pml_ob1_pckt_pending_t mca_pml_ob1_pckt_pending_t;
 231 OBJ_CLASS_DECLARATION(mca_pml_ob1_pckt_pending_t);
 232 /* Take one item from the pending_pckts free list into `pckt` (the result is not NULL-checked here). */
 233 #define MCA_PML_OB1_PCKT_PENDING_ALLOC(pckt)                    \
 234 do {                                                            \
 235     (pckt) = (mca_pml_ob1_pckt_pending_t *)                     \
 236         opal_free_list_get (&mca_pml_ob1.pending_pckts);        \
 237 } while (0)
 238 /* Give `pckt` back to the pending_pckts free list. */
 239 #define MCA_PML_OB1_PCKT_PENDING_RETURN(pckt)                   \
 240 do {                                                            \
 241     /* the cast must cover the whole macro argument */          \
 242     opal_free_list_return (&mca_pml_ob1.pending_pckts,          \
 243         (opal_free_list_item_t*)(pckt));                        \
 244 } while(0)
 245 /* Queue a FIN: fill a freshly allocated pending packet (not NULL-checked) and append it to pckt_pending under the lock. */
 246 #define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, Sz, B, O, S)           \
 247     do {                                                            \
 248         mca_pml_ob1_pckt_pending_t *_pckt;                          \
 249                                                                     \
 250         MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt);                      \
 251         mca_pml_ob1_fin_hdr_prepare (&_pckt->hdr.hdr_fin, 0,        \
 252                                      (D).lval, (Sz));               \
 253         _pckt->proc = (P);                                          \
 254         _pckt->bml_btl = (B);                                       \
 255         _pckt->order = (O);                                         \
 256         _pckt->status = (S);                                        \
 257         OPAL_THREAD_LOCK(&mca_pml_ob1.lock);                        \
 258         opal_list_append(&mca_pml_ob1.pckt_pending,                 \
 259                 (opal_list_item_t*)_pckt);                          \
 260         OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);                      \
 261     } while(0)
 262 /* Acquire `lock`: through OPAL_THREAD_LOCK unless matching protection is on, in which case opal_mutex_lock is called directly. */
 263 #define OB1_MATCHING_LOCK(lock)                                  \
 264     do {                                                         \
 265         if( !mca_pml_ob1_matching_protection ) {                 \
 266             OPAL_THREAD_LOCK(lock);                              \
 267         }                                                        \
 268         else { opal_mutex_lock(lock); }                          \
 269     } while(0)
 270 
 271 /* Release `lock`: through OPAL_THREAD_UNLOCK unless matching protection is on, in which case opal_mutex_unlock is called directly. */
 272 #define OB1_MATCHING_UNLOCK(lock)                                \
 273     do {                                                         \
 274         if( !mca_pml_ob1_matching_protection ) {                 \
 275             OPAL_THREAD_UNLOCK(lock);                            \
 276         }                                                        \
 277         else { opal_mutex_unlock(lock); }                        \
 278     } while(0)
 279 
 280 
 281 /* NOTE(review): presumably sends a FIN control packet to `proc` over `bml_btl`; failure handling is not visible in this header - confirm. */
 282 int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
 283         opal_ptr_t hdr_frag, uint64_t size, uint8_t order, int status);
 284 
 285 /* This function tries to resend FIN/ACK packets from the pckt_pending queue.
 286  * Packets are added to the queue when sending a FIN or ACK fails due to
 287  * resource unavailability. The bml_btl passed to the function does not represent
 288  * the packet's destination; it represents the BTL on which a resource was freed,
 289  * so only this BTL should be considered for resending packets */
 290 void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl);
 291 
 292 /* This function retries failed PUT/GET operations on a frag. When an RDMA
 293  * operation cannot be accomplished for some reason, the frag is put on the
 294  * rdma_pending list and the operation is retried later. The destination of
 295  * the RDMA operation is stored inside the frag structure */
 296 void mca_pml_ob1_process_pending_rdma(void);
 297 /* For each non-empty pending list (packets, receives, sends, RDMA - in that order) call its processing routine. */
 298 #define MCA_PML_OB1_PROGRESS_PENDING(bml_btl)                   \
 299     do {                                                        \
 300         if(0 != opal_list_get_size(&mca_pml_ob1.pckt_pending))  \
 301             mca_pml_ob1_process_pending_packets(bml_btl);       \
 302         if(0 != opal_list_get_size(&mca_pml_ob1.recv_pending))  \
 303             mca_pml_ob1_recv_request_process_pending();         \
 304         if(0 != opal_list_get_size(&mca_pml_ob1.send_pending))  \
 305             mca_pml_ob1_send_request_process_pending(bml_btl);  \
 306         if(0 != opal_list_get_size(&mca_pml_ob1.rdma_pending))  \
 307             mca_pml_ob1_process_pending_rdma();                 \
 308     } while (0)
 309 
 310 /*
 311  * Total seg_len of the first `count` entries of `segments`, less `hdrlen`
 312  * (plain size_t subtraction: no range check is made on the result) */
 313 static inline size_t
 314 mca_pml_ob1_compute_segment_length_base(mca_btl_base_segment_t *segments,
 315                                         size_t count, size_t hdrlen)
 316 {
 317     size_t remaining = count, total = 0;
 318 
 319     for ( ; remaining > 0; --remaining, ++segments) {
 320         total += segments->seg_len;
 321     }
 322     return total - hdrlen;
 323 }
 324 /* Sum seg_len over `count` segment descriptors spaced `seg_size` bytes apart, byte-swapping for a peer of other endianness. */
 325 static inline size_t
 326 mca_pml_ob1_compute_segment_length_remote (size_t seg_size, void *segments,
 327                                            size_t count, ompi_proc_t *rem_proc)
 328 {
 329     /* the stride between descriptors is the caller-supplied seg_size, not
 330      * sizeof(mca_btl_base_segment_t), hence the byte-wise cursor */
 331     char *cursor = (char *) segments;
 332 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
 333     ompi_proc_t *local_proc = ompi_proc_local();
 334 #endif
 335     size_t remaining, total = 0;
 336 
 337     for (remaining = count ; remaining > 0 ; --remaining, cursor += seg_size) {
 338         uint64_t seg_len = ((mca_btl_base_segment_t *) cursor)->seg_len;
 339 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
 340         /* NTH: seg_len is always 64-bit so use swap_bytes8 */
 341         if ((rem_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
 342             (local_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN)) {
 343             seg_len = opal_swap_bytes8(seg_len);
 344         }
 345 #endif
 346         total += seg_len;
 347     }
 348     return total;
 349 }
 350 
 351 /* represents a BTL chosen for sending a request */
 352 struct mca_pml_ob1_com_btl_t {
 353     mca_bml_base_btl_t *bml_btl;  /* the chosen BTL; its btl_weight and eager limit drive the split below */
 354     struct mca_btl_base_registration_handle_t *btl_reg;  /* NOTE(review): presumably this BTL's registration handle for the buffer - confirm */
 355     size_t length;  /* bytes assigned to this BTL by mca_pml_ob1_calc_weighted_length */
 356 };
 357 typedef struct mca_pml_ob1_com_btl_t mca_pml_ob1_com_btl_t;
 358 /* qsort() comparator used by mca_pml_ob1_calc_weighted_length to order an array of mca_pml_ob1_com_btl_t */
 359 int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2);
 360 
 361 /* Split `size` bytes across `num_btls` BTLs in proportion to each one's
 362  * btl_weight / weight_total; the shares are stored in btls[i].length */
 363 static inline void
 364 mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, size_t size,
 365                                   double weight_total )
 366 {
 367     int i;
 368     size_t length_left;
 369 
 370     /* an empty set has no btls[0] to write to; a single BTL (the common
 371      * case) simply carries the whole message */
 372     if( OPAL_LIKELY(num_btls <= 1) ) {
 373         if( 1 == num_btls ) { btls[0].length = size; }
 374         return;
 375     }
 376     /* sort BTLs according to their weights so BTLs with smaller weight will
 377      * not hijack all of the traffic */
 378     qsort( btls, num_btls, sizeof(mca_pml_ob1_com_btl_t),
 379            mca_pml_ob1_com_btl_comp );
 380 
 381     for(length_left = size, i = 0; i < num_btls; i++) {
 382         mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
 383         size_t length = 0;
 384         if( OPAL_UNLIKELY(0 != length_left) ) {
 385             /* at or below this BTL's eager limit the remainder goes out whole;
 386              * above it the BTL gets its weighted share of the full size */
 387             length = (length_left > bml_btl->btl->btl_eager_limit)?
 388                 ((size_t)(size * (bml_btl->btl_weight / weight_total))) :
 389                 length_left;
 390             if(length > length_left) { length = length_left; }
 391             length_left -= length;
 392         }
 393         btls[i].length = length;
 394     }
 395 
 396     /* account for rounding errors: the unassigned remainder goes to btls[0] */
 397     btls[0].length += length_left;
 398 }
 399 
 400 /**
 401  * A thread-safe function that should be called every time we need the OB1
 402  * progress to be turned (or kept) on.  NOTE(review): the meaning of `count`
 403  * and of the return value is not visible in this header - confirm. */
 404 int mca_pml_ob1_enable_progress(int32_t count);
 405 
 406 #endif

/* [<][>][^][v][top][bottom][index][help] */