root/ompi/mca/pml/ob1/pml_ob1_recvreq.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. lock_recv_request
  2. unlock_recv_request
  3. mca_pml_ob1_recv_request_fini
  4. recv_request_pml_complete
  5. recv_request_pml_complete_check
  6. prepare_recv_req_converter
  7. recv_req_matched
  8. mca_pml_ob1_recv_request_schedule_exclusive
  9. mca_pml_ob1_recv_request_schedule
  10. mca_pml_ob1_recv_request_ack_send

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2016 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2007 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2008      UT-Battelle, LLC. All rights reserved.
  14  * Copyright (c) 2011-2018 Los Alamos National Security, LLC. All rights
  15  *                         reserved.
  16  * Copyright (c) 2014      Research Organization for Information Science
  17  *                         and Technology (RIST). All rights reserved.
  18  *
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  */
  25 /**
  26  * @file
  27  */
  28 #ifndef OMPI_PML_OB1_RECV_REQUEST_H
  29 #define OMPI_PML_OB1_RECV_REQUEST_H
  30 
  31 #include "pml_ob1.h"
  32 #include "pml_ob1_rdma.h"
  33 #include "pml_ob1_rdmafrag.h"
  34 #include "ompi/proc/proc.h"
  35 #include "ompi/mca/pml/ob1/pml_ob1_comm.h"
  36 #include "opal/mca/mpool/base/base.h"
  37 #include "ompi/mca/pml/base/pml_base_recvreq.h"
  38 
  39 BEGIN_C_DECLS
  40 
  41 struct mca_pml_ob1_recv_request_t {
  42     mca_pml_base_recv_request_t req_recv;
  43     opal_ptr_t remote_req_send;
  44     opal_atomic_int32_t  req_lock;
  45     opal_atomic_int32_t  req_pipeline_depth;
  46     opal_atomic_size_t   req_bytes_received;  /**< amount of data transferred into the user buffer */
  47     size_t   req_bytes_expected; /**< local size of the data as suggested by the user */
  48     size_t   req_rdma_offset;
  49     size_t   req_send_offset;
  50     uint32_t req_rdma_cnt;
  51     uint32_t req_rdma_idx;
  52     bool req_pending;
  53     bool req_ack_sent; /**< whether ack was sent to the sender */
  54     bool req_match_received; /**< Prevent request to be completed prematurely */
  55     opal_mutex_t lock;
  56     mca_bml_base_btl_t *rdma_bml;
  57     mca_btl_base_registration_handle_t *local_handle;
  58     /** The size of this array is set from mca_pml_ob1.max_rdma_per_request */
  59     mca_pml_ob1_com_btl_t req_rdma[];
  60 };
  61 typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
  62 
  63 OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t);
  64 
  65 static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
  66 {
  67         return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock,  1) == 1;
  68 }
  69 
  70 static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
  71 {
  72         return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
  73 }
  74 
  75 /**
  76  *  Allocate a recv request from the modules free list.
  77  *
  78  *  @param rc (OUT)  OMPI_SUCCESS or error status on failure.
  79  *  @return          Receive request.
  80  */
  81 #define MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq)                    \
  82 do {                                                               \
  83     recvreq = (mca_pml_ob1_recv_request_t *)                          \
  84         opal_free_list_get (&mca_pml_base_recv_requests);             \
  85 } while(0)
  86 
  87 
  88 /**
  89  * Initialize a receive request with call parameters.
  90  *
  91  * @param request (IN)       Receive request.
  92  * @param addr (IN)          User buffer.
  93  * @param count (IN)         Number of elements of indicated datatype.
  94  * @param datatype (IN)      User defined datatype.
  95  * @param src (IN)           Source rank w/in the communicator.
  96  * @param tag (IN)           User defined tag.
  97  * @param comm (IN)          Communicator.
  98  * @param persistent (IN)    Is this a ersistent request.
  99  */
 100 #define MCA_PML_OB1_RECV_REQUEST_INIT( request,                     \
 101                                        addr,                        \
 102                                        count,                       \
 103                                        datatype,                    \
 104                                        src,                         \
 105                                        tag,                         \
 106                                        comm,                        \
 107                                        persistent)                  \
 108 do {                                                                \
 109     MCA_PML_BASE_RECV_REQUEST_INIT( &(request)->req_recv,           \
 110                                     addr,                           \
 111                                     count,                          \
 112                                     datatype,                       \
 113                                     src,                            \
 114                                     tag,                            \
 115                                     comm,                           \
 116                                     persistent);                    \
 117 } while(0)
 118 
 119 /**
 120  * Mark the request as completed at MPI level for internal purposes.
 121  *
 122  *  @param recvreq (IN)  Receive request.
 123  */
 124 #define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE( recvreq )                              \
 125     do {                                                                              \
 126         PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_COMPLETE,                            \
 127                                  &(recvreq->req_recv.req_base), PERUSE_RECV );        \
 128         ompi_request_complete( &(recvreq->req_recv.req_base.req_ompi), true );        \
 129     } while (0)
 130 
 131 static inline void mca_pml_ob1_recv_request_fini (mca_pml_ob1_recv_request_t *recvreq)
 132 {
 133     MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
 134     if ((recvreq)->local_handle) {
 135         mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle);
 136         recvreq->local_handle = NULL;
 137     }
 138 }
 139 
 140 /*
 141  *  Free the PML receive request
 142  */
 143 #define MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq)                        \
 144     {                                                                   \
 145         mca_pml_ob1_recv_request_fini (recvreq);                        \
 146         opal_free_list_return (&mca_pml_base_recv_requests,             \
 147                                (opal_free_list_item_t*)(recvreq));      \
 148     }
 149 
 150 /**
 151  * Complete receive request. Request structure cannot be accessed after calling
 152  * this function any more.
 153  *
 154  *  @param recvreq (IN)  Receive request.
 155  */
 156 static inline void
 157 recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq)
 158 {
 159     size_t i;
 160 
 161     if(false == recvreq->req_recv.req_base.req_pml_complete){
 162 
 163         if(recvreq->req_recv.req_bytes_packed > 0) {
 164             PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
 165                     &recvreq->req_recv.req_base, PERUSE_RECV );
 166         }
 167 
 168         for(i = 0; i < recvreq->req_rdma_cnt; i++) {
 169             struct mca_btl_base_registration_handle_t *handle = recvreq->req_rdma[i].btl_reg;
 170             mca_bml_base_btl_t *bml_btl = recvreq->req_rdma[i].bml_btl;
 171 
 172             if (NULL != handle) {
 173                 mca_bml_base_deregister_mem (bml_btl, handle);
 174             }
 175         }
 176         recvreq->req_rdma_cnt = 0;
 177 
 178 
 179         if(true == recvreq->req_recv.req_base.req_free_called) {
 180             if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) {
 181                 ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST);
 182             }
 183             MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq);
 184         } else {
 185             /* initialize request status */
 186             recvreq->req_recv.req_base.req_pml_complete = true;
 187             recvreq->req_recv.req_base.req_ompi.req_status._ucount =
 188                 recvreq->req_bytes_received;
 189             if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) {
 190                 recvreq->req_recv.req_base.req_ompi.req_status._ucount =
 191                     recvreq->req_recv.req_bytes_packed;
 192                 recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =
 193                     MPI_ERR_TRUNCATE;
 194             }
 195             if (OPAL_UNLIKELY(recvreq->local_handle)) {
 196                 mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle);
 197                 recvreq->local_handle = NULL;
 198             }
 199             MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(recvreq);
 200         }
 201 
 202     }
 203 }
 204 
 205 static inline bool
 206 recv_request_pml_complete_check(mca_pml_ob1_recv_request_t *recvreq)
 207 {
 208     opal_atomic_rmb();
 209 
 210     if(recvreq->req_match_received &&
 211             recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
 212             lock_recv_request(recvreq)) {
 213         recv_request_pml_complete(recvreq);
 214         return true;
 215     }
 216 
 217     return false;
 218 }
 219 
 220 extern void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req);
 221 #define MCA_PML_OB1_RECV_REQUEST_START(r) mca_pml_ob1_recv_req_start(r)
 222 
 223 static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req)
 224 {
 225     if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) {
 226         opal_convertor_copy_and_prepare_for_recv(
 227                 req->req_recv.req_base.req_proc->super.proc_convertor,
 228                 &(req->req_recv.req_base.req_datatype->super),
 229                 req->req_recv.req_base.req_count,
 230                 req->req_recv.req_base.req_addr,
 231                 0,
 232                 &req->req_recv.req_base.req_convertor);
 233         opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
 234                                          &req->req_bytes_expected);
 235     }
 236 }
 237 
 238 #define MCA_PML_OB1_RECV_REQUEST_MATCHED(request, hdr) \
 239     recv_req_matched(request, hdr)
 240 
 241 static inline void recv_req_matched(mca_pml_ob1_recv_request_t *req,
 242                                     mca_pml_ob1_match_hdr_t *hdr)
 243 {
 244     req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
 245     req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
 246     req->req_match_received = true;
 247 
 248     opal_atomic_wmb();
 249 
 250     if(req->req_recv.req_bytes_packed > 0) {
 251 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
 252         if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) {
 253             /* non wildcard prepared during post recv */
 254             prepare_recv_req_converter(req);
 255         }
 256 #endif  /* OPAL_ENABLE_HETEROGENEOUS_SUPPORT */
 257         PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN,
 258                                 &req->req_recv.req_base, PERUSE_RECV);
 259     }
 260 }
 261 
 262 
 263 /**
 264  *
 265  */
 266 
 267 #define MCA_PML_OB1_RECV_REQUEST_UNPACK( request,                                 \
 268                                          segments,                                \
 269                                          num_segments,                            \
 270                                          seg_offset,                              \
 271                                          data_offset,                             \
 272                                          bytes_received,                          \
 273                                          bytes_delivered)                         \
 274 do {                                                                              \
 275     bytes_delivered = 0;                                                          \
 276     if(request->req_recv.req_bytes_packed > 0) {                                  \
 277         struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS];                               \
 278         uint32_t iov_count = 0;                                                   \
 279         size_t max_data = bytes_received;                                         \
 280         size_t n, offset = seg_offset;                                            \
 281         mca_btl_base_segment_t* segment = segments;                               \
 282                                                                                   \
 283         for( n = 0; n < num_segments; n++, segment++ ) {                          \
 284             if(offset >= segment->seg_len) {                                      \
 285                 offset -= segment->seg_len;                                       \
 286             } else {                                                              \
 287                 iov[iov_count].iov_len = segment->seg_len - offset;               \
 288                 iov[iov_count].iov_base = (IOVBASE_TYPE*)                         \
 289                     ((unsigned char*)segment->seg_addr.pval + offset);            \
 290                 iov_count++;                                                      \
 291                 offset = 0;                                                       \
 292             }                                                                     \
 293         }                                                                         \
 294         OPAL_THREAD_LOCK(&request->lock);                                         \
 295         PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE,              \
 296                                       &(request->req_recv.req_base), max_data,    \
 297                                       PERUSE_RECV);                               \
 298         opal_convertor_set_position( &(request->req_recv.req_base.req_convertor), \
 299                                      &data_offset );                              \
 300         opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor,       \
 301                                iov,                                               \
 302                                &iov_count,                                        \
 303                                &max_data );                                       \
 304         bytes_delivered = max_data;                                               \
 305         OPAL_THREAD_UNLOCK(&request->lock);                                       \
 306     }                                                                             \
 307 } while (0)
 308 
 309 
 310 /**
 311  *
 312  */
 313 
 314 void mca_pml_ob1_recv_request_progress_match(
 315     mca_pml_ob1_recv_request_t* req,
 316     struct mca_btl_base_module_t* btl,
 317     mca_btl_base_segment_t* segments,
 318     size_t num_segments);
 319 
 320 /**
 321  *
 322  */
 323 
 324 void mca_pml_ob1_recv_request_progress_frag(
 325     mca_pml_ob1_recv_request_t* req,
 326     struct mca_btl_base_module_t* btl,
 327     mca_btl_base_segment_t* segments,
 328     size_t num_segments);
 329 
 330 #if OPAL_CUDA_SUPPORT
 331 void mca_pml_ob1_recv_request_frag_copy_start(
 332     mca_pml_ob1_recv_request_t* req,
 333     struct mca_btl_base_module_t* btl,
 334     mca_btl_base_segment_t* segments,
 335     size_t num_segments,
 336     mca_btl_base_descriptor_t* des);
 337 
 338 void mca_pml_ob1_recv_request_frag_copy_finished(struct mca_btl_base_module_t* btl,
 339     struct mca_btl_base_endpoint_t* ep,
 340     struct mca_btl_base_descriptor_t* des,
 341     int status );
 342 #endif /* OPAL_CUDA_SUPPORT */
 343 /**
 344  *
 345  */
 346 
 347 void mca_pml_ob1_recv_request_progress_rndv(
 348     mca_pml_ob1_recv_request_t* req,
 349     struct mca_btl_base_module_t* btl,
 350     mca_btl_base_segment_t* segments,
 351     size_t num_segments);
 352 
 353 /**
 354  *
 355  */
 356 
 357 void mca_pml_ob1_recv_request_progress_rget(
 358     mca_pml_ob1_recv_request_t* req,
 359     struct mca_btl_base_module_t* btl,
 360     mca_btl_base_segment_t* segments,
 361     size_t num_segments);
 362 
 363 /**
 364  *
 365  */
 366 
 367 void mca_pml_ob1_recv_request_matched_probe(
 368     mca_pml_ob1_recv_request_t* req,
 369     struct mca_btl_base_module_t* btl,
 370     mca_btl_base_segment_t* segments,
 371     size_t num_segments);
 372 
 373 /**
 374  *
 375  */
 376 
 377 int mca_pml_ob1_recv_request_schedule_once(
 378     mca_pml_ob1_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl);
 379 
 380 static inline int mca_pml_ob1_recv_request_schedule_exclusive(
 381         mca_pml_ob1_recv_request_t* req,
 382         mca_bml_base_btl_t* start_bml_btl)
 383 {
 384     int rc;
 385 
 386     do {
 387         rc = mca_pml_ob1_recv_request_schedule_once(req, start_bml_btl);
 388         if(rc == OMPI_ERR_OUT_OF_RESOURCE)
 389             break;
 390     } while(!unlock_recv_request(req));
 391 
 392     if(OMPI_SUCCESS == rc)
 393         recv_request_pml_complete_check(req);
 394 
 395     return rc;
 396 }
 397 
 398 static inline void mca_pml_ob1_recv_request_schedule(
 399         mca_pml_ob1_recv_request_t* req,
 400         mca_bml_base_btl_t* start_bml_btl)
 401 {
 402     if(!lock_recv_request(req))
 403         return;
 404 
 405     (void)mca_pml_ob1_recv_request_schedule_exclusive(req, start_bml_btl);
 406 }
 407 
 408 #define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O, Sz)                  \
 409     do {                                                                \
 410         mca_pml_ob1_pckt_pending_t *_pckt;                              \
 411                                                                         \
 412         MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt);                          \
 413         _pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK;      \
 414         _pckt->hdr.hdr_ack.hdr_src_req.lval = (S);                      \
 415         _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D);                      \
 416         _pckt->hdr.hdr_ack.hdr_send_offset = (O);                       \
 417         _pckt->hdr.hdr_ack.hdr_send_size = (Sz);                        \
 418         _pckt->proc = (P);                                              \
 419         _pckt->bml_btl = NULL;                                          \
 420         OPAL_THREAD_LOCK(&mca_pml_ob1.lock);                            \
 421         opal_list_append(&mca_pml_ob1.pckt_pending,                     \
 422                          (opal_list_item_t*)_pckt);                     \
 423         OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);                          \
 424     } while(0)
 425 
 426 int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
 427         mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
 428         uint64_t hdr_rdma_offset, uint64_t size, bool nordma);
 429 
 430 static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
 431         uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
 432         uint64_t size, bool nordma)
 433 {
 434     size_t i;
 435     mca_bml_base_btl_t* bml_btl;
 436     mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
 437 
 438     assert (NULL != endpoint);
 439 
 440     for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
 441         bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
 442         if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
 443                     hdr_dst_req, hdr_send_offset, size, nordma) == OMPI_SUCCESS)
 444             return OMPI_SUCCESS;
 445     }
 446 
 447     MCA_PML_OB1_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
 448                                    hdr_send_offset, size);
 449 
 450     return OMPI_ERR_OUT_OF_RESOURCE;
 451 }
 452 
 453 int mca_pml_ob1_recv_request_get_frag(mca_pml_ob1_rdma_frag_t* frag);
 454 
 455 /* This function tries to continue recvreq that stuck due to resource
 456  * unavailability. Recvreq is added to recv_pending list if scheduling of put
 457  * operation cannot be accomplished for some reason. */
 458 void mca_pml_ob1_recv_request_process_pending(void);
 459 
 460 END_C_DECLS
 461 
 462 #endif
 463 

/* [<][>][^][v][top][bottom][index][help] */