This source file includes the following definitions.
- lock_recv_request
- unlock_recv_request
- mca_pml_ob1_recv_request_fini
- recv_request_pml_complete
- recv_request_pml_complete_check
- prepare_recv_req_converter
- recv_req_matched
- mca_pml_ob1_recv_request_schedule_exclusive
- mca_pml_ob1_recv_request_schedule
- mca_pml_ob1_recv_request_ack_send
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 #ifndef OMPI_PML_OB1_RECV_REQUEST_H
29 #define OMPI_PML_OB1_RECV_REQUEST_H
30
31 #include "pml_ob1.h"
32 #include "pml_ob1_rdma.h"
33 #include "pml_ob1_rdmafrag.h"
34 #include "ompi/proc/proc.h"
35 #include "ompi/mca/pml/ob1/pml_ob1_comm.h"
36 #include "opal/mca/mpool/base/base.h"
37 #include "ompi/mca/pml/base/pml_base_recvreq.h"
38
39 BEGIN_C_DECLS
40
41 struct mca_pml_ob1_recv_request_t {
42 mca_pml_base_recv_request_t req_recv;
43 opal_ptr_t remote_req_send;
44 opal_atomic_int32_t req_lock;
45 opal_atomic_int32_t req_pipeline_depth;
46 opal_atomic_size_t req_bytes_received;
47 size_t req_bytes_expected;
48 size_t req_rdma_offset;
49 size_t req_send_offset;
50 uint32_t req_rdma_cnt;
51 uint32_t req_rdma_idx;
52 bool req_pending;
53 bool req_ack_sent;
54 bool req_match_received;
55 opal_mutex_t lock;
56 mca_bml_base_btl_t *rdma_bml;
57 mca_btl_base_registration_handle_t *local_handle;
58
59 mca_pml_ob1_com_btl_t req_rdma[];
60 };
61 typedef struct mca_pml_ob1_recv_request_t mca_pml_ob1_recv_request_t;
62
63 OBJ_CLASS_DECLARATION(mca_pml_ob1_recv_request_t);
64
65 static inline bool lock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
66 {
67 return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, 1) == 1;
68 }
69
70 static inline bool unlock_recv_request(mca_pml_ob1_recv_request_t *recvreq)
71 {
72 return OPAL_THREAD_ADD_FETCH32(&recvreq->req_lock, -1) == 0;
73 }
74
75
76
77
78
79
80
/**
 * Take an item from the mca_pml_base_recv_requests free list and store it,
 * cast to mca_pml_ob1_recv_request_t *, in recvreq.
 *
 * NOTE(review): the result is not checked here -- confirm whether
 * opal_free_list_get() can hand back NULL and whether callers test for it.
 *
 * @param recvreq  lvalue that receives the request pointer
 */
#define MCA_PML_OB1_RECV_REQUEST_ALLOC(recvreq)                           \
    do {                                                                  \
        recvreq = (mca_pml_ob1_recv_request_t *)                          \
            opal_free_list_get(&mca_pml_base_recv_requests);              \
    } while (0)
86
87
88
89
90
91
92
93
94
95
96
97
98
99
/**
 * Initialize an OB1 receive request by forwarding every argument to
 * MCA_PML_BASE_RECV_REQUEST_INIT on the embedded base request.
 *
 * @param request     pointer to the mca_pml_ob1_recv_request_t to set up
 * @param addr        forwarded unchanged to the base initializer
 * @param count       forwarded unchanged to the base initializer
 * @param datatype    forwarded unchanged to the base initializer
 * @param src         forwarded unchanged to the base initializer
 * @param tag         forwarded unchanged to the base initializer
 * @param comm        forwarded unchanged to the base initializer
 * @param persistent  forwarded unchanged to the base initializer
 */
#define MCA_PML_OB1_RECV_REQUEST_INIT(request, addr, count, datatype,     \
                                      src, tag, comm, persistent)         \
    do {                                                                  \
        MCA_PML_BASE_RECV_REQUEST_INIT(&(request)->req_recv, addr, count, \
                                       datatype, src, tag, comm,          \
                                       persistent);                       \
    } while (0)
118
119
120
121
122
123
/**
 * Emit the PERUSE_COMM_REQ_COMPLETE trace event for the request and then
 * call ompi_request_complete() on its embedded ompi request with `true`
 * as the second argument.
 *
 * The argument is parenthesized at every use so that any pointer-valued
 * expression can be passed; it is still evaluated twice, so it must be free
 * of side effects.
 *
 * @param recvreq  pointer to the mca_pml_ob1_recv_request_t to complete
 */
#define MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(recvreq)                    \
    do {                                                                  \
        PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_COMPLETE,                 \
                                &((recvreq)->req_recv.req_base),          \
                                PERUSE_RECV);                             \
        ompi_request_complete(&((recvreq)->req_recv.req_base.req_ompi),   \
                              true);                                      \
    } while (0)
130
131 static inline void mca_pml_ob1_recv_request_fini (mca_pml_ob1_recv_request_t *recvreq)
132 {
133 MCA_PML_BASE_RECV_REQUEST_FINI(&recvreq->req_recv);
134 if ((recvreq)->local_handle) {
135 mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle);
136 recvreq->local_handle = NULL;
137 }
138 }
139
140
141
142
/**
 * Finalize a receive request with mca_pml_ob1_recv_request_fini() and hand
 * it back to the mca_pml_base_recv_requests free list.
 *
 * Wrapped in do { } while (0) like the other statement macros in this
 * header, so that `MCA_PML_OB1_RECV_REQUEST_RETURN(r);` is a single
 * statement and stays correct inside an unbraced if/else.  The invocation
 * must be followed by a semicolon.  The argument is evaluated twice and
 * must be free of side effects.
 *
 * @param recvreq  pointer to the mca_pml_ob1_recv_request_t to release
 */
#define MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq)                          \
    do {                                                                  \
        mca_pml_ob1_recv_request_fini(recvreq);                           \
        opal_free_list_return(&mca_pml_base_recv_requests,                \
                              (opal_free_list_item_t *) (recvreq));       \
    } while (0)
149
150
151
152
153
154
155
156 static inline void
157 recv_request_pml_complete(mca_pml_ob1_recv_request_t *recvreq)
158 {
159 size_t i;
160
161 if(false == recvreq->req_recv.req_base.req_pml_complete){
162
163 if(recvreq->req_recv.req_bytes_packed > 0) {
164 PERUSE_TRACE_COMM_EVENT( PERUSE_COMM_REQ_XFER_END,
165 &recvreq->req_recv.req_base, PERUSE_RECV );
166 }
167
168 for(i = 0; i < recvreq->req_rdma_cnt; i++) {
169 struct mca_btl_base_registration_handle_t *handle = recvreq->req_rdma[i].btl_reg;
170 mca_bml_base_btl_t *bml_btl = recvreq->req_rdma[i].bml_btl;
171
172 if (NULL != handle) {
173 mca_bml_base_deregister_mem (bml_btl, handle);
174 }
175 }
176 recvreq->req_rdma_cnt = 0;
177
178
179 if(true == recvreq->req_recv.req_base.req_free_called) {
180 if( MPI_SUCCESS != recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR ) {
181 ompi_mpi_abort(&ompi_mpi_comm_world.comm, MPI_ERR_REQUEST);
182 }
183 MCA_PML_OB1_RECV_REQUEST_RETURN(recvreq);
184 } else {
185
186 recvreq->req_recv.req_base.req_pml_complete = true;
187 recvreq->req_recv.req_base.req_ompi.req_status._ucount =
188 recvreq->req_bytes_received;
189 if (recvreq->req_recv.req_bytes_packed > recvreq->req_bytes_expected) {
190 recvreq->req_recv.req_base.req_ompi.req_status._ucount =
191 recvreq->req_recv.req_bytes_packed;
192 recvreq->req_recv.req_base.req_ompi.req_status.MPI_ERROR =
193 MPI_ERR_TRUNCATE;
194 }
195 if (OPAL_UNLIKELY(recvreq->local_handle)) {
196 mca_bml_base_deregister_mem (recvreq->rdma_bml, recvreq->local_handle);
197 recvreq->local_handle = NULL;
198 }
199 MCA_PML_OB1_RECV_REQUEST_MPI_COMPLETE(recvreq);
200 }
201
202 }
203 }
204
205 static inline bool
206 recv_request_pml_complete_check(mca_pml_ob1_recv_request_t *recvreq)
207 {
208 opal_atomic_rmb();
209
210 if(recvreq->req_match_received &&
211 recvreq->req_bytes_received >= recvreq->req_recv.req_bytes_packed &&
212 lock_recv_request(recvreq)) {
213 recv_request_pml_complete(recvreq);
214 return true;
215 }
216
217 return false;
218 }
219
220 extern void mca_pml_ob1_recv_req_start(mca_pml_ob1_recv_request_t *req);
221 #define MCA_PML_OB1_RECV_REQUEST_START(r) mca_pml_ob1_recv_req_start(r)
222
223 static inline void prepare_recv_req_converter(mca_pml_ob1_recv_request_t *req)
224 {
225 if( req->req_recv.req_base.req_datatype->super.size | req->req_recv.req_base.req_count ) {
226 opal_convertor_copy_and_prepare_for_recv(
227 req->req_recv.req_base.req_proc->super.proc_convertor,
228 &(req->req_recv.req_base.req_datatype->super),
229 req->req_recv.req_base.req_count,
230 req->req_recv.req_base.req_addr,
231 0,
232 &req->req_recv.req_base.req_convertor);
233 opal_convertor_get_unpacked_size(&req->req_recv.req_base.req_convertor,
234 &req->req_bytes_expected);
235 }
236 }
237
238 #define MCA_PML_OB1_RECV_REQUEST_MATCHED(request, hdr) \
239 recv_req_matched(request, hdr)
240
241 static inline void recv_req_matched(mca_pml_ob1_recv_request_t *req,
242 mca_pml_ob1_match_hdr_t *hdr)
243 {
244 req->req_recv.req_base.req_ompi.req_status.MPI_SOURCE = hdr->hdr_src;
245 req->req_recv.req_base.req_ompi.req_status.MPI_TAG = hdr->hdr_tag;
246 req->req_match_received = true;
247
248 opal_atomic_wmb();
249
250 if(req->req_recv.req_bytes_packed > 0) {
251 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
252 if(MPI_ANY_SOURCE == req->req_recv.req_base.req_peer) {
253
254 prepare_recv_req_converter(req);
255 }
256 #endif
257 PERUSE_TRACE_COMM_EVENT(PERUSE_COMM_REQ_XFER_BEGIN,
258 &req->req_recv.req_base, PERUSE_RECV);
259 }
260 }
261
262
263
264
265
266
/**
 * Unpack received segments into the request's user buffer.
 *
 * Sets bytes_delivered to 0 and, if the request carries data
 * (req_bytes_packed > 0), builds an iovec over the segments after skipping
 * the first seg_offset bytes, then -- under the request's lock -- positions
 * the convertor at data_offset, unpacks, and stores the number of bytes the
 * convertor reports in bytes_delivered.
 *
 * Every macro argument is parenthesized at its point of use so that
 * arbitrary expressions can be passed.  Arguments are still evaluated more
 * than once and must be free of side effects, and they must not be named
 * iov, iov_count, max_data, n, offset or segment (the macro's own locals).
 *
 * NOTE(review): the iovec has MCA_BTL_DES_MAX_SEGMENTS entries and is filled
 * without a bounds check; callers are presumably expected to pass
 * num_segments <= MCA_BTL_DES_MAX_SEGMENTS -- confirm.
 *
 * @param request          pointer to the mca_pml_ob1_recv_request_t
 * @param segments         pointer to the first mca_btl_base_segment_t
 * @param num_segments     number of segments to walk
 * @param seg_offset       bytes to skip at the start of the segment list
 * @param data_offset      lvalue holding the convertor position to set
 * @param bytes_received   upper bound handed to the convertor as max_data
 * @param bytes_delivered  lvalue receiving the unpacked byte count
 */
#define MCA_PML_OB1_RECV_REQUEST_UNPACK( request,                                 \
                                         segments,                                \
                                         num_segments,                            \
                                         seg_offset,                              \
                                         data_offset,                             \
                                         bytes_received,                          \
                                         bytes_delivered)                         \
do {                                                                              \
    (bytes_delivered) = 0;                                                        \
    if((request)->req_recv.req_bytes_packed > 0) {                                \
        struct iovec iov[MCA_BTL_DES_MAX_SEGMENTS];                               \
        uint32_t iov_count = 0;                                                   \
        size_t max_data = (bytes_received);                                       \
        size_t n, offset = (seg_offset);                                          \
        mca_btl_base_segment_t* segment = (segments);                             \
                                                                                  \
        for( n = 0; n < (num_segments); n++, segment++ ) {                        \
            if(offset >= segment->seg_len) {                                      \
                offset -= segment->seg_len;                                       \
            } else {                                                              \
                iov[iov_count].iov_len = segment->seg_len - offset;               \
                iov[iov_count].iov_base = (IOVBASE_TYPE*)                         \
                    ((unsigned char*)segment->seg_addr.pval + offset);            \
                iov_count++;                                                      \
                offset = 0;                                                       \
            }                                                                     \
        }                                                                         \
        OPAL_THREAD_LOCK(&(request)->lock);                                       \
        PERUSE_TRACE_COMM_OMPI_EVENT (PERUSE_COMM_REQ_XFER_CONTINUE,              \
                                      &((request)->req_recv.req_base), max_data,  \
                                      PERUSE_RECV);                               \
        opal_convertor_set_position( &((request)->req_recv.req_base.req_convertor), \
                                     &(data_offset) );                            \
        opal_convertor_unpack( &(request)->req_recv.req_base.req_convertor,       \
                               iov,                                               \
                               &iov_count,                                        \
                               &max_data );                                       \
        (bytes_delivered) = max_data;                                             \
        OPAL_THREAD_UNLOCK(&(request)->lock);                                     \
    }                                                                             \
} while (0)
308
309
310
311
312
313
314 void mca_pml_ob1_recv_request_progress_match(
315 mca_pml_ob1_recv_request_t* req,
316 struct mca_btl_base_module_t* btl,
317 mca_btl_base_segment_t* segments,
318 size_t num_segments);
319
320
321
322
323
324 void mca_pml_ob1_recv_request_progress_frag(
325 mca_pml_ob1_recv_request_t* req,
326 struct mca_btl_base_module_t* btl,
327 mca_btl_base_segment_t* segments,
328 size_t num_segments);
329
330 #if OPAL_CUDA_SUPPORT
331 void mca_pml_ob1_recv_request_frag_copy_start(
332 mca_pml_ob1_recv_request_t* req,
333 struct mca_btl_base_module_t* btl,
334 mca_btl_base_segment_t* segments,
335 size_t num_segments,
336 mca_btl_base_descriptor_t* des);
337
338 void mca_pml_ob1_recv_request_frag_copy_finished(struct mca_btl_base_module_t* btl,
339 struct mca_btl_base_endpoint_t* ep,
340 struct mca_btl_base_descriptor_t* des,
341 int status );
342 #endif
343
344
345
346
347 void mca_pml_ob1_recv_request_progress_rndv(
348 mca_pml_ob1_recv_request_t* req,
349 struct mca_btl_base_module_t* btl,
350 mca_btl_base_segment_t* segments,
351 size_t num_segments);
352
353
354
355
356
357 void mca_pml_ob1_recv_request_progress_rget(
358 mca_pml_ob1_recv_request_t* req,
359 struct mca_btl_base_module_t* btl,
360 mca_btl_base_segment_t* segments,
361 size_t num_segments);
362
363
364
365
366
367 void mca_pml_ob1_recv_request_matched_probe(
368 mca_pml_ob1_recv_request_t* req,
369 struct mca_btl_base_module_t* btl,
370 mca_btl_base_segment_t* segments,
371 size_t num_segments);
372
373
374
375
376
377 int mca_pml_ob1_recv_request_schedule_once(
378 mca_pml_ob1_recv_request_t* req, mca_bml_base_btl_t* start_bml_btl);
379
380 static inline int mca_pml_ob1_recv_request_schedule_exclusive(
381 mca_pml_ob1_recv_request_t* req,
382 mca_bml_base_btl_t* start_bml_btl)
383 {
384 int rc;
385
386 do {
387 rc = mca_pml_ob1_recv_request_schedule_once(req, start_bml_btl);
388 if(rc == OMPI_ERR_OUT_OF_RESOURCE)
389 break;
390 } while(!unlock_recv_request(req));
391
392 if(OMPI_SUCCESS == rc)
393 recv_request_pml_complete_check(req);
394
395 return rc;
396 }
397
398 static inline void mca_pml_ob1_recv_request_schedule(
399 mca_pml_ob1_recv_request_t* req,
400 mca_bml_base_btl_t* start_bml_btl)
401 {
402 if(!lock_recv_request(req))
403 return;
404
405 (void)mca_pml_ob1_recv_request_schedule_exclusive(req, start_bml_btl);
406 }
407
/**
 * Queue an ACK on the pending-packet list.
 *
 * Allocates a pending packet, fills in an ACK header from the arguments,
 * leaves bml_btl unset (NULL), and appends the packet to
 * mca_pml_ob1.pckt_pending while holding mca_pml_ob1.lock.
 *
 * NOTE(review): the allocated packet is used without a NULL check --
 * confirm that MCA_PML_OB1_PCKT_PENDING_ALLOC cannot fail.
 *
 * @param P   stored in the packet's proc field
 * @param S   stored in hdr_ack.hdr_src_req.lval
 * @param D   stored in hdr_ack.hdr_dst_req.pval
 * @param O   stored in hdr_ack.hdr_send_offset
 * @param Sz  stored in hdr_ack.hdr_send_size
 */
#define MCA_PML_OB1_ADD_ACK_TO_PENDING(P, S, D, O, Sz)                       \
    do {                                                                     \
        mca_pml_ob1_pckt_pending_t *_pckt;                                   \
                                                                             \
        MCA_PML_OB1_PCKT_PENDING_ALLOC(_pckt);                               \
        _pckt->hdr.hdr_common.hdr_type = MCA_PML_OB1_HDR_TYPE_ACK;           \
        _pckt->hdr.hdr_ack.hdr_src_req.lval = (S);                           \
        _pckt->hdr.hdr_ack.hdr_dst_req.pval = (D);                           \
        _pckt->hdr.hdr_ack.hdr_send_offset = (O);                            \
        _pckt->hdr.hdr_ack.hdr_send_size = (Sz);                             \
        _pckt->proc = (P);                                                   \
        _pckt->bml_btl = NULL;                                               \
                                                                             \
        OPAL_THREAD_LOCK(&mca_pml_ob1.lock);                                 \
        opal_list_append(&mca_pml_ob1.pckt_pending,                          \
                         (opal_list_item_t *) _pckt);                        \
        OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);                               \
    } while (0)
425
426 int mca_pml_ob1_recv_request_ack_send_btl(ompi_proc_t* proc,
427 mca_bml_base_btl_t* bml_btl, uint64_t hdr_src_req, void *hdr_dst_req,
428 uint64_t hdr_rdma_offset, uint64_t size, bool nordma);
429
430 static inline int mca_pml_ob1_recv_request_ack_send(ompi_proc_t* proc,
431 uint64_t hdr_src_req, void *hdr_dst_req, uint64_t hdr_send_offset,
432 uint64_t size, bool nordma)
433 {
434 size_t i;
435 mca_bml_base_btl_t* bml_btl;
436 mca_bml_base_endpoint_t* endpoint = mca_bml_base_get_endpoint (proc);
437
438 assert (NULL != endpoint);
439
440 for(i = 0; i < mca_bml_base_btl_array_get_size(&endpoint->btl_eager); i++) {
441 bml_btl = mca_bml_base_btl_array_get_next(&endpoint->btl_eager);
442 if(mca_pml_ob1_recv_request_ack_send_btl(proc, bml_btl, hdr_src_req,
443 hdr_dst_req, hdr_send_offset, size, nordma) == OMPI_SUCCESS)
444 return OMPI_SUCCESS;
445 }
446
447 MCA_PML_OB1_ADD_ACK_TO_PENDING(proc, hdr_src_req, hdr_dst_req,
448 hdr_send_offset, size);
449
450 return OMPI_ERR_OUT_OF_RESOURCE;
451 }
452
453 int mca_pml_ob1_recv_request_get_frag(mca_pml_ob1_rdma_frag_t* frag);
454
455
456
457
458 void mca_pml_ob1_recv_request_process_pending(void);
459
460 END_C_DECLS
461
462 #endif
463