This source file includes the following definitions.
- mca_pml_ob1_compute_segment_length_base
- mca_pml_ob1_compute_segment_length_remote
- mca_pml_ob1_calc_weighted_length
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29 #ifndef MCA_PML_OB1_H
30 #define MCA_PML_OB1_H
31
32 #include "ompi_config.h"
33 #include "opal/class/opal_free_list.h"
34 #include "ompi/request/request.h"
35 #include "ompi/mca/pml/pml.h"
36 #include "ompi/mca/pml/base/pml_base_request.h"
37 #include "ompi/mca/pml/base/pml_base_bsend.h"
38 #include "ompi/mca/pml/base/pml_base_sendreq.h"
39 #include "ompi/datatype/ompi_datatype.h"
40 #include "pml_ob1_hdr.h"
41 #include "ompi/mca/bml/base/base.h"
42 #include "ompi/proc/proc.h"
43 #include "opal/mca/allocator/base/base.h"
44
45 BEGIN_C_DECLS
46
47
48
49
50
51 struct mca_pml_ob1_t {
52 mca_pml_base_module_t super;
53
54 int priority;
55 int free_list_num;
56 int free_list_max;
57 int free_list_inc;
58 int32_t send_pipeline_depth;
59 int32_t recv_pipeline_depth;
60 size_t rdma_retries_limit;
61 int max_rdma_per_request;
62 int max_send_per_range;
63 bool use_all_rdma;
64
65
66 opal_mutex_t lock;
67
68
69 opal_free_list_t rdma_frags;
70 opal_free_list_t recv_frags;
71 opal_free_list_t pending_pckts;
72 opal_free_list_t buffers;
73 opal_free_list_t send_ranges;
74
75
76 opal_list_t pckt_pending;
77 opal_list_t send_pending;
78 opal_list_t recv_pending;
79 opal_list_t rdma_pending;
80
81 opal_list_t non_existing_communicator_pending;
82 bool enabled;
83 char* allocator_name;
84 mca_allocator_base_module_t* allocator;
85 unsigned int unexpected_limit;
86 };
87 typedef struct mca_pml_ob1_t mca_pml_ob1_t;
88
89 extern mca_pml_ob1_t mca_pml_ob1;
90 extern int mca_pml_ob1_output;
91 extern bool mca_pml_ob1_matching_protection;
92
93
94
95
/*
 * Communicator / process bookkeeping and module control entry points.
 * Only the prototypes appear in this header; each returns an int status.
 */
int mca_pml_ob1_add_comm(struct ompi_communicator_t *comm);
int mca_pml_ob1_del_comm(struct ompi_communicator_t *comm);

/* `procs` is an array of process pointers holding `nprocs` entries. */
int mca_pml_ob1_add_procs(struct ompi_proc_t **procs, size_t nprocs);
int mca_pml_ob1_del_procs(struct ompi_proc_t **procs, size_t nprocs);

int mca_pml_ob1_enable(bool enable);

int mca_pml_ob1_progress(void);
117
118 extern int mca_pml_ob1_iprobe( int dst,
119 int tag,
120 struct ompi_communicator_t* comm,
121 int *matched,
122 ompi_status_public_t* status );
123
124 extern int mca_pml_ob1_probe( int dst,
125 int tag,
126 struct ompi_communicator_t* comm,
127 ompi_status_public_t* status );
128
129 extern int mca_pml_ob1_improbe( int dst,
130 int tag,
131 struct ompi_communicator_t* comm,
132 int *matched,
133 struct ompi_message_t **message,
134 ompi_status_public_t* status );
135
136 extern int mca_pml_ob1_mprobe( int dst,
137 int tag,
138 struct ompi_communicator_t* comm,
139 struct ompi_message_t **message,
140 ompi_status_public_t* status );
141
142 extern int mca_pml_ob1_isend_init( const void *buf,
143 size_t count,
144 ompi_datatype_t *datatype,
145 int dst,
146 int tag,
147 mca_pml_base_send_mode_t mode,
148 struct ompi_communicator_t* comm,
149 struct ompi_request_t **request );
150
151 extern int mca_pml_ob1_isend( const void *buf,
152 size_t count,
153 ompi_datatype_t *datatype,
154 int dst,
155 int tag,
156 mca_pml_base_send_mode_t mode,
157 struct ompi_communicator_t* comm,
158 struct ompi_request_t **request );
159
160 extern int mca_pml_ob1_send( const void *buf,
161 size_t count,
162 ompi_datatype_t *datatype,
163 int dst,
164 int tag,
165 mca_pml_base_send_mode_t mode,
166 struct ompi_communicator_t* comm );
167
168 extern int mca_pml_ob1_irecv_init( void *buf,
169 size_t count,
170 ompi_datatype_t *datatype,
171 int src,
172 int tag,
173 struct ompi_communicator_t* comm,
174 struct ompi_request_t **request );
175
176 extern int mca_pml_ob1_irecv( void *buf,
177 size_t count,
178 ompi_datatype_t *datatype,
179 int src,
180 int tag,
181 struct ompi_communicator_t* comm,
182 struct ompi_request_t **request );
183
184 extern int mca_pml_ob1_recv( void *buf,
185 size_t count,
186 ompi_datatype_t *datatype,
187 int src,
188 int tag,
189 struct ompi_communicator_t* comm,
190 ompi_status_public_t* status );
191
192 extern int mca_pml_ob1_imrecv( void *buf,
193 size_t count,
194 ompi_datatype_t *datatype,
195 struct ompi_message_t **message,
196 struct ompi_request_t **request );
197
198 extern int mca_pml_ob1_mrecv( void *buf,
199 size_t count,
200 ompi_datatype_t *datatype,
201 struct ompi_message_t **message,
202 ompi_status_public_t* status );
203
204 extern int mca_pml_ob1_dump( struct ompi_communicator_t* comm,
205 int verbose );
206
207 extern int mca_pml_ob1_start( size_t count,
208 ompi_request_t** requests );
209
210 extern int mca_pml_ob1_ft_event( int state );
211
212
213
214
215
216
217 extern struct mca_pml_ob1_recv_request_t *mca_pml_ob1_recvreq;
218 extern struct mca_pml_ob1_send_request_t *mca_pml_ob1_sendreq;
219
220 END_C_DECLS
221
222 struct mca_pml_ob1_pckt_pending_t {
223 opal_free_list_item_t super;
224 ompi_proc_t* proc;
225 mca_pml_ob1_hdr_t hdr;
226 struct mca_bml_base_btl_t *bml_btl;
227 uint8_t order;
228 int status;
229 };
230 typedef struct mca_pml_ob1_pckt_pending_t mca_pml_ob1_pckt_pending_t;
231 OBJ_CLASS_DECLARATION(mca_pml_ob1_pckt_pending_t);
232
/*
 * Take one pending-packet descriptor from mca_pml_ob1.pending_pckts and
 * store it in `pckt`.
 *
 * `pckt` must be a modifiable lvalue of type mca_pml_ob1_pckt_pending_t *.
 * The value stored is whatever opal_free_list_get() returns, cast to the
 * descriptor type; the macro itself does not validate it.
 *
 * The parameter is parenthesised so that any lvalue expression passed in
 * is assigned as a whole.
 */
#define MCA_PML_OB1_PCKT_PENDING_ALLOC(pckt)                            \
    do {                                                                \
        (pckt) = (mca_pml_ob1_pckt_pending_t *)                         \
            opal_free_list_get(&mca_pml_ob1.pending_pckts);             \
    } while (0)
238
/*
 * Give a pending-packet descriptor back to mca_pml_ob1.pending_pckts.
 *
 * `pckt` is cast to opal_free_list_item_t * before being handed to
 * opal_free_list_return().  The parameter is parenthesised so the cast
 * applies to the whole argument expression rather than only to its first
 * token.
 */
#define MCA_PML_OB1_PCKT_PENDING_RETURN(pckt)                           \
    do {                                                                \
        opal_free_list_return(&mca_pml_ob1.pending_pckts,               \
                              (opal_free_list_item_t *) (pckt));        \
    } while (0)
245
/*
 * Queue a FIN packet for later transmission.
 *
 * A descriptor is taken from the pending-packet free list, its FIN header
 * is prepared from (D).lval and Sz, the remaining fields are copied from
 * the arguments, and the descriptor is appended to mca_pml_ob1.pckt_pending
 * while holding mca_pml_ob1.lock.
 *
 *   P  -> proc      B -> bml_btl      O -> order      S -> status
 *
 * NOTE(review): the descriptor is dereferenced without a NULL check --
 * confirm that the free-list allocation cannot fail here.
 */
#define MCA_PML_OB1_ADD_FIN_TO_PENDING(P, D, Sz, B, O, S)                   \
    do {                                                                    \
        mca_pml_ob1_pckt_pending_t *fin_pckt_;                              \
                                                                            \
        MCA_PML_OB1_PCKT_PENDING_ALLOC(fin_pckt_);                          \
        mca_pml_ob1_fin_hdr_prepare(&fin_pckt_->hdr.hdr_fin, 0,             \
                                    (D).lval, (Sz));                        \
        fin_pckt_->proc    = (P);                                           \
        fin_pckt_->bml_btl = (B);                                           \
        fin_pckt_->order   = (O);                                           \
        fin_pckt_->status  = (S);                                           \
        OPAL_THREAD_LOCK(&mca_pml_ob1.lock);                                \
        opal_list_append(&mca_pml_ob1.pckt_pending,                         \
                         (opal_list_item_t *) fin_pckt_);                   \
        OPAL_THREAD_UNLOCK(&mca_pml_ob1.lock);                              \
    } while (0)
262
/*
 * Acquire `lock` for matching.  With mca_pml_ob1_matching_protection set
 * the mutex is taken through opal_mutex_lock(); otherwise the request goes
 * through OPAL_THREAD_LOCK().
 */
#define OB1_MATCHING_LOCK(lock)                                 \
    do {                                                        \
        if (!mca_pml_ob1_matching_protection) {                 \
            OPAL_THREAD_LOCK(lock);                             \
        } else {                                                \
            opal_mutex_lock(lock);                              \
        }                                                       \
    } while (0)
270
271
/*
 * Release `lock` after matching; the counterpart of OB1_MATCHING_LOCK.
 * With mca_pml_ob1_matching_protection set the mutex is released through
 * opal_mutex_unlock(); otherwise through OPAL_THREAD_UNLOCK().
 */
#define OB1_MATCHING_UNLOCK(lock)                               \
    do {                                                        \
        if (!mca_pml_ob1_matching_protection) {                 \
            OPAL_THREAD_UNLOCK(lock);                           \
        } else {                                                \
            opal_mutex_unlock(lock);                            \
        }                                                       \
    } while (0)
279
280
281
282 int mca_pml_ob1_send_fin(ompi_proc_t* proc, mca_bml_base_btl_t* bml_btl,
283 opal_ptr_t hdr_frag, uint64_t size, uint8_t order, int status);
284
285
286
287
288
289
290 void mca_pml_ob1_process_pending_packets(mca_bml_base_btl_t* bml_btl);
291
292
293
294
295
296 void mca_pml_ob1_process_pending_rdma(void);
297
/*
 * Drain whichever of the four pending queues currently hold entries.
 * The queues are checked in a fixed order -- packets, receives, sends,
 * RDMA -- and each handler is invoked only if its queue size is non-zero.
 * `bml_btl` is forwarded to the packet and send handlers.
 */
#define MCA_PML_OB1_PROGRESS_PENDING(bml_btl)                               \
    do {                                                                    \
        if (0 != opal_list_get_size(&mca_pml_ob1.pckt_pending)) {           \
            mca_pml_ob1_process_pending_packets(bml_btl);                   \
        }                                                                   \
        if (0 != opal_list_get_size(&mca_pml_ob1.recv_pending)) {           \
            mca_pml_ob1_recv_request_process_pending();                     \
        }                                                                   \
        if (0 != opal_list_get_size(&mca_pml_ob1.send_pending)) {           \
            mca_pml_ob1_send_request_process_pending(bml_btl);              \
        }                                                                   \
        if (0 != opal_list_get_size(&mca_pml_ob1.rdma_pending)) {           \
            mca_pml_ob1_process_pending_rdma();                             \
        }                                                                   \
    } while (0)
309
310
311
312
313 static inline size_t
314 mca_pml_ob1_compute_segment_length_base(mca_btl_base_segment_t *segments,
315 size_t count, size_t hdrlen)
316 {
317 size_t i, length = 0;
318
319 for (i = 0; i < count ; ++i) {
320 length += segments[i].seg_len;
321 }
322 return (length - hdrlen);
323 }
324
325 static inline size_t
326 mca_pml_ob1_compute_segment_length_remote (size_t seg_size, void *segments,
327 size_t count, ompi_proc_t *rem_proc)
328 {
329 mca_btl_base_segment_t *segment = (mca_btl_base_segment_t *) segments;
330 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
331 ompi_proc_t *local_proc = ompi_proc_local();
332 #endif
333 size_t i, length = 0;
334
335 for (i = 0 ; i < count ; ++i) {
336 #if OPAL_ENABLE_HETEROGENEOUS_SUPPORT
337 if ((rem_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN) !=
338 (local_proc->super.proc_arch & OPAL_ARCH_ISBIGENDIAN))
339
340 length += opal_swap_bytes8(segment->seg_len);
341 else
342 #endif
343 length += segment->seg_len;
344
345 segment = (mca_btl_base_segment_t *)((char *)segment + seg_size);
346 }
347
348 return length;
349 }
350
351
352 struct mca_pml_ob1_com_btl_t {
353 mca_bml_base_btl_t *bml_btl;
354 struct mca_btl_base_registration_handle_t *btl_reg;
355 size_t length;
356 };
357 typedef struct mca_pml_ob1_com_btl_t mca_pml_ob1_com_btl_t;
358
359 int mca_pml_ob1_com_btl_comp(const void *v1, const void *v2);
360
361
362
363 static inline void
364 mca_pml_ob1_calc_weighted_length( mca_pml_ob1_com_btl_t *btls, int num_btls, size_t size,
365 double weight_total )
366 {
367 int i;
368 size_t length_left;
369
370
371 if( OPAL_LIKELY(1 == num_btls) ) {
372 btls[0].length = size;
373 return;
374 }
375
376
377
378 qsort( btls, num_btls, sizeof(mca_pml_ob1_com_btl_t),
379 mca_pml_ob1_com_btl_comp );
380
381 for(length_left = size, i = 0; i < num_btls; i++) {
382 mca_bml_base_btl_t* bml_btl = btls[i].bml_btl;
383 size_t length = 0;
384 if( OPAL_UNLIKELY(0 != length_left) ) {
385 length = (length_left > bml_btl->btl->btl_eager_limit)?
386 ((size_t)(size * (bml_btl->btl_weight / weight_total))) :
387 length_left;
388
389 if(length > length_left)
390 length = length_left;
391 length_left -= length;
392 }
393 btls[i].length = length;
394 }
395
396
397 btls[0].length += length_left;
398 }
399
400
401
402
403
/* Prototype only; defined outside this header.  Takes a 32-bit count and
 * returns an int status.
 * NOTE(review): the meaning of `count` is not visible here -- confirm at
 * the definition. */
int mca_pml_ob1_enable_progress(int32_t count);
405
406 #endif