This source file includes the following definitions.
- mca_btl_uct_alloc
- _mca_btl_uct_send_pack
- mca_btl_uct_prepare_src
- mca_btl_uct_free
- mca_btl_uct_send_frag_pack
- mca_btl_uct_append_pending_frag
- mca_btl_uct_send_frag
- mca_btl_uct_send
- mca_btl_uct_sendi_pack
- mca_btl_uct_max_sendi
- mca_btl_uct_sendi
1
2
3
4
5
6
7
8
9
10
11
12 #include "btl_uct_am.h"
13 #include "btl_uct_rdma.h"
14 #include "btl_uct_device_context.h"
15
16
17
18
19
20
21
22 mca_btl_base_descriptor_t *mca_btl_uct_alloc (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint,
23 uint8_t order, size_t size, uint32_t flags)
24 {
25 mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
26 mca_btl_uct_base_frag_t *frag = NULL;
27
28 if (size <= (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
29 frag = mca_btl_uct_frag_alloc_short (uct_btl, endpoint);
30 } else if (size <= uct_btl->super.btl_eager_limit) {
31 frag = mca_btl_uct_frag_alloc_eager (uct_btl, endpoint);
32 } else {
33 frag = mca_btl_uct_frag_alloc_max (uct_btl, endpoint);
34 }
35
36 if (OPAL_LIKELY(frag != NULL)) {
37 frag->segments[0].seg_len = size;
38
39 frag->base.des_segment_count = 1;
40 frag->base.des_flags = flags;
41 frag->base.order = order;
42 frag->uct_iov.length = size;
43 if (NULL != frag->base.super.registration) {
44
45 frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
46 }
47 }
48
49 return (mca_btl_base_descriptor_t *) frag;
50 }
51
52 static inline void _mca_btl_uct_send_pack (void *data, void *header, size_t header_size, opal_convertor_t *convertor,
53 size_t payload_size)
54 {
55 uint32_t iov_count = 1;
56 struct iovec iov;
57 size_t length;
58
59 if (header_size > 0) {
60 assert (NULL != header);
61 memcpy (data, header, header_size);
62 }
63
64
65 iov.iov_base = (IOVBASE_TYPE *) ((intptr_t) data + header_size);
66 iov.iov_len = length = payload_size;
67
68 (void) opal_convertor_pack (convertor, &iov, &iov_count, &length);
69
70 assert (length == payload_size);
71 }
72
73 struct mca_btl_base_descriptor_t *mca_btl_uct_prepare_src (mca_btl_base_module_t *btl,
74 mca_btl_base_endpoint_t *endpoint,
75 opal_convertor_t *convertor,
76 uint8_t order, size_t reserve,
77 size_t *size, uint32_t flags)
78 {
79 mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
80 const size_t total_size = reserve + *size;
81 mca_btl_uct_base_frag_t *frag;
82 void *data_ptr;
83
84
85 if (OPAL_UNLIKELY(opal_convertor_need_buffers(convertor) || total_size > uct_btl->super.btl_eager_limit)) {
86 frag = (mca_btl_uct_base_frag_t *) mca_btl_uct_alloc (btl, endpoint, order, total_size, flags);
87 if (OPAL_UNLIKELY(NULL == frag)) {
88 return NULL;
89 }
90
91 _mca_btl_uct_send_pack ((void *) ((intptr_t) frag->uct_iov.buffer + reserve), NULL, 0,
92 convertor, *size);
93 } else {
94 opal_convertor_get_current_pointer (convertor, &data_ptr);
95 assert (NULL != data_ptr);
96
97 frag = mca_btl_uct_frag_alloc_short (uct_btl, endpoint);
98 if (OPAL_UNLIKELY(NULL == frag)) {
99 return NULL;
100 }
101
102 frag->uct_iov.length = total_size;
103 frag->base.order = order;
104 frag->base.des_flags = flags;
105 if (total_size > (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
106 frag->segments[0].seg_len = reserve;
107 frag->segments[1].seg_len = *size;
108 frag->segments[1].seg_addr.pval = data_ptr;
109 frag->base.des_segment_count = 2;
110 } else {
111 frag->segments[0].seg_len = total_size;
112 memcpy ((void *)((intptr_t) frag->segments[1].seg_addr.pval + reserve), data_ptr, *size);
113 frag->base.des_segment_count = 1;
114 }
115 }
116
117 return &frag->base;
118 }
119
120
121
122
123
124
125
126 int mca_btl_uct_free (mca_btl_base_module_t *btl, mca_btl_base_descriptor_t *des)
127 {
128 mca_btl_uct_frag_return ((mca_btl_uct_base_frag_t *) des);
129 return OPAL_SUCCESS;
130 }
131
132 static size_t mca_btl_uct_send_frag_pack (void *data, void *arg)
133 {
134 mca_btl_uct_base_frag_t *frag = (mca_btl_uct_base_frag_t *) arg;
135 size_t length = 8;
136
137 memcpy (data, &frag->header, sizeof (frag->header));
138 data = (void *)((intptr_t) data + 8);
139
140
141 for (size_t i = 0 ; i < frag->base.des_segment_count ; ++i) {
142 const size_t seg_len = frag->segments[i].seg_len;
143 memcpy (data, frag->segments[i].seg_addr.pval, seg_len);
144 data = (void *)((intptr_t) data + seg_len);
145 length += seg_len;
146 }
147
148 return length;
149 }
150
151 static void mca_btl_uct_append_pending_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag,
152 mca_btl_uct_device_context_t *context, bool ready)
153 {
154 frag->ready = ready;
155 frag->base.des_flags |= MCA_BTL_DES_SEND_ALWAYS_CALLBACK;
156 opal_atomic_wmb ();
157
158 opal_list_append (&uct_btl->pending_frags, (opal_list_item_t *) frag);
159 }
160
161 int mca_btl_uct_send_frag (mca_btl_uct_module_t *uct_btl, mca_btl_uct_base_frag_t *frag, bool append)
162 {
163 mca_btl_uct_device_context_t *context = frag->context;
164 const ssize_t msg_size = frag->uct_iov.length + 8;
165 ssize_t size;
166 ucs_status_t ucs_status;
167 uct_ep_h ep_handle = NULL;
168
169
170 (void) mca_btl_uct_endpoint_test_am (uct_btl, frag->endpoint, frag->context, &ep_handle);
171 assert (NULL != ep_handle);
172
173
174
175 if (!context->in_am_callback) {
176 mca_btl_uct_context_lock (context);
177
178 if (NULL != frag->base.super.registration) {
179 frag->comp.dev_context = context;
180 ucs_status = uct_ep_am_zcopy (ep_handle, MCA_BTL_UCT_FRAG, &frag->header, sizeof (frag->header),
181 &frag->uct_iov, 1, 0, &frag->comp.uct_comp);
182
183 if (OPAL_LIKELY(UCS_INPROGRESS == ucs_status)) {
184 uct_worker_progress (context->uct_worker);
185 mca_btl_uct_context_unlock (context);
186 return OPAL_SUCCESS;
187 }
188 } else {
189
190 if (1 == frag->base.des_segment_count && (frag->uct_iov.length + 8) < MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, 0).cap.am.max_short) {
191 ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, frag->header.value, frag->uct_iov.buffer,
192 frag->uct_iov.length);
193
194 if (OPAL_LIKELY(UCS_OK == ucs_status)) {
195 uct_worker_progress (context->uct_worker);
196 mca_btl_uct_context_unlock (context);
197
198 mca_btl_uct_frag_complete (frag, OPAL_SUCCESS);
199 return 1;
200 }
201 }
202
203 size = uct_ep_am_bcopy (ep_handle, MCA_BTL_UCT_FRAG, mca_btl_uct_send_frag_pack, frag, 0);
204 if (OPAL_LIKELY(size == msg_size)) {
205 uct_worker_progress (context->uct_worker);
206 mca_btl_uct_context_unlock (context);
207
208 mca_btl_uct_frag_complete (frag, OPAL_SUCCESS);
209 return 1;
210 }
211 }
212
213
214 uct_worker_progress (context->uct_worker);
215 mca_btl_uct_context_unlock (context);
216
217 mca_btl_uct_device_handle_completions (context);
218 }
219
220 if (!append) {
221 return OPAL_ERR_OUT_OF_RESOURCE;
222 }
223
224 OPAL_THREAD_LOCK(&uct_btl->lock);
225 mca_btl_uct_append_pending_frag (uct_btl, frag, context, true);
226 OPAL_THREAD_UNLOCK(&uct_btl->lock);
227
228 return OPAL_SUCCESS;
229 }
230
231 int mca_btl_uct_send (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, mca_btl_base_descriptor_t *descriptor,
232 mca_btl_base_tag_t tag)
233 {
234 mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
235 mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_am_context (uct_btl);
236 mca_btl_uct_base_frag_t *frag = (mca_btl_uct_base_frag_t *) descriptor;
237 uct_ep_h ep_handle;
238 int rc;
239
240 BTL_VERBOSE(("btl/uct sending descriptor %p from %d -> %d. length = %" PRIu64, (void *)descriptor,
241 OPAL_PROC_MY_NAME.vpid, endpoint->ep_proc->proc_name.vpid, frag->uct_iov.length));
242
243
244 frag->header.data.tag = tag;
245 frag->context = context;
246
247 rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle);
248 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
249 OPAL_THREAD_LOCK(&uct_btl->lock);
250
251 if (OPAL_SUCCESS != mca_btl_uct_endpoint_test_am (uct_btl, endpoint, context, &ep_handle)) {
252 mca_btl_uct_append_pending_frag (uct_btl, frag, context, false);
253 OPAL_THREAD_UNLOCK(&uct_btl->lock);
254 return OPAL_SUCCESS;
255 }
256 OPAL_THREAD_UNLOCK(&uct_btl->lock);
257 }
258
259 return mca_btl_uct_send_frag (uct_btl, frag, true);
260 }
261
262 struct mca_btl_uct_sendi_pack_args_t {
263 uint64_t am_header;
264 void *header;
265 size_t header_size;
266 opal_convertor_t *convertor;
267 size_t payload_size;
268 };
269
270 typedef struct mca_btl_uct_sendi_pack_args_t mca_btl_uct_sendi_pack_args_t;
271
272 static size_t mca_btl_uct_sendi_pack (void *data, void *arg)
273 {
274 mca_btl_uct_sendi_pack_args_t *args = (mca_btl_uct_sendi_pack_args_t *) arg;
275 mca_btl_uct_am_header_t *am_header = (mca_btl_uct_am_header_t *) data;
276
277 am_header->value = args->am_header;
278 _mca_btl_uct_send_pack ((void *)((intptr_t)data + 8), args->header, args->header_size, args->convertor,
279 args->payload_size);
280 return args->header_size + args->payload_size + 8;
281 }
282
283 static inline size_t mca_btl_uct_max_sendi (mca_btl_uct_module_t *uct_btl, int context_id)
284 {
285 return MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context_id).cap.am.max_bcopy;
286 }
287
288 int mca_btl_uct_sendi (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, opal_convertor_t *convertor,
289 void *header, size_t header_size, size_t payload_size, uint8_t order, uint32_t flags,
290 mca_btl_base_tag_t tag, mca_btl_base_descriptor_t **descriptor)
291 {
292 mca_btl_uct_module_t *uct_btl = (mca_btl_uct_module_t *) btl;
293 mca_btl_uct_device_context_t *context = mca_btl_uct_module_get_am_context (uct_btl);
294 const size_t total_size = header_size + payload_size;
295
296 const size_t msg_size = total_size + 8;
297 mca_btl_uct_am_header_t am_header;
298 ucs_status_t ucs_status = UCS_ERR_NO_RESOURCE;
299 uct_ep_h ep_handle;
300 int rc;
301
302 rc = mca_btl_uct_endpoint_check_am (uct_btl, endpoint, context, &ep_handle);
303 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc || msg_size > mca_btl_uct_max_sendi (uct_btl, context->context_id))) {
304 if (descriptor) {
305 *descriptor = mca_btl_uct_alloc (btl, endpoint, order, total_size, flags);
306 }
307
308 return OPAL_ERR_OUT_OF_RESOURCE;
309 }
310
311 am_header.data.tag = tag;
312
313 mca_btl_uct_context_lock (context);
314 if (0 == payload_size) {
315 ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, header, header_size);
316 } else if (msg_size < (size_t) MCA_BTL_UCT_TL_ATTR(uct_btl->am_tl, context->context_id).cap.am.max_short) {
317 int8_t *data = alloca (total_size);
318 _mca_btl_uct_send_pack (data, header, header_size, convertor, payload_size);
319 ucs_status = uct_ep_am_short (ep_handle, MCA_BTL_UCT_FRAG, am_header.value, data, total_size);
320 } else {
321 ssize_t size;
322
323 size = uct_ep_am_bcopy (ep_handle, MCA_BTL_UCT_FRAG, mca_btl_uct_sendi_pack,
324 &(mca_btl_uct_sendi_pack_args_t) {.am_header = am_header.value,
325 .header = header, .header_size = header_size,
326 .convertor = convertor, .payload_size = payload_size}, 0);
327 if (OPAL_LIKELY(size == (ssize_t) msg_size)) {
328 ucs_status = UCS_OK;
329 }
330 }
331
332 mca_btl_uct_context_unlock (context);
333
334 if (OPAL_UNLIKELY(UCS_OK != ucs_status)) {
335 if (descriptor) {
336 *descriptor = mca_btl_uct_alloc (btl, endpoint, order, total_size, flags);
337 }
338
339 return OPAL_ERR_OUT_OF_RESOURCE;
340 }
341
342 return OPAL_SUCCESS;
343 }