This source file includes the following definitions.
- init_context_freelists
- mca_btl_ofi_context_alloc_normal
- mca_btl_ofi_context_alloc_scalable
- mca_btl_ofi_context_finalize
- get_ofi_context
- get_ofi_context_rr
- mca_btl_ofi_context_progress
1
2
3
4
5
6
7
8
9
10
11
12 #include "btl_ofi.h"
13 #include "btl_ofi_frag.h"
14 #include "btl_ofi_rdma.h"
15
/*
 * Per-thread cached context pointer, defined only when
 * OPAL_HAVE_THREAD_LOCAL is true.  It starts out NULL; get_ofi_context()
 * stores a context in it on a thread's first call and returns the stored
 * value on every later call from that thread.
 */
16 #if OPAL_HAVE_THREAD_LOCAL
17 opal_thread_local mca_btl_ofi_context_t *my_context = NULL;
18 #endif
19
20 int init_context_freelists(mca_btl_ofi_context_t *context)
21 {
22 int rc;
23 OBJ_CONSTRUCT(&context->rdma_comp_list, opal_free_list_t);
24 rc = opal_free_list_init(&context->rdma_comp_list,
25 sizeof(mca_btl_ofi_rdma_completion_t),
26 opal_cache_line_size,
27 OBJ_CLASS(mca_btl_ofi_rdma_completion_t),
28 0,
29 0,
30 512,
31 -1,
32 512,
33 NULL,
34 0,
35 NULL,
36 NULL,
37 NULL);
38 if (rc != OPAL_SUCCESS) {
39 BTL_VERBOSE(("cannot allocate completion freelist"));
40 return rc;
41 }
42
43 if (TWO_SIDED_ENABLED) {
44 OBJ_CONSTRUCT(&context->frag_comp_list, opal_free_list_t);
45 rc = opal_free_list_init(&context->frag_comp_list,
46 sizeof(mca_btl_ofi_frag_completion_t),
47 opal_cache_line_size,
48 OBJ_CLASS(mca_btl_ofi_frag_completion_t),
49 0,
50 0,
51 512,
52 -1,
53 512,
54 NULL,
55 0,
56 NULL,
57 NULL,
58 NULL);
59 if (rc != OPAL_SUCCESS) {
60 BTL_VERBOSE(("cannot allocate completion freelist"));
61 return rc;
62 }
63
64
65 OBJ_CONSTRUCT(&context->frag_list, opal_free_list_t);
66 rc = opal_free_list_init(&context->frag_list,
67 sizeof(mca_btl_ofi_base_frag_t) +
68 MCA_BTL_OFI_FRAG_SIZE,
69 opal_cache_line_size,
70 OBJ_CLASS(mca_btl_ofi_base_frag_t),
71 0,
72 0,
73 1024,
74 -1,
75 1024,
76 NULL,
77 0,
78 NULL,
79 NULL,
80 NULL);
81 if (OPAL_SUCCESS != rc) {
82 BTL_VERBOSE(("failed to init frag pool (free_list)"));
83 }
84 }
85
86 return rc;
87 }
88
89
90
91
92
93
94 mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_normal(struct fi_info *info,
95 struct fid_domain *domain,
96 struct fid_ep *ep,
97 struct fid_av *av)
98 {
99 int rc;
100 uint32_t cq_flags = FI_TRANSMIT | FI_SEND | FI_RECV;
101 char *linux_device_name = info->domain_attr->name;
102
103 struct fi_cq_attr cq_attr = {0};
104
105 mca_btl_ofi_context_t *context;
106
107 context = (mca_btl_ofi_context_t*) calloc(1, sizeof(*context));
108 if (NULL == context) {
109 BTL_VERBOSE(("cannot allocate context"));
110 return NULL;
111 }
112
113
114
115
116 if (NULL == linux_device_name) {
117 BTL_VERBOSE(("linux device name is NULL. This shouldn't happen."));
118 goto single_fail;
119 }
120
121 cq_attr.format = FI_CQ_FORMAT_CONTEXT;
122 cq_attr.wait_obj = FI_WAIT_NONE;
123 rc = fi_cq_open(domain, &cq_attr, &context->cq, NULL);
124 if (0 != rc) {
125 BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
126 linux_device_name,
127 fi_strerror(-rc)
128 ));
129 goto single_fail;
130 }
131
132 rc = fi_ep_bind(ep, (fid_t)av, 0);
133 if (0 != rc) {
134 BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
135 linux_device_name,
136 fi_strerror(-rc)
137 ));
138 goto single_fail;
139 }
140
141 rc = fi_ep_bind(ep, (fid_t)context->cq, cq_flags);
142 if (0 != rc) {
143 BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s",
144 linux_device_name,
145 fi_strerror(-rc)
146 ));
147 goto single_fail;
148 }
149
150 rc = init_context_freelists(context);
151 if (rc != OPAL_SUCCESS) {
152 goto single_fail;
153 }
154
155 context->tx_ctx = ep;
156 context->rx_ctx = ep;
157 context->context_id = 0;
158
159 return context;
160
161 single_fail:
162 mca_btl_ofi_context_finalize(context, false);
163 return NULL;
164 }
165
166
167
168
169
170
171 mca_btl_ofi_context_t *mca_btl_ofi_context_alloc_scalable(struct fi_info *info,
172 struct fid_domain *domain,
173 struct fid_ep *sep,
174 struct fid_av *av,
175 size_t num_contexts)
176 {
177 BTL_VERBOSE(("creating %zu contexts", num_contexts));
178
179 int rc;
180 size_t i;
181 char *linux_device_name = info->domain_attr->name;
182
183 struct fi_cq_attr cq_attr = {0};
184 struct fi_tx_attr tx_attr = {0};
185 struct fi_rx_attr rx_attr = {0};
186
187 mca_btl_ofi_context_t *contexts;
188 tx_attr.op_flags = FI_DELIVERY_COMPLETE;
189
190 contexts = (mca_btl_ofi_context_t*) calloc(num_contexts, sizeof(*contexts));
191 if (NULL == contexts) {
192 BTL_VERBOSE(("cannot allocate communication contexts."));
193 return NULL;
194 }
195
196
197
198
199 if (NULL == linux_device_name) {
200 BTL_VERBOSE(("linux device name is NULL. This shouldn't happen."));
201 goto scalable_fail;
202 }
203
204
205 rc = fi_scalable_ep_bind(sep, (fid_t)av, 0);
206 if (0 != rc) {
207 BTL_VERBOSE(("%s failed fi_scalable_ep_bind with err=%s",
208 linux_device_name,
209 fi_strerror(-rc)
210 ));
211 goto scalable_fail;
212 }
213
214 for (i=0; i < num_contexts; i++) {
215 rc = fi_tx_context(sep, i, &tx_attr, &contexts[i].tx_ctx, NULL);
216 if (0 != rc) {
217 BTL_VERBOSE(("%s failed fi_tx_context with err=%s",
218 linux_device_name,
219 fi_strerror(-rc)
220 ));
221 goto scalable_fail;
222 }
223
224
225
226
227 rc = fi_rx_context(sep, i, &rx_attr, &contexts[i].rx_ctx, NULL);
228 if (0 != rc) {
229 BTL_VERBOSE(("%s failed fi_rx_context with err=%s",
230 linux_device_name,
231 fi_strerror(-rc)
232 ));
233 goto scalable_fail;
234 }
235
236
237 cq_attr.format = FI_CQ_FORMAT_CONTEXT;
238 cq_attr.wait_obj = FI_WAIT_NONE;
239 rc = fi_cq_open(domain, &cq_attr, &contexts[i].cq, NULL);
240 if (0 != rc) {
241 BTL_VERBOSE(("%s failed fi_cq_open with err=%s",
242 linux_device_name,
243 fi_strerror(-rc)
244 ));
245 goto scalable_fail;
246 }
247
248
249 rc = fi_ep_bind(contexts[i].tx_ctx, (fid_t)contexts[i].cq, FI_TRANSMIT);
250 if (0 != rc) {
251 BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
252 linux_device_name,
253 fi_strerror(-rc)
254 ));
255 goto scalable_fail;
256 }
257
258
259 if (TWO_SIDED_ENABLED) {
260 rc = fi_ep_bind(contexts[i].rx_ctx, (fid_t)contexts[i].cq, FI_RECV);
261 if (0 != rc) {
262 BTL_VERBOSE(("%s failed fi_ep_bind with err=%s",
263 linux_device_name,
264 fi_strerror(-rc)
265 ));
266 goto scalable_fail;
267 }
268 }
269
270
271 rc = fi_enable(contexts[i].tx_ctx);
272 if (0 != rc) {
273 BTL_VERBOSE(("%s failed fi_enable with err=%s",
274 linux_device_name,
275 fi_strerror(-rc)
276 ));
277 goto scalable_fail;
278 }
279
280 rc = fi_enable(contexts[i].rx_ctx);
281 if (0 != rc) {
282 BTL_VERBOSE(("%s failed fi_enable with err=%s",
283 linux_device_name,
284 fi_strerror(-rc)
285 ));
286 goto scalable_fail;
287 }
288
289
290 rc = init_context_freelists(&contexts[i]);
291 if (rc != OPAL_SUCCESS) {
292 goto scalable_fail;
293 }
294
295
296 contexts[i].context_id = i;
297 }
298
299 return contexts;
300
301 scalable_fail:
302
303 for(i=0; i < num_contexts; i++) {
304 mca_btl_ofi_context_finalize(&contexts[i], true);
305 }
306 free(contexts);
307
308 return NULL;
309 }
310
311 void mca_btl_ofi_context_finalize(mca_btl_ofi_context_t *context, bool scalable_ep) {
312
313
314 if (scalable_ep) {
315 if (NULL != context->tx_ctx) {
316 fi_close(&context->tx_ctx->fid);
317 }
318
319 if (NULL != context->rx_ctx) {
320 fi_close(&context->rx_ctx->fid);
321 }
322 }
323
324 if( NULL != context->cq) {
325 fi_close(&context->cq->fid);
326 }
327
328
329 OBJ_DESTRUCT(&context->rdma_comp_list);
330
331 if (TWO_SIDED_ENABLED) {
332 OBJ_DESTRUCT(&context->frag_comp_list);
333 OBJ_DESTRUCT(&context->frag_list);
334 }
335 }
336
337
338
339
340 mca_btl_ofi_context_t *get_ofi_context(mca_btl_ofi_module_t *btl)
341 {
342 #if OPAL_HAVE_THREAD_LOCAL
343
344 static volatile int64_t cur_num = 0;
345
346 if (OPAL_UNLIKELY(my_context == NULL)) {
347 OPAL_THREAD_LOCK(&btl->module_lock);
348
349 my_context = &btl->contexts[cur_num];
350 cur_num = (cur_num + 1) %btl->num_contexts;
351
352 OPAL_THREAD_UNLOCK(&btl->module_lock);
353 }
354
355 assert (my_context);
356 return my_context;
357 #else
358 return get_ofi_context_rr(btl);
359 #endif
360 }
361
362
363
364 mca_btl_ofi_context_t *get_ofi_context_rr(mca_btl_ofi_module_t *btl)
365 {
366 static volatile uint64_t rr_num = 0;
367 return &btl->contexts[rr_num++%btl->num_contexts];
368 }
369
370 int mca_btl_ofi_context_progress(mca_btl_ofi_context_t *context) {
371
372 int ret = 0;
373 int events_read;
374 int events = 0;
375 struct fi_cq_entry cq_entry[MCA_BTL_OFI_DEFAULT_MAX_CQE];
376 struct fi_cq_err_entry cqerr = {0};
377
378 mca_btl_ofi_completion_context_t *c_ctx;
379 mca_btl_ofi_base_completion_t *comp;
380 mca_btl_ofi_rdma_completion_t *rdma_comp;
381 mca_btl_ofi_frag_completion_t *frag_comp;
382
383 ret = fi_cq_read(context->cq, &cq_entry, mca_btl_ofi_component.num_cqe_read);
384
385 if (0 < ret) {
386 events_read = ret;
387 for (int i = 0; i < events_read; i++) {
388 if (NULL != cq_entry[i].op_context) {
389 ++events;
390
391 c_ctx = (mca_btl_ofi_completion_context_t*) cq_entry[i].op_context;
392
393
394 comp = (mca_btl_ofi_base_completion_t*) c_ctx->comp;
395 frag_comp = (mca_btl_ofi_frag_completion_t*) c_ctx->comp;
396 rdma_comp = (mca_btl_ofi_rdma_completion_t*) c_ctx->comp;
397
398 switch (comp->type) {
399 case MCA_BTL_OFI_TYPE_GET:
400 case MCA_BTL_OFI_TYPE_PUT:
401 case MCA_BTL_OFI_TYPE_AOP:
402 case MCA_BTL_OFI_TYPE_AFOP:
403 case MCA_BTL_OFI_TYPE_CSWAP:
404
405 if (rdma_comp->cbfunc) {
406 rdma_comp->cbfunc (comp->btl, comp->endpoint,
407 rdma_comp->local_address, rdma_comp->local_handle,
408 rdma_comp->cbcontext, rdma_comp->cbdata, OPAL_SUCCESS);
409 }
410
411 MCA_BTL_OFI_NUM_RDMA_DEC((mca_btl_ofi_module_t*) comp->btl);
412 break;
413
414 case MCA_BTL_OFI_TYPE_RECV:
415 mca_btl_ofi_recv_frag((mca_btl_ofi_module_t*) comp->btl,
416 (mca_btl_ofi_endpoint_t*) comp->endpoint,
417 context, frag_comp->frag);
418 break;
419
420 case MCA_BTL_OFI_TYPE_SEND:
421 MCA_BTL_OFI_NUM_SEND_DEC((mca_btl_ofi_module_t*) comp->btl);
422 mca_btl_ofi_frag_complete(frag_comp->frag, OPAL_SUCCESS);
423 break;
424
425 default:
426
427 BTL_ERROR(("unknown completion type"));
428 MCA_BTL_OFI_ABORT();
429 }
430
431
432 opal_free_list_return(comp->my_list, (opal_free_list_item_t*) comp);
433 }
434 }
435 } else if (OPAL_UNLIKELY(ret == -FI_EAVAIL)) {
436 ret = fi_cq_readerr(context->cq, &cqerr, 0);
437
438
439 if (0 > ret) {
440 BTL_ERROR(("%s:%d: Error returned from fi_cq_readerr: %s(%d)",
441 __FILE__, __LINE__, fi_strerror(-ret), ret));
442 } else {
443 BTL_ERROR(("fi_cq_readerr: (provider err_code = %d)\n",
444 cqerr.prov_errno));
445 }
446 MCA_BTL_OFI_ABORT();
447 }
448 #ifdef FI_EINTR
449
450 else if (OPAL_UNLIKELY(ret == -FI_EINTR)) {
451
452 }
453 #endif
454
455 else if (OPAL_UNLIKELY(ret != -FI_EAGAIN)) {
456 BTL_ERROR(("fi_cq_read returned error %d:%s", ret, fi_strerror(-ret)));
457 MCA_BTL_OFI_ABORT();
458 }
459
460 return events;
461 }
462
463