This source file includes the following definitions.
- mca_btl_ofi_add_procs
- mca_btl_ofi_del_procs
- mca_btl_ofi_rcache_init
- mca_btl_ofi_register_mem
- mca_btl_ofi_deregister_mem
- mca_btl_ofi_reg_mem
- mca_btl_ofi_dereg_mem
- mca_btl_ofi_finalize
- mca_btl_ofi_post_recvs
- mca_btl_ofi_module_alloc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "opal_config.h"
26 #include <string.h>
27 #include "opal/class/opal_bitmap.h"
28 #include "opal/util/printf.h"
29 #include "opal/mca/btl/btl.h"
30 #include "opal/datatype/opal_convertor.h"
31 #include "opal/mca/mpool/base/base.h"
32 #include "opal/mca/mpool/mpool.h"
33
34 #include "btl_ofi.h"
35 #include "btl_ofi_endpoint.h"
36 #include "btl_ofi_frag.h"
37
38 static int mca_btl_ofi_add_procs (mca_btl_base_module_t *btl,
39 size_t nprocs, opal_proc_t **opal_procs,
40 mca_btl_base_endpoint_t **peers,
41 opal_bitmap_t *reachable)
42 {
43 int rc;
44 int count;
45 char *ep_name = NULL;
46 size_t namelen = mca_btl_ofi_component.namelen;
47
48 opal_proc_t *proc;
49 mca_btl_base_endpoint_t *ep;
50
51 mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
52
53 for (size_t i = 0 ; i < nprocs ; ++i) {
54
55 proc = opal_procs[i];
56
57
58 rc = opal_hash_table_get_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) proc, (void **) &ep);
59
60 if (OPAL_SUCCESS == rc) {
61 BTL_VERBOSE(("returning existing endpoint for proc %s", OPAL_NAME_PRINT(proc->proc_name)));
62 peers[i] = ep;
63
64 } else {
65
66 peers[i] = mca_btl_ofi_endpoint_create (proc, ofi_btl->ofi_endpoint);
67 BTL_VERBOSE(("creating peer %p", (void*) peers[i]));
68
69 if (OPAL_UNLIKELY(NULL == peers[i])) {
70 return OPAL_ERR_OUT_OF_RESOURCE;
71 }
72
73
74 (void) opal_hash_table_set_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) proc, (void**) &ep);
75 }
76
77 OPAL_MODEX_RECV(rc, &mca_btl_ofi_component.super.btl_version,
78 &peers[i]->ep_proc->proc_name, (void **)&ep_name, &namelen);
79 if (OPAL_SUCCESS != rc) {
80 BTL_ERROR(("error receiving modex"));
81 MCA_BTL_OFI_ABORT();
82 }
83
84
85 count = fi_av_insert(ofi_btl->av,
86 ep_name,
87 1,
88 &peers[i]->peer_addr,
89 0,
90 NULL);
91
92
93 if (count == 1) {
94 opal_list_append (&ofi_btl->endpoints, &peers[i]->super);
95 opal_bitmap_set_bit(reachable, i);
96 } else {
97 BTL_VERBOSE(("fi_av_insert failed with rc = %d", count));
98 MCA_BTL_OFI_ABORT();
99 }
100 }
101
102 return OPAL_SUCCESS;
103 }
104
105 static int mca_btl_ofi_del_procs (mca_btl_base_module_t *btl, size_t nprocs,
106 opal_proc_t **procs, mca_btl_base_endpoint_t **peers)
107 {
108 int rc;
109 mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
110 mca_btl_base_endpoint_t *ep;
111
112 for (size_t i = 0 ; i < nprocs ; ++i) {
113 if (peers[i]) {
114 rc = opal_hash_table_get_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) procs[i], (void **) &ep);
115
116 if (OPAL_SUCCESS == rc) {
117
118 rc = fi_av_remove(ofi_btl->av, &peers[i]->peer_addr, 1, 0);
119 if (rc < 0) {
120
121
122 BTL_ERROR(("fi_av_remove failed with error %d:%s",
123 rc, fi_strerror(-rc)));
124 }
125
126
127 opal_list_remove_item (&ofi_btl->endpoints, &peers[i]->super);
128 (void) opal_hash_table_remove_value_uint64 (&ofi_btl->id_to_endpoint, (intptr_t) procs[i]);
129 OBJ_RELEASE(peers[i]);
130 }
131 }
132 }
133
134 return OPAL_SUCCESS;
135 }
136
137 void mca_btl_ofi_rcache_init (mca_btl_ofi_module_t *module)
138 {
139 if (!module->initialized) {
140 mca_rcache_base_resources_t rcache_resources;
141 char *tmp;
142
143 (void) opal_asprintf (&tmp, "ofi.%s", module->linux_device_name);
144
145 rcache_resources.cache_name = tmp;
146 rcache_resources.reg_data = (void *) module;
147 rcache_resources.sizeof_reg = sizeof (mca_btl_ofi_reg_t);
148 rcache_resources.register_mem = mca_btl_ofi_reg_mem;
149 rcache_resources.deregister_mem = mca_btl_ofi_dereg_mem;
150
151 module->rcache = mca_rcache_base_module_create ("grdma", module, &rcache_resources);
152 free (tmp);
153
154 if (NULL == module->rcache) {
155
156 BTL_ERROR(("cannot create rcache"));
157 MCA_BTL_OFI_ABORT();
158 }
159
160 module->initialized = true;
161 }
162 }
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184 static struct mca_btl_base_registration_handle_t *
185 mca_btl_ofi_register_mem (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *base,
186 size_t size, uint32_t flags)
187 {
188 mca_btl_ofi_module_t *ofi_module = (mca_btl_ofi_module_t *) btl;
189 mca_btl_ofi_reg_t *reg;
190 int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
191 int rc;
192
193 rc = ofi_module->rcache->rcache_register (ofi_module->rcache, base, size, 0, access_flags,
194 (mca_rcache_base_registration_t **) ®);
195 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
196 return NULL;
197 }
198
199 return ®->handle;
200 }
201
202
203
204
205
206
207
208
209
210
211
212
213 static int mca_btl_ofi_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
214 {
215 mca_btl_ofi_module_t *ofi_module = (mca_btl_ofi_module_t *) btl;
216 mca_btl_ofi_reg_t *reg =
217 (mca_btl_ofi_reg_t *)((intptr_t) handle - offsetof (mca_btl_ofi_reg_t, handle));
218
219 (void) ofi_module->rcache->rcache_deregister (ofi_module->rcache, ®->base);
220
221 return OPAL_SUCCESS;
222 }
223
224 int mca_btl_ofi_reg_mem (void *reg_data, void *base, size_t size, mca_rcache_base_registration_t *reg)
225 {
226 int rc;
227 static uint64_t access_flags = FI_REMOTE_WRITE | FI_REMOTE_READ | FI_READ | FI_WRITE;
228
229 mca_btl_ofi_module_t *btl = (mca_btl_ofi_module_t*) reg_data;
230 mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t*) reg;
231
232 rc = fi_mr_reg(btl->domain, base, size, access_flags, 0,
233 (uint64_t) reg, 0, &ur->ur_mr, NULL);
234 if (0 != rc) {
235 return OPAL_ERR_OUT_OF_RESOURCE;
236 }
237
238 ur->handle.rkey = fi_mr_key(ur->ur_mr);
239 ur->handle.desc = fi_mr_desc(ur->ur_mr);
240
241
242
243
244 if (btl->use_virt_addr) {
245 ur->handle.base_addr = 0;
246 } else {
247 ur->handle.base_addr = base;
248 }
249
250 return OPAL_SUCCESS;
251 }
252
253 int mca_btl_ofi_dereg_mem (void *reg_data, mca_rcache_base_registration_t *reg)
254 {
255 mca_btl_ofi_reg_t *ur = (mca_btl_ofi_reg_t*)reg;
256
257 if (ur->ur_mr != NULL) {
258 if (0 != fi_close(&ur->ur_mr->fid)) {
259 BTL_ERROR(("%s: error unpinning memory mr=%p: %s",
260 __func__, (void*) ur->ur_mr, strerror(errno)));
261 return OPAL_ERROR;
262 }
263 }
264
265 return OPAL_SUCCESS;
266 }
267
268
269
270
271
272 int mca_btl_ofi_finalize (mca_btl_base_module_t* btl)
273 {
274 int i;
275 mca_btl_ofi_module_t *ofi_btl = (mca_btl_ofi_module_t *) btl;
276 mca_btl_ofi_endpoint_t *endpoint, *next;
277
278 assert(btl);
279
280
281 for (i=0; i < ofi_btl->num_contexts; i++) {
282 mca_btl_ofi_context_finalize(&ofi_btl->contexts[i], ofi_btl->is_scalable_ep);
283 }
284 free(ofi_btl->contexts);
285
286 if (NULL != ofi_btl->av) {
287 fi_close(&ofi_btl->av->fid);
288 }
289
290 if (NULL != ofi_btl->ofi_endpoint) {
291 fi_close(&ofi_btl->ofi_endpoint->fid);
292 }
293
294 if (NULL != ofi_btl->domain) {
295 fi_close(&ofi_btl->domain->fid);
296 }
297
298 if (NULL != ofi_btl->fabric) {
299 fi_close(&ofi_btl->fabric->fid);
300 }
301
302 if (NULL != ofi_btl->fabric_info) {
303 fi_freeinfo(ofi_btl->fabric_info);
304 }
305
306
307 OPAL_LIST_FOREACH_SAFE(endpoint, next, &ofi_btl->endpoints, mca_btl_ofi_endpoint_t) {
308 opal_list_remove_item (&ofi_btl->endpoints, &endpoint->super);
309 OBJ_RELEASE(endpoint);
310 }
311
312 OBJ_DESTRUCT(&ofi_btl->endpoints);
313 OBJ_DESTRUCT(&ofi_btl->id_to_endpoint);
314 OBJ_DESTRUCT(&ofi_btl->module_lock);
315
316 if (ofi_btl->rcache) {
317 mca_rcache_base_module_destroy (ofi_btl->rcache);
318 ofi_btl->rcache = NULL;
319 }
320
321 free (btl);
322
323 return OPAL_SUCCESS;
324 }
325
326
327 int mca_btl_ofi_post_recvs (mca_btl_base_module_t *module,
328 mca_btl_ofi_context_t *context,
329 int count)
330 {
331 int i;
332 int rc;
333 mca_btl_ofi_base_frag_t *frag;
334 mca_btl_ofi_frag_completion_t *comp;
335
336 for (i=0; i < count; i++) {
337 frag = (mca_btl_ofi_base_frag_t*) mca_btl_ofi_alloc(module,
338 NULL,
339 0,
340 MCA_BTL_OFI_FRAG_SIZE,
341 MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
342 if (NULL == frag) {
343 BTL_ERROR(("cannot allocate recv frag."));
344 return OPAL_ERROR;
345 }
346
347 comp = mca_btl_ofi_frag_completion_alloc (module,
348 context,
349 frag,
350 MCA_BTL_OFI_TYPE_RECV);
351
352 rc = fi_recv (context->rx_ctx, &frag->hdr, MCA_BTL_OFI_RECV_SIZE,
353 NULL, FI_ADDR_UNSPEC, &comp->comp_ctx);
354
355 if (FI_SUCCESS != rc) {
356 BTL_ERROR(("cannot post recvs"));
357 return OPAL_ERROR;
358 }
359 }
360 return OPAL_SUCCESS;
361 }
362
363
364 mca_btl_ofi_module_t * mca_btl_ofi_module_alloc (int mode)
365 {
366 mca_btl_ofi_module_t *module;
367
368
369 module = (mca_btl_ofi_module_t*) calloc(1, sizeof(mca_btl_ofi_module_t));
370 if (NULL == module) {
371 return NULL;
372 }
373
374
375 *module = mca_btl_ofi_module_template;
376
377 if (mode == MCA_BTL_OFI_MODE_ONE_SIDED || mode == MCA_BTL_OFI_MODE_FULL_SUPPORT) {
378
379 module->super.btl_put = mca_btl_ofi_put;
380 module->super.btl_get = mca_btl_ofi_get;
381 module->super.btl_atomic_op = mca_btl_ofi_aop;
382 module->super.btl_atomic_fop = mca_btl_ofi_afop;
383 module->super.btl_atomic_cswap = mca_btl_ofi_acswap;
384 module->super.btl_flush = mca_btl_ofi_flush;
385
386 module->super.btl_register_mem = mca_btl_ofi_register_mem;
387 module->super.btl_deregister_mem = mca_btl_ofi_deregister_mem;
388
389 module->super.btl_flags |= MCA_BTL_FLAGS_ATOMIC_FOPS |
390 MCA_BTL_FLAGS_ATOMIC_OPS |
391 MCA_BTL_FLAGS_RDMA;
392
393 module->super.btl_atomic_flags = MCA_BTL_ATOMIC_SUPPORTS_ADD |
394 MCA_BTL_ATOMIC_SUPPORTS_SWAP |
395 MCA_BTL_ATOMIC_SUPPORTS_CSWAP |
396 MCA_BTL_ATOMIC_SUPPORTS_32BIT ;
397
398 module->super.btl_put_limit = 1 << 23;
399 module->super.btl_put_alignment = 0;
400
401 module->super.btl_get_limit = 1 << 23;
402 module->super.btl_get_alignment = 0;
403
404 module->super.btl_registration_handle_size =
405 sizeof(mca_btl_base_registration_handle_t);
406 }
407
408 if (mode == MCA_BTL_OFI_MODE_TWO_SIDED || mode == MCA_BTL_OFI_MODE_FULL_SUPPORT) {
409
410 module->super.btl_alloc = mca_btl_ofi_alloc;
411 module->super.btl_free = mca_btl_ofi_free;
412 module->super.btl_prepare_src = mca_btl_ofi_prepare_src;
413
414 module->super.btl_send = mca_btl_ofi_send;
415
416 module->super.btl_flags |= MCA_BTL_FLAGS_SEND;
417 module->super.btl_eager_limit = MCA_BTL_OFI_FRAG_SIZE;
418 module->super.btl_max_send_size = MCA_BTL_OFI_FRAG_SIZE;
419 module->super.btl_rndv_eager_limit = MCA_BTL_OFI_FRAG_SIZE;
420
421
422
423 module->super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_HIGH;
424 }
425
426 if (mode == MCA_BTL_OFI_MODE_FULL_SUPPORT) {
427 module->super.btl_rdma_pipeline_frag_size = 4 * 1024 * 1024;
428 module->super.btl_rdma_pipeline_send_length = 8 * 1024;
429 }
430
431 return module;
432 }
433
434 mca_btl_ofi_module_t mca_btl_ofi_module_template = {
435 .super = {
436 .btl_component = &mca_btl_ofi_component.super,
437 .btl_add_procs = mca_btl_ofi_add_procs,
438 .btl_del_procs = mca_btl_ofi_del_procs,
439 .btl_finalize = mca_btl_ofi_finalize,
440 }
441 };