This source file includes the following definitions.
- mca_btl_ugni_datagram_event
- mca_btl_ugni_module_init
- mca_btl_ugni_module_finalize
- mca_btl_ugni_alloc
- mca_btl_ugni_free
- mca_btl_ugni_prepare_src
- mca_btl_ugni_register_mem
- mca_btl_ugni_deregister_mem
- mca_btl_ugni_event_fatal_error
- mca_btl_ugni_device_handle_event_error
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 #include "opal_config.h"
17
18 #include "btl_ugni.h"
19 #include "btl_ugni_frag.h"
20 #include "btl_ugni_endpoint.h"
21 #include "btl_ugni_prepare.h"
22 #include "btl_ugni_smsg.h"
23
24 static int
25 mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
26 mca_btl_base_descriptor_t *des);
27
28 static int
29 mca_btl_ugni_module_finalize (struct mca_btl_base_module_t* btl);
30
31 static struct mca_btl_base_descriptor_t *
32 mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
33 struct mca_btl_base_endpoint_t *endpoint,
34 struct opal_convertor_t *convertor,
35 uint8_t order, size_t reserve, size_t *size,
36 uint32_t flags);
37
38 static mca_btl_base_registration_handle_t *
39 mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
40 size_t size, uint32_t flags);
41
42 static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle);
43
/* Module template: every per-device btl/ugni module starts as a copy of this
 * structure (see mca_btl_ugni_module_init), so the function-pointer table
 * below defines the public BTL interface of this component. */
mca_btl_ugni_module_t mca_btl_ugni_module = {
    .super = {
        .btl_component      = &mca_btl_ugni_component.super,
        .btl_add_procs      = mca_btl_ugni_add_procs,
        .btl_del_procs      = mca_btl_ugni_del_procs,
        .btl_finalize       = mca_btl_ugni_module_finalize,
        .btl_alloc          = mca_btl_ugni_alloc,
        .btl_free           = mca_btl_ugni_free,
        .btl_prepare_src    = mca_btl_ugni_prepare_src,
        .btl_send           = mca_btl_ugni_send,
        .btl_sendi          = mca_btl_ugni_sendi,
        .btl_put            = mca_btl_ugni_put,
        .btl_get            = mca_btl_ugni_get,
        .btl_register_mem   = mca_btl_ugni_register_mem,
        .btl_deregister_mem = mca_btl_ugni_deregister_mem,
        .btl_atomic_op      = mca_btl_ugni_aop,
        .btl_atomic_fop     = mca_btl_ugni_afop,
        .btl_atomic_cswap   = mca_btl_ugni_acswap,
        .btl_flush          = mca_btl_ugni_flush,
    }
};
65
66 static void mca_btl_ugni_datagram_event (int foo, short bar, void *arg)
67 {
68 mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) arg;
69 mca_btl_ugni_device_t *device = ugni_module->devices;
70 struct timeval tv = {.tv_sec = 0, .tv_usec = MCA_BTL_UGNI_CONNECT_USEC};
71
72 mca_btl_ugni_progress_datagram (device);
73
74 opal_event_evtimer_add (&ugni_module->connection_event, &tv);
75 }
76
77 int
78 mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module)
79 {
80 int rc;
81
82 BTL_VERBOSE(("binding module %p to device 0", (void *) ugni_module));
83
84
85 memmove (ugni_module, &mca_btl_ugni_module, sizeof (mca_btl_ugni_module));
86
87 ugni_module->initialized = false;
88 ugni_module->nlocal_procs = 0;
89 ugni_module->active_datagrams = 0;
90 ugni_module->active_rdma_count = 0;
91
92 opal_event_evtimer_set (opal_sync_event_base, &ugni_module->connection_event,
93 mca_btl_ugni_datagram_event, ugni_module);
94
95 OBJ_CONSTRUCT(&ugni_module->failed_frags, opal_list_t);
96 OBJ_CONSTRUCT(&ugni_module->failed_frags_lock, opal_mutex_t);
97
98 OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
99 OBJ_CONSTRUCT(&ugni_module->eager_get_pending_lock,opal_mutex_t);
100
101 for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
102 OBJ_CONSTRUCT(ugni_module->frags_lists + i, opal_free_list_t);
103 }
104
105 OBJ_CONSTRUCT(&ugni_module->pending_smsg_frags_bb, opal_pointer_array_t);
106 OBJ_CONSTRUCT(&ugni_module->ep_wait_list_lock,opal_mutex_t);
107 OBJ_CONSTRUCT(&ugni_module->ep_wait_list, opal_list_t);
108 OBJ_CONSTRUCT(&ugni_module->endpoint_lock, opal_mutex_t);
109 OBJ_CONSTRUCT(&ugni_module->endpoints, opal_pointer_array_t);
110 OBJ_CONSTRUCT(&ugni_module->id_to_endpoint, opal_hash_table_t);
111 OBJ_CONSTRUCT(&ugni_module->smsg_mboxes, opal_free_list_t);
112 OBJ_CONSTRUCT(&ugni_module->eager_get_pending, opal_list_t);
113
114
115 for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
116 rc = mca_btl_ugni_device_init (ugni_module->devices + i, i);
117 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
118 BTL_VERBOSE(("error initializing uGNI device handle"));
119 return rc;
120 }
121 }
122
123
124
125
126 rc = GNI_EpCreate (ugni_module->devices[0].dev_handle, NULL,
127 &ugni_module->wildcard_ep);
128 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
129 BTL_ERROR(("error creating wildcard ugni endpoint"));
130 return mca_btl_rc_ugni_to_opal (rc);
131 }
132
133
134 rc = mca_btl_ugni_wildcard_ep_post (ugni_module);
135 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
136 BTL_ERROR(("error posting wildcard datagram"));
137 return rc;
138 }
139
140 return OPAL_SUCCESS;
141 }
142
143 static int
144 mca_btl_ugni_module_finalize (struct mca_btl_base_module_t *btl)
145 {
146 mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *)btl;
147 mca_btl_base_endpoint_t *ep;
148 uint64_t key;
149 int rc;
150
151 if (ugni_module->initialized) {
152
153 OPAL_HASH_TABLE_FOREACH(key, uint64, ep, &ugni_module->id_to_endpoint) {
154 if (NULL != ep) {
155 mca_btl_ugni_release_ep (ep);
156 }
157 }
158
159 if (mca_btl_ugni_component.progress_thread_enabled) {
160 mca_btl_ugni_kill_progress_thread();
161 }
162
163
164 rc = GNI_CqDestroy (ugni_module->smsg_remote_cq);
165 if (GNI_RC_SUCCESS != rc) {
166 BTL_ERROR(("error tearing down RX SMSG CQ - %s",gni_err_str[rc]));
167 }
168
169 if (mca_btl_ugni_component.progress_thread_enabled) {
170 rc = GNI_CqDestroy (ugni_module->smsg_remote_irq_cq);
171 if (GNI_RC_SUCCESS != rc) {
172 BTL_ERROR(("error tearing down remote SMSG CQ - %s",gni_err_str[rc]));
173 }
174 }
175
176
177 rc = GNI_EpPostDataCancelById (ugni_module->wildcard_ep,
178 MCA_BTL_UGNI_CONNECT_WILDCARD_ID |
179 OPAL_PROC_MY_NAME.vpid);
180 if (GNI_RC_SUCCESS != rc) {
181 BTL_VERBOSE(("btl/ugni error cancelling wildcard post"));
182 }
183
184
185 rc = GNI_EpDestroy (ugni_module->wildcard_ep);
186 if (GNI_RC_SUCCESS != rc) {
187 BTL_VERBOSE(("btl/ugni error destroying endpoint - %s",gni_err_str[rc]));
188 }
189
190 opal_event_del (&ugni_module->connection_event);
191 }
192
193 for (int i = 0 ; i < MCA_BTL_UGNI_LIST_MAX ; ++i) {
194 OBJ_DESTRUCT(ugni_module->frags_lists + i);
195 }
196
197 OBJ_DESTRUCT(&ugni_module->ep_wait_list);
198 OBJ_DESTRUCT(&ugni_module->smsg_mboxes);
199 OBJ_DESTRUCT(&ugni_module->pending_smsg_frags_bb);
200 OBJ_DESTRUCT(&ugni_module->id_to_endpoint);
201 OBJ_DESTRUCT(&ugni_module->endpoint_lock);
202 OBJ_DESTRUCT(&ugni_module->endpoints);
203
204 OBJ_DESTRUCT(&ugni_module->eager_get_pending);
205 OBJ_DESTRUCT(&ugni_module->eager_get_pending_lock);
206
207 if (ugni_module->rcache) {
208 mca_rcache_base_module_destroy (ugni_module->rcache);
209 }
210
211 for (int i = 0 ; i < mca_btl_ugni_component.virtual_device_count ; ++i) {
212 mca_btl_ugni_device_fini (ugni_module->devices + i);
213 }
214
215 ugni_module->initialized = false;
216
217 return OPAL_SUCCESS;
218 }
219
220
221 mca_btl_base_descriptor_t *
222 mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
223 struct mca_btl_base_endpoint_t *endpoint,
224 uint8_t order, size_t size, uint32_t flags)
225 {
226 mca_btl_ugni_base_frag_t *frag = NULL;
227
228
229
230
231 if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
232 return NULL;
233 }
234
235 if (size <= mca_btl_ugni_component.smsg_max_data) {
236 frag = mca_btl_ugni_frag_alloc_smsg (endpoint);
237 } else if (size <= btl->btl_eager_limit) {
238 frag = mca_btl_ugni_frag_alloc_eager_send (endpoint);
239 }
240
241 if (OPAL_UNLIKELY(NULL == frag)) {
242 return NULL;
243 }
244
245 BTL_VERBOSE(("btl/ugni_module allocated frag of size: %u, flags: %x. frag = %p",
246 (unsigned int)size, flags, (void *) frag));
247
248 frag->base.des_flags = flags;
249 frag->base.order = order;
250 frag->base.des_segments = &frag->segments[1];
251 frag->base.des_segment_count = 1;
252
253 frag->segments[0].seg_addr.pval = NULL;
254 frag->segments[0].seg_len = 0;
255 frag->segments[1].seg_addr.pval = frag->base.super.ptr;
256 frag->segments[1].seg_len = size;
257
258 frag->flags = MCA_BTL_UGNI_FRAG_BUFFERED;
259 if (size > mca_btl_ugni_component.smsg_max_data) {
260 mca_btl_ugni_reg_t *registration;
261
262 frag->hdr_size = sizeof (frag->hdr.eager);
263 frag->flags |= MCA_BTL_UGNI_FRAG_EAGER | MCA_BTL_UGNI_FRAG_IGNORE;
264
265 registration = (mca_btl_ugni_reg_t *) frag->base.super.registration;
266
267 frag->hdr.eager.memory_handle = registration->handle;
268 } else {
269 frag->hdr_size = sizeof (frag->hdr.send);
270 }
271
272 return &frag->base;
273 }
274
275 static int
276 mca_btl_ugni_free (struct mca_btl_base_module_t *btl,
277 mca_btl_base_descriptor_t *des)
278 {
279 return mca_btl_ugni_frag_return ((mca_btl_ugni_base_frag_t *) des);
280 }
281
282 static struct mca_btl_base_descriptor_t *
283 mca_btl_ugni_prepare_src (struct mca_btl_base_module_t *btl,
284 mca_btl_base_endpoint_t *endpoint,
285 struct opal_convertor_t *convertor,
286 uint8_t order, size_t reserve, size_t *size,
287 uint32_t flags)
288 {
289
290
291
292 if (OPAL_UNLIKELY(opal_list_get_size (&endpoint->frag_wait_list) > 32)) {
293 return NULL;
294 }
295
296 return mca_btl_ugni_prepare_src_send (btl, endpoint, convertor,
297 order, reserve, size, flags);
298 }
299
300 static mca_btl_base_registration_handle_t *
301 mca_btl_ugni_register_mem (mca_btl_base_module_t *btl, mca_btl_base_endpoint_t *endpoint, void *base,
302 size_t size, uint32_t flags)
303 {
304 mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
305 mca_btl_ugni_reg_t *reg;
306 int access_flags = flags & MCA_BTL_REG_FLAG_ACCESS_ANY;
307 int rc;
308
309 rc = ugni_module->rcache->rcache_register (ugni_module->rcache, base, size, 0, access_flags,
310 (mca_rcache_base_registration_t **) ®);
311 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
312 return NULL;
313 }
314
315 return ®->handle;
316 }
317
318 static int mca_btl_ugni_deregister_mem (mca_btl_base_module_t *btl, mca_btl_base_registration_handle_t *handle)
319 {
320 mca_btl_ugni_module_t *ugni_module = (mca_btl_ugni_module_t *) btl;
321 mca_btl_ugni_reg_t *reg =
322 (mca_btl_ugni_reg_t *)((intptr_t) handle - offsetof (mca_btl_ugni_reg_t, handle));
323
324 (void) ugni_module->rcache->rcache_deregister (ugni_module->rcache, ®->base);
325
326 return OPAL_SUCCESS;
327 }
328
329 int mca_btl_ugni_event_fatal_error (gni_return_t grc, gni_cq_entry_t event_data)
330 {
331
332
333
334 if (GNI_CQ_OVERRUN(event_data)) {
335
336
337 BTL_ERROR(("CQ overrun detected in RDMA event data. can not recover"));
338 } else {
339 BTL_ERROR(("Error in GNI_GetComplete %s", gni_err_str[grc]));
340 }
341
342 return mca_btl_rc_ugni_to_opal (grc);
343 }
344
345 int mca_btl_ugni_device_handle_event_error (mca_btl_ugni_rdma_desc_t *rdma_desc, gni_cq_entry_t event_data)
346 {
347 mca_btl_ugni_device_t *device = rdma_desc->device;
348 uint32_t recoverable = 1;
349
350 (void) GNI_CqErrorRecoverable (event_data, &recoverable);
351
352 if (OPAL_UNLIKELY(++rdma_desc->tries >= mca_btl_ugni_component.rdma_max_retries || !recoverable)) {
353 char char_buffer[1024];
354 GNI_CqErrorStr (event_data, char_buffer, sizeof (char_buffer));
355
356 BTL_ERROR(("giving up on desciptor %p, recoverable %d: %s", (void *) rdma_desc, recoverable, char_buffer));
357
358 return OPAL_ERROR;
359 }
360
361 return _mca_btl_ugni_repost_rdma_desc_device (device, rdma_desc);
362 }