This source file includes the following definitions.
- mca_btl_ugni_ep_construct
- mca_btl_ugni_ep_destruct
- mca_btl_ugni_endpoint_get_modex
- mca_btl_ugni_init_ep
- mca_btl_ugni_release_ep
- mca_btl_ugni_ep_smsg_get_mbox
- mca_btl_ugni_ep_send_disconnect
- mca_btl_ugni_ep_disconnect
- mca_btl_ugni_ep_connect_start
- mca_btl_ugni_ep_connect_finish
- mca_btl_ugni_directed_ep_post
- mca_btl_ugni_wildcard_ep_post
- mca_btl_ugni_ep_connect_progress
- mca_btl_ugni_ep_handle_init
- mca_btl_ugni_ep_handle_cleanup
1
2
3
4
5
6
7
8
9
10
11
12
13
14 #include "btl_ugni_endpoint.h"
15 #include "btl_ugni_smsg.h"
16 #include "opal/mca/pmix/pmix.h"
17
18 static void mca_btl_ugni_ep_construct (mca_btl_base_endpoint_t *ep)
19 {
20 memset ((char *) ep + sizeof(ep->super), 0, sizeof (*ep) - sizeof (ep->super));
21 OBJ_CONSTRUCT(&ep->frag_wait_list, opal_list_t);
22 OBJ_CONSTRUCT(&ep->lock, opal_recursive_mutex_t);
23 }
24
25 static void mca_btl_ugni_ep_destruct (mca_btl_base_endpoint_t *ep)
26 {
27 OBJ_DESTRUCT(&ep->frag_wait_list);
28 OBJ_DESTRUCT(&ep->lock);
29 free (ep->remote_attr);
30 }
31
/* Register the endpoint class: parent class is opal_list_item_t, with the
 * constructor/destructor pair defined above. */
OBJ_CLASS_INSTANCE(mca_btl_ugni_endpoint_t,
                   opal_list_item_t,
                   mca_btl_ugni_ep_construct,
                   mca_btl_ugni_ep_destruct);
34
35 static int mca_btl_ugni_endpoint_get_modex (mca_btl_base_endpoint_t *ep)
36 {
37 mca_btl_ugni_modex_t *modex;
38 size_t msg_size;
39 int rc;
40
41 assert (NULL != ep && NULL != ep->peer_proc);
42
43
44 OPAL_MODEX_RECV(rc, &mca_btl_ugni_component.super.btl_version,
45 &ep->peer_proc->proc_name, (void **)&modex, &msg_size);
46 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
47 BTL_ERROR(("error receiving modex"));
48 return rc;
49 }
50
51 ep->ep_rem_addr = modex->addr;
52 ep->ep_rem_id = modex->id;
53
54
55 BTL_VERBOSE(("received modex for ep %p. addr: %d, id: %d", (void*)ep, ep->ep_rem_addr, ep->ep_rem_id));
56
57 free (modex);
58
59 return OPAL_SUCCESS;
60 }
61
62 int mca_btl_ugni_init_ep (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_endpoint_t **ep,
63 mca_btl_ugni_module_t *btl, opal_proc_t *peer_proc)
64 {
65 mca_btl_ugni_endpoint_t *endpoint;
66 int rc;
67
68 endpoint = OBJ_NEW(mca_btl_ugni_endpoint_t);
69 assert (endpoint != NULL);
70
71 endpoint->smsg_progressing = 0;
72 endpoint->state = MCA_BTL_UGNI_EP_STATE_INIT;
73 endpoint->peer_proc = peer_proc;
74
75
76
77 rc = mca_btl_ugni_endpoint_get_modex (endpoint);
78 if (OPAL_SUCCESS != rc) {
79 assert (0);
80 return rc;
81 }
82
83
84 endpoint->index = opal_pointer_array_add (&ugni_module->endpoints, endpoint);
85
86 *ep = endpoint;
87
88 return OPAL_SUCCESS;
89 }
90
91 void mca_btl_ugni_release_ep (mca_btl_ugni_endpoint_t *ep)
92 {
93 mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
94 int rc;
95
96 opal_mutex_lock (&ep->lock);
97
98 rc = mca_btl_ugni_ep_disconnect (ep, false);
99 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
100 BTL_VERBOSE(("btl/ugni error disconnecting endpoint"));
101 }
102
103
104 opal_pointer_array_set_item (&ugni_module->endpoints, ep->index, NULL);
105
106 opal_mutex_unlock (&ep->lock);
107
108 OBJ_RELEASE(ep);
109 }
110
111 static inline int mca_btl_ugni_ep_smsg_get_mbox (mca_btl_base_endpoint_t *ep) {
112 mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
113 opal_free_list_item_t *mbox;
114
115 assert (NULL == ep->mailbox);
116
117 mbox = opal_free_list_get (&ugni_module->smsg_mboxes);
118 if (OPAL_UNLIKELY(NULL == mbox)) {
119 return OPAL_ERR_OUT_OF_RESOURCE;
120 }
121
122 ep->mailbox = (mca_btl_ugni_smsg_mbox_t *) mbox;
123 ep->mailbox->attr.index = ep->index;
124
125
126 memset ((char *)ep->mailbox->attr.smsg_attr.msg_buffer + ep->mailbox->attr.smsg_attr.mbox_offset, 0,
127 ep->mailbox->attr.smsg_attr.buff_size);
128 return OPAL_SUCCESS;
129 }
130
131 static int mca_btl_ugni_ep_send_disconnect (mca_btl_base_endpoint_t *ep)
132 {
133 int rc;
134
135 do {
136 rc = mca_btl_ugni_endpoint_smsg_send_wtag (ep, NULL, 0, NULL, 0, -1, MCA_BTL_UGNI_TAG_DISCONNECT);
137 if (OPAL_LIKELY(GNI_RC_NOT_DONE != rc)) {
138 break;
139 }
140
141
142 (void) mca_btl_ugni_progress_remote_smsg (mca_btl_ugni_ep_btl (ep));
143 } while (1);
144
145 return mca_btl_rc_ugni_to_opal (rc);
146 }
147
148 int mca_btl_ugni_ep_disconnect (mca_btl_base_endpoint_t *ep, bool send_disconnect)
149 {
150 mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
151 mca_btl_ugni_device_t *device;
152 int rc;
153
154 if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
155
156 return OPAL_SUCCESS;
157 }
158
159 device = ep->smsg_ep_handle.device;
160
161 while (device->dev_smsg_local_cq.active_operations) {
162
163 rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
164 if (OPAL_SUCCESS != rc) {
165 break;
166 }
167 }
168
169 if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state && send_disconnect) {
170 rc = mca_btl_ugni_ep_send_disconnect (ep);
171 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
172 BTL_VERBOSE(("could not send disconnect message to peer"));
173 }
174
175
176 do {
177
178 rc = mca_btl_ugni_progress_local_smsg (ugni_module, device);
179 if (OPAL_SUCCESS != rc) {
180 break;
181 }
182 } while (device->dev_smsg_local_cq.active_operations);
183
184 (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, -1);
185 }
186
187 mca_btl_ugni_device_lock (device);
188
189
190
191 (void) mca_btl_ugni_ep_handle_cleanup (&ep->smsg_ep_handle);
192
193 mca_btl_ugni_device_unlock (device);
194
195 if (ep->mailbox) {
196 opal_free_list_return (&ugni_module->smsg_mboxes, ((opal_free_list_item_t *) ep->mailbox));
197 ep->mailbox = NULL;
198 }
199
200 ep->state = MCA_BTL_UGNI_EP_STATE_INIT;
201
202 return OPAL_SUCCESS;
203 }
204
205 static inline int mca_btl_ugni_ep_connect_start (mca_btl_base_endpoint_t *ep) {
206 mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
207 mca_btl_ugni_device_t *device = ugni_module->devices;
208 int rc;
209
210
211 if (OPAL_UNLIKELY(MCA_BTL_UGNI_EP_STATE_CONNECTING == ep->state)) {
212 return OPAL_ERR_RESOURCE_BUSY;
213 }
214
215 ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTING;
216
217 BTL_VERBOSE(("initiating connection to remote peer with address: %u id: %u proc: %p",
218 ep->ep_rem_addr, ep->ep_rem_id, (void *)ep->peer_proc));
219
220
221
222 mca_btl_ugni_device_lock (device);
223 rc = mca_btl_ugni_ep_handle_init (ep, device->dev_smsg_local_cq.gni_handle, device, &ep->smsg_ep_handle);
224 mca_btl_ugni_device_unlock (device);
225 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
226 return rc;
227 }
228
229
230 rc = mca_btl_ugni_ep_smsg_get_mbox (ep);
231 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
232 return rc;
233 }
234
235 ep->remote_attr = calloc (1, sizeof (*ep->remote_attr));
236 if (OPAL_UNLIKELY(NULL == ep->remote_attr)) {
237 return OPAL_ERR_OUT_OF_RESOURCE;
238 }
239
240 BTL_VERBOSE(("btl/ugni connection to remote peer initiated"));
241
242 return OPAL_SUCCESS;
243 }
244
245 static inline int mca_btl_ugni_ep_connect_finish (mca_btl_base_endpoint_t *ep) {
246 mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
247 gni_return_t grc;
248 int rc;
249
250 BTL_VERBOSE(("finishing connection. remote attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
251 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
252 "msg_maxsize = %d", ep->remote_attr->smsg_attr.msg_type, ep->remote_attr->smsg_attr.msg_buffer,
253 ep->remote_attr->smsg_attr.buff_size, ep->remote_attr->smsg_attr.mem_hndl.qword1,
254 ep->remote_attr->smsg_attr.mem_hndl.qword2, ep->remote_attr->smsg_attr.mbox_offset,
255 ep->remote_attr->smsg_attr.mbox_maxcredit, ep->remote_attr->smsg_attr.msg_maxsize));
256
257 BTL_VERBOSE(("finishing connection. local attributes: msg_type = %d, msg_buffer = %p, buff_size = %d, "
258 "mem_hndl = {qword1 = %" PRIu64 ", qword2 = %" PRIu64 "}, mbox = %d, mbox_maxcredit = %d, "
259 "msg_maxsize = %d", ep->mailbox->attr.smsg_attr.msg_type, ep->mailbox->attr.smsg_attr.msg_buffer,
260 ep->mailbox->attr.smsg_attr.buff_size, ep->mailbox->attr.smsg_attr.mem_hndl.qword1,
261 ep->mailbox->attr.smsg_attr.mem_hndl.qword2, ep->mailbox->attr.smsg_attr.mbox_offset,
262 ep->mailbox->attr.smsg_attr.mbox_maxcredit, ep->mailbox->attr.smsg_attr.msg_maxsize));
263
264 grc = GNI_SmsgInit (ep->smsg_ep_handle.gni_handle, &ep->mailbox->attr.smsg_attr,
265 &ep->remote_attr->smsg_attr);
266 if (OPAL_UNLIKELY(GNI_RC_SUCCESS != grc)) {
267 BTL_ERROR(("error initializing SMSG protocol. rc = %d", grc));
268
269 return mca_btl_rc_ugni_to_opal (grc);
270 }
271
272
273
274
275
276 GNI_EpSetEventData (ep->smsg_ep_handle.gni_handle, ep->index, ep->remote_attr->index);
277
278 ep->rmt_irq_mem_hndl = ep->remote_attr->rmt_irq_mem_hndl;
279 ep->state = MCA_BTL_UGNI_EP_STATE_CONNECTED;
280 (void) opal_atomic_add_fetch_32 (&ep->smsg_ep_handle.device->smsg_connections, 1);
281
282
283 BTL_VERBOSE(("endpoint connected. posting %u sends", (unsigned int) opal_list_get_size (&ep->frag_wait_list)));
284
285 rc = mca_btl_ugni_progress_send_wait_list (ep);
286 if (OPAL_UNLIKELY(OPAL_SUCCESS != rc)) {
287 OPAL_THREAD_LOCK(&ugni_module->ep_wait_list_lock);
288 if (false == ep->wait_listed) {
289 opal_list_append (&ugni_module->ep_wait_list, &ep->super);
290 ep->wait_listed = true;
291 }
292 OPAL_THREAD_UNLOCK(&ugni_module->ep_wait_list_lock);
293 }
294
295 free (ep->remote_attr);
296 ep->remote_attr = NULL;
297
298 return OPAL_SUCCESS;
299 }
300
301 static int mca_btl_ugni_directed_ep_post (mca_btl_base_endpoint_t *ep)
302 {
303 mca_btl_ugni_module_t *ugni_module = mca_btl_ugni_ep_btl (ep);
304 gni_return_t rc;
305
306 BTL_VERBOSE(("posting directed datagram to remote id: %d for endpoint %p", ep->ep_rem_id, (void *)ep));
307
308 ep->mailbox->attr.rmt_irq_mem_hndl = ugni_module->devices->smsg_irq_mhndl;
309
310 rc = GNI_EpPostDataWId (ep->smsg_ep_handle.gni_handle, &ep->mailbox->attr, sizeof (ep->mailbox->attr),
311 ep->remote_attr, sizeof (*ep->remote_attr),
312 MCA_BTL_UGNI_CONNECT_DIRECTED_ID | ep->index);
313 if (OPAL_LIKELY(GNI_RC_SUCCESS == rc)) {
314 (void) opal_atomic_add_fetch_32 (&ugni_module->active_datagrams, 1);
315 }
316
317 return mca_btl_rc_ugni_to_opal (rc);
318 }
319
320 int mca_btl_ugni_wildcard_ep_post (mca_btl_ugni_module_t *ugni_module)
321 {
322 gni_return_t rc;
323
324 BTL_VERBOSE(("posting wildcard datagram"));
325
326 memset (&ugni_module->wc_local_attr, 0, sizeof (ugni_module->wc_local_attr));
327 memset (&ugni_module->wc_remote_attr, 0, sizeof (ugni_module->wc_remote_attr));
328 rc = GNI_EpPostDataWId (ugni_module->wildcard_ep, &ugni_module->wc_local_attr,
329 sizeof (ugni_module->wc_local_attr), &ugni_module->wc_remote_attr,
330 sizeof (ugni_module->wc_remote_attr), MCA_BTL_UGNI_CONNECT_WILDCARD_ID);
331
332 return mca_btl_rc_ugni_to_opal (rc);
333 }
334
335
336 int mca_btl_ugni_ep_connect_progress (mca_btl_base_endpoint_t *ep)
337 {
338 int rc;
339
340 BTL_VERBOSE(("progressing connection for endpoint %p with state %d", (void *)ep, ep->state));
341
342 if (MCA_BTL_UGNI_EP_STATE_CONNECTED == ep->state) {
343 return OPAL_SUCCESS;
344 }
345
346 if (MCA_BTL_UGNI_EP_STATE_INIT == ep->state) {
347 rc = mca_btl_ugni_ep_connect_start (ep);
348 if (OPAL_SUCCESS != rc) {
349 return rc;
350 }
351 }
352
353 BTL_VERBOSE(("ep->remote_attr->smsg_attr = {.msg_type = %d, .msg_buffer = %p}", ep->remote_attr->smsg_attr.msg_type,
354 (void*)ep->remote_attr->smsg_attr.msg_buffer));
355
356 if (GNI_SMSG_TYPE_INVALID == ep->remote_attr->smsg_attr.msg_type) {
357
358 if (!ep->dg_posted) {
359 rc = mca_btl_ugni_directed_ep_post (ep);
360 if (OPAL_SUCCESS == rc) {
361 ep->dg_posted = true;
362 rc = OPAL_ERR_RESOURCE_BUSY;
363 }
364
365 return rc;
366 }
367
368 return OPAL_SUCCESS;
369 }
370
371 return mca_btl_ugni_ep_connect_finish (ep);
372 }
373
374 int mca_btl_ugni_ep_handle_init (mca_btl_ugni_endpoint_t *ep, gni_cq_handle_t cq,
375 mca_btl_ugni_device_t *device, mca_btl_ugni_endpoint_handle_t *ep_handle)
376 {
377 gni_return_t grc;
378
379 ep_handle->device = device;
380
381
382 grc = GNI_EpCreate (device->dev_handle, cq, &ep_handle->gni_handle);
383 if (OPAL_LIKELY(GNI_RC_SUCCESS == grc)) {
384 grc = GNI_EpBind (ep_handle->gni_handle, ep->ep_rem_addr, ep->ep_rem_id);
385 }
386
387 return mca_btl_rc_ugni_to_opal (grc);
388 }
389
390 int mca_btl_ugni_ep_handle_cleanup (mca_btl_ugni_endpoint_handle_t *ep_handle)
391 {
392 int rc;
393
394 if (0 == ep_handle->gni_handle) {
395 return OPAL_SUCCESS;
396 }
397
398
399 rc = GNI_EpUnbind (ep_handle->gni_handle);
400 if (OPAL_UNLIKELY(GNI_RC_SUCCESS != rc)) {
401
402 } else {
403 (void) GNI_EpDestroy (ep_handle->gni_handle);
404 }
405
406 ep_handle->gni_handle = 0;
407
408 return OPAL_SUCCESS;
409 }