This source file includes the following definitions.
- mca_btl_ugni_ep_get_device_index
- mca_btl_ugni_ep_get_device
- mca_btl_rc_ugni_to_opal
- mca_btl_ugni_proc_name_to_id
- mca_btl_ugni_device_trylock
- mca_btl_ugni_device_lock
- mca_btl_ugni_device_unlock
- mca_btl_ugni_device_serialize
- mca_btl_ugni_device_serialize_any
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21 #ifndef MCA_BTL_UGNI_H
22 #define MCA_BTL_UGNI_H
23
24 #include "opal_config.h"
25
26 #include "opal/mca/mpool/mpool.h"
27 #include "opal/mca/mpool/base/base.h"
28 #include "opal/mca/rcache/base/base.h"
29 #include "opal/mca/rcache/udreg/rcache_udreg.h"
30 #include "opal/util/output.h"
31 #include "opal_stdint.h"
32 #include "opal/mca/btl/btl.h"
33 #include "opal/mca/btl/base/base.h"
34 #include "opal/mca/btl/base/btl_base_error.h"
35 #include "opal/class/opal_hash_table.h"
36 #include "opal/class/opal_free_list.h"
37
38 #include <errno.h>
39 #include <stdint.h>
40 #include <sys/types.h>
41 #include <assert.h>
42 #include <sys/time.h>
43 #include <gni_pub.h>
44
45
/* Connection/datagram identifiers. The wildcard id has bit 63 clear, the
 * directed id has bit 63 set, and MCA_BTL_UGNI_DATAGRAM_MASK selects that
 * same bit. */
#define MCA_BTL_UGNI_CONNECT_WILDCARD_ID 0x0000000000000000ull
#define MCA_BTL_UGNI_CONNECT_DIRECTED_ID 0x8000000000000000ull
#define MCA_BTL_UGNI_DATAGRAM_MASK 0x8000000000000000ull


/* Capacity of the devices[] array embedded in mca_btl_ugni_module_t. */
#define MCA_BTL_UGNI_MAX_DEV_HANDLES 128


/* NOTE(review): presumably the maximum number of completions handled per
 * progress-loop iteration -- confirm against the progress code. */
#define MCA_BTL_UGNI_COMPLETIONS_PER_LOOP 32


/* NOTE(review): presumably a connection polling interval in microseconds
 * (name ends in _USEC) -- confirm against its users. */
#define MCA_BTL_UGNI_CONNECT_USEC 10
58
59
60
61
/**
 * Address/id pair describing a process to this BTL.
 * NOTE(review): presumably the payload exchanged through the modex -- confirm.
 */
struct mca_btl_ugni_modex_t {
    /** 32-bit address value. NOTE(review): presumably the GNI NIC address -- confirm */
    uint32_t addr;
    /** Integer identifier. NOTE(review): presumably the CDM id -- confirm */
    int id;
};
typedef struct mca_btl_ugni_modex_t mca_btl_ugni_modex_t;
69
70
/**
 * Per-endpoint attributes bundle: process name, an index, GNI SMSG
 * attributes and a GNI memory handle.
 * NOTE(review): presumably the data exchanged with a peer while connecting -- confirm.
 */
typedef struct mca_btl_ugni_endpoint_attr_t {
    /** name of the process these attributes belong to */
    opal_process_name_t proc_name;
    /** NOTE(review): presumably the endpoint index on the owning process -- confirm */
    uint32_t index;
    /** GNI short-message (SMSG) attributes */
    gni_smsg_attr_t smsg_attr;
    /** NOTE(review): presumably the remote memory handle used for IRQ notification -- confirm */
    gni_mem_handle_t rmt_irq_mem_hndl;
} mca_btl_ugni_endpoint_attr_t;
77
/* Registration-cache selectors (values 0 and 1).
 * NOTE(review): presumably the legal values of
 * mca_btl_ugni_component_t.rcache_type -- confirm. */
enum {
    MCA_BTL_UGNI_RCACHE_UDREG,
    MCA_BTL_UGNI_RCACHE_GRDMA
};
82
/**
 * Indices into mca_btl_ugni_module_t.frags_lists[]. MCA_BTL_UGNI_LIST_MAX is
 * not a list id; it is the number of lists and sizes that array.
 */
enum mca_btl_ugni_free_list_id_t {
    /* NOTE(review): presumably eager send/receive fragment lists -- confirm */
    MCA_BTL_UGNI_LIST_EAGER_SEND,
    MCA_BTL_UGNI_LIST_EAGER_RECV,
    /* NOTE(review): presumably SMSG fragments -- confirm */
    MCA_BTL_UGNI_LIST_SMSG,
    /* NOTE(review): presumably RDMA fragments (user and internal) -- confirm */
    MCA_BTL_UGNI_LIST_RDMA,
    MCA_BTL_UGNI_LIST_RDMA_INT,
    /* number of list ids above */
    MCA_BTL_UGNI_LIST_MAX,
};
94
/** A GNI completion queue handle paired with a counter. */
struct mca_btl_ugni_cq_t {
    /** underlying GNI completion queue handle */
    gni_cq_handle_t gni_handle;
    /** NOTE(review): presumably the number of operations posted on this CQ
     *  that have not yet completed -- confirm against the users */
    volatile int32_t active_operations;
};
typedef struct mca_btl_ugni_cq_t mca_btl_ugni_cq_t;
102
103
104
105
/**
 * State of one (virtual) uGNI device. A module embeds an array of these
 * (mca_btl_ugni_module_t.devices) and the lock helpers in this header
 * serialize access to a device through the `lock` member.
 */
struct mca_btl_ugni_device_t {
    /** GNI communication domain handle */
    gni_cdm_handle_t dev_cd_handle;

    /** spin-lock word: the lock helpers in this header acquire it by
     *  atomically swapping in 1 and release it by storing 0 */
    volatile int32_t lock;

    /** NOTE(review): presumably this device's index in the module's devices[] -- confirm */
    int dev_index;

    /** NOTE(review): presumably the number of SMSG connections using this device -- confirm */
    opal_atomic_int32_t smsg_connections;

    /** NOTE(review): presumably set once the device has been flushed -- confirm */
    volatile bool flushed;

    /** GNI NIC handle */
    gni_nic_handle_t dev_handle;

    /** NOTE(review): presumably the local completion queue for RDMA operations -- confirm */
    mca_btl_ugni_cq_t dev_rdma_local_cq;

    /** NOTE(review): presumably the interrupt-capable variant of the RDMA local CQ -- confirm */
    mca_btl_ugni_cq_t dev_rdma_local_irq_cq;

    /** NOTE(review): presumably the local completion queue for SMSG sends -- confirm */
    mca_btl_ugni_cq_t dev_smsg_local_cq;

    /** NOTE(review): presumably the memory handle used for SMSG IRQ notification -- confirm */
    gni_mem_handle_t smsg_irq_mhndl;

    /** free list of RDMA descriptors owned by this device */
    opal_free_list_t rdma_descs;
};
typedef struct mca_btl_ugni_device_t mca_btl_ugni_device_t;
141
/**
 * Callback type run by mca_btl_ugni_device_serialize() and
 * mca_btl_ugni_device_serialize_any(). It receives the selected device and
 * the caller's opaque argument; its intptr_t result is returned unchanged
 * to the caller of the serialize function.
 */
typedef intptr_t (*mca_btl_ugni_device_serialize_fn_t) (mca_btl_ugni_device_t *device, void *arg);
143
/**
 * uGNI BTL module state. Extends mca_btl_base_module_t (first member) with
 * the device array, endpoint bookkeeping, fragment free lists and
 * registration state.
 */
typedef struct mca_btl_ugni_module_t {
    /** base BTL module; must stay first so the module can be used as a mca_btl_base_module_t */
    mca_btl_base_module_t super;

    /** NOTE(review): presumably true once module initialization has completed -- confirm */
    bool initialized;

    /** virtual devices; the helpers in this header pick entries with an index
     *  below mca_btl_ugni_component.virtual_device_count */
    mca_btl_ugni_device_t devices[MCA_BTL_UGNI_MAX_DEV_HANDLES];

    /** NOTE(review): presumably protects the endpoint bookkeeping below -- confirm */
    opal_mutex_t endpoint_lock;
    size_t endpoint_count;
    opal_pointer_array_t endpoints;
    /** NOTE(review): presumably keyed by the id from mca_btl_ugni_proc_name_to_id() -- confirm */
    opal_hash_table_t id_to_endpoint;

    /** NOTE(review): presumably protects failed_frags -- confirm */
    opal_mutex_t failed_frags_lock;
    /** NOTE(review): presumably fragments whose send failed and must be retried -- confirm */
    opal_list_t failed_frags;

    /** NOTE(review): presumably protects eager_get_pending -- confirm */
    opal_mutex_t eager_get_pending_lock;
    opal_list_t eager_get_pending;

    mca_mpool_base_module_t *mpool;
    /** free list of SMSG mailboxes */
    opal_free_list_t smsg_mboxes;

    /** NOTE(review): presumably the GNI endpoint used for wildcard datagrams -- confirm */
    gni_ep_handle_t wildcard_ep;
    /** NOTE(review): presumably the endpoint representing this process itself -- confirm */
    struct mca_btl_base_endpoint_t *local_ep;

    /** NOTE(review): presumably the number of datagrams currently posted -- confirm */
    opal_atomic_int32_t active_datagrams;
    opal_event_t connection_event;

    /** NOTE(review): presumably remote/local attributes for the wildcard connection -- confirm */
    struct mca_btl_ugni_endpoint_attr_t wc_remote_attr, wc_local_attr;

    gni_cq_handle_t smsg_remote_cq;
    gni_cq_handle_t smsg_remote_irq_cq;

    /** fragment free lists, one per mca_btl_ugni_free_list_id_t value */
    opal_free_list_t frags_lists[MCA_BTL_UGNI_LIST_MAX];

    /** NOTE(review): presumably protects ep_wait_list -- confirm */
    opal_mutex_t ep_wait_list_lock;
    /** NOTE(review): presumably endpoints waiting for resources before they can send -- confirm */
    opal_list_t ep_wait_list;

    /** NOTE(review): presumably SMSG fragments with a completion still pending -- confirm */
    opal_pointer_array_t pending_smsg_frags_bb;

    /** NOTE(review): presumably the registration limit and the current registration count -- confirm */
    int32_t reg_max;
    opal_atomic_int32_t reg_count;

    /** NOTE(review): presumably the number of processes on the local node -- confirm */
    int nlocal_procs;

    /** NOTE(review): presumably the number of RDMA operations currently in flight -- confirm */
    opal_atomic_int32_t active_rdma_count;

    mca_rcache_base_module_t *rcache;
} mca_btl_ugni_module_t;
201
/**
 * uGNI BTL component state: the base component plus configuration values
 * and process-wide GNI parameters.
 * NOTE(review): most fields are presumably filled from MCA parameters at
 * component registration time -- confirm in the component source.
 */
typedef struct mca_btl_ugni_component_t {
    /** base BTL component (version 3.0.0 interface) */
    mca_btl_base_component_3_0_0_t super;

    /** NOTE(review): presumably the maximum number of BTL modules -- confirm */
    uint32_t ugni_max_btls;

    /** NOTE(review): presumably completion-queue sizes (entries) -- confirm */
    uint32_t remote_cq_size;
    uint32_t local_cq_size;
    uint32_t local_rdma_cq_size;

    /** NOTE(review): presumably a threshold on outstanding RDMA operations -- confirm */
    int32_t active_rdma_threshold;

    /** NOTE(review): presumably the number of modules in `modules` -- confirm */
    uint32_t ugni_num_btls;

    mca_btl_ugni_module_t *modules;

    size_t smsg_max_data;

    /** NOTE(review): presumably message-size limits for using FMA (overall / get / put) -- confirm */
    long int ugni_fma_limit;

    long int ugni_fma_get_limit;

    long int ugni_fma_put_limit;

#if OPAL_C_HAVE__THREAD_LOCAL
    /** when true, mca_btl_ugni_device_serialize_any() caches one device per
     *  thread in thread-local storage instead of searching for a free device */
    bool bind_threads_to_devices;
#endif

    /** NOTE(review): presumably the largest message sent via SMSG -- confirm */
    size_t ugni_smsg_limit;

    /** NOTE(review): presumably initial / maximum / increment sizes of the fragment free lists -- confirm */
    int ugni_free_list_num;
    int ugni_free_list_max;
    int ugni_free_list_inc;

    /** NOTE(review): presumably initial / maximum / increment sizes of the eager fragment lists -- confirm */
    int ugni_eager_num;
    int ugni_eager_max;
    int ugni_eager_inc;

    int smsg_max_retries;

    int rdma_max_retries;

    /** NOTE(review): presumably SMSG credit count and mailbox size -- confirm */
    int smsg_max_credits;

    int smsg_mbox_size;

    /** NOTE(review): presumably the maximum number of memory registrations -- confirm */
    int max_mem_reg;

    unsigned int smsg_page_size;

    /** NOTE(review): presumably one of MCA_BTL_UGNI_RCACHE_UDREG / MCA_BTL_UGNI_RCACHE_GRDMA -- confirm */
    int rcache_type;

    char *mpool_hints;

    unsigned int mbox_increment;

    /** NOTE(review): presumably whether a progress thread was requested vs. is actually running -- confirm */
    bool progress_thread_requested;

    bool progress_thread_enabled;

    /** number of entries of a module's devices[] in use; the device-selection
     *  helpers in this header use it as the modulus for round-robin selection */
    int virtual_device_count;

    /** NOTE(review): presumably the GNI protection tag and cookie -- confirm */
    uint8_t ptag;

    uint32_t cookie;

    /** NOTE(review): presumably the base CDM id and CDM creation flags -- confirm */
    uint32_t cdm_id_base;

    uint32_t cdm_flags;

    /** NOTE(review): presumably the local GNI device address -- confirm */
    uint32_t dev_addr;

    int cdm_flags_id;
} mca_btl_ugni_component_t;
298
299
300
/* Declarations of the component and module instances; both are defined
 * outside this header. */
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_component_t mca_btl_ugni_component;
OPAL_MODULE_DECLSPEC extern mca_btl_ugni_module_t mca_btl_ugni_module;
303
304 static inline uint32_t mca_btl_ugni_ep_get_device_index (mca_btl_ugni_module_t *ugni_module)
305 {
306 static volatile uint32_t device_index = (uint32_t) 0;
307
308
309 return opal_atomic_fetch_add_32 ((volatile int32_t *) &device_index, 1) % mca_btl_ugni_component.virtual_device_count;
310 }
311
312
313
314
315 static inline mca_btl_ugni_device_t *mca_btl_ugni_ep_get_device (mca_btl_ugni_module_t *ugni_module)
316 {
317 return ugni_module->devices + mca_btl_ugni_ep_get_device_index (ugni_module);
318 }
319
320 static inline int mca_btl_rc_ugni_to_opal (gni_return_t rc)
321 {
322 static int codes[] = {OPAL_SUCCESS,
323 OPAL_ERR_RESOURCE_BUSY,
324 OPAL_ERR_BAD_PARAM,
325 OPAL_ERR_OUT_OF_RESOURCE,
326 OPAL_ERR_TIMEOUT,
327 OPAL_ERR_PERM,
328 OPAL_ERROR,
329 OPAL_ERR_BAD_PARAM,
330 OPAL_ERR_BAD_PARAM,
331 OPAL_ERR_NOT_FOUND,
332 OPAL_ERR_VALUE_OUT_OF_BOUNDS,
333 OPAL_ERROR,
334 OPAL_ERR_NOT_SUPPORTED,
335 OPAL_ERR_OUT_OF_RESOURCE};
336 return codes[rc];
337 }
338
339
/**
 * Flush entry point of this BTL (defined elsewhere).
 * NOTE(review): presumably waits for outstanding operations on `endpoint`
 * to complete and returns an OPAL status code -- confirm in the implementation.
 */
int mca_btl_ugni_flush (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint);
341
342
343
344
345
346
347
348
349
350
351
352
353
/**
 * Add-procs entry point of this BTL (defined elsewhere).
 *
 * @param btl        BTL module
 * @param nprocs     number of entries in procs / peers
 * @param procs      processes to add
 * @param peers      per-process endpoint pointers
 *                   NOTE(review): presumably filled in by this call -- confirm
 * @param reachable  bitmap; NOTE(review): presumably a bit is set for each
 *                   process reachable through this BTL -- confirm
 *
 * @returns NOTE(review): presumably an OPAL status code -- confirm
 */
int
mca_btl_ugni_add_procs (struct mca_btl_base_module_t* btl,
                        size_t nprocs,
                        struct opal_proc_t **procs,
                        struct mca_btl_base_endpoint_t **peers,
                        opal_bitmap_t *reachable);
360
361
362
363
364
365
366
367
368
369
370
371
/**
 * Del-procs entry point of this BTL (defined elsewhere).
 *
 * @param btl     BTL module
 * @param nprocs  number of entries in procs / peers
 * @param procs   processes to remove
 * @param peers   endpoints associated with those processes
 *
 * @returns NOTE(review): presumably an OPAL status code -- confirm
 */
int
mca_btl_ugni_del_procs (struct mca_btl_base_module_t *btl,
                        size_t nprocs,
                        struct opal_proc_t **procs,
                        struct mca_btl_base_endpoint_t **peers);
377
/**
 * Return the endpoint associated with `proc` on `module` (defined elsewhere).
 * NOTE(review): whether a missing endpoint is created on demand, and what is
 * returned on failure, is not visible here -- confirm in the implementation.
 */
struct mca_btl_base_endpoint_t *mca_btl_ugni_get_ep (struct mca_btl_base_module_t *module, opal_proc_t *proc);
379
380
381
382
383
384
385
386
387
388
389
/**
 * Send entry point of this BTL (defined elsewhere).
 *
 * @param btl         BTL module
 * @param btl_peer    destination endpoint
 * @param descriptor  descriptor describing the data to send
 * @param tag         tag identifying the receive callback on the peer
 *
 * @returns NOTE(review): presumably an OPAL status code -- confirm
 */
int
mca_btl_ugni_send (struct mca_btl_base_module_t *btl,
                   struct mca_btl_base_endpoint_t *btl_peer,
                   struct mca_btl_base_descriptor_t *descriptor,
                   mca_btl_base_tag_t tag);
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
/**
 * Immediate-send entry point of this BTL (defined elsewhere).
 *
 * @param btl           BTL module
 * @param endpoint      destination endpoint
 * @param convertor     convertor describing the payload
 * @param header        header bytes to send
 * @param header_size   size of the header in bytes
 * @param payload_size  size of the payload in bytes
 * @param order         ordering tag
 * @param flags         descriptor flags
 * @param tag           tag identifying the receive callback on the peer
 * @param descriptor    NOTE(review): presumably receives a descriptor when
 *                      the message could not be sent immediately -- confirm
 *
 * @returns NOTE(review): presumably an OPAL status code -- confirm
 */
int
mca_btl_ugni_sendi (struct mca_btl_base_module_t *btl,
                    struct mca_btl_base_endpoint_t *endpoint,
                    struct opal_convertor_t *convertor,
                    void *header, size_t header_size,
                    size_t payload_size, uint8_t order,
                    uint32_t flags, mca_btl_base_tag_t tag,
                    mca_btl_base_descriptor_t **descriptor);
420
/**
 * RDMA get entry point (defined elsewhere): transfers `size` bytes between
 * `remote_address` on `endpoint` and `local_address`, using the given
 * registration handles. `cbfunc` is a completion callback that takes
 * `cbcontext` / `cbdata`.
 * NOTE(review): return value is presumably an OPAL status code -- confirm.
 */
int mca_btl_ugni_get (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
425
/**
 * RDMA put entry point (defined elsewhere): transfers `size` bytes between
 * `local_address` and `remote_address` on `endpoint`, using the given
 * registration handles. `cbfunc` is a completion callback that takes
 * `cbcontext` / `cbdata`.
 * NOTE(review): return value is presumably an OPAL status code -- confirm.
 */
int mca_btl_ugni_put (mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint, void *local_address,
                      uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                      mca_btl_base_registration_handle_t *remote_handle, size_t size, int flags,
                      int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
430
/**
 * Atomic-operation entry point (defined elsewhere): applies `op` with
 * `operand` to `remote_address` on `endpoint`. Takes no local buffer, so
 * no previous value is passed back through the arguments.
 * NOTE(review): return value is presumably an OPAL status code -- confirm.
 */
int mca_btl_ugni_aop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                      uint64_t remote_address, mca_btl_base_registration_handle_t *remote_handle,
                      mca_btl_base_atomic_op_t op, uint64_t operand, int flags, int order,
                      mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
435
/**
 * Atomic fetch-and-operate entry point (defined elsewhere): applies `op`
 * with `operand` to `remote_address` on `endpoint`.
 * NOTE(review): presumably the previous remote value is written to
 * `local_address`, and the return value is an OPAL status code -- confirm.
 */
int mca_btl_ugni_afop (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                       void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                       mca_btl_base_registration_handle_t *remote_handle, mca_btl_base_atomic_op_t op,
                       uint64_t operand, int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc,
                       void *cbcontext, void *cbdata);
441
/**
 * Atomic compare-and-swap entry point (defined elsewhere), operating on
 * `remote_address` on `endpoint` with the `compare` and `value` operands.
 * NOTE(review): presumably the previous remote value is written to
 * `local_address`, and the return value is an OPAL status code -- confirm.
 */
int mca_btl_ugni_acswap (struct mca_btl_base_module_t *btl, struct mca_btl_base_endpoint_t *endpoint,
                         void *local_address, uint64_t remote_address, mca_btl_base_registration_handle_t *local_handle,
                         mca_btl_base_registration_handle_t *remote_handle, uint64_t compare, uint64_t value,
                         int flags, int order, mca_btl_base_rdma_completion_fn_t cbfunc, void *cbcontext, void *cbdata);
446
/* Progress helpers (defined elsewhere).
 * NOTE(review): presumably the first retries sends queued on an endpoint's
 * wait list and the second progresses datagram (connection) traffic on a
 * device -- confirm return-value semantics in the implementation. */
int mca_btl_ugni_progress_send_wait_list (struct mca_btl_base_endpoint_t *endpoint);
int mca_btl_ugni_progress_datagram (mca_btl_ugni_device_t *device);
449
/**
 * Descriptor allocation entry point of this BTL (defined elsewhere).
 *
 * @param btl       BTL module
 * @param endpoint  endpoint the descriptor will be used with
 * @param order     ordering tag
 * @param size      requested size in bytes
 * @param flags     descriptor flags
 *
 * @returns a descriptor pointer
 *          NOTE(review): presumably NULL when allocation fails -- confirm
 */
mca_btl_base_descriptor_t *
mca_btl_ugni_alloc(struct mca_btl_base_module_t *btl,
                   struct mca_btl_base_endpoint_t *endpoint,
                   uint8_t order, size_t size, uint32_t flags);
454
/** This BTL's definition of the registration handle: it wraps a single
 *  GNI memory handle. */
struct mca_btl_base_registration_handle_t {
    /** GNI memory handle of the registered region */
    gni_mem_handle_t gni_handle;
};
459
/** Registration record: the rcache base registration (first member)
 *  followed by this BTL's registration handle. */
typedef struct mca_btl_ugni_reg_t {
    mca_rcache_base_registration_t base;
    mca_btl_base_registration_handle_t handle;
} mca_btl_ugni_reg_t;
464
465
466
467
/* BTL-wide initialization (defined elsewhere).
 * NOTE(review): presumably returns an OPAL status code -- confirm. */
int mca_btl_ugni_init (void);
469
470
471
472
/* BTL-wide finalization, counterpart of mca_btl_ugni_init() (defined elsewhere).
 * NOTE(review): presumably returns an OPAL status code -- confirm. */
int mca_btl_ugni_fini (void);
474
/* Initialize the given module structure (defined elsewhere).
 * NOTE(review): presumably returns an OPAL status code -- confirm. */
int mca_btl_ugni_module_init (mca_btl_ugni_module_t *ugni_module);
476
477
478
479
480
481
482
/**
 * Initialize one device structure (defined elsewhere).
 *
 * @param device             device structure to initialize
 * @param virtual_device_id  id assigned to this virtual device
 *
 * @returns NOTE(review): presumably an OPAL status code -- confirm
 */
int mca_btl_ugni_device_init (mca_btl_ugni_device_t *device, int virtual_device_id);
484
485
486
487
488
489
/**
 * Finalize a device, counterpart of mca_btl_ugni_device_init() (defined elsewhere).
 *
 * @param dev  device to finalize
 *
 * @returns NOTE(review): presumably an OPAL status code -- confirm
 */
int mca_btl_ugni_device_fini (mca_btl_ugni_device_t *dev);
491
492
493 static inline uint64_t mca_btl_ugni_proc_name_to_id (opal_process_name_t name) {
494
495 return ((uint64_t) (name.jobid & 0x7fffffff) << 32 | name.vpid);
496 }
497
/* Progress-thread control (defined elsewhere): start a progress thread for
 * the given BTL module, and stop it again.
 * NOTE(review): both presumably return an OPAL status code -- confirm. */
int mca_btl_ugni_spawn_progress_thread(struct mca_btl_base_module_t* btl);
int mca_btl_ugni_kill_progress_thread(void);
500
/* Forward declaration: only pointers to the post descriptor are needed here. */
struct mca_btl_ugni_post_descriptor_t;

/* Debug helper (defined elsewhere).
 * NOTE(review): presumably prints the contents of a post descriptor -- confirm. */
void btl_ugni_dump_post_desc (struct mca_btl_ugni_post_descriptor_t *desc);


/* (repeated forward declaration; harmless) */
struct mca_btl_ugni_post_descriptor_t;

/* Handle `count` RDMA completions for `device` of `ugni_module`, starting at
 * `post_desc` (defined elsewhere).
 * NOTE(review): presumably post_desc points to an array of `count`
 * descriptors -- confirm in the implementation. */
void mca_btl_ugni_handle_rdma_completions (mca_btl_ugni_module_t *ugni_module, mca_btl_ugni_device_t *device,
                                           struct mca_btl_ugni_post_descriptor_t *post_desc, const int count);
510
511
512
513
514 static inline int mca_btl_ugni_device_trylock (mca_btl_ugni_device_t *device)
515 {
516
517
518 return (device->lock || opal_atomic_swap_32 (&device->lock, 1));
519 }
520
521
522
523
524 static inline void mca_btl_ugni_device_lock (mca_btl_ugni_device_t *device)
525 {
526 while (mca_btl_ugni_device_trylock (device));
527 }
528
529
530
531
/**
 * Release the device lock: issue a write memory barrier, then store 0 to
 * the lock word.
 *
 * @param[in] device  device to unlock
 */
static inline void mca_btl_ugni_device_unlock (mca_btl_ugni_device_t *device)
{
    /* The barrier must precede the releasing store.
     * NOTE(review): presumably so that writes made while holding the lock
     * are visible before the lock appears free -- confirm. */
    opal_atomic_wmb ();
    device->lock = 0;
}
537
538
539
540
541
542
543
544
545 static inline intptr_t mca_btl_ugni_device_serialize (mca_btl_ugni_device_t *device,
546 mca_btl_ugni_device_serialize_fn_t fn, void *arg)
547 {
548 intptr_t rc;
549
550 if (!opal_using_threads ()) {
551 return fn (device, arg);
552 }
553
554
555 mca_btl_ugni_device_lock (device);
556 rc = fn (device, arg);
557 mca_btl_ugni_device_unlock (device);
558 return rc;
559 }
560
561 static inline intptr_t mca_btl_ugni_device_serialize_any (mca_btl_ugni_module_t *ugni_module,
562 mca_btl_ugni_device_serialize_fn_t fn, void *arg)
563 {
564 mca_btl_ugni_device_t *device;
565 intptr_t rc;
566
567 if (!opal_using_threads ()) {
568 return fn (ugni_module->devices, arg);
569 }
570
571 #if OPAL_C_HAVE__THREAD_LOCAL
572 if (mca_btl_ugni_component.bind_threads_to_devices) {
573
574
575 static _Thread_local mca_btl_ugni_device_t *device_local = NULL;
576
577 device = device_local;
578 if (OPAL_UNLIKELY(NULL == device)) {
579
580 device_local = device = mca_btl_ugni_ep_get_device (ugni_module);
581 }
582
583 mca_btl_ugni_device_lock (device);
584 } else {
585 #endif
586
587 uint32_t device_index = mca_btl_ugni_ep_get_device_index (ugni_module);
588 const int device_count = mca_btl_ugni_component.virtual_device_count;
589
590 for (int i = 0 ; i < device_count ; ++i) {
591 device = ugni_module->devices + ((device_index + i) % device_count);
592 if (!mca_btl_ugni_device_trylock (device)) {
593 break;
594 }
595
596 device = NULL;
597 }
598
599 if (NULL == device) {
600 device = mca_btl_ugni_ep_get_device (ugni_module);
601 mca_btl_ugni_device_lock (device);
602 }
603 #if OPAL_C_HAVE__THREAD_LOCAL
604 }
605 #endif
606
607 rc = fn (device, arg);
608 mca_btl_ugni_device_unlock (device);
609
610 return rc;
611 }
612
613
614
/* Counter defined elsewhere.
 * NOTE(review): presumably the number of times the progress thread has been
 * woken up -- confirm. */
extern unsigned int mca_btl_ugni_progress_thread_wakeups;
616
617 #endif