1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2018 Los Alamos National Security, LLC. All rights
4 * reserved.
5 * $COPYRIGHT$
6 *
7 * Additional copyrights may follow
8 *
9 * $HEADER$
10 */
11
12 #if !defined(BTL_UCT_TYPES_H)
13 #define BTL_UCT_TYPES_H
14
15 #include "opal/mca/btl/btl.h"
16
17 /* forward declarations */
18 struct mca_btl_uct_module_t;
19 struct mca_btl_base_endpoint_t;
20 struct mca_btl_uct_base_frag_t;
21
/*
 * Transport-layer (TL) endpoint flags. Each flag occupies its own bit so
 * the values can be OR-ed together.
 */
/** connection data has been received */
#define MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REC       (1 << 0)
/** the remote endpoint is ready */
#define MCA_BTL_UCT_ENDPOINT_FLAG_CONN_REM_READY (1 << 1)
/** the connection has been established */
#define MCA_BTL_UCT_ENDPOINT_FLAG_CONN_READY     (1 << 2)
29
/*
 * Active message (AM) tags used by this btl.
 */
/** tag identifying a BTL fragment */
#define MCA_BTL_UCT_FRAG         0x0d
/** tag identifying a connection request */
#define MCA_BTL_UCT_CONNECT_RDMA 0x0e
35
/** upper bound on the number of modules the btl component supports */
#define MCA_BTL_UCT_MAX_MODULES 16
/** upper bound on the number of UCT workers */
#define MCA_BTL_UCT_MAX_WORKERS 64
40
/**
 * @brief Modex data
 *
 * A fixed-size count followed by variable-length data.
 */
typedef struct mca_btl_uct_modex_t {
    /** how many modules have their data stored in this modex */
    int32_t module_count;

    /** variable-length modex data (flexible array member) */
    uint8_t data[];
} mca_btl_uct_modex_t;
53
54 /**
55 * @brief BTL UCT memory domain structure
56 *
57 * Each BTL module supports a single memory domain. Each memory domain has
58 * one or more transport layers.
59 */
60 struct mca_btl_uct_md_t {
61 /** make this an opal object */
62 opal_object_t super;
63
64 /** UCT memory domain handle */
65 uct_md_h uct_md;
66 };
67
68 typedef struct mca_btl_uct_md_t mca_btl_uct_md_t;
69
70 OBJ_CLASS_DECLARATION(mca_btl_uct_md_t);
71
72
73 /**
74 * @brief Connection request structure
75 */
76 struct mca_btl_uct_conn_req_t {
77 /** name of the requesting process */
78 opal_process_name_t proc_name;
79
80 /** request type: 0 == endpoint data, 1 == endpoint data + remote ready */
81 int type;
82
83 /** context id that should be connected */
84 int context_id;
85
86 /** transport index that should be connected */
87 int tl_index;
88
89 /** endpoint address data */
90 uint8_t ep_addr[];
91 };
92
93 typedef struct mca_btl_uct_conn_req_t mca_btl_uct_conn_req_t;
94
95 /**
96 * @brief Transport endpoint stucture
97 */
98 struct mca_btl_uct_tl_endpoint_t {
99 /** current flags (connected, requested, etc) */
100 opal_atomic_int32_t flags;
101
102 /** UCT endpoint handle */
103 uct_ep_h uct_ep;
104 };
105
106 typedef struct mca_btl_uct_tl_endpoint_t mca_btl_uct_tl_endpoint_t;
107
108 /**
109 * @brief Structure to keep track of connection endpoints
110 */
111 struct mca_btl_uct_connection_ep_t {
112 /** opal base object */
113 opal_object_t super;
114
115 /** UCT endpoint used for connection */
116 uct_ep_h uct_ep;
117 };
118
119 typedef struct mca_btl_uct_connection_ep_t mca_btl_uct_connection_ep_t;
120
121 OBJ_CLASS_DECLARATION(mca_btl_uct_connection_ep_t);
122
123 /**
124 * @brief Context for UCT device interface
125 *
126 * This structure uses atomic locks to protect the UCT worker (which is not thread safe).
127 * In order to make device access fast pthread mutexes are not used. To deal with recursion
128 * (unavoidable with active messages) we implement an atomic lock using C11 atomics (or
129 * pthread thread-specific values with older compilers).
130 */
131 struct mca_btl_uct_device_context_t {
132 /** index of this context */
133 int context_id;
134
135 /** btl module this context is associated with */
136 struct mca_btl_uct_module_t *uct_btl;
137
138 /** mutex for protecting the UCT worker */
139 opal_recursive_mutex_t mutex;
140
141 /** UCT worker handle */
142 uct_worker_h uct_worker;
143
144 /** UCT interface handle */
145 uct_iface_h uct_iface;
146
147 /** interface attributes */
148 uct_iface_attr_t uct_iface_attr;
149
150 /** RDMA completions */
151 opal_free_list_t rdma_completions;
152
153 /** complete fragments and rdma operations. this fifo is used to avoid making
154 * callbacks while holding the device lock. */
155 opal_fifo_t completion_fifo;
156
157 /** progress is enabled on this context */
158 bool progress_enabled;
159
160 /** context is in AM callback */
161 volatile bool in_am_callback;
162 };
163
164 typedef struct mca_btl_uct_device_context_t mca_btl_uct_device_context_t;
165
166 /**
167 * @brief Header for all BTL UCT active messages
168 */
169 union mca_btl_uct_am_header_t {
170 /** active message header data */
171 struct mca_btl_uct_am_header_data_t {
172 /** callback tag */
173 mca_btl_base_tag_t tag;
174
175 /** padding */
176 uint8_t padding[7];
177 } data;
178
179 /** header value. this is 64-bits to support using this with uct_ep_am_short */
180 uint64_t value;
181 };
182
183 typedef union mca_btl_uct_am_header_t mca_btl_uct_am_header_t;
184
185 /**
186 * @brief structure to keep track of btl callback
187 *
188 * This structuere is passed to various uct functions. It
189 * does the translation between the uct callback and the
190 * btl callback.
191 */
192 struct mca_btl_uct_uct_completion_t {
193 /** allocated from a free list */
194 opal_free_list_item_t super;
195
196 /** uct completion structure */
197 uct_completion_t uct_comp;
198
199 /** AM completion context */
200 struct mca_btl_uct_base_frag_t *frag;
201
202 /** btl module associated with the callback */
203 struct mca_btl_base_module_t *btl;
204
205 /** btl endpoint associated with the callback */
206 struct mca_btl_base_endpoint_t *endpoint;
207
208 /** local address */
209 void *local_address;
210
211 /** local registration handle */
212 mca_btl_base_registration_handle_t *local_handle;
213
214 /** user callback function */
215 mca_btl_base_rdma_completion_fn_t cbfunc;
216
217 /** user callback context */
218 void *cbcontext;
219
220 /** user callback data */
221 void *cbdata;
222
223 /** device context */
224 mca_btl_uct_device_context_t *dev_context;
225
226 /** status */
227 int status;
228 };
229
230 typedef struct mca_btl_uct_uct_completion_t mca_btl_uct_uct_completion_t;
231
232 OBJ_CLASS_DECLARATION(mca_btl_uct_uct_completion_t);
233
234 /**
235 * @brief Base fragment structure
236 */
237 struct mca_btl_uct_base_frag_t {
238 /** btl base fragment */
239 mca_btl_base_descriptor_t base;
240
241 /** segments (used with the base fragment) */
242 mca_btl_base_segment_t segments[2];
243
244 /** module this fragment is associated with */
245 struct mca_btl_uct_module_t *btl;
246
247 /* tl context */
248 mca_btl_uct_device_context_t *context;
249
250 /** is this frag ready to send (only used when pending) */
251 bool ready;
252
253 /** endpoint this fragment is associated with */
254 struct mca_btl_base_endpoint_t *endpoint;
255
256 /** free list this fragment was allocated from */
257 opal_free_list_t *free_list;
258
259 /** fragment btl/uct header */
260 mca_btl_uct_am_header_t header;
261
262 /** pre-filled UCT io vector */
263 uct_iov_t uct_iov;
264
265 /** completion structure */
266 mca_btl_uct_uct_completion_t comp;
267 };
268
269 typedef struct mca_btl_uct_base_frag_t mca_btl_uct_base_frag_t;
270
271 OBJ_CLASS_DECLARATION(mca_btl_uct_base_frag_t);
272
273 struct mca_btl_base_endpoint_t {
274 /** opal base class */
275 opal_object_t super;
276
277 /** endpoint proc */
278 opal_proc_t *ep_proc;
279
280 /** mutex to protect this structure */
281 opal_recursive_mutex_t ep_lock;
282
283 /** cached connection endpoint */
284 mca_btl_uct_connection_ep_t *conn_ep;
285
286 /** endpoints into UCT for this BTL endpoint */
287 mca_btl_uct_tl_endpoint_t uct_eps[][2];
288 };
289
290 typedef struct mca_btl_base_endpoint_t mca_btl_base_endpoint_t;
291 typedef mca_btl_base_endpoint_t mca_btl_uct_endpoint_t;
292 OBJ_CLASS_DECLARATION(mca_btl_uct_endpoint_t);
293
294 /**
295 * @brief BTL UCT abstraction of a UCT transport layer
296 */
297 struct mca_btl_uct_tl_t {
298 /** make this an opal object */
299 opal_list_item_t super;
300
301 /** relative priority 0 == highest */
302 int priority;
303
304 /** memory domain associated with this tl */
305 mca_btl_uct_md_t *uct_md;
306
307 /** lock protecting tl structures */
308 opal_mutex_t tl_lock;
309
310 /** tl configuration (used for creating device contexts) */
311 uct_iface_config_t *uct_tl_config;
312
313 /** name of this tl (used for creating device contexts) */
314 char *uct_tl_name;
315
316 /** device name for this tl (used for creating device contexts) */
317 char *uct_dev_name;
318
319 /** maxiumum number of device contexts that can be created */
320 int max_device_contexts;
321
322 /** array of device contexts */
323 mca_btl_uct_device_context_t **uct_dev_contexts;
324
325 /** tl index. this is used to differentiate (if there is any difference)
326 * between rdma and am endpoints */
327 int tl_index;
328 };
329
330 typedef struct mca_btl_uct_tl_t mca_btl_uct_tl_t;
331 OBJ_CLASS_DECLARATION(mca_btl_uct_tl_t);
332
/**
 * @brief Access the interface attributes of one device context of a tl
 *
 * Expands to the uct_iface_attr member of the device context stored at
 * index context_id of (tl)->uct_dev_contexts. The expansion is an lvalue.
 * No NULL or bounds checks are performed.
 */
#define MCA_BTL_UCT_TL_ATTR(tl, context_id) \
    ((tl)->uct_dev_contexts[(context_id)]->uct_iface_attr)
334
335 struct mca_btl_uct_pending_connection_request_t {
336 opal_list_item_t super;
337 uint8_t request_data[];
338 };
339
340 typedef struct mca_btl_uct_pending_connection_request_t mca_btl_uct_pending_connection_request_t;
341 OBJ_CLASS_DECLARATION(mca_btl_uct_pending_connection_request_t);
342
343 #endif /* !defined(BTL_UCT_TYPES_H) */