This source file includes the following definitions.
- check_mxm_tls
- set_mxm_tls
- check_mxm_hw_tls
- set_mxm_hw_rdma_tls
- mca_spml_ikrit_param_register_int
- mca_spml_ikrit_param_register_size_t
- mca_spml_ikrit_param_register_string
- mca_spml_ikrit_component_register
- spml_ikrit_progress
- mca_spml_ikrit_component_open
- mca_spml_ikrit_component_close
- spml_ikrit_mxm_init
- mca_spml_ikrit_component_init
- mca_spml_ikrit_component_fini
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 #define _GNU_SOURCE
17 #include <stdio.h>
18
19 #include <sys/types.h>
20 #include <unistd.h>
21
22 #include "opal/util/printf.h"
23 #include "opal/util/show_help.h"
24 #include "oshmem_config.h"
25 #include "shmem.h"
26 #include "oshmem/runtime/params.h"
27 #include "oshmem/mca/spml/spml.h"
28 #include "oshmem/mca/spml/base/base.h"
29 #include "spml_ikrit_component.h"
30 #include "oshmem/mca/spml/ikrit/spml_ikrit.h"
31
32 static int mca_spml_ikrit_component_register(void);
33 static int mca_spml_ikrit_component_open(void);
34 static int mca_spml_ikrit_component_close(void);
35 static mca_spml_base_module_t*
36 mca_spml_ikrit_component_init(int* priority,
37 bool enable_progress_threads,
38 bool enable_mpi_threads);
39 static int mca_spml_ikrit_component_fini(void);
40 mca_spml_base_component_2_0_0_t mca_spml_ikrit_component = {
41
42
43
44
45 .spmlm_version = {
46 MCA_SPML_BASE_VERSION_2_0_0,
47
48 .mca_component_name = "ikrit",
49 MCA_BASE_MAKE_VERSION(component, OSHMEM_MAJOR_VERSION, OSHMEM_MINOR_VERSION,
50 OSHMEM_RELEASE_VERSION),
51 .mca_open_component = mca_spml_ikrit_component_open,
52 .mca_close_component = mca_spml_ikrit_component_close,
53 .mca_register_component_params = mca_spml_ikrit_component_register,
54 },
55 .spmlm_data = {
56
57 MCA_BASE_METADATA_PARAM_CHECKPOINT
58 },
59
60 .spmlm_init = mca_spml_ikrit_component_init,
61 .spmlm_finalize = mca_spml_ikrit_component_fini,
62 };
63
64 #if MXM_API >= MXM_VERSION(2,1)
65 static inline int check_mxm_tls(char *var)
66 {
67 char *str;
68
69 str = getenv(var);
70 if (NULL == str) {
71 return OSHMEM_SUCCESS;
72 }
73
74 if (NULL != strstr(str, "shm")) {
75 if (0 < opal_asprintf(&str,
76 "%s=%s",
77 var, getenv(var)
78 )) {
79 opal_show_help("help-oshmem-spml-ikrit.txt", "mxm shm tls", true,
80 str);
81 free(str);
82 }
83 return OSHMEM_ERROR;
84 }
85 if (NULL == strstr(str, "rc") && NULL == strstr(str, "dc")) {
86 mca_spml_ikrit.ud_only = 1;
87 } else {
88 mca_spml_ikrit.ud_only = 0;
89 }
90 return OSHMEM_SUCCESS;
91 }
92
93 static inline int set_mxm_tls(void)
94 {
95 char *tls;
96
97
98
99
100 opal_setenv("MXM_OSHMEM_DC_QP_LIMIT", "2", 0, &environ);
101 opal_setenv("MXM_OSHMEM_DC_RNDV_QP_LIMIT", "2", 0, &environ);
102 opal_setenv("MXM_OSHMEM_DC_MSS", "8196", 0, &environ);
103
104 tls = getenv("MXM_OSHMEM_TLS");
105 if (NULL != tls) {
106 return check_mxm_tls("MXM_OSHMEM_TLS");
107 }
108
109 tls = getenv("MXM_TLS");
110 if (NULL == tls) {
111 opal_setenv("MXM_OSHMEM_TLS", mca_spml_ikrit.mxm_tls, 1, &environ);
112 return check_mxm_tls("MXM_OSHMEM_TLS");
113 }
114 if (OSHMEM_SUCCESS == check_mxm_tls("MXM_TLS")) {
115 opal_setenv("MXM_OSHMEM_TLS", tls, 1, &environ);
116 return OSHMEM_SUCCESS;
117 }
118 return OSHMEM_ERROR;
119 }
120
121 static inline int check_mxm_hw_tls(char *v, char *tls)
122 {
123 if (v && tls) {
124 if ((0 == strcmp(tls, "rc") || 0 == strcmp(tls, "dc"))) {
125 mca_spml_ikrit.ud_only = 0;
126 return OSHMEM_SUCCESS;
127 }
128
129 if (strstr(tls, "ud") &&
130 (NULL == strstr(tls, "rc") && NULL == strstr(tls, "dc") &&
131 NULL == strstr(tls, "shm"))) {
132 return OSHMEM_SUCCESS;
133 }
134 }
135
136 opal_show_help("help-oshmem-spml-ikrit.txt", "mxm tls", true,
137 v, tls);
138 return OSHMEM_ERROR;
139 }
140
141 static inline int set_mxm_hw_rdma_tls(void)
142 {
143 if (!mca_spml_ikrit.hw_rdma_channel) {
144 return check_mxm_hw_tls("MXM_OSHMEM_TLS", getenv("MXM_OSHMEM_TLS"));
145 }
146 opal_setenv("MXM_OSHMEM_HW_RDMA_RC_QP_LIMIT", "-1", 0, &environ);
147 opal_setenv("MXM_OSHMEM_HW_RDMA_TLS", "rc", 0, &environ);
148 SPML_VERBOSE(5, "Additional communication channel is enabled. Transports are: %s",
149 getenv("MXM_OSHMEM_HW_RDMA_TLS"));
150
151 return check_mxm_hw_tls("MXM_OSHMEM_HW_RDMA_TLS",
152 getenv("MXM_OSHMEM_HW_RDMA_TLS"));
153 }
154
155 #endif
156
157 static inline void mca_spml_ikrit_param_register_int(const char* param_name,
158 int default_value,
159 const char *help_msg,
160 int *storage)
161 {
162 *storage = default_value;
163 (void) mca_base_component_var_register(&mca_spml_ikrit_component.spmlm_version,
164 param_name,
165 help_msg,
166 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
167 OPAL_INFO_LVL_9,
168 MCA_BASE_VAR_SCOPE_READONLY,
169 storage);
170 }
171
172 static inline void mca_spml_ikrit_param_register_size_t(const char* param_name,
173 size_t default_value,
174 const char *help_msg,
175 size_t *storage)
176 {
177 *storage = default_value;
178 (void) mca_base_component_var_register(&mca_spml_ikrit_component.spmlm_version,
179 param_name,
180 help_msg,
181 MCA_BASE_VAR_TYPE_SIZE_T, NULL, 0, 0,
182 OPAL_INFO_LVL_9,
183 MCA_BASE_VAR_SCOPE_READONLY,
184 storage);
185 }
186
187 static inline void mca_spml_ikrit_param_register_string(const char* param_name,
188 char* default_value,
189 const char *help_msg,
190 char **storage)
191 {
192 *storage = default_value;
193 (void) mca_base_component_var_register(&mca_spml_ikrit_component.spmlm_version,
194 param_name,
195 help_msg,
196 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
197 OPAL_INFO_LVL_9,
198 MCA_BASE_VAR_SCOPE_READONLY,
199 storage);
200 }
201
202 static int mca_spml_ikrit_component_register(void)
203 {
204 char *v;
205
206 mca_spml_ikrit_param_register_int("free_list_num", 1024,
207 0,
208 &mca_spml_ikrit.free_list_num);
209 mca_spml_ikrit_param_register_int("free_list_max", 1024,
210 0,
211 &mca_spml_ikrit.free_list_max);
212 mca_spml_ikrit_param_register_int("free_list_inc", 16,
213 0,
214 &mca_spml_ikrit.free_list_inc);
215 mca_spml_ikrit_param_register_int("bulk_connect", 1,
216 0,
217 &mca_spml_ikrit.bulk_connect);
218 mca_spml_ikrit_param_register_int("bulk_disconnect", 1,
219 0,
220 &mca_spml_ikrit.bulk_disconnect);
221 mca_spml_ikrit_param_register_int("priority", 20,
222 "[integer] ikrit priority",
223 &mca_spml_ikrit.priority);
224 mca_spml_ikrit_param_register_int("hw_rdma_channel", 0,
225 "create separate reliable connection channel",
226 &mca_spml_ikrit.hw_rdma_channel);
227
228 if (!mca_spml_ikrit.hw_rdma_channel)
229 v = "ud,self";
230 else
231 v = "rc,ud,self";
232 mca_spml_ikrit_param_register_string("mxm_tls",
233 v,
234 "[string] TL channels for MXM",
235 &mca_spml_ikrit.mxm_tls);
236
237 mca_spml_ikrit_param_register_int("np",
238 0,
239 "[integer] Minimal allowed job's NP to activate ikrit", &mca_spml_ikrit.np);
240 mca_spml_ikrit_param_register_int("unsync_conn_max", 8,
241 "[integer] Max number of connections that do not require notification of PUT operation remote completion. Increasing this number improves efficiency of p2p communication but increases overhead of shmem_fence/shmem_quiet/shmem_barrier",
242 &mca_spml_ikrit.unsync_conn_max);
243
244 mca_spml_ikrit_param_register_size_t("put_zcopy_threshold", 16384ULL,
245 "[size_t] Use zero copy put if message size is greater than the threshold",
246 &mca_spml_ikrit.put_zcopy_threshold);
247 if (oshmem_num_procs() < mca_spml_ikrit.np) {
248 SPML_VERBOSE(1,
249 "Not enough ranks (%d<%d), disqualifying spml/ikrit",
250 oshmem_num_procs(), mca_spml_ikrit.np);
251 return OSHMEM_ERR_NOT_AVAILABLE;
252 }
253
254 return OSHMEM_SUCCESS;
255 }
256
257 int spml_ikrit_progress(void)
258 {
259 mxm_error_t err;
260
261 err = mxm_progress(mca_spml_ikrit.mxm_context);
262 if ((MXM_OK != err) && (MXM_ERR_NO_PROGRESS != err)) {
263 opal_show_help("help-oshmem-spml-ikrit.txt",
264 "errors during mxm_progress",
265 true,
266 mxm_error_string(err));
267 }
268 return 1;
269 }
270
271 static int mca_spml_ikrit_component_open(void)
272 {
273 mxm_error_t err;
274 unsigned long cur_ver;
275
276 cur_ver = mxm_get_version();
277 if (cur_ver != MXM_API) {
278 SPML_WARNING(
279 "OSHMEM was compiled with MXM version %d.%d but version %ld.%ld detected.",
280 MXM_VERNO_MAJOR, MXM_VERNO_MINOR,
281 (cur_ver >> MXM_MAJOR_BIT) & 0xff,
282 (cur_ver >> MXM_MINOR_BIT) & 0xff);
283 }
284
285 mca_spml_ikrit.mxm_mq = NULL;
286 mca_spml_ikrit.mxm_context = NULL;
287 mca_spml_ikrit.ud_only = 0;
288 #if MXM_API < MXM_VERSION(2,1)
289 mca_spml_ikrit.hw_rdma_channel = 0;
290 if ((MXM_OK != mxm_config_read_context_opts(&mca_spml_ikrit.mxm_ctx_opts)) ||
291 (MXM_OK != mxm_config_read_ep_opts(&mca_spml_ikrit.mxm_ep_opts)))
292 #else
293 if (OSHMEM_SUCCESS != set_mxm_tls()) {
294 return OSHMEM_ERROR;
295 }
296 if (OSHMEM_SUCCESS != set_mxm_hw_rdma_tls()) {
297 return OSHMEM_ERROR;
298 }
299 if ((mca_spml_ikrit.hw_rdma_channel && MXM_OK != mxm_config_read_opts(&mca_spml_ikrit.mxm_ctx_opts,
300 &mca_spml_ikrit.mxm_ep_hw_rdma_opts,
301 "OSHMEM_HW_RDMA", NULL, 0)) ||
302 MXM_OK != mxm_config_read_opts(&mca_spml_ikrit.mxm_ctx_opts,
303 &mca_spml_ikrit.mxm_ep_opts,
304 "OSHMEM", NULL, 0))
305 #endif
306 {
307 SPML_ERROR("Failed to parse MXM configuration");
308 return OSHMEM_ERROR;
309 }
310
311 SPML_VERBOSE(5, "UD only mode is %s",
312 mca_spml_ikrit.ud_only ? "enabled" : "disabled");
313
314 err = mxm_init(mca_spml_ikrit.mxm_ctx_opts, &mca_spml_ikrit.mxm_context);
315 if (MXM_OK != err) {
316 if (MXM_ERR_NO_DEVICE == err) {
317 SPML_VERBOSE(1,
318 "No supported device found, disqualifying spml/ikrit");
319 } else {
320 opal_show_help("help-oshmem-spml-ikrit.txt",
321 "mxm init",
322 true,
323 mxm_error_string(err));
324 }
325 return OSHMEM_ERR_NOT_AVAILABLE;
326 }
327
328 err = mxm_mq_create(mca_spml_ikrit.mxm_context,
329 MXM_SHMEM_MQ_ID,
330 &mca_spml_ikrit.mxm_mq);
331 if (MXM_OK != err) {
332 opal_show_help("help-oshmem-spml-ikrit.txt",
333 "mxm mq create",
334 true,
335 mxm_error_string(err));
336 return OSHMEM_ERROR;
337 }
338
339 return OSHMEM_SUCCESS;
340 }
341
342 static int mca_spml_ikrit_component_close(void)
343 {
344 if (mca_spml_ikrit.mxm_mq) {
345 mxm_mq_destroy(mca_spml_ikrit.mxm_mq);
346 }
347 if (mca_spml_ikrit.mxm_context) {
348 mxm_cleanup(mca_spml_ikrit.mxm_context);
349 mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_opts);
350 mxm_config_free_context_opts(mca_spml_ikrit.mxm_ctx_opts);
351 if (mca_spml_ikrit.hw_rdma_channel)
352 mxm_config_free_ep_opts(mca_spml_ikrit.mxm_ep_hw_rdma_opts);
353 }
354 mca_spml_ikrit.mxm_mq = NULL;
355 mca_spml_ikrit.mxm_context = NULL;
356 return OSHMEM_SUCCESS;
357 }
358
359 static int spml_ikrit_mxm_init(void)
360 {
361 mxm_error_t err;
362
363
364 err = mxm_ep_create(mca_spml_ikrit.mxm_context,
365 mca_spml_ikrit.mxm_ep_opts,
366 &mca_spml_ikrit.mxm_ep);
367 if (MXM_OK != err) {
368 opal_show_help("help-oshmem-spml-ikrit.txt",
369 "unable to create endpoint",
370 true,
371 mxm_error_string(err));
372 return OSHMEM_ERROR;
373 }
374 if (mca_spml_ikrit.hw_rdma_channel) {
375 err = mxm_ep_create(mca_spml_ikrit.mxm_context,
376 mca_spml_ikrit.mxm_ep_hw_rdma_opts,
377 &mca_spml_ikrit.mxm_hw_rdma_ep);
378 if (MXM_OK != err) {
379 opal_show_help("help-oshmem-spml-ikrit.txt",
380 "unable to create endpoint",
381 true,
382 mxm_error_string(err));
383 return OSHMEM_ERROR;
384 }
385 } else {
386 mca_spml_ikrit.mxm_hw_rdma_ep = mca_spml_ikrit.mxm_ep;
387 }
388
389 oshmem_ctx_default = (shmem_ctx_t) &mca_spml_ikrit_ctx_default;
390
391 return OSHMEM_SUCCESS;
392 }
393
394 static mca_spml_base_module_t*
395 mca_spml_ikrit_component_init(int* priority,
396 bool enable_progress_threads,
397 bool enable_mpi_threads)
398 {
399 SPML_VERBOSE( 10, "in ikrit, my priority is %d\n", mca_spml_ikrit.priority);
400
401 if ((*priority) > mca_spml_ikrit.priority) {
402 *priority = mca_spml_ikrit.priority;
403 return NULL ;
404 }
405 *priority = mca_spml_ikrit.priority;
406
407 if (OSHMEM_SUCCESS != spml_ikrit_mxm_init())
408 return NULL ;
409
410 mca_spml_ikrit.n_active_puts = 0;
411 mca_spml_ikrit.n_active_gets = 0;
412 mca_spml_ikrit.n_mxm_fences = 0;
413 SPML_VERBOSE(50, "*** ikrit initialized ****");
414 return &mca_spml_ikrit.super;
415 }
416
417 static int mca_spml_ikrit_component_fini(void)
418 {
419 opal_progress_unregister(spml_ikrit_progress);
420 if (NULL != mca_spml_ikrit.mxm_ep) {
421 mxm_ep_destroy(mca_spml_ikrit.mxm_ep);
422 }
423 if (mca_spml_ikrit.hw_rdma_channel) {
424 mxm_ep_destroy(mca_spml_ikrit.mxm_hw_rdma_ep);
425 }
426
427 if(!mca_spml_ikrit.enabled)
428 return OSHMEM_SUCCESS;
429 mca_spml_ikrit.enabled = false;
430
431 return OSHMEM_SUCCESS;
432 }
433