This source file includes the following definitions.
- ompi_mtl_psm2_errhandler
- ompi_mtl_psm2_module_init
- ompi_mtl_psm2_finalize
- ompi_mtl_psm2_connect_error_msg
- ompi_mtl_psm2_add_procs
- ompi_mtl_psm2_del_procs
- ompi_mtl_psm2_add_comm
- ompi_mtl_psm2_del_comm
- ompi_mtl_psm2_progress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include "ompi_config.h"
27
28 #include "opal/mca/pmix/pmix.h"
29 #include "ompi/mca/mtl/mtl.h"
30 #include "ompi/mca/mtl/base/mtl_base_datatype.h"
31 #include "opal/util/show_help.h"
32 #include "ompi/proc/proc.h"
33
34 #include "mtl_psm2.h"
35 #include "mtl_psm2_types.h"
36 #include "mtl_psm2_endpoint.h"
37 #include "mtl_psm2_request.h"
38
39 mca_mtl_psm2_module_t ompi_mtl_psm2 = {
40 .super = {
41
42 .mtl_max_contextid = (1UL << 16) - 1,
43 .mtl_max_tag = (1UL << 30),
44
45 .mtl_add_procs = ompi_mtl_psm2_add_procs,
46 .mtl_del_procs = ompi_mtl_psm2_del_procs,
47 .mtl_finalize = ompi_mtl_psm2_finalize,
48
49 .mtl_send = ompi_mtl_psm2_send,
50 .mtl_isend = ompi_mtl_psm2_isend,
51
52 .mtl_irecv = ompi_mtl_psm2_irecv,
53 .mtl_iprobe = ompi_mtl_psm2_iprobe,
54 .mtl_imrecv = ompi_mtl_psm2_imrecv,
55 .mtl_improbe = ompi_mtl_psm2_improbe,
56
57 .mtl_cancel = ompi_mtl_psm2_cancel,
58 .mtl_add_comm = ompi_mtl_psm2_add_comm,
59 .mtl_del_comm = ompi_mtl_psm2_del_comm
60 }
61 };
62
63 static
64 psm2_error_t
65 ompi_mtl_psm2_errhandler(psm2_ep_t ep, const psm2_error_t error,
66 const char *error_string, psm2_error_token_t token)
67 {
68 switch (error) {
69
70 case PSM2_EP_DEVICE_FAILURE:
71 case PSM2_EP_NO_DEVICE:
72 case PSM2_EP_NO_PORTS_AVAIL:
73 case PSM2_EP_NO_NETWORK:
74 case PSM2_EP_INVALID_UUID_KEY:
75 opal_show_help("help-mtl-psm2.txt",
76 "unable to open endpoint", true,
77 psm2_error_get_string(error));
78 break;
79
80
81 default:
82 opal_output(0, "Open MPI detected an unexpected PSM2 error in opening "
83 "an endpoint: %s\n", error_string);
84 return psm2_error_defer(token);
85 break;
86 }
87 return error;
88 }
89
90 int ompi_mtl_psm2_progress( void );
91
92 int ompi_mtl_psm2_module_init(int local_rank, int num_local_procs) {
93 psm2_error_t err;
94 psm2_ep_t ep;
95 psm2_mq_t mq;
96 psm2_epid_t epid;
97 psm2_uuid_t unique_job_key;
98 struct psm2_ep_open_opts ep_opt;
99 unsigned long long *uu = (unsigned long long *) unique_job_key;
100 char *generated_key;
101 char env_string[256];
102 int rc;
103
104 generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
105 memset(uu, 0, sizeof(psm2_uuid_t));
106
107 if (!generated_key || (strlen(generated_key) != 33) ||
108 sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
109 {
110 opal_show_help("help-mtl-psm2.txt",
111 "no uuid present", true,
112 generated_key ? "could not be parsed from" :
113 "not present in", ompi_process_info.nodename);
114 return OMPI_ERROR;
115
116 }
117
118
119 psm2_error_register_handler(ompi_mtl_psm2.ep, ompi_mtl_psm2_errhandler);
120
121
122
123
124 snprintf(env_string, sizeof(env_string), "%d", local_rank);
125 setenv("MPI_LOCALRANKID", env_string, 0);
126 snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
127 setenv("MPI_LOCALNRANKS", env_string, 0);
128
129
130 psm2_ep_open_opts_get_defaults(&ep_opt);
131 ep_opt.timeout = ompi_mtl_psm2.connect_timeout * 1e9;
132 ep_opt.affinity = PSM2_EP_OPEN_AFFINITY_SKIP;
133
134
135 err = psm2_ep_open(unique_job_key, &ep_opt, &ep, &epid);
136 if (err) {
137 opal_show_help("help-mtl-psm2.txt",
138 "unable to open endpoint", true,
139 psm2_error_get_string(err));
140 return OMPI_ERROR;
141 }
142
143
144 psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);
145
146 err = psm2_mq_init(ep,
147 0xffff000000000000ULL,
148 NULL,
149 0,
150 &mq);
151 if (err) {
152 opal_show_help("help-mtl-psm2.txt",
153 "psm2 init", true,
154 psm2_error_get_string(err));
155 return OMPI_ERROR;
156 }
157
158 ompi_mtl_psm2.ep = ep;
159 ompi_mtl_psm2.epid = epid;
160 ompi_mtl_psm2.mq = mq;
161
162 OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
163 &mca_mtl_psm2_component.super.mtl_version,
164 &ompi_mtl_psm2.epid,
165 sizeof(psm2_epid_t));
166
167 if (OMPI_SUCCESS != rc) {
168 opal_output(0, "Open MPI couldn't send PSM2 epid to head node process");
169 return OMPI_ERROR;
170 }
171
172
173
174 opal_progress_register(ompi_mtl_psm2_progress);
175
176 #if OPAL_CUDA_SUPPORT
177 ompi_mtl_psm2.super.mtl_flags |= MCA_MTL_BASE_FLAG_CUDA_INIT_DISABLE;
178 #endif
179
180 return OMPI_SUCCESS;
181 }
182
183 int
184 ompi_mtl_psm2_finalize(struct mca_mtl_base_module_t* mtl) {
185 psm2_error_t err;
186
187 opal_progress_unregister(ompi_mtl_psm2_progress);
188
189
190 err = psm2_mq_finalize(ompi_mtl_psm2.mq);
191 if (err) {
192 opal_output(0, "Error in psm2_mq_finalize (error %s)\n",
193 psm2_error_get_string(err));
194 return OMPI_ERROR;
195 }
196
197 err = psm2_ep_close(ompi_mtl_psm2.ep, PSM2_EP_CLOSE_GRACEFUL, 1*1e9);
198 if (err) {
199 opal_output(0, "Error in psm2_ep_close (error %s)\n",
200 psm2_error_get_string(err));
201 return OMPI_ERROR;
202 }
203
204 err = psm2_finalize();
205 if (err) {
206 opal_output(0, "Error in psm2_finalize (error %s)\n",
207 psm2_error_get_string(err));
208 return OMPI_ERROR;
209 }
210
211 return OMPI_SUCCESS;
212 }
213
214 static
215 const char *
216 ompi_mtl_psm2_connect_error_msg(psm2_error_t err)
217 {
218 switch (err) {
219 case PSM2_EPID_UNREACHABLE:
220 case PSM2_EPID_INVALID_NODE:
221 case PSM2_EPID_INVALID_MTU:
222 case PSM2_EPID_INVALID_UUID_KEY:
223 case PSM2_EPID_INVALID_VERSION:
224 case PSM2_EPID_INVALID_CONNECT:
225 return psm2_error_get_string(err);
226 break;
227 case PSM2_EPID_UNKNOWN:
228 return "Connect status could not be determined "
229 "because of other errors";
230 default:
231 return NULL;
232 }
233 }
234
235 #ifndef min
236 # define min(a,b) ((a) < (b) ? (a) : (b))
237 #endif
238
239 #ifndef max
240 # define max(a,b) ((a) > (b) ? (a) : (b))
241 #endif
242
243 int
244 ompi_mtl_psm2_add_procs(struct mca_mtl_base_module_t *mtl,
245 size_t nprocs,
246 struct ompi_proc_t** procs)
247 {
248 int i,j;
249 int rc;
250 psm2_epid_t *epids_in = NULL;
251 int *mask_in = NULL;
252 psm2_epid_t *epid;
253 psm2_epaddr_t *epaddrs_out = NULL;
254 psm2_error_t *errs_out = NULL, err;
255 size_t size;
256 int proc_errors[PSM2_ERROR_LAST] = { 0 };
257 int timeout_in_secs;
258
259 assert(mtl == &ompi_mtl_psm2.super);
260 rc = OMPI_ERR_OUT_OF_RESOURCE;
261
262 errs_out = (psm2_error_t *) malloc(nprocs * sizeof(psm2_error_t));
263 if (errs_out == NULL) {
264 goto bail;
265 }
266 epids_in = (psm2_epid_t *) malloc(nprocs * sizeof(psm2_epid_t));
267 if (epids_in == NULL) {
268 goto bail;
269 }
270 mask_in = (int *) malloc(nprocs * sizeof(int));
271 if (mask_in == NULL) {
272 goto bail;
273 }
274 epaddrs_out = (psm2_epaddr_t *) malloc(nprocs * sizeof(psm2_epaddr_t));
275 if (epaddrs_out == NULL) {
276 goto bail;
277 }
278 rc = OMPI_SUCCESS;
279
280
281 for (i = 0; i < (int) nprocs; i++) {
282 if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
283
284 mask_in[i] = 0;
285 continue;
286 }
287
288 OPAL_MODEX_RECV(rc, &mca_mtl_psm2_component.super.mtl_version,
289 &procs[i]->super.proc_name, (void**)&epid, &size);
290 if (rc != OMPI_SUCCESS || size != sizeof(psm2_epid_t)) {
291 return OMPI_ERROR;
292 }
293 epids_in[i] = *epid;
294 mask_in[i] = 1;
295 }
296
297 timeout_in_secs = max(ompi_mtl_psm2.connect_timeout, 0.5 * nprocs);
298
299 psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_NOP);
300
301 err = psm2_ep_connect(ompi_mtl_psm2.ep,
302 nprocs,
303 epids_in,
304 mask_in,
305 errs_out,
306 epaddrs_out,
307 timeout_in_secs * 1e9);
308 if (err) {
309 char *errstr = (char *) ompi_mtl_psm2_connect_error_msg(err);
310 if (errstr == NULL) {
311 opal_output(0, "PSM2 returned unhandled/unknown connect error: %s\n",
312 psm2_error_get_string(err));
313 }
314 for (i = 0; i < (int) nprocs; i++) {
315 if (0 == mask_in[i]) {
316 continue;
317 }
318
319 psm2_error_t thiserr = errs_out[i];
320 errstr = (char *) ompi_mtl_psm2_connect_error_msg(thiserr);
321 if (proc_errors[thiserr] == 0) {
322 proc_errors[thiserr] = 1;
323 opal_output(0, "PSM2 EP connect error (%s):",
324 errstr ? errstr : "unknown connect error");
325 for (j = 0; j < (int) nprocs; j++) {
326 if (errs_out[j] == thiserr) {
327 opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ?
328 "unknown" : procs[j]->super.proc_hostname);
329 }
330 }
331 opal_output(0, "\n");
332 }
333 }
334
335 rc = OMPI_ERROR;
336 }
337 else {
338
339
340
341 psm2_error_register_handler(ompi_mtl_psm2.ep, PSM2_ERRHANDLER_DEFAULT);
342
343
344 for (i = 0; i < (int) nprocs; i++) {
345 if (0 == mask_in[i]) {
346 continue;
347 }
348
349 mca_mtl_psm2_endpoint_t *endpoint =
350 (mca_mtl_psm2_endpoint_t *) OBJ_NEW(mca_mtl_psm2_endpoint_t);
351 endpoint->peer_epid = epids_in[i];
352 endpoint->peer_addr = epaddrs_out[i];
353 procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
354 }
355
356 rc = OMPI_SUCCESS;
357 }
358
359 bail:
360 if (epids_in != NULL) {
361 free(epids_in);
362 }
363 if (mask_in != NULL) {
364 free(mask_in);
365 }
366 if (errs_out != NULL) {
367 free(errs_out);
368 }
369 if (epaddrs_out != NULL) {
370 free(epaddrs_out);
371 }
372
373 return rc;
374 }
375
376 int
377 ompi_mtl_psm2_del_procs(struct mca_mtl_base_module_t *mtl,
378 size_t nprocs,
379 struct ompi_proc_t** procs)
380 {
381 return OMPI_SUCCESS;
382 }
383
384
385 int
386 ompi_mtl_psm2_add_comm(struct mca_mtl_base_module_t *mtl,
387 struct ompi_communicator_t *comm)
388 {
389 return OMPI_SUCCESS;
390 }
391
392
393 int
394 ompi_mtl_psm2_del_comm(struct mca_mtl_base_module_t *mtl,
395 struct ompi_communicator_t *comm)
396 {
397 return OMPI_SUCCESS;
398 }
399
400
401 int ompi_mtl_psm2_progress( void ) {
402 psm2_error_t err;
403 mca_mtl_psm2_request_t* mtl_psm2_request;
404 psm2_mq_status2_t psm2_status;
405 psm2_mq_req_t req;
406 int completed = 1;
407
408 do {
409 OPAL_THREAD_LOCK(&mtl_psm2_mq_mutex);
410 err = psm2_mq_ipeek2(ompi_mtl_psm2.mq, &req, NULL);
411 if (err == PSM2_MQ_INCOMPLETE) {
412 OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
413 return completed;
414 } else if (OPAL_UNLIKELY(err != PSM2_OK)) {
415 OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
416 goto error;
417 }
418
419 err = psm2_mq_test2(&req, &psm2_status);
420 OPAL_THREAD_UNLOCK(&mtl_psm2_mq_mutex);
421
422 if (OPAL_UNLIKELY (err != PSM2_OK)) {
423 goto error;
424 }
425
426 completed++;
427
428 mtl_psm2_request = (mca_mtl_psm2_request_t*) psm2_status.context;
429
430 if (mtl_psm2_request->type == OMPI_mtl_psm2_IRECV) {
431
432 mtl_psm2_request->super.ompi_req->req_status.MPI_SOURCE =
433 psm2_status.msg_tag.tag1;
434 mtl_psm2_request->super.ompi_req->req_status.MPI_TAG =
435 psm2_status.msg_tag.tag0;
436 mtl_psm2_request->super.ompi_req->req_status._ucount =
437 psm2_status.nbytes;
438
439 ompi_mtl_datatype_unpack(mtl_psm2_request->convertor,
440 mtl_psm2_request->buf,
441 psm2_status.msg_length);
442 }
443
444 if(mtl_psm2_request->type == OMPI_mtl_psm2_ISEND) {
445 if (mtl_psm2_request->free_after) {
446 free(mtl_psm2_request->buf);
447 }
448 }
449
450 switch (psm2_status.error_code) {
451 case PSM2_OK:
452 mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
453 OMPI_SUCCESS;
454 break;
455 case PSM2_MQ_TRUNCATION:
456 mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
457 MPI_ERR_TRUNCATE;
458 break;
459 default:
460 mtl_psm2_request->super.ompi_req->req_status.MPI_ERROR =
461 MPI_ERR_INTERN;
462 }
463
464 mtl_psm2_request->super.completion_callback(&mtl_psm2_request->super);
465 }
466 while (1);
467
468 error:
469 opal_show_help("help-mtl-psm2.txt",
470 "error polling network", true,
471 psm2_error_get_string(err));
472 return 1;
473 }