This source file includes the following definitions.
- ompi_mtl_psm_errhandler
- ompi_mtl_psm_module_init
- ompi_mtl_psm_finalize
- ompi_mtl_psm_connect_error_msg
- ompi_mtl_psm_add_procs
- ompi_mtl_psm_del_procs
- ompi_mtl_psm_add_comm
- ompi_mtl_psm_del_comm
- ompi_mtl_psm_progress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include "ompi_config.h"
27
28 #include "opal/mca/pmix/pmix.h"
29 #include "ompi/mca/mtl/mtl.h"
30 #include "ompi/mca/mtl/base/mtl_base_datatype.h"
31 #include "opal/util/show_help.h"
32 #include "ompi/proc/proc.h"
33
34 #include "mtl_psm.h"
35 #include "mtl_psm_types.h"
36 #include "mtl_psm_endpoint.h"
37 #include "mtl_psm_request.h"
38
39 mca_mtl_psm_module_t ompi_mtl_psm = {
40 .super = {
41
42 .mtl_max_contextid = (1UL << 16) - 1,
43 .mtl_max_tag = (1UL << 30),
44
45 .mtl_add_procs = ompi_mtl_psm_add_procs,
46 .mtl_del_procs = ompi_mtl_psm_del_procs,
47 .mtl_finalize = ompi_mtl_psm_finalize,
48
49 .mtl_send = ompi_mtl_psm_send,
50 .mtl_isend = ompi_mtl_psm_isend,
51
52 .mtl_irecv = ompi_mtl_psm_irecv,
53 .mtl_iprobe = ompi_mtl_psm_iprobe,
54 .mtl_imrecv = ompi_mtl_psm_imrecv,
55 .mtl_improbe = ompi_mtl_psm_improbe,
56
57 .mtl_cancel = ompi_mtl_psm_cancel,
58 .mtl_add_comm = ompi_mtl_psm_add_comm,
59 .mtl_del_comm = ompi_mtl_psm_del_comm
60 }
61 };
62
63 static
64 psm_error_t
65 ompi_mtl_psm_errhandler(psm_ep_t ep, const psm_error_t error,
66 const char *error_string, psm_error_token_t token)
67 {
68 switch (error) {
69
70 case PSM_EP_DEVICE_FAILURE:
71 case PSM_EP_NO_DEVICE:
72 case PSM_EP_NO_PORTS_AVAIL:
73 case PSM_EP_NO_NETWORK:
74 case PSM_EP_INVALID_UUID_KEY:
75 opal_show_help("help-mtl-psm.txt",
76 "unable to open endpoint", true,
77 psm_error_get_string(error));
78 break;
79
80
81 default:
82 opal_output(0, "Open MPI detected an unexpected PSM error in opening "
83 "an endpoint: %s\n", error_string);
84 return psm_error_defer(token);
85 break;
86 }
87 return error;
88 }
89
90 int ompi_mtl_psm_progress( void );
91
92 int ompi_mtl_psm_module_init(int local_rank, int num_local_procs) {
93 psm_error_t err;
94 psm_ep_t ep;
95 psm_mq_t mq;
96 psm_epid_t epid;
97 psm_uuid_t unique_job_key;
98 struct psm_ep_open_opts ep_opt;
99 unsigned long long *uu = (unsigned long long *) unique_job_key;
100 char *generated_key;
101 char env_string[256];
102 int rc;
103
104 generated_key = getenv(OPAL_MCA_PREFIX"orte_precondition_transports");
105 memset(uu, 0, sizeof(psm_uuid_t));
106
107 if (!generated_key || (strlen(generated_key) != 33) ||
108 sscanf(generated_key, "%016llx-%016llx", &uu[0], &uu[1]) != 2)
109 {
110 opal_show_help("help-mtl-psm.txt",
111 "no uuid present", true,
112 generated_key ? "could not be parsed from" :
113 "not present in", ompi_process_info.nodename);
114 return OMPI_ERROR;
115
116 }
117
118
119 psm_error_register_handler(ompi_mtl_psm.ep, ompi_mtl_psm_errhandler);
120
121
122
123
124 snprintf(env_string, sizeof(env_string), "%d", local_rank);
125 setenv("MPI_LOCALRANKID", env_string, 0);
126 snprintf(env_string, sizeof(env_string), "%d", num_local_procs);
127 setenv("MPI_LOCALNRANKS", env_string, 0);
128
129
130 bzero((void*) &ep_opt, sizeof(ep_opt));
131 ep_opt.timeout = ompi_mtl_psm.connect_timeout * 1e9;
132 ep_opt.unit = ompi_mtl_psm.ib_unit;
133 ep_opt.affinity = PSM_EP_OPEN_AFFINITY_SKIP;
134 ep_opt.shm_mbytes = -1;
135 ep_opt.sendbufs_num = -1;
136
137 #if PSM_VERNO >= 0x0101
138 ep_opt.network_pkey = ompi_mtl_psm.ib_pkey;
139 #endif
140
141 #if PSM_VERNO >= 0x0107
142 ep_opt.port = ompi_mtl_psm.ib_port;
143 ep_opt.outsl = ompi_mtl_psm.ib_service_level;
144 #endif
145
146 #if PSM_VERNO >= 0x010d
147 ep_opt.service_id = ompi_mtl_psm.ib_service_id;
148 ep_opt.path_res_type = ompi_mtl_psm.path_res_type;
149 #endif
150
151
152 err = psm_ep_open(unique_job_key, &ep_opt, &ep, &epid);
153 if (err) {
154 opal_show_help("help-mtl-psm.txt",
155 "unable to open endpoint", true,
156 psm_error_get_string(err));
157 return OMPI_ERROR;
158 }
159
160
161 psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
162
163 err = psm_mq_init(ep,
164 0xffff000000000000ULL,
165 NULL,
166 0,
167 &mq);
168 if (err) {
169 opal_show_help("help-mtl-psm.txt",
170 "psm init", true,
171 psm_error_get_string(err));
172 return OMPI_ERROR;
173 }
174
175 ompi_mtl_psm.ep = ep;
176 ompi_mtl_psm.epid = epid;
177 ompi_mtl_psm.mq = mq;
178
179 OPAL_MODEX_SEND(rc, OPAL_PMIX_GLOBAL,
180 &mca_mtl_psm_component.super.mtl_version,
181 &ompi_mtl_psm.epid,
182 sizeof(psm_epid_t));
183
184 if (OMPI_SUCCESS != rc) {
185 opal_output(0, "Open MPI couldn't send PSM epid to head node process");
186 return OMPI_ERROR;
187 }
188
189
190 opal_progress_register(ompi_mtl_psm_progress);
191
192 return OMPI_SUCCESS;
193 }
194
195 int
196 ompi_mtl_psm_finalize(struct mca_mtl_base_module_t* mtl) {
197 psm_error_t err;
198
199 opal_progress_unregister(ompi_mtl_psm_progress);
200
201
202 err = psm_mq_finalize(ompi_mtl_psm.mq);
203 if (err) {
204 opal_output(0, "Error in psm_mq_finalize (error %s)\n",
205 psm_error_get_string(err));
206 return OMPI_ERROR;
207 }
208
209 err = psm_ep_close(ompi_mtl_psm.ep, PSM_EP_CLOSE_GRACEFUL, 1*1e9);
210 if (err) {
211 opal_output(0, "Error in psm_ep_close (error %s)\n",
212 psm_error_get_string(err));
213 return OMPI_ERROR;
214 }
215
216 err = psm_finalize();
217 if (err) {
218 opal_output(0, "Error in psm_finalize (error %s)\n",
219 psm_error_get_string(err));
220 return OMPI_ERROR;
221 }
222
223 return OMPI_SUCCESS;
224 }
225
226 static
227 const char *
228 ompi_mtl_psm_connect_error_msg(psm_error_t err)
229 {
230 switch (err) {
231 case PSM_EPID_UNREACHABLE:
232 case PSM_EPID_INVALID_NODE:
233 case PSM_EPID_INVALID_MTU:
234 case PSM_EPID_INVALID_UUID_KEY:
235 case PSM_EPID_INVALID_VERSION:
236 case PSM_EPID_INVALID_CONNECT:
237 return psm_error_get_string(err);
238 break;
239 case PSM_EPID_UNKNOWN:
240 return "Connect status could not be determined "
241 "because of other errors";
242 default:
243 return NULL;
244 }
245 }
246
/*
 * Two-argument minimum / maximum, defined only if no earlier header already
 * provides a macro of the same name.
 *
 * These are plain function-like macros: whichever argument is selected is
 * evaluated twice, so do not pass expressions with side effects.
 */
#ifndef min
#define min(lhs, rhs) (((lhs) < (rhs)) ? (lhs) : (rhs))
#endif

#ifndef max
#define max(lhs, rhs) (((lhs) > (rhs)) ? (lhs) : (rhs))
#endif
254
255 int
256 ompi_mtl_psm_add_procs(struct mca_mtl_base_module_t *mtl,
257 size_t nprocs,
258 struct ompi_proc_t** procs)
259 {
260 int i,j;
261 int rc;
262 psm_epid_t *epids_in = NULL;
263 int *mask_in = NULL;
264 psm_epid_t *epid;
265 psm_epaddr_t *epaddrs_out = NULL;
266 psm_error_t *errs_out = NULL, err;
267 size_t size;
268 int proc_errors[PSM_ERROR_LAST] = { 0 };
269 int timeout_in_secs;
270
271 assert(mtl == &ompi_mtl_psm.super);
272 rc = OMPI_ERR_OUT_OF_RESOURCE;
273
274 errs_out = (psm_error_t *) malloc(nprocs * sizeof(psm_error_t));
275 if (errs_out == NULL) {
276 goto bail;
277 }
278 epids_in = (psm_epid_t *) malloc(nprocs * sizeof(psm_epid_t));
279 if (epids_in == NULL) {
280 goto bail;
281 }
282 mask_in = (int *) malloc(nprocs * sizeof(int));
283 if (mask_in == NULL) {
284 goto bail;
285 }
286 epaddrs_out = (psm_epaddr_t *) malloc(nprocs * sizeof(psm_epaddr_t));
287 if (epaddrs_out == NULL) {
288 goto bail;
289 }
290 rc = OMPI_SUCCESS;
291
292
293 for (i = 0; i < (int) nprocs; i++) {
294 if (NULL != procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL]) {
295
296 mask_in[i] = 0;
297 continue;
298 }
299
300 OPAL_MODEX_RECV(rc, &mca_mtl_psm_component.super.mtl_version,
301 &procs[i]->super.proc_name, (void**)&epid, &size);
302 if (rc != OMPI_SUCCESS || size != sizeof(psm_epid_t)) {
303 rc = OMPI_ERROR;
304 goto bail;
305 }
306 epids_in[i] = *epid;
307 mask_in[i] = 1;
308 }
309
310 timeout_in_secs = max(ompi_mtl_psm.connect_timeout, 0.5 * nprocs);
311
312 psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_NOP);
313
314 err = psm_ep_connect(ompi_mtl_psm.ep,
315 nprocs,
316 epids_in,
317 mask_in,
318 errs_out,
319 epaddrs_out,
320 timeout_in_secs * 1e9);
321 if (err) {
322 char *errstr = (char *) ompi_mtl_psm_connect_error_msg(err);
323 if (errstr == NULL) {
324 opal_output(0, "PSM returned unhandled/unknown connect error: %s\n",
325 psm_error_get_string(err));
326 }
327 for (i = 0; i < (int) nprocs; i++) {
328 if (0 == mask_in[i]) {
329 continue;
330 }
331
332 psm_error_t thiserr = errs_out[i];
333 errstr = (char *) ompi_mtl_psm_connect_error_msg(thiserr);
334 if (proc_errors[thiserr] == 0) {
335 proc_errors[thiserr] = 1;
336 opal_output(0, "PSM EP connect error (%s):",
337 errstr ? errstr : "unknown connect error");
338 for (j = 0; j < (int) nprocs; j++) {
339 if (errs_out[j] == thiserr) {
340 opal_output(0, " %s", (NULL == procs[j]->super.proc_hostname) ?
341 "unknown" : procs[j]->super.proc_hostname);
342 }
343 }
344 opal_output(0, "\n");
345 }
346 }
347
348 rc = OMPI_ERROR;
349 }
350 else {
351
352
353
354 psm_error_register_handler(ompi_mtl_psm.ep, PSM_ERRHANDLER_DEFAULT);
355
356
357 for (i = 0; i < (int) nprocs; i++) {
358 if (0 == mask_in[i]) {
359 continue;
360 }
361
362 mca_mtl_psm_endpoint_t *endpoint =
363 (mca_mtl_psm_endpoint_t *) OBJ_NEW(mca_mtl_psm_endpoint_t);
364 endpoint->peer_epid = epids_in[i];
365 endpoint->peer_addr = epaddrs_out[i];
366 procs[i]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_MTL] = endpoint;
367 }
368
369 rc = OMPI_SUCCESS;
370 }
371
372 bail:
373 if (epids_in != NULL) {
374 free(epids_in);
375 }
376 if (mask_in != NULL) {
377 free(mask_in);
378 }
379 if (errs_out != NULL) {
380 free(errs_out);
381 }
382 if (epaddrs_out != NULL) {
383 free(epaddrs_out);
384 }
385
386 return rc;
387 }
388
389 int
390 ompi_mtl_psm_del_procs(struct mca_mtl_base_module_t *mtl,
391 size_t nprocs,
392 struct ompi_proc_t** procs)
393 {
394 return OMPI_SUCCESS;
395 }
396
397
398 int
399 ompi_mtl_psm_add_comm(struct mca_mtl_base_module_t *mtl,
400 struct ompi_communicator_t *comm)
401 {
402 return OMPI_SUCCESS;
403 }
404
405
406 int
407 ompi_mtl_psm_del_comm(struct mca_mtl_base_module_t *mtl,
408 struct ompi_communicator_t *comm)
409 {
410 return OMPI_SUCCESS;
411 }
412
413
414 int ompi_mtl_psm_progress( void ) {
415 psm_error_t err;
416 mca_mtl_psm_request_t* mtl_psm_request;
417 psm_mq_status_t psm_status;
418 psm_mq_req_t req;
419 int completed = 1;
420
421 do {
422 err = psm_mq_ipeek(ompi_mtl_psm.mq, &req, NULL);
423 if (err == PSM_MQ_INCOMPLETE) {
424 return completed;
425 } else if (err != PSM_OK) {
426 goto error;
427 }
428
429 completed++;
430
431 err = psm_mq_test(&req, &psm_status);
432 if (err != PSM_OK) {
433 goto error;
434 }
435
436 mtl_psm_request = (mca_mtl_psm_request_t*) psm_status.context;
437
438 if (mtl_psm_request->type == OMPI_MTL_PSM_IRECV) {
439 ompi_mtl_datatype_unpack(mtl_psm_request->convertor,
440 mtl_psm_request->buf,
441 psm_status.msg_length);
442
443 mtl_psm_request->super.ompi_req->req_status.MPI_SOURCE =
444 PSM_GET_MQRANK(psm_status.msg_tag);
445 mtl_psm_request->super.ompi_req->req_status.MPI_TAG =
446 PSM_GET_MQUTAG(psm_status.msg_tag);
447 mtl_psm_request->super.ompi_req->req_status._ucount =
448 psm_status.nbytes;
449 }
450
451 if(mtl_psm_request->type == OMPI_MTL_PSM_ISEND) {
452 if (mtl_psm_request->free_after) {
453 free(mtl_psm_request->buf);
454 }
455 }
456
457 switch (psm_status.error_code) {
458 case PSM_OK:
459 mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
460 OMPI_SUCCESS;
461 break;
462 case PSM_MQ_TRUNCATION:
463 mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
464 MPI_ERR_TRUNCATE;
465 break;
466 default:
467 mtl_psm_request->super.ompi_req->req_status.MPI_ERROR =
468 MPI_ERR_INTERN;
469 }
470
471 mtl_psm_request->super.completion_callback(&mtl_psm_request->super);
472
473 }
474 while (1);
475
476 error:
477 opal_show_help("help-mtl-psm.txt",
478 "error polling network", true,
479 psm_error_get_string(err));
480 return 1;
481 }
482