This source file includes the following definitions:
- opal_stderr
- portals4_register
- portals4_open
- portals4_close
- portals4_init_query
- portals4_comm_query
- portals4_module_enable
- portals4_progress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "ompi_config.h"
26
27 #include "coll_portals4.h"
28 #include "coll_portals4_request.h"
29
30 #include "mpi.h"
31 #include "ompi/op/op.h"
32 #include "ompi/datatype/ompi_datatype_internal.h"
33 #include "ompi/mca/coll/coll.h"
34 #include "ompi/mca/coll/base/base.h"
35
/*
 * Portal table indices this component asks PtlPTAlloc() for in
 * portals4_init_query().  Initialization fails if Portals hands back a
 * different index than the one requested.
 */
#define REQ_COLL_TABLE_ID           15   /* requested index stored in pt_idx */
#define REQ_COLL_FINISH_TABLE_ID    16   /* requested index stored in finish_pt_idx */
38
39
/*
 * Lookup table from an OMPI reduction operation id (array index) to the
 * Portals4 atomic operation used for it.
 *
 * Entries set to COLL_PORTALS4_NO_OP presumably mark operations that have no
 * Portals4 atomic counterpart (the constant is defined outside this file --
 * confirm in coll_portals4.h).  Any index not listed below is
 * zero-initialized by the designated-initializer rules.
 */
ptl_op_t ompi_coll_portals4_atomic_op [OMPI_OP_NUM_OF_TYPES] =
{
    [OMPI_OP_NULL] = COLL_PORTALS4_NO_OP,
    [OMPI_OP_MAX] = PTL_MAX,
    [OMPI_OP_MIN] = PTL_MIN,
    [OMPI_OP_SUM] = PTL_SUM,
    [OMPI_OP_PROD] = PTL_PROD,
    [OMPI_OP_LAND] = PTL_LAND,
    [OMPI_OP_BAND] = PTL_BAND,
    [OMPI_OP_LOR] = PTL_LOR,
    [OMPI_OP_BOR] = PTL_BOR,
    [OMPI_OP_LXOR] = PTL_LXOR,
    [OMPI_OP_BXOR] = PTL_BXOR,
    [OMPI_OP_MAXLOC] = COLL_PORTALS4_NO_OP,
    [OMPI_OP_MINLOC] = COLL_PORTALS4_NO_OP,
    [OMPI_OP_REPLACE] = PTL_CSWAP,
};
57
/*
 * Lookup table from an OMPI predefined datatype id (array index) to the
 * Portals4 atomic datatype used for it.
 *
 * Entries set to COLL_PORTALS4_NO_DTYPE presumably mark datatypes that cannot
 * be handled by Portals4 atomics (the constant is defined outside this file
 * -- confirm in coll_portals4.h).  Any index not listed below is
 * zero-initialized by the designated-initializer rules.
 *
 * NOTE(review): several mappings assume a particular size for the MPI type
 * (e.g. INTEGER -> PTL_INT64_T, LONG_INT -> PTL_INT32_T, and the pair types
 * DOUBLE_INT / LONG_INT / SHORT_INT mapped to a single integer type).  The
 * rationale is not visible here; verify against the users of this table
 * before relying on those entries.
 */
ptl_datatype_t ompi_coll_portals4_atomic_datatype [OMPI_DATATYPE_MPI_MAX_PREDEFINED] =
{
    [OMPI_DATATYPE_MPI_EMPTY] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_UINT8_T] = PTL_UINT8_T,
    [OMPI_DATATYPE_MPI_INT16_T] = PTL_INT16_T,
    [OMPI_DATATYPE_MPI_UINT16_T] = PTL_UINT16_T,
    [OMPI_DATATYPE_MPI_INT32_T] = PTL_INT32_T,
    [OMPI_DATATYPE_MPI_UINT32_T] = PTL_UINT32_T,
    [OMPI_DATATYPE_MPI_INT64_T] = PTL_INT64_T,
    [OMPI_DATATYPE_MPI_UINT64_T] = PTL_UINT64_T,
    [OMPI_DATATYPE_MPI_FLOAT] = PTL_FLOAT,
    [OMPI_DATATYPE_MPI_DOUBLE] = PTL_DOUBLE,
    [OMPI_DATATYPE_MPI_LONG_DOUBLE] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX4] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX8] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX16] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_COMPLEX32] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_WCHAR] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_PACKED] = COLL_PORTALS4_NO_DTYPE,

    /* C++ / C99 datatypes */
    [OMPI_DATATYPE_MPI_BOOL] = COLL_PORTALS4_NO_DTYPE,

    /* Fortran datatypes */
    [OMPI_DATATYPE_MPI_LOGICAL] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_CHARACTER] = PTL_INT8_T,
    [OMPI_DATATYPE_MPI_INTEGER] = PTL_INT64_T,
    [OMPI_DATATYPE_MPI_REAL] = PTL_FLOAT,
    [OMPI_DATATYPE_MPI_DOUBLE_PRECISION] = PTL_DOUBLE,

    [OMPI_DATATYPE_MPI_COMPLEX] = PTL_FLOAT_COMPLEX,
    [OMPI_DATATYPE_MPI_DOUBLE_COMPLEX] = PTL_DOUBLE_COMPLEX,
    [OMPI_DATATYPE_MPI_LONG_DOUBLE_COMPLEX] = PTL_LONG_DOUBLE_COMPLEX,
    [OMPI_DATATYPE_MPI_2INT] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2INTEGER] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2REAL] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2DBLPREC] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2COMPLEX] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_2DOUBLE_COMPLEX] = COLL_PORTALS4_NO_DTYPE,

    /* value/index pair types */
    [OMPI_DATATYPE_MPI_FLOAT_INT] = COLL_PORTALS4_NO_DTYPE,

    [OMPI_DATATYPE_MPI_DOUBLE_INT] = PTL_INT64_T,
    [OMPI_DATATYPE_MPI_LONG_DOUBLE_INT] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_LONG_INT] = PTL_INT32_T,
    [OMPI_DATATYPE_MPI_SHORT_INT] = PTL_INT16_T,

    /* address/offset and C complex types */
    [OMPI_DATATYPE_MPI_AINT] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_OFFSET] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_BOOL] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_COMPLEX] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_FLOAT_COMPLEX] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_DOUBLE_COMPLEX] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_LONG_DOUBLE_COMPLEX] = COLL_PORTALS4_NO_DTYPE,

    /* bound markers */
    [OMPI_DATATYPE_MPI_LB] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_UB] = COLL_PORTALS4_NO_DTYPE,

    [OMPI_DATATYPE_MPI_COUNT] = COLL_PORTALS4_NO_DTYPE,

    [OMPI_DATATYPE_MPI_SHORT_FLOAT] = COLL_PORTALS4_NO_DTYPE,
    [OMPI_DATATYPE_MPI_C_SHORT_FLOAT_COMPLEX] = COLL_PORTALS4_NO_DTYPE,

    [OMPI_DATATYPE_MPI_UNAVAILABLE] = COLL_PORTALS4_NO_DTYPE,

};
129
130
/*
 * Remember the collective implementation currently installed on a
 * communicator so this module can delegate to it later.
 *
 *   __module  module whose previous_<api> and previous_<api>_module fields
 *             are filled in
 *   __comm    communicator whose c_coll table is read
 *   __api     collective name without the "coll_" prefix (e.g. reduce)
 *
 * If the communicator has no function or no module for <api>, a verbose
 * message is logged and the ENCLOSING FUNCTION returns OMPI_ERROR; the macro
 * can therefore only be used inside a function returning int.  On success
 * the saved module is retained with OBJ_RETAIN.
 *
 * Every access goes through the (parenthesized) macro parameters, so the
 * caller's variables may have any name.  __module and __comm are evaluated
 * more than once: pass side-effect-free expressions.
 */
#define PORTALS4_SAVE_PREV_COLL_API(__module, __comm, __api)                                      \
    do {                                                                                          \
        (__module)->previous_ ## __api = (__comm)->c_coll->coll_ ## __api;                        \
        (__module)->previous_ ## __api ## _module = (__comm)->c_coll->coll_ ## __api ## _module;  \
        if (!(__comm)->c_coll->coll_ ## __api || !(__comm)->c_coll->coll_ ## __api ## _module) {  \
            opal_output_verbose(1, ompi_coll_base_framework.framework_output,                     \
                                "(%d/%s): no underlying " # __api"; disqualifying myself",        \
                                (__comm)->c_contextid, (__comm)->c_name);                         \
            return OMPI_ERROR;                                                                    \
        }                                                                                         \
        OBJ_RETAIN((__module)->previous_ ## __api ## _module);                                    \
    } while(0)
143
144
/* Human-readable version string of this component. */
const char *mca_coll_portals4_component_version_string =
    "Open MPI Portals 4 collective MCA component version " OMPI_VERSION;

/*
 * Priority handed back through portals4_comm_query().  The initializer below
 * is overwritten with 100 by portals4_register() right before the variable
 * is registered as the "priority" MCA parameter.
 */
int mca_coll_portals4_priority = 10;

/* Entry count passed to PtlEQAlloc() in portals4_init_query(). */
#define MCA_COLL_PORTALS4_EQ_SIZE 4096

/* Component entry points, defined further down and referenced from the
 * component descriptor. */
static int portals4_open(void);
static int portals4_close(void);
static int portals4_register(void);
static int portals4_init_query(bool enable_progress_threads,
                               bool enable_mpi_threads);
static mca_coll_base_module_t* portals4_comm_query(struct ompi_communicator_t *comm,
                                                   int *priority);
static int portals4_module_enable(mca_coll_base_module_t *module,
                                  struct ompi_communicator_t *comm);
/* Callback registered with opal_progress_register() in portals4_init_query(). */
static int portals4_progress(void);
162
163
/*
 * Component descriptor for the portals4 collective component.  It wires the
 * static entry points of this file (open/close/register, init_query,
 * comm_query) into the MCA coll component structure.  The remaining fields
 * of mca_coll_portals4_component_t are filled in at run time by
 * portals4_register(), portals4_open() and portals4_init_query().
 */
mca_coll_portals4_component_t mca_coll_portals4_component = {
    {
        /* Base MCA component header: coll framework version, component
         * name and version, and the open/close/register callbacks. */
        .collm_version = {
            MCA_COLL_BASE_VERSION_2_0_0,

            /* Component name and version numbers */
            .mca_component_name = "portals4",
            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                                  OMPI_RELEASE_VERSION),

            /* Component lifecycle callbacks */
            .mca_open_component = portals4_open,
            .mca_close_component = portals4_close,
            .mca_register_component_params = portals4_register
        },
        .collm_data = {
            /* Component metadata flags (none set) */
            MCA_BASE_METADATA_PARAM_NONE
        },

        /* Query callbacks used by the coll framework */
        .collm_init_query = portals4_init_query,
        .collm_comm_query = portals4_comm_query,
    },
};
192
193 int
194 opal_stderr(const char *msg, const char *file,
195 const int line, const int ret)
196 {
197 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
198 "%s:%d: %s: %d\n", file, line, msg, ret);
199 return (OMPI_ERR_TEMP_OUT_OF_RESOURCE);
200 }
201
202 static int
203 portals4_register(void)
204 {
205 mca_coll_portals4_priority = 100;
206 (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, "priority",
207 "Priority of the portals4 coll component",
208 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
209 OPAL_INFO_LVL_9,
210 MCA_BASE_VAR_SCOPE_READONLY,
211 &mca_coll_portals4_priority);
212
213 mca_coll_portals4_component.use_binomial_gather_algorithm = 0;
214 (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version, "use_binomial_gather_algorithm",
215 "if 1 use a binomial tree algorithm for gather, otherwise use linear",
216 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
217 OPAL_INFO_LVL_9,
218 MCA_BASE_VAR_SCOPE_READONLY,
219 &mca_coll_portals4_component.use_binomial_gather_algorithm);
220
221 mca_coll_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
222 (void) mca_base_component_var_register(&mca_coll_portals4_component.super.collm_version,
223 "max_msg_size",
224 "Max size supported by portals4 (above that, a message is cut into messages less than that size)",
225 MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
226 NULL, 0, 0,
227 OPAL_INFO_LVL_9,
228 MCA_BASE_VAR_SCOPE_READONLY,
229 &mca_coll_portals4_component.portals_max_msg_size);
230
231 return OMPI_SUCCESS;
232 }
233
234
235 static int
236 portals4_open(void)
237 {
238 int ret;
239
240 mca_coll_portals4_component.ni_h = PTL_INVALID_HANDLE;
241 mca_coll_portals4_component.uid = PTL_UID_ANY;
242 mca_coll_portals4_component.pt_idx = -1;
243 mca_coll_portals4_component.finish_pt_idx = -1;
244 mca_coll_portals4_component.eq_h = PTL_INVALID_HANDLE;
245 mca_coll_portals4_component.unex_me_h = PTL_INVALID_HANDLE;
246 mca_coll_portals4_component.finish_me_h = PTL_INVALID_HANDLE;
247 mca_coll_portals4_component.zero_md_h = PTL_INVALID_HANDLE;
248 mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE;
249
250 OBJ_CONSTRUCT(&mca_coll_portals4_component.requests, opal_free_list_t);
251 ret = opal_free_list_init(&mca_coll_portals4_component.requests,
252 sizeof(ompi_coll_portals4_request_t),
253 opal_cache_line_size,
254 OBJ_CLASS(ompi_coll_portals4_request_t),
255 0, 0, 8, 0, 8, NULL, 0, NULL, NULL, NULL);
256 if (OMPI_SUCCESS != ret) {
257 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
258 "%s:%d: ompi_free_list_init failed: %d\n",
259 __FILE__, __LINE__, ret);
260 return ret;
261 }
262
263 return OMPI_SUCCESS;
264 }
265
266
267 static int
268 portals4_close(void)
269 {
270 int ret;
271
272 OBJ_DESTRUCT(&mca_coll_portals4_component.requests);
273
274 if (!PtlHandleIsEqual(mca_coll_portals4_component.zero_md_h, PTL_INVALID_HANDLE)) {
275 ret = PtlMDRelease(mca_coll_portals4_component.zero_md_h);
276 if (PTL_OK != ret) {
277 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
278 "%s:%d: PtlMDRelease failed: %d\n",
279 __FILE__, __LINE__, ret);
280 }
281 }
282 mca_coll_portals4_component.zero_md_h = PTL_INVALID_HANDLE;
283
284 if (!PtlHandleIsEqual(mca_coll_portals4_component.data_md_h, PTL_INVALID_HANDLE)) {
285 ret = PtlMDRelease(mca_coll_portals4_component.data_md_h);
286 if (PTL_OK != ret) {
287 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
288 "%s:%d: PtlMDRelease failed: %d\n",
289 __FILE__, __LINE__, ret);
290 }
291 }
292 mca_coll_portals4_component.data_md_h = PTL_INVALID_HANDLE;
293
294 if (!PtlHandleIsEqual(mca_coll_portals4_component.finish_me_h, PTL_INVALID_HANDLE)) {
295 do {
296 ret = PtlMEUnlink(mca_coll_portals4_component.finish_me_h);
297 } while (PTL_IN_USE == ret);
298 if (PTL_OK != ret) {
299 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
300 "%s:%d: PtlMEUnlink failed: %d\n",
301 __FILE__, __LINE__, ret);
302 }
303 }
304 if (!PtlHandleIsEqual(mca_coll_portals4_component.unex_me_h, PTL_INVALID_HANDLE)) {
305 do {
306 ret = PtlMEUnlink(mca_coll_portals4_component.unex_me_h);
307 } while (PTL_IN_USE == ret);
308 if (PTL_OK != ret) {
309 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
310 "%s:%d: PtlMEUnlink failed: %d\n",
311 __FILE__, __LINE__, ret);
312 }
313 }
314 if (mca_coll_portals4_component.finish_pt_idx >= 0) {
315 ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.finish_pt_idx);
316 if (PTL_OK != ret) {
317 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
318 "%s:%d: PtlPTFree failed: %d\n",
319 __FILE__, __LINE__, ret);
320 }
321 }
322 if (mca_coll_portals4_component.pt_idx >= 0) {
323 ret = PtlPTFree(mca_coll_portals4_component.ni_h, mca_coll_portals4_component.pt_idx);
324 if (PTL_OK != ret) {
325 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
326 "%s:%d: PtlPTFree failed: %d\n",
327 __FILE__, __LINE__, ret);
328 }
329 }
330 if (!PtlHandleIsEqual(mca_coll_portals4_component.eq_h, PTL_INVALID_HANDLE)) {
331 ret = PtlEQFree(mca_coll_portals4_component.eq_h);
332 if (PTL_OK != ret) {
333 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
334 "%s:%d: PtlEQFree failed: %d\n",
335 __FILE__, __LINE__, ret);
336 }
337 }
338 if (!PtlHandleIsEqual(mca_coll_portals4_component.ni_h, PTL_INVALID_HANDLE)) {
339 ret = PtlNIFini(mca_coll_portals4_component.ni_h);
340 if (PTL_OK != ret) {
341 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
342 "%s:%d: PtlNIFini failed: %d\n",
343 __FILE__, __LINE__, ret);
344 }
345
346 PtlFini();
347 }
348
349 opal_progress_unregister(portals4_progress);
350
351 return OMPI_SUCCESS;
352 }
353
354
355
356
357
358
359
360
361
362
363
364 static int
365 portals4_init_query(bool enable_progress_threads,
366 bool enable_mpi_threads)
367 {
368 int ret;
369 ptl_md_t md;
370 ptl_me_t me;
371
372
373 ret = PtlInit();
374 if (PTL_OK != ret) {
375 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
376 "%s:%d: PtlInit failed: %d\n",
377 __FILE__, __LINE__, ret);
378 return OMPI_ERROR;
379 }
380
381 ret = PtlNIInit(PTL_IFACE_DEFAULT,
382 PTL_NI_PHYSICAL | PTL_NI_MATCHING,
383 PTL_PID_ANY,
384 NULL,
385 &mca_coll_portals4_component.ni_limits,
386 &mca_coll_portals4_component.ni_h);
387 if (PTL_OK != ret) {
388 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
389 "%s:%d: PtlNIInit failed: %d\n",
390 __FILE__, __LINE__, ret);
391 return OMPI_ERROR;
392 }
393 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
394 "ni_limits.max_atomic_size=%ld", mca_coll_portals4_component.ni_limits.max_atomic_size);
395
396 if (mca_coll_portals4_component.portals_max_msg_size < mca_coll_portals4_component.ni_limits.max_msg_size)
397 mca_coll_portals4_component.ni_limits.max_msg_size = mca_coll_portals4_component.portals_max_msg_size;
398 opal_output_verbose(10, ompi_coll_base_framework.framework_output,
399 "ni_limits.max_msg_size=%lu", mca_coll_portals4_component.ni_limits.max_msg_size);
400
401 ret = PtlGetId(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.id);
402 if (PTL_OK != ret) {
403 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
404 "%s:%d: PtlGetid failed: %d\n",
405 __FILE__, __LINE__, ret);
406 return OMPI_ERROR;
407 }
408
409 ret = PtlGetUid(mca_coll_portals4_component.ni_h, &mca_coll_portals4_component.uid);
410 if (PTL_OK != ret) {
411 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
412 "%s:%d: PtlGetUid failed: %d\n",
413 __FILE__, __LINE__, ret);
414 return OMPI_ERROR;
415 }
416
417 ret = PtlEQAlloc(mca_coll_portals4_component.ni_h,
418 MCA_COLL_PORTALS4_EQ_SIZE,
419 &mca_coll_portals4_component.eq_h);
420 if (PTL_OK != ret) {
421 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
422 "%s:%d: PtlEQAlloc failed: %d\n",
423 __FILE__, __LINE__, ret);
424 return OMPI_ERROR;
425 }
426
427 ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
428 0,
429 mca_coll_portals4_component.eq_h,
430 REQ_COLL_TABLE_ID,
431 &mca_coll_portals4_component.pt_idx);
432 if (PTL_OK != ret) {
433 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
434 "%s:%d: PtlPTAlloc failed: %d\n",
435 __FILE__, __LINE__, ret);
436 return OMPI_ERROR;
437 }
438
439 if (mca_coll_portals4_component.pt_idx != REQ_COLL_TABLE_ID) {
440 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
441 "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
442 __FILE__, __LINE__,
443 mca_coll_portals4_component.finish_pt_idx);
444 return OMPI_ERROR;
445 }
446
447 ret = PtlPTAlloc(mca_coll_portals4_component.ni_h,
448 0,
449 mca_coll_portals4_component.eq_h,
450 REQ_COLL_FINISH_TABLE_ID,
451 &mca_coll_portals4_component.finish_pt_idx);
452 if (PTL_OK != ret) {
453 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
454 "%s:%d: PtlPTAlloc failed: %d\n",
455 __FILE__, __LINE__, ret);
456 return OMPI_ERROR;
457 }
458
459 if (mca_coll_portals4_component.finish_pt_idx != REQ_COLL_FINISH_TABLE_ID) {
460 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
461 "%s:%d: PtlPTAlloc return wrong pt_idx: %d\n",
462 __FILE__, __LINE__,
463 mca_coll_portals4_component.finish_pt_idx);
464 return OMPI_ERROR;
465 }
466
467
468
469 memset(&md, 0, sizeof(ptl_md_t));
470 md.start = 0;
471 md.length = 0;
472 md.options = 0;
473 md.eq_handle = PTL_EQ_NONE;
474 md.ct_handle = PTL_CT_NONE;
475
476 ret = PtlMDBind(mca_coll_portals4_component.ni_h,
477 &md,
478 &mca_coll_portals4_component.zero_md_h);
479 if (PTL_OK != ret) {
480 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
481 "%s:%d: PtlMDBind failed: %d\n",
482 __FILE__, __LINE__, ret);
483 return OMPI_ERROR;
484 }
485
486 md.start = 0;
487 md.length = PTL_SIZE_MAX;
488 md.options = 0;
489 md.eq_handle = PTL_EQ_NONE;
490 md.ct_handle = PTL_CT_NONE;
491
492 ret = PtlMDBind(mca_coll_portals4_component.ni_h,
493 &md,
494 &mca_coll_portals4_component.data_md_h);
495 if (PTL_OK != ret) {
496 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
497 "%s:%d: PtlMDBind failed: %d\n",
498 __FILE__, __LINE__, ret);
499 return OMPI_ERROR;
500 }
501 OPAL_OUTPUT_VERBOSE((90, ompi_coll_base_framework.framework_output, "PtlMDBind start=%p length=%lx\n", md.start, md.length));
502
503
504 me.start = NULL;
505 me.length = 0;
506 me.ct_handle = PTL_CT_NONE;
507 me.min_free = 0;
508 me.uid = mca_coll_portals4_component.uid;
509 me.options = PTL_ME_OP_PUT |
510 PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
511 me.match_id.phys.nid = PTL_NID_ANY;
512 me.match_id.phys.pid = PTL_PID_ANY;
513 me.match_bits = 0;
514 me.ignore_bits = 0;
515
516 ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
517 mca_coll_portals4_component.finish_pt_idx,
518 &me,
519 PTL_PRIORITY_LIST,
520 NULL,
521 &mca_coll_portals4_component.finish_me_h);
522 if (PTL_OK != ret) {
523 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
524 "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
525 __FILE__, __LINE__, ret);
526 return OMPI_ERROR;
527 }
528
529
530 me.start = NULL;
531 me.length = 0;
532 me.ct_handle = PTL_CT_NONE;
533 me.min_free = 0;
534 me.uid = mca_coll_portals4_component.uid;
535 me.options = PTL_ME_OP_PUT |
536 PTL_ME_EVENT_SUCCESS_DISABLE | PTL_ME_EVENT_OVER_DISABLE |
537 PTL_ME_EVENT_LINK_DISABLE | PTL_ME_EVENT_UNLINK_DISABLE;
538 me.match_id.phys.nid = PTL_NID_ANY;
539 me.match_id.phys.pid = PTL_PID_ANY;
540
541
542
543
544
545 COLL_PORTALS4_SET_BITS(me.match_bits, 0, 0, 1, 0, 0, 0);
546 me.ignore_bits = ~COLL_PORTALS4_RTR_MASK;
547
548 ret = PtlMEAppend(mca_coll_portals4_component.ni_h,
549 mca_coll_portals4_component.pt_idx,
550 &me,
551 PTL_OVERFLOW_LIST,
552 NULL,
553 &mca_coll_portals4_component.unex_me_h);
554 if (PTL_OK != ret) {
555 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
556 "%s:%d: PtlMEAppend of barrier unexpected failed: %d\n",
557 __FILE__, __LINE__, ret);
558 return OMPI_ERROR;
559 }
560
561
562 ret = opal_progress_register(portals4_progress);
563 if (OMPI_SUCCESS != ret) {
564 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
565 "%s:%d: opal_progress_register failed: %d\n",
566 __FILE__, __LINE__, ret);
567 return OMPI_ERROR;
568
569 }
570 return OMPI_SUCCESS;
571
572 }
573
574
575
576
577
578
579 mca_coll_base_module_t *
580 portals4_comm_query(struct ompi_communicator_t *comm,
581 int *priority)
582 {
583 mca_coll_portals4_module_t *portals4_module;
584 ptl_process_t *proc;
585
586
587
588
589 if (OMPI_COMM_IS_INTER(comm) || ompi_comm_size(comm) < 2) {
590 return NULL;
591 }
592
593
594
595 proc = ompi_proc_local()->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_PORTALS4];
596 if (NULL == proc) {
597 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
598 "%s:%d: Proc table not previously populated",
599 __FILE__, __LINE__);
600 return NULL;
601 }
602
603 opal_output_verbose(50, ompi_coll_base_framework.framework_output,
604 "%s:%d: My nid,pid = (%x,%x)\n",
605 __FILE__, __LINE__, proc->phys.nid, proc->phys.pid);
606
607
608 if (0 == proc->phys.pid) {
609 opal_output_verbose(1, ompi_coll_base_framework.framework_output,
610 "%s:%d: proc->phys.pid==0, so mtl-portals4 is using logical addressing which coll-portals4 doesn't support. Disqualifying myself.",
611 __FILE__, __LINE__);
612 return NULL;
613 }
614
615 portals4_module = OBJ_NEW(mca_coll_portals4_module_t);
616 if (NULL == portals4_module) return NULL;
617
618 *priority = mca_coll_portals4_priority;
619 portals4_module->coll_count = 0;
620 portals4_module->super.coll_module_enable = portals4_module_enable;
621 portals4_module->super.ft_event = NULL;
622
623 portals4_module->super.coll_barrier = ompi_coll_portals4_barrier_intra;
624 portals4_module->super.coll_ibarrier = ompi_coll_portals4_ibarrier_intra;
625
626 portals4_module->super.coll_gather = ompi_coll_portals4_gather_intra;
627 portals4_module->super.coll_igather = ompi_coll_portals4_igather_intra;
628
629 portals4_module->super.coll_scatter = ompi_coll_portals4_scatter_intra;
630 portals4_module->super.coll_iscatter = ompi_coll_portals4_iscatter_intra;
631
632 portals4_module->cached_in_order_bmtree=NULL;
633 portals4_module->cached_in_order_bmtree_root=-1;
634
635 portals4_module->super.coll_bcast = ompi_coll_portals4_bcast_intra;
636 portals4_module->super.coll_ibcast = ompi_coll_portals4_ibcast_intra;
637
638 portals4_module->super.coll_allreduce = ompi_coll_portals4_allreduce_intra;
639 portals4_module->super.coll_iallreduce = ompi_coll_portals4_iallreduce_intra;
640
641 portals4_module->super.coll_reduce = ompi_coll_portals4_reduce_intra;
642 portals4_module->super.coll_ireduce = ompi_coll_portals4_ireduce_intra;
643
644 return &(portals4_module->super);
645 }
646
647
648
649
650
/**
 * Module enable callback, installed as coll_module_enable by
 * portals4_comm_query().
 *
 * Records the allreduce, iallreduce, reduce and ireduce implementations that
 * are currently installed on @p comm in the module's previous_* fields and
 * retains their modules.
 *
 * @param module  the portals4 module (created by portals4_comm_query())
 * @param comm    communicator the module is being enabled on
 *
 * @return OMPI_SUCCESS, or OMPI_ERROR as soon as one of the four collectives
 *         has no underlying function or module on @p comm.  The early return
 *         is hidden inside PORTALS4_SAVE_PREV_COLL_API.
 *
 * NOTE(review): modules retained by earlier invocations are not released
 * when a later invocation bails out -- confirm whether the caller cleans up.
 */
static int
portals4_module_enable(mca_coll_base_module_t *module,
                       struct ompi_communicator_t *comm)
{
    mca_coll_portals4_module_t *portals4_module = (mca_coll_portals4_module_t*) module;

    /* Each line below may return OMPI_ERROR from this function */
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, allreduce);
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, iallreduce);
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, reduce);
    PORTALS4_SAVE_PREV_COLL_API(portals4_module, comm, ireduce);

    return OMPI_SUCCESS;
}
664
665
/*
 * Printable names used by portals4_progress() when it logs the ni_fail_type
 * of an event; the table is indexed directly with that value.
 *
 * The table is read-only: both the pointers and the strings they refer to
 * are const, so it cannot be modified by accident.
 *
 * NOTE(review): the order of the entries has to match the numeric values of
 * the Portals4 failure codes -- confirm against portals4.h.
 */
static const char *const failtype[] = {
    "PTL_NI_OK",
    "PTL_NI_PERM_VIOLATION",
    "PTL_NI_SEGV",
    "PTL_NI_PT_DISABLED",
    "PTL_NI_DROPPED",
    "PTL_NI_UNDELIVERABLE",
    "PTL_FAIL",
    "PTL_ARG_INVALID",
    "PTL_IN_USE",
    "PTL_ME_NO_MATCH",
    "PTL_NI_TARGET_INVALID",
    "PTL_NI_OP_VIOLATION"
};
680
/*
 * Printable names used by portals4_progress() when it logs the type of an
 * event taken from the event queue; the table is indexed directly with
 * ev.type.
 *
 * The table is read-only: both the pointers and the strings they refer to
 * are const, so it cannot be modified by accident.
 *
 * NOTE(review): the order of the entries has to match the numeric values of
 * the Portals4 event kinds -- confirm against portals4.h.
 */
static const char *const evname[] = {
    "PTL_EVENT_GET",
    "PTL_EVENT_GET_OVERFLOW",
    "PTL_EVENT_PUT",
    "PTL_EVENT_PUT_OVERFLOW",
    "PTL_EVENT_ATOMIC",
    "PTL_EVENT_ATOMIC_OVERFLOW",
    "PTL_EVENT_FETCH_ATOMIC",
    "PTL_EVENT_FETCH_ATOMIC_OVERFLOW",
    "PTL_EVENT_REPLY",
    "PTL_EVENT_SEND",
    "PTL_EVENT_ACK",
    "PTL_EVENT_PT_DISABLED",
    "PTL_EVENT_AUTO_UNLINK",
    "PTL_EVENT_AUTO_FREE",
    "PTL_EVENT_SEARCH",
    "PTL_EVENT_LINK"
};
699
700
701 static int
702 portals4_progress(void)
703 {
704 int count = 0, ret;
705 ptl_event_t ev;
706 ompi_coll_portals4_request_t *ptl_request;
707
708 while (true) {
709 ret = PtlEQGet(mca_coll_portals4_component.eq_h, &ev);
710 if (PTL_OK == ret) {
711
712 OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "event type=%s\n", evname[ev.type]));
713 count++;
714
715 switch (ev.type) {
716 case PTL_EVENT_PUT:
717
718 if (PTL_OK == ev.ni_fail_type) {
719 OPAL_OUTPUT_VERBOSE((50, ompi_coll_base_framework.framework_output,
720 "hdr_data %p, matchbits 0x%lx",
721 (void*) ev.hdr_data, ev.match_bits));
722 assert(0 != ev.hdr_data);
723 ptl_request = (ompi_coll_portals4_request_t*) ev.hdr_data;
724 assert(NULL != ptl_request);
725
726 switch (ptl_request->type) {
727 case OMPI_COLL_PORTALS4_TYPE_BARRIER:
728 ompi_coll_portals4_ibarrier_intra_fini(ptl_request);
729 break;
730 case OMPI_COLL_PORTALS4_TYPE_BCAST:
731 ompi_coll_portals4_ibcast_intra_fini(ptl_request);
732 break;
733 case OMPI_COLL_PORTALS4_TYPE_REDUCE:
734 ompi_coll_portals4_ireduce_intra_fini(ptl_request);
735 break;
736 case OMPI_COLL_PORTALS4_TYPE_ALLREDUCE:
737 ompi_coll_portals4_iallreduce_intra_fini(ptl_request);
738 break;
739 case OMPI_COLL_PORTALS4_TYPE_SCATTER:
740 ompi_coll_portals4_iscatter_intra_fini(ptl_request);
741 break;
742 case OMPI_COLL_PORTALS4_TYPE_GATHER:
743 ompi_coll_portals4_igather_intra_fini(ptl_request);
744 break;
745 }
746 }
747
748 if (PTL_OK != ev.ni_fail_type) {
749 OPAL_OUTPUT_VERBOSE((10, ompi_coll_base_framework.framework_output, "ni_fail_type=%s\n", failtype[ev.ni_fail_type]));
750 }
751 break;
752 default:
753 opal_output(ompi_coll_base_framework.framework_output,
754 "Unexpected event of type %d", ev.type);
755 break;
756 }
757
758 }
759 else if (PTL_EQ_EMPTY == ret) {
760 break;
761 }
762 else if (PTL_EQ_DROPPED == ret) {
763 opal_output(ompi_coll_base_framework.framework_output, "Flow control situation without recovery (EQ_DROPPED)\n");
764 abort();
765 }
766 else {
767 opal_output(ompi_coll_base_framework.framework_output, "Error returned from PtlEQGet: %d", ret);
768 break;
769 }
770 }
771 return count;
772 }
773
/*
 * OPAL class definition for the portals4 module type, derived from
 * mca_coll_base_module_t.  The two NULL arguments mean that no
 * class-specific constructor or destructor is registered.
 *
 * NOTE(review): since there is no destructor, the modules retained in
 * portals4_module_enable() are not released by this class -- confirm that
 * they are released elsewhere.
 */
OBJ_CLASS_INSTANCE(mca_coll_portals4_module_t,
                   mca_coll_base_module_t,
                   NULL, NULL);