This source file includes the following definitions.
- mca_btl_portals4_component_register
- mca_btl_portals4_component_open
- mca_btl_portals4_component_close
- mca_btl_portals4_component_init
- mca_btl_portals4_get_error
- mca_btl_portals4_component_progress
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "opal_config.h"
26
27 #include "opal/mca/event/event.h"
28 #include "opal/util/output.h"
29 #include "opal/mca/pmix/pmix.h"
30 #include "opal/util/show_help.h"
31 #include "opal/mca/btl/btl.h"
32 #include "opal/mca/btl/base/base.h"
33 #include "opal/mca/mpool/base/base.h"
34
35 #include "portals4.h"
36 #include "btl_portals4.h"
37 #include "btl_portals4_frag.h"
38 #include "btl_portals4_recv.h"
39
40 static int mca_btl_portals4_component_register(void);
41 static int mca_btl_portals4_component_open(void);
42 static int mca_btl_portals4_component_close(void);
43 static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
44 bool enable_progress_threads,
45 bool enable_mpi_threads);
46 int mca_btl_portals4_component_progress(void);
47
48 OPAL_MODULE_DECLSPEC extern mca_btl_portals4_component_t mca_btl_portals4_component;
49
50 mca_btl_portals4_component_t mca_btl_portals4_component = {
51 {
52
53
54 .btl_version = {
55 MCA_BTL_DEFAULT_VERSION("portals4"),
56 .mca_open_component = mca_btl_portals4_component_open,
57 .mca_close_component = mca_btl_portals4_component_close,
58 .mca_register_component_params = mca_btl_portals4_component_register,
59 },
60 .btl_data = {
61
62 .param_field = MCA_BASE_METADATA_PARAM_NONE
63 },
64
65 .btl_init = mca_btl_portals4_component_init,
66 .btl_progress = mca_btl_portals4_component_progress,
67 }
68 };
69
70 static int
71 mca_btl_portals4_component_register(void)
72 {
73 mca_btl_portals4_component.use_logical = 0;
74 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
75 "use_logical",
76 "Use the logical to physical table to accelerate portals4 adressing: 1 (true) : 0 (false)",
77 MCA_BASE_VAR_TYPE_INT,
78 NULL,
79 0,
80 0,
81 OPAL_INFO_LVL_5,
82 MCA_BASE_VAR_SCOPE_READONLY,
83 &mca_btl_portals4_component.use_logical);
84
85 mca_btl_portals4_component.max_btls = 1;
86 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
87 "max_btls",
88 "Maximum number of accepted Portals4 cards",
89 MCA_BASE_VAR_TYPE_UNSIGNED_INT,
90 NULL,
91 0,
92 0,
93 OPAL_INFO_LVL_5,
94 MCA_BASE_VAR_SCOPE_READONLY,
95 &mca_btl_portals4_component.max_btls);
96
97 mca_btl_portals4_component.portals_free_list_init_num = 16;
98 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
99 "free_list_init_num",
100 "Initial number of elements to initialize in free lists",
101 MCA_BASE_VAR_TYPE_INT,
102 NULL,
103 0,
104 0,
105 OPAL_INFO_LVL_5,
106 MCA_BASE_VAR_SCOPE_READONLY,
107 &(mca_btl_portals4_component.portals_free_list_init_num));
108
109 mca_btl_portals4_component.portals_free_list_max_num = 1024;
110 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
111 "free_list_max_num",
112 "Max number of elements to initialize in free lists",
113 MCA_BASE_VAR_TYPE_INT,
114 NULL,
115 0,
116 0,
117 OPAL_INFO_LVL_5,
118 MCA_BASE_VAR_SCOPE_READONLY,
119 &(mca_btl_portals4_component.portals_free_list_max_num));
120
121 mca_btl_portals4_component.portals_free_list_inc_num = 16;
122 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
123 "free_list_inc_num",
124 "Increment count for free lists",
125 MCA_BASE_VAR_TYPE_INT,
126 NULL,
127 0,
128 0,
129 OPAL_INFO_LVL_5,
130 MCA_BASE_VAR_SCOPE_READONLY,
131 &(mca_btl_portals4_component.portals_free_list_inc_num));
132
133 mca_btl_portals4_component.portals_free_list_eager_max_num = 32;
134 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
135 "eager_frag_limit",
136 "Maximum number of pre-pinned eager fragments",
137 MCA_BASE_VAR_TYPE_INT,
138 NULL,
139 0,
140 0,
141 OPAL_INFO_LVL_5,
142 MCA_BASE_VAR_SCOPE_READONLY,
143 &(mca_btl_portals4_component.portals_free_list_eager_max_num));
144
145 mca_btl_portals4_component.portals_need_ack = 1;
146 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
147 "needs_ack",
148 "Require a portals level ACK",
149 MCA_BASE_VAR_TYPE_INT,
150 NULL,
151 0,
152 0,
153 OPAL_INFO_LVL_5,
154 MCA_BASE_VAR_SCOPE_READONLY,
155 &(mca_btl_portals4_component.portals_need_ack));
156
157 mca_btl_portals4_component.recv_queue_size = 4 * 1024;
158 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
159 "eq_recv_size",
160 "Size of the receive event queue",
161 MCA_BASE_VAR_TYPE_INT,
162 NULL,
163 0,
164 0,
165 OPAL_INFO_LVL_5,
166 MCA_BASE_VAR_SCOPE_READONLY,
167 &(mca_btl_portals4_component.recv_queue_size));
168
169 mca_btl_portals4_component.portals_max_outstanding_ops = 8 * 1024;
170 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
171 "max_pending_ops",
172 "Maximum number of pending send/rdma frags",
173 MCA_BASE_VAR_TYPE_INT,
174 NULL,
175 0,
176 0,
177 OPAL_INFO_LVL_5,
178 MCA_BASE_VAR_SCOPE_READONLY,
179 &(mca_btl_portals4_component.portals_max_outstanding_ops));
180
181 mca_btl_portals4_component.portals_recv_mds_num = 8;
182 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
183 "recv_md_num",
184 "Number of send frag receive descriptors",
185 MCA_BASE_VAR_TYPE_INT,
186 NULL,
187 0,
188 0,
189 OPAL_INFO_LVL_5,
190 MCA_BASE_VAR_SCOPE_READONLY,
191 &(mca_btl_portals4_component.portals_recv_mds_num));
192
193 mca_btl_portals4_component.portals_recv_mds_size = 256 * 1024;
194 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
195 "recv_md_size",
196 "Size of send frag receive descriptors",
197 MCA_BASE_VAR_TYPE_INT,
198 NULL,
199 0,
200 0,
201 OPAL_INFO_LVL_5,
202 MCA_BASE_VAR_SCOPE_READONLY,
203 &(mca_btl_portals4_component.portals_recv_mds_size));
204
205 mca_btl_portals4_component.portals_max_msg_size = PTL_SIZE_MAX;
206 (void) mca_base_component_var_register(&mca_btl_portals4_component.super.btl_version,
207 "max_msg_size",
208 "Max size supported by portals4 (above that, a message is cut into messages less than that size)",
209 MCA_BASE_VAR_TYPE_UNSIGNED_LONG,
210 NULL,
211 0,
212 0,
213 OPAL_INFO_LVL_5,
214 MCA_BASE_VAR_SCOPE_READONLY,
215 &(mca_btl_portals4_component.portals_max_msg_size));
216 return OPAL_SUCCESS;
217 }
218
219 static int
220 mca_btl_portals4_component_open(void)
221 {
222 OPAL_OUTPUT_VERBOSE((1, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_open\n"));
223
224
225
226
227 mca_btl_portals4_module.super.btl_exclusivity = MCA_BTL_EXCLUSIVITY_LOW + 100;
228 mca_btl_portals4_module.super.btl_eager_limit = 32 * 1024;
229 mca_btl_portals4_module.super.btl_rndv_eager_limit = 32 * 1024;
230 mca_btl_portals4_module.super.btl_max_send_size = 64 * 1024;
231 if (mca_btl_portals4_module.super.btl_max_send_size > mca_btl_portals4_component.portals_max_msg_size)
232 mca_btl_portals4_module.super.btl_max_send_size = mca_btl_portals4_component.portals_max_msg_size;
233 mca_btl_portals4_module.super.btl_rdma_pipeline_send_length = 64 * 1024;
234 mca_btl_portals4_module.super.btl_rdma_pipeline_frag_size = INT_MAX;
235 mca_btl_portals4_module.super.btl_min_rdma_pipeline_size = 0;
236 mca_btl_portals4_module.super.btl_flags =
237 MCA_BTL_FLAGS_RDMA |
238 MCA_BTL_FLAGS_RDMA_MATCHED |
239 MCA_BTL_FLAGS_SEND;
240
241 mca_btl_portals4_module.super.btl_registration_handle_size = sizeof (mca_btl_base_registration_handle_t);
242
243 mca_btl_portals4_module.super.btl_get_limit = SIZE_MAX;
244 if (mca_btl_portals4_module.super.btl_get_limit > mca_btl_portals4_component.portals_max_msg_size)
245 mca_btl_portals4_module.super.btl_get_limit = mca_btl_portals4_component.portals_max_msg_size;
246 mca_btl_portals4_module.super.btl_put_limit = 0;
247 mca_btl_portals4_module.super.btl_get_alignment = 0;
248 mca_btl_portals4_module.super.btl_put_alignment = 0;
249
250 mca_btl_portals4_module.super.btl_get_local_registration_threshold = 0;
251 mca_btl_portals4_module.super.btl_put_local_registration_threshold = 0;
252
253 mca_btl_portals4_module.super.btl_bandwidth = 1000;
254 mca_btl_portals4_module.super.btl_latency = 0;
255
256 mca_btl_base_param_register(&mca_btl_portals4_component.super.btl_version, &mca_btl_portals4_module.super);
257
258 mca_btl_portals4_module.portals_num_procs = 0;
259
260 mca_btl_portals4_module.recv_eq_h = PTL_EQ_NONE;
261
262 mca_btl_portals4_module.send_md_h = PTL_INVALID_HANDLE;
263
264 mca_btl_portals4_module.portals_ni_h = PTL_INVALID_HANDLE;
265 mca_btl_portals4_module.zero_md_h = PTL_INVALID_HANDLE;
266
267 mca_btl_portals4_module.long_overflow_me_h = PTL_INVALID_HANDLE;
268 mca_btl_portals4_module.portals_outstanding_ops = 0;
269 mca_btl_portals4_module.recv_idx = (ptl_pt_index_t) ~0UL;
270
271 if (1 == mca_btl_portals4_component.use_logical) {
272
273
274
275
276
277 mca_btl_portals4_module.super.btl_flags |= MCA_BTL_FLAGS_SINGLE_ADD_PROCS;
278 }
279
280 return OPAL_SUCCESS;
281 }
282
283
284 static int
285 mca_btl_portals4_component_close(void)
286 {
287 opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_close\n");
288
289
290
291 opal_output_close(opal_btl_base_framework.framework_output);
292 opal_btl_base_framework.framework_output = -1;
293
294 if (NULL != mca_btl_portals4_component.btls) free(mca_btl_portals4_component.btls);
295 if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
296 mca_btl_portals4_component.btls = NULL;
297 mca_btl_portals4_component.eqs_h = NULL;
298
299 PtlFini();
300
301 return OPAL_SUCCESS;
302 }
303
304 static mca_btl_base_module_t** mca_btl_portals4_component_init(int *num_btls,
305 bool enable_progress_threads,
306 bool enable_mpi_threads)
307 {
308 mca_btl_portals4_module_t *portals4_btl = NULL;
309 mca_btl_base_module_t **btls = NULL;
310 unsigned int ret, interface;
311 ptl_handle_ni_t *portals4_nis_h = NULL;
312 ptl_ni_limits_t portals4_ni_limits ;
313 ptl_process_t *ptl_process_ids = NULL;
314
315 opal_output_verbose(50, opal_btl_base_framework.framework_output, "mca_btl_portals4_component_init\n");
316
317 if (enable_mpi_threads && !mca_btl_base_thread_multiple_override) {
318 opal_output_verbose(1, opal_btl_base_framework.framework_output,
319 "btl portals4 disabled because threads enabled");
320 return NULL;
321 }
322
323
324 ret = PtlInit();
325 if (PTL_OK != ret) {
326 opal_output_verbose(1, opal_btl_base_framework.framework_output,
327 "%s:%d: PtlInit failed: %d\n",
328 __FILE__, __LINE__, ret);
329 goto error;
330 }
331 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlInit OK\n"));
332
333
334
335
336 *num_btls = 0;
337 portals4_nis_h = malloc(mca_btl_portals4_component.max_btls * sizeof(ptl_handle_ni_t));
338 for (interface=0; interface<mca_btl_portals4_component.max_btls; interface++) {
339
340 if (mca_btl_portals4_component.use_logical)
341 ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
342 PTL_NI_LOGICAL | PTL_NI_MATCHING,
343 PTL_PID_ANY,
344 NULL,
345 &portals4_ni_limits,
346 &portals4_nis_h[*num_btls]
347 );
348 else ret = PtlNIInit((1 == mca_btl_portals4_component.max_btls) ? PTL_IFACE_DEFAULT : interface,
349 PTL_NI_PHYSICAL | PTL_NI_MATCHING,
350 PTL_PID_ANY,
351 NULL,
352 &portals4_ni_limits,
353 &portals4_nis_h[*num_btls]
354 );
355 if (PTL_OK != ret) {
356 opal_output_verbose(90, opal_btl_base_framework.framework_output,
357 "%s:%d: PtlNIInit failed for NI %d: %d\n", __FILE__, __LINE__, interface, ret);
358 }
359 else {
360 if (mca_btl_portals4_component.portals_max_msg_size > portals4_ni_limits.max_msg_size)
361 mca_btl_portals4_component.portals_max_msg_size = portals4_ni_limits.max_msg_size;
362 if (mca_btl_portals4_module.super.btl_max_send_size > portals4_ni_limits.max_msg_size)
363 mca_btl_portals4_module.super.btl_max_send_size = portals4_ni_limits.max_msg_size;
364 if (mca_btl_portals4_module.super.btl_get_limit > portals4_ni_limits.max_msg_size)
365 mca_btl_portals4_module.super.btl_get_limit = portals4_ni_limits.max_msg_size;
366 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlNIInit OK for NI %d max_msg_size=%ld",
367 *num_btls, mca_btl_portals4_component.portals_max_msg_size));
368
369 (*num_btls)++;
370 }
371 }
372 if (0 == *num_btls) goto error;
373
374
375
376
377 mca_btl_portals4_component.num_btls = *num_btls;
378 mca_btl_portals4_component.btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) );
379 mca_btl_portals4_component.eqs_h = malloc(mca_btl_portals4_component.num_btls * sizeof(ptl_handle_eq_t));
380 ptl_process_ids = malloc(mca_btl_portals4_component.num_btls * sizeof(ptl_process_t) );
381
382 for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
383 mca_btl_portals4_component.btls[interface] = NULL;
384 mca_btl_portals4_component.eqs_h[interface] = PTL_EQ_NONE;
385 }
386 for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
387 portals4_btl = malloc(sizeof(mca_btl_portals4_module_t));
388 mca_btl_portals4_component.btls[interface] = portals4_btl;
389
390
391 memcpy(portals4_btl, &mca_btl_portals4_module, sizeof(mca_btl_portals4_module_t));
392
393 portals4_btl->interface_num = interface;
394 portals4_btl->portals_ni_h = portals4_nis_h[interface];
395 portals4_btl->portals_max_outstanding_ops = mca_btl_portals4_component.portals_max_outstanding_ops;
396
397 OBJ_CONSTRUCT(&(portals4_btl->portals_frag_eager), opal_free_list_t);
398 OBJ_CONSTRUCT(&(portals4_btl->portals_frag_max), opal_free_list_t);
399 OBJ_CONSTRUCT(&(portals4_btl->portals_frag_user), opal_free_list_t);
400
401
402 opal_free_list_init (&(portals4_btl->portals_frag_eager),
403 sizeof(mca_btl_portals4_frag_eager_t) +
404 portals4_btl->super.btl_eager_limit,
405 opal_cache_line_size,
406 OBJ_CLASS(mca_btl_portals4_frag_eager_t),
407 0,opal_cache_line_size,
408 mca_btl_portals4_component.portals_free_list_init_num,
409 mca_btl_portals4_component.portals_free_list_eager_max_num,
410 mca_btl_portals4_component.portals_free_list_inc_num,
411 NULL, 0, NULL, NULL, NULL);
412
413
414 opal_free_list_init (&(portals4_btl->portals_frag_max),
415 sizeof(mca_btl_portals4_frag_max_t) +
416 portals4_btl->super.btl_max_send_size,
417 opal_cache_line_size,
418 OBJ_CLASS(mca_btl_portals4_frag_max_t),
419 0,opal_cache_line_size,
420 mca_btl_portals4_component.portals_free_list_init_num,
421 mca_btl_portals4_component.portals_free_list_max_num,
422 mca_btl_portals4_component.portals_free_list_inc_num,
423 NULL, 0, NULL, NULL, NULL);
424
425
426 opal_free_list_init (&(portals4_btl->portals_frag_user),
427 sizeof(mca_btl_portals4_frag_user_t),
428 opal_cache_line_size,
429 OBJ_CLASS(mca_btl_portals4_frag_user_t),
430 0,opal_cache_line_size,
431 mca_btl_portals4_component.portals_free_list_init_num,
432 mca_btl_portals4_component.portals_free_list_max_num,
433 mca_btl_portals4_component.portals_free_list_inc_num,
434 NULL, 0, NULL, NULL, NULL);
435
436
437 OBJ_CONSTRUCT(&(portals4_btl->portals_recv_blocks), opal_list_t);
438 }
439 free(portals4_nis_h);
440 portals4_nis_h = NULL;
441
442
443 for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
444 portals4_btl = mca_btl_portals4_component.btls[interface];
445
446 ret = PtlGetPhysId(portals4_btl->portals_ni_h ,&ptl_process_ids[interface]);
447 if (PTL_OK != ret) {
448 opal_output_verbose(1, opal_btl_base_framework.framework_output,
449 "%s:%d: PtlGetPhysId for NI %d failed: %d\n",
450 __FILE__, __LINE__, interface, ret);
451 goto error;
452 }
453
454 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
455 "PtlGetPhysId NI number %d: ni_h=%d rank=%x nid=%x pid=%x\n",
456 interface, portals4_btl->portals_ni_h,
457 ptl_process_ids[interface].rank,
458 ptl_process_ids[interface].phys.nid, ptl_process_ids[interface].phys.pid));
459 }
460 OPAL_MODEX_SEND(ret, OPAL_PMIX_GLOBAL,
461 &mca_btl_portals4_component.super.btl_version,
462 ptl_process_ids, mca_btl_portals4_component.num_btls * sizeof(ptl_process_t));
463 if (OPAL_SUCCESS != ret) {
464 opal_output_verbose(1, opal_btl_base_framework.framework_output,
465 "%s:%d: opal_modex_send failed: %d\n",
466 __FILE__, __LINE__, ret);
467 goto error;
468 }
469 free(ptl_process_ids);
470 ptl_process_ids = NULL;
471
472 btls = malloc(mca_btl_portals4_component.num_btls * sizeof(mca_btl_portals4_module_t*) );
473 memcpy(btls , mca_btl_portals4_component.btls,
474 mca_btl_portals4_component.num_btls*sizeof(mca_btl_portals4_module_t*) );
475
476 opal_output_verbose(1, opal_btl_base_framework.framework_output, "The btl portals4 component has been initialized and uses %d NI(s)",
477 mca_btl_portals4_component.num_btls);
478
479 mca_btl_portals4_component.need_init = 1;
480
481 return btls;
482
483 error:
484 opal_output_verbose(1, opal_btl_base_framework.framework_output, "Error in mca_btl_portals4_component_init\n");
485
486 if (*num_btls) {
487 if (NULL != portals4_nis_h) free(portals4_nis_h);
488 if (NULL != ptl_process_ids) free(ptl_process_ids);
489
490 for (interface=0; interface<mca_btl_portals4_component.num_btls; interface++) {
491 portals4_btl = mca_btl_portals4_component.btls[interface];
492 if (NULL != portals4_btl) mca_btl_portals4_free_module(portals4_btl);
493 }
494 mca_btl_portals4_component.num_btls = 0;
495 *num_btls = 0;
496 if (NULL != mca_btl_portals4_component.btls) free(mca_btl_portals4_component.btls);
497 if (NULL != mca_btl_portals4_component.eqs_h) free(mca_btl_portals4_component.eqs_h);
498 mca_btl_portals4_component.btls = NULL;
499 mca_btl_portals4_component.eqs_h = NULL;
500
501 }
502 return NULL;
503 }
504
505 int
506 mca_btl_portals4_get_error(int ptl_error)
507 {
508 int ret;
509
510 switch (ptl_error) {
511 case PTL_OK:
512 ret = OPAL_SUCCESS;
513 break;
514 case PTL_ARG_INVALID:
515 ret = OPAL_ERR_BAD_PARAM;
516 break;
517 case PTL_CT_NONE_REACHED:
518 ret = OPAL_ERR_TIMEOUT;
519 break;
520 case PTL_EQ_DROPPED:
521 ret = OPAL_ERR_OUT_OF_RESOURCE;
522 break;
523 case PTL_EQ_EMPTY:
524 ret = OPAL_ERR_TEMP_OUT_OF_RESOURCE;
525 break;
526 case PTL_FAIL:
527 ret = OPAL_ERROR;
528 break;
529 case PTL_IN_USE:
530 ret = OPAL_ERR_RESOURCE_BUSY;
531 break;
532 case PTL_INTERRUPTED:
533 ret = OPAL_ERR_RESOURCE_BUSY;
534 break;
535 case PTL_LIST_TOO_LONG:
536 ret = OPAL_ERR_OUT_OF_RESOURCE;
537 break;
538 case PTL_NO_INIT:
539 ret = OPAL_ERR_FATAL;
540 break;
541 case PTL_NO_SPACE:
542 ret = OPAL_ERR_OUT_OF_RESOURCE;
543 break;
544 case PTL_PID_IN_USE:
545 ret = OPAL_ERR_BAD_PARAM;
546 break;
547 case PTL_PT_FULL:
548 ret = OPAL_ERR_OUT_OF_RESOURCE;
549 break;
550 case PTL_PT_EQ_NEEDED:
551 ret = OPAL_ERR_FATAL;
552 break;
553 case PTL_PT_IN_USE:
554 ret = OPAL_ERR_RESOURCE_BUSY;
555 break;
556
557 default:
558 ret = OPAL_ERROR;
559 }
560
561 return ret;
562 }
563
564 int
565 mca_btl_portals4_component_progress(void)
566 {
567 mca_btl_portals4_module_t *portals4_btl;
568 int num_progressed = 0;
569 int ret, btl_ownership;
570 mca_btl_portals4_frag_t *frag = NULL;
571 mca_btl_base_tag_t tag;
572 static ptl_event_t ev;
573 unsigned int which;
574 mca_btl_active_message_callback_t* reg;
575 mca_btl_base_segment_t seg[2];
576 mca_btl_base_descriptor_t btl_base_descriptor;
577
578 while (true) {
579 ret = PtlEQPoll(mca_btl_portals4_component.eqs_h, mca_btl_portals4_component.num_btls, 0, &ev, &which);
580
581 if (PTL_OK == ret) {
582 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output, "PtlEQPoll Event received: %d (fail=%d) on NI %d\n",
583 ev.type, ev.ni_fail_type, which));
584 num_progressed++;
585 portals4_btl = mca_btl_portals4_component.btls[which];
586
587 switch (ev.type) {
588
589 case PTL_EVENT_SEND:
590
591 frag = ev.user_ptr;
592 if (NULL == frag) {
593 opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_SEND event with NULL user_ptr");
594 break;
595 }
596 btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
597
598 if (!mca_btl_portals4_component.portals_need_ack) {
599
600 if( MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ){
601 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
602 "PTL_EVENT_SEND: Direct call to des_cbfunc: %lx\n", (uint64_t)frag->base.des_cbfunc));
603 frag->base.des_cbfunc(&portals4_btl->super,
604 frag->endpoint,
605 &frag->base,
606 OPAL_SUCCESS);
607 }
608 if (btl_ownership) {
609 mca_btl_portals4_free(&portals4_btl->super, &frag->base);
610 }
611 if (0 != frag->size) {
612 OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
613 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
614 "PTL_EVENT_SEND: Decrementing portals_outstanding_ops=%d (1)\n",
615 portals4_btl->portals_outstanding_ops));
616 }
617 }
618
619 goto done;
620 break;
621
622 case PTL_EVENT_ACK:
623
624 frag = ev.user_ptr;
625 if (NULL == frag) {
626 opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_ACK event with NULL user_ptr");
627 break;
628 }
629 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
630 "PTL_EVENT_ACK received rlength=%ld mlength=%ld des_flags=%d\n", ev.rlength, ev.mlength, frag->base.des_flags));
631 btl_ownership = (frag->base.des_flags & MCA_BTL_DES_FLAGS_BTL_OWNERSHIP);
632
633
634
635
636 if (MCA_BTL_DES_SEND_ALWAYS_CALLBACK & frag->base.des_flags ) {
637 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
638 "PTL_EVENT_ACK: Call to des_cbfunc %lx\n", (uint64_t)frag->base.des_cbfunc));
639 frag->base.des_cbfunc(&portals4_btl->super,
640 frag->endpoint,
641 &frag->base,
642 OPAL_SUCCESS);
643 }
644 if (btl_ownership) {
645 mca_btl_portals4_free(&portals4_btl->super, &frag->base);
646 }
647
648 if (0 != frag->size) {
649 OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
650 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
651 "PTL_EVENT_ACK: Decrementing portals_outstanding_ops=%d (2)\n", portals4_btl->portals_outstanding_ops));
652 }
653
654 goto done;
655 break;
656
657 case PTL_EVENT_PUT:
658
659 tag = (unsigned char) (ev.hdr_data);
660
661 btl_base_descriptor.des_segments = seg;
662 btl_base_descriptor.des_segment_count = 1;
663 seg[0].seg_addr.pval = ev.start;
664 seg[0].seg_len = ev.mlength;
665
666 reg = mca_btl_base_active_message_trigger + tag;
667 OPAL_OUTPUT_VERBOSE((50, opal_btl_base_framework.framework_output,
668 "PTL_EVENT_PUT: tag=%x base_descriptor=%p cbfunc: %lx\n", tag, (void*)&btl_base_descriptor, (uint64_t)reg->cbfunc));
669 reg->cbfunc(&portals4_btl->super, tag, &btl_base_descriptor, reg->cbdata);
670
671 goto done;
672 break;
673
674 case PTL_EVENT_PUT_OVERFLOW:
675
676 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
677 "PTL_EVENT_OVERFLOW received\n"));
678 goto done;
679 break;
680
681 case PTL_EVENT_LINK:
682
683 frag = ev.user_ptr;
684 if (NULL == frag) {
685 opal_output(opal_btl_base_framework.framework_output, "btl/portals4: PTL_EVENT_LINK event with NULL user_ptr");
686 break;
687 }
688 goto done;
689 break;
690
691 case PTL_EVENT_AUTO_UNLINK:
692
693
694 mca_btl_portals4_activate_block(ev.user_ptr);
695 goto done;
696 break;
697
698 case PTL_EVENT_AUTO_FREE:
699
700 goto done;
701 break;
702
703 case PTL_EVENT_GET:
704
705 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
706 "PTL_EVENT_GET received at target rlength=%ld mlength=%ld\n", ev.rlength, ev.mlength));
707 goto done;
708 break;
709
710 case PTL_EVENT_REPLY:
711
712 frag = ev.user_ptr;
713
714 if (PTL_NI_PERM_VIOLATION == ev.ni_fail_type) {
715 opal_output_verbose(1, opal_btl_base_framework.framework_output,
716 "Warning : PTL_EVENT_REPLY with PTL_NI_PERM_VIOLATION received, try to re-issue a PtlGet");
717
718
719
720 ret = PtlGet(portals4_btl->send_md_h,
721 (ptl_size_t) frag->addr,
722 frag->length,
723 frag->peer_proc,
724 portals4_btl->recv_idx,
725 frag->match_bits,
726 0,
727 frag);
728 if (OPAL_UNLIKELY(PTL_OK != ret)) {
729 opal_output_verbose(1, opal_btl_base_framework.framework_output,
730 "%s:%d: Re-issued PtlGet failed: %d",
731 __FILE__, __LINE__, ret);
732 return OPAL_ERROR;
733 }
734
735 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
736 "Re-issued PtlGet length=%ld recv_idx=%d rank=%x pid=%x nid=%x match_bits=%lx\n",
737 frag->length, portals4_btl->recv_idx,
738 frag->peer_proc.rank, frag->peer_proc.phys.pid, frag->peer_proc.phys.nid, frag->match_bits));
739 }
740 else {
741 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
742 "PTL_EVENT_REPLY: Call to rdma_cbfunc=%lx\n", (uint64_t)frag->rdma_cb.func));
743 frag->rdma_cb.func(&portals4_btl->super,
744 frag->endpoint,
745 ev.start,
746 frag->rdma_cb.local_handle,
747 frag->rdma_cb.context,
748 frag->rdma_cb.data,
749 OPAL_SUCCESS);
750
751 OPAL_BTL_PORTALS4_FRAG_RETURN_USER(&portals4_btl->super, frag);
752 OPAL_THREAD_ADD_FETCH32(&portals4_btl->portals_outstanding_ops, -1);
753 OPAL_OUTPUT_VERBOSE((90, opal_btl_base_framework.framework_output,
754 "PTL_EVENT_REPLY: Decrementing portals_outstanding_ops=%d\n", portals4_btl->portals_outstanding_ops));
755 goto done;
756 }
757 break;
758
759 default:
760
761 goto done;
762 break;
763 }
764 } else if (PTL_EQ_EMPTY == ret) {
765
766
767
768
769 goto done;
770 break;
771
772 } else if (PTL_EQ_DROPPED == ret) {
773 opal_output(opal_btl_base_framework.framework_output,
774 "Flow control situation without recovery (EQ_DROPPED)");
775 break;
776 } else {
777 opal_output(opal_btl_base_framework.framework_output,
778 "Error returned from PtlEQPoll: %d", ret);
779 break;
780 }
781 }
782 done:
783 return num_progressed;
784 }