This source file includes the following definitions.
- ompi_osc_portals4_get_op
- get_sized_type
- ompi_osc_portals4_get_dt
- number_of_fragments
- segmentedPut
- segmentedGet
- segmentedAtomic
- segmentedFetchAtomic
- segmentedSwap
- create_iov_list
- get_to_iovec
- atomic_get_to_iovec
- put_from_iovec
- atomic_put_from_iovec
- atomic_from_iovec
- swap_to_iovec
- fetch_atomic_to_iovec
- put_to_noncontig
- atomic_put_to_noncontig
- atomic_to_noncontig
- get_from_noncontig
- atomic_get_from_noncontig
- swap_from_noncontig
- fetch_atomic_from_noncontig
- ompi_osc_portals4_rput
- ompi_osc_portals4_rget
- ompi_osc_portals4_raccumulate
- ompi_osc_portals4_rget_accumulate
- ompi_osc_portals4_put
- ompi_osc_portals4_get
- ompi_osc_portals4_accumulate
- ompi_osc_portals4_get_accumulate
- ompi_osc_portals4_compare_and_swap
- ompi_osc_portals4_fetch_and_op
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #include "ompi_config.h"
16
17 #include "ompi/mca/osc/osc.h"
18 #include "ompi/mca/osc/base/base.h"
19 #include "ompi/mca/osc/base/osc_base_obj_convert.h"
20
21 #include "osc_portals4.h"
22 #include "osc_portals4_request.h"
23
24
25 static int
26 ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op)
27 {
28 if (MPI_MAX == op) {
29 *ptl_op = PTL_MAX;
30 } else if (MPI_MIN == op) {
31 *ptl_op = PTL_MIN;
32 } else if (MPI_SUM == op) {
33 *ptl_op = PTL_SUM;
34 } else if (MPI_PROD == op) {
35 *ptl_op = PTL_PROD;
36 } else if (MPI_LAND == op) {
37 *ptl_op = PTL_LAND;
38 } else if (MPI_BAND == op) {
39 *ptl_op = PTL_BAND;
40 } else if (MPI_LOR == op) {
41 *ptl_op = PTL_LOR;
42 } else if (MPI_BOR == op) {
43 *ptl_op = PTL_BOR;
44 } else if (MPI_LXOR == op) {
45 *ptl_op = PTL_LXOR;
46 } else if (MPI_BXOR == op) {
47 *ptl_op = PTL_BXOR;
48 } else {
49 return OMPI_ERROR;
50 }
51
52 return OMPI_SUCCESS;
53 }
54
55
56 static int
57 get_sized_type(bool sign, size_t size, ptl_datatype_t *ptl_dt)
58 {
59 if (sign) {
60 switch (size) {
61 case 1:
62 *ptl_dt = PTL_INT8_T;
63 break;
64 case 2:
65 *ptl_dt = PTL_INT16_T;
66 break;
67 case 4:
68 *ptl_dt = PTL_INT32_T;
69 break;
70 case 8:
71 *ptl_dt = PTL_INT64_T;
72 break;
73 default:
74 return OMPI_ERROR;
75 }
76 } else {
77 switch (size) {
78 case 1:
79 *ptl_dt = PTL_UINT8_T;
80 break;
81 case 2:
82 *ptl_dt = PTL_UINT16_T;
83 break;
84 case 4:
85 *ptl_dt = PTL_UINT32_T;
86 break;
87 case 8:
88 *ptl_dt = PTL_UINT64_T;
89 break;
90 default:
91 return OMPI_ERROR;
92 }
93 }
94
95 return OMPI_SUCCESS;
96 }
97
98
99 static int
100 ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
101 {
102 ompi_datatype_t *base_dt = ompi_datatype_get_single_predefined_type_from_args(dt);
103
104 if (MPI_BYTE == base_dt) {
105 *ptl_dt = PTL_INT8_T;
106 } else if (MPI_CHAR == base_dt) {
107 *ptl_dt = PTL_INT8_T;
108 } else if (MPI_SHORT == base_dt) {
109 return get_sized_type(true, sizeof(short), ptl_dt);
110 } else if (MPI_INT == base_dt) {
111 return get_sized_type(true, sizeof(int), ptl_dt);
112 } else if (MPI_LONG == base_dt) {
113 return get_sized_type(true, sizeof(long), ptl_dt);
114 } else if (MPI_FLOAT == base_dt) {
115 *ptl_dt = PTL_FLOAT;
116 } else if (MPI_DOUBLE == base_dt) {
117 *ptl_dt = PTL_DOUBLE;
118 } else if (MPI_LONG_DOUBLE == base_dt) {
119 *ptl_dt = PTL_LONG_DOUBLE;
120 } else if (MPI_UNSIGNED_CHAR == base_dt) {
121 *ptl_dt = PTL_UINT8_T;
122 } else if (MPI_SIGNED_CHAR == base_dt) {
123 *ptl_dt = PTL_UINT8_T;
124 } else if (MPI_UNSIGNED_SHORT == base_dt) {
125 return get_sized_type(false, sizeof(short), ptl_dt);
126 } else if (MPI_UNSIGNED_LONG == base_dt) {
127 return get_sized_type(false, sizeof(long), ptl_dt);
128 } else if (MPI_UNSIGNED == base_dt) {
129 return get_sized_type(false, sizeof(int), ptl_dt);
130 } else if (MPI_LONG_LONG_INT == base_dt) {
131 return get_sized_type(true, sizeof(long long int), ptl_dt);
132 } else if (MPI_LONG_LONG == base_dt) {
133 return get_sized_type(true, sizeof(long long), ptl_dt);
134 } else if (MPI_INT8_T == base_dt) {
135 *ptl_dt = PTL_INT8_T;
136 } else if (MPI_UINT8_T == base_dt) {
137 *ptl_dt = PTL_UINT8_T;
138 } else if (MPI_INT16_T == base_dt) {
139 *ptl_dt = PTL_INT16_T;
140 } else if (MPI_UINT16_T == base_dt) {
141 *ptl_dt = PTL_UINT16_T;
142 } else if (MPI_INT32_T == base_dt) {
143 *ptl_dt = PTL_INT32_T;
144 } else if (MPI_UINT32_T == base_dt) {
145 *ptl_dt = PTL_UINT32_T;
146 } else if (MPI_INT64_T == base_dt) {
147 *ptl_dt = PTL_INT64_T;
148 } else if (MPI_UINT64_T == base_dt) {
149 *ptl_dt = PTL_UINT64_T;
150 } else if (MPI_C_COMPLEX == base_dt) {
151 *ptl_dt = PTL_DOUBLE_COMPLEX;
152 } else if (MPI_C_FLOAT_COMPLEX == base_dt) {
153 *ptl_dt = PTL_FLOAT_COMPLEX;
154 } else if (MPI_C_DOUBLE_COMPLEX == base_dt) {
155 *ptl_dt = PTL_DOUBLE_COMPLEX;
156 } else if (MPI_C_LONG_DOUBLE_COMPLEX == base_dt) {
157 *ptl_dt = PTL_LONG_DOUBLE_COMPLEX;
158 } else if (MPI_AINT == base_dt) {
159 if (sizeof(MPI_Aint) == 2) {
160 *ptl_dt = PTL_UINT16_T;
161 } else if (sizeof(MPI_Aint) == 4) {
162 *ptl_dt = PTL_UINT32_T;
163 } else if (sizeof(MPI_Aint) == 8) {
164 *ptl_dt = PTL_UINT64_T;
165 }
166 } else {
167 return OMPI_ERROR;
168 }
169
170 return 0;
171 }
172
173 static ptl_size_t
174 number_of_fragments(ptl_size_t length, ptl_size_t maxlength)
175 {
176 ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1;
177 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
178 "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag));
179 return nb_frag;
180 }
181
182
183 static int
184 segmentedPut(opal_atomic_int64_t *opcount,
185 ptl_handle_md_t md_h,
186 ptl_size_t origin_offset,
187 ptl_size_t put_length,
188 ptl_size_t segment_length,
189 ptl_ack_req_t ack_req,
190 ptl_process_t target_id,
191 ptl_pt_index_t pt_index,
192 ptl_match_bits_t match_bits,
193 ptl_size_t target_offset,
194 void *user_ptr,
195 ptl_hdr_data_t hdr_data)
196 {
197 int ret;
198 ptl_size_t bytes_put = 0;
199
200 do {
201 opal_atomic_add_fetch_64(opcount, 1);
202
203 ptl_size_t frag_length = MIN(put_length, segment_length);
204 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
205 "Put size : %lu/%lu, offset:%lu", frag_length, put_length, bytes_put));
206 ret = PtlPut(md_h,
207 origin_offset + bytes_put,
208 frag_length,
209 ack_req,
210 target_id,
211 pt_index,
212 match_bits,
213 target_offset + bytes_put,
214 user_ptr,
215 hdr_data);
216 if (PTL_OK != ret) {
217 opal_atomic_add_fetch_64(opcount, -1);
218 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
219 "%s:%d PtlPut failed with return value %d",
220 __FUNCTION__, __LINE__, ret);
221 return ret;
222 }
223 put_length -= frag_length;
224 bytes_put += frag_length;
225 } while (put_length);
226 return PTL_OK;
227 }
228
229
230 static int
231 segmentedGet(opal_atomic_int64_t *opcount,
232 ptl_handle_md_t md_h,
233 ptl_size_t origin_offset,
234 ptl_size_t get_length,
235 ptl_size_t segment_length,
236 ptl_process_t target_id,
237 ptl_pt_index_t pt_index,
238 ptl_match_bits_t match_bits,
239 ptl_size_t target_offset,
240 void *user_ptr)
241 {
242 int ret;
243 ptl_size_t bytes_gotten = 0;
244
245 do {
246 opal_atomic_add_fetch_64(opcount, 1);
247
248 ptl_size_t frag_length = MIN(get_length, segment_length);
249 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
250 "Get size : %lu/%lu, offset:%lu", frag_length, get_length, bytes_gotten));
251
252 ret = PtlGet(md_h,
253 (ptl_size_t) origin_offset + bytes_gotten,
254 frag_length,
255 target_id,
256 pt_index,
257 match_bits,
258 target_offset + bytes_gotten,
259 user_ptr);
260 if (PTL_OK != ret) {
261 opal_atomic_add_fetch_64(opcount, -1);
262 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
263 "%s:%d PtlGet failed with return value %d",
264 __FUNCTION__, __LINE__, ret);
265 return ret;
266 }
267 get_length -= frag_length;
268 bytes_gotten += frag_length;
269 } while (get_length);
270 return PTL_OK;
271 }
272
273
274 static int
275 segmentedAtomic(opal_atomic_int64_t *opcount,
276 ptl_handle_md_t md_h,
277 ptl_size_t origin_offset,
278 ptl_size_t length,
279 ptl_size_t segment_length,
280 ptl_process_t target_id,
281 ptl_pt_index_t pt_index,
282 ptl_match_bits_t match_bits,
283 ptl_size_t target_offset,
284 void *user_ptr,
285 ptl_op_t ptl_op,
286 ptl_datatype_t ptl_dt)
287 {
288 int ret;
289 ptl_size_t sent = 0;
290
291 do {
292 opal_atomic_add_fetch_64(opcount, 1);
293
294 ptl_size_t frag_length = MIN(length, segment_length);
295 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
296 "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
297 ret = PtlAtomic(md_h,
298 (ptl_size_t) origin_offset + sent,
299 frag_length,
300 PTL_ACK_REQ,
301 target_id,
302 pt_index,
303 match_bits,
304 target_offset + sent,
305 user_ptr,
306 0,
307 ptl_op,
308 ptl_dt);
309 if (PTL_OK != ret) {
310 opal_atomic_add_fetch_64(opcount, -1);
311 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
312 "%s:%d PtlAtomic failed with return value %d",
313 __FUNCTION__, __LINE__, ret);
314 return ret;
315 }
316 length -= frag_length;
317 sent += frag_length;
318 } while (length);
319 return PTL_OK;
320 }
321
322
323 static int
324 segmentedFetchAtomic(opal_atomic_int64_t *opcount,
325 ptl_handle_md_t result_md_h,
326 ptl_size_t result_offset,
327 ptl_handle_md_t origin_md_h,
328 ptl_size_t origin_offset,
329 ptl_size_t length,
330 ptl_size_t segment_length,
331 ptl_process_t target_id,
332 ptl_pt_index_t pt_index,
333 ptl_match_bits_t match_bits,
334 ptl_size_t target_offset,
335 void *user_ptr,
336 ptl_op_t ptl_op,
337 ptl_datatype_t ptl_dt)
338 {
339 int ret;
340 ptl_size_t sent = 0;
341
342 do {
343 opal_atomic_add_fetch_64(opcount, 1);
344
345 ptl_size_t frag_length = MIN(length, segment_length);
346 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
347 "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
348 ret = PtlFetchAtomic(result_md_h,
349 result_offset + sent,
350 origin_md_h,
351 origin_offset + sent,
352 frag_length,
353 target_id,
354 pt_index,
355 match_bits,
356 target_offset + sent,
357 user_ptr,
358 0,
359 ptl_op,
360 ptl_dt);
361 if (PTL_OK != ret) {
362 opal_atomic_add_fetch_64(opcount, -1);
363 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
364 "%s:%d PtlFetchAtomic failed with return value %d",
365 __FUNCTION__, __LINE__, ret);
366 return ret;
367 }
368 length -= frag_length;
369 sent += frag_length;
370 } while (length);
371 return PTL_OK;
372 }
373
374
375 static int
376 segmentedSwap(opal_atomic_int64_t *opcount,
377 ptl_handle_md_t result_md_h,
378 ptl_size_t result_offset,
379 ptl_handle_md_t origin_md_h,
380 ptl_size_t origin_offset,
381 ptl_size_t length,
382 ptl_size_t segment_length,
383 ptl_process_t target_id,
384 ptl_pt_index_t pt_index,
385 ptl_match_bits_t match_bits,
386 ptl_size_t target_offset,
387 void *user_ptr,
388 ptl_datatype_t ptl_dt)
389 {
390 int ret;
391 ptl_size_t sent = 0;
392
393 do {
394 opal_atomic_add_fetch_64(opcount, 1);
395
396 ptl_size_t frag_length = MIN(length, segment_length);
397 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
398 "Swap size : %lu/%lu, offset:%lu", frag_length, length, sent));
399 ret = PtlSwap(result_md_h,
400 result_offset + sent,
401 origin_md_h,
402 (ptl_size_t) origin_offset + sent,
403 frag_length,
404 target_id,
405 pt_index,
406 match_bits,
407 target_offset + sent,
408 user_ptr,
409 0,
410 NULL,
411 PTL_SWAP,
412 ptl_dt);
413 if (PTL_OK != ret) {
414 opal_atomic_add_fetch_64(opcount, -1);
415 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
416 "%s:%d PtlSwap failed with return value %d",
417 __FUNCTION__, __LINE__, ret);
418 return ret;
419 }
420 length -= frag_length;
421 sent += frag_length;
422 } while (length);
423 return PTL_OK;
424 }
425
426 static int
427 create_iov_list(const void *address,
428 int count,
429 ompi_datatype_t *datatype,
430 ptl_iovec_t **ptl_iovec,
431 ptl_size_t *ptl_iovec_count)
432 {
433 struct iovec iov[OSC_PORTALS4_IOVEC_MAX];
434 opal_convertor_t convertor;
435 uint32_t iov_count;
436 uint32_t iov_index, ptl_iovec_index;
437
438 size_t size;
439 int ret;
440 bool done;
441
442 OBJ_CONSTRUCT(&convertor, opal_convertor_t);
443 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &datatype->super, count,
444 address, 0, &convertor);
445 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
446 return ret;
447 }
448
449
450 *ptl_iovec_count = 0;
451 ptl_iovec_index = 0;
452 do {
453
454 iov_count = OSC_PORTALS4_IOVEC_MAX;
455 iov_index = 0;
456
457
458 done = opal_convertor_raw (&convertor, iov, &iov_count, &size);
459
460 *ptl_iovec_count += iov_count;
461 *ptl_iovec = (ptl_iovec_t *)realloc(*ptl_iovec, *ptl_iovec_count * sizeof(ptl_iovec_t));
462
463 while (iov_index != iov_count) {
464 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
465 "adding iov[%d].[%p,%lu] to ptl_iovec", iov_index, iov[iov_index].iov_base, iov[iov_index].iov_len));
466 (*ptl_iovec)[ptl_iovec_index].iov_base = iov[iov_index].iov_base;
467 (*ptl_iovec)[ptl_iovec_index].iov_len = iov[iov_index].iov_len;
468
469 ptl_iovec_index++;
470 iov_index++;
471 }
472
473 assert(*ptl_iovec_count == ptl_iovec_index);
474 } while (!done);
475
476 return OMPI_SUCCESS;
477
478 }
479
480
481 static int
482 get_to_iovec(ompi_osc_portals4_module_t *module,
483 const void *origin_address,
484 int origin_count,
485 ompi_datatype_t *origin_datatype,
486 ptl_process_t peer,
487 int target_count,
488 ompi_datatype_t *target_datatype,
489 size_t offset,
490 ptl_pt_index_t pt_index,
491 ptl_match_bits_t match_bits,
492 void *user_ptr)
493 {
494 int ret;
495 size_t size;
496 ptrdiff_t length, origin_lb, target_lb, extent;
497 ptl_md_t md;
498
499 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
500 PtlMDRelease(module->origin_iovec_md_h);
501 free(module->origin_iovec_list);
502 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
503 module->origin_iovec_list = NULL;
504 }
505
506 ptl_size_t iovec_count=0;
507 create_iov_list(
508 origin_address,
509 origin_count,
510 origin_datatype,
511 &module->origin_iovec_list,
512 &iovec_count);
513
514 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
515 if (OMPI_SUCCESS != ret) {
516 return ret;
517 }
518 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
519 if (OMPI_SUCCESS != ret) {
520 return ret;
521 }
522 ompi_datatype_type_size(origin_datatype, &size);
523 length = size * origin_count;
524
525 md.start = module->origin_iovec_list;
526 md.length = iovec_count;
527 if (user_ptr) {
528 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
529 } else {
530 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
531 }
532 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
533 md.ct_handle = module->ct_h;
534 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
535 if (PTL_OK != ret) {
536 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
537 "%s:%d: PtlMDBind(iovec) failed: %d\n",
538 __FILE__, __LINE__, ret);
539 return ret;
540 }
541
542 opal_atomic_add_fetch_64(&module->opcount, 1);
543
544 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
545 "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
546 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
547 ret = PtlGet(module->origin_iovec_md_h,
548 (ptl_size_t) origin_lb,
549 length,
550 peer,
551 module->pt_idx,
552 module->match_bits,
553 offset + target_lb,
554 user_ptr);
555 if (PTL_OK != ret) {
556 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
557 "%s,%d PtlGet() failed: ret = %d",
558 __FUNCTION__, __LINE__, ret));
559 opal_atomic_add_fetch_64(&module->opcount, -1);
560 return ret;
561 }
562
563 return OMPI_SUCCESS;
564 }
565
566
567
568 static int
569 atomic_get_to_iovec(ompi_osc_portals4_module_t *module,
570 const void *origin_address,
571 int origin_count,
572 ompi_datatype_t *origin_datatype,
573 ptl_process_t peer,
574 int target_count,
575 ompi_datatype_t *target_datatype,
576 size_t offset,
577 ptl_pt_index_t pt_index,
578 ptl_match_bits_t match_bits,
579 void *user_ptr)
580 {
581 int ret;
582 size_t size;
583 ptrdiff_t length, origin_lb, target_lb, extent;
584 ptl_md_t md;
585
586 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
587 PtlMDRelease(module->origin_iovec_md_h);
588 free(module->origin_iovec_list);
589 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
590 module->origin_iovec_list = NULL;
591 }
592
593 ptl_size_t iovec_count=0;
594 create_iov_list(
595 origin_address,
596 origin_count,
597 origin_datatype,
598 &module->origin_iovec_list,
599 &iovec_count);
600
601 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
602 if (OMPI_SUCCESS != ret) {
603 return ret;
604 }
605 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
606 if (OMPI_SUCCESS != ret) {
607 return ret;
608 }
609 ompi_datatype_type_size(origin_datatype, &size);
610 length = size * origin_count;
611
612 md.start = module->origin_iovec_list;
613 md.length = iovec_count;
614 if (user_ptr) {
615 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
616 } else {
617 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
618 }
619 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
620 md.ct_handle = module->ct_h;
621 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
622 if (PTL_OK != ret) {
623 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
624 "%s:%d: PtlMDBind(iovec) failed: %d\n",
625 __FILE__, __LINE__, ret);
626 return ret;
627 }
628
629 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
630 "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
631 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
632 ret = segmentedGet(&module->opcount,
633 module->origin_iovec_md_h,
634 (ptl_size_t) origin_lb,
635 length,
636 module->fetch_atomic_max,
637 peer,
638 module->pt_idx,
639 module->match_bits,
640 offset + target_lb,
641 user_ptr);
642 if (PTL_OK != ret) {
643 return ret;
644 }
645
646 return OMPI_SUCCESS;
647 }
648
649
650 static int
651 put_from_iovec(ompi_osc_portals4_module_t *module,
652 const void *origin_address,
653 int origin_count,
654 ompi_datatype_t *origin_datatype,
655 ptl_process_t peer,
656 int target_count,
657 ompi_datatype_t *target_datatype,
658 size_t offset,
659 ptl_pt_index_t pt_index,
660 ptl_match_bits_t match_bits,
661 void *user_ptr)
662 {
663 int ret;
664 size_t size;
665 ptrdiff_t length, origin_lb, target_lb, extent;
666 ptl_md_t md;
667
668 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
669 PtlMDRelease(module->origin_iovec_md_h);
670 free(module->origin_iovec_list);
671 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
672 module->origin_iovec_list = NULL;
673 }
674
675 ptl_size_t iovec_count=0;
676 create_iov_list(
677 origin_address,
678 origin_count,
679 origin_datatype,
680 &module->origin_iovec_list,
681 &iovec_count);
682
683 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
684 if (OMPI_SUCCESS != ret) {
685 return ret;
686 }
687 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
688 if (OMPI_SUCCESS != ret) {
689 return ret;
690 }
691 ompi_datatype_type_size(origin_datatype, &size);
692 length = size * origin_count;
693
694 md.start = module->origin_iovec_list;
695 md.length = iovec_count;
696 if (user_ptr) {
697 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
698 } else {
699 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
700 }
701 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
702 md.ct_handle = module->ct_h;
703 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
704 if (PTL_OK != ret) {
705 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
706 "%s:%d: PtlMDBind(iovec) failed: %d\n",
707 __FILE__, __LINE__, ret);
708 return ret;
709 }
710
711 opal_atomic_add_fetch_64(&module->opcount, 1);
712
713 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
714 "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
715 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
716 ret = PtlPut(module->origin_iovec_md_h,
717 (ptl_size_t) origin_lb,
718 length,
719 PTL_ACK_REQ,
720 peer,
721 module->pt_idx,
722 module->match_bits,
723 offset + target_lb,
724 user_ptr,
725 0);
726 if (PTL_OK != ret) {
727 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
728 "%s,%d PtlPut() failed: ret = %d",
729 __FUNCTION__, __LINE__, ret));
730 opal_atomic_add_fetch_64(&module->opcount, -1);
731 return ret;
732 }
733
734 return OMPI_SUCCESS;
735 }
736
737
738
739 static int
740 atomic_put_from_iovec(ompi_osc_portals4_module_t *module,
741 const void *origin_address,
742 int origin_count,
743 ompi_datatype_t *origin_datatype,
744 ptl_process_t peer,
745 int target_count,
746 ompi_datatype_t *target_datatype,
747 size_t offset,
748 ptl_pt_index_t pt_index,
749 ptl_match_bits_t match_bits,
750 void *user_ptr)
751 {
752 int ret;
753 size_t size;
754 ptrdiff_t length, origin_lb, target_lb, extent;
755 ptl_md_t md;
756
757 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
758 PtlMDRelease(module->origin_iovec_md_h);
759 free(module->origin_iovec_list);
760 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
761 module->origin_iovec_list = NULL;
762 }
763
764 ptl_size_t iovec_count=0;
765 create_iov_list(
766 origin_address,
767 origin_count,
768 origin_datatype,
769 &module->origin_iovec_list,
770 &iovec_count);
771
772 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
773 if (OMPI_SUCCESS != ret) {
774 return ret;
775 }
776 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
777 if (OMPI_SUCCESS != ret) {
778 return ret;
779 }
780 ompi_datatype_type_size(origin_datatype, &size);
781 length = size * origin_count;
782
783 md.start = module->origin_iovec_list;
784 md.length = iovec_count;
785 if (user_ptr) {
786 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
787 } else {
788 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
789 }
790 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
791 md.ct_handle = module->ct_h;
792 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
793 if (PTL_OK != ret) {
794 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
795 "%s:%d: PtlMDBind(iovec) failed: %d\n",
796 __FILE__, __LINE__, ret);
797 return ret;
798 }
799
800 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
801 "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
802 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
803 ret = segmentedPut(&module->opcount,
804 module->origin_iovec_md_h,
805 (ptl_size_t) origin_lb,
806 length,
807 module->atomic_max,
808 PTL_ACK_REQ,
809 peer,
810 module->pt_idx,
811 module->match_bits,
812 offset + target_lb,
813 NULL,
814 0);
815 if (OMPI_SUCCESS != ret) {
816 return ret;
817 }
818
819 return OMPI_SUCCESS;
820 }
821
822
823 static int
824 atomic_from_iovec(ompi_osc_portals4_module_t *module,
825 const void *origin_address,
826 int origin_count,
827 ompi_datatype_t *origin_datatype,
828 ptl_process_t peer,
829 int target_count,
830 ompi_datatype_t *target_datatype,
831 size_t offset,
832 ptl_pt_index_t pt_index,
833 ptl_match_bits_t match_bits,
834 struct ompi_op_t *op,
835 void *user_ptr)
836 {
837 int ret;
838 size_t size;
839 ptrdiff_t length, origin_lb, target_lb, extent;
840 ptl_md_t md;
841 ptl_op_t ptl_op;
842 ptl_datatype_t ptl_dt;
843
844 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
845 PtlMDRelease(module->origin_iovec_md_h);
846 free(module->origin_iovec_list);
847 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
848 module->origin_iovec_list = NULL;
849 }
850
851 ptl_size_t iovec_count=0;
852 create_iov_list(
853 origin_address,
854 origin_count,
855 origin_datatype,
856 &module->origin_iovec_list,
857 &iovec_count);
858
859 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
860 if (OMPI_SUCCESS != ret) {
861 opal_output(ompi_osc_base_framework.framework_output,
862 "datatype is not currently supported");
863 return OMPI_ERR_NOT_SUPPORTED;
864 }
865 ret = ompi_osc_portals4_get_op(op, &ptl_op);
866 if (OMPI_SUCCESS != ret) {
867 opal_output(ompi_osc_base_framework.framework_output,
868 "operation is not currently supported");
869 return OMPI_ERR_NOT_SUPPORTED;
870 }
871
872 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
873 if (OMPI_SUCCESS != ret) {
874 return ret;
875 }
876 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
877 if (OMPI_SUCCESS != ret) {
878 return ret;
879 }
880 ompi_datatype_type_size(origin_datatype, &size);
881 length = size * origin_count;
882
883 md.start = module->origin_iovec_list;
884 md.length = iovec_count;
885 if (user_ptr) {
886 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
887 } else {
888 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
889 }
890 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
891 md.ct_handle = module->ct_h;
892 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
893 if (PTL_OK != ret) {
894 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
895 "%s:%d: PtlMDBind(iovec) failed: %d\n",
896 __FILE__, __LINE__, ret);
897 return ret;
898 }
899
900 ret = segmentedAtomic(&module->opcount,
901 module->origin_iovec_md_h,
902 (ptl_size_t) origin_lb,
903 length,
904 module->atomic_max,
905 peer,
906 module->pt_idx,
907 module->match_bits,
908 offset + target_lb,
909 user_ptr,
910 ptl_op,
911 ptl_dt);
912 if (OMPI_SUCCESS != ret) {
913 return ret;
914 }
915
916 return OMPI_SUCCESS;
917 }
918
919
920 static int
921 swap_to_iovec(ompi_osc_portals4_module_t *module,
922 const void *result_address,
923 int result_count,
924 ompi_datatype_t *result_datatype,
925 const void *origin_address,
926 int origin_count,
927 ompi_datatype_t *origin_datatype,
928 ptl_process_t peer,
929 int target_count,
930 ompi_datatype_t *target_datatype,
931 size_t offset,
932 ptl_pt_index_t pt_index,
933 ptl_match_bits_t match_bits,
934 void *user_ptr)
935 {
936 int ret;
937 size_t size;
938 ptl_size_t iovec_count=0;
939 ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
940 ptl_md_t md;
941 ptl_datatype_t ptl_dt;
942
943 if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
944 PtlMDRelease(module->result_iovec_md_h);
945 free(module->result_iovec_list);
946 module->result_iovec_md_h = PTL_INVALID_HANDLE;
947 module->result_iovec_list = NULL;
948 }
949
950 create_iov_list(
951 result_address,
952 result_count,
953 result_datatype,
954 &module->result_iovec_list,
955 &iovec_count);
956
957 md.start = module->result_iovec_list;
958 md.length = iovec_count;
959 if (user_ptr) {
960 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
961 } else {
962 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
963 }
964 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
965 md.ct_handle = module->ct_h;
966 ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
967 if (PTL_OK != ret) {
968 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
969 "%s:%d: PtlMDBind(iovec) failed: %d\n",
970 __FILE__, __LINE__, ret);
971 return ret;
972 }
973
974 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
975 PtlMDRelease(module->origin_iovec_md_h);
976 free(module->origin_iovec_list);
977 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
978 module->origin_iovec_list = NULL;
979 }
980
981 create_iov_list(
982 origin_address,
983 origin_count,
984 origin_datatype,
985 &module->origin_iovec_list,
986 &iovec_count);
987
988 md.start = module->origin_iovec_list;
989 md.length = iovec_count;
990 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
991 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
992 md.ct_handle = module->ct_h;
993 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
994 if (PTL_OK != ret) {
995 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
996 "%s:%d: PtlMDBind(iovec) failed: %d\n",
997 __FILE__, __LINE__, ret);
998 return ret;
999 }
1000
1001 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1002 if (OMPI_SUCCESS != ret) {
1003 opal_output(ompi_osc_base_framework.framework_output,
1004 "datatype is not currently supported");
1005 return OMPI_ERR_NOT_SUPPORTED;
1006 }
1007
1008 ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1009 if (OMPI_SUCCESS != ret) {
1010 return ret;
1011 }
1012 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1013 if (OMPI_SUCCESS != ret) {
1014 return ret;
1015 }
1016 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1017 if (OMPI_SUCCESS != ret) {
1018 return ret;
1019 }
1020 ompi_datatype_type_size(origin_datatype, &size);
1021 length = size * origin_count;
1022
1023 ret = segmentedSwap(&module->opcount,
1024 module->result_iovec_md_h,
1025 (ptl_size_t) result_lb,
1026 module->origin_iovec_md_h,
1027 (ptl_size_t) origin_lb,
1028 length,
1029 module->fetch_atomic_max,
1030 peer,
1031 module->pt_idx,
1032 module->match_bits,
1033 offset + target_lb,
1034 user_ptr,
1035 ptl_dt);
1036 if (OMPI_SUCCESS != ret) {
1037 return ret;
1038 }
1039
1040 return OMPI_SUCCESS;
1041 }
1042
1043
1044 static int
1045 fetch_atomic_to_iovec(ompi_osc_portals4_module_t *module,
1046 const void *result_address,
1047 int result_count,
1048 ompi_datatype_t *result_datatype,
1049 const void *origin_address,
1050 int origin_count,
1051 ompi_datatype_t *origin_datatype,
1052 ptl_process_t peer,
1053 int target_count,
1054 ompi_datatype_t *target_datatype,
1055 size_t offset,
1056 ptl_pt_index_t pt_index,
1057 ptl_match_bits_t match_bits,
1058 struct ompi_op_t *op,
1059 void *user_ptr)
1060 {
1061 int ret;
1062 size_t size;
1063 ptl_size_t iovec_count=0;
1064 ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
1065 ptl_md_t md;
1066 ptl_op_t ptl_op;
1067 ptl_datatype_t ptl_dt;
1068
1069 if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
1070 PtlMDRelease(module->result_iovec_md_h);
1071 free(module->result_iovec_list);
1072 module->result_iovec_md_h = PTL_INVALID_HANDLE;
1073 module->result_iovec_list = NULL;
1074 }
1075
1076 create_iov_list(
1077 result_address,
1078 result_count,
1079 result_datatype,
1080 &module->result_iovec_list,
1081 &iovec_count);
1082
1083 md.start = module->result_iovec_list;
1084 md.length = iovec_count;
1085 if (user_ptr) {
1086 md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1087 } else {
1088 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1089 }
1090 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1091 md.ct_handle = module->ct_h;
1092 ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
1093 if (PTL_OK != ret) {
1094 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1095 "%s:%d: PtlMDBind(iovec) failed: %d\n",
1096 __FILE__, __LINE__, ret);
1097 return ret;
1098 }
1099
1100 if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
1101 PtlMDRelease(module->origin_iovec_md_h);
1102 free(module->origin_iovec_list);
1103 module->origin_iovec_md_h = PTL_INVALID_HANDLE;
1104 module->origin_iovec_list = NULL;
1105 }
1106
1107 create_iov_list(
1108 origin_address,
1109 origin_count,
1110 origin_datatype,
1111 &module->origin_iovec_list,
1112 &iovec_count);
1113
1114 md.start = module->origin_iovec_list;
1115 md.length = iovec_count;
1116 md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1117 md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1118 md.ct_handle = module->ct_h;
1119 ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
1120 if (PTL_OK != ret) {
1121 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1122 "%s:%d: PtlMDBind(iovec) failed: %d\n",
1123 __FILE__, __LINE__, ret);
1124 return ret;
1125 }
1126
1127 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1128 if (OMPI_SUCCESS != ret) {
1129 opal_output(ompi_osc_base_framework.framework_output,
1130 "datatype is not currently supported");
1131 return OMPI_ERR_NOT_SUPPORTED;
1132 }
1133 ret = ompi_osc_portals4_get_op(op, &ptl_op);
1134 if (OMPI_SUCCESS != ret) {
1135 opal_output(ompi_osc_base_framework.framework_output,
1136 "operation is not currently supported");
1137 return OMPI_ERR_NOT_SUPPORTED;
1138 }
1139
1140 ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1141 if (OMPI_SUCCESS != ret) {
1142 return ret;
1143 }
1144 ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1145 if (OMPI_SUCCESS != ret) {
1146 return ret;
1147 }
1148 ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1149 if (OMPI_SUCCESS != ret) {
1150 return ret;
1151 }
1152 ompi_datatype_type_size(origin_datatype, &size);
1153 length = size * origin_count;
1154
1155 ret = segmentedFetchAtomic(&module->opcount,
1156 module->result_iovec_md_h,
1157 (ptl_size_t) result_lb,
1158 module->origin_iovec_md_h,
1159 (ptl_size_t) origin_lb,
1160 length,
1161 module->fetch_atomic_max,
1162 peer,
1163 module->pt_idx,
1164 module->match_bits,
1165 offset + target_lb,
1166 user_ptr,
1167 ptl_op,
1168 ptl_dt);
1169 if (OMPI_SUCCESS != ret) {
1170 return ret;
1171 }
1172
1173 return OMPI_SUCCESS;
1174 }
1175
1176
1177
1178
1179
1180
1181 static int
1182 put_to_noncontig(opal_atomic_int64_t *opcount,
1183 ptl_handle_md_t md_h,
1184 const void *origin_address,
1185 int origin_count,
1186 ompi_datatype_t *origin_datatype,
1187 ptl_process_t peer,
1188 int target_count,
1189 ompi_datatype_t *target_datatype,
1190 size_t offset,
1191 ptl_pt_index_t pt_index,
1192 ptl_match_bits_t match_bits,
1193 void *user_ptr)
1194 {
1195 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1196 opal_convertor_t origin_convertor, target_convertor;
1197 uint32_t origin_iov_count, target_iov_count;
1198 uint32_t origin_iov_index, target_iov_index;
1199
1200 size_t origin_size, target_size, rdma_len;
1201 size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1202 int ret;
1203 bool done;
1204
1205
1206
1207 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1208 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1209 (void*)origin_address, 0, &origin_convertor);
1210 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1211 return ret;
1212 }
1213
1214 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1215 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1216 (void *)NULL, 0, &target_convertor);
1217 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1218 return ret;
1219 }
1220
1221 origin_iov_index = 0;
1222 origin_iov_count = 0;
1223
1224 do {
1225
1226 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1227 target_iov_index = 0;
1228
1229
1230 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1231
1232
1233 while (target_iov_index != target_iov_count) {
1234 if (origin_iov_index == origin_iov_count) {
1235
1236 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1237 origin_iov_index = 0;
1238 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1239 }
1240
1241
1242 assert (0 != origin_iov_count);
1243
1244
1245 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1246
1247 opal_atomic_add_fetch_64(opcount, 1);
1248
1249 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1250 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1251 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1252 (unsigned long) target_iovec[target_iov_index].iov_len));
1253
1254 ret = PtlPut(md_h,
1255 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1256 rdma_len,
1257 PTL_ACK_REQ,
1258 peer,
1259 pt_index,
1260 match_bits,
1261 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1262 user_ptr,
1263 0);
1264 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1265 opal_atomic_add_fetch_64(opcount, -1);
1266 return ret;
1267 }
1268
1269
1270 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1271 target_iovec[target_iov_index].iov_len -= rdma_len;
1272 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1273 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1274
1275 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1276 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1277 }
1278 } while (!done);
1279
1280
1281 opal_convertor_cleanup (&origin_convertor);
1282 OBJ_DESTRUCT(&origin_convertor);
1283 opal_convertor_cleanup (&target_convertor);
1284 OBJ_DESTRUCT(&target_convertor);
1285
1286 return OMPI_SUCCESS;
1287 }
1288
1289
1290 static int
1291 atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
1292 ptl_handle_md_t md_h,
1293 const void *origin_address,
1294 int origin_count,
1295 ompi_datatype_t *origin_datatype,
1296 ptl_process_t peer,
1297 int target_count,
1298 ompi_datatype_t *target_datatype,
1299 size_t offset,
1300 ptl_pt_index_t pt_index,
1301 ptl_match_bits_t match_bits,
1302 void *user_ptr)
1303 {
1304 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1305 opal_convertor_t origin_convertor, target_convertor;
1306 uint32_t origin_iov_count, target_iov_count;
1307 uint32_t origin_iov_index, target_iov_index;
1308
1309 size_t origin_size, target_size, rdma_len;
1310 size_t max_rdma_len = module->atomic_max;
1311 int ret;
1312 bool done;
1313
1314
1315
1316 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1317 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1318 (void*)origin_address, 0, &origin_convertor);
1319 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1320 return ret;
1321 }
1322
1323 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1324 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1325 (void *)NULL, 0, &target_convertor);
1326 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1327 return ret;
1328 }
1329
1330 origin_iov_index = 0;
1331 origin_iov_count = 0;
1332
1333 do {
1334
1335 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1336 target_iov_index = 0;
1337
1338
1339 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1340
1341
1342 while (target_iov_index != target_iov_count) {
1343 if (origin_iov_index == origin_iov_count) {
1344
1345 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1346 origin_iov_index = 0;
1347 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1348 }
1349
1350
1351 assert (0 != origin_iov_count);
1352
1353
1354 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1355
1356 opal_atomic_add_fetch_64(&module->opcount, 1);
1357
1358 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1359 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1360 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1361 (unsigned long) target_iovec[target_iov_index].iov_len));
1362
1363 ret = PtlPut(md_h,
1364 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1365 rdma_len,
1366 PTL_ACK_REQ,
1367 peer,
1368 pt_index,
1369 match_bits,
1370 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1371 user_ptr,
1372 0);
1373 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1374 opal_atomic_add_fetch_64(&module->opcount, -1);
1375 return ret;
1376 }
1377
1378
1379 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1380 target_iovec[target_iov_index].iov_len -= rdma_len;
1381 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1382 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1383
1384 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1385 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1386 }
1387 } while (!done);
1388
1389 return OMPI_SUCCESS;
1390 }
1391
1392
1393 static int
1394 atomic_to_noncontig(ompi_osc_portals4_module_t *module,
1395 ptl_handle_md_t md_h,
1396 const void *origin_address,
1397 int origin_count,
1398 ompi_datatype_t *origin_datatype,
1399 ptl_process_t peer,
1400 int target_count,
1401 ompi_datatype_t *target_datatype,
1402 size_t offset,
1403 ptl_pt_index_t pt_index,
1404 ptl_match_bits_t match_bits,
1405 struct ompi_op_t *op,
1406 void *user_ptr)
1407 {
1408 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1409 opal_convertor_t origin_convertor, target_convertor;
1410 uint32_t origin_iov_count, target_iov_count;
1411 uint32_t origin_iov_index, target_iov_index;
1412 ptl_op_t ptl_op;
1413 ptl_datatype_t ptl_dt;
1414
1415 size_t origin_size, target_size, atomic_len;
1416 int ret;
1417 bool done;
1418
1419
1420
1421 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1422 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1423 (void*)origin_address, 0, &origin_convertor);
1424 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1425 return ret;
1426 }
1427
1428 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1429 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1430 (void *)NULL, 0, &target_convertor);
1431 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1432 return ret;
1433 }
1434
1435 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1436 if (OMPI_SUCCESS != ret) {
1437 opal_output(ompi_osc_base_framework.framework_output,
1438 "datatype is not currently supported");
1439 return OMPI_ERR_NOT_SUPPORTED;
1440 }
1441 ret = ompi_osc_portals4_get_op(op, &ptl_op);
1442 if (OMPI_SUCCESS != ret) {
1443 opal_output(ompi_osc_base_framework.framework_output,
1444 "operation is not currently supported");
1445 return OMPI_ERR_NOT_SUPPORTED;
1446 }
1447
1448 origin_iov_index = 0;
1449 origin_iov_count = 0;
1450
1451 do {
1452
1453 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1454 target_iov_index = 0;
1455
1456
1457 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1458
1459
1460 while (target_iov_index != target_iov_count) {
1461 if (origin_iov_index == origin_iov_count) {
1462
1463 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1464 origin_iov_index = 0;
1465 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1466 }
1467
1468
1469 assert (0 != origin_iov_count);
1470
1471
1472 atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
1473
1474 opal_atomic_add_fetch_64(&module->opcount, 1);
1475
1476 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1477 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1478 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1479 (unsigned long) target_iovec[target_iov_index].iov_len));
1480
1481 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1482 "%s,%d Atomic", __FUNCTION__, __LINE__));
1483 ret = PtlAtomic(md_h,
1484 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1485 atomic_len,
1486 PTL_ACK_REQ,
1487 peer,
1488 pt_index,
1489 match_bits,
1490 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1491 user_ptr,
1492 0,
1493 ptl_op,
1494 ptl_dt);
1495 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1496 opal_atomic_add_fetch_64(&module->opcount, -1);
1497 return ret;
1498 }
1499
1500
1501 origin_iovec[origin_iov_index].iov_len -= atomic_len;
1502 target_iovec[target_iov_index].iov_len -= atomic_len;
1503 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + atomic_len);
1504 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + atomic_len);
1505
1506 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1507 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1508 }
1509 } while (!done);
1510
1511 return OMPI_SUCCESS;
1512 }
1513
1514
1515 static int
1516 get_from_noncontig(opal_atomic_int64_t *opcount,
1517 ptl_handle_md_t md_h,
1518 const void *origin_address,
1519 int origin_count,
1520 ompi_datatype_t *origin_datatype,
1521 ptl_process_t peer,
1522 int target_count,
1523 ompi_datatype_t *target_datatype,
1524 size_t offset,
1525 ptl_pt_index_t pt_index,
1526 ptl_match_bits_t match_bits,
1527 void *user_ptr)
1528 {
1529 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1530 opal_convertor_t origin_convertor, target_convertor;
1531 uint32_t origin_iov_count, target_iov_count;
1532 uint32_t origin_iov_index, target_iov_index;
1533
1534 size_t origin_size, target_size, rdma_len;
1535 size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1536 int ret;
1537 bool done;
1538
1539
1540
1541 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1542 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1543 (void*)origin_address, 0, &origin_convertor);
1544 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1545 return ret;
1546 }
1547
1548 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1549 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1550 (void *)NULL, 0, &target_convertor);
1551 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1552 return ret;
1553 }
1554
1555 origin_iov_index = 0;
1556 origin_iov_count = 0;
1557
1558 do {
1559
1560 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1561 target_iov_index = 0;
1562
1563
1564 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1565
1566
1567 while (target_iov_index != target_iov_count) {
1568 if (origin_iov_index == origin_iov_count) {
1569
1570 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1571 origin_iov_index = 0;
1572 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1573 }
1574
1575
1576 assert (0 != origin_iov_count);
1577
1578
1579 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1580
1581 opal_atomic_add_fetch_64(opcount, 1);
1582
1583 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1584 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1585 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1586 (unsigned long) target_iovec[target_iov_index].iov_len));
1587
1588 ret = PtlGet(md_h,
1589 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1590 rdma_len,
1591 peer,
1592 pt_index,
1593 match_bits,
1594 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1595 user_ptr);
1596 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1597 opal_atomic_add_fetch_64(opcount, -1);
1598 return ret;
1599 }
1600
1601
1602 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1603 target_iovec[target_iov_index].iov_len -= rdma_len;
1604 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1605 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1606
1607 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1608 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1609 }
1610 } while (!done);
1611
1612 return OMPI_SUCCESS;
1613 }
1614
1615
1616 static int
1617 atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
1618 ptl_handle_md_t md_h,
1619 const void *origin_address,
1620 int origin_count,
1621 ompi_datatype_t *origin_datatype,
1622 ptl_process_t peer,
1623 int target_count,
1624 ompi_datatype_t *target_datatype,
1625 size_t offset,
1626 ptl_pt_index_t pt_index,
1627 ptl_match_bits_t match_bits,
1628 void *user_ptr)
1629 {
1630 struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1631 opal_convertor_t origin_convertor, target_convertor;
1632 uint32_t origin_iov_count, target_iov_count;
1633 uint32_t origin_iov_index, target_iov_index;
1634
1635 size_t origin_size, target_size, rdma_len;
1636 size_t max_rdma_len = module->fetch_atomic_max;
1637 int ret;
1638 bool done;
1639
1640
1641
1642 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1643 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1644 (void*)origin_address, 0, &origin_convertor);
1645 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1646 return ret;
1647 }
1648
1649 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1650 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1651 (void *)NULL, 0, &target_convertor);
1652 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1653 return ret;
1654 }
1655
1656 origin_iov_index = 0;
1657 origin_iov_count = 0;
1658
1659 do {
1660
1661 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1662 target_iov_index = 0;
1663
1664
1665 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1666
1667
1668 while (target_iov_index != target_iov_count) {
1669 if (origin_iov_index == origin_iov_count) {
1670
1671 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1672 origin_iov_index = 0;
1673 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1674 }
1675
1676
1677 assert (0 != origin_iov_count);
1678
1679
1680 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1681
1682 opal_atomic_add_fetch_64(&module->opcount, 1);
1683
1684 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1685 "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1686 origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1687 (unsigned long) target_iovec[target_iov_index].iov_len));
1688
1689 ret = PtlGet(md_h,
1690 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1691 rdma_len,
1692 peer,
1693 pt_index,
1694 match_bits,
1695 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1696 user_ptr);
1697 if (OPAL_UNLIKELY(PTL_OK != ret)) {
1698 opal_atomic_add_fetch_64(&module->opcount, -1);
1699 return ret;
1700 }
1701
1702
1703 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1704 target_iovec[target_iov_index].iov_len -= rdma_len;
1705 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1706 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1707
1708 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1709 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1710 }
1711 } while (!done);
1712
1713 return OMPI_SUCCESS;
1714 }
1715
1716
1717 static int
1718 swap_from_noncontig(ompi_osc_portals4_module_t *module,
1719 ptl_handle_md_t result_md_h,
1720 const void *result_address,
1721 int result_count,
1722 ompi_datatype_t *result_datatype,
1723 ptl_handle_md_t origin_md_h,
1724 const void *origin_address,
1725 int origin_count,
1726 ompi_datatype_t *origin_datatype,
1727 ptl_process_t peer,
1728 int target_count,
1729 ompi_datatype_t *target_datatype,
1730 size_t offset,
1731 ptl_pt_index_t pt_index,
1732 ptl_match_bits_t match_bits,
1733 void *user_ptr)
1734 {
1735 struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1736 opal_convertor_t result_convertor, origin_convertor, target_convertor;
1737 uint32_t result_iov_count, origin_iov_count, target_iov_count;
1738 uint32_t result_iov_index, origin_iov_index, target_iov_index;
1739
1740 size_t result_size, origin_size, target_size, rdma_len;
1741 size_t max_rdma_len = module->fetch_atomic_max;
1742 ptl_datatype_t ptl_dt;
1743
1744 int ret;
1745 bool done;
1746
1747
1748
1749 OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1750 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1751 (void*)result_address, 0, &result_convertor);
1752 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1753 return ret;
1754 }
1755
1756 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1757 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1758 (void*)origin_address, 0, &origin_convertor);
1759 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1760 return ret;
1761 }
1762
1763 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1764 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1765 (void *)NULL, 0, &target_convertor);
1766 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1767 return ret;
1768 }
1769
1770 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1771 if (OMPI_SUCCESS != ret) {
1772 opal_output(ompi_osc_base_framework.framework_output,
1773 "datatype is not currently supported");
1774 return OMPI_ERR_NOT_SUPPORTED;
1775 }
1776
1777 result_iov_index = 0;
1778 result_iov_count = 0;
1779 origin_iov_index = 0;
1780 origin_iov_count = 0;
1781
1782 do {
1783
1784 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1785 target_iov_index = 0;
1786
1787
1788 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1789
1790
1791 while (target_iov_index != target_iov_count) {
1792 if (result_iov_index == result_iov_count) {
1793
1794 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1795 result_iov_index = 0;
1796 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1797 }
1798 if (origin_iov_index == origin_iov_count) {
1799
1800 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1801 origin_iov_index = 0;
1802 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1803 }
1804
1805
1806 assert (0 != result_iov_count);
1807 assert (0 != origin_iov_count);
1808
1809
1810 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1811
1812 opal_atomic_add_fetch_64(&module->opcount, 1);
1813
1814 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1815 "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1816 result_iovec[result_iov_index].iov_base,
1817 origin_iovec[origin_iov_index].iov_base,
1818 target_iovec[target_iov_index].iov_base,
1819 (unsigned long) target_iovec[target_iov_index].iov_len));
1820
1821 ret = PtlSwap(result_md_h,
1822 (ptl_size_t)result_iovec[result_iov_index].iov_base,
1823 origin_md_h,
1824 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1825 rdma_len,
1826 peer,
1827 pt_index,
1828 match_bits,
1829 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1830 user_ptr,
1831 0,
1832 NULL,
1833 PTL_SWAP,
1834 ptl_dt);
1835 if (PTL_OK != ret) {
1836 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1837 "%s:%d PtlSwap failed with return value %d",
1838 __FUNCTION__, __LINE__, ret);
1839 opal_atomic_add_fetch_64(&module->opcount, -1);
1840 return ret;
1841 }
1842
1843
1844 result_iovec[result_iov_index].iov_len -= rdma_len;
1845 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1846 target_iovec[target_iov_index].iov_len -= rdma_len;
1847 result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
1848 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1849 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1850
1851 result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
1852 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1853 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1854 }
1855 } while (!done);
1856
1857 return OMPI_SUCCESS;
1858 }
1859
1860
1861 static int
1862 fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
1863 ptl_handle_md_t result_md_h,
1864 const void *result_address,
1865 int result_count,
1866 ompi_datatype_t *result_datatype,
1867 ptl_handle_md_t origin_md_h,
1868 const void *origin_address,
1869 int origin_count,
1870 ompi_datatype_t *origin_datatype,
1871 ptl_process_t peer,
1872 int target_count,
1873 ompi_datatype_t *target_datatype,
1874 size_t offset,
1875 ptl_pt_index_t pt_index,
1876 ptl_match_bits_t match_bits,
1877 struct ompi_op_t *op,
1878 void *user_ptr)
1879 {
1880 struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1881 opal_convertor_t result_convertor, origin_convertor, target_convertor;
1882 uint32_t result_iov_count, origin_iov_count, target_iov_count;
1883 uint32_t result_iov_index, origin_iov_index, target_iov_index;
1884
1885 size_t result_size, origin_size, target_size, rdma_len;
1886 size_t max_rdma_len = module->fetch_atomic_max;
1887 ptl_op_t ptl_op;
1888 ptl_datatype_t ptl_dt;
1889
1890 int ret;
1891 bool done;
1892
1893
1894
1895 OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1896 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1897 (void*)result_address, 0, &result_convertor);
1898 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1899 return ret;
1900 }
1901
1902 OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1903 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1904 (void*)origin_address, 0, &origin_convertor);
1905 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1906 return ret;
1907 }
1908
1909 OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1910 ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1911 (void *)NULL, 0, &target_convertor);
1912 if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1913 return ret;
1914 }
1915
1916 ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1917 if (OMPI_SUCCESS != ret) {
1918 opal_output(ompi_osc_base_framework.framework_output,
1919 "datatype is not currently supported");
1920 return OMPI_ERR_NOT_SUPPORTED;
1921 }
1922 ret = ompi_osc_portals4_get_op(op, &ptl_op);
1923 if (OMPI_SUCCESS != ret) {
1924 opal_output(ompi_osc_base_framework.framework_output,
1925 "operation is not currently supported");
1926 return OMPI_ERR_NOT_SUPPORTED;
1927 }
1928
1929 result_iov_index = 0;
1930 result_iov_count = 0;
1931 origin_iov_index = 0;
1932 origin_iov_count = 0;
1933
1934 do {
1935
1936 target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1937 target_iov_index = 0;
1938
1939
1940 done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1941
1942
1943 while (target_iov_index != target_iov_count) {
1944 if (result_iov_index == result_iov_count) {
1945
1946 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1947 result_iov_index = 0;
1948 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1949 }
1950 if (origin_iov_index == origin_iov_count) {
1951
1952 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1953 origin_iov_index = 0;
1954 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1955 }
1956
1957
1958 assert (0 != result_iov_count);
1959 assert (0 != origin_iov_count);
1960
1961
1962 rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1963
1964 opal_atomic_add_fetch_64(&module->opcount, 1);
1965
1966 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1967 "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1968 result_iovec[result_iov_index].iov_base,
1969 origin_iovec[origin_iov_index].iov_base,
1970 target_iovec[target_iov_index].iov_base,
1971 (unsigned long) target_iovec[target_iov_index].iov_len));
1972
1973 ret = PtlFetchAtomic(result_md_h,
1974 (ptl_size_t)result_iovec[result_iov_index].iov_base,
1975 origin_md_h,
1976 (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1977 rdma_len,
1978 peer,
1979 pt_index,
1980 match_bits,
1981 offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1982 user_ptr,
1983 0,
1984 ptl_op,
1985 ptl_dt);
1986 if (PTL_OK != ret) {
1987 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1988 "%s:%d PtlFetchAtomic failed with return value %d",
1989 __FUNCTION__, __LINE__, ret);
1990 opal_atomic_add_fetch_64(&module->opcount, -1);
1991 return ret;
1992 }
1993
1994
1995 result_iovec[result_iov_index].iov_len -= rdma_len;
1996 origin_iovec[origin_iov_index].iov_len -= rdma_len;
1997 target_iovec[target_iov_index].iov_len -= rdma_len;
1998 result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
1999 origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
2000 target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
2001
2002 result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
2003 origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
2004 target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
2005 }
2006 } while (!done);
2007
2008 return OMPI_SUCCESS;
2009 }
2010
2011 int
2012 ompi_osc_portals4_rput(const void *origin_addr,
2013 int origin_count,
2014 struct ompi_datatype_t *origin_dt,
2015 int target,
2016 ptrdiff_t target_disp,
2017 int target_count,
2018 struct ompi_datatype_t *target_dt,
2019 struct ompi_win_t *win,
2020 struct ompi_request_t **ompi_req)
2021 {
2022 int ret;
2023 ompi_osc_portals4_request_t *request;
2024 ompi_osc_portals4_module_t *module =
2025 (ompi_osc_portals4_module_t*) win->w_osc_module;
2026 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2027 size_t size, offset;
2028 ptrdiff_t length, origin_lb, target_lb, extent;
2029
2030 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2031 "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2032 (unsigned long) origin_addr, origin_count,
2033 origin_dt->name, target, (unsigned long) target_disp,
2034 target_count, target_dt->name,
2035 (unsigned long) win));
2036
2037 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2038 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2039 *ompi_req = &request->super;
2040
2041 offset = get_displacement(module, target) * target_disp;
2042
2043 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2044 ret = put_to_noncontig(&module->opcount,
2045 module->req_md_h,
2046 origin_addr,
2047 origin_count,
2048 origin_dt,
2049 peer,
2050 target_count,
2051 target_dt,
2052 offset,
2053 module->pt_idx,
2054 module->match_bits,
2055 request);
2056 if (PTL_OK != ret) {
2057 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2058 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2059 "%s,%d put_to_noncontig() failed: ret = %d",
2060 __FUNCTION__, __LINE__, ret));
2061 return ret;
2062 }
2063 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2064 ret = put_from_iovec(module,
2065 origin_addr,
2066 origin_count,
2067 origin_dt,
2068 peer,
2069 target_count,
2070 target_dt,
2071 offset,
2072 module->pt_idx,
2073 module->match_bits,
2074 request);
2075 if (PTL_OK != ret) {
2076 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2077 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2078 "%s,%d put_from_iovec() failed: ret = %d",
2079 __FUNCTION__, __LINE__, ret));
2080 return ret;
2081 }
2082 } else {
2083 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2084 if (OMPI_SUCCESS != ret) {
2085 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2086 return ret;
2087 }
2088 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2089 if (OMPI_SUCCESS != ret) {
2090 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2091 return ret;
2092 }
2093 ompi_datatype_type_size(origin_dt, &size);
2094 length = size * origin_count;
2095
2096 request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2097
2098 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2099 "%s,%d RPut(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2100 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2101 ret = segmentedPut(&module->opcount,
2102 module->req_md_h,
2103 (ptl_size_t) origin_addr + origin_lb,
2104 length,
2105 mca_osc_portals4_component.ptl_max_msg_size,
2106 PTL_ACK_REQ,
2107 peer,
2108 module->pt_idx,
2109 module->match_bits,
2110 offset + target_lb,
2111 request,
2112 0);
2113 if (OMPI_SUCCESS != ret) {
2114 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2115 return ret;
2116 }
2117 }
2118
2119 return OMPI_SUCCESS;
2120 }
2121
2122
2123 int
2124 ompi_osc_portals4_rget(void *origin_addr,
2125 int origin_count,
2126 struct ompi_datatype_t *origin_dt,
2127 int target,
2128 ptrdiff_t target_disp,
2129 int target_count,
2130 struct ompi_datatype_t *target_dt,
2131 struct ompi_win_t *win,
2132 struct ompi_request_t **ompi_req)
2133 {
2134 int ret;
2135 ompi_osc_portals4_request_t *request;
2136 ompi_osc_portals4_module_t *module =
2137 (ompi_osc_portals4_module_t*) win->w_osc_module;
2138 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2139 size_t offset, size;
2140 ptrdiff_t length, origin_lb, target_lb, extent;
2141
2142 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2143 "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2144 (unsigned long) origin_addr, origin_count,
2145 origin_dt->name, target, (unsigned long) target_disp,
2146 target_count, target_dt->name,
2147 (unsigned long) win));
2148
2149 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2150 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2151 *ompi_req = &request->super;
2152
2153 offset = get_displacement(module, target) * target_disp;
2154
2155 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2156 ret = get_from_noncontig(&module->opcount,
2157 module->req_md_h,
2158 origin_addr,
2159 origin_count,
2160 origin_dt,
2161 peer,
2162 target_count,
2163 target_dt,
2164 offset,
2165 module->pt_idx,
2166 module->match_bits,
2167 request);
2168 if (PTL_OK != ret) {
2169 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2170 "%s,%d get_from_noncontig() failed: ret = %d",
2171 __FUNCTION__, __LINE__, ret));
2172 return ret;
2173 }
2174 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2175 ret = get_to_iovec(module,
2176 origin_addr,
2177 origin_count,
2178 origin_dt,
2179 peer,
2180 target_count,
2181 target_dt,
2182 offset,
2183 module->pt_idx,
2184 module->match_bits,
2185 request);
2186 if (PTL_OK != ret) {
2187 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2188 "%s,%d get_to_iovec() failed: ret = %d",
2189 __FUNCTION__, __LINE__, ret));
2190 return ret;
2191 }
2192 } else {
2193 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2194 if (OMPI_SUCCESS != ret) {
2195 return ret;
2196 }
2197 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2198 if (OMPI_SUCCESS != ret) {
2199 return ret;
2200 }
2201 ompi_datatype_type_size(origin_dt, &size);
2202 length = size * origin_count;
2203
2204 request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2205
2206 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2207 "%s,%d RGet", __FUNCTION__, __LINE__));
2208 ret = segmentedGet(&module->opcount,
2209 module->req_md_h,
2210 (ptl_size_t) origin_addr + origin_lb,
2211 length,
2212 mca_osc_portals4_component.ptl_max_msg_size,
2213 peer,
2214 module->pt_idx,
2215 module->match_bits,
2216 offset + target_lb,
2217 request);
2218 if (OMPI_SUCCESS != ret) {
2219 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2220 return ret;
2221 }
2222 }
2223
2224 return OMPI_SUCCESS;
2225 }
2226
2227
2228 int
2229 ompi_osc_portals4_raccumulate(const void *origin_addr,
2230 int origin_count,
2231 struct ompi_datatype_t *origin_dt,
2232 int target,
2233 ptrdiff_t target_disp,
2234 int target_count,
2235 struct ompi_datatype_t *target_dt,
2236 struct ompi_op_t *op,
2237 struct ompi_win_t *win,
2238 struct ompi_request_t **ompi_req)
2239 {
2240 int ret;
2241 ompi_osc_portals4_request_t *request;
2242 ompi_osc_portals4_module_t *module =
2243 (ompi_osc_portals4_module_t*) win->w_osc_module;
2244 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2245 size_t offset, size;
2246 ptl_op_t ptl_op;
2247 ptl_datatype_t ptl_dt;
2248 ptrdiff_t sent, length, origin_lb, target_lb, extent;
2249
2250 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2251 "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx",
2252 (unsigned long) origin_addr, origin_count,
2253 origin_dt->name, target, (unsigned long) target_disp,
2254 target_count, target_dt->name,
2255 op->o_name,
2256 (unsigned long) win));
2257
2258 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2259 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2260 *ompi_req = &request->super;
2261
2262 offset = get_displacement(module, target) * target_disp;
2263
2264 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2265 if (MPI_REPLACE == op) {
2266 ret = atomic_put_to_noncontig(module,
2267 module->req_md_h,
2268 origin_addr,
2269 origin_count,
2270 origin_dt,
2271 peer,
2272 target_count,
2273 target_dt,
2274 offset,
2275 module->pt_idx,
2276 module->match_bits,
2277 request);
2278 if (PTL_OK != ret) {
2279 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2280 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2281 "%s,%d atomic_put_to_noncontig() failed: ret = %d",
2282 __FUNCTION__, __LINE__, ret));
2283 return ret;
2284 }
2285 } else {
2286 ret = atomic_to_noncontig(module,
2287 module->req_md_h,
2288 origin_addr,
2289 origin_count,
2290 origin_dt,
2291 peer,
2292 target_count,
2293 target_dt,
2294 offset,
2295 module->pt_idx,
2296 module->match_bits,
2297 op,
2298 request);
2299 if (PTL_OK != ret) {
2300 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2301 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2302 "%s,%d atomic_to_noncontig() failed: ret = %d",
2303 __FUNCTION__, __LINE__, ret));
2304 return ret;
2305 }
2306 }
2307 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2308 if (MPI_REPLACE == op) {
2309 ret = atomic_put_from_iovec(module,
2310 origin_addr,
2311 origin_count,
2312 origin_dt,
2313 peer,
2314 target_count,
2315 target_dt,
2316 offset,
2317 module->pt_idx,
2318 module->match_bits,
2319 request);
2320 if (PTL_OK != ret) {
2321 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2322 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2323 "%s,%d atomic_put_from_iovec() failed: ret = %d",
2324 __FUNCTION__, __LINE__, ret));
2325 return ret;
2326 }
2327 } else {
2328 ret = atomic_from_iovec(module,
2329 origin_addr,
2330 origin_count,
2331 origin_dt,
2332 peer,
2333 target_count,
2334 target_dt,
2335 offset,
2336 module->pt_idx,
2337 module->match_bits,
2338 op,
2339 request);
2340 if (PTL_OK != ret) {
2341 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2342 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2343 "%s,%d atomic_from_iovec() failed: ret = %d",
2344 __FUNCTION__, __LINE__, ret));
2345 return ret;
2346 }
2347 }
2348 } else {
2349 ptl_size_t md_offset;
2350
2351 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2352 if (OMPI_SUCCESS != ret) {
2353 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2354 return ret;
2355 }
2356 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2357 if (OMPI_SUCCESS != ret) {
2358 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2359 return ret;
2360 }
2361 ompi_datatype_type_size(origin_dt, &size);
2362 length = size * origin_count;
2363 sent = 0;
2364
2365 md_offset = (ptl_size_t) origin_addr;
2366
2367 request->ops_expected += number_of_fragments(length, module->atomic_max);
2368
2369 if (MPI_REPLACE == op) {
2370 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2371 "%s,%d Put", __FUNCTION__, __LINE__));
2372 ret = segmentedPut(&module->opcount,
2373 module->req_md_h,
2374 md_offset + origin_lb,
2375 length,
2376 module->atomic_max,
2377 PTL_ACK_REQ,
2378 peer,
2379 module->pt_idx,
2380 module->match_bits,
2381 offset + target_lb,
2382 request,
2383 0);
2384 if (OMPI_SUCCESS != ret) {
2385 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2386 return ret;
2387 }
2388 } else {
2389 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2390 if (OMPI_SUCCESS != ret) {
2391 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2392 opal_output(ompi_osc_base_framework.framework_output,
2393 "datatype is not currently supported");
2394 return OMPI_ERR_NOT_SUPPORTED;
2395 }
2396 ret = ompi_osc_portals4_get_op(op, &ptl_op);
2397 if (OMPI_SUCCESS != ret) {
2398 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2399 opal_output(ompi_osc_base_framework.framework_output,
2400 "operation is not currently supported");
2401 return OMPI_ERR_NOT_SUPPORTED;
2402 }
2403 do {
2404 size_t msg_length = MIN(module->atomic_max, length - sent);
2405
2406 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
2407
2408 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2409 "%s,%d Atomic", __FUNCTION__, __LINE__));
2410 ret = PtlAtomic(module->req_md_h,
2411 md_offset + sent + origin_lb,
2412 msg_length,
2413 PTL_ACK_REQ,
2414 peer,
2415 module->pt_idx,
2416 module->match_bits,
2417 offset + sent + target_lb,
2418 request,
2419 0,
2420 ptl_op,
2421 ptl_dt);
2422 if (OMPI_SUCCESS != ret) {
2423 (void)opal_atomic_add_fetch_64(&module->opcount, -1);
2424 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2425 return ret;
2426 }
2427 sent += msg_length;
2428 } while (sent < length);
2429 }
2430 }
2431
2432 return OMPI_SUCCESS;
2433 }
2434
2435
2436 int
2437 ompi_osc_portals4_rget_accumulate(const void *origin_addr,
2438 int origin_count,
2439 struct ompi_datatype_t *origin_dt,
2440 void *result_addr,
2441 int result_count,
2442 struct ompi_datatype_t *result_dt,
2443 int target,
2444 ptrdiff_t target_disp,
2445 int target_count,
2446 struct ompi_datatype_t *target_dt,
2447 struct ompi_op_t *op,
2448 struct ompi_win_t *win,
2449 struct ompi_request_t **ompi_req)
2450 {
2451 int ret;
2452 ompi_osc_portals4_request_t *request;
2453 ompi_osc_portals4_module_t *module =
2454 (ompi_osc_portals4_module_t*) win->w_osc_module;
2455 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2456 size_t target_offset, size;
2457 ptl_op_t ptl_op;
2458 ptl_datatype_t ptl_dt;
2459 ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
2460
2461 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2462 "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
2463 (unsigned long) origin_addr, origin_count,
2464 origin_dt->name, (unsigned long) result_addr,
2465 result_count, result_dt->name,
2466 target, (unsigned long) target_disp,
2467 target_count, target_dt->name,
2468 op->o_name,
2469 (unsigned long) win));
2470
2471 OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2472 if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2473 *ompi_req = &request->super;
2474
2475 target_offset = get_displacement(module, target) * target_disp;
2476
2477 if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2478 if (MPI_REPLACE == op) {
2479 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2480 "rget_accumulate: MPI_REPLACE non-contiguous target"));
2481 ret = swap_from_noncontig(module,
2482 module->req_md_h,
2483 result_addr,
2484 result_count,
2485 result_dt,
2486 module->md_h,
2487 origin_addr,
2488 origin_count,
2489 origin_dt,
2490 peer,
2491 target_count,
2492 target_dt,
2493 target_offset,
2494 module->pt_idx,
2495 module->match_bits,
2496 request);
2497 if (PTL_OK != ret) {
2498 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2499 "%s,%d swap_from_noncontig() failed: ret = %d",
2500 __FUNCTION__, __LINE__, ret));
2501 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2502 return ret;
2503 }
2504 } else if (MPI_NO_OP == op) {
2505 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2506 "rget_accumulate: MPI_NO_OP non-contiguous target"));
2507 ret = atomic_get_from_noncontig(module,
2508 module->req_md_h,
2509 result_addr,
2510 result_count,
2511 result_dt,
2512 peer,
2513 target_count,
2514 target_dt,
2515 target_offset,
2516 module->pt_idx,
2517 module->match_bits,
2518 request);
2519 if (PTL_OK != ret) {
2520 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2521 "%s,%d atomic_get_from_noncontig() failed: ret = %d",
2522 __FUNCTION__, __LINE__, ret));
2523 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2524 return ret;
2525 }
2526 } else {
2527 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2528 "rget_accumulate: other-op non-contiguous target"));
2529 ret = fetch_atomic_from_noncontig(module,
2530 module->req_md_h,
2531 result_addr,
2532 result_count,
2533 result_dt,
2534 module->md_h,
2535 origin_addr,
2536 origin_count,
2537 origin_dt,
2538 peer,
2539 target_count,
2540 target_dt,
2541 target_offset,
2542 module->pt_idx,
2543 module->match_bits,
2544 op,
2545 request);
2546 if (PTL_OK != ret) {
2547 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2548 "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
2549 __FUNCTION__, __LINE__, ret));
2550 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2551 return ret;
2552 }
2553 }
2554 } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
2555 (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
2556 if (MPI_REPLACE == op) {
2557 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2558 "rget_accumulate: MPI_REPLACE non-contiguous origin/result"));
2559 ret = swap_to_iovec(module,
2560 result_addr,
2561 result_count,
2562 result_dt,
2563 origin_addr,
2564 origin_count,
2565 origin_dt,
2566 peer,
2567 target_count,
2568 target_dt,
2569 target_offset,
2570 module->pt_idx,
2571 module->match_bits,
2572 request);
2573 if (PTL_OK != ret) {
2574 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2575 "%s,%d swap_to_iovec() failed: ret = %d",
2576 __FUNCTION__, __LINE__, ret));
2577 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2578 return ret;
2579 }
2580 } else if (MPI_NO_OP == op) {
2581 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2582 "rget_accumulate: MPI_NO_OP non-contiguous origin/result"));
2583 ret = atomic_get_to_iovec(module,
2584 result_addr,
2585 result_count,
2586 result_dt,
2587 peer,
2588 target_count,
2589 target_dt,
2590 target_offset,
2591 module->pt_idx,
2592 module->match_bits,
2593 request);
2594 if (PTL_OK != ret) {
2595 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2596 "%s,%d atomic_get_to_iovec() failed: ret = %d",
2597 __FUNCTION__, __LINE__, ret));
2598 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2599 return ret;
2600 }
2601 } else {
2602 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2603 "rget_accumulate: other-op non-contiguous origin/result"));
2604 ret = fetch_atomic_to_iovec(module,
2605 result_addr,
2606 result_count,
2607 result_dt,
2608 origin_addr,
2609 origin_count,
2610 origin_dt,
2611 peer,
2612 target_count,
2613 target_dt,
2614 target_offset,
2615 module->pt_idx,
2616 module->match_bits,
2617 op,
2618 request);
2619 if (PTL_OK != ret) {
2620 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2621 "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
2622 __FUNCTION__, __LINE__, ret));
2623 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2624 return ret;
2625 }
2626 }
2627 } else {
2628 if (MPI_REPLACE == op) {
2629 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2630 "rget_accumulate: MPI_REPLACE contiguous"));
2631 ptl_size_t result_md_offset, origin_md_offset;
2632
2633 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2634 if (OMPI_SUCCESS != ret) {
2635 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2636 return ret;
2637 }
2638 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2639 if (OMPI_SUCCESS != ret) {
2640 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2641 return ret;
2642 }
2643 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2644 if (OMPI_SUCCESS != ret) {
2645 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2646 return ret;
2647 }
2648 ompi_datatype_type_size(origin_dt, &size);
2649 length = size * origin_count;
2650
2651 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2652 if (OMPI_SUCCESS != ret) {
2653 opal_output(ompi_osc_base_framework.framework_output,
2654 "datatype is not currently supported");
2655 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2656 return OMPI_ERR_NOT_SUPPORTED;
2657 }
2658
2659 result_md_offset = (ptl_size_t) result_addr;
2660 origin_md_offset = (ptl_size_t) origin_addr;
2661
2662 request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2663
2664 ret = segmentedSwap(&module->opcount,
2665 module->req_md_h,
2666 result_md_offset + result_lb,
2667 module->md_h,
2668 origin_md_offset + origin_lb,
2669 length,
2670 module->fetch_atomic_max,
2671 peer,
2672 module->pt_idx,
2673 module->match_bits,
2674 target_offset + target_lb,
2675 request,
2676 ptl_dt);
2677 if (OMPI_SUCCESS != ret) {
2678 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2679 return ret;
2680 }
2681 } else if (MPI_NO_OP == op) {
2682 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2683 "rget_accumulate: MPI_NO_OP contiguous"));
2684 ptl_size_t md_offset;
2685
2686 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2687 if (OMPI_SUCCESS != ret) {
2688 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2689 return ret;
2690 }
2691 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2692 if (OMPI_SUCCESS != ret) {
2693 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2694 return ret;
2695 }
2696 ompi_datatype_type_size(target_dt, &size);
2697 length = size * target_count;
2698
2699 md_offset = (ptl_size_t) result_addr;
2700
2701 request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2702
2703 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2704 "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
2705 ret = segmentedGet(&module->opcount,
2706 module->req_md_h,
2707 (ptl_size_t) md_offset + result_lb,
2708 length,
2709 module->fetch_atomic_max,
2710 peer,
2711 module->pt_idx,
2712 module->match_bits,
2713 target_offset + target_lb,
2714 request);
2715 if (OMPI_SUCCESS != ret) {
2716 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2717 return ret;
2718 }
2719 } else {
2720 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2721 "rget_accumulate: other-op contiguous"));
2722 ptl_size_t result_md_offset, origin_md_offset;
2723
2724 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2725 if (OMPI_SUCCESS != ret) {
2726 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2727 return ret;
2728 }
2729 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2730 if (OMPI_SUCCESS != ret) {
2731 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2732 return ret;
2733 }
2734 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2735 if (OMPI_SUCCESS != ret) {
2736 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2737 return ret;
2738 }
2739 ompi_datatype_type_size(origin_dt, &size);
2740 length = size * origin_count;
2741
2742 result_md_offset = (ptl_size_t) result_addr;
2743 origin_md_offset = (ptl_size_t) origin_addr;
2744
2745 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2746 if (OMPI_SUCCESS != ret) {
2747 opal_output(ompi_osc_base_framework.framework_output,
2748 "datatype is not currently supported");
2749 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2750 return OMPI_ERR_NOT_SUPPORTED;
2751 }
2752
2753 ret = ompi_osc_portals4_get_op(op, &ptl_op);
2754 if (OMPI_SUCCESS != ret) {
2755 opal_output(ompi_osc_base_framework.framework_output,
2756 "operation is not currently supported");
2757 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2758 return OMPI_ERR_NOT_SUPPORTED;
2759 }
2760
2761 request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2762
2763 ret = segmentedFetchAtomic(&module->opcount,
2764 module->req_md_h,
2765 result_md_offset + result_lb,
2766 module->md_h,
2767 origin_md_offset + origin_lb,
2768 length,
2769 module->fetch_atomic_max,
2770 peer,
2771 module->pt_idx,
2772 module->match_bits,
2773 target_offset + target_lb,
2774 request,
2775 ptl_op,
2776 ptl_dt);
2777 if (OMPI_SUCCESS != ret) {
2778 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2779 return ret;
2780 }
2781 }
2782 }
2783
2784 return OMPI_SUCCESS;
2785 }
2786
2787
2788 int
2789 ompi_osc_portals4_put(const void *origin_addr,
2790 int origin_count,
2791 struct ompi_datatype_t *origin_dt,
2792 int target,
2793 ptrdiff_t target_disp,
2794 int target_count,
2795 struct ompi_datatype_t *target_dt,
2796 struct ompi_win_t *win)
2797 {
2798 int ret;
2799 ompi_osc_portals4_module_t *module =
2800 (ompi_osc_portals4_module_t*) win->w_osc_module;
2801 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2802 size_t offset, size;
2803 ptrdiff_t length, origin_lb, target_lb, extent;
2804
2805 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2806 "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2807 (unsigned long) origin_addr, origin_count,
2808 origin_dt->name, target, (unsigned long) target_disp,
2809 target_count, target_dt->name,
2810 (unsigned long) win));
2811
2812 offset = get_displacement(module, target) * target_disp;
2813
2814 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2815 ret = put_to_noncontig(&module->opcount,
2816 module->md_h,
2817 origin_addr,
2818 origin_count,
2819 origin_dt,
2820 peer,
2821 target_count,
2822 target_dt,
2823 offset,
2824 module->pt_idx,
2825 module->match_bits,
2826 NULL);
2827 if (PTL_OK != ret) {
2828 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2829 "%s,%d put_to_noncontig() failed: ret = %d",
2830 __FUNCTION__, __LINE__, ret));
2831 return ret;
2832 }
2833 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2834 ret = put_from_iovec(module,
2835 origin_addr,
2836 origin_count,
2837 origin_dt,
2838 peer,
2839 target_count,
2840 target_dt,
2841 offset,
2842 module->pt_idx,
2843 module->match_bits,
2844 NULL);
2845 if (PTL_OK != ret) {
2846 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2847 "%s,%d put_from_iovec() failed: ret = %d",
2848 __FUNCTION__, __LINE__, ret));
2849 return ret;
2850 }
2851 } else {
2852 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2853 if (OMPI_SUCCESS != ret) {
2854 return ret;
2855 }
2856 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2857 if (OMPI_SUCCESS != ret) {
2858 return ret;
2859 }
2860 ompi_datatype_type_size(origin_dt, &size);
2861 length = size * origin_count;
2862
2863 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2864 "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2865 __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2866 ret = segmentedPut(&module->opcount,
2867 module->md_h,
2868 (ptl_size_t) origin_addr + origin_lb,
2869 length,
2870 mca_osc_portals4_component.ptl_max_msg_size,
2871 PTL_ACK_REQ,
2872 peer,
2873 module->pt_idx,
2874 module->match_bits,
2875 offset + target_lb,
2876 NULL,
2877 0);
2878 if (OMPI_SUCCESS != ret) {
2879 return ret;
2880 }
2881 }
2882
2883 return OMPI_SUCCESS;
2884 }
2885
2886
2887 int
2888 ompi_osc_portals4_get(void *origin_addr,
2889 int origin_count,
2890 struct ompi_datatype_t *origin_dt,
2891 int target,
2892 ptrdiff_t target_disp,
2893 int target_count,
2894 struct ompi_datatype_t *target_dt,
2895 struct ompi_win_t *win)
2896 {
2897 int ret;
2898 ompi_osc_portals4_module_t *module =
2899 (ompi_osc_portals4_module_t*) win->w_osc_module;
2900 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2901 size_t offset, size;
2902 ptrdiff_t length, origin_lb, target_lb, extent;
2903
2904 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2905 "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2906 (unsigned long) origin_addr, origin_count,
2907 origin_dt->name, target, (unsigned long) target_disp,
2908 target_count, target_dt->name,
2909 (unsigned long) win));
2910
2911 offset = get_displacement(module, target) * target_disp;
2912
2913 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2914 ret = get_from_noncontig(&module->opcount,
2915 module->md_h,
2916 origin_addr,
2917 origin_count,
2918 origin_dt,
2919 peer,
2920 target_count,
2921 target_dt,
2922 offset,
2923 module->pt_idx,
2924 module->match_bits,
2925 NULL);
2926 if (PTL_OK != ret) {
2927 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2928 "%s,%d get_from_noncontig() failed: ret = %d",
2929 __FUNCTION__, __LINE__, ret));
2930 return ret;
2931 }
2932 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2933 ret = get_to_iovec(module,
2934 origin_addr,
2935 origin_count,
2936 origin_dt,
2937 peer,
2938 target_count,
2939 target_dt,
2940 offset,
2941 module->pt_idx,
2942 module->match_bits,
2943 NULL);
2944 if (PTL_OK != ret) {
2945 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2946 "%s,%d get_to_iovec() failed: ret = %d",
2947 __FUNCTION__, __LINE__, ret));
2948 return ret;
2949 }
2950 } else {
2951 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2952 if (OMPI_SUCCESS != ret) {
2953 return ret;
2954 }
2955 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2956 if (OMPI_SUCCESS != ret) {
2957 return ret;
2958 }
2959 ompi_datatype_type_size(origin_dt, &size);
2960 length = size * origin_count;
2961
2962 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2963 "%s,%d Get", __FUNCTION__, __LINE__));
2964 ret = segmentedGet(&module->opcount,
2965 module->md_h,
2966 (ptl_size_t) origin_addr + origin_lb,
2967 length,
2968 mca_osc_portals4_component.ptl_max_msg_size,
2969 peer,
2970 module->pt_idx,
2971 module->match_bits,
2972 offset + target_lb,
2973 NULL);
2974 if (OMPI_SUCCESS != ret) {
2975 return ret;
2976 }
2977 }
2978
2979 return OMPI_SUCCESS;
2980 }
2981
2982
2983 int
2984 ompi_osc_portals4_accumulate(const void *origin_addr,
2985 int origin_count,
2986 struct ompi_datatype_t *origin_dt,
2987 int target,
2988 ptrdiff_t target_disp,
2989 int target_count,
2990 struct ompi_datatype_t *target_dt,
2991 struct ompi_op_t *op,
2992 struct ompi_win_t *win)
2993 {
2994 int ret;
2995 ompi_osc_portals4_module_t *module =
2996 (ompi_osc_portals4_module_t*) win->w_osc_module;
2997 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2998 size_t offset, size;
2999 ptl_op_t ptl_op;
3000 ptl_datatype_t ptl_dt;
3001 ptrdiff_t sent, length, origin_lb, target_lb, extent;
3002
3003 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3004 "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3005 (unsigned long) origin_addr, origin_count,
3006 origin_dt->name, target, (unsigned long) target_disp,
3007 target_count, target_dt->name,
3008 op->o_name,
3009 (unsigned long) win));
3010
3011 offset = get_displacement(module, target) * target_disp;
3012
3013 if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3014 if (MPI_REPLACE == op) {
3015 ret = atomic_put_to_noncontig(module,
3016 module->md_h,
3017 origin_addr,
3018 origin_count,
3019 origin_dt,
3020 peer,
3021 target_count,
3022 target_dt,
3023 offset,
3024 module->pt_idx,
3025 module->match_bits,
3026 NULL);
3027 if (PTL_OK != ret) {
3028 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3029 "%s,%d atomic_put_to_noncontig() failed: ret = %d",
3030 __FUNCTION__, __LINE__, ret));
3031 return ret;
3032 }
3033 } else {
3034 ret = atomic_to_noncontig(module,
3035 module->md_h,
3036 origin_addr,
3037 origin_count,
3038 origin_dt,
3039 peer,
3040 target_count,
3041 target_dt,
3042 offset,
3043 module->pt_idx,
3044 module->match_bits,
3045 op,
3046 NULL);
3047 if (PTL_OK != ret) {
3048 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3049 "%s,%d atomic_to_noncontig() failed: ret = %d",
3050 __FUNCTION__, __LINE__, ret));
3051 return ret;
3052 }
3053 }
3054 } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
3055 if (MPI_REPLACE == op) {
3056 ret = atomic_put_from_iovec(module,
3057 origin_addr,
3058 origin_count,
3059 origin_dt,
3060 peer,
3061 target_count,
3062 target_dt,
3063 offset,
3064 module->pt_idx,
3065 module->match_bits,
3066 NULL);
3067 if (PTL_OK != ret) {
3068 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3069 "%s,%d atomic_put_from_iovec() failed: ret = %d",
3070 __FUNCTION__, __LINE__, ret));
3071 return ret;
3072 }
3073 } else {
3074 ret = atomic_from_iovec(module,
3075 origin_addr,
3076 origin_count,
3077 origin_dt,
3078 peer,
3079 target_count,
3080 target_dt,
3081 offset,
3082 module->pt_idx,
3083 module->match_bits,
3084 op,
3085 NULL);
3086 if (PTL_OK != ret) {
3087 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3088 "%s,%d atomic_from_iovec() failed: ret = %d",
3089 __FUNCTION__, __LINE__, ret));
3090 return ret;
3091 }
3092 }
3093 } else {
3094 ptl_size_t md_offset;
3095
3096 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3097 if (OMPI_SUCCESS != ret) {
3098 return ret;
3099 }
3100 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3101 if (OMPI_SUCCESS != ret) {
3102 return ret;
3103 }
3104 ompi_datatype_type_size(origin_dt, &size);
3105 length = size * origin_count;
3106 sent = 0;
3107
3108 md_offset = (ptl_size_t) origin_addr;
3109
3110 if (MPI_REPLACE == op) {
3111 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3112 "%s,%d Put", __FUNCTION__, __LINE__));
3113 ret = segmentedPut(&module->opcount,
3114 module->md_h,
3115 md_offset + origin_lb,
3116 length,
3117 module->atomic_max,
3118 PTL_ACK_REQ,
3119 peer,
3120 module->pt_idx,
3121 module->match_bits,
3122 offset + target_lb,
3123 NULL,
3124 0);
3125 if (OMPI_SUCCESS != ret) {
3126 return ret;
3127 }
3128 } else {
3129 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3130 if (OMPI_SUCCESS != ret) {
3131 opal_output(ompi_osc_base_framework.framework_output,
3132 "datatype is not currently supported");
3133 return OMPI_ERR_NOT_SUPPORTED;
3134 }
3135 ret = ompi_osc_portals4_get_op(op, &ptl_op);
3136 if (OMPI_SUCCESS != ret) {
3137 opal_output(ompi_osc_base_framework.framework_output,
3138 "operation is not currently supported");
3139 return OMPI_ERR_NOT_SUPPORTED;
3140 }
3141 do {
3142 size_t msg_length = MIN(module->atomic_max, length - sent);
3143
3144 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3145
3146 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3147 "%s,%d Atomic", __FUNCTION__, __LINE__));
3148 ret = PtlAtomic(module->md_h,
3149 md_offset + sent + origin_lb,
3150 msg_length,
3151 PTL_ACK_REQ,
3152 peer,
3153 module->pt_idx,
3154 module->match_bits,
3155 offset + sent + target_lb,
3156 NULL,
3157 0,
3158 ptl_op,
3159 ptl_dt);
3160 if (OMPI_SUCCESS != ret) {
3161 (void)opal_atomic_add_fetch_64(&module->opcount, -1);
3162 return ret;
3163 }
3164 sent += msg_length;
3165 } while (sent < length);
3166 }
3167 }
3168
3169 return OMPI_SUCCESS;
3170 }
3171
3172
3173 int
3174 ompi_osc_portals4_get_accumulate(const void *origin_addr,
3175 int origin_count,
3176 struct ompi_datatype_t *origin_dt,
3177 void *result_addr,
3178 int result_count,
3179 struct ompi_datatype_t *result_dt,
3180 int target,
3181 ptrdiff_t target_disp,
3182 int target_count,
3183 struct ompi_datatype_t *target_dt,
3184 struct ompi_op_t *op,
3185 struct ompi_win_t *win)
3186 {
3187 int ret;
3188 ompi_osc_portals4_module_t *module =
3189 (ompi_osc_portals4_module_t*) win->w_osc_module;
3190 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3191 size_t target_offset, size;
3192 ptl_op_t ptl_op;
3193 ptl_datatype_t ptl_dt;
3194 ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
3195
3196 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3197 "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3198 (unsigned long) origin_addr, origin_count,
3199 origin_dt->name, (unsigned long) result_addr,
3200 result_count, result_dt->name,
3201 target, (unsigned long) target_disp,
3202 target_count, target_dt->name,
3203 op->o_name,
3204 (unsigned long) win));
3205
3206 target_offset = get_displacement(module, target) * target_disp;
3207
3208 if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3209 if (MPI_REPLACE == op) {
3210 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3211 "get_accumulate: MPI_REPLACE non-contiguous target"));
3212 ret = swap_from_noncontig(module,
3213 module->md_h,
3214 result_addr,
3215 result_count,
3216 result_dt,
3217 module->md_h,
3218 origin_addr,
3219 origin_count,
3220 origin_dt,
3221 peer,
3222 target_count,
3223 target_dt,
3224 target_offset,
3225 module->pt_idx,
3226 module->match_bits,
3227 NULL);
3228 if (PTL_OK != ret) {
3229 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3230 "%s,%d swap_from_noncontig() failed: ret = %d",
3231 __FUNCTION__, __LINE__, ret));
3232 return ret;
3233 }
3234 } else if (MPI_NO_OP == op) {
3235 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3236 "get_accumulate: MPI_NO_OP non-contiguous target"));
3237 ret = atomic_get_from_noncontig(module,
3238 module->md_h,
3239 result_addr,
3240 result_count,
3241 result_dt,
3242 peer,
3243 target_count,
3244 target_dt,
3245 target_offset,
3246 module->pt_idx,
3247 module->match_bits,
3248 NULL);
3249 if (PTL_OK != ret) {
3250 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3251 "%s,%d atomic_get_from_noncontig() failed: ret = %d",
3252 __FUNCTION__, __LINE__, ret));
3253 return ret;
3254 }
3255 } else {
3256 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3257 "get_accumulate: other-op non-contiguous target"));
3258 ret = fetch_atomic_from_noncontig(module,
3259 module->md_h,
3260 result_addr,
3261 result_count,
3262 result_dt,
3263 module->md_h,
3264 origin_addr,
3265 origin_count,
3266 origin_dt,
3267 peer,
3268 target_count,
3269 target_dt,
3270 target_offset,
3271 module->pt_idx,
3272 module->match_bits,
3273 op,
3274 NULL);
3275 if (PTL_OK != ret) {
3276 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3277 "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
3278 __FUNCTION__, __LINE__, ret));
3279 return ret;
3280 }
3281 }
3282 } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
3283 (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
3284 if (MPI_REPLACE == op) {
3285 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3286 "get_accumulate: MPI_REPLACE non-contiguous origin/result"));
3287 ret = swap_to_iovec(module,
3288 result_addr,
3289 result_count,
3290 result_dt,
3291 origin_addr,
3292 origin_count,
3293 origin_dt,
3294 peer,
3295 target_count,
3296 target_dt,
3297 target_offset,
3298 module->pt_idx,
3299 module->match_bits,
3300 NULL);
3301 if (PTL_OK != ret) {
3302 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3303 "%s,%d swap_to_iovec() failed: ret = %d",
3304 __FUNCTION__, __LINE__, ret));
3305 return ret;
3306 }
3307 } else if (MPI_NO_OP == op) {
3308 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3309 "get_accumulate: MPI_NO_OP non-contiguous origin/result"));
3310 ret = atomic_get_to_iovec(module,
3311 result_addr,
3312 result_count,
3313 result_dt,
3314 peer,
3315 target_count,
3316 target_dt,
3317 target_offset,
3318 module->pt_idx,
3319 module->match_bits,
3320 NULL);
3321 if (PTL_OK != ret) {
3322 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3323 "%s,%d atomic_get_to_iovec() failed: ret = %d",
3324 __FUNCTION__, __LINE__, ret));
3325 return ret;
3326 }
3327 } else {
3328 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3329 "get_accumulate: other-op non-contiguous origin/result"));
3330 ret = fetch_atomic_to_iovec(module,
3331 result_addr,
3332 result_count,
3333 result_dt,
3334 origin_addr,
3335 origin_count,
3336 origin_dt,
3337 peer,
3338 target_count,
3339 target_dt,
3340 target_offset,
3341 module->pt_idx,
3342 module->match_bits,
3343 op,
3344 NULL);
3345 if (PTL_OK != ret) {
3346 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3347 "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
3348 __FUNCTION__, __LINE__, ret));
3349 return ret;
3350 }
3351 }
3352 } else {
3353 if (MPI_REPLACE == op) {
3354 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3355 "get_accumulate: MPI_REPLACE contiguous"));
3356 ptl_size_t result_md_offset, origin_md_offset;
3357
3358 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3359 if (OMPI_SUCCESS != ret) {
3360 return ret;
3361 }
3362 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3363 if (OMPI_SUCCESS != ret) {
3364 return ret;
3365 }
3366 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3367 if (OMPI_SUCCESS != ret) {
3368 return ret;
3369 }
3370 ompi_datatype_type_size(origin_dt, &size);
3371 length = size * origin_count;
3372
3373 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3374 if (OMPI_SUCCESS != ret) {
3375 opal_output(ompi_osc_base_framework.framework_output,
3376 "MPI_Get_accumulate: datatype is not currently supported");
3377 return OMPI_ERR_NOT_SUPPORTED;
3378 }
3379
3380 result_md_offset = (ptl_size_t) result_addr;
3381 origin_md_offset = (ptl_size_t) origin_addr;
3382
3383 ret = segmentedSwap(&module->opcount,
3384 module->md_h,
3385 result_md_offset + result_lb,
3386 module->md_h,
3387 origin_md_offset + origin_lb,
3388 length,
3389 module->fetch_atomic_max,
3390 peer,
3391 module->pt_idx,
3392 module->match_bits,
3393 target_offset + target_lb,
3394 NULL,
3395 ptl_dt);
3396 if (OMPI_SUCCESS != ret) {
3397 return ret;
3398 }
3399 } else if (MPI_NO_OP == op) {
3400 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3401 "get_accumulate: MPI_NO_OP contiguous"));
3402 ptl_size_t md_offset;
3403
3404 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3405 if (OMPI_SUCCESS != ret) {
3406 return ret;
3407 }
3408 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3409 if (OMPI_SUCCESS != ret) {
3410 return ret;
3411 }
3412 ompi_datatype_type_size(target_dt, &size);
3413 length = size * target_count;
3414
3415 md_offset = (ptl_size_t) result_addr;
3416
3417 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3418 "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
3419 ret = segmentedGet(&module->opcount,
3420 module->md_h,
3421 (ptl_size_t) md_offset + result_lb,
3422 length,
3423 module->fetch_atomic_max,
3424 peer,
3425 module->pt_idx,
3426 module->match_bits,
3427 target_offset + target_lb,
3428 NULL);
3429 if (OMPI_SUCCESS != ret) {
3430 return ret;
3431 }
3432 } else {
3433 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3434 "get_accumulate: other-op contiguous"));
3435 ptl_size_t result_md_offset, origin_md_offset;
3436
3437 ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3438 if (OMPI_SUCCESS != ret) {
3439 return ret;
3440 }
3441 ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3442 if (OMPI_SUCCESS != ret) {
3443 return ret;
3444 }
3445 ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3446 if (OMPI_SUCCESS != ret) {
3447 return ret;
3448 }
3449 ompi_datatype_type_size(origin_dt, &size);
3450 length = size * origin_count;
3451
3452 result_md_offset = (ptl_size_t) result_addr;
3453 origin_md_offset = (ptl_size_t) origin_addr;
3454
3455 ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3456 if (OMPI_SUCCESS != ret) {
3457 opal_output(ompi_osc_base_framework.framework_output,
3458 "MPI_Get_accumulate: datatype is not currently supported");
3459 return OMPI_ERR_NOT_SUPPORTED;
3460 }
3461
3462 ret = ompi_osc_portals4_get_op(op, &ptl_op);
3463 if (OMPI_SUCCESS != ret) {
3464 opal_output(ompi_osc_base_framework.framework_output,
3465 "MPI_Get_accumulate: operation is not currently supported");
3466 return OMPI_ERR_NOT_SUPPORTED;
3467 }
3468
3469 ret = segmentedFetchAtomic(&module->opcount,
3470 module->md_h,
3471 result_md_offset + result_lb,
3472 module->md_h,
3473 origin_md_offset + origin_lb,
3474 length,
3475 module->fetch_atomic_max,
3476 peer,
3477 module->pt_idx,
3478 module->match_bits,
3479 target_offset + target_lb,
3480 NULL,
3481 ptl_op,
3482 ptl_dt);
3483 if (OMPI_SUCCESS != ret) {
3484 return ret;
3485 }
3486 }
3487 }
3488
3489 return OMPI_SUCCESS;
3490 }
3491
3492
3493 int
3494 ompi_osc_portals4_compare_and_swap(const void *origin_addr,
3495 const void *compare_addr,
3496 void *result_addr,
3497 struct ompi_datatype_t *dt,
3498 int target,
3499 ptrdiff_t target_disp,
3500 struct ompi_win_t *win)
3501 {
3502 int ret;
3503 ompi_osc_portals4_module_t *module =
3504 (ompi_osc_portals4_module_t*) win->w_osc_module;
3505 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3506 size_t length;
3507 size_t offset;
3508 ptl_datatype_t ptl_dt;
3509 ptl_size_t result_md_offset, origin_md_offset;
3510
3511 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3512 "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx",
3513 (unsigned long) origin_addr,
3514 (unsigned long) compare_addr,
3515 (unsigned long) result_addr,
3516 dt->name, target, (unsigned long) target_disp,
3517 (unsigned long) win));
3518
3519 ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3520 if (OMPI_SUCCESS != ret) {
3521 opal_output(ompi_osc_base_framework.framework_output,
3522 "MPI_Compare_and_swap: datatype is not currently supported");
3523 return OMPI_ERR_NOT_SUPPORTED;
3524 }
3525
3526 offset = get_displacement(module, target) * target_disp;
3527
3528 ret = ompi_datatype_type_size(dt, &length);
3529 if (OMPI_SUCCESS != ret) return ret;
3530
3531 assert(length <= module->fetch_atomic_max);
3532
3533 result_md_offset = (ptl_size_t) result_addr;
3534 origin_md_offset = (ptl_size_t) origin_addr;
3535
3536 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3537
3538 OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3539 "%s,%d Swap", __FUNCTION__, __LINE__));
3540 ret = PtlSwap(module->md_h,
3541 result_md_offset,
3542 module->md_h,
3543 origin_md_offset,
3544 length,
3545 peer,
3546 module->pt_idx,
3547 module->match_bits,
3548 offset,
3549 NULL,
3550 0,
3551 compare_addr,
3552 PTL_CSWAP,
3553 ptl_dt);
3554 if (OMPI_SUCCESS != ret) {
3555 return ret;
3556 }
3557
3558 return OMPI_SUCCESS;
3559 }
3560
3561
3562 int
3563 ompi_osc_portals4_fetch_and_op(const void *origin_addr,
3564 void *result_addr,
3565 struct ompi_datatype_t *dt,
3566 int target,
3567 ptrdiff_t target_disp,
3568 struct ompi_op_t *op,
3569 struct ompi_win_t *win)
3570 {
3571 int ret;
3572 ompi_osc_portals4_module_t *module =
3573 (ompi_osc_portals4_module_t*) win->w_osc_module;
3574 ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3575 size_t length;
3576 size_t offset;
3577 ptl_op_t ptl_op;
3578 ptl_datatype_t ptl_dt;
3579
3580 OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3581 "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx",
3582 (unsigned long) origin_addr,
3583 (unsigned long) result_addr,
3584 dt->name, target, (unsigned long) target_disp,
3585 op->o_name,
3586 (unsigned long) win));
3587
3588 ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3589 if (OMPI_SUCCESS != ret) {
3590 opal_output(ompi_osc_base_framework.framework_output,
3591 "MPI_Fetch_and_op: datatype is not currently supported");
3592 return OMPI_ERR_NOT_SUPPORTED;
3593 }
3594
3595 offset = get_displacement(module, target) * target_disp;
3596
3597 ret = ompi_datatype_type_size(dt, &length);
3598 if (OMPI_SUCCESS != ret) return ret;
3599
3600 assert(length <= module->fetch_atomic_max);
3601
3602 if (MPI_REPLACE == op) {
3603 ptl_size_t result_md_offset, origin_md_offset;
3604
3605 result_md_offset = (ptl_size_t) result_addr;
3606 origin_md_offset = (ptl_size_t) origin_addr;
3607
3608 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3609 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3610 "%s,%d Swap", __FUNCTION__, __LINE__));
3611 ret = PtlSwap(module->md_h,
3612 result_md_offset,
3613 module->md_h,
3614 origin_md_offset,
3615 length,
3616 peer,
3617 module->pt_idx,
3618 module->match_bits,
3619 offset,
3620 NULL,
3621 0,
3622 NULL,
3623 PTL_SWAP,
3624 ptl_dt);
3625 } else if (MPI_NO_OP == op) {
3626 ptl_size_t md_offset;
3627
3628 md_offset = (ptl_size_t) result_addr;
3629
3630 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3631 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3632 "%s,%d Get", __FUNCTION__, __LINE__));
3633 ret = PtlGet(module->md_h,
3634 md_offset,
3635 length,
3636 peer,
3637 module->pt_idx,
3638 module->match_bits,
3639 offset,
3640 NULL);
3641 } else {
3642 ptl_size_t result_md_offset, origin_md_offset;
3643 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3644
3645 ret = ompi_osc_portals4_get_op(op, &ptl_op);
3646 if (OMPI_SUCCESS != ret) {
3647 opal_output(ompi_osc_base_framework.framework_output,
3648 "MPI_Fetch_and_op: operation is not currently supported");
3649 return OMPI_ERR_NOT_SUPPORTED;
3650 }
3651
3652 result_md_offset = (ptl_size_t) result_addr;
3653 origin_md_offset = (ptl_size_t) origin_addr;
3654
3655 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3656 "%s,%d FetchAtomic", __FUNCTION__, __LINE__));
3657 ret = PtlFetchAtomic(module->md_h,
3658 result_md_offset,
3659 module->md_h,
3660 origin_md_offset,
3661 length,
3662 peer,
3663 module->pt_idx,
3664 module->match_bits,
3665 offset,
3666 NULL,
3667 0,
3668 ptl_op,
3669 ptl_dt);
3670 }
3671 if (OMPI_SUCCESS != ret) {
3672 return ret;
3673 }
3674
3675 return OMPI_SUCCESS;
3676 }