root/ompi/mca/osc/portals4/osc_portals4_comm.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ompi_osc_portals4_get_op
  2. get_sized_type
  3. ompi_osc_portals4_get_dt
  4. number_of_fragments
  5. segmentedPut
  6. segmentedGet
  7. segmentedAtomic
  8. segmentedFetchAtomic
  9. segmentedSwap
  10. create_iov_list
  11. get_to_iovec
  12. atomic_get_to_iovec
  13. put_from_iovec
  14. atomic_put_from_iovec
  15. atomic_from_iovec
  16. swap_to_iovec
  17. fetch_atomic_to_iovec
  18. put_to_noncontig
  19. atomic_put_to_noncontig
  20. atomic_to_noncontig
  21. get_from_noncontig
  22. atomic_get_from_noncontig
  23. swap_from_noncontig
  24. fetch_atomic_from_noncontig
  25. ompi_osc_portals4_rput
  26. ompi_osc_portals4_rget
  27. ompi_osc_portals4_raccumulate
  28. ompi_osc_portals4_rget_accumulate
  29. ompi_osc_portals4_put
  30. ompi_osc_portals4_get
  31. ompi_osc_portals4_accumulate
  32. ompi_osc_portals4_get_accumulate
  33. ompi_osc_portals4_compare_and_swap
  34. ompi_osc_portals4_fetch_and_op

   1 /*
   2  * Copyright (c) 2011-2017 Sandia National Laboratories.  All rights reserved.
   3  * Copyright (c) 2014      The University of Tennessee and The University
   4  *                         of Tennessee Research Foundation.  All rights
   5  *                         reserved.
   6  * Copyright (c) 2015-2017 Research Organization for Information Science
   7  *                         and Technology (RIST). All rights reserved.
   8  * $COPYRIGHT$
   9  *
  10  * Additional copyrights may follow
  11  *
  12  * $HEADER$
  13  */
  14 
  15 #include "ompi_config.h"
  16 
  17 #include "ompi/mca/osc/osc.h"
  18 #include "ompi/mca/osc/base/base.h"
  19 #include "ompi/mca/osc/base/osc_base_obj_convert.h"
  20 
  21 #include "osc_portals4.h"
  22 #include "osc_portals4_request.h"
  23 
  24 
  25 static int
  26 ompi_osc_portals4_get_op(struct ompi_op_t *op, ptl_op_t *ptl_op)
  27 {
  28     if (MPI_MAX == op) {
  29         *ptl_op = PTL_MAX;
  30     } else if (MPI_MIN == op) {
  31         *ptl_op = PTL_MIN;
  32     } else if (MPI_SUM == op) {
  33         *ptl_op = PTL_SUM;
  34     } else if (MPI_PROD == op) {
  35         *ptl_op = PTL_PROD;
  36     } else if (MPI_LAND == op) {
  37         *ptl_op = PTL_LAND;
  38     } else if (MPI_BAND == op) {
  39         *ptl_op = PTL_BAND;
  40     } else if (MPI_LOR == op) {
  41         *ptl_op = PTL_LOR;
  42     } else if (MPI_BOR == op) {
  43         *ptl_op = PTL_BOR;
  44     } else if (MPI_LXOR == op) {
  45         *ptl_op = PTL_LXOR;
  46     } else if (MPI_BXOR == op) {
  47         *ptl_op = PTL_BXOR;
  48     } else {
  49         return OMPI_ERROR;
  50     }
  51 
  52     return OMPI_SUCCESS;
  53 }
  54 
  55 
  56 static int
  57 get_sized_type(bool sign, size_t size, ptl_datatype_t *ptl_dt)
  58 {
  59     if (sign) {
  60         switch (size) {
  61         case 1:
  62             *ptl_dt = PTL_INT8_T;
  63             break;
  64         case 2:
  65             *ptl_dt = PTL_INT16_T;
  66             break;
  67         case 4:
  68             *ptl_dt = PTL_INT32_T;
  69             break;
  70         case 8:
  71             *ptl_dt = PTL_INT64_T;
  72             break;
  73         default:
  74             return OMPI_ERROR;
  75         }
  76     } else {
  77         switch (size) {
  78         case 1:
  79             *ptl_dt = PTL_UINT8_T;
  80             break;
  81         case 2:
  82             *ptl_dt = PTL_UINT16_T;
  83             break;
  84         case 4:
  85             *ptl_dt = PTL_UINT32_T;
  86             break;
  87         case 8:
  88             *ptl_dt = PTL_UINT64_T;
  89             break;
  90         default:
  91             return OMPI_ERROR;
  92         }
  93     }
  94 
  95     return OMPI_SUCCESS;
  96 }
  97 
  98 
  99 static int
 100 ompi_osc_portals4_get_dt(struct ompi_datatype_t *dt, ptl_datatype_t *ptl_dt)
 101 {
 102     ompi_datatype_t *base_dt = ompi_datatype_get_single_predefined_type_from_args(dt);
 103 
 104     if (MPI_BYTE == base_dt) {
 105         *ptl_dt = PTL_INT8_T;
 106     } else if (MPI_CHAR == base_dt) {
 107         *ptl_dt = PTL_INT8_T;
 108     } else if (MPI_SHORT == base_dt) {
 109         return get_sized_type(true, sizeof(short), ptl_dt);
 110     } else if (MPI_INT == base_dt) {
 111         return get_sized_type(true, sizeof(int), ptl_dt);
 112     } else if (MPI_LONG == base_dt) {
 113         return get_sized_type(true, sizeof(long), ptl_dt);
 114     } else if (MPI_FLOAT == base_dt) {
 115         *ptl_dt = PTL_FLOAT;
 116     } else if (MPI_DOUBLE == base_dt) {
 117         *ptl_dt = PTL_DOUBLE;
 118     } else if (MPI_LONG_DOUBLE == base_dt) {
 119         *ptl_dt = PTL_LONG_DOUBLE;
 120     } else if (MPI_UNSIGNED_CHAR == base_dt) {
 121         *ptl_dt = PTL_UINT8_T;
 122     } else if (MPI_SIGNED_CHAR == base_dt) {
 123         *ptl_dt = PTL_UINT8_T;
 124     } else if (MPI_UNSIGNED_SHORT == base_dt) {
 125         return get_sized_type(false, sizeof(short), ptl_dt);
 126     } else if (MPI_UNSIGNED_LONG == base_dt) {
 127         return get_sized_type(false, sizeof(long), ptl_dt);
 128     } else if (MPI_UNSIGNED == base_dt) {
 129         return get_sized_type(false, sizeof(int), ptl_dt);
 130     } else if (MPI_LONG_LONG_INT == base_dt) {
 131         return get_sized_type(true, sizeof(long long int), ptl_dt);
 132     } else if (MPI_LONG_LONG == base_dt) {
 133         return get_sized_type(true, sizeof(long long), ptl_dt);
 134     } else if (MPI_INT8_T == base_dt) {
 135         *ptl_dt = PTL_INT8_T;
 136     } else if (MPI_UINT8_T == base_dt) {
 137         *ptl_dt = PTL_UINT8_T;
 138     } else if (MPI_INT16_T == base_dt) {
 139         *ptl_dt = PTL_INT16_T;
 140     } else if (MPI_UINT16_T == base_dt) {
 141         *ptl_dt = PTL_UINT16_T;
 142     } else if (MPI_INT32_T == base_dt) {
 143         *ptl_dt = PTL_INT32_T;
 144     } else if (MPI_UINT32_T == base_dt) {
 145         *ptl_dt = PTL_UINT32_T;
 146     } else if (MPI_INT64_T == base_dt) {
 147         *ptl_dt = PTL_INT64_T;
 148     } else if (MPI_UINT64_T == base_dt) {
 149         *ptl_dt = PTL_UINT64_T;
 150     } else if (MPI_C_COMPLEX == base_dt) {
 151         *ptl_dt = PTL_DOUBLE_COMPLEX;
 152     } else if (MPI_C_FLOAT_COMPLEX == base_dt) {
 153         *ptl_dt = PTL_FLOAT_COMPLEX;
 154     } else if (MPI_C_DOUBLE_COMPLEX == base_dt) {
 155         *ptl_dt = PTL_DOUBLE_COMPLEX;
 156     } else if (MPI_C_LONG_DOUBLE_COMPLEX == base_dt) {
 157         *ptl_dt = PTL_LONG_DOUBLE_COMPLEX;
 158     } else if (MPI_AINT == base_dt) {
 159         if (sizeof(MPI_Aint) == 2) {
 160             *ptl_dt = PTL_UINT16_T;
 161         } else if (sizeof(MPI_Aint) == 4) {
 162             *ptl_dt = PTL_UINT32_T;
 163         } else if (sizeof(MPI_Aint) == 8) {
 164             *ptl_dt = PTL_UINT64_T;
 165         }
 166     } else {
 167         return OMPI_ERROR;
 168     }
 169 
 170     return 0;
 171 }
 172 
 173 static  ptl_size_t
 174 number_of_fragments(ptl_size_t length, ptl_size_t maxlength)
 175 {
 176     ptl_size_t nb_frag = length == 0 ? 1 : (length - 1) / maxlength + 1;
 177     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 178                          "%s,%d : %ld fragment(s)", __FUNCTION__, __LINE__, nb_frag));
 179     return nb_frag;
 180 }
 181 
 182 /* put in segments no larger than segment_length */
 183 static int
 184 segmentedPut(opal_atomic_int64_t *opcount,
 185              ptl_handle_md_t md_h,
 186              ptl_size_t origin_offset,
 187              ptl_size_t put_length,
 188              ptl_size_t segment_length,
 189              ptl_ack_req_t ack_req,
 190              ptl_process_t target_id,
 191              ptl_pt_index_t pt_index,
 192              ptl_match_bits_t match_bits,
 193              ptl_size_t target_offset,
 194              void *user_ptr,
 195              ptl_hdr_data_t hdr_data)
 196 {
 197     int ret;
 198     ptl_size_t bytes_put = 0;
 199 
 200     do {
 201         opal_atomic_add_fetch_64(opcount, 1);
 202 
 203         ptl_size_t frag_length = MIN(put_length, segment_length);
 204         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 205                              "Put size : %lu/%lu, offset:%lu", frag_length, put_length, bytes_put));
 206         ret = PtlPut(md_h,
 207                      origin_offset + bytes_put,
 208                      frag_length,
 209                      ack_req,
 210                      target_id,
 211                      pt_index,
 212                      match_bits,
 213                      target_offset + bytes_put,
 214                      user_ptr,
 215                      hdr_data);
 216         if (PTL_OK != ret) {
 217             opal_atomic_add_fetch_64(opcount, -1);
 218             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 219                                  "%s:%d PtlPut failed with return value %d",
 220                                  __FUNCTION__, __LINE__, ret);
 221             return ret;
 222         }
 223         put_length -= frag_length;
 224         bytes_put += frag_length;
 225     } while (put_length);
 226     return PTL_OK;
 227 }
 228 
 229 /* get in segments no larger than segment_length */
 230 static int
 231 segmentedGet(opal_atomic_int64_t *opcount,
 232              ptl_handle_md_t md_h,
 233              ptl_size_t origin_offset,
 234              ptl_size_t get_length,
 235              ptl_size_t segment_length,
 236              ptl_process_t target_id,
 237              ptl_pt_index_t pt_index,
 238              ptl_match_bits_t match_bits,
 239              ptl_size_t target_offset,
 240              void *user_ptr)
 241 {
 242     int ret;
 243     ptl_size_t bytes_gotten = 0;
 244 
 245     do {
 246         opal_atomic_add_fetch_64(opcount, 1);
 247 
 248         ptl_size_t frag_length = MIN(get_length, segment_length);
 249         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 250                              "Get size : %lu/%lu, offset:%lu", frag_length, get_length, bytes_gotten));
 251 
 252         ret = PtlGet(md_h,
 253                      (ptl_size_t) origin_offset + bytes_gotten,
 254                      frag_length,
 255                      target_id,
 256                      pt_index,
 257                      match_bits,
 258                      target_offset + bytes_gotten,
 259                      user_ptr);
 260         if (PTL_OK != ret) {
 261             opal_atomic_add_fetch_64(opcount, -1);
 262             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 263                                  "%s:%d PtlGet failed with return value %d",
 264                                  __FUNCTION__, __LINE__, ret);
 265             return ret;
 266         }
 267         get_length -= frag_length;
 268         bytes_gotten += frag_length;
 269     } while (get_length);
 270     return PTL_OK;
 271 }
 272 
 273 /* atomic op in segments no larger than segment_length */
 274 static int
 275 segmentedAtomic(opal_atomic_int64_t *opcount,
 276                 ptl_handle_md_t md_h,
 277                 ptl_size_t origin_offset,
 278                 ptl_size_t length,
 279                 ptl_size_t segment_length,
 280                 ptl_process_t target_id,
 281                 ptl_pt_index_t pt_index,
 282                 ptl_match_bits_t match_bits,
 283                 ptl_size_t target_offset,
 284                 void *user_ptr,
 285                 ptl_op_t ptl_op,
 286                 ptl_datatype_t ptl_dt)
 287 {
 288     int ret;
 289     ptl_size_t sent = 0;
 290 
 291     do {
 292         opal_atomic_add_fetch_64(opcount, 1);
 293 
 294         ptl_size_t frag_length = MIN(length, segment_length);
 295         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 296                              "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
 297         ret = PtlAtomic(md_h,
 298                         (ptl_size_t) origin_offset + sent,
 299                         frag_length,
 300                         PTL_ACK_REQ,
 301                         target_id,
 302                         pt_index,
 303                         match_bits,
 304                         target_offset + sent,
 305                         user_ptr,
 306                         0,
 307                         ptl_op,
 308                         ptl_dt);
 309         if (PTL_OK != ret) {
 310             opal_atomic_add_fetch_64(opcount, -1);
 311             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 312                                  "%s:%d PtlAtomic failed with return value %d",
 313                                  __FUNCTION__, __LINE__, ret);
 314             return ret;
 315         }
 316         length -= frag_length;
 317         sent += frag_length;
 318     } while (length);
 319     return PTL_OK;
 320 }
 321 
 322 /* atomic op in segments no larger than segment_length */
 323 static int
 324 segmentedFetchAtomic(opal_atomic_int64_t *opcount,
 325                      ptl_handle_md_t result_md_h,
 326                      ptl_size_t result_offset,
 327                      ptl_handle_md_t origin_md_h,
 328                      ptl_size_t origin_offset,
 329                      ptl_size_t length,
 330                      ptl_size_t segment_length,
 331                      ptl_process_t target_id,
 332                      ptl_pt_index_t pt_index,
 333                      ptl_match_bits_t match_bits,
 334                      ptl_size_t target_offset,
 335                      void *user_ptr,
 336                      ptl_op_t ptl_op,
 337                      ptl_datatype_t ptl_dt)
 338 {
 339     int ret;
 340     ptl_size_t sent = 0;
 341 
 342     do {
 343         opal_atomic_add_fetch_64(opcount, 1);
 344 
 345         ptl_size_t frag_length = MIN(length, segment_length);
 346         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 347                              "Atomic size : %lu/%lu, offset:%lu", frag_length, length, sent));
 348         ret = PtlFetchAtomic(result_md_h,
 349                              result_offset + sent,
 350                              origin_md_h,
 351                              origin_offset + sent,
 352                              frag_length,
 353                              target_id,
 354                              pt_index,
 355                              match_bits,
 356                              target_offset + sent,
 357                              user_ptr,
 358                              0,
 359                              ptl_op,
 360                              ptl_dt);
 361         if (PTL_OK != ret) {
 362             opal_atomic_add_fetch_64(opcount, -1);
 363             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 364                                  "%s:%d PtlFetchAtomic failed with return value %d",
 365                                  __FUNCTION__, __LINE__, ret);
 366             return ret;
 367         }
 368         length -= frag_length;
 369         sent += frag_length;
 370     } while (length);
 371     return PTL_OK;
 372 }
 373 
 374 /* swap in segments no larger than segment_length */
 375 static int
 376 segmentedSwap(opal_atomic_int64_t *opcount,
 377               ptl_handle_md_t result_md_h,
 378               ptl_size_t result_offset,
 379               ptl_handle_md_t origin_md_h,
 380               ptl_size_t origin_offset,
 381               ptl_size_t length,
 382               ptl_size_t segment_length,
 383               ptl_process_t target_id,
 384               ptl_pt_index_t pt_index,
 385               ptl_match_bits_t match_bits,
 386               ptl_size_t target_offset,
 387               void *user_ptr,
 388               ptl_datatype_t ptl_dt)
 389 {
 390     int ret;
 391     ptl_size_t sent = 0;
 392 
 393     do {
 394         opal_atomic_add_fetch_64(opcount, 1);
 395 
 396         ptl_size_t frag_length = MIN(length, segment_length);
 397         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 398                              "Swap size : %lu/%lu, offset:%lu", frag_length, length, sent));
 399         ret = PtlSwap(result_md_h,
 400                       result_offset + sent,
 401                       origin_md_h,
 402                       (ptl_size_t) origin_offset + sent,
 403                       frag_length,
 404                       target_id,
 405                       pt_index,
 406                       match_bits,
 407                       target_offset + sent,
 408                       user_ptr,
 409                       0,
 410                       NULL,
 411                       PTL_SWAP,
 412                       ptl_dt);
 413         if (PTL_OK != ret) {
 414             opal_atomic_add_fetch_64(opcount, -1);
 415             opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 416                                  "%s:%d PtlSwap failed with return value %d",
 417                                  __FUNCTION__, __LINE__, ret);
 418             return ret;
 419         }
 420         length -= frag_length;
 421         sent += frag_length;
 422     } while (length);
 423     return PTL_OK;
 424 }
 425 
 426 static int
 427 create_iov_list(const void       *address,
 428                 int               count,
 429                 ompi_datatype_t  *datatype,
 430                 ptl_iovec_t     **ptl_iovec,
 431                 ptl_size_t       *ptl_iovec_count)
 432 {
 433     struct iovec iov[OSC_PORTALS4_IOVEC_MAX];
 434     opal_convertor_t convertor;
 435     uint32_t iov_count;
 436     uint32_t iov_index, ptl_iovec_index;
 437     /* needed for opal_convertor_raw but not used */
 438     size_t size;
 439     int ret;
 440     bool done;
 441 
 442     OBJ_CONSTRUCT(&convertor, opal_convertor_t);
 443     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &datatype->super, count,
 444                                                     address, 0, &convertor);
 445     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
 446         return ret;
 447     }
 448 
 449 
 450     *ptl_iovec_count = 0;
 451     ptl_iovec_index = 0;
 452     do {
 453         /* decode segments of the data */
 454         iov_count = OSC_PORTALS4_IOVEC_MAX;
 455         iov_index = 0;
 456 
 457         /* opal_convertor_raw returns done when it has reached the end of the data */
 458         done = opal_convertor_raw (&convertor, iov, &iov_count, &size);
 459 
 460         *ptl_iovec_count += iov_count;
 461         *ptl_iovec = (ptl_iovec_t *)realloc(*ptl_iovec, *ptl_iovec_count * sizeof(ptl_iovec_t));
 462 
 463         while (iov_index != iov_count) {
 464             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 465                                  "adding iov[%d].[%p,%lu] to ptl_iovec", iov_index, iov[iov_index].iov_base, iov[iov_index].iov_len));
 466             (*ptl_iovec)[ptl_iovec_index].iov_base = iov[iov_index].iov_base;
 467             (*ptl_iovec)[ptl_iovec_index].iov_len  = iov[iov_index].iov_len;
 468 
 469             ptl_iovec_index++;
 470             iov_index++;
 471         }
 472 
 473         assert(*ptl_iovec_count == ptl_iovec_index);
 474     } while (!done);
 475 
 476     return OMPI_SUCCESS;
 477 
 478 }
 479 
 480 /* get from a contiguous remote to an iovec local */
 481 static int
 482 get_to_iovec(ompi_osc_portals4_module_t *module,
 483              const void       *origin_address,
 484              int               origin_count,
 485              ompi_datatype_t  *origin_datatype,
 486              ptl_process_t     peer,
 487              int               target_count,
 488              ompi_datatype_t  *target_datatype,
 489              size_t            offset,
 490              ptl_pt_index_t    pt_index,
 491              ptl_match_bits_t  match_bits,
 492              void             *user_ptr)
 493 {
 494     int ret;
 495     size_t size;
 496     ptrdiff_t length, origin_lb, target_lb, extent;
 497     ptl_md_t md;
 498 
 499     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
 500         PtlMDRelease(module->origin_iovec_md_h);
 501         free(module->origin_iovec_list);
 502         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
 503         module->origin_iovec_list = NULL;
 504     }
 505 
 506     ptl_size_t iovec_count=0;
 507     create_iov_list(
 508         origin_address,
 509         origin_count,
 510         origin_datatype,
 511         &module->origin_iovec_list,
 512         &iovec_count);
 513 
 514     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
 515     if (OMPI_SUCCESS != ret) {
 516         return ret;
 517     }
 518     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
 519     if (OMPI_SUCCESS != ret) {
 520         return ret;
 521     }
 522     ompi_datatype_type_size(origin_datatype, &size);
 523     length = size * origin_count;
 524 
 525     md.start = module->origin_iovec_list;
 526     md.length = iovec_count;
 527     if (user_ptr) {
 528         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 529     } else {
 530         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 531     }
 532     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 533     md.ct_handle = module->ct_h;
 534     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
 535     if (PTL_OK != ret) {
 536         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 537                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 538                             __FILE__, __LINE__, ret);
 539         return ret;
 540     }
 541 
 542     opal_atomic_add_fetch_64(&module->opcount, 1);
 543 
 544     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 545                  "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
 546                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
 547     ret = PtlGet(module->origin_iovec_md_h,
 548                  (ptl_size_t) origin_lb,
 549                  length,
 550                  peer,
 551                  module->pt_idx,
 552                  module->match_bits,
 553                  offset + target_lb,
 554                  user_ptr);
 555     if (PTL_OK != ret) {
 556         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 557                      "%s,%d PtlGet() failed: ret = %d",
 558                      __FUNCTION__, __LINE__, ret));
 559         opal_atomic_add_fetch_64(&module->opcount, -1);
 560         return ret;
 561     }
 562 
 563     return OMPI_SUCCESS;
 564 }
 565 
 566 /* get to an iovec MD from a contiguous target using fragments no larger
 567  * than max_fetch_atomic_size to guarantee atomic writes at the origin */
 568 static int
 569 atomic_get_to_iovec(ompi_osc_portals4_module_t *module,
 570                     const void       *origin_address,
 571                     int               origin_count,
 572                     ompi_datatype_t  *origin_datatype,
 573                     ptl_process_t     peer,
 574                     int               target_count,
 575                     ompi_datatype_t  *target_datatype,
 576                     size_t            offset,
 577                     ptl_pt_index_t    pt_index,
 578                     ptl_match_bits_t  match_bits,
 579                     void             *user_ptr)
 580 {
 581     int ret;
 582     size_t size;
 583     ptrdiff_t length, origin_lb, target_lb, extent;
 584     ptl_md_t md;
 585 
 586     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
 587         PtlMDRelease(module->origin_iovec_md_h);
 588         free(module->origin_iovec_list);
 589         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
 590         module->origin_iovec_list = NULL;
 591     }
 592 
 593     ptl_size_t iovec_count=0;
 594     create_iov_list(
 595         origin_address,
 596         origin_count,
 597         origin_datatype,
 598         &module->origin_iovec_list,
 599         &iovec_count);
 600 
 601     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
 602     if (OMPI_SUCCESS != ret) {
 603         return ret;
 604     }
 605     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
 606     if (OMPI_SUCCESS != ret) {
 607         return ret;
 608     }
 609     ompi_datatype_type_size(origin_datatype, &size);
 610     length = size * origin_count;
 611 
 612     md.start = module->origin_iovec_list;
 613     md.length = iovec_count;
 614     if (user_ptr) {
 615         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 616     } else {
 617         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 618     }
 619     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 620     md.ct_handle = module->ct_h;
 621     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
 622     if (PTL_OK != ret) {
 623         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 624                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 625                             __FILE__, __LINE__, ret);
 626         return ret;
 627     }
 628 
 629     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 630                  "%s,%d Get(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
 631                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
 632     ret = segmentedGet(&module->opcount,
 633                        module->origin_iovec_md_h,
 634                        (ptl_size_t) origin_lb,
 635                        length,
 636                        module->fetch_atomic_max,
 637                        peer,
 638                        module->pt_idx,
 639                        module->match_bits,
 640                        offset + target_lb,
 641                        user_ptr);
 642     if (PTL_OK != ret) {
 643         return ret;
 644     }
 645 
 646     return OMPI_SUCCESS;
 647 }
 648 
 649 /* put from an iovec MD into a contiguous target */
 650 static int
 651 put_from_iovec(ompi_osc_portals4_module_t *module,
 652                const void       *origin_address,
 653                int               origin_count,
 654                ompi_datatype_t  *origin_datatype,
 655                ptl_process_t     peer,
 656                int               target_count,
 657                ompi_datatype_t  *target_datatype,
 658                size_t            offset,
 659                ptl_pt_index_t    pt_index,
 660                ptl_match_bits_t  match_bits,
 661                void             *user_ptr)
 662 {
 663     int ret;
 664     size_t size;
 665     ptrdiff_t length, origin_lb, target_lb, extent;
 666     ptl_md_t md;
 667 
 668     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
 669         PtlMDRelease(module->origin_iovec_md_h);
 670         free(module->origin_iovec_list);
 671         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
 672         module->origin_iovec_list = NULL;
 673     }
 674 
 675     ptl_size_t iovec_count=0;
 676     create_iov_list(
 677         origin_address,
 678         origin_count,
 679         origin_datatype,
 680         &module->origin_iovec_list,
 681         &iovec_count);
 682 
 683     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
 684     if (OMPI_SUCCESS != ret) {
 685         return ret;
 686     }
 687     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
 688     if (OMPI_SUCCESS != ret) {
 689         return ret;
 690     }
 691     ompi_datatype_type_size(origin_datatype, &size);
 692     length = size * origin_count;
 693 
 694     md.start = module->origin_iovec_list;
 695     md.length = iovec_count;
 696     if (user_ptr) {
 697         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 698     } else {
 699         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 700     }
 701     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 702     md.ct_handle = module->ct_h;
 703     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
 704     if (PTL_OK != ret) {
 705         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 706                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 707                             __FILE__, __LINE__, ret);
 708         return ret;
 709     }
 710 
 711     opal_atomic_add_fetch_64(&module->opcount, 1);
 712 
 713     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 714                  "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, size=%lu, length=%lu, offset=%lu, op_count=%ld)",
 715                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, size, length, offset, module->opcount));
 716     ret = PtlPut(module->origin_iovec_md_h,
 717                  (ptl_size_t) origin_lb,
 718                  length,
 719                  PTL_ACK_REQ,
 720                  peer,
 721                  module->pt_idx,
 722                  module->match_bits,
 723                  offset + target_lb,
 724                  user_ptr,
 725                  0);
 726     if (PTL_OK != ret) {
 727         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 728                      "%s,%d PtlPut() failed: ret = %d",
 729                      __FUNCTION__, __LINE__, ret));
 730         opal_atomic_add_fetch_64(&module->opcount, -1);
 731         return ret;
 732     }
 733 
 734     return OMPI_SUCCESS;
 735 }
 736 
 737 /* put from an iovec MD into a contiguous target using fragments no larger
 738  * than max_atomic_size to guarantee atomic writes at the target */
 739 static int
 740 atomic_put_from_iovec(ompi_osc_portals4_module_t *module,
 741                       const void       *origin_address,
 742                       int               origin_count,
 743                       ompi_datatype_t  *origin_datatype,
 744                       ptl_process_t     peer,
 745                       int               target_count,
 746                       ompi_datatype_t  *target_datatype,
 747                       size_t            offset,
 748                       ptl_pt_index_t    pt_index,
 749                       ptl_match_bits_t  match_bits,
 750                       void             *user_ptr)
 751 {
 752     int ret;
 753     size_t size;
 754     ptrdiff_t length, origin_lb, target_lb, extent;
 755     ptl_md_t md;
 756 
 757     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
 758         PtlMDRelease(module->origin_iovec_md_h);
 759         free(module->origin_iovec_list);
 760         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
 761         module->origin_iovec_list = NULL;
 762     }
 763 
 764     ptl_size_t iovec_count=0;
 765     create_iov_list(
 766         origin_address,
 767         origin_count,
 768         origin_datatype,
 769         &module->origin_iovec_list,
 770         &iovec_count);
 771 
 772     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
 773     if (OMPI_SUCCESS != ret) {
 774         return ret;
 775     }
 776     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
 777     if (OMPI_SUCCESS != ret) {
 778         return ret;
 779     }
 780     ompi_datatype_type_size(origin_datatype, &size);
 781     length = size * origin_count;
 782 
 783     md.start = module->origin_iovec_list;
 784     md.length = iovec_count;
 785     if (user_ptr) {
 786         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 787     } else {
 788         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 789     }
 790     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 791     md.ct_handle = module->ct_h;
 792     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
 793     if (PTL_OK != ret) {
 794         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 795                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 796                             __FILE__, __LINE__, ret);
 797         return ret;
 798     }
 799 
 800     OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
 801                  "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
 802                  __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
 803     ret = segmentedPut(&module->opcount,
 804                        module->origin_iovec_md_h,
 805                        (ptl_size_t) origin_lb,
 806                        length,
 807                        module->atomic_max,
 808                        PTL_ACK_REQ,
 809                        peer,
 810                        module->pt_idx,
 811                        module->match_bits,
 812                        offset + target_lb,
 813                        NULL,
 814                        0);
 815     if (OMPI_SUCCESS != ret) {
 816         return ret;
 817     }
 818 
 819     return OMPI_SUCCESS;
 820 }
 821 
 822 /* perform atomic operation on iovec local and contiguous remote */
 823 static int
 824 atomic_from_iovec(ompi_osc_portals4_module_t *module,
 825                   const void       *origin_address,
 826                   int               origin_count,
 827                   ompi_datatype_t  *origin_datatype,
 828                   ptl_process_t     peer,
 829                   int               target_count,
 830                   ompi_datatype_t  *target_datatype,
 831                   size_t            offset,
 832                   ptl_pt_index_t    pt_index,
 833                   ptl_match_bits_t  match_bits,
 834                   struct ompi_op_t *op,
 835                   void             *user_ptr)
 836 {
 837     int ret;
 838     size_t size;
 839     ptrdiff_t length, origin_lb, target_lb, extent;
 840     ptl_md_t md;
 841     ptl_op_t ptl_op;
 842     ptl_datatype_t ptl_dt;
 843 
 844     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
 845         PtlMDRelease(module->origin_iovec_md_h);
 846         free(module->origin_iovec_list);
 847         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
 848         module->origin_iovec_list = NULL;
 849     }
 850 
 851     ptl_size_t iovec_count=0;
 852     create_iov_list(
 853         origin_address,
 854         origin_count,
 855         origin_datatype,
 856         &module->origin_iovec_list,
 857         &iovec_count);
 858 
 859     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
 860     if (OMPI_SUCCESS != ret) {
 861         opal_output(ompi_osc_base_framework.framework_output,
 862                 "datatype is not currently supported");
 863         return OMPI_ERR_NOT_SUPPORTED;
 864     }
 865     ret = ompi_osc_portals4_get_op(op, &ptl_op);
 866     if (OMPI_SUCCESS != ret) {
 867         opal_output(ompi_osc_base_framework.framework_output,
 868                 "operation is not currently supported");
 869         return OMPI_ERR_NOT_SUPPORTED;
 870     }
 871 
 872     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
 873     if (OMPI_SUCCESS != ret) {
 874         return ret;
 875     }
 876     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
 877     if (OMPI_SUCCESS != ret) {
 878         return ret;
 879     }
 880     ompi_datatype_type_size(origin_datatype, &size);
 881     length = size * origin_count;
 882 
 883     md.start = module->origin_iovec_list;
 884     md.length = iovec_count;
 885     if (user_ptr) {
 886         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 887     } else {
 888         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 889     }
 890     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 891     md.ct_handle = module->ct_h;
 892     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
 893     if (PTL_OK != ret) {
 894         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 895                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 896                             __FILE__, __LINE__, ret);
 897         return ret;
 898     }
 899 
 900     ret = segmentedAtomic(&module->opcount,
 901                           module->origin_iovec_md_h,
 902                           (ptl_size_t) origin_lb,
 903                           length,
 904                           module->atomic_max,
 905                           peer,
 906                           module->pt_idx,
 907                           module->match_bits,
 908                           offset + target_lb,
 909                           user_ptr,
 910                           ptl_op,
 911                           ptl_dt);
 912     if (OMPI_SUCCESS != ret) {
 913         return ret;
 914     }
 915 
 916     return OMPI_SUCCESS;
 917 }
 918 
 919 /* perform atomic operation on iovec local and contiguous remote */
 920 static int
 921 swap_to_iovec(ompi_osc_portals4_module_t *module,
 922               const void       *result_address,
 923               int               result_count,
 924               ompi_datatype_t  *result_datatype,
 925               const void       *origin_address,
 926               int               origin_count,
 927               ompi_datatype_t  *origin_datatype,
 928               ptl_process_t     peer,
 929               int               target_count,
 930               ompi_datatype_t  *target_datatype,
 931               size_t            offset,
 932               ptl_pt_index_t    pt_index,
 933               ptl_match_bits_t  match_bits,
 934               void             *user_ptr)
 935 {
 936     int ret;
 937     size_t size;
 938     ptl_size_t iovec_count=0;
 939     ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
 940     ptl_md_t md;
 941     ptl_datatype_t ptl_dt;
 942 
 943     if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
 944         PtlMDRelease(module->result_iovec_md_h);
 945         free(module->result_iovec_list);
 946         module->result_iovec_md_h = PTL_INVALID_HANDLE;
 947         module->result_iovec_list = NULL;
 948     }
 949 
 950     create_iov_list(
 951         result_address,
 952         result_count,
 953         result_datatype,
 954         &module->result_iovec_list,
 955         &iovec_count);
 956 
 957     md.start = module->result_iovec_list;
 958     md.length = iovec_count;
 959     if (user_ptr) {
 960         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 961     } else {
 962         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 963     }
 964     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 965     md.ct_handle = module->ct_h;
 966     ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
 967     if (PTL_OK != ret) {
 968         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 969                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 970                             __FILE__, __LINE__, ret);
 971         return ret;
 972     }
 973 
 974     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
 975         PtlMDRelease(module->origin_iovec_md_h);
 976         free(module->origin_iovec_list);
 977         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
 978         module->origin_iovec_list = NULL;
 979     }
 980 
 981     create_iov_list(
 982         origin_address,
 983         origin_count,
 984         origin_datatype,
 985         &module->origin_iovec_list,
 986         &iovec_count);
 987 
 988     md.start = module->origin_iovec_list;
 989     md.length = iovec_count;
 990     md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
 991     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
 992     md.ct_handle = module->ct_h;
 993     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
 994     if (PTL_OK != ret) {
 995         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
 996                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
 997                             __FILE__, __LINE__, ret);
 998         return ret;
 999     }
1000 
1001     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1002     if (OMPI_SUCCESS != ret) {
1003         opal_output(ompi_osc_base_framework.framework_output,
1004                 "datatype is not currently supported");
1005         return OMPI_ERR_NOT_SUPPORTED;
1006     }
1007 
1008     ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1009     if (OMPI_SUCCESS != ret) {
1010         return ret;
1011     }
1012     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1013     if (OMPI_SUCCESS != ret) {
1014         return ret;
1015     }
1016     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1017     if (OMPI_SUCCESS != ret) {
1018         return ret;
1019     }
1020     ompi_datatype_type_size(origin_datatype, &size);
1021     length = size * origin_count;
1022 
1023     ret = segmentedSwap(&module->opcount,
1024                         module->result_iovec_md_h,
1025                         (ptl_size_t) result_lb,
1026                         module->origin_iovec_md_h,
1027                         (ptl_size_t) origin_lb,
1028                         length,
1029                         module->fetch_atomic_max,
1030                         peer,
1031                         module->pt_idx,
1032                         module->match_bits,
1033                         offset + target_lb,
1034                         user_ptr,
1035                         ptl_dt);
1036     if (OMPI_SUCCESS != ret) {
1037         return ret;
1038     }
1039 
1040     return OMPI_SUCCESS;
1041 }
1042 
1043 /* perform fetch atomic operation on iovec local and contiguous remote */
1044 static int
1045 fetch_atomic_to_iovec(ompi_osc_portals4_module_t *module,
1046                       const void       *result_address,
1047                       int               result_count,
1048                       ompi_datatype_t  *result_datatype,
1049                       const void       *origin_address,
1050                       int               origin_count,
1051                       ompi_datatype_t  *origin_datatype,
1052                       ptl_process_t     peer,
1053                       int               target_count,
1054                       ompi_datatype_t  *target_datatype,
1055                       size_t            offset,
1056                       ptl_pt_index_t    pt_index,
1057                       ptl_match_bits_t  match_bits,
1058                       struct ompi_op_t *op,
1059                       void             *user_ptr)
1060 {
1061     int ret;
1062     size_t size;
1063     ptl_size_t iovec_count=0;
1064     ptrdiff_t length, result_lb, origin_lb, target_lb, extent;
1065     ptl_md_t md;
1066     ptl_op_t ptl_op;
1067     ptl_datatype_t ptl_dt;
1068 
1069     if (module->result_iovec_md_h != PTL_INVALID_HANDLE) {
1070         PtlMDRelease(module->result_iovec_md_h);
1071         free(module->result_iovec_list);
1072         module->result_iovec_md_h = PTL_INVALID_HANDLE;
1073         module->result_iovec_list = NULL;
1074     }
1075 
1076     create_iov_list(
1077         result_address,
1078         result_count,
1079         result_datatype,
1080         &module->result_iovec_list,
1081         &iovec_count);
1082 
1083     md.start = module->result_iovec_list;
1084     md.length = iovec_count;
1085     if (user_ptr) {
1086         md.options = PTL_IOVEC | PTL_MD_EVENT_SEND_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1087     } else {
1088         md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1089     }
1090     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1091     md.ct_handle = module->ct_h;
1092     ret = PtlMDBind(module->ni_h, &md, &module->result_iovec_md_h);
1093     if (PTL_OK != ret) {
1094         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1095                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
1096                             __FILE__, __LINE__, ret);
1097         return ret;
1098     }
1099 
1100     if (module->origin_iovec_md_h != PTL_INVALID_HANDLE) {
1101         PtlMDRelease(module->origin_iovec_md_h);
1102         free(module->origin_iovec_list);
1103         module->origin_iovec_md_h = PTL_INVALID_HANDLE;
1104         module->origin_iovec_list = NULL;
1105     }
1106 
1107     create_iov_list(
1108         origin_address,
1109         origin_count,
1110         origin_datatype,
1111         &module->origin_iovec_list,
1112         &iovec_count);
1113 
1114     md.start = module->origin_iovec_list;
1115     md.length = iovec_count;
1116     md.options = PTL_IOVEC | PTL_MD_EVENT_SUCCESS_DISABLE | PTL_MD_EVENT_CT_REPLY | PTL_MD_EVENT_CT_ACK;
1117     md.eq_handle = mca_osc_portals4_component.matching_eq_h;
1118     md.ct_handle = module->ct_h;
1119     ret = PtlMDBind(module->ni_h, &md, &module->origin_iovec_md_h);
1120     if (PTL_OK != ret) {
1121         opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1122                             "%s:%d: PtlMDBind(iovec) failed: %d\n",
1123                             __FILE__, __LINE__, ret);
1124         return ret;
1125     }
1126 
1127     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1128     if (OMPI_SUCCESS != ret) {
1129         opal_output(ompi_osc_base_framework.framework_output,
1130                 "datatype is not currently supported");
1131         return OMPI_ERR_NOT_SUPPORTED;
1132     }
1133     ret = ompi_osc_portals4_get_op(op, &ptl_op);
1134     if (OMPI_SUCCESS != ret) {
1135         opal_output(ompi_osc_base_framework.framework_output,
1136                 "operation is not currently supported");
1137         return OMPI_ERR_NOT_SUPPORTED;
1138     }
1139 
1140     ret = ompi_datatype_get_true_extent(result_datatype, &result_lb, &extent);
1141     if (OMPI_SUCCESS != ret) {
1142         return ret;
1143     }
1144     ret = ompi_datatype_get_true_extent(origin_datatype, &origin_lb, &extent);
1145     if (OMPI_SUCCESS != ret) {
1146         return ret;
1147     }
1148     ret = ompi_datatype_get_true_extent(target_datatype, &target_lb, &extent);
1149     if (OMPI_SUCCESS != ret) {
1150         return ret;
1151     }
1152     ompi_datatype_type_size(origin_datatype, &size);
1153     length = size * origin_count;
1154 
1155     ret = segmentedFetchAtomic(&module->opcount,
1156                                module->result_iovec_md_h,
1157                                (ptl_size_t) result_lb,
1158                                module->origin_iovec_md_h,
1159                                (ptl_size_t) origin_lb,
1160                                length,
1161                                module->fetch_atomic_max,
1162                                peer,
1163                                module->pt_idx,
1164                                module->match_bits,
1165                                offset + target_lb,
1166                                user_ptr,
1167                                ptl_op,
1168                                ptl_dt);
1169     if (OMPI_SUCCESS != ret) {
1170         return ret;
1171     }
1172 
1173     return OMPI_SUCCESS;
1174 }
1175 
1176 /*
1177  * Derived from ompi_osc_rdma_master_noncontig()
1178  */
1179 
1180 /* put in the largest chunks possible given the noncontiguous restriction */
1181 static int
1182 put_to_noncontig(opal_atomic_int64_t *opcount,
1183                  ptl_handle_md_t   md_h,
1184                  const void       *origin_address,
1185                  int               origin_count,
1186                  ompi_datatype_t  *origin_datatype,
1187                  ptl_process_t     peer,
1188                  int               target_count,
1189                  ompi_datatype_t  *target_datatype,
1190                  size_t            offset,
1191                  ptl_pt_index_t    pt_index,
1192                  ptl_match_bits_t  match_bits,
1193                  void             *user_ptr)
1194 {
1195     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1196     opal_convertor_t origin_convertor, target_convertor;
1197     uint32_t origin_iov_count, target_iov_count;
1198     uint32_t origin_iov_index, target_iov_index;
1199     /* needed for opal_convertor_raw but not used */
1200     size_t origin_size, target_size, rdma_len;
1201     size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1202     int ret;
1203     bool done;
1204 
1205     /* prepare convertors for the source and target. these convertors will be used to determine the
1206      * contiguous segments within the source and target. */
1207     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1208     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1209                                                     (void*)origin_address, 0, &origin_convertor);
1210     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1211         return ret;
1212     }
1213 
1214     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1215     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1216                                                     (void *)NULL, 0, &target_convertor);
1217     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1218         return ret;
1219     }
1220 
1221     origin_iov_index = 0;
1222     origin_iov_count = 0;
1223 
1224     do {
1225         /* decode segments of the remote data */
1226         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1227         target_iov_index = 0;
1228 
1229         /* opal_convertor_raw returns done when it has reached the end of the data */
1230         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1231 
1232         /* loop on the target segments until we have exhaused the decoded source data */
1233         while (target_iov_index != target_iov_count) {
1234             if (origin_iov_index == origin_iov_count) {
1235                 /* decode segments of the target buffer */
1236                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1237                 origin_iov_index = 0;
1238                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1239             }
1240 
1241             /* we already checked that the target was large enough. this should be impossible */
1242             assert (0 != origin_iov_count);
1243 
1244             /* determine how much to transfer in this operation */
1245             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1246 
1247             opal_atomic_add_fetch_64(opcount, 1);
1248 
1249             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1250                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1251                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1252                              (unsigned long) target_iovec[target_iov_index].iov_len));
1253 
1254             ret = PtlPut(md_h,
1255                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1256                          rdma_len,
1257                          PTL_ACK_REQ,
1258                          peer,
1259                          pt_index,
1260                          match_bits,
1261                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1262                          user_ptr,
1263                          0);
1264             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1265                 opal_atomic_add_fetch_64(opcount, -1);
1266                 return ret;
1267             }
1268 
1269             /* adjust io vectors */
1270             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1271             target_iovec[target_iov_index].iov_len -= rdma_len;
1272             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1273             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1274 
1275             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1276             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1277         }
1278     } while (!done);
1279 
1280     /* clean up convertors */
1281     opal_convertor_cleanup (&origin_convertor);
1282     OBJ_DESTRUCT(&origin_convertor);
1283     opal_convertor_cleanup (&target_convertor);
1284     OBJ_DESTRUCT(&target_convertor);
1285 
1286     return OMPI_SUCCESS;
1287 }
1288 
1289 /* put in fragments no larger than max_atomic_size to guarantee atomic writes at the target */
1290 static int
1291 atomic_put_to_noncontig(ompi_osc_portals4_module_t *module,
1292                         ptl_handle_md_t   md_h,
1293                         const void       *origin_address,
1294                         int               origin_count,
1295                         ompi_datatype_t  *origin_datatype,
1296                         ptl_process_t     peer,
1297                         int               target_count,
1298                         ompi_datatype_t  *target_datatype,
1299                         size_t            offset,
1300                         ptl_pt_index_t    pt_index,
1301                         ptl_match_bits_t  match_bits,
1302                         void             *user_ptr)
1303 {
1304     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1305     opal_convertor_t origin_convertor, target_convertor;
1306     uint32_t origin_iov_count, target_iov_count;
1307     uint32_t origin_iov_index, target_iov_index;
1308     /* needed for opal_convertor_raw but not used */
1309     size_t origin_size, target_size, rdma_len;
1310     size_t max_rdma_len = module->atomic_max;
1311     int ret;
1312     bool done;
1313 
1314     /* prepare convertors for the source and target. these convertors will be used to determine the
1315      * contiguous segments within the source and target. */
1316     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1317     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1318                                                     (void*)origin_address, 0, &origin_convertor);
1319     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1320         return ret;
1321     }
1322 
1323     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1324     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1325                                                     (void *)NULL, 0, &target_convertor);
1326     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1327         return ret;
1328     }
1329 
1330     origin_iov_index = 0;
1331     origin_iov_count = 0;
1332 
1333     do {
1334         /* decode segments of the remote data */
1335         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1336         target_iov_index = 0;
1337 
1338         /* opal_convertor_raw returns done when it has reached the end of the data */
1339         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1340 
1341         /* loop on the target segments until we have exhaused the decoded source data */
1342         while (target_iov_index != target_iov_count) {
1343             if (origin_iov_index == origin_iov_count) {
1344                 /* decode segments of the target buffer */
1345                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1346                 origin_iov_index = 0;
1347                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1348             }
1349 
1350             /* we already checked that the target was large enough. this should be impossible */
1351             assert (0 != origin_iov_count);
1352 
1353             /* determine how much to transfer in this operation */
1354             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1355 
1356             opal_atomic_add_fetch_64(&module->opcount, 1);
1357 
1358             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1359                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1360                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1361                              (unsigned long) target_iovec[target_iov_index].iov_len));
1362 
1363             ret = PtlPut(md_h,
1364                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1365                          rdma_len,
1366                          PTL_ACK_REQ,
1367                          peer,
1368                          pt_index,
1369                          match_bits,
1370                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1371                          user_ptr,
1372                          0);
1373             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1374                 opal_atomic_add_fetch_64(&module->opcount, -1);
1375                 return ret;
1376             }
1377 
1378             /* adjust io vectors */
1379             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1380             target_iovec[target_iov_index].iov_len -= rdma_len;
1381             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1382             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1383 
1384             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1385             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1386         }
1387     } while (!done);
1388 
1389     return OMPI_SUCCESS;
1390 }
1391 
1392 /* perform atomic operation on (non)contiguous local and noncontiguous remote */
1393 static int
1394 atomic_to_noncontig(ompi_osc_portals4_module_t *module,
1395                     ptl_handle_md_t   md_h,
1396                     const void       *origin_address,
1397                     int               origin_count,
1398                     ompi_datatype_t  *origin_datatype,
1399                     ptl_process_t     peer,
1400                     int               target_count,
1401                     ompi_datatype_t  *target_datatype,
1402                     size_t            offset,
1403                     ptl_pt_index_t    pt_index,
1404                     ptl_match_bits_t  match_bits,
1405                     struct ompi_op_t *op,
1406                     void             *user_ptr)
1407 {
1408     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1409     opal_convertor_t origin_convertor, target_convertor;
1410     uint32_t origin_iov_count, target_iov_count;
1411     uint32_t origin_iov_index, target_iov_index;
1412     ptl_op_t ptl_op;
1413     ptl_datatype_t ptl_dt;
1414     /* needed for opal_convertor_raw but not used */
1415     size_t origin_size, target_size, atomic_len;
1416     int ret;
1417     bool done;
1418 
1419     /* prepare convertors for the source and target. these convertors will be used to determine the
1420      * contiguous segments within the source and target. */
1421     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1422     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1423                                                     (void*)origin_address, 0, &origin_convertor);
1424     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1425         return ret;
1426     }
1427 
1428     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1429     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1430                                                     (void *)NULL, 0, &target_convertor);
1431     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1432         return ret;
1433     }
1434 
1435     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1436     if (OMPI_SUCCESS != ret) {
1437         opal_output(ompi_osc_base_framework.framework_output,
1438                 "datatype is not currently supported");
1439         return OMPI_ERR_NOT_SUPPORTED;
1440     }
1441     ret = ompi_osc_portals4_get_op(op, &ptl_op);
1442     if (OMPI_SUCCESS != ret) {
1443         opal_output(ompi_osc_base_framework.framework_output,
1444                 "operation is not currently supported");
1445         return OMPI_ERR_NOT_SUPPORTED;
1446     }
1447 
1448     origin_iov_index = 0;
1449     origin_iov_count = 0;
1450 
1451     do {
1452         /* decode segments of the remote data */
1453         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1454         target_iov_index = 0;
1455 
1456         /* opal_convertor_raw returns done when it has reached the end of the data */
1457         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1458 
1459         /* loop on the target segments until we have exhaused the decoded source data */
1460         while (target_iov_index != target_iov_count) {
1461             if (origin_iov_index == origin_iov_count) {
1462                 /* decode segments of the target buffer */
1463                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1464                 origin_iov_index = 0;
1465                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1466             }
1467 
1468             /* we already checked that the target was large enough. this should be impossible */
1469             assert (0 != origin_iov_count);
1470 
1471             /* determine how much to transfer in this operation */
1472             atomic_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), module->atomic_max);
1473 
1474             opal_atomic_add_fetch_64(&module->opcount, 1);
1475 
1476             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1477                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1478                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1479                              (unsigned long) target_iovec[target_iov_index].iov_len));
1480 
1481             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1482                          "%s,%d Atomic", __FUNCTION__, __LINE__));
1483             ret = PtlAtomic(md_h,
1484                             (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1485                             atomic_len,
1486                             PTL_ACK_REQ,
1487                             peer,
1488                             pt_index,
1489                             match_bits,
1490                             offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1491                             user_ptr,
1492                             0,
1493                             ptl_op,
1494                             ptl_dt);
1495             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1496                 opal_atomic_add_fetch_64(&module->opcount, -1);
1497                 return ret;
1498             }
1499 
1500             /* adjust io vectors */
1501             origin_iovec[origin_iov_index].iov_len -= atomic_len;
1502             target_iovec[target_iov_index].iov_len -= atomic_len;
1503             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + atomic_len);
1504             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + atomic_len);
1505 
1506             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1507             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1508         }
1509     } while (!done);
1510 
1511     return OMPI_SUCCESS;
1512 }
1513 
1514 /* get from a noncontiguous remote to an (non)contiguous local */
1515 static int
1516 get_from_noncontig(opal_atomic_int64_t *opcount,
1517                    ptl_handle_md_t   md_h,
1518                    const void       *origin_address,
1519                    int               origin_count,
1520                    ompi_datatype_t  *origin_datatype,
1521                    ptl_process_t     peer,
1522                    int               target_count,
1523                    ompi_datatype_t  *target_datatype,
1524                    size_t            offset,
1525                    ptl_pt_index_t    pt_index,
1526                    ptl_match_bits_t  match_bits,
1527                    void             *user_ptr)
1528 {
1529     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1530     opal_convertor_t origin_convertor, target_convertor;
1531     uint32_t origin_iov_count, target_iov_count;
1532     uint32_t origin_iov_index, target_iov_index;
1533     /* needed for opal_convertor_raw but not used */
1534     size_t origin_size, target_size, rdma_len;
1535     size_t max_rdma_len = mca_osc_portals4_component.ptl_max_msg_size;
1536     int ret;
1537     bool done;
1538 
1539     /* prepare convertors for the source and target. these convertors will be used to determine the
1540      * contiguous segments within the source and target. */
1541     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1542     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1543                                                     (void*)origin_address, 0, &origin_convertor);
1544     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1545         return ret;
1546     }
1547 
1548     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1549     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1550                                                     (void *)NULL, 0, &target_convertor);
1551     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1552         return ret;
1553     }
1554 
1555     origin_iov_index = 0;
1556     origin_iov_count = 0;
1557 
1558     do {
1559         /* decode segments of the remote data */
1560         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1561         target_iov_index = 0;
1562 
1563         /* opal_convertor_raw returns done when it has reached the end of the data */
1564         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1565 
1566         /* loop on the target segments until we have exhaused the decoded source data */
1567         while (target_iov_index != target_iov_count) {
1568             if (origin_iov_index == origin_iov_count) {
1569                 /* decode segments of the target buffer */
1570                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1571                 origin_iov_index = 0;
1572                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1573             }
1574 
1575             /* we already checked that the target was large enough. this should be impossible */
1576             assert (0 != origin_iov_count);
1577 
1578             /* determine how much to transfer in this operation */
1579             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1580 
1581             opal_atomic_add_fetch_64(opcount, 1);
1582 
1583             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1584                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1585                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1586                              (unsigned long) target_iovec[target_iov_index].iov_len));
1587 
1588             ret = PtlGet(md_h,
1589                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1590                          rdma_len,
1591                          peer,
1592                          pt_index,
1593                          match_bits,
1594                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1595                          user_ptr);
1596             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1597                 opal_atomic_add_fetch_64(opcount, -1);
1598                 return ret;
1599             }
1600 
1601             /* adjust io vectors */
1602             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1603             target_iovec[target_iov_index].iov_len -= rdma_len;
1604             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1605             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1606 
1607             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1608             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1609         }
1610     } while (!done);
1611 
1612     return OMPI_SUCCESS;
1613 }
1614 
1615 /* get from a noncontiguous remote to an (non)contiguous local */
1616 static int
1617 atomic_get_from_noncontig(ompi_osc_portals4_module_t *module,
1618                           ptl_handle_md_t   md_h,
1619                           const void       *origin_address,
1620                           int               origin_count,
1621                           ompi_datatype_t  *origin_datatype,
1622                           ptl_process_t     peer,
1623                           int               target_count,
1624                           ompi_datatype_t  *target_datatype,
1625                           size_t            offset,
1626                           ptl_pt_index_t    pt_index,
1627                           ptl_match_bits_t  match_bits,
1628                           void             *user_ptr)
1629 {
1630     struct iovec origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1631     opal_convertor_t origin_convertor, target_convertor;
1632     uint32_t origin_iov_count, target_iov_count;
1633     uint32_t origin_iov_index, target_iov_index;
1634     /* needed for opal_convertor_raw but not used */
1635     size_t origin_size, target_size, rdma_len;
1636     size_t max_rdma_len = module->fetch_atomic_max;
1637     int ret;
1638     bool done;
1639 
1640     /* prepare convertors for the source and target. these convertors will be used to determine the
1641      * contiguous segments within the source and target. */
1642     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1643     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1644                                                     (void*)origin_address, 0, &origin_convertor);
1645     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1646         return ret;
1647     }
1648 
1649     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1650     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1651                                                     (void *)NULL, 0, &target_convertor);
1652     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1653         return ret;
1654     }
1655 
1656     origin_iov_index = 0;
1657     origin_iov_count = 0;
1658 
1659     do {
1660         /* decode segments of the remote data */
1661         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1662         target_iov_index = 0;
1663 
1664         /* opal_convertor_raw returns done when it has reached the end of the data */
1665         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1666 
1667         /* loop on the target segments until we have exhaused the decoded source data */
1668         while (target_iov_index != target_iov_count) {
1669             if (origin_iov_index == origin_iov_count) {
1670                 /* decode segments of the target buffer */
1671                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1672                 origin_iov_index = 0;
1673                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1674             }
1675 
1676             /* we already checked that the target was large enough. this should be impossible */
1677             assert (0 != origin_iov_count);
1678 
1679             /* determine how much to transfer in this operation */
1680             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1681 
1682             opal_atomic_add_fetch_64(&module->opcount, 1);
1683 
1684             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1685                              "performing rdma on contiguous region. local: %p, remote: %p, len: %lu",
1686                              origin_iovec[origin_iov_index].iov_base, target_iovec[target_iov_index].iov_base,
1687                              (unsigned long) target_iovec[target_iov_index].iov_len));
1688 
1689             ret = PtlGet(md_h,
1690                          (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1691                          rdma_len,
1692                          peer,
1693                          pt_index,
1694                          match_bits,
1695                          offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1696                          user_ptr);
1697             if (OPAL_UNLIKELY(PTL_OK != ret)) {
1698                 opal_atomic_add_fetch_64(&module->opcount, -1);
1699                 return ret;
1700             }
1701 
1702             /* adjust io vectors */
1703             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1704             target_iovec[target_iov_index].iov_len -= rdma_len;
1705             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1706             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1707 
1708             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1709             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1710         }
1711     } while (!done);
1712 
1713     return OMPI_SUCCESS;
1714 }
1715 
1716 /* swap from a noncontiguous remote to an (non)contiguous local */
1717 static int
1718 swap_from_noncontig(ompi_osc_portals4_module_t *module,
1719                     ptl_handle_md_t   result_md_h,
1720                     const void       *result_address,
1721                     int               result_count,
1722                     ompi_datatype_t  *result_datatype,
1723                     ptl_handle_md_t   origin_md_h,
1724                     const void       *origin_address,
1725                     int               origin_count,
1726                     ompi_datatype_t  *origin_datatype,
1727                     ptl_process_t     peer,
1728                     int               target_count,
1729                     ompi_datatype_t  *target_datatype,
1730                     size_t            offset,
1731                     ptl_pt_index_t    pt_index,
1732                     ptl_match_bits_t  match_bits,
1733                     void             *user_ptr)
1734 {
1735     struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1736     opal_convertor_t result_convertor, origin_convertor, target_convertor;
1737     uint32_t result_iov_count, origin_iov_count, target_iov_count;
1738     uint32_t result_iov_index, origin_iov_index, target_iov_index;
1739     /* needed for opal_convertor_raw but not used */
1740     size_t result_size, origin_size, target_size, rdma_len;
1741     size_t max_rdma_len = module->fetch_atomic_max;
1742     ptl_datatype_t ptl_dt;
1743 
1744     int ret;
1745     bool done;
1746 
1747     /* prepare convertors for the result, source and target. these convertors will be used to determine the
1748      * contiguous segments within the source and target. */
1749     OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1750     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1751                                                     (void*)result_address, 0, &result_convertor);
1752     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1753         return ret;
1754     }
1755 
1756     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1757     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1758                                                     (void*)origin_address, 0, &origin_convertor);
1759     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1760         return ret;
1761     }
1762 
1763     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1764     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1765                                                     (void *)NULL, 0, &target_convertor);
1766     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1767         return ret;
1768     }
1769 
1770     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1771     if (OMPI_SUCCESS != ret) {
1772         opal_output(ompi_osc_base_framework.framework_output,
1773                 "datatype is not currently supported");
1774         return OMPI_ERR_NOT_SUPPORTED;
1775     }
1776 
1777     result_iov_index = 0;
1778     result_iov_count = 0;
1779     origin_iov_index = 0;
1780     origin_iov_count = 0;
1781 
1782     do {
1783         /* decode segments of the remote data */
1784         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1785         target_iov_index = 0;
1786 
1787         /* opal_convertor_raw returns done when it has reached the end of the data */
1788         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1789 
1790         /* loop on the target segments until we have exhaused the decoded source data */
1791         while (target_iov_index != target_iov_count) {
1792             if (result_iov_index == result_iov_count) {
1793                 /* decode segments of the target buffer */
1794                 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1795                 result_iov_index = 0;
1796                 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1797             }
1798             if (origin_iov_index == origin_iov_count) {
1799                 /* decode segments of the target buffer */
1800                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1801                 origin_iov_index = 0;
1802                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1803             }
1804 
1805             /* we already checked that the target was large enough. this should be impossible */
1806             assert (0 != result_iov_count);
1807             assert (0 != origin_iov_count);
1808 
1809             /* determine how much to transfer in this operation */
1810             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1811 
1812             opal_atomic_add_fetch_64(&module->opcount, 1);
1813 
1814             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1815                              "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1816                              result_iovec[result_iov_index].iov_base,
1817                              origin_iovec[origin_iov_index].iov_base,
1818                              target_iovec[target_iov_index].iov_base,
1819                              (unsigned long) target_iovec[target_iov_index].iov_len));
1820 
1821             ret = PtlSwap(result_md_h,
1822                           (ptl_size_t)result_iovec[result_iov_index].iov_base,
1823                           origin_md_h,
1824                           (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1825                           rdma_len,
1826                           peer,
1827                           pt_index,
1828                           match_bits,
1829                           offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1830                           user_ptr,
1831                           0,
1832                           NULL,
1833                           PTL_SWAP,
1834                           ptl_dt);
1835             if (PTL_OK != ret) {
1836                 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1837                                      "%s:%d PtlSwap failed with return value %d",
1838                                      __FUNCTION__, __LINE__, ret);
1839                 opal_atomic_add_fetch_64(&module->opcount, -1);
1840                 return ret;
1841             }
1842 
1843             /* adjust io vectors */
1844             result_iovec[result_iov_index].iov_len -= rdma_len;
1845             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1846             target_iovec[target_iov_index].iov_len -= rdma_len;
1847             result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
1848             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
1849             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
1850 
1851             result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
1852             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
1853             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
1854         }
1855     } while (!done);
1856 
1857     return OMPI_SUCCESS;
1858 }
1859 
1860 /* swap from a noncontiguous remote to an (non)contiguous local */
1861 static int
1862 fetch_atomic_from_noncontig(ompi_osc_portals4_module_t *module,
1863                             ptl_handle_md_t   result_md_h,
1864                             const void       *result_address,
1865                             int               result_count,
1866                             ompi_datatype_t  *result_datatype,
1867                             ptl_handle_md_t   origin_md_h,
1868                             const void       *origin_address,
1869                             int               origin_count,
1870                             ompi_datatype_t  *origin_datatype,
1871                             ptl_process_t     peer,
1872                             int               target_count,
1873                             ompi_datatype_t  *target_datatype,
1874                             size_t            offset,
1875                             ptl_pt_index_t    pt_index,
1876                             ptl_match_bits_t  match_bits,
1877                             struct ompi_op_t *op,
1878                             void             *user_ptr)
1879 {
1880     struct iovec result_iovec[OSC_PORTALS4_IOVEC_MAX], origin_iovec[OSC_PORTALS4_IOVEC_MAX], target_iovec[OSC_PORTALS4_IOVEC_MAX];
1881     opal_convertor_t result_convertor, origin_convertor, target_convertor;
1882     uint32_t result_iov_count, origin_iov_count, target_iov_count;
1883     uint32_t result_iov_index, origin_iov_index, target_iov_index;
1884     /* needed for opal_convertor_raw but not used */
1885     size_t result_size, origin_size, target_size, rdma_len;
1886     size_t max_rdma_len = module->fetch_atomic_max;
1887     ptl_op_t ptl_op;
1888     ptl_datatype_t ptl_dt;
1889 
1890     int ret;
1891     bool done;
1892 
1893     /* prepare convertors for the result, source and target. these convertors will be used to determine the
1894      * contiguous segments within the source and target. */
1895     OBJ_CONSTRUCT(&result_convertor, opal_convertor_t);
1896     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &result_datatype->super, result_count,
1897                                                     (void*)result_address, 0, &result_convertor);
1898     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1899         return ret;
1900     }
1901 
1902     OBJ_CONSTRUCT(&origin_convertor, opal_convertor_t);
1903     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &origin_datatype->super, origin_count,
1904                                                     (void*)origin_address, 0, &origin_convertor);
1905     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1906         return ret;
1907     }
1908 
1909     OBJ_CONSTRUCT(&target_convertor, opal_convertor_t);
1910     ret = opal_convertor_copy_and_prepare_for_send (ompi_mpi_local_convertor, &target_datatype->super, target_count,
1911                                                     (void *)NULL, 0, &target_convertor);
1912     if (OPAL_UNLIKELY(OMPI_SUCCESS != ret)) {
1913         return ret;
1914     }
1915 
1916     ret = ompi_osc_portals4_get_dt(target_datatype, &ptl_dt);
1917     if (OMPI_SUCCESS != ret) {
1918         opal_output(ompi_osc_base_framework.framework_output,
1919                 "datatype is not currently supported");
1920         return OMPI_ERR_NOT_SUPPORTED;
1921     }
1922     ret = ompi_osc_portals4_get_op(op, &ptl_op);
1923     if (OMPI_SUCCESS != ret) {
1924         opal_output(ompi_osc_base_framework.framework_output,
1925                 "operation is not currently supported");
1926         return OMPI_ERR_NOT_SUPPORTED;
1927     }
1928 
1929     result_iov_index = 0;
1930     result_iov_count = 0;
1931     origin_iov_index = 0;
1932     origin_iov_count = 0;
1933 
1934     do {
1935         /* decode segments of the remote data */
1936         target_iov_count = OSC_PORTALS4_IOVEC_MAX;
1937         target_iov_index = 0;
1938 
1939         /* opal_convertor_raw returns done when it has reached the end of the data */
1940         done = opal_convertor_raw (&target_convertor, target_iovec, &target_iov_count, &target_size);
1941 
1942         /* loop on the target segments until we have exhaused the decoded source data */
1943         while (target_iov_index != target_iov_count) {
1944             if (result_iov_index == result_iov_count) {
1945                 /* decode segments of the target buffer */
1946                 result_iov_count = OSC_PORTALS4_IOVEC_MAX;
1947                 result_iov_index = 0;
1948                 (void) opal_convertor_raw (&result_convertor, result_iovec, &result_iov_count, &result_size);
1949             }
1950             if (origin_iov_index == origin_iov_count) {
1951                 /* decode segments of the target buffer */
1952                 origin_iov_count = OSC_PORTALS4_IOVEC_MAX;
1953                 origin_iov_index = 0;
1954                 (void) opal_convertor_raw (&origin_convertor, origin_iovec, &origin_iov_count, &origin_size);
1955             }
1956 
1957             /* we already checked that the target was large enough. this should be impossible */
1958             assert (0 != result_iov_count);
1959             assert (0 != origin_iov_count);
1960 
1961             /* determine how much to transfer in this operation */
1962             rdma_len = MIN(MIN(origin_iovec[origin_iov_index].iov_len, target_iovec[target_iov_index].iov_len), max_rdma_len);
1963 
1964             opal_atomic_add_fetch_64(&module->opcount, 1);
1965 
1966             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
1967                              "performing swap on contiguous region. result: %p origin: %p, target: %p, len: %lu",
1968                              result_iovec[result_iov_index].iov_base,
1969                              origin_iovec[origin_iov_index].iov_base,
1970                              target_iovec[target_iov_index].iov_base,
1971                              (unsigned long) target_iovec[target_iov_index].iov_len));
1972 
1973             ret = PtlFetchAtomic(result_md_h,
1974                                  (ptl_size_t)result_iovec[result_iov_index].iov_base,
1975                                  origin_md_h,
1976                                  (ptl_size_t)origin_iovec[origin_iov_index].iov_base,
1977                                  rdma_len,
1978                                  peer,
1979                                  pt_index,
1980                                  match_bits,
1981                                  offset + (ptl_size_t)target_iovec[target_iov_index].iov_base,
1982                                  user_ptr,
1983                                  0,
1984                                  ptl_op,
1985                                  ptl_dt);
1986             if (PTL_OK != ret) {
1987                 opal_output_verbose(1, ompi_osc_base_framework.framework_output,
1988                                      "%s:%d PtlFetchAtomic failed with return value %d",
1989                                      __FUNCTION__, __LINE__, ret);
1990                 opal_atomic_add_fetch_64(&module->opcount, -1);
1991                 return ret;
1992             }
1993 
1994             /* adjust io vectors */
1995             result_iovec[result_iov_index].iov_len -= rdma_len;
1996             origin_iovec[origin_iov_index].iov_len -= rdma_len;
1997             target_iovec[target_iov_index].iov_len -= rdma_len;
1998             result_iovec[result_iov_index].iov_base = (void *)((intptr_t) result_iovec[result_iov_index].iov_base + rdma_len);
1999             origin_iovec[origin_iov_index].iov_base = (void *)((intptr_t) origin_iovec[origin_iov_index].iov_base + rdma_len);
2000             target_iovec[target_iov_index].iov_base = (void *)((intptr_t) target_iovec[target_iov_index].iov_base + rdma_len);
2001 
2002             result_iov_index += (0 == result_iovec[result_iov_index].iov_len);
2003             origin_iov_index += (0 == origin_iovec[origin_iov_index].iov_len);
2004             target_iov_index += (0 == target_iovec[target_iov_index].iov_len);
2005         }
2006     } while (!done);
2007 
2008     return OMPI_SUCCESS;
2009 }
2010 
2011 int
2012 ompi_osc_portals4_rput(const void *origin_addr,
2013                        int origin_count,
2014                        struct ompi_datatype_t *origin_dt,
2015                        int target,
2016                        ptrdiff_t target_disp,
2017                        int target_count,
2018                        struct ompi_datatype_t *target_dt,
2019                        struct ompi_win_t *win,
2020                        struct ompi_request_t **ompi_req)
2021 {
2022     int ret;
2023     ompi_osc_portals4_request_t *request;
2024     ompi_osc_portals4_module_t *module =
2025         (ompi_osc_portals4_module_t*) win->w_osc_module;
2026     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2027     size_t size, offset;
2028     ptrdiff_t length, origin_lb, target_lb, extent;
2029 
2030     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2031                          "rput: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2032                          (unsigned long) origin_addr, origin_count,
2033                          origin_dt->name, target, (unsigned long) target_disp,
2034                          target_count, target_dt->name,
2035                          (unsigned long) win));
2036 
2037     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2038     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2039     *ompi_req = &request->super;
2040 
2041     offset = get_displacement(module, target) * target_disp;
2042 
2043     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2044         ret = put_to_noncontig(&module->opcount,
2045                                module->req_md_h,
2046                                origin_addr,
2047                                origin_count,
2048                                origin_dt,
2049                                peer,
2050                                target_count,
2051                                target_dt,
2052                                offset,
2053                                module->pt_idx,
2054                                module->match_bits,
2055                                request);
2056         if (PTL_OK != ret) {
2057             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2058             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2059                          "%s,%d put_to_noncontig() failed: ret = %d",
2060                          __FUNCTION__, __LINE__, ret));
2061             return ret;
2062         }
2063     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2064         ret = put_from_iovec(module,
2065                              origin_addr,
2066                              origin_count,
2067                              origin_dt,
2068                              peer,
2069                              target_count,
2070                              target_dt,
2071                              offset,
2072                              module->pt_idx,
2073                              module->match_bits,
2074                              request);
2075         if (PTL_OK != ret) {
2076             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2077             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2078                          "%s,%d put_from_iovec() failed: ret = %d",
2079                          __FUNCTION__, __LINE__, ret));
2080             return ret;
2081         }
2082     } else {
2083         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2084         if (OMPI_SUCCESS != ret) {
2085             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2086             return ret;
2087         }
2088         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2089         if (OMPI_SUCCESS != ret) {
2090             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2091             return ret;
2092         }
2093         ompi_datatype_type_size(origin_dt, &size);
2094         length = size * origin_count;
2095 
2096         request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2097 
2098         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2099                      "%s,%d RPut(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2100                      __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2101         ret = segmentedPut(&module->opcount,
2102                            module->req_md_h,
2103                            (ptl_size_t) origin_addr + origin_lb,
2104                            length,
2105                            mca_osc_portals4_component.ptl_max_msg_size,
2106                            PTL_ACK_REQ,
2107                            peer,
2108                            module->pt_idx,
2109                            module->match_bits,
2110                            offset + target_lb,
2111                            request,
2112                            0);
2113         if (OMPI_SUCCESS != ret) {
2114             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2115             return ret;
2116         }
2117     }
2118 
2119     return OMPI_SUCCESS;
2120 }
2121 
2122 
2123 int
2124 ompi_osc_portals4_rget(void *origin_addr,
2125                        int origin_count,
2126                        struct ompi_datatype_t *origin_dt,
2127                        int target,
2128                        ptrdiff_t target_disp,
2129                        int target_count,
2130                        struct ompi_datatype_t *target_dt,
2131                        struct ompi_win_t *win,
2132                        struct ompi_request_t **ompi_req)
2133 {
2134     int ret;
2135     ompi_osc_portals4_request_t *request;
2136     ompi_osc_portals4_module_t *module =
2137         (ompi_osc_portals4_module_t*) win->w_osc_module;
2138     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2139     size_t offset, size;
2140     ptrdiff_t length, origin_lb, target_lb, extent;
2141 
2142     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2143                          "rget: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2144                          (unsigned long) origin_addr, origin_count,
2145                          origin_dt->name, target, (unsigned long) target_disp,
2146                          target_count, target_dt->name,
2147                          (unsigned long) win));
2148 
2149     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2150     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2151     *ompi_req = &request->super;
2152 
2153     offset = get_displacement(module, target) * target_disp;
2154 
2155     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2156         ret = get_from_noncontig(&module->opcount,
2157                                  module->req_md_h,
2158                                  origin_addr,
2159                                  origin_count,
2160                                  origin_dt,
2161                                  peer,
2162                                  target_count,
2163                                  target_dt,
2164                                  offset,
2165                                  module->pt_idx,
2166                                  module->match_bits,
2167                                  request);
2168         if (PTL_OK != ret) {
2169             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2170                          "%s,%d get_from_noncontig() failed: ret = %d",
2171                          __FUNCTION__, __LINE__, ret));
2172             return ret;
2173         }
2174     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2175         ret = get_to_iovec(module,
2176                            origin_addr,
2177                            origin_count,
2178                            origin_dt,
2179                            peer,
2180                            target_count,
2181                            target_dt,
2182                            offset,
2183                            module->pt_idx,
2184                            module->match_bits,
2185                            request);
2186         if (PTL_OK != ret) {
2187             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2188                          "%s,%d get_to_iovec() failed: ret = %d",
2189                          __FUNCTION__, __LINE__, ret));
2190             return ret;
2191         }
2192     } else {
2193         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2194         if (OMPI_SUCCESS != ret) {
2195             return ret;
2196         }
2197         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2198         if (OMPI_SUCCESS != ret) {
2199             return ret;
2200         }
2201         ompi_datatype_type_size(origin_dt, &size);
2202         length = size * origin_count;
2203 
2204         request->ops_expected += number_of_fragments(length, mca_osc_portals4_component.ptl_max_msg_size);
2205 
2206         OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2207                               "%s,%d RGet", __FUNCTION__, __LINE__));
2208         ret = segmentedGet(&module->opcount,
2209                            module->req_md_h,
2210                            (ptl_size_t) origin_addr + origin_lb,
2211                            length,
2212                            mca_osc_portals4_component.ptl_max_msg_size,
2213                            peer,
2214                            module->pt_idx,
2215                            module->match_bits,
2216                            offset + target_lb,
2217                            request);
2218         if (OMPI_SUCCESS != ret) {
2219             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2220             return ret;
2221         }
2222     }
2223 
2224     return OMPI_SUCCESS;
2225 }
2226 
2227 
2228 int
2229 ompi_osc_portals4_raccumulate(const void *origin_addr,
2230                               int origin_count,
2231                               struct ompi_datatype_t *origin_dt,
2232                               int target,
2233                               ptrdiff_t target_disp,
2234                               int target_count,
2235                               struct ompi_datatype_t *target_dt,
2236                               struct ompi_op_t *op,
2237                               struct ompi_win_t *win,
2238                               struct ompi_request_t **ompi_req)
2239 {
2240     int ret;
2241     ompi_osc_portals4_request_t *request;
2242     ompi_osc_portals4_module_t *module =
2243         (ompi_osc_portals4_module_t*) win->w_osc_module;
2244     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2245     size_t offset, size;
2246     ptl_op_t ptl_op;
2247     ptl_datatype_t ptl_dt;
2248     ptrdiff_t sent, length, origin_lb, target_lb, extent;
2249 
2250     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2251                          "raccumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s 0x%lx",
2252                          (unsigned long) origin_addr, origin_count,
2253                          origin_dt->name, target, (unsigned long) target_disp,
2254                          target_count, target_dt->name,
2255                          op->o_name,
2256                          (unsigned long) win));
2257 
2258     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2259     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2260     *ompi_req = &request->super;
2261 
2262     offset = get_displacement(module, target) * target_disp;
2263 
2264     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2265         if (MPI_REPLACE == op) {
2266             ret = atomic_put_to_noncontig(module,
2267                                           module->req_md_h,
2268                                           origin_addr,
2269                                           origin_count,
2270                                           origin_dt,
2271                                           peer,
2272                                           target_count,
2273                                           target_dt,
2274                                           offset,
2275                                           module->pt_idx,
2276                                           module->match_bits,
2277                                           request);
2278             if (PTL_OK != ret) {
2279                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2280                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2281                     "%s,%d atomic_put_to_noncontig() failed: ret = %d",
2282                     __FUNCTION__, __LINE__, ret));
2283                 return ret;
2284             }
2285         } else {
2286             ret = atomic_to_noncontig(module,
2287                                       module->req_md_h,
2288                                       origin_addr,
2289                                       origin_count,
2290                                       origin_dt,
2291                                       peer,
2292                                       target_count,
2293                                       target_dt,
2294                                       offset,
2295                                       module->pt_idx,
2296                                       module->match_bits,
2297                                       op,
2298                                       request);
2299             if (PTL_OK != ret) {
2300                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2301                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2302                     "%s,%d atomic_to_noncontig() failed: ret = %d",
2303                     __FUNCTION__, __LINE__, ret));
2304                 return ret;
2305             }
2306         }
2307     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2308         if (MPI_REPLACE == op) {
2309             ret = atomic_put_from_iovec(module,
2310                                         origin_addr,
2311                                         origin_count,
2312                                         origin_dt,
2313                                         peer,
2314                                         target_count,
2315                                         target_dt,
2316                                         offset,
2317                                         module->pt_idx,
2318                                         module->match_bits,
2319                                         request);
2320             if (PTL_OK != ret) {
2321                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2322                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2323                     "%s,%d atomic_put_from_iovec() failed: ret = %d",
2324                     __FUNCTION__, __LINE__, ret));
2325                 return ret;
2326             }
2327         } else {
2328             ret = atomic_from_iovec(module,
2329                                     origin_addr,
2330                                     origin_count,
2331                                     origin_dt,
2332                                     peer,
2333                                     target_count,
2334                                     target_dt,
2335                                     offset,
2336                                     module->pt_idx,
2337                                     module->match_bits,
2338                                     op,
2339                                     request);
2340             if (PTL_OK != ret) {
2341                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2342                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2343                     "%s,%d atomic_from_iovec() failed: ret = %d",
2344                     __FUNCTION__, __LINE__, ret));
2345                 return ret;
2346             }
2347         }
2348     } else {
2349         ptl_size_t md_offset;
2350 
2351         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2352         if (OMPI_SUCCESS != ret) {
2353             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2354             return ret;
2355         }
2356         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2357         if (OMPI_SUCCESS != ret) {
2358             OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2359             return ret;
2360         }
2361         ompi_datatype_type_size(origin_dt, &size);
2362         length = size * origin_count;
2363         sent = 0;
2364 
2365         md_offset = (ptl_size_t) origin_addr;
2366 
2367         request->ops_expected += number_of_fragments(length, module->atomic_max);
2368 
2369         if (MPI_REPLACE == op) {
2370             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2371                                  "%s,%d Put", __FUNCTION__, __LINE__));
2372             ret = segmentedPut(&module->opcount,
2373                                module->req_md_h,
2374                                md_offset + origin_lb,
2375                                length,
2376                                module->atomic_max,
2377                                PTL_ACK_REQ,
2378                                peer,
2379                                module->pt_idx,
2380                                module->match_bits,
2381                                offset + target_lb,
2382                                request,
2383                                0);
2384             if (OMPI_SUCCESS != ret) {
2385                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2386                 return ret;
2387             }
2388         } else {
2389             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2390             if (OMPI_SUCCESS != ret) {
2391                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2392                 opal_output(ompi_osc_base_framework.framework_output,
2393                         "datatype is not currently supported");
2394                 return OMPI_ERR_NOT_SUPPORTED;
2395             }
2396             ret = ompi_osc_portals4_get_op(op, &ptl_op);
2397             if (OMPI_SUCCESS != ret) {
2398                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2399                 opal_output(ompi_osc_base_framework.framework_output,
2400                         "operation is not currently supported");
2401                 return OMPI_ERR_NOT_SUPPORTED;
2402             }
2403             do {
2404                 size_t msg_length = MIN(module->atomic_max, length - sent);
2405 
2406                 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
2407 
2408                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2409                              "%s,%d Atomic", __FUNCTION__, __LINE__));
2410                 ret = PtlAtomic(module->req_md_h,
2411                                 md_offset + sent + origin_lb,
2412                                 msg_length,
2413                                 PTL_ACK_REQ,
2414                                 peer,
2415                                 module->pt_idx,
2416                                 module->match_bits,
2417                                 offset + sent + target_lb,
2418                                 request,
2419                                 0,
2420                                 ptl_op,
2421                                 ptl_dt);
2422                 if (OMPI_SUCCESS != ret) {
2423                     (void)opal_atomic_add_fetch_64(&module->opcount, -1);
2424                     OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2425                     return ret;
2426                 }
2427                 sent += msg_length;
2428             } while (sent < length);
2429         }
2430     }
2431 
2432     return OMPI_SUCCESS;
2433 }
2434 
2435 
2436 int
2437 ompi_osc_portals4_rget_accumulate(const void *origin_addr,
2438                                   int origin_count,
2439                                   struct ompi_datatype_t *origin_dt,
2440                                   void *result_addr,
2441                                   int result_count,
2442                                   struct ompi_datatype_t *result_dt,
2443                                   int target,
2444                                   ptrdiff_t target_disp,
2445                                   int target_count,
2446                                   struct ompi_datatype_t *target_dt,
2447                                   struct ompi_op_t *op,
2448                                   struct ompi_win_t *win,
2449                                   struct ompi_request_t **ompi_req)
2450 {
2451     int ret;
2452     ompi_osc_portals4_request_t *request;
2453     ompi_osc_portals4_module_t *module =
2454         (ompi_osc_portals4_module_t*) win->w_osc_module;
2455     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2456     size_t target_offset, size;
2457     ptl_op_t ptl_op;
2458     ptl_datatype_t ptl_dt;
2459     ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
2460 
2461     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2462                          "rget_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
2463                          (unsigned long) origin_addr, origin_count,
2464                          origin_dt->name, (unsigned long) result_addr,
2465                          result_count, result_dt->name,
2466                          target, (unsigned long) target_disp,
2467                          target_count, target_dt->name,
2468                          op->o_name,
2469                          (unsigned long) win));
2470 
2471     OMPI_OSC_PORTALS4_REQUEST_ALLOC(win, request);
2472     if (NULL == request) return OMPI_ERR_TEMP_OUT_OF_RESOURCE;
2473     *ompi_req = &request->super;
2474 
2475     target_offset = get_displacement(module, target) * target_disp;
2476 
2477     if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2478         if (MPI_REPLACE == op) {
2479             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2480                                  "rget_accumulate: MPI_REPLACE  non-contiguous target"));
2481             ret = swap_from_noncontig(module,
2482                                       module->req_md_h,
2483                                       result_addr,
2484                                       result_count,
2485                                       result_dt,
2486                                       module->md_h,
2487                                       origin_addr,
2488                                       origin_count,
2489                                       origin_dt,
2490                                       peer,
2491                                       target_count,
2492                                       target_dt,
2493                                       target_offset,
2494                                       module->pt_idx,
2495                                       module->match_bits,
2496                                       request);
2497             if (PTL_OK != ret) {
2498                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2499                              "%s,%d swap_from_noncontig() failed: ret = %d",
2500                              __FUNCTION__, __LINE__, ret));
2501                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2502                 return ret;
2503             }
2504         } else if (MPI_NO_OP == op) {
2505             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2506                                  "rget_accumulate: MPI_NO_OP  non-contiguous target"));
2507             ret = atomic_get_from_noncontig(module,
2508                                             module->req_md_h,
2509                                             result_addr,
2510                                             result_count,
2511                                             result_dt,
2512                                             peer,
2513                                             target_count,
2514                                             target_dt,
2515                                             target_offset,
2516                                             module->pt_idx,
2517                                             module->match_bits,
2518                                             request);
2519             if (PTL_OK != ret) {
2520                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2521                              "%s,%d atomic_get_from_noncontig() failed: ret = %d",
2522                              __FUNCTION__, __LINE__, ret));
2523                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2524                 return ret;
2525             }
2526         } else {
2527             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2528                                  "rget_accumulate: other-op  non-contiguous target"));
2529             ret = fetch_atomic_from_noncontig(module,
2530                                               module->req_md_h,
2531                                               result_addr,
2532                                               result_count,
2533                                               result_dt,
2534                                               module->md_h,
2535                                               origin_addr,
2536                                               origin_count,
2537                                               origin_dt,
2538                                               peer,
2539                                               target_count,
2540                                               target_dt,
2541                                               target_offset,
2542                                               module->pt_idx,
2543                                               module->match_bits,
2544                                               op,
2545                                               request);
2546             if (PTL_OK != ret) {
2547                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2548                     "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
2549                     __FUNCTION__, __LINE__, ret));
2550                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2551                 return ret;
2552             }
2553         }
2554     } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
2555                (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
2556         if (MPI_REPLACE == op) {
2557             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2558                                  "rget_accumulate: MPI_REPLACE  non-contiguous origin/result"));
2559             ret = swap_to_iovec(module,
2560                                 result_addr,
2561                                 result_count,
2562                                 result_dt,
2563                                 origin_addr,
2564                                 origin_count,
2565                                 origin_dt,
2566                                 peer,
2567                                 target_count,
2568                                 target_dt,
2569                                 target_offset,
2570                                 module->pt_idx,
2571                                 module->match_bits,
2572                                 request);
2573             if (PTL_OK != ret) {
2574                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2575                              "%s,%d swap_to_iovec() failed: ret = %d",
2576                              __FUNCTION__, __LINE__, ret));
2577                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2578                 return ret;
2579             }
2580         } else if (MPI_NO_OP == op) {
2581             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2582                                  "rget_accumulate: MPI_NO_OP  non-contiguous origin/result"));
2583             ret = atomic_get_to_iovec(module,
2584                                       result_addr,
2585                                       result_count,
2586                                       result_dt,
2587                                       peer,
2588                                       target_count,
2589                                       target_dt,
2590                                       target_offset,
2591                                       module->pt_idx,
2592                                       module->match_bits,
2593                                       request);
2594             if (PTL_OK != ret) {
2595                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2596                              "%s,%d atomic_get_to_iovec() failed: ret = %d",
2597                              __FUNCTION__, __LINE__, ret));
2598                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2599                 return ret;
2600             }
2601         } else {
2602             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2603                                  "rget_accumulate: other-op  non-contiguous origin/result"));
2604             ret = fetch_atomic_to_iovec(module,
2605                                         result_addr,
2606                                         result_count,
2607                                         result_dt,
2608                                         origin_addr,
2609                                         origin_count,
2610                                         origin_dt,
2611                                         peer,
2612                                         target_count,
2613                                         target_dt,
2614                                         target_offset,
2615                                         module->pt_idx,
2616                                         module->match_bits,
2617                                         op,
2618                                         request);
2619             if (PTL_OK != ret) {
2620                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2621                              "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
2622                              __FUNCTION__, __LINE__, ret));
2623                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2624                 return ret;
2625             }
2626         }
2627     } else {
2628         if (MPI_REPLACE == op) {
2629             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2630                                  "rget_accumulate: MPI_REPLACE  contiguous"));
2631             ptl_size_t result_md_offset, origin_md_offset;
2632 
2633             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2634             if (OMPI_SUCCESS != ret) {
2635                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2636                 return ret;
2637             }
2638             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2639             if (OMPI_SUCCESS != ret) {
2640                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2641                 return ret;
2642             }
2643             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2644             if (OMPI_SUCCESS != ret) {
2645                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2646                 return ret;
2647             }
2648             ompi_datatype_type_size(origin_dt, &size);
2649             length = size * origin_count;
2650 
2651             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2652             if (OMPI_SUCCESS != ret) {
2653                 opal_output(ompi_osc_base_framework.framework_output,
2654                         "datatype is not currently supported");
2655                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2656                 return OMPI_ERR_NOT_SUPPORTED;
2657             }
2658 
2659             result_md_offset = (ptl_size_t) result_addr;
2660             origin_md_offset = (ptl_size_t) origin_addr;
2661 
2662             request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2663 
2664             ret = segmentedSwap(&module->opcount,
2665                                 module->req_md_h,
2666                                 result_md_offset + result_lb,
2667                                 module->md_h,
2668                                 origin_md_offset + origin_lb,
2669                                 length,
2670                                 module->fetch_atomic_max,
2671                                 peer,
2672                                 module->pt_idx,
2673                                 module->match_bits,
2674                                 target_offset + target_lb,
2675                                 request,
2676                                 ptl_dt);
2677             if (OMPI_SUCCESS != ret) {
2678                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2679                 return ret;
2680             }
2681         } else if (MPI_NO_OP == op) {
2682             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2683                                  "rget_accumulate: MPI_NO_OP  contiguous"));
2684             ptl_size_t md_offset;
2685 
2686             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2687             if (OMPI_SUCCESS != ret) {
2688                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2689                 return ret;
2690             }
2691             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2692             if (OMPI_SUCCESS != ret) {
2693                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2694                 return ret;
2695             }
2696             ompi_datatype_type_size(target_dt, &size);
2697             length = size * target_count;
2698 
2699             md_offset = (ptl_size_t) result_addr;
2700 
2701             request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2702 
2703             OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2704                                   "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
2705             ret = segmentedGet(&module->opcount,
2706                                module->req_md_h,
2707                                (ptl_size_t) md_offset + result_lb,
2708                                length,
2709                                module->fetch_atomic_max,
2710                                peer,
2711                                module->pt_idx,
2712                                module->match_bits,
2713                                target_offset + target_lb,
2714                                request);
2715             if (OMPI_SUCCESS != ret) {
2716                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2717                 return ret;
2718             }
2719         } else {
2720             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2721                                  "rget_accumulate: other-op  contiguous"));
2722             ptl_size_t result_md_offset, origin_md_offset;
2723 
2724             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2725             if (OMPI_SUCCESS != ret) {
2726                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2727                 return ret;
2728             }
2729             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2730             if (OMPI_SUCCESS != ret) {
2731                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2732                 return ret;
2733             }
2734             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
2735             if (OMPI_SUCCESS != ret) {
2736                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2737                 return ret;
2738             }
2739             ompi_datatype_type_size(origin_dt, &size);
2740             length = size * origin_count;
2741 
2742             result_md_offset = (ptl_size_t) result_addr;
2743             origin_md_offset = (ptl_size_t) origin_addr;
2744 
2745             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
2746             if (OMPI_SUCCESS != ret) {
2747                 opal_output(ompi_osc_base_framework.framework_output,
2748                         "datatype is not currently supported");
2749                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2750                 return OMPI_ERR_NOT_SUPPORTED;
2751             }
2752 
2753             ret = ompi_osc_portals4_get_op(op, &ptl_op);
2754             if (OMPI_SUCCESS != ret) {
2755                 opal_output(ompi_osc_base_framework.framework_output,
2756                         "operation is not currently supported");
2757                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2758                 return OMPI_ERR_NOT_SUPPORTED;
2759             }
2760 
2761             request->ops_expected += number_of_fragments(length, module->fetch_atomic_max);
2762 
2763             ret = segmentedFetchAtomic(&module->opcount,
2764                                        module->req_md_h,
2765                                        result_md_offset + result_lb,
2766                                        module->md_h,
2767                                        origin_md_offset + origin_lb,
2768                                        length,
2769                                        module->fetch_atomic_max,
2770                                        peer,
2771                                        module->pt_idx,
2772                                        module->match_bits,
2773                                        target_offset + target_lb,
2774                                        request,
2775                                        ptl_op,
2776                                        ptl_dt);
2777             if (OMPI_SUCCESS != ret) {
2778                 OMPI_OSC_PORTALS4_REQUEST_RETURN(request);
2779                 return ret;
2780             }
2781         }
2782     }
2783 
2784     return OMPI_SUCCESS;
2785 }
2786 
2787 
2788 int
2789 ompi_osc_portals4_put(const void *origin_addr,
2790                       int origin_count,
2791                       struct ompi_datatype_t *origin_dt,
2792                       int target,
2793                       ptrdiff_t target_disp,
2794                       int target_count,
2795                       struct ompi_datatype_t *target_dt,
2796                       struct ompi_win_t *win)
2797 {
2798     int ret;
2799     ompi_osc_portals4_module_t *module =
2800         (ompi_osc_portals4_module_t*) win->w_osc_module;
2801     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2802     size_t offset, size;
2803     ptrdiff_t length, origin_lb, target_lb, extent;
2804 
2805     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2806                          "put: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2807                          (unsigned long) origin_addr, origin_count,
2808                          origin_dt->name, target, (unsigned long) target_disp,
2809                          target_count, target_dt->name,
2810                          (unsigned long) win));
2811 
2812     offset = get_displacement(module, target) * target_disp;
2813 
2814     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2815         ret = put_to_noncontig(&module->opcount,
2816                                module->md_h,
2817                                origin_addr,
2818                                origin_count,
2819                                origin_dt,
2820                                peer,
2821                                target_count,
2822                                target_dt,
2823                                offset,
2824                                module->pt_idx,
2825                                module->match_bits,
2826                                NULL);
2827         if (PTL_OK != ret) {
2828             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2829                          "%s,%d put_to_noncontig() failed: ret = %d",
2830                          __FUNCTION__, __LINE__, ret));
2831             return ret;
2832         }
2833     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2834         ret = put_from_iovec(module,
2835                              origin_addr,
2836                              origin_count,
2837                              origin_dt,
2838                              peer,
2839                              target_count,
2840                              target_dt,
2841                              offset,
2842                              module->pt_idx,
2843                              module->match_bits,
2844                              NULL);
2845         if (PTL_OK != ret) {
2846             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2847                          "%s,%d put_from_iovec() failed: ret = %d",
2848                          __FUNCTION__, __LINE__, ret));
2849             return ret;
2850         }
2851     } else {
2852         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2853         if (OMPI_SUCCESS != ret) {
2854             return ret;
2855         }
2856         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2857         if (OMPI_SUCCESS != ret) {
2858             return ret;
2859         }
2860         ompi_datatype_type_size(origin_dt, &size);
2861         length = size * origin_count;
2862 
2863         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2864                      "%s,%d Put(origin_count=%d, origin_lb=%lu, target_count=%d, target_lb=%lu, length=%lu, op_count=%ld)",
2865                      __FUNCTION__, __LINE__, origin_count, origin_lb, target_count, target_lb, length, module->opcount));
2866         ret = segmentedPut(&module->opcount,
2867                            module->md_h,
2868                            (ptl_size_t) origin_addr + origin_lb,
2869                            length,
2870                            mca_osc_portals4_component.ptl_max_msg_size,
2871                            PTL_ACK_REQ,
2872                            peer,
2873                            module->pt_idx,
2874                            module->match_bits,
2875                            offset + target_lb,
2876                            NULL,
2877                            0);
2878         if (OMPI_SUCCESS != ret) {
2879             return ret;
2880         }
2881     }
2882 
2883     return OMPI_SUCCESS;
2884 }
2885 
2886 
2887 int
2888 ompi_osc_portals4_get(void *origin_addr,
2889                       int origin_count,
2890                       struct ompi_datatype_t *origin_dt,
2891                       int target,
2892                       ptrdiff_t target_disp,
2893                       int target_count,
2894                       struct ompi_datatype_t *target_dt,
2895                       struct ompi_win_t *win)
2896 {
2897     int ret;
2898     ompi_osc_portals4_module_t *module =
2899         (ompi_osc_portals4_module_t*) win->w_osc_module;
2900     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2901     size_t offset, size;
2902     ptrdiff_t length, origin_lb, target_lb, extent;
2903 
2904     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
2905                          "get: 0x%lx, %d, %s, %d, %lu, %d, %s, 0x%lx",
2906                          (unsigned long) origin_addr, origin_count,
2907                          origin_dt->name, target, (unsigned long) target_disp,
2908                          target_count, target_dt->name,
2909                          (unsigned long) win));
2910 
2911     offset = get_displacement(module, target) * target_disp;
2912 
2913     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
2914         ret = get_from_noncontig(&module->opcount,
2915                                  module->md_h,
2916                                  origin_addr,
2917                                  origin_count,
2918                                  origin_dt,
2919                                  peer,
2920                                  target_count,
2921                                  target_dt,
2922                                  offset,
2923                                  module->pt_idx,
2924                                  module->match_bits,
2925                                  NULL);
2926         if (PTL_OK != ret) {
2927             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2928                          "%s,%d get_from_noncontig() failed: ret = %d",
2929                          __FUNCTION__, __LINE__, ret));
2930             return ret;
2931         }
2932     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
2933         ret = get_to_iovec(module,
2934                            origin_addr,
2935                            origin_count,
2936                            origin_dt,
2937                            peer,
2938                            target_count,
2939                            target_dt,
2940                            offset,
2941                            module->pt_idx,
2942                            module->match_bits,
2943                            NULL);
2944         if (PTL_OK != ret) {
2945             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
2946                          "%s,%d get_to_iovec() failed: ret = %d",
2947                          __FUNCTION__, __LINE__, ret));
2948             return ret;
2949         }
2950     } else {
2951         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
2952         if (OMPI_SUCCESS != ret) {
2953             return ret;
2954         }
2955         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
2956         if (OMPI_SUCCESS != ret) {
2957             return ret;
2958         }
2959         ompi_datatype_type_size(origin_dt, &size);
2960         length = size * origin_count;
2961 
2962         OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
2963                               "%s,%d Get", __FUNCTION__, __LINE__));
2964         ret = segmentedGet(&module->opcount,
2965                            module->md_h,
2966                            (ptl_size_t) origin_addr + origin_lb,
2967                            length,
2968                            mca_osc_portals4_component.ptl_max_msg_size,
2969                            peer,
2970                            module->pt_idx,
2971                            module->match_bits,
2972                            offset + target_lb,
2973                            NULL);
2974         if (OMPI_SUCCESS != ret) {
2975             return ret;
2976         }
2977     }
2978 
2979     return OMPI_SUCCESS;
2980 }
2981 
2982 
2983 int
2984 ompi_osc_portals4_accumulate(const void *origin_addr,
2985                              int origin_count,
2986                              struct ompi_datatype_t *origin_dt,
2987                              int target,
2988                              ptrdiff_t target_disp,
2989                              int target_count,
2990                              struct ompi_datatype_t *target_dt,
2991                              struct ompi_op_t *op,
2992                              struct ompi_win_t *win)
2993 {
2994     int ret;
2995     ompi_osc_portals4_module_t *module =
2996         (ompi_osc_portals4_module_t*) win->w_osc_module;
2997     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
2998     size_t offset, size;
2999     ptl_op_t ptl_op;
3000     ptl_datatype_t ptl_dt;
3001     ptrdiff_t sent, length, origin_lb, target_lb, extent;
3002 
3003     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3004                          "accumulate: 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3005                          (unsigned long) origin_addr, origin_count,
3006                          origin_dt->name, target, (unsigned long) target_disp,
3007                          target_count, target_dt->name,
3008                          op->o_name,
3009                          (unsigned long) win));
3010 
3011     offset = get_displacement(module, target) * target_disp;
3012 
3013     if (!ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3014         if (MPI_REPLACE == op) {
3015             ret = atomic_put_to_noncontig(module,
3016                                           module->md_h,
3017                                           origin_addr,
3018                                           origin_count,
3019                                           origin_dt,
3020                                           peer,
3021                                           target_count,
3022                                           target_dt,
3023                                           offset,
3024                                           module->pt_idx,
3025                                           module->match_bits,
3026                                           NULL);
3027             if (PTL_OK != ret) {
3028                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3029                     "%s,%d atomic_put_to_noncontig() failed: ret = %d",
3030                     __FUNCTION__, __LINE__, ret));
3031                 return ret;
3032             }
3033         } else {
3034             ret = atomic_to_noncontig(module,
3035                                       module->md_h,
3036                                       origin_addr,
3037                                       origin_count,
3038                                       origin_dt,
3039                                       peer,
3040                                       target_count,
3041                                       target_dt,
3042                                       offset,
3043                                       module->pt_idx,
3044                                       module->match_bits,
3045                                       op,
3046                                       NULL);
3047             if (PTL_OK != ret) {
3048                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3049                     "%s,%d atomic_to_noncontig() failed: ret = %d",
3050                     __FUNCTION__, __LINE__, ret));
3051                 return ret;
3052             }
3053         }
3054     } else if (!ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) {
3055         if (MPI_REPLACE == op) {
3056             ret = atomic_put_from_iovec(module,
3057                                         origin_addr,
3058                                         origin_count,
3059                                         origin_dt,
3060                                         peer,
3061                                         target_count,
3062                                         target_dt,
3063                                         offset,
3064                                         module->pt_idx,
3065                                         module->match_bits,
3066                                         NULL);
3067             if (PTL_OK != ret) {
3068                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3069                     "%s,%d atomic_put_from_iovec() failed: ret = %d",
3070                     __FUNCTION__, __LINE__, ret));
3071                 return ret;
3072             }
3073         } else {
3074             ret = atomic_from_iovec(module,
3075                                     origin_addr,
3076                                     origin_count,
3077                                     origin_dt,
3078                                     peer,
3079                                     target_count,
3080                                     target_dt,
3081                                     offset,
3082                                     module->pt_idx,
3083                                     module->match_bits,
3084                                     op,
3085                                     NULL);
3086             if (PTL_OK != ret) {
3087                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3088                     "%s,%d atomic_from_iovec() failed: ret = %d",
3089                     __FUNCTION__, __LINE__, ret));
3090                 return ret;
3091             }
3092         }
3093     } else {
3094         ptl_size_t md_offset;
3095 
3096         ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3097         if (OMPI_SUCCESS != ret) {
3098             return ret;
3099         }
3100         ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3101         if (OMPI_SUCCESS != ret) {
3102             return ret;
3103         }
3104         ompi_datatype_type_size(origin_dt, &size);
3105         length = size * origin_count;
3106         sent = 0;
3107 
3108         md_offset = (ptl_size_t) origin_addr;
3109 
3110         if (MPI_REPLACE == op) {
3111             OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3112                                  "%s,%d Put", __FUNCTION__, __LINE__));
3113             ret = segmentedPut(&module->opcount,
3114                                module->md_h,
3115                                md_offset + origin_lb,
3116                                length,
3117                                module->atomic_max,
3118                                PTL_ACK_REQ,
3119                                peer,
3120                                module->pt_idx,
3121                                module->match_bits,
3122                                offset + target_lb,
3123                                NULL,
3124                                0);
3125             if (OMPI_SUCCESS != ret) {
3126                 return ret;
3127             }
3128         } else {
3129             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3130             if (OMPI_SUCCESS != ret) {
3131                 opal_output(ompi_osc_base_framework.framework_output,
3132                         "datatype is not currently supported");
3133                 return OMPI_ERR_NOT_SUPPORTED;
3134             }
3135             ret = ompi_osc_portals4_get_op(op, &ptl_op);
3136             if (OMPI_SUCCESS != ret) {
3137                 opal_output(ompi_osc_base_framework.framework_output,
3138                         "operation is not currently supported");
3139                 return OMPI_ERR_NOT_SUPPORTED;
3140             }
3141             do {
3142                 size_t msg_length = MIN(module->atomic_max, length - sent);
3143 
3144                 (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3145 
3146                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3147                              "%s,%d Atomic", __FUNCTION__, __LINE__));
3148                 ret = PtlAtomic(module->md_h,
3149                                 md_offset + sent + origin_lb,
3150                                 msg_length,
3151                                 PTL_ACK_REQ,
3152                                 peer,
3153                                 module->pt_idx,
3154                                 module->match_bits,
3155                                 offset + sent + target_lb,
3156                                 NULL,
3157                                 0,
3158                                 ptl_op,
3159                                 ptl_dt);
3160                 if (OMPI_SUCCESS != ret) {
3161                     (void)opal_atomic_add_fetch_64(&module->opcount, -1);
3162                     return ret;
3163                 }
3164                 sent += msg_length;
3165             } while (sent < length);
3166         }
3167     }
3168 
3169     return OMPI_SUCCESS;
3170 }
3171 
3172 
3173 int
3174 ompi_osc_portals4_get_accumulate(const void *origin_addr,
3175                                  int origin_count,
3176                                  struct ompi_datatype_t *origin_dt,
3177                                  void *result_addr,
3178                                  int result_count,
3179                                  struct ompi_datatype_t *result_dt,
3180                                  int target,
3181                                  ptrdiff_t target_disp,
3182                                  int target_count,
3183                                  struct ompi_datatype_t *target_dt,
3184                                  struct ompi_op_t *op,
3185                                  struct ompi_win_t *win)
3186 {
3187     int ret;
3188     ompi_osc_portals4_module_t *module =
3189         (ompi_osc_portals4_module_t*) win->w_osc_module;
3190     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3191     size_t target_offset, size;
3192     ptl_op_t ptl_op;
3193     ptl_datatype_t ptl_dt;
3194     ptrdiff_t length, origin_lb, target_lb, result_lb, extent;
3195 
3196     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3197                          "get_accumulate: 0x%lx, %d, %s, 0x%lx, %d, %s, %d, %lu, %d, %s, %s, 0x%lx",
3198                          (unsigned long) origin_addr, origin_count,
3199                          origin_dt->name, (unsigned long) result_addr,
3200                          result_count, result_dt->name,
3201                          target, (unsigned long) target_disp,
3202                          target_count, target_dt->name,
3203                          op->o_name,
3204                          (unsigned long) win));
3205 
3206     target_offset = get_displacement(module, target) * target_disp;
3207 
3208     if (target_count > 0 && !ompi_datatype_is_contiguous_memory_layout(target_dt, target_count)) {
3209         if (MPI_REPLACE == op) {
3210             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3211                                  "get_accumulate: MPI_REPLACE  non-contiguous target"));
3212             ret = swap_from_noncontig(module,
3213                                       module->md_h,
3214                                       result_addr,
3215                                       result_count,
3216                                       result_dt,
3217                                       module->md_h,
3218                                       origin_addr,
3219                                       origin_count,
3220                                       origin_dt,
3221                                       peer,
3222                                       target_count,
3223                                       target_dt,
3224                                       target_offset,
3225                                       module->pt_idx,
3226                                       module->match_bits,
3227                                       NULL);
3228             if (PTL_OK != ret) {
3229                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3230                              "%s,%d swap_from_noncontig() failed: ret = %d",
3231                              __FUNCTION__, __LINE__, ret));
3232                 return ret;
3233             }
3234         } else if (MPI_NO_OP == op) {
3235             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3236                                  "get_accumulate: MPI_NO_OP  non-contiguous target"));
3237             ret = atomic_get_from_noncontig(module,
3238                                             module->md_h,
3239                                             result_addr,
3240                                             result_count,
3241                                             result_dt,
3242                                             peer,
3243                                             target_count,
3244                                             target_dt,
3245                                             target_offset,
3246                                             module->pt_idx,
3247                                             module->match_bits,
3248                                             NULL);
3249             if (PTL_OK != ret) {
3250                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3251                              "%s,%d atomic_get_from_noncontig() failed: ret = %d",
3252                              __FUNCTION__, __LINE__, ret));
3253                 return ret;
3254             }
3255         } else {
3256             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3257                                  "get_accumulate: other-op  non-contiguous target"));
3258             ret = fetch_atomic_from_noncontig(module,
3259                                               module->md_h,
3260                                               result_addr,
3261                                               result_count,
3262                                               result_dt,
3263                                               module->md_h,
3264                                               origin_addr,
3265                                               origin_count,
3266                                               origin_dt,
3267                                               peer,
3268                                               target_count,
3269                                               target_dt,
3270                                               target_offset,
3271                                               module->pt_idx,
3272                                               module->match_bits,
3273                                               op,
3274                                               NULL);
3275             if (PTL_OK != ret) {
3276                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3277                     "%s,%d fetch_atomic_from_noncontig() failed: ret = %d",
3278                     __FUNCTION__, __LINE__, ret));
3279                 return ret;
3280             }
3281         }
3282     } else if ((origin_count > 0 && !ompi_datatype_is_contiguous_memory_layout(origin_dt, origin_count)) ||
3283                (result_count > 0 && !ompi_datatype_is_contiguous_memory_layout(result_dt, result_count))) {
3284         if (MPI_REPLACE == op) {
3285             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3286                                  "get_accumulate: MPI_REPLACE  non-contiguous origin/result"));
3287             ret = swap_to_iovec(module,
3288                                 result_addr,
3289                                 result_count,
3290                                 result_dt,
3291                                 origin_addr,
3292                                 origin_count,
3293                                 origin_dt,
3294                                 peer,
3295                                 target_count,
3296                                 target_dt,
3297                                 target_offset,
3298                                 module->pt_idx,
3299                                 module->match_bits,
3300                                 NULL);
3301             if (PTL_OK != ret) {
3302                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3303                              "%s,%d swap_to_iovec() failed: ret = %d",
3304                              __FUNCTION__, __LINE__, ret));
3305                 return ret;
3306             }
3307         } else if (MPI_NO_OP == op) {
3308             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3309                                  "get_accumulate: MPI_NO_OP  non-contiguous origin/result"));
3310             ret = atomic_get_to_iovec(module,
3311                                       result_addr,
3312                                       result_count,
3313                                       result_dt,
3314                                       peer,
3315                                       target_count,
3316                                       target_dt,
3317                                       target_offset,
3318                                       module->pt_idx,
3319                                       module->match_bits,
3320                                       NULL);
3321             if (PTL_OK != ret) {
3322                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3323                              "%s,%d atomic_get_to_iovec() failed: ret = %d",
3324                              __FUNCTION__, __LINE__, ret));
3325                 return ret;
3326             }
3327         } else {
3328             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3329                                  "get_accumulate: other-op  non-contiguous origin/result"));
3330             ret = fetch_atomic_to_iovec(module,
3331                                         result_addr,
3332                                         result_count,
3333                                         result_dt,
3334                                         origin_addr,
3335                                         origin_count,
3336                                         origin_dt,
3337                                         peer,
3338                                         target_count,
3339                                         target_dt,
3340                                         target_offset,
3341                                         module->pt_idx,
3342                                         module->match_bits,
3343                                         op,
3344                                         NULL);
3345             if (PTL_OK != ret) {
3346                 OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3347                              "%s,%d fetch_atomic_to_iovec() failed: ret = %d",
3348                              __FUNCTION__, __LINE__, ret));
3349                 return ret;
3350             }
3351         }
3352     } else {
3353         if (MPI_REPLACE == op) {
3354             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3355                                  "get_accumulate: MPI_REPLACE  contiguous"));
3356             ptl_size_t result_md_offset, origin_md_offset;
3357 
3358             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3359             if (OMPI_SUCCESS != ret) {
3360                 return ret;
3361             }
3362             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3363             if (OMPI_SUCCESS != ret) {
3364                 return ret;
3365             }
3366             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3367             if (OMPI_SUCCESS != ret) {
3368                 return ret;
3369             }
3370             ompi_datatype_type_size(origin_dt, &size);
3371             length = size * origin_count;
3372 
3373             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3374             if (OMPI_SUCCESS != ret) {
3375                 opal_output(ompi_osc_base_framework.framework_output,
3376                         "MPI_Get_accumulate: datatype is not currently supported");
3377                 return OMPI_ERR_NOT_SUPPORTED;
3378             }
3379 
3380             result_md_offset = (ptl_size_t) result_addr;
3381             origin_md_offset = (ptl_size_t) origin_addr;
3382 
3383             ret = segmentedSwap(&module->opcount,
3384                                 module->md_h,
3385                                 result_md_offset + result_lb,
3386                                 module->md_h,
3387                                 origin_md_offset + origin_lb,
3388                                 length,
3389                                 module->fetch_atomic_max,
3390                                 peer,
3391                                 module->pt_idx,
3392                                 module->match_bits,
3393                                 target_offset + target_lb,
3394                                 NULL,
3395                                 ptl_dt);
3396             if (OMPI_SUCCESS != ret) {
3397                 return ret;
3398             }
3399         } else if (MPI_NO_OP == op) {
3400             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3401                                  "get_accumulate: MPI_NO_OP  contiguous"));
3402             ptl_size_t md_offset;
3403 
3404             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3405             if (OMPI_SUCCESS != ret) {
3406                 return ret;
3407             }
3408             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3409             if (OMPI_SUCCESS != ret) {
3410                 return ret;
3411             }
3412             ompi_datatype_type_size(target_dt, &size);
3413             length = size * target_count;
3414 
3415             md_offset = (ptl_size_t) result_addr;
3416 
3417             OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3418                                   "%s,%d MPI_Get_accumulate", __FUNCTION__, __LINE__));
3419             ret = segmentedGet(&module->opcount,
3420                                module->md_h,
3421                                (ptl_size_t) md_offset + result_lb,
3422                                length,
3423                                module->fetch_atomic_max,
3424                                peer,
3425                                module->pt_idx,
3426                                module->match_bits,
3427                                target_offset + target_lb,
3428                                NULL);
3429             if (OMPI_SUCCESS != ret) {
3430                 return ret;
3431             }
3432         } else {
3433             OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3434                                  "get_accumulate: other-op  contiguous"));
3435             ptl_size_t result_md_offset, origin_md_offset;
3436 
3437             ret = ompi_datatype_get_true_extent(origin_dt, &origin_lb, &extent);
3438             if (OMPI_SUCCESS != ret) {
3439                 return ret;
3440             }
3441             ret = ompi_datatype_get_true_extent(target_dt, &target_lb, &extent);
3442             if (OMPI_SUCCESS != ret) {
3443                 return ret;
3444             }
3445             ret = ompi_datatype_get_true_extent(result_dt, &result_lb, &extent);
3446             if (OMPI_SUCCESS != ret) {
3447                 return ret;
3448             }
3449             ompi_datatype_type_size(origin_dt, &size);
3450             length = size * origin_count;
3451 
3452             result_md_offset = (ptl_size_t) result_addr;
3453             origin_md_offset = (ptl_size_t) origin_addr;
3454 
3455             ret = ompi_osc_portals4_get_dt(origin_dt, &ptl_dt);
3456             if (OMPI_SUCCESS != ret) {
3457                 opal_output(ompi_osc_base_framework.framework_output,
3458                         "MPI_Get_accumulate: datatype is not currently supported");
3459                 return OMPI_ERR_NOT_SUPPORTED;
3460             }
3461 
3462             ret = ompi_osc_portals4_get_op(op, &ptl_op);
3463             if (OMPI_SUCCESS != ret) {
3464                 opal_output(ompi_osc_base_framework.framework_output,
3465                         "MPI_Get_accumulate: operation is not currently supported");
3466                 return OMPI_ERR_NOT_SUPPORTED;
3467             }
3468 
3469             ret = segmentedFetchAtomic(&module->opcount,
3470                                        module->md_h,
3471                                        result_md_offset + result_lb,
3472                                        module->md_h,
3473                                        origin_md_offset + origin_lb,
3474                                        length,
3475                                        module->fetch_atomic_max,
3476                                        peer,
3477                                        module->pt_idx,
3478                                        module->match_bits,
3479                                        target_offset + target_lb,
3480                                        NULL,
3481                                        ptl_op,
3482                                        ptl_dt);
3483             if (OMPI_SUCCESS != ret) {
3484                 return ret;
3485             }
3486         }
3487     }
3488 
3489     return OMPI_SUCCESS;
3490 }
3491 
3492 
3493 int
3494 ompi_osc_portals4_compare_and_swap(const void *origin_addr,
3495                                    const void *compare_addr,
3496                                    void *result_addr,
3497                                    struct ompi_datatype_t *dt,
3498                                    int target,
3499                                    ptrdiff_t target_disp,
3500                                    struct ompi_win_t *win)
3501 {
3502     int ret;
3503     ompi_osc_portals4_module_t *module =
3504         (ompi_osc_portals4_module_t*) win->w_osc_module;
3505     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3506     size_t length;
3507     size_t offset;
3508     ptl_datatype_t ptl_dt;
3509     ptl_size_t result_md_offset, origin_md_offset;
3510 
3511     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3512                          "compare_and_swap: 0x%lx, 0x%lx, 0x%lx, %s, %d, %lu, 0x%lx",
3513                          (unsigned long) origin_addr,
3514                          (unsigned long) compare_addr,
3515                          (unsigned long) result_addr,
3516                          dt->name, target, (unsigned long) target_disp,
3517                          (unsigned long) win));
3518 
3519     ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3520     if (OMPI_SUCCESS != ret) {
3521         opal_output(ompi_osc_base_framework.framework_output,
3522                 "MPI_Compare_and_swap: datatype is not currently supported");
3523         return OMPI_ERR_NOT_SUPPORTED;
3524     }
3525 
3526     offset = get_displacement(module, target) * target_disp;
3527 
3528     ret = ompi_datatype_type_size(dt, &length);
3529     if (OMPI_SUCCESS != ret) return ret;
3530 
3531     assert(length <= module->fetch_atomic_max);
3532 
3533     result_md_offset = (ptl_size_t) result_addr;
3534     origin_md_offset = (ptl_size_t) origin_addr;
3535 
3536     (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3537 
3538     OPAL_OUTPUT_VERBOSE((90,ompi_osc_base_framework.framework_output,
3539                          "%s,%d Swap", __FUNCTION__, __LINE__));
3540     ret = PtlSwap(module->md_h,
3541                   result_md_offset,
3542                   module->md_h,
3543                   origin_md_offset,
3544                   length,
3545                   peer,
3546                   module->pt_idx,
3547                   module->match_bits,
3548                   offset,
3549                   NULL,
3550                   0,
3551                   compare_addr,
3552                   PTL_CSWAP,
3553                   ptl_dt);
3554     if (OMPI_SUCCESS != ret) {
3555         return ret;
3556     }
3557 
3558     return OMPI_SUCCESS;
3559 }
3560 
3561 
3562 int
3563 ompi_osc_portals4_fetch_and_op(const void *origin_addr,
3564                                void *result_addr,
3565                                struct ompi_datatype_t *dt,
3566                                int target,
3567                                ptrdiff_t target_disp,
3568                                struct ompi_op_t *op,
3569                                struct ompi_win_t *win)
3570 {
3571     int ret;
3572     ompi_osc_portals4_module_t *module =
3573         (ompi_osc_portals4_module_t*) win->w_osc_module;
3574     ptl_process_t peer = ompi_osc_portals4_get_peer(module, target);
3575     size_t length;
3576     size_t offset;
3577     ptl_op_t ptl_op;
3578     ptl_datatype_t ptl_dt;
3579 
3580     OPAL_OUTPUT_VERBOSE((50, ompi_osc_base_framework.framework_output,
3581                          "fetch_and_op: 0x%lx, 0x%lx, %s, %d, %lu, %s, 0x%lx",
3582                          (unsigned long) origin_addr,
3583                          (unsigned long) result_addr,
3584                          dt->name, target, (unsigned long) target_disp,
3585                          op->o_name,
3586                          (unsigned long) win));
3587 
3588     ret = ompi_osc_portals4_get_dt(dt, &ptl_dt);
3589     if (OMPI_SUCCESS != ret) {
3590         opal_output(ompi_osc_base_framework.framework_output,
3591                 "MPI_Fetch_and_op: datatype is not currently supported");
3592         return OMPI_ERR_NOT_SUPPORTED;
3593     }
3594 
3595     offset = get_displacement(module, target) * target_disp;
3596 
3597     ret = ompi_datatype_type_size(dt, &length);
3598     if (OMPI_SUCCESS != ret) return ret;
3599 
3600     assert(length <= module->fetch_atomic_max);
3601 
3602     if (MPI_REPLACE == op) {
3603         ptl_size_t result_md_offset, origin_md_offset;
3604 
3605         result_md_offset = (ptl_size_t) result_addr;
3606         origin_md_offset = (ptl_size_t) origin_addr;
3607 
3608         (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3609         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3610                              "%s,%d Swap", __FUNCTION__, __LINE__));
3611         ret = PtlSwap(module->md_h,
3612                       result_md_offset,
3613                       module->md_h,
3614                       origin_md_offset,
3615                       length,
3616                       peer,
3617                       module->pt_idx,
3618                       module->match_bits,
3619                       offset,
3620                       NULL,
3621                       0,
3622                       NULL,
3623                       PTL_SWAP,
3624                       ptl_dt);
3625     } else if (MPI_NO_OP == op) {
3626         ptl_size_t md_offset;
3627 
3628         md_offset = (ptl_size_t) result_addr;
3629 
3630         (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3631         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3632                              "%s,%d Get", __FUNCTION__, __LINE__));
3633         ret = PtlGet(module->md_h,
3634                      md_offset,
3635                      length,
3636                      peer,
3637                      module->pt_idx,
3638                      module->match_bits,
3639                      offset,
3640                      NULL);
3641     } else {
3642         ptl_size_t result_md_offset, origin_md_offset;
3643         (void)opal_atomic_add_fetch_64(&module->opcount, 1);
3644 
3645         ret = ompi_osc_portals4_get_op(op, &ptl_op);
3646         if (OMPI_SUCCESS != ret) {
3647             opal_output(ompi_osc_base_framework.framework_output,
3648                     "MPI_Fetch_and_op: operation is not currently supported");
3649             return OMPI_ERR_NOT_SUPPORTED;
3650         }
3651 
3652         result_md_offset = (ptl_size_t) result_addr;
3653         origin_md_offset = (ptl_size_t) origin_addr;
3654 
3655         OPAL_OUTPUT_VERBOSE((90, ompi_osc_base_framework.framework_output,
3656                              "%s,%d FetchAtomic", __FUNCTION__, __LINE__));
3657         ret = PtlFetchAtomic(module->md_h,
3658                              result_md_offset,
3659                              module->md_h,
3660                              origin_md_offset,
3661                              length,
3662                              peer,
3663                              module->pt_idx,
3664                              module->match_bits,
3665                              offset,
3666                              NULL,
3667                              0,
3668                              ptl_op,
3669                              ptl_dt);
3670     }
3671     if (OMPI_SUCCESS != ret) {
3672         return ret;
3673     }
3674 
3675     return OMPI_SUCCESS;
3676 }

/* [<][>][^][v][top][bottom][index][help] */