root/ompi/mca/coll/portals4/coll_portals4.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. ompi_coll_portals4_get_peer
  2. is_reduce_optimizable
  3. get_nchildren
  4. get_pipeline
  5. get_k_ary_tree
  6. ompi_coll_portals4_create_recv_converter
  7. ompi_coll_portals4_create_send_converter

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2013-2015 Sandia National Laboratories. All rights reserved.
   4  * Copyright (c) 2015-2018 Los Alamos National Security, LLC. All rights
   5  *                         reserved.
   6  * Copyright (c) 2015      Bull SAS.  All rights reserved.
   7  * Copyright (c) 2015      Research Organization for Information Science
   8  *                         and Technology (RIST). All rights reserved.
   9  * Copyright (c) 2017      IBM Corporation.  All rights reserved.
  10  * $COPYRIGHT$
  11  *
  12  * Additional copyrights may follow
  13  *
  14  * $HEADER$
  15  */
  16 
  17 #ifndef MCA_COLL_PORTALS4_EXPORT_H
  18 #define MCA_COLL_PORTALS4_EXPORT_H
  19 
  20 #include "ompi_config.h"
  21 
  22 #include <portals4.h>
  23 #include "mpi.h"
  24 #include "ompi/constants.h"
  25 #include "ompi/datatype/ompi_datatype.h"
  26 #include "ompi/datatype/ompi_datatype_internal.h"
  27 #include "ompi/op/op.h"
  28 #include "ompi/mca/mca.h"
  29 #include "opal/datatype/opal_convertor.h"
  30 #include "ompi/mca/coll/coll.h"
  31 #include "ompi/request/request.h"
  32 #include "ompi/communicator/communicator.h"
  33 #include "ompi/mca/coll/base/base.h"
  34 #include "ompi/datatype/ompi_datatype.h"
  35 #include "ompi/mca/mtl/portals4/mtl_portals4_endpoint.h"
  36 
  37 #include "ompi/mca/mtl/portals4/mtl_portals4.h"
  38 
  39 #define MAXTREEFANOUT 32
  40 
  41 BEGIN_C_DECLS
  42 
  43 #define COLL_PORTALS4_NO_OP ((ptl_op_t)-1)
  44 extern ptl_op_t ompi_coll_portals4_atomic_op[];
  45 
  46 #define COLL_PORTALS4_NO_DTYPE ((ptl_datatype_t)-1)
  47 extern ptl_datatype_t ompi_coll_portals4_atomic_datatype[];
  48 
   /*
    * State of the Portals4 collective component: Portals handles, limits and
    * tuning knobs.  The instance is declared right after the structure as
    * mca_coll_portals4_component.  How each handle is created and used is
    * not visible in this header; notes marked NOTE(review) are inferred from
    * the field names only.
    */
   49 struct mca_coll_portals4_component_t {
   50     mca_coll_base_component_t super;
   51 
   52     /** Network interface handle for matched interface */
   53     ptl_handle_ni_t ni_h;
   54     ptl_uid_t uid;
   55     ptl_process_t id;
          /* NOTE(review): presumably the portal table entries used for
           * collective traffic and for "finish" notifications -- confirm */
   56     ptl_pt_index_t pt_idx;
   57     ptl_pt_index_t finish_pt_idx;
   58     ptl_handle_eq_t eq_h;
   59     ptl_handle_me_t unex_me_h;
   60     ptl_handle_me_t finish_me_h;
   61     bool ev_link;
   62     opal_mutex_t lock;
   63     opal_condition_t cond;
   64     int nb_links;
   65     ptl_handle_md_t zero_md_h;
   66     ptl_handle_md_t data_md_h;
   67     opal_free_list_t requests; /* request free list for the i collectives */
   68 
          /* ni_limits.max_atomic_size is the bound is_reduce_optimizable()
           * compares the reduction length against */
   69     ptl_ni_limits_t ni_limits;
   70     ptl_size_t portals_max_msg_size;
   71 
          /* NOTE(review): looks like a switch selecting the binomial gather
           * algorithm; where it is set is not visible here */
   72     int use_binomial_gather_algorithm;
   73 
   74 };
   75 typedef struct mca_coll_portals4_component_t mca_coll_portals4_component_t;
   76 OMPI_MODULE_DECLSPEC extern mca_coll_portals4_component_t mca_coll_portals4_component;
  77 
  78 
  79 /*
  80  * Borrowed with thanks from the coll-tuned component, then modified for Portals4.
  81  */
   /*
    * Description of one process' position in a communication tree.
    * tree_next[] is a fixed-size array, so a node can record at most
    * MAXTREEFANOUT entries.  The meaning of the individual fields is not
    * established by this header; the notes are inferred from the names.
    */
   82 typedef struct ompi_coll_portals4_tree_t {
   83     int32_t tree_root;          /* NOTE(review): presumably the root rank of the tree */
   84     int32_t tree_fanout;
   85     int32_t tree_bmtree;
   86     int32_t tree_prev;          /* NOTE(review): presumably the parent rank */
   87     int32_t tree_next[MAXTREEFANOUT]; /* NOTE(review): presumably the child ranks */
   88     int32_t tree_nextsize;      /* NOTE(review): presumably the number of used tree_next[] entries */
   89     int32_t tree_numdescendants;
   90 } ompi_coll_portals4_tree_t;
  91 
  92 
   /*
    * Module state of the Portals4 collective component.  Besides the base
    * module it keeps the reduce/allreduce handlers (and their modules) that
    * were in place before, for use as a fallback, and a cached in-order
    * binomial tree together with the root it was built for.
    */
   93 struct mca_coll_portals4_module_t {
   94     mca_coll_base_module_t super;
          /* NOTE(review): presumably a per-module operation counter feeding
           * the op-count field of the match bits -- confirm in the .c files */
   95     opal_atomic_size_t coll_count;
   96 
   97     /* record handlers dedicated to fallback if offloaded operations are not supported */
   98     mca_coll_base_module_reduce_fn_t previous_reduce;
   99     mca_coll_base_module_t *previous_reduce_module;
  100     mca_coll_base_module_ireduce_fn_t previous_ireduce;
  101     mca_coll_base_module_t *previous_ireduce_module;
  102 
  103     mca_coll_base_module_allreduce_fn_t previous_allreduce;
  104     mca_coll_base_module_t *previous_allreduce_module;
  105     mca_coll_base_module_iallreduce_fn_t previous_iallreduce;
  106     mca_coll_base_module_t *previous_iallreduce_module;
  107 
  108     /* binomial tree */
         /* both fields are maintained by COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE:
          * the tree is rebuilt when none is cached or the root differs */
  109     ompi_coll_portals4_tree_t *cached_in_order_bmtree;
  110     int                        cached_in_order_bmtree_root;
  111 };
  112 typedef struct mca_coll_portals4_module_t mca_coll_portals4_module_t;
  113 OBJ_CLASS_DECLARATION(mca_coll_portals4_module_t);
 114 
 115 struct ompi_coll_portals4_request_t;
 116 
 117 #define COLL_PORTALS4_MAX_BW                         4096
 118 #define COLL_PORTALS4_MAX_SEGMENT                    32
 119 
 120 
 121 /* match/ignore bit manipulation
 122  *
 123  * 01234567 01234567 012 3 4 567 012 3 4567 01234567 01234567 01234567 01234567
  124  *                      | | |       |      |
 125  *  context id          |^|^| type  | int  | op count
 126  *                      |||||       |      |
 127  *                      |||+--------------- is a RTR message
 128  *                      |+----------------- is a data ACK message
 129  */
 130 
  /*
   * Masks selecting each field of the 64-bit match bits.  From the most
   * significant bit down: 11-bit context id, 1-bit ACK flag, 1-bit RTR flag,
   * 6-bit collective type, 5-bit internal value, 40-bit operation count.
   * These widths are the ones COLL_PORTALS4_SET_BITS shifts by.
   */
  131 #define COLL_PORTALS4_CID_MASK      0xFFE0000000000000ULL
  132 #define COLL_PORTALS4_ACK_MASK      0x0010000000000000ULL
  133 #define COLL_PORTALS4_RTR_MASK      0x0008000000000000ULL
  134 #define COLL_PORTALS4_TYPE_MASK     0x0007E00000000000ULL
  135 #define COLL_PORTALS4_INTERNAL_MASK 0x00001F0000000000ULL
  136 #define COLL_PORTALS4_OP_COUNT_MASK 0x000000FFFFFFFFFFULL
  137 
  /* Collective identifiers.  NOTE(review): presumably the values passed as
   * the `type` argument of COLL_PORTALS4_SET_BITS -- confirm in the callers;
   * all of them fit in the 6-bit type field. */
  138 #define COLL_PORTALS4_BARRIER       0x01
  139 #define COLL_PORTALS4_BCAST         0x02
  140 #define COLL_PORTALS4_SCATTER       0x03
  141 #define COLL_PORTALS4_GATHER        0x04
  142 #define COLL_PORTALS4_REDUCE        0x05
  143 #define COLL_PORTALS4_ALLREDUCE     0x06
  144 
  /* Rank sentinels: PTL_INVALID_RANK is what get_pipeline() and
   * get_k_ary_tree() store when there is no neighbour/father/child;
   * PTL_FIRST_RANK is rank 0. */
  145 #define PTL_INVALID_RANK ((ptl_rank_t)-1)
  146 #define PTL_FIRST_RANK   ((ptl_rank_t)0)
 147 
 148 #define COLL_PORTALS4_SET_BITS(match_bits, contextid, ack, rtr, type, internal, op_count) \
 149 {                                                                   \
 150     match_bits = contextid;                                         \
 151     match_bits = (match_bits << 1);                                 \
 152     match_bits |= (ack & 0x1);                                      \
 153     match_bits = (match_bits << 1);                                 \
 154     match_bits |= (rtr & 0x1);                                      \
 155     match_bits = (match_bits << 6);                                 \
 156     match_bits |= (type & 0x3F);                                    \
 157     match_bits = (match_bits << 5);                                 \
 158     match_bits |= (internal & 0x1F);                                \
 159     match_bits = (match_bits << 40);                                \
 160     match_bits |= (op_count & 0xFFFFFFFFFF);                        \
 161 }
 162 
 163 int
 164 opal_stderr(const char *msg, const char *file,
 165         const int line, const int ret);
 166 
 167 /*
 168  * Borrowed with thanks from the coll-tuned component.
 169  */
 170 #define COLL_PORTALS4_UPDATE_IN_ORDER_BMTREE( OMPI_COMM, PORTALS4_MODULE, ROOT ) \
 171 do {                                                                                         \
 172     if( !( ((PORTALS4_MODULE)->cached_in_order_bmtree)                                               \
 173            && ((PORTALS4_MODULE)->cached_in_order_bmtree_root == (ROOT)) ) ) {                       \
 174         if( (PORTALS4_MODULE)->cached_in_order_bmtree ) { /* destroy previous binomial if defined */ \
 175             ompi_coll_portals4_destroy_tree( &((PORTALS4_MODULE)->cached_in_order_bmtree) );       \
 176         }                                                                                    \
 177         (PORTALS4_MODULE)->cached_in_order_bmtree = ompi_coll_portals4_build_in_order_bmtree( (OMPI_COMM), (ROOT) ); \
 178         (PORTALS4_MODULE)->cached_in_order_bmtree_root = (ROOT);                                     \
 179     }                                                                                        \
 180 } while (0)
 181 
 182 
 183 int ompi_coll_portals4_barrier_intra(struct ompi_communicator_t *comm,
 184         mca_coll_base_module_t *module);
 185 int ompi_coll_portals4_ibarrier_intra(struct ompi_communicator_t *comm,
 186         ompi_request_t ** request,
 187         mca_coll_base_module_t *module);
 188 int ompi_coll_portals4_ibarrier_intra_fini(struct ompi_coll_portals4_request_t *request);
 189 
 190 int ompi_coll_portals4_bcast_intra(void *buff, int count,
 191         struct ompi_datatype_t *datatype, int root,
 192         struct ompi_communicator_t *comm,mca_coll_base_module_t *module);
 193 int ompi_coll_portals4_ibcast_intra(void *buff, int count,
 194         struct ompi_datatype_t *datatype, int root,
 195         struct ompi_communicator_t *comm,
 196         ompi_request_t **request,
 197         mca_coll_base_module_t *module);
 198 int ompi_coll_portals4_ibcast_intra_fini(struct ompi_coll_portals4_request_t *request);
 199 
 200 int ompi_coll_portals4_reduce_intra(const void *sbuf, void *rbuf, int count,
 201         MPI_Datatype dtype, MPI_Op op,
 202         int root,
 203         struct ompi_communicator_t *comm,
 204         mca_coll_base_module_t *module);
 205 int ompi_coll_portals4_ireduce_intra(const void* sendbuf, void* recvbuf, int count,
 206         MPI_Datatype dype, MPI_Op op,
 207         int root,
 208         struct ompi_communicator_t *comm,
 209         ompi_request_t ** ompi_request,
 210         struct mca_coll_base_module_2_3_0_t *module);
 211 int ompi_coll_portals4_ireduce_intra_fini(struct ompi_coll_portals4_request_t *request);
 212 
 213 int ompi_coll_portals4_allreduce_intra(const void* sendbuf, void* recvbuf, int count,
 214         MPI_Datatype dtype, MPI_Op op,
 215         struct ompi_communicator_t *comm,
 216         struct mca_coll_base_module_2_3_0_t *module);
 217 int ompi_coll_portals4_iallreduce_intra(const void* sendbuf, void* recvbuf, int count,
 218         MPI_Datatype dtype, MPI_Op op,
 219         struct ompi_communicator_t *comm,
 220         ompi_request_t ** ompi_request,
 221         struct mca_coll_base_module_2_3_0_t *module);
 222 int
 223 ompi_coll_portals4_iallreduce_intra_fini(struct ompi_coll_portals4_request_t *request);
 224 
 225 int ompi_coll_portals4_gather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
 226                                     void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
 227                                     int root,
 228                                     struct ompi_communicator_t *comm,
 229                                     mca_coll_base_module_t *module);
 230 int ompi_coll_portals4_igather_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
 231                                      void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
 232                                      int root,
 233                                      struct ompi_communicator_t *comm,
 234                                      ompi_request_t **request,
 235                                      mca_coll_base_module_t *module);
 236 int ompi_coll_portals4_igather_intra_fini(struct ompi_coll_portals4_request_t *request);
 237 
 238 int ompi_coll_portals4_scatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
 239                                      void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
 240                                      int root,
 241                                      struct ompi_communicator_t *comm,
 242                                      mca_coll_base_module_t *module);
 243 int ompi_coll_portals4_iscatter_intra(const void *sbuf, int scount, struct ompi_datatype_t *sdtype,
 244                                       void *rbuf, int rcount, struct ompi_datatype_t *rdtype,
 245                                       int root,
 246                                       struct ompi_communicator_t *comm,
 247                                       ompi_request_t **request,
 248                                       mca_coll_base_module_t *module);
 249 int ompi_coll_portals4_iscatter_intra_fini(struct ompi_coll_portals4_request_t *request);
 250 
 251 
 252 static inline ptl_process_t
 253 ompi_coll_portals4_get_peer(struct ompi_communicator_t *comm, int rank)
 254 {
 255     return ompi_mtl_portals4_get_peer(comm, rank);
 256 }
 257 
 258 
 259 static inline bool
 260 is_reduce_optimizable(struct ompi_datatype_t *dtype, size_t length, struct ompi_op_t *op,
 261         ptl_datatype_t *ptl_dtype, ptl_op_t *ptl_op) {
 262 
 263     /* first check the type of operation and
 264      * map it to the corresponding portals4 one */
 265 
 266     if (!(op->o_flags & OMPI_OP_FLAGS_COMMUTE)) {
 267         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 268                 "atomic op %d is not commutative, deactivate the optimization\n",
 269                 op->op_type);
 270         return false;
 271     }
 272 
 273     if (!(op->o_flags & OMPI_OP_FLAGS_ASSOC)) {
 274         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 275                 "atomic op %d is not float associative, deactivate the optimization\n",
 276                 op->op_type);
 277         return false;
 278     }
 279 
 280     if (op->op_type >= OMPI_OP_NUM_OF_TYPES)  {
 281         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 282                 "unknown atomic op %d\n",
 283                 op->op_type);
 284         return false;
 285     }
 286 
 287     *ptl_op = ompi_coll_portals4_atomic_op[op->op_type];
 288     if (*ptl_op == COLL_PORTALS4_NO_OP) {
 289         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 290                 "unsupported atomic op %d\n",
 291                 op->op_type);
 292         return false;
 293     }
 294 
 295     /* then check the data type and map it
 296      * to the corresponding portals4 one */
 297 
 298     if (!ompi_datatype_is_valid(dtype)) {
 299         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 300                 "not a valid datatype %d\n",
 301                 dtype->id);
 302         return false;
 303     }
 304 
 305     if (dtype->id >= OMPI_DATATYPE_MPI_MAX_PREDEFINED) {
 306         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 307                 "not a valid datatype %d\n",
 308                 dtype->id);
 309         return false;
 310     }
 311 
 312     if (length > mca_coll_portals4_component.ni_limits.max_atomic_size) {
 313         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 314                 "length (%ld) > ni.max_atomic_size (%ld)\n",
 315                 length, mca_coll_portals4_component.ni_limits.max_atomic_size);
 316         return false;
 317     }
 318 
 319     *ptl_dtype = ompi_coll_portals4_atomic_datatype[dtype->id];
 320     if (*ptl_dtype == COLL_PORTALS4_NO_DTYPE) {
 321         opal_output_verbose(50, ompi_coll_base_framework.framework_output,
 322                 "datatype %d not supported\n",
 323                 dtype->id);
 324         return false;
 325     }
 326 
 327     return true;
 328 }
 329 
 330 
 331 static inline int
 332 get_nchildren(int cube_dim, int hibit, int rank, int size)
 333 {
 334     int guess = cube_dim - (hibit + 1);
 335 
 336     if ((rank | (1 << (cube_dim - 1))) >= size) {
 337         guess--;
 338     }
 339     if (guess < 0) {
 340         return 0;
 341     }
 342 
 343     return guess;
 344 }
 345 
 346 static inline
 347 void get_pipeline(ptl_rank_t rank, ptl_rank_t np, ptl_rank_t root,
 348         ptl_rank_t *prev, ptl_rank_t *next)
 349 {
 350     *prev = (rank == root) ?
 351             PTL_INVALID_RANK:
 352             ((rank == PTL_FIRST_RANK) ? (np - 1) : (rank - 1));
 353     *next = (rank == (np - 1)) ?
 354             ((root == PTL_FIRST_RANK) ? PTL_INVALID_RANK : PTL_FIRST_RANK):
 355             ((rank == (root - 1)) ? PTL_INVALID_RANK : (rank + 1));
 356     return;
 357 }
 358 
  /*
   * Small arithmetic helpers used by get_k_ary_tree().  All three expand
   * their arguments more than once, so arguments must not have side effects.
   *   div(a,b)     (a + b - 1) / b, i.e. a / b rounded up for non-negative
   *                operands.  The name is the same as the C standard
   *                library's div() function, which this function-like macro
   *                hides wherever it is in effect.
   *   min(a,b)     the smaller of a and b.
   *   min_zero(a)  0 when a is negative, a otherwise (a clamp at zero).
   */
  359 #define div(a,b) (((a)+(b)-1) / (b))
  360 #define min(a,b) (((a) < (b)) ? (a) : (b))
  361 #define min_zero(a) (((a) < 0) ? 0 : (a))
 362 
  /*
   * Compute the father and the children of `rank` in a k-ary tree over np
   * processes rooted at `root`.
   *
   * k_ary     maximum number of children per node
   * rank      rank of the calling process
   * np        number of processes
   * root      rank at the root of the tree
   * father    [out] rank of the father, PTL_INVALID_RANK for the root
   * children  [out] array with room for at least k_ary entries; unused
   *           entries are set to PTL_INVALID_RANK
   * child_nb  [out] number of valid entries in children
   *
   * Nothing is written when one of the output pointers is NULL.  When k_ary
   * is 0, or rank/root/np are out of range, the outputs keep their "no
   * father, no children" initial values.
   */
  363 static inline
  364 void get_k_ary_tree(const unsigned int k_ary,
  365         ptl_rank_t rank, ptl_rank_t np, ptl_rank_t root,
  366         ptl_rank_t *father, ptl_rank_t *children, unsigned int *child_nb) {
  367 
  368     bool should_continue = true;
  369     unsigned int cnt;
  370     ptl_rank_t first, last, dist, up, my;
  371 
  372     if ((!father)   ||
  373         (!children) ||
  374         (!child_nb)) {
  375         return;
  376     }
  377 
  378     /* initialization and checks */
  379     *father = PTL_INVALID_RANK;
  380     *child_nb = 0;
  381 
  382     if (!k_ary) {
  383         return;
  384     }
  385 
  386     for (cnt = 0 ; cnt < k_ary ; cnt++) {
  387         children[cnt] = PTL_INVALID_RANK;
  388     }
  389 
         /* NOTE(review): the "< 0" tests only have an effect if ptl_rank_t
          * is a signed type -- its definition is not visible here */
  390     if ((np <= 0)    ||
  391         (rank < 0)   ||
  392         (rank >= np) ||
  393         (root < 0 )  ||
  394         (root >= np)) {
  395         return;
  396     }
  397 
         /* rank rotated so that the root becomes 0 */
  398     my = (np + rank - root) % np;
  399 
  400     /* start the loop */
  401     up = PTL_INVALID_RANK;
  402     first = PTL_FIRST_RANK;
  403     last = np - 1;
  404 
         /* [first, last] is the range of rotated ranks of the subtree being
          * examined and `first` is its head.  Each pass either stops because
          * `my` is that head, or records the head in `up` and narrows the
          * range to the sub-range (of length at most dist) containing `my`. */
  405     while (should_continue) {
  406         if (my == first) {
                 /* found: what follows the head is split into chunks of
                  * dist ranks, one per child */
  407             first++;
  408             dist = div(last - first + 1, k_ary);
  409             should_continue = false;
  410         }
  411         else {
  412             up = first;
  413             first++;
  414             dist = div(last - first + 1, k_ary);
                 /* skip whole chunks until the one holding `my` */
  415             while (my >= (first + dist)) {
  416                 first += dist;
  417             }
  418             last = min(first + dist - 1, last);
  419         }
  420     }
         /* rotate the results back to real ranks */
  421     *father = (up == PTL_INVALID_RANK) ? PTL_INVALID_RANK : ((up + root) % np);
  422     *child_nb = min(k_ary, min_zero(last - first + 1));
  423 
         /* the children are the heads of the chunks: first, first + dist, ... */
  424     for (cnt = 0 ; cnt < *child_nb ; cnt++) {
  425         children[cnt] = (root +
  426                 first + cnt * dist) % np;
  427     }
  428 
  429     return;
  430 }
 431 
 432 
 433 static inline void
 434 ompi_coll_portals4_create_recv_converter (opal_convertor_t *converter,
 435                                           void *target,
 436                                           ompi_proc_t *proc,
 437                                           int count,
 438                                           ompi_datatype_t *datatype)
 439 {
 440     /* create converter */
 441     OBJ_CONSTRUCT(converter, opal_convertor_t);
 442 
 443     /* initialize converter */
 444     opal_convertor_copy_and_prepare_for_recv(proc->super.proc_convertor,
 445                                              &datatype->super,
 446                                              count,
 447                                              target,
 448                                              0,
 449                                              converter);
 450 }
 451 
 452 static inline void
 453 ompi_coll_portals4_create_send_converter (opal_convertor_t *converter,
 454                                           const void *source,
 455                                           ompi_proc_t *proc,
 456                                           int count,
 457                                           ompi_datatype_t *datatype)
 458 {
 459     OBJ_CONSTRUCT(converter, opal_convertor_t);
 460 
 461     opal_convertor_copy_and_prepare_for_send(proc->super.proc_convertor,
 462                                              &datatype->super,
 463                                              count,
 464                                              source,
 465                                              0,
 466                                              converter);
 467 }
 468 
 469 END_C_DECLS
 470 
 471 #endif /* MCA_COLL_PORTALS4_EXPORT_H */

/* [<][>][^][v][top][bottom][index][help] */