This source file includes the following definitions:
- ompi_coll_tuned_gather_intra_check_forced_init
- ompi_coll_tuned_gather_intra_do_this
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 #include "ompi_config.h"
  16 
  17 #include "mpi.h"
  18 #include "ompi/constants.h"
  19 #include "ompi/datatype/ompi_datatype.h"
  20 #include "ompi/communicator/communicator.h"
  21 #include "ompi/mca/coll/coll.h"
  22 #include "ompi/mca/coll/base/coll_tags.h"
  23 #include "ompi/mca/pml/pml.h"
  24 #include "coll_tuned.h"
  25 #include "ompi/mca/coll/base/coll_base_topo.h"
  26 #include "ompi/mca/coll/base/coll_base_util.h"
  27 
  28 
/*
 * Backing storage for the user-settable MCA variables registered in
 * ompi_coll_tuned_gather_intra_check_forced_init():
 *   coll_tuned_gather_forced_algorithm  <- "gather_algorithm"
 *   coll_tuned_gather_segment_size      <- "gather_algorithm_segmentsize"
 *   coll_tuned_gather_tree_fanout       <- "gather_algorithm_tree_fanout"
 *   coll_tuned_gather_chain_fanout      <- "gather_algorithm_chain_fanout"
 * The two fanout values are given their defaults (from
 * ompi_coll_tuned_init_tree_fanout / ompi_coll_tuned_init_chain_fanout)
 * at registration time, not here.
 */
static int coll_tuned_gather_forced_algorithm = 0;
static int coll_tuned_gather_segment_size = 0;
static int coll_tuned_gather_tree_fanout;
static int coll_tuned_gather_chain_fanout;
  33 
  34 
/*
 * Table of gather algorithm ids and their names, used to build the
 * enumerator for the "gather_algorithm" MCA variable.
 * The ids must stay in sync with the switch in
 * ompi_coll_tuned_gather_intra_do_this().
 * The entry with a NULL string terminates the table;
 * ompi_coll_tuned_gather_intra_check_forced_init() counts entries up to it.
 */
static mca_base_var_enum_value_t gather_algorithms[] = {
    {0, "ignore"},
    {1, "basic_linear"},
    {2, "binomial"},
    {3, "linear_sync"},
    {0, NULL}
};
  42 
  43 
  44 
  45 
  46 
  47 
  48 
  49 
  50 
  51 
  52 
  53 
  54 int
  55 ompi_coll_tuned_gather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices)
  56 {
  57     mca_base_var_enum_t *new_enum;
  58     int cnt;
  59 
  60     for( cnt = 0; NULL != gather_algorithms[cnt].string; cnt++ );
  61     ompi_coll_tuned_forced_max_algorithms[GATHER] = cnt;
  62 
  63     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
  64                                            "gather_algorithm_count",
  65                                            "Number of gather algorithms available",
  66                                            MCA_BASE_VAR_TYPE_INT, NULL, 0,
  67                                            MCA_BASE_VAR_FLAG_DEFAULT_ONLY,
  68                                            OPAL_INFO_LVL_5,
  69                                            MCA_BASE_VAR_SCOPE_CONSTANT,
  70                                            &ompi_coll_tuned_forced_max_algorithms[GATHER]);
  71 
  72     
  73     coll_tuned_gather_forced_algorithm = 0;
  74     (void) mca_base_var_enum_create("coll_tuned_gather_algorithms", gather_algorithms, &new_enum);
  75     mca_param_indices->algorithm_param_index =
  76         mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
  77                                         "gather_algorithm",
  78                                         "Which gather algorithm is used. Can be locked down to choice of: 0 ignore, 1 basic linear, 2 binomial, 3 linear with synchronization.",
  79                                         MCA_BASE_VAR_TYPE_INT, new_enum, 0, MCA_BASE_VAR_FLAG_SETTABLE,
  80                                         OPAL_INFO_LVL_5,
  81                                         MCA_BASE_VAR_SCOPE_ALL,
  82                                         &coll_tuned_gather_forced_algorithm);
  83     OBJ_RELEASE(new_enum);
  84     if (mca_param_indices->algorithm_param_index < 0) {
  85         return mca_param_indices->algorithm_param_index;
  86     }
  87 
  88     coll_tuned_gather_segment_size = 0;
  89     mca_param_indices->segsize_param_index =
  90         mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
  91                                         "gather_algorithm_segmentsize",
  92                                         "Segment size in bytes used by default for gather algorithms. Only has meaning if algorithm is forced and supports segmenting. 0 bytes means no segmentation. Currently, available algorithms do not support segmentation.",
  93                                         MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
  94                                         OPAL_INFO_LVL_5,
  95                                         MCA_BASE_VAR_SCOPE_ALL,
  96                                         &coll_tuned_gather_segment_size);
  97 
  98     coll_tuned_gather_tree_fanout = ompi_coll_tuned_init_tree_fanout; 
  99     mca_param_indices->tree_fanout_param_index =
 100         mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 101                                         "gather_algorithm_tree_fanout",
 102                                         "Fanout for n-tree used for gather algorithms. Only has meaning if algorithm is forced and supports n-tree topo based operation. Currently, available algorithms do not support n-tree topologies.",
 103                                         MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 104                                         OPAL_INFO_LVL_5,
 105                                         MCA_BASE_VAR_SCOPE_ALL,
 106                                         &coll_tuned_gather_tree_fanout);
 107 
 108     coll_tuned_gather_chain_fanout = ompi_coll_tuned_init_chain_fanout; 
 109     mca_param_indices->chain_fanout_param_index =
 110       mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 111                                       "gather_algorithm_chain_fanout",
 112                                       "Fanout for chains used for gather algorithms. Only has meaning if algorithm is forced and supports chain topo based operation. Currently, available algorithms do not support chain topologies.",
 113                                       MCA_BASE_VAR_TYPE_INT, NULL, 0, MCA_BASE_VAR_FLAG_SETTABLE,
 114                                       OPAL_INFO_LVL_5,
 115                                       MCA_BASE_VAR_SCOPE_ALL,
 116                                       &coll_tuned_gather_chain_fanout);
 117 
 118     return (MPI_SUCCESS);
 119 }
 120 
 121 int
 122 ompi_coll_tuned_gather_intra_do_this(const void *sbuf, int scount,
 123                                      struct ompi_datatype_t *sdtype,
 124                                      void* rbuf, int rcount,
 125                                      struct ompi_datatype_t *rdtype,
 126                                      int root,
 127                                      struct ompi_communicator_t *comm,
 128                                      mca_coll_base_module_t *module,
 129                                      int algorithm, int faninout, int segsize)
 130 {
 131     OPAL_OUTPUT((ompi_coll_tuned_stream,
 132                  "coll:tuned:gather_intra_do_this selected algorithm %d topo faninout %d segsize %d",
 133                  algorithm, faninout, segsize));
 134 
 135     switch (algorithm) {
 136     case (0):
 137         return ompi_coll_tuned_gather_intra_dec_fixed(sbuf, scount, sdtype,
 138                                                       rbuf, rcount, rdtype,
 139                                                       root, comm, module);
 140     case (1):
 141         return ompi_coll_base_gather_intra_basic_linear(sbuf, scount, sdtype,
 142                                                         rbuf, rcount, rdtype,
 143                                                         root, comm, module);
 144     case (2):
 145         return ompi_coll_base_gather_intra_binomial(sbuf, scount, sdtype,
 146                                                     rbuf, rcount, rdtype,
 147                                                     root, comm, module);
 148     case (3):
 149         return ompi_coll_base_gather_intra_linear_sync(sbuf, scount, sdtype,
 150                                                        rbuf, rcount, rdtype,
 151                                                        root, comm, module,
 152                                                        segsize);
 153     } 
 154     OPAL_OUTPUT((ompi_coll_tuned_stream,
 155                  "coll:tuned:gather_intra_do_this attempt to select algorithm %d when only 0-%d is valid?",
 156                  algorithm, ompi_coll_tuned_forced_max_algorithms[GATHER]));
 157     return (MPI_ERR_ARG);
 158 }