root/ompi/mca/coll/tuned/coll_tuned.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2015 The University of Tennessee and The University
   4  *                         of Tennessee Research Foundation.  All rights
   5  *                         reserved.
   6  * Copyright (c) 2015-2018 Research Organization for Information Science
   7  *                         and Technology (RIST).  All rights reserved.
   8  * $COPYRIGHT$
   9  *
  10  * Additional copyrights may follow
  11  *
  12  * $HEADER$
  13  */
  14 
  15 #ifndef MCA_COLL_TUNED_EXPORT_H
  16 #define MCA_COLL_TUNED_EXPORT_H
  17 
  18 #include "ompi_config.h"
  19 
  20 #include "mpi.h"
  21 #include "ompi/mca/mca.h"
  22 #include "ompi/request/request.h"
  23 #include "ompi/mca/coll/base/coll_base_functions.h"
  24 #include "opal/util/output.h"
  25 
  26 /* also need the dynamic rule structures */
  27 #include "coll_tuned_dynamic_rules.h"
  28 
  29 BEGIN_C_DECLS
  30 
  31 /* Component-wide settings: identical across all modules and loaded at component query time. */
  32 extern int   ompi_coll_tuned_stream;                    /* NOTE(review): presumably the opal_output stream id -- confirm */
  33 extern int   ompi_coll_tuned_priority;
  34 extern bool  ompi_coll_tuned_use_dynamic_rules;
  35 extern char* ompi_coll_tuned_dynamic_rules_filename;    /* NOTE(review): presumably only meaningful when dynamic rules are enabled -- confirm */
  36 extern int   ompi_coll_tuned_init_tree_fanout;
  37 extern int   ompi_coll_tuned_init_chain_fanout;
  38 extern int   ompi_coll_tuned_init_max_requests;
  39 extern int   ompi_coll_tuned_alltoall_small_msg;        /* NOTE(review): units (bytes?) not visible here -- confirm */
  40 extern int   ompi_coll_tuned_alltoall_intermediate_msg; /* NOTE(review): units (bytes?) not visible here -- confirm */
  41 
  42 /* Forced algorithm choices */
  43 /* This structure stores the indices of the forced-algorithm MCA params (one index per tunable). */
  44 /* The indices are obtained at component query (so that registered values appear in ompi_info). */
  45 struct coll_tuned_force_algorithm_mca_param_indices_t {
  46     int  algorithm_param_index;      /* MCA param index for: which algorithm you want to force */
  47     int  segsize_param_index;        /* MCA param index for: segsize to use (if supported), 0 = no segmentation */
  48     int  tree_fanout_param_index;    /* MCA param index for: tree fanout/in to use */
  49     int  chain_fanout_param_index;   /* MCA param index for: K-chain fanout/in to use */
  50     int  max_requests_param_index;   /* MCA param index for: maximum number of outstanding send or recv requests */
  51 };
  52 typedef struct coll_tuned_force_algorithm_mca_param_indices_t coll_tuned_force_algorithm_mca_param_indices_t;
  53 
  54 
  55 /* The following type stores the actual values obtained from the MCA on each tuned module, */
  56 /* looked up via the MCA param indices held in the component. */
  57 /* One instance of this structure is stored per collective type per communicator. */
  58 struct coll_tuned_force_algorithm_params_t {
  59     int  algorithm;      /* which algorithm you want to force */
  60     int  segsize;        /* segsize to use (if supported), 0 = no segmentation */
  61     int  tree_fanout;    /* tree fanout/in to use */
  62     int  chain_fanout;   /* K-chain fanout/in to use */
  63     int  max_requests;   /* maximum number of outstanding send or recv requests */
  64 };
  65 typedef struct coll_tuned_force_algorithm_params_t coll_tuned_force_algorithm_params_t;
  66 
  67 /* Indices of the MCA params, one entry per collective (COLLCOUNT), so that modules can look them up at open / comm create time. */
  68 extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT];
  69 /* The actual max algorithm values (read-only), one entry per collective, loaded at component open. */
  70 extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
  71 
  72 /*
  73  * coll API functions
  74  */
  75 
  76 /* API functions: the init query, and the per-communicator query that returns a module pointer (NOTE(review): *priority is presumably an output -- confirm). */
  77 
  78 int ompi_coll_tuned_init_query(bool enable_progress_threads,
  79                                bool enable_mpi_threads);
  80 
  81 mca_coll_base_module_t *
  82 ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority);
  83 
  84 /* Prototypes of the decision functions and of any implementations */
  85 
  86 /*
  87  * Note: this list gets long because every MPI collective needs several
  88  * prototypes (originally counted as 4: 2 for the comm type and 2 for each
  89  * decision type). Each collective below declares dec_fixed, dec_dynamic,
  90  * do_this and check_forced_init. We might cut down the decision prototypes by conditional compiling.
  91  */
  92 
  93 /* All Gather */
  94 int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS);
  95 int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
  96 int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize);
  97 int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
  98 
  99 /* All GatherV */
 100 int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
 101 int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
 102 int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
 103 int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 104 
 105 /* All Reduce */
 106 int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
 107 int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
 108 int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
 109 int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 110 
 111 /* AlltoAll */
 112 int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
 113 int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
 114 int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests);
 115 int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 116 
 117 /* AlltoAllV (do_this takes only the algorithm id) */
 118 int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS);
 119 int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
 120 int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm);
 121 int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 122 
 123 /* Barrier */
 124 int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
 125 int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
 126 int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize);
 127 int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 128 
 129 /* Bcast */
 130 int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
 131 int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
 132 int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
 133 int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 134 
 135 /* Gather */
 136 int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS);
 137 int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
 138 int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize);
 139 int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 140 
 141 /* Reduce (do_this additionally takes a max outstanding requests value) */
 142 int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
 143 int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
 144 int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs);
 145 int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 146 
 147 /* Reduce_scatter */
 148 int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS);
 149 int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
 150 int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize);
 151 int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 152 
 153 /* Reduce_scatter_block */
 154 int ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed(REDUCESCATTERBLOCK_ARGS);
 155 int ompi_coll_tuned_reduce_scatter_block_intra_dec_dynamic(REDUCESCATTERBLOCK_ARGS);
 156 int ompi_coll_tuned_reduce_scatter_block_intra_do_this(REDUCESCATTERBLOCK_ARGS, int algorithm, int faninout, int segsize);
 157 int ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 158 
 159 /* Scatter */
 160 int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS);
 161 int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
 162 int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize);
 163 int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 164 
 165 /* Exscan (do_this takes only the algorithm id) */
 166 int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS);
 167 int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS);
 168 int ompi_coll_tuned_exscan_intra_do_this(EXSCAN_ARGS, int algorithm);
 169 int ompi_coll_tuned_exscan_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 170 
 171 /* Scan (do_this takes only the algorithm id) */
 172 int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS);
 173 int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS);
 174 int ompi_coll_tuned_scan_intra_do_this(SCAN_ARGS, int algorithm);
 175 int ompi_coll_tuned_scan_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
 176 
 177 int mca_coll_tuned_ft_event(int state); /* NOTE(review): presumably a fault-tolerance event hook -- confirm */
 178 
 179 struct mca_coll_tuned_component_t { /* component-level state of the tuned coll component */
 180         /** Base coll component */
 181         mca_coll_base_component_2_0_0_t super;
 182 
 183         /** MCA parameter: priority of this component */
 184         int tuned_priority;
 185 
 186         /** Global state that the component needs to store */
 187 
 188         /* MCA parameters first */
 189 
 190         /* Cached decision table data (moved here from the MCW module; NOTE(review): MCW presumably means MPI_COMM_WORLD -- confirm) */
 191         ompi_coll_alg_rule_t *all_base_rules;
 192 };
 193 /**
 194  * Convenience typedef for struct mca_coll_tuned_component_t
 195  */
 196 typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t;
 197 
 198 /**
 199  * Global component instance (declared here, defined elsewhere)
 200  */
 201 OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component;
 202 
 203 struct mca_coll_tuned_module_t { /* per-communicator module state of the tuned coll component */
 204     mca_coll_base_module_t super;
 205 
 206     /* For forced algorithms we store the information on the module, one entry per collective. */
 207     /* Previously there was only one shared copy; oops, it really is per comm/module. */
 208     coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
 209 
 210     /* The communicator rules for each MPI collective, for ONLY this communicator's size */
 211     ompi_coll_com_rule_t *com_rules[COLLCOUNT];
 212 };
 213 typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
 214 OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
 215 
 216 #endif  /* MCA_COLL_TUNED_EXPORT_H */

/* [<][>][^][v][top][bottom][index][help] */