This source file includes the following definitions.
- tuned_register
- tuned_open
- tuned_close
- mca_coll_tuned_module_construct
   1 
   2 
   3 
   4 
   5 
   6 
   7 
   8 
   9 
  10 
  11 
  12 
  13 
  14 
  15 
  16 
  17 
  18 
  19 
  20 
  21 
  22 
  23 
  24 
  25 
  26 
  27 
  28 
  29 
  30 
  31 
  32 #include "ompi_config.h"
  33 #include "opal/util/output.h"
  34 #include "coll_tuned.h"
  35 
  36 #include "mpi.h"
  37 #include "ompi/mca/coll/coll.h"
  38 #include "coll_tuned.h"
  39 #include "coll_tuned_dynamic_file.h"
  40 
  41 
  42 
  43 
/*
 * Human-readable version banner for the tuned collective component.
 * The text is the literal prefix below concatenated at compile time
 * with the OMPI_VERSION macro.
 */
const char *ompi_coll_tuned_component_version_string =
    "Open MPI tuned collective MCA component version " OMPI_VERSION;
  46 
  47 
  48 
  49 
/*
 * Component-wide state. Most of these are the backing storage handed to
 * the MCA variable system by tuned_register().
 */

/* Output stream id passed to OPAL_OUTPUT; stays -1 unless tuned_open()
 * opens a stream (debug builds with coll base verbosity > 0). */
int   ompi_coll_tuned_stream = -1;
/* Backing storage for the "priority" MCA variable. */
int   ompi_coll_tuned_priority = 30;
/* Backing storage for "use_dynamic_rules"; gates the rules-file read in tuned_open(). */
bool  ompi_coll_tuned_use_dynamic_rules = false;
/* Backing storage for "dynamic_rules_filename"; NULL means no rules file. */
char* ompi_coll_tuned_dynamic_rules_filename = (char*) NULL;
/* Backing storage for "init_tree_fanout". */
int   ompi_coll_tuned_init_tree_fanout = 4;
/* Backing storage for "init_chain_fanout". */
int   ompi_coll_tuned_init_chain_fanout = 4;
/* NOTE(review): not registered or otherwise used in this file; presumably
 * consumed by other tuned sources - confirm. */
int   ompi_coll_tuned_init_max_requests = 128;
/* Backing storage for "alltoall_small_msg". */
int   ompi_coll_tuned_alltoall_small_msg = 200;
/* Backing storage for "alltoall_intermediate_msg". */
int   ompi_coll_tuned_alltoall_intermediate_msg = 3000;



/* Per-collective forced-algorithm parameter records, one slot per collective
 * id (ALLREDUCE, ALLTOALL, ...); each slot is handed to the matching
 * *_check_forced_init() routine in tuned_register(). */
coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};

/* Per-collective counters, zero-initialized here.
 * NOTE(review): not written in this file; presumably the number of
 * selectable algorithms per collective - confirm against the users. */
int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT] = {0};
  65 
  66 
  67 
  68 
/*
 * Forward declarations of the component callbacks, needed because the
 * component descriptor below references them before they are defined.
 */
static int tuned_register(void);
static int tuned_open(void);
static int tuned_close(void);
  72 
  73 
  74 
  75 
  76 
  77 
/*
 * The single instance of the tuned collective component descriptor.
 * It carries the component name, version information, and the callbacks
 * defined in this file (open / close / register) together with the
 * query entry points implemented elsewhere.
 */
mca_coll_tuned_component_t mca_coll_tuned_component = {
    /* First member: the embedded base coll component (accessed as
     * `.super` elsewhere in this file). */
    {
        /* Version header, component identity and lifecycle callbacks. */

        .collm_version = {
            MCA_COLL_BASE_VERSION_2_0_0,

            /* Component name and version. */
            .mca_component_name = "tuned",
            MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
                                  OMPI_RELEASE_VERSION),

            /* Component callbacks defined later in this file. */
            .mca_open_component = tuned_open,
            .mca_close_component = tuned_close,
            .mca_register_component_params = tuned_register,
        },
        .collm_data = {
            /* NOTE(review): presumably flags the component as
             * checkpoint-capable - confirm against the MCA base headers. */
            MCA_BASE_METADATA_PARAM_CHECKPOINT
        },

        /* Initialization and per-communicator query functions. */

        .collm_init_query = ompi_coll_tuned_init_query,
        .collm_comm_query = ompi_coll_tuned_comm_query,
    },

    /* Positional initializer for the member that follows the base component.
     * NOTE(review): presumably the component priority field - confirm
     * against mca_coll_tuned_component_t. */
    0,

    /* Positional initializer for the next member.
     * NOTE(review): presumably `all_base_rules`, which tuned_open() fills
     * and tuned_close() frees - confirm the member order. */
    NULL 
};
 113 
 114 static int tuned_register(void)
 115 {
 116 
 117     
 118     ompi_coll_tuned_priority = 30;
 119     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 120                                            "priority", "Priority of the tuned coll component",
 121                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 122                                            OPAL_INFO_LVL_6,
 123                                            MCA_BASE_VAR_SCOPE_READONLY,
 124                                            &ompi_coll_tuned_priority);
 125 
 126     
 127     ompi_coll_tuned_init_tree_fanout = 4;
 128     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 129                                            "init_tree_fanout",
 130                                            "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
 131                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 132                                            OPAL_INFO_LVL_6,
 133                                            MCA_BASE_VAR_SCOPE_READONLY,
 134                                            &ompi_coll_tuned_init_tree_fanout);
 135 
 136     ompi_coll_tuned_init_chain_fanout = 4;
 137     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 138                                            "init_chain_fanout",
 139                                            "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
 140                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 141                                            OPAL_INFO_LVL_6,
 142                                            MCA_BASE_VAR_SCOPE_READONLY,
 143                                            &ompi_coll_tuned_init_chain_fanout);
 144 
 145     ompi_coll_tuned_alltoall_small_msg = 200;
 146     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 147                                            "alltoall_small_msg",
 148                                            "threshold (if supported) to decide if small MSGs alltoall algorithm will be used",
 149                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 150                                            OPAL_INFO_LVL_6,
 151                                            MCA_BASE_VAR_SCOPE_READONLY,
 152                                            &ompi_coll_tuned_alltoall_small_msg);
 153 
 154     ompi_coll_tuned_alltoall_intermediate_msg = 3000;
 155     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 156                                            "alltoall_intermediate_msg",
 157                                            "threshold (if supported) to decide if intermediate MSGs alltoall algorithm will be used",
 158                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 159                                            OPAL_INFO_LVL_6,
 160                                            MCA_BASE_VAR_SCOPE_READONLY,
 161                                            &ompi_coll_tuned_alltoall_intermediate_msg);
 162 
 163     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 164                                            "use_dynamic_rules",
 165                                            "Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules",
 166                                            MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 167                                            OPAL_INFO_LVL_6,
 168                                            MCA_BASE_VAR_SCOPE_READONLY,
 169                                            &ompi_coll_tuned_use_dynamic_rules);
 170 
 171     ompi_coll_tuned_dynamic_rules_filename = NULL;
 172     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 173                                            "dynamic_rules_filename",
 174                                            "Filename of configuration file that contains the dynamic (@runtime) decision function rules",
 175                                            MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 176                                            OPAL_INFO_LVL_6,
 177                                            MCA_BASE_VAR_SCOPE_READONLY,
 178                                            &ompi_coll_tuned_dynamic_rules_filename);
 179 
 180     
 181     ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
 182     ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
 183     ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
 184     ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]);
 185     ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]);
 186     ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
 187     ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
 188     ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);
 189     ompi_coll_tuned_reduce_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTER]);
 190     ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTERBLOCK]);
 191     ompi_coll_tuned_gather_intra_check_forced_init(&ompi_coll_tuned_forced_params[GATHER]);
 192     ompi_coll_tuned_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCATTER]);
 193     ompi_coll_tuned_exscan_intra_check_forced_init(&ompi_coll_tuned_forced_params[EXSCAN]);
 194     ompi_coll_tuned_scan_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCAN]);
 195 
 196     return OMPI_SUCCESS;
 197 }
 198 
 199 static int tuned_open(void)
 200 {
 201     int rc;
 202 
 203 #if OPAL_ENABLE_DEBUG
 204     {
 205         int param;
 206 
 207         param = mca_base_var_find("ompi", "coll", "base", "verbose");
 208         if (param >= 0) {
 209             const int *verbose = NULL;
 210             mca_base_var_get_value(param, &verbose, NULL, NULL);
 211             if (verbose && verbose[0] > 0) {
 212                 ompi_coll_tuned_stream = opal_output_open(NULL);
 213             }
 214         }
 215     }
 216 #endif  
 217 
 218     
 219     
 220     
 221     
 222     
 223 
 224     
 225     
 226     
 227     if (ompi_coll_tuned_use_dynamic_rules) {
 228         if( ompi_coll_tuned_dynamic_rules_filename ) {
 229             OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]",
 230                          ompi_coll_tuned_dynamic_rules_filename));
 231             rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename,
 232                                                          &(mca_coll_tuned_component.all_base_rules), COLLCOUNT);
 233             if( rc >= 0 ) {
 234                 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc));
 235             } else {
 236                 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n"));
 237                 mca_coll_tuned_component.all_base_rules = NULL;
 238             }
 239         }
 240     }
 241 
 242     OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!"));
 243 
 244     return OMPI_SUCCESS;
 245 }
 246 
 247 
 248 
 249 static int tuned_close(void)
 250 {
 251     OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: called"));
 252 
 253     
 254     
 255 
 256     OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!"));
 257 
 258     if( NULL != mca_coll_tuned_component.all_base_rules ) {
 259         ompi_coll_tuned_free_all_rules(mca_coll_tuned_component.all_base_rules, COLLCOUNT);
 260         mca_coll_tuned_component.all_base_rules = NULL;
 261     }
 262 
 263     return OMPI_SUCCESS;
 264 }
 265 
 266 static void
 267 mca_coll_tuned_module_construct(mca_coll_tuned_module_t *module)
 268 {
 269     mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
 270 
 271     for( int i = 0; i < COLLCOUNT; i++ ) {
 272         tuned_module->user_forced[i].algorithm = 0;
 273         tuned_module->com_rules[i] = NULL;
 274     }
 275 }
 276 
/*
 * Class instance for mca_coll_tuned_module_t, declared against
 * mca_coll_base_module_t with mca_coll_tuned_module_construct as the
 * third argument and NULL as the fourth.
 * NOTE(review): by the usual OBJ_CLASS_INSTANCE argument order these are
 * (class, parent, constructor, destructor), i.e. no destructor - confirm
 * against opal_object.h.
 */
OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t, mca_coll_base_module_t,
                   mca_coll_tuned_module_construct, NULL);