root/ompi/mca/coll/tuned/coll_tuned_component.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. tuned_register
  2. tuned_open
  3. tuned_close
  4. mca_coll_tuned_module_construct

   1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
   2 /*
   3  * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
   4  *                         University Research and Technology
   5  *                         Corporation.  All rights reserved.
   6  * Copyright (c) 2004-2015 The University of Tennessee and The University
   7  *                         of Tennessee Research Foundation.  All rights
   8  *                         reserved.
   9  * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
  10  *                         University of Stuttgart.  All rights reserved.
  11  * Copyright (c) 2004-2005 The Regents of the University of California.
  12  *                         All rights reserved.
  13  * Copyright (c) 2008      Sun Microsystems, Inc.  All rights reserved.
  14  * Copyright (c) 2008      Cisco Systems, Inc.  All rights reserved.
  15  * Copyright (c) 2015      Los Alamos National Security, LLC. All rights
  16  *                         reserved.
  17  * Copyright (c) 2015-2018 Research Organization for Information Science
  18  *                         and Technology (RIST). All rights reserved.
  19  * $COPYRIGHT$
  20  *
  21  * Additional copyrights may follow
  22  *
  23  * $HEADER$
  24  *
  25  * These symbols are in a file by themselves to provide nice linker
  26  * semantics.  Since linkers generally pull in symbols by object
  27  * files, keeping these symbols as the only symbols in this file
  28  * prevents utility programs such as "ompi_info" from having to import
  29  * entire components just to query their version and parameters.
  30  */
  31 
  32 #include "ompi_config.h"
  33 #include "opal/util/output.h"
  34 #include "coll_tuned.h"
  35 
  36 #include "mpi.h"
  37 #include "ompi/mca/coll/coll.h"
  38 #include "coll_tuned.h"
  39 #include "coll_tuned_dynamic_file.h"
  40 
  41 /*
  42  * Public string showing the coll ompi_tuned component version number
  43  */
  44 const char *ompi_coll_tuned_component_version_string =
  45     "Open MPI tuned collective MCA component version " OMPI_VERSION;
  46 
  47 /*
  48  * Global variable
  49  */
  50 int   ompi_coll_tuned_stream = -1;
  51 int   ompi_coll_tuned_priority = 30;
  52 bool  ompi_coll_tuned_use_dynamic_rules = false;
  53 char* ompi_coll_tuned_dynamic_rules_filename = (char*) NULL;
  54 int   ompi_coll_tuned_init_tree_fanout = 4;
  55 int   ompi_coll_tuned_init_chain_fanout = 4;
  56 int   ompi_coll_tuned_init_max_requests = 128;
  57 int   ompi_coll_tuned_alltoall_small_msg = 200;
  58 int   ompi_coll_tuned_alltoall_intermediate_msg = 3000;
  59 
  60 /* forced alogrithm variables */
  61 /* indices for the MCA parameters */
  62 coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};
  63 /* max algorithm values */
  64 int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT] = {0};
  65 
  66 /*
  67  * Local function
  68  */
  69 static int tuned_register(void);
  70 static int tuned_open(void);
  71 static int tuned_close(void);
  72 
  73 /*
  74  * Instantiate the public struct with all of our public information
  75  * and pointers to our public functions in it
  76  */
  77 
  78 mca_coll_tuned_component_t mca_coll_tuned_component = {
  79     /* First, fill in the super */
  80     {
  81         /* First, the mca_component_t struct containing meta information
  82            about the component itself */
  83         .collm_version = {
  84             MCA_COLL_BASE_VERSION_2_0_0,
  85 
  86             /* Component name and version */
  87             .mca_component_name = "tuned",
  88             MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
  89                                   OMPI_RELEASE_VERSION),
  90 
  91             /* Component open and close functions */
  92             .mca_open_component = tuned_open,
  93             .mca_close_component = tuned_close,
  94             .mca_register_component_params = tuned_register,
  95         },
  96         .collm_data = {
  97             /* The component is checkpoint ready */
  98             MCA_BASE_METADATA_PARAM_CHECKPOINT
  99         },
 100 
 101         /* Initialization / querying functions */
 102 
 103         .collm_init_query = ompi_coll_tuned_init_query,
 104         .collm_comm_query = ompi_coll_tuned_comm_query,
 105     },
 106 
 107     /* priority of the module */
 108     0,
 109 
 110     /* Tuned component specific information */
 111     NULL /* ompi_coll_alg_rule_t ptr */
 112 };
 113 
 114 static int tuned_register(void)
 115 {
 116 
 117     /* Use a low priority, but allow other components to be lower */
 118     ompi_coll_tuned_priority = 30;
 119     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 120                                            "priority", "Priority of the tuned coll component",
 121                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 122                                            OPAL_INFO_LVL_6,
 123                                            MCA_BASE_VAR_SCOPE_READONLY,
 124                                            &ompi_coll_tuned_priority);
 125 
 126     /* some initial guesses at topology parameters */
 127     ompi_coll_tuned_init_tree_fanout = 4;
 128     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 129                                            "init_tree_fanout",
 130                                            "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
 131                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 132                                            OPAL_INFO_LVL_6,
 133                                            MCA_BASE_VAR_SCOPE_READONLY,
 134                                            &ompi_coll_tuned_init_tree_fanout);
 135 
 136     ompi_coll_tuned_init_chain_fanout = 4;
 137     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 138                                            "init_chain_fanout",
 139                                            "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
 140                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 141                                            OPAL_INFO_LVL_6,
 142                                            MCA_BASE_VAR_SCOPE_READONLY,
 143                                            &ompi_coll_tuned_init_chain_fanout);
 144 
 145     ompi_coll_tuned_alltoall_small_msg = 200;
 146     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 147                                            "alltoall_small_msg",
 148                                            "threshold (if supported) to decide if small MSGs alltoall algorithm will be used",
 149                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 150                                            OPAL_INFO_LVL_6,
 151                                            MCA_BASE_VAR_SCOPE_READONLY,
 152                                            &ompi_coll_tuned_alltoall_small_msg);
 153 
 154     ompi_coll_tuned_alltoall_intermediate_msg = 3000;
 155     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 156                                            "alltoall_intermediate_msg",
 157                                            "threshold (if supported) to decide if intermediate MSGs alltoall algorithm will be used",
 158                                            MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
 159                                            OPAL_INFO_LVL_6,
 160                                            MCA_BASE_VAR_SCOPE_READONLY,
 161                                            &ompi_coll_tuned_alltoall_intermediate_msg);
 162 
 163     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 164                                            "use_dynamic_rules",
 165                                            "Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules",
 166                                            MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
 167                                            OPAL_INFO_LVL_6,
 168                                            MCA_BASE_VAR_SCOPE_READONLY,
 169                                            &ompi_coll_tuned_use_dynamic_rules);
 170 
 171     ompi_coll_tuned_dynamic_rules_filename = NULL;
 172     (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
 173                                            "dynamic_rules_filename",
 174                                            "Filename of configuration file that contains the dynamic (@runtime) decision function rules",
 175                                            MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
 176                                            OPAL_INFO_LVL_6,
 177                                            MCA_BASE_VAR_SCOPE_READONLY,
 178                                            &ompi_coll_tuned_dynamic_rules_filename);
 179 
 180     /* register forced params */
 181     ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
 182     ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
 183     ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
 184     ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]);
 185     ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]);
 186     ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
 187     ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
 188     ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);
 189     ompi_coll_tuned_reduce_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTER]);
 190     ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTERBLOCK]);
 191     ompi_coll_tuned_gather_intra_check_forced_init(&ompi_coll_tuned_forced_params[GATHER]);
 192     ompi_coll_tuned_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCATTER]);
 193     ompi_coll_tuned_exscan_intra_check_forced_init(&ompi_coll_tuned_forced_params[EXSCAN]);
 194     ompi_coll_tuned_scan_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCAN]);
 195 
 196     return OMPI_SUCCESS;
 197 }
 198 
 199 static int tuned_open(void)
 200 {
 201     int rc;
 202 
 203 #if OPAL_ENABLE_DEBUG
 204     {
 205         int param;
 206 
 207         param = mca_base_var_find("ompi", "coll", "base", "verbose");
 208         if (param >= 0) {
 209             const int *verbose = NULL;
 210             mca_base_var_get_value(param, &verbose, NULL, NULL);
 211             if (verbose && verbose[0] > 0) {
 212                 ompi_coll_tuned_stream = opal_output_open(NULL);
 213             }
 214         }
 215     }
 216 #endif  /* OPAL_ENABLE_DEBUG */
 217 
 218     /* now check that the user hasn't overrode any of the decision functions if dynamic rules are enabled */
 219     /* the user can redo this before every comm dup/create if they like */
 220     /* this is useful for benchmarking and user knows best tuning */
 221     /* as this is the component we only lookup the indicies of the mca params */
 222     /* the actual values are looked up during comm create via module init */
 223 
 224     /* intra functions first */
 225     /* if dynamic rules allowed then look up dynamic rules config filename, else we leave it an empty filename (NULL) */
 226     /* by default DISABLE dynamic rules and instead use fixed [if based] rules */
 227     if (ompi_coll_tuned_use_dynamic_rules) {
 228         if( ompi_coll_tuned_dynamic_rules_filename ) {
 229             OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]",
 230                          ompi_coll_tuned_dynamic_rules_filename));
 231             rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename,
 232                                                          &(mca_coll_tuned_component.all_base_rules), COLLCOUNT);
 233             if( rc >= 0 ) {
 234                 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc));
 235             } else {
 236                 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n"));
 237                 mca_coll_tuned_component.all_base_rules = NULL;
 238             }
 239         }
 240     }
 241 
 242     OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!"));
 243 
 244     return OMPI_SUCCESS;
 245 }
 246 
 247 /* here we should clean up state stored on the component */
 248 /* i.e. alg table and dynamic changable rules if allocated etc */
 249 static int tuned_close(void)
 250 {
 251     OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: called"));
 252 
 253     /* dealloc alg table if allocated */
 254     /* dealloc dynamic changable rules if allocated */
 255 
 256     OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!"));
 257 
 258     if( NULL != mca_coll_tuned_component.all_base_rules ) {
 259         ompi_coll_tuned_free_all_rules(mca_coll_tuned_component.all_base_rules, COLLCOUNT);
 260         mca_coll_tuned_component.all_base_rules = NULL;
 261     }
 262 
 263     return OMPI_SUCCESS;
 264 }
 265 
 266 static void
 267 mca_coll_tuned_module_construct(mca_coll_tuned_module_t *module)
 268 {
 269     mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
 270 
 271     for( int i = 0; i < COLLCOUNT; i++ ) {
 272         tuned_module->user_forced[i].algorithm = 0;
 273         tuned_module->com_rules[i] = NULL;
 274     }
 275 }
 276 
 277 OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t, mca_coll_base_module_t,
 278                    mca_coll_tuned_module_construct, NULL);

/* [<][>][^][v][top][bottom][index][help] */