This source file includes the following definitions.
- tuned_register
- tuned_open
- tuned_close
- mca_coll_tuned_module_construct
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32 #include "ompi_config.h"
33 #include "opal/util/output.h"
34 #include "coll_tuned.h"
35
36 #include "mpi.h"
37 #include "ompi/mca/coll/coll.h"
38 #include "coll_tuned.h"
39 #include "coll_tuned_dynamic_file.h"
40
41
42
43
44 const char *ompi_coll_tuned_component_version_string =
45 "Open MPI tuned collective MCA component version " OMPI_VERSION;
46
47
48
49
/*
 * Component-wide state and tunables with their built-in defaults.
 * tuned_register() re-applies most of these defaults and exposes the
 * variables through the MCA variable system.
 */

/* Output stream id used by the trace messages; -1 until tuned_open() opens one. */
int ompi_coll_tuned_stream = -1;

/* Registered as "priority". */
int ompi_coll_tuned_priority = 30;

/* Registered as "use_dynamic_rules"; tuned_open() only reads a rules file when set. */
bool ompi_coll_tuned_use_dynamic_rules = false;

/* Registered as "dynamic_rules_filename"; NULL means no rules file was given. */
char *ompi_coll_tuned_dynamic_rules_filename = NULL;

/* Registered as "init_tree_fanout". */
int ompi_coll_tuned_init_tree_fanout = 4;

/* Registered as "init_chain_fanout". */
int ompi_coll_tuned_init_chain_fanout = 4;

/* Not registered as an MCA variable anywhere in this file. */
int ompi_coll_tuned_init_max_requests = 128;

/* Registered as "alltoall_small_msg". */
int ompi_coll_tuned_alltoall_small_msg = 200;

/* Registered as "alltoall_intermediate_msg". */
int ompi_coll_tuned_alltoall_intermediate_msg = 3000;
59
60
61
62 coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT] = {{0}};
63
64 int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT] = {0};
65
66
67
68
69 static int tuned_register(void);
70 static int tuned_open(void);
71 static int tuned_close(void);
72
73
74
75
76
77
78 mca_coll_tuned_component_t mca_coll_tuned_component = {
79
80 {
81
82
83 .collm_version = {
84 MCA_COLL_BASE_VERSION_2_0_0,
85
86
87 .mca_component_name = "tuned",
88 MCA_BASE_MAKE_VERSION(component, OMPI_MAJOR_VERSION, OMPI_MINOR_VERSION,
89 OMPI_RELEASE_VERSION),
90
91
92 .mca_open_component = tuned_open,
93 .mca_close_component = tuned_close,
94 .mca_register_component_params = tuned_register,
95 },
96 .collm_data = {
97
98 MCA_BASE_METADATA_PARAM_CHECKPOINT
99 },
100
101
102
103 .collm_init_query = ompi_coll_tuned_init_query,
104 .collm_comm_query = ompi_coll_tuned_comm_query,
105 },
106
107
108 0,
109
110
111 NULL
112 };
113
114 static int tuned_register(void)
115 {
116
117
118 ompi_coll_tuned_priority = 30;
119 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
120 "priority", "Priority of the tuned coll component",
121 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
122 OPAL_INFO_LVL_6,
123 MCA_BASE_VAR_SCOPE_READONLY,
124 &ompi_coll_tuned_priority);
125
126
127 ompi_coll_tuned_init_tree_fanout = 4;
128 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
129 "init_tree_fanout",
130 "Inital fanout used in the tree topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
131 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
132 OPAL_INFO_LVL_6,
133 MCA_BASE_VAR_SCOPE_READONLY,
134 &ompi_coll_tuned_init_tree_fanout);
135
136 ompi_coll_tuned_init_chain_fanout = 4;
137 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
138 "init_chain_fanout",
139 "Inital fanout used in the chain (fanout followed by pipeline) topologies for each communicator. This is only an initial guess, if a tuned collective needs a different fanout for an operation, it build it dynamically. This parameter is only for the first guess and might save a little time",
140 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
141 OPAL_INFO_LVL_6,
142 MCA_BASE_VAR_SCOPE_READONLY,
143 &ompi_coll_tuned_init_chain_fanout);
144
145 ompi_coll_tuned_alltoall_small_msg = 200;
146 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
147 "alltoall_small_msg",
148 "threshold (if supported) to decide if small MSGs alltoall algorithm will be used",
149 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
150 OPAL_INFO_LVL_6,
151 MCA_BASE_VAR_SCOPE_READONLY,
152 &ompi_coll_tuned_alltoall_small_msg);
153
154 ompi_coll_tuned_alltoall_intermediate_msg = 3000;
155 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
156 "alltoall_intermediate_msg",
157 "threshold (if supported) to decide if intermediate MSGs alltoall algorithm will be used",
158 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
159 OPAL_INFO_LVL_6,
160 MCA_BASE_VAR_SCOPE_READONLY,
161 &ompi_coll_tuned_alltoall_intermediate_msg);
162
163 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
164 "use_dynamic_rules",
165 "Switch used to decide if we use static (compiled/if statements) or dynamic (built at runtime) decision function rules",
166 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
167 OPAL_INFO_LVL_6,
168 MCA_BASE_VAR_SCOPE_READONLY,
169 &ompi_coll_tuned_use_dynamic_rules);
170
171 ompi_coll_tuned_dynamic_rules_filename = NULL;
172 (void) mca_base_component_var_register(&mca_coll_tuned_component.super.collm_version,
173 "dynamic_rules_filename",
174 "Filename of configuration file that contains the dynamic (@runtime) decision function rules",
175 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
176 OPAL_INFO_LVL_6,
177 MCA_BASE_VAR_SCOPE_READONLY,
178 &ompi_coll_tuned_dynamic_rules_filename);
179
180
181 ompi_coll_tuned_allreduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLREDUCE]);
182 ompi_coll_tuned_alltoall_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALL]);
183 ompi_coll_tuned_allgather_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHER]);
184 ompi_coll_tuned_allgatherv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLGATHERV]);
185 ompi_coll_tuned_alltoallv_intra_check_forced_init(&ompi_coll_tuned_forced_params[ALLTOALLV]);
186 ompi_coll_tuned_barrier_intra_check_forced_init(&ompi_coll_tuned_forced_params[BARRIER]);
187 ompi_coll_tuned_bcast_intra_check_forced_init(&ompi_coll_tuned_forced_params[BCAST]);
188 ompi_coll_tuned_reduce_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCE]);
189 ompi_coll_tuned_reduce_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTER]);
190 ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init(&ompi_coll_tuned_forced_params[REDUCESCATTERBLOCK]);
191 ompi_coll_tuned_gather_intra_check_forced_init(&ompi_coll_tuned_forced_params[GATHER]);
192 ompi_coll_tuned_scatter_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCATTER]);
193 ompi_coll_tuned_exscan_intra_check_forced_init(&ompi_coll_tuned_forced_params[EXSCAN]);
194 ompi_coll_tuned_scan_intra_check_forced_init(&ompi_coll_tuned_forced_params[SCAN]);
195
196 return OMPI_SUCCESS;
197 }
198
199 static int tuned_open(void)
200 {
201 int rc;
202
203 #if OPAL_ENABLE_DEBUG
204 {
205 int param;
206
207 param = mca_base_var_find("ompi", "coll", "base", "verbose");
208 if (param >= 0) {
209 const int *verbose = NULL;
210 mca_base_var_get_value(param, &verbose, NULL, NULL);
211 if (verbose && verbose[0] > 0) {
212 ompi_coll_tuned_stream = opal_output_open(NULL);
213 }
214 }
215 }
216 #endif
217
218
219
220
221
222
223
224
225
226
227 if (ompi_coll_tuned_use_dynamic_rules) {
228 if( ompi_coll_tuned_dynamic_rules_filename ) {
229 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:component_open Reading collective rules file [%s]",
230 ompi_coll_tuned_dynamic_rules_filename));
231 rc = ompi_coll_tuned_read_rules_config_file( ompi_coll_tuned_dynamic_rules_filename,
232 &(mca_coll_tuned_component.all_base_rules), COLLCOUNT);
233 if( rc >= 0 ) {
234 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Read %d valid rules\n", rc));
235 } else {
236 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_open Reading collective rules file failed\n"));
237 mca_coll_tuned_component.all_base_rules = NULL;
238 }
239 }
240 }
241
242 OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_open: done!"));
243
244 return OMPI_SUCCESS;
245 }
246
247
248
249 static int tuned_close(void)
250 {
251 OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: called"));
252
253
254
255
256 OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:component_close: done!"));
257
258 if( NULL != mca_coll_tuned_component.all_base_rules ) {
259 ompi_coll_tuned_free_all_rules(mca_coll_tuned_component.all_base_rules, COLLCOUNT);
260 mca_coll_tuned_component.all_base_rules = NULL;
261 }
262
263 return OMPI_SUCCESS;
264 }
265
266 static void
267 mca_coll_tuned_module_construct(mca_coll_tuned_module_t *module)
268 {
269 mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t*) module;
270
271 for( int i = 0; i < COLLCOUNT; i++ ) {
272 tuned_module->user_forced[i].algorithm = 0;
273 tuned_module->com_rules[i] = NULL;
274 }
275 }
276
277 OBJ_CLASS_INSTANCE(mca_coll_tuned_module_t, mca_coll_base_module_t,
278 mca_coll_tuned_module_construct, NULL);