This source file includes the following definitions.
- ompi_coll_tuned_init_query
- ompi_coll_tuned_comm_query
- ompi_coll_tuned_forced_getvalues
- tuned_module_enable
- mca_coll_tuned_ft_event
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 #include "ompi_config.h"
24 #include "coll_tuned.h"
25
26 #include <stdio.h>
27
28 #include "mpi.h"
29 #include "ompi/communicator/communicator.h"
30 #include "ompi/mca/coll/coll.h"
31 #include "ompi/mca/coll/base/base.h"
32 #include "ompi/mca/coll/base/coll_base_topo.h"
33 #include "coll_tuned.h"
34 #include "coll_tuned_dynamic_rules.h"
35 #include "coll_tuned_dynamic_file.h"
36
37 static int tuned_module_enable(mca_coll_base_module_t *module,
38 struct ompi_communicator_t *comm);
39
40
41
42
43
44 int ompi_coll_tuned_init_query(bool enable_progress_threads,
45 bool enable_mpi_threads)
46 {
47 return OMPI_SUCCESS;
48 }
49
50
51
52
53
54
55
56 mca_coll_base_module_t *
57 ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority)
58 {
59 mca_coll_tuned_module_t *tuned_module;
60
61 OPAL_OUTPUT((ompi_coll_tuned_stream, "coll:tuned:module_tuned query called"));
62
63
64
65
66 if (OMPI_COMM_IS_INTER(comm)) {
67 *priority = 0;
68 return NULL;
69 }
70
71
72
73
74
75 if (OMPI_COMM_IS_INTRA(comm) && ompi_comm_size(comm) < 2) {
76 *priority = 0;
77 return NULL;
78 }
79
80 tuned_module = OBJ_NEW(mca_coll_tuned_module_t);
81 if (NULL == tuned_module) return NULL;
82
83 *priority = ompi_coll_tuned_priority;
84
85
86
87
88
89
90
91 tuned_module->super.coll_module_enable = tuned_module_enable;
92 tuned_module->super.ft_event = mca_coll_tuned_ft_event;
93
94
95
96
97
98 tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_fixed;
99 tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_fixed;
100 tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_fixed;
101 tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_fixed;
102 tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_fixed;
103 tuned_module->super.coll_alltoallw = NULL;
104 tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_fixed;
105 tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_fixed;
106 tuned_module->super.coll_exscan = NULL;
107 tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_fixed;
108 tuned_module->super.coll_gatherv = NULL;
109 tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_fixed;
110 tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_fixed;
111 tuned_module->super.coll_reduce_scatter_block = ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed;
112 tuned_module->super.coll_scan = NULL;
113 tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_fixed;
114 tuned_module->super.coll_scatterv = NULL;
115
116 return &(tuned_module->super);
117 }
118
119
120
121
122 static int
123 ompi_coll_tuned_forced_getvalues( enum COLLTYPE type,
124 coll_tuned_force_algorithm_params_t *forced_values )
125 {
126 coll_tuned_force_algorithm_mca_param_indices_t* mca_params;
127 const int *tmp = NULL;
128
129 mca_params = &(ompi_coll_tuned_forced_params[type]);
130
131
132
133
134
135
136 mca_base_var_get_value(mca_params->algorithm_param_index, &tmp, NULL, NULL);
137 forced_values->algorithm = tmp ? tmp[0] : 0;
138
139 if( BARRIER != type ) {
140 mca_base_var_get_value(mca_params->segsize_param_index, &tmp, NULL, NULL);
141 if (tmp) forced_values->segsize = tmp[0];
142 mca_base_var_get_value(mca_params->tree_fanout_param_index, &tmp, NULL, NULL);
143 if (tmp) forced_values->tree_fanout = tmp[0];
144 mca_base_var_get_value(mca_params->chain_fanout_param_index, &tmp, NULL, NULL);
145 if (tmp) forced_values->chain_fanout = tmp[0];
146 mca_base_var_get_value(mca_params->max_requests_param_index, &tmp, NULL, NULL);
147 if (tmp) forced_values->max_requests = tmp[0];
148 }
149 return (MPI_SUCCESS);
150 }
151
/*
 * Run EXECUTE when collective TYPE needs a dynamic decision function on
 * tuned module TMOD.
 *
 * Steps:
 *   1. load the user-forced parameters for TYPE into TMOD->user_forced[TYPE];
 *   2. reset TMOD->com_rules[TYPE] to NULL;
 *   3. if a rule base is loaded (mca_coll_tuned_component.all_base_rules),
 *      look up the rule for (TYPE, size) and store it in TMOD->com_rules[TYPE];
 *   4. if the forced algorithm is non-zero or a rule was found, log the
 *      decision and execute EXECUTE.
 *
 * NOTE: the expansion reads a variable named `size` from the scope of the
 * invocation; it is not a macro parameter.
 *
 * The body is wrapped in do { ... } while (0) so that an invocation followed
 * by ';' is a single statement and stays correct inside an unbraced if/else.
 * EXECUTE must therefore not rely on `break` or `continue` reaching an
 * enclosing loop.
 */
#define COLL_TUNED_EXECUTE_IF_DYNAMIC(TMOD, TYPE, EXECUTE) \
    do { \
        int need_dynamic_decision = 0; \
        ompi_coll_tuned_forced_getvalues( (TYPE), &((TMOD)->user_forced[(TYPE)]) ); \
        (TMOD)->com_rules[(TYPE)] = NULL; \
        if( 0 != (TMOD)->user_forced[(TYPE)].algorithm ) { \
            need_dynamic_decision = 1; \
        } \
        if( NULL != mca_coll_tuned_component.all_base_rules ) { \
            (TMOD)->com_rules[(TYPE)] \
                = ompi_coll_tuned_get_com_rule_ptr( mca_coll_tuned_component.all_base_rules, \
                                                    (TYPE), size ); \
            if( NULL != (TMOD)->com_rules[(TYPE)] ) { \
                need_dynamic_decision = 1; \
            } \
        } \
        if( 1 == need_dynamic_decision ) { \
            OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned: enable dynamic selection for "#TYPE)); \
            EXECUTE; \
        } \
    } while (0)
173
174
175
176
177 static int
178 tuned_module_enable( mca_coll_base_module_t *module,
179 struct ompi_communicator_t *comm )
180 {
181 int size;
182 mca_coll_tuned_module_t *tuned_module = (mca_coll_tuned_module_t *) module;
183 mca_coll_base_comm_t *data = NULL;
184
185 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init called."));
186
187
188 if (OMPI_COMM_IS_INTER(comm)) {
189 size = ompi_comm_remote_size(comm);
190 } else {
191 size = ompi_comm_size(comm);
192 }
193
194
195
196
197
198
199
200
201
202
203
204
205
206 data = OBJ_NEW(mca_coll_base_comm_t);
207 if (NULL == data) {
208 return OMPI_ERROR;
209 }
210
211 if (ompi_coll_tuned_use_dynamic_rules) {
212 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init MCW & Dynamic"));
213
214
215
216
217
218 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLGATHER,
219 tuned_module->super.coll_allgather = ompi_coll_tuned_allgather_intra_dec_dynamic);
220 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLGATHERV,
221 tuned_module->super.coll_allgatherv = ompi_coll_tuned_allgatherv_intra_dec_dynamic);
222 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLREDUCE,
223 tuned_module->super.coll_allreduce = ompi_coll_tuned_allreduce_intra_dec_dynamic);
224 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALL,
225 tuned_module->super.coll_alltoall = ompi_coll_tuned_alltoall_intra_dec_dynamic);
226 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALLV,
227 tuned_module->super.coll_alltoallv = ompi_coll_tuned_alltoallv_intra_dec_dynamic);
228 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, ALLTOALLW,
229 tuned_module->super.coll_alltoallw = NULL);
230 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, BARRIER,
231 tuned_module->super.coll_barrier = ompi_coll_tuned_barrier_intra_dec_dynamic);
232 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, BCAST,
233 tuned_module->super.coll_bcast = ompi_coll_tuned_bcast_intra_dec_dynamic);
234 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, EXSCAN,
235 tuned_module->super.coll_exscan = ompi_coll_tuned_exscan_intra_dec_dynamic);
236 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, GATHER,
237 tuned_module->super.coll_gather = ompi_coll_tuned_gather_intra_dec_dynamic);
238 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, GATHERV,
239 tuned_module->super.coll_gatherv = NULL);
240 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCE,
241 tuned_module->super.coll_reduce = ompi_coll_tuned_reduce_intra_dec_dynamic);
242 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCESCATTER,
243 tuned_module->super.coll_reduce_scatter = ompi_coll_tuned_reduce_scatter_intra_dec_dynamic);
244 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, REDUCESCATTERBLOCK,
245 tuned_module->super.coll_reduce_scatter_block = ompi_coll_tuned_reduce_scatter_block_intra_dec_dynamic);
246 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCAN,
247 tuned_module->super.coll_scan = ompi_coll_tuned_scan_intra_dec_dynamic);
248 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCATTER,
249 tuned_module->super.coll_scatter = ompi_coll_tuned_scatter_intra_dec_dynamic);
250 COLL_TUNED_EXECUTE_IF_DYNAMIC(tuned_module, SCATTERV,
251 tuned_module->super.coll_scatterv = NULL);
252 }
253
254
255 data->cached_ntree = NULL;
256
257 data->cached_bintree = NULL;
258
259 data->cached_bmtree = NULL;
260
261 data->cached_in_order_bmtree = NULL;
262
263 data->cached_kmtree = NULL;
264
265 data->cached_chain = NULL;
266
267 data->cached_pipeline = NULL;
268
269 data->cached_in_order_bintree = NULL;
270
271
272 tuned_module->super.base_data = data;
273
274 OPAL_OUTPUT((ompi_coll_tuned_stream,"coll:tuned:module_init Tuned is in use"));
275 return OMPI_SUCCESS;
276 }
277
278 int mca_coll_tuned_ft_event(int state) {
279 if(OPAL_CRS_CHECKPOINT == state) {
280 ;
281 }
282 else if(OPAL_CRS_CONTINUE == state) {
283 ;
284 }
285 else if(OPAL_CRS_RESTART == state) {
286 ;
287 }
288 else if(OPAL_CRS_TERM == state ) {
289 ;
290 }
291 else {
292 ;
293 }
294
295 return OMPI_SUCCESS;
296 }