1
2
3
4
5
6
7
8
9
10
11
12
13
14
15 #ifndef MCA_COLL_TUNED_EXPORT_H
16 #define MCA_COLL_TUNED_EXPORT_H
17
18 #include "ompi_config.h"
19
20 #include "mpi.h"
21 #include "ompi/mca/mca.h"
22 #include "ompi/request/request.h"
23 #include "ompi/mca/coll/base/coll_base_functions.h"
24 #include "opal/util/output.h"
25
26
27 #include "coll_tuned_dynamic_rules.h"
28
29 BEGIN_C_DECLS
30
31
/*
 * Component-wide settings of the tuned collective component.  This header
 * only declares them (extern, no initialiser); the definitions are not part
 * of this file.
 *
 * NOTE(review): the per-variable remarks are inferred from the identifiers
 * alone -- confirm them against the code that defines and registers them.
 */
extern int ompi_coll_tuned_stream;                   /* presumably an output stream id */
extern int ompi_coll_tuned_priority;                 /* presumably the component priority */
extern bool ompi_coll_tuned_use_dynamic_rules;       /* presumably toggles dynamic rules */
extern char *ompi_coll_tuned_dynamic_rules_filename; /* presumably the rules file path */
extern int ompi_coll_tuned_init_tree_fanout;
extern int ompi_coll_tuned_init_chain_fanout;
extern int ompi_coll_tuned_init_max_requests;
extern int ompi_coll_tuned_alltoall_small_msg;
extern int ompi_coll_tuned_alltoall_intermediate_msg;
41
42
43
44
/*
 * Record of five integer "param index" slots, one each for the algorithm,
 * segment size, tree fan-out, chain fan-out and maximum-requests settings.
 *
 * Within this header the type is used for the per-collective array
 * ompi_coll_tuned_forced_params[] and as the argument of every
 * *_check_forced_init() routine.
 *
 * NOTE(review): the slots presumably hold MCA parameter indices obtained at
 * registration time -- confirm against the check_forced_init implementations.
 */
typedef struct coll_tuned_force_algorithm_mca_param_indices_t {
    int algorithm_param_index;
    int segsize_param_index;
    int tree_fanout_param_index;
    int chain_fanout_param_index;
    int max_requests_param_index;
} coll_tuned_force_algorithm_mca_param_indices_t;
53
54
55
56
57
/*
 * Five integer settings for one collective: algorithm, segment size, tree
 * fan-out, chain fan-out and maximum number of requests.
 *
 * Within this header the type is stored once per collective in
 * mca_coll_tuned_module_t::user_forced[].
 *
 * NOTE(review): presumably these are the values read back through the
 * matching *_param_index slots -- confirm where the fields are filled in.
 */
typedef struct coll_tuned_force_algorithm_params_t {
    int algorithm;
    int segsize;
    int tree_fanout;
    int chain_fanout;
    int max_requests;
} coll_tuned_force_algorithm_params_t;
66
67
68 extern coll_tuned_force_algorithm_mca_param_indices_t ompi_coll_tuned_forced_params[COLLCOUNT];
69
70 extern int ompi_coll_tuned_forced_max_algorithms[COLLCOUNT];
71
72
73
74
75
76
77
/*
 * Component-level query hook.
 *
 * @param enable_progress_threads  flag supplied by the caller
 * @param enable_mpi_threads       flag supplied by the caller
 * @return an int status
 *
 * NOTE(review): by name the flags report whether progress threads and MPI
 * threads are enabled, and the result is presumably an OMPI_* status code --
 * confirm against the definition.
 */
int ompi_coll_tuned_init_query(bool enable_progress_threads,
                               bool enable_mpi_threads);
80
81 mca_coll_base_module_t *
82 ompi_coll_tuned_comm_query(struct ompi_communicator_t *comm, int *priority);
83
84
85
86
87
88
89
90
91
92
93
94 int ompi_coll_tuned_allgather_intra_dec_fixed(ALLGATHER_ARGS);
95 int ompi_coll_tuned_allgather_intra_dec_dynamic(ALLGATHER_ARGS);
96 int ompi_coll_tuned_allgather_intra_do_this(ALLGATHER_ARGS, int algorithm, int faninout, int segsize);
97 int ompi_coll_tuned_allgather_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
98
99
100 int ompi_coll_tuned_allgatherv_intra_dec_fixed(ALLGATHERV_ARGS);
101 int ompi_coll_tuned_allgatherv_intra_dec_dynamic(ALLGATHERV_ARGS);
102 int ompi_coll_tuned_allgatherv_intra_do_this(ALLGATHERV_ARGS, int algorithm, int faninout, int segsize);
103 int ompi_coll_tuned_allgatherv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
104
105
106 int ompi_coll_tuned_allreduce_intra_dec_fixed(ALLREDUCE_ARGS);
107 int ompi_coll_tuned_allreduce_intra_dec_dynamic(ALLREDUCE_ARGS);
108 int ompi_coll_tuned_allreduce_intra_do_this(ALLREDUCE_ARGS, int algorithm, int faninout, int segsize);
109 int ompi_coll_tuned_allreduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
110
111
112 int ompi_coll_tuned_alltoall_intra_dec_fixed(ALLTOALL_ARGS);
113 int ompi_coll_tuned_alltoall_intra_dec_dynamic(ALLTOALL_ARGS);
114 int ompi_coll_tuned_alltoall_intra_do_this(ALLTOALL_ARGS, int algorithm, int faninout, int segsize, int max_requests);
115 int ompi_coll_tuned_alltoall_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
116
117
118 int ompi_coll_tuned_alltoallv_intra_dec_fixed(ALLTOALLV_ARGS);
119 int ompi_coll_tuned_alltoallv_intra_dec_dynamic(ALLTOALLV_ARGS);
120 int ompi_coll_tuned_alltoallv_intra_do_this(ALLTOALLV_ARGS, int algorithm);
121 int ompi_coll_tuned_alltoallv_intra_check_forced_init(coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
122
123
124 int ompi_coll_tuned_barrier_intra_dec_fixed(BARRIER_ARGS);
125 int ompi_coll_tuned_barrier_intra_dec_dynamic(BARRIER_ARGS);
126 int ompi_coll_tuned_barrier_intra_do_this(BARRIER_ARGS, int algorithm, int faninout, int segsize);
127 int ompi_coll_tuned_barrier_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
128
129
130 int ompi_coll_tuned_bcast_intra_dec_fixed(BCAST_ARGS);
131 int ompi_coll_tuned_bcast_intra_dec_dynamic(BCAST_ARGS);
132 int ompi_coll_tuned_bcast_intra_do_this(BCAST_ARGS, int algorithm, int faninout, int segsize);
133 int ompi_coll_tuned_bcast_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
134
135
136 int ompi_coll_tuned_gather_intra_dec_fixed(GATHER_ARGS);
137 int ompi_coll_tuned_gather_intra_dec_dynamic(GATHER_ARGS);
138 int ompi_coll_tuned_gather_intra_do_this(GATHER_ARGS, int algorithm, int faninout, int segsize);
139 int ompi_coll_tuned_gather_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
140
141
142 int ompi_coll_tuned_reduce_intra_dec_fixed(REDUCE_ARGS);
143 int ompi_coll_tuned_reduce_intra_dec_dynamic(REDUCE_ARGS);
144 int ompi_coll_tuned_reduce_intra_do_this(REDUCE_ARGS, int algorithm, int faninout, int segsize, int max_oustanding_reqs);
145 int ompi_coll_tuned_reduce_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
146
147
148 int ompi_coll_tuned_reduce_scatter_intra_dec_fixed(REDUCESCATTER_ARGS);
149 int ompi_coll_tuned_reduce_scatter_intra_dec_dynamic(REDUCESCATTER_ARGS);
150 int ompi_coll_tuned_reduce_scatter_intra_do_this(REDUCESCATTER_ARGS, int algorithm, int faninout, int segsize);
151 int ompi_coll_tuned_reduce_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
152
153
154 int ompi_coll_tuned_reduce_scatter_block_intra_dec_fixed(REDUCESCATTERBLOCK_ARGS);
155 int ompi_coll_tuned_reduce_scatter_block_intra_dec_dynamic(REDUCESCATTERBLOCK_ARGS);
156 int ompi_coll_tuned_reduce_scatter_block_intra_do_this(REDUCESCATTERBLOCK_ARGS, int algorithm, int faninout, int segsize);
157 int ompi_coll_tuned_reduce_scatter_block_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
158
159
160 int ompi_coll_tuned_scatter_intra_dec_fixed(SCATTER_ARGS);
161 int ompi_coll_tuned_scatter_intra_dec_dynamic(SCATTER_ARGS);
162 int ompi_coll_tuned_scatter_intra_do_this(SCATTER_ARGS, int algorithm, int faninout, int segsize);
163 int ompi_coll_tuned_scatter_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
164
165
166 int ompi_coll_tuned_exscan_intra_dec_fixed(EXSCAN_ARGS);
167 int ompi_coll_tuned_exscan_intra_dec_dynamic(EXSCAN_ARGS);
168 int ompi_coll_tuned_exscan_intra_do_this(EXSCAN_ARGS, int algorithm);
169 int ompi_coll_tuned_exscan_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
170
171
172 int ompi_coll_tuned_scan_intra_dec_fixed(SCAN_ARGS);
173 int ompi_coll_tuned_scan_intra_dec_dynamic(SCAN_ARGS);
174 int ompi_coll_tuned_scan_intra_do_this(SCAN_ARGS, int algorithm);
175 int ompi_coll_tuned_scan_intra_check_forced_init (coll_tuned_force_algorithm_mca_param_indices_t *mca_param_indices);
176
/*
 * Event hook taking an integer state and returning an int status.
 *
 * NOTE(review): "ft" presumably stands for fault tolerance, with `state`
 * identifying the checkpoint/restart phase -- confirm against the definition.
 */
int mca_coll_tuned_ft_event(int state);
178
179 struct mca_coll_tuned_component_t {
180
181 mca_coll_base_component_2_0_0_t super;
182
183
184 int tuned_priority;
185
186
187
188
189
190
191 ompi_coll_alg_rule_t *all_base_rules;
192 };
193
194
195
196 typedef struct mca_coll_tuned_component_t mca_coll_tuned_component_t;
197
198
199
200
201 OMPI_MODULE_DECLSPEC extern mca_coll_tuned_component_t mca_coll_tuned_component;
202
203 struct mca_coll_tuned_module_t {
204 mca_coll_base_module_t super;
205
206
207
208 coll_tuned_force_algorithm_params_t user_forced[COLLCOUNT];
209
210
211 ompi_coll_com_rule_t *com_rules[COLLCOUNT];
212 };
213 typedef struct mca_coll_tuned_module_t mca_coll_tuned_module_t;
214 OBJ_CLASS_DECLARATION(mca_coll_tuned_module_t);
215
216 #endif