This source file includes the following definitions.
- mca_coll_monitoring_component_open
- mca_coll_monitoring_component_close
- mca_coll_monitoring_component_init
- mca_coll_monitoring_component_register
- mca_coll_monitoring_module_enable
- mca_coll_monitoring_module_disable
- mca_coll_monitoring_ft_event
- mca_coll_monitoring_component_query
1
2
3
4
5
6
7
8
9
10
11
12 #include <ompi_config.h>
13 #include "coll_monitoring.h"
14 #include <ompi/constants.h>
15 #include <ompi/communicator/communicator.h>
16 #include <ompi/mca/coll/coll.h>
17 #include <opal/mca/base/mca_base_component_repository.h>
18
/*
 * Record the collective implementation currently installed on a communicator
 * for one operation, in both its blocking (coll_<api>) and non-blocking
 * (coll_i<api>) variants.
 *
 * For each variant:
 *  - if the communicator has a module for it, the function pointer and the
 *    module pointer are copied into mon->real and the module is retained;
 *  - otherwise the matching entry of mon->super is set to NULL, the
 *    mon->real slots are set to NULL, and a warning is printed.
 *
 * The mon->real slots are cleared explicitly in the "nothing to save" case
 * because MONITORING_RELEASE_PREV_COLL_API treats a non-NULL
 * real.coll_<api>_module as a retained module; leaving the slot untouched
 * would only be safe if the module memory had been zeroed beforehand.
 *
 * mon  : pointer to the monitoring module (uses its `super` and `real` members)
 * comm : pointer to the communicator (uses its `c_coll` and `c_name` members)
 * api  : bare operation name, e.g. bcast
 *
 * Note: `mon` and `comm` are evaluated several times; pass side-effect-free
 * expressions.
 */
#define MONITORING_SAVE_PREV_COLL_API(mon, comm, api)                              \
    do {                                                                           \
        if( NULL != (comm)->c_coll->coll_ ## api ## _module ) {                    \
            (mon)->real.coll_ ## api = (comm)->c_coll->coll_ ## api;               \
            (mon)->real.coll_ ## api ## _module =                                  \
                (comm)->c_coll->coll_ ## api ## _module;                           \
            OBJ_RETAIN((mon)->real.coll_ ## api ## _module);                       \
        } else {                                                                   \
            /* Nothing to forward to: do not offer a monitored version */          \
            (mon)->super.coll_ ## api = NULL;                                      \
            (mon)->real.coll_ ## api = NULL;                                       \
            (mon)->real.coll_ ## api ## _module = NULL;                            \
            OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \
                                       "coll_" # api, (comm)->c_name);             \
        }                                                                          \
        if( NULL != (comm)->c_coll->coll_i ## api ## _module ) {                   \
            (mon)->real.coll_i ## api = (comm)->c_coll->coll_i ## api;             \
            (mon)->real.coll_i ## api ## _module =                                 \
                (comm)->c_coll->coll_i ## api ## _module;                          \
            OBJ_RETAIN((mon)->real.coll_i ## api ## _module);                      \
        } else {                                                                   \
            /* Nothing to forward to: do not offer a monitored version */          \
            (mon)->super.coll_i ## api = NULL;                                     \
            (mon)->real.coll_i ## api = NULL;                                      \
            (mon)->real.coll_i ## api ## _module = NULL;                           \
            OPAL_MONITORING_PRINT_WARN("COMM \"%s\": No monitoring available for " \
                                       "coll_i" # api, (comm)->c_name);            \
        }                                                                          \
    } while(0)
42
/*
 * Undo MONITORING_SAVE_PREV_COLL_API for one operation, in both its blocking
 * (coll_<api>) and non-blocking (coll_i<api>) variants.
 *
 * For each variant whose saved module pointer in mon->real is non-NULL:
 *  - call that module's coll_module_disable hook (when it has one) with the
 *    saved module and `comm`;
 *  - release the reference held on the module;
 *  - reset the saved function pointer and module pointer to NULL.
 *
 * mon  : pointer to the monitoring module (uses its `real` member)
 * comm : communicator handed to the coll_module_disable hook
 * api  : bare operation name, e.g. bcast
 *
 * Parameters are parenthesized so that expression arguments (e.g. &obj) are
 * parsed as intended. `mon` and `comm` are evaluated several times; pass
 * side-effect-free expressions.
 */
#define MONITORING_RELEASE_PREV_COLL_API(mon, comm, api)                           \
    do {                                                                           \
        if( NULL != (mon)->real.coll_ ## api ## _module ) {                        \
            if( NULL != (mon)->real.coll_ ## api ## _module->coll_module_disable ) { \
                (mon)->real.coll_ ## api ## _module->coll_module_disable(          \
                    (mon)->real.coll_ ## api ## _module, (comm));                  \
            }                                                                      \
            OBJ_RELEASE((mon)->real.coll_ ## api ## _module);                      \
            (mon)->real.coll_ ## api = NULL;                                       \
            (mon)->real.coll_ ## api ## _module = NULL;                            \
        }                                                                          \
        if( NULL != (mon)->real.coll_i ## api ## _module ) {                       \
            if( NULL != (mon)->real.coll_i ## api ## _module->coll_module_disable ) { \
                (mon)->real.coll_i ## api ## _module->coll_module_disable(         \
                    (mon)->real.coll_i ## api ## _module, (comm));                 \
            }                                                                      \
            OBJ_RELEASE((mon)->real.coll_i ## api ## _module);                     \
            (mon)->real.coll_i ## api = NULL;                                      \
            (mon)->real.coll_i ## api ## _module = NULL;                           \
        }                                                                          \
    } while(0)
62
/*
 * Apply `op(mod, comm, <api>)` once for every collective operation handled by
 * the monitoring component. `op` is expected to be a three-argument macro
 * such as MONITORING_SAVE_PREV_COLL_API or MONITORING_RELEASE_PREV_COLL_API,
 * which take care of both the blocking and the non-blocking variant of <api>.
 */
#define MONITORING_SET_FULL_PREV_COLL_API(mod, comm, op)  \
    do {                                                  \
        /* Regular collectives */                         \
        op(mod, comm, allgather);                         \
        op(mod, comm, allgatherv);                        \
        op(mod, comm, allreduce);                         \
        op(mod, comm, alltoall);                          \
        op(mod, comm, alltoallv);                         \
        op(mod, comm, alltoallw);                         \
        op(mod, comm, barrier);                           \
        op(mod, comm, bcast);                             \
        op(mod, comm, exscan);                            \
        op(mod, comm, gather);                            \
        op(mod, comm, gatherv);                           \
        op(mod, comm, reduce);                            \
        op(mod, comm, reduce_scatter);                    \
        op(mod, comm, reduce_scatter_block);              \
        op(mod, comm, scan);                              \
        op(mod, comm, scatter);                           \
        op(mod, comm, scatterv);                          \
        /* Neighborhood collectives */                    \
        op(mod, comm, neighbor_allgather);                \
        op(mod, comm, neighbor_allgatherv);               \
        op(mod, comm, neighbor_alltoall);                 \
        op(mod, comm, neighbor_alltoallv);                \
        op(mod, comm, neighbor_alltoallw);                \
    } while(0)
88
/* Save the previously installed implementation of every collective of `comm`
 * into module `mod` (one MONITORING_SAVE_PREV_COLL_API per operation). */
#define MONITORING_SAVE_FULL_PREV_COLL_API(mod, comm) \
    MONITORING_SET_FULL_PREV_COLL_API((mod), (comm), MONITORING_SAVE_PREV_COLL_API)

/* Release every implementation previously saved in module `mod`
 * (one MONITORING_RELEASE_PREV_COLL_API per operation). */
#define MONITORING_RELEASE_FULL_PREV_COLL_API(mod, comm) \
    MONITORING_SET_FULL_PREV_COLL_API((mod), (comm), MONITORING_RELEASE_PREV_COLL_API)
94
95 static int mca_coll_monitoring_component_open(void)
96 {
97 return OMPI_SUCCESS;
98 }
99
100 static int mca_coll_monitoring_component_close(void)
101 {
102 OPAL_MONITORING_PRINT_INFO("coll_module_close");
103 mca_common_monitoring_finalize();
104 return OMPI_SUCCESS;
105 }
106
/**
 * Init-query hook of the monitoring coll component (registered as
 * .collm_init_query in the component descriptor).
 *
 * @param enable_progress_threads  ignored by this component
 * @param enable_mpi_threads       ignored by this component
 *
 * @return whatever mca_common_monitoring_init() returns.
 */
static int mca_coll_monitoring_component_init(bool enable_progress_threads,
                                              bool enable_mpi_threads)
{
    /* Neither threading flag is used below. */
    (void) enable_progress_threads;
    (void) enable_mpi_threads;

    OPAL_MONITORING_PRINT_INFO("coll_module_init");

    const int status = mca_common_monitoring_init();
    return status;
}
113
114 static int mca_coll_monitoring_component_register(void)
115 {
116 return OMPI_SUCCESS;
117 }
118
119 static int
120 mca_coll_monitoring_module_enable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
121 {
122 mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
123 if( 1 == opal_atomic_add_fetch_32(&monitoring_module->is_initialized, 1) ) {
124 MONITORING_SAVE_FULL_PREV_COLL_API(monitoring_module, comm);
125 monitoring_module->data = mca_common_monitoring_coll_new(comm);
126 OPAL_MONITORING_PRINT_INFO("coll_module_enabled");
127 }
128 return OMPI_SUCCESS;
129 }
130
131 static int
132 mca_coll_monitoring_module_disable(mca_coll_base_module_t*module, struct ompi_communicator_t*comm)
133 {
134 mca_coll_monitoring_module_t*monitoring_module = (mca_coll_monitoring_module_t*) module;
135 if( 0 == opal_atomic_sub_fetch_32(&monitoring_module->is_initialized, 1) ) {
136 MONITORING_RELEASE_FULL_PREV_COLL_API(monitoring_module, comm);
137 mca_common_monitoring_coll_release(monitoring_module->data);
138 monitoring_module->data = NULL;
139 OPAL_MONITORING_PRINT_INFO("coll_module_disabled");
140 }
141 return OMPI_SUCCESS;
142 }
143
144 static int mca_coll_monitoring_ft_event(int state)
145 {
146 switch(state) {
147 case OPAL_CRS_CHECKPOINT:
148 case OPAL_CRS_CONTINUE:
149 case OPAL_CRS_RESTART:
150 case OPAL_CRS_TERM:
151 default:
152 ;
153 }
154 return OMPI_SUCCESS;
155 }
156
157 static mca_coll_base_module_t*
158 mca_coll_monitoring_component_query(struct ompi_communicator_t*comm, int*priority)
159 {
160 OPAL_MONITORING_PRINT_INFO("coll_module_query");
161 mca_coll_monitoring_module_t*monitoring_module = OBJ_NEW(mca_coll_monitoring_module_t);
162 if( NULL == monitoring_module ) return (*priority = -1, NULL);
163
164
165 monitoring_module->super.coll_module_enable = mca_coll_monitoring_module_enable;
166 monitoring_module->super.coll_module_disable = mca_coll_monitoring_module_disable;
167 monitoring_module->super.ft_event = mca_coll_monitoring_ft_event;
168
169
170
171 monitoring_module->super.coll_allgather = mca_coll_monitoring_allgather;
172 monitoring_module->super.coll_allgatherv = mca_coll_monitoring_allgatherv;
173 monitoring_module->super.coll_allreduce = mca_coll_monitoring_allreduce;
174 monitoring_module->super.coll_alltoall = mca_coll_monitoring_alltoall;
175 monitoring_module->super.coll_alltoallv = mca_coll_monitoring_alltoallv;
176 monitoring_module->super.coll_alltoallw = mca_coll_monitoring_alltoallw;
177 monitoring_module->super.coll_barrier = mca_coll_monitoring_barrier;
178 monitoring_module->super.coll_bcast = mca_coll_monitoring_bcast;
179 monitoring_module->super.coll_exscan = mca_coll_monitoring_exscan;
180 monitoring_module->super.coll_gather = mca_coll_monitoring_gather;
181 monitoring_module->super.coll_gatherv = mca_coll_monitoring_gatherv;
182 monitoring_module->super.coll_reduce = mca_coll_monitoring_reduce;
183 monitoring_module->super.coll_reduce_scatter = mca_coll_monitoring_reduce_scatter;
184 monitoring_module->super.coll_reduce_scatter_block = mca_coll_monitoring_reduce_scatter_block;
185 monitoring_module->super.coll_scan = mca_coll_monitoring_scan;
186 monitoring_module->super.coll_scatter = mca_coll_monitoring_scatter;
187 monitoring_module->super.coll_scatterv = mca_coll_monitoring_scatterv;
188
189
190 monitoring_module->super.coll_iallgather = mca_coll_monitoring_iallgather;
191 monitoring_module->super.coll_iallgatherv = mca_coll_monitoring_iallgatherv;
192 monitoring_module->super.coll_iallreduce = mca_coll_monitoring_iallreduce;
193 monitoring_module->super.coll_ialltoall = mca_coll_monitoring_ialltoall;
194 monitoring_module->super.coll_ialltoallv = mca_coll_monitoring_ialltoallv;
195 monitoring_module->super.coll_ialltoallw = mca_coll_monitoring_ialltoallw;
196 monitoring_module->super.coll_ibarrier = mca_coll_monitoring_ibarrier;
197 monitoring_module->super.coll_ibcast = mca_coll_monitoring_ibcast;
198 monitoring_module->super.coll_iexscan = mca_coll_monitoring_iexscan;
199 monitoring_module->super.coll_igather = mca_coll_monitoring_igather;
200 monitoring_module->super.coll_igatherv = mca_coll_monitoring_igatherv;
201 monitoring_module->super.coll_ireduce = mca_coll_monitoring_ireduce;
202 monitoring_module->super.coll_ireduce_scatter = mca_coll_monitoring_ireduce_scatter;
203 monitoring_module->super.coll_ireduce_scatter_block = mca_coll_monitoring_ireduce_scatter_block;
204 monitoring_module->super.coll_iscan = mca_coll_monitoring_iscan;
205 monitoring_module->super.coll_iscatter = mca_coll_monitoring_iscatter;
206 monitoring_module->super.coll_iscatterv = mca_coll_monitoring_iscatterv;
207
208
209 monitoring_module->super.coll_neighbor_allgather = mca_coll_monitoring_neighbor_allgather;
210 monitoring_module->super.coll_neighbor_allgatherv = mca_coll_monitoring_neighbor_allgatherv;
211 monitoring_module->super.coll_neighbor_alltoall = mca_coll_monitoring_neighbor_alltoall;
212 monitoring_module->super.coll_neighbor_alltoallv = mca_coll_monitoring_neighbor_alltoallv;
213 monitoring_module->super.coll_neighbor_alltoallw = mca_coll_monitoring_neighbor_alltoallw;
214 monitoring_module->super.coll_ineighbor_allgather = mca_coll_monitoring_ineighbor_allgather;
215 monitoring_module->super.coll_ineighbor_allgatherv = mca_coll_monitoring_ineighbor_allgatherv;
216 monitoring_module->super.coll_ineighbor_alltoall = mca_coll_monitoring_ineighbor_alltoall;
217 monitoring_module->super.coll_ineighbor_alltoallv = mca_coll_monitoring_ineighbor_alltoallv;
218 monitoring_module->super.coll_ineighbor_alltoallw = mca_coll_monitoring_ineighbor_alltoallw;
219
220
221 monitoring_module->is_initialized = 0;
222
223 *priority = mca_coll_monitoring_component.priority;
224
225 return &(monitoring_module->super);
226 }
227
228 mca_coll_monitoring_component_t mca_coll_monitoring_component = {
229 .super = {
230
231
232 .collm_version = {
233 MCA_COLL_BASE_VERSION_2_0_0,
234
235 .mca_component_name = "monitoring",
236 MCA_MONITORING_MAKE_VERSION,
237 .mca_open_component = mca_coll_monitoring_component_open,
238 .mca_close_component = mca_coll_monitoring_component_close,
239 .mca_register_component_params = mca_coll_monitoring_component_register
240 },
241 .collm_data = {
242
243 MCA_BASE_METADATA_PARAM_CHECKPOINT
244 },
245
246 .collm_init_query = mca_coll_monitoring_component_init,
247 .collm_comm_query = mca_coll_monitoring_component_query
248 },
249 .priority = INT_MAX
250 };
251
252 OBJ_CLASS_INSTANCE(mca_coll_monitoring_module_t,
253 mca_coll_base_module_t,
254 NULL,
255 NULL);
256