This source file includes the following definitions.
- mca_bml_r2_ft_event
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 #include "ompi_config.h"
23 #include "opal/util/output.h"
24
25 #include <stdlib.h>
26 #include <string.h>
27
28 #include "opal/runtime/opal_progress.h"
29 #include "opal/mca/btl/base/base.h"
30 #include "opal/mca/pmix/pmix.h"
31
32 #include "ompi/runtime/ompi_cr.h"
33 #include "ompi/mca/bml/base/base.h"
34 #include "ompi/mca/bml/base/bml_base_btl.h"
35 #include "ompi/mca/pml/base/base.h"
36 #include "ompi/proc/proc.h"
37
38 #include "bml_r2.h"
39 #include "bml_r2_ft.h"
40
41 int mca_bml_r2_ft_event(int state)
42 {
43 #if OPAL_ENABLE_FT_CR == 1
44 static bool first_continue_pass = false;
45 ompi_proc_t** procs = NULL;
46 size_t num_procs;
47 size_t btl_idx;
48 int ret, p;
49 int loc_state;
50 int param_type = -1;
51 const char **btl_list;
52
53 if(OPAL_CRS_CHECKPOINT == state) {
54
55 }
56 else if(OPAL_CRS_CONTINUE == state) {
57 first_continue_pass = !first_continue_pass;
58
59
60 if (opal_cr_continue_like_restart && !first_continue_pass) {
61 procs = ompi_proc_all(&num_procs);
62 if(NULL == procs) {
63 return OMPI_ERR_OUT_OF_RESOURCE;
64 }
65 }
66 }
67 else if(OPAL_CRS_RESTART_PRE == state ) {
68
69 }
70 else if(OPAL_CRS_RESTART == state ) {
71 procs = ompi_proc_all(&num_procs);
72 if(NULL == procs) {
73 return OMPI_ERR_OUT_OF_RESOURCE;
74 }
75 }
76 else if(OPAL_CRS_TERM == state ) {
77 ;
78 }
79 else {
80 ;
81 }
82
83
84
85
86
87 if( OPAL_CRS_RESTART != state ) {
88 if( OPAL_CRS_CONTINUE == state && !first_continue_pass ) {
89 ;
90 } else {
91
92
93
94 if( OPAL_CRS_RESTART_PRE == state ) {
95 loc_state = OPAL_CRS_RESTART;
96 } else {
97 loc_state = state;
98 }
99
100
101
102
103
104
105
106
107
108 for(btl_idx = 0; btl_idx < mca_bml_r2.num_btl_modules; btl_idx++) {
109
110
111
112 if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool &&
113 NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event ) {
114 opal_output_verbose(10, ompi_cr_output,
115 "bml:r2: ft_event: Notify the %s MPool.\n",
116 (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_component->mpool_version.mca_component_name);
117 if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_mpool->mpool_ft_event(loc_state) ) ) {
118 continue;
119 }
120 }
121
122
123
124
125 if( NULL != (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event) {
126 opal_output_verbose(10, ompi_cr_output,
127 "bml:r2: ft_event: Notify the %s BTL.\n",
128 (mca_bml_r2.btl_modules[btl_idx])->btl_component->btl_version.mca_component_name);
129 if(OMPI_SUCCESS != (ret = (mca_bml_r2.btl_modules[btl_idx])->btl_ft_event(loc_state) ) ) {
130 continue;
131 }
132 }
133 }
134 }
135 }
136
137 if(OPAL_CRS_CHECKPOINT == state) {
138 ;
139 }
140 else if(OPAL_CRS_CONTINUE == state) {
141
142 if (opal_cr_continue_like_restart && first_continue_pass) {
143 if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) {
144 opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n");
145 return ret;
146 }
147 if( OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_btl_base_framework)) ) {
148 opal_output(0, "bml:r2: ft_event(Restart): Failed to close BTL framework\n");
149 return ret;
150 }
151 }
152
153 else if (opal_cr_continue_like_restart && !first_continue_pass) {
154
155
156
157
158 if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
159 opal_output(0, "bml:r2: ft_event(Restart): Failed to fence complete\n");
160 return ret;
161 }
162
163
164
165
166 if( OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_btl_base_framework, 0)) ) {
167 opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n");
168 return ret;
169 }
170
171
172
173
174
175
176 if( OMPI_SUCCESS != (ret = mca_btl_base_select(OPAL_ENABLE_PROGRESS_THREADS, 1) ) ) {
177 opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n");
178 return ret;
179 }
180
181
182
183
184 mca_bml_r2.btls_added = false;
185
186 for(p = 0; p < (int)num_procs; ++p) {
187 if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
188 OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
189 procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
190 }
191
192 OBJ_RELEASE(procs[p]);
193 }
194
195 if( NULL != procs ) {
196 free(procs);
197 procs = NULL;
198 }
199 }
200 }
201 else if(OPAL_CRS_RESTART_PRE == state ) {
202 opal_output_verbose(10, ompi_cr_output,
203 "bml:r2: ft_event(Restart): Finalize BML\n");
204
205
206
207
208
209
210
211
212
213
214
215 if( OMPI_SUCCESS != (ret = mca_bml_r2_finalize()) ) {
216 opal_output(0, "bml:r2: ft_event(Restart): Failed to finalize BML framework\n");
217 return ret;
218 }
219 if( OMPI_SUCCESS != (ret = mca_base_framework_close(&opal_btl_base_framework)) ) {
220 opal_output(0, "bml:r2: ft_event(Restart): Failed to close BTL framework\n");
221 return ret;
222 }
223 }
224 else if(OPAL_CRS_RESTART == state ) {
225
226
227
228
229
230 if( OMPI_SUCCESS != (ret = opal_pmix.fence(NULL, 0))) {
231 opal_output(0, "bml:r2: ft_event(Restart): Failed to fence complete\n");
232 return ret;
233 }
234
235
236
237
238
239 param_type = mca_base_var_find("ompi", "btl", NULL, NULL);
240 btl_list = NULL;
241 mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
242 opal_output_verbose(11, ompi_cr_output,
243 "Restart (Previous BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
244
245 if( OMPI_SUCCESS != (ret = mca_base_framework_open(&opal_btl_base_framework, 0)) ) {
246 opal_output(0, "bml:r2: ft_event(Restart): Failed to open BTL framework\n");
247 return ret;
248 }
249
250
251 btl_list = NULL;
252 mca_base_var_get_value(param_type, &btl_list, NULL, NULL);
253 opal_output_verbose(11, ompi_cr_output,
254 "Restart (New BTL MCA): <%s>\n", btl_list ? btl_list[0] : "");
255 if( NULL != btl_list ) {
256 free(btl_list);
257 btl_list = NULL;
258 }
259
260
261
262
263
264
265 if( OMPI_SUCCESS != (ret = mca_btl_base_select(OPAL_ENABLE_PROGRESS_THREADS, 1) ) ) {
266 opal_output(0, "bml:r2: ft_event(Restart): Failed to select in BTL framework\n");
267 return ret;
268 }
269
270
271
272
273 mca_bml_r2.btls_added = false;
274
275 for(p = 0; p < (int)num_procs; ++p) {
276 if( NULL != procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]) {
277 OBJ_RELEASE(procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML]);
278 procs[p]->proc_endpoints[OMPI_PROC_ENDPOINT_TAG_BML] = NULL;
279 }
280
281 OBJ_RELEASE(procs[p]);
282 }
283
284 if( NULL != procs ) {
285 free(procs);
286 procs = NULL;
287 }
288 }
289 else if(OPAL_CRS_TERM == state ) {
290 ;
291 }
292 else {
293 ;
294 }
295 #endif
296
297 return OMPI_SUCCESS;
298 }