1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2010 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2005 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2015 Los Alamos National Security, LLC. All rights
14 * reserved.
15 * $COPYRIGHT$
16 *
17 * Additional copyrights may follow
18 *
19 * $HEADER$
20 */
21 /**
22 * @file
23 *
24 * Snapshot Coordination (SNAPC) Interface
25 *
26 * Terminology:
27 * ------------
28 * Global Snapshot Coordinator:
29 * - HNP(s) coordination function.
30 * Local Snapshot Coordinator
31 * - VHNP(s) [e.g., orted] coordination function
32 * Application Snapshot Coordinator
33 * - Application level coordinaton function
34 * Local Snapshot
35 * - Snapshot generated by a single process in the parallel job
36 * Local Snapshot Reference
37 * - A generic reference to the physical Local Snapshot
38 * Global Snapshot
39 * - Snapshot generated for the entire parallel job
40 * Global Snapshot Reference
41 * - A generic reference to the physical Global Snapshot
42 *
43 * General Description:
44 * ---------------------
45 * This framework is tasked with:
46 * - Initiating the checkpoint in the system
47 * - Physically moving the local snapshot files to a location
48 * Initially this location, is the node on which the Head Node Process (HNP)
49 * is running, but later this will be a replicated checkpoint server or
50 * the like.
51 * - Generating a 'global snapshot handle' that the user can use to restart
52 * the parallel job.
53 *
54 * Each component will have 3 teirs of behavior that must behave in concert:
55 * - Global Snapshot Coordinator
56 * This is the HNPs tasks. Mostly distributing the notification of the
57 * checkpoint, and then compiling the physical and virtual nature of the
58 * global snapshot handle.
59 * - Local Snapshot Coordinator
60 * This is the VHNPs (or orted, if available) tasks. This will involve
61 * working with the Global Snapshot Coordinator to route the physical
62 * and virtual 'local snapshot's from the application to the desired
63 * location. This process must also notify the Global Snapshot Coordinator
64 * when it's set of processes have completed the checkpoint.
65 * - Application Snapshot Coordinator
66 * This is the application level coordinator. This is very light, just
67 * a subscription to be triggered when it needs to checkpoint, and then,
68 * once finished with the checkpoint, notify the Local Snapshot Coordinator
69 * that it is complete.
70 * If there is no orted (so no bootproxy), then the application assumes the
71 * responsibility of the Local Snapshot Coordinator as well.
72 *
73 */
74
75 #ifndef MCA_SNAPC_H
76 #define MCA_SNAPC_H
77
78 #include "orte_config.h"
79 #include "orte/constants.h"
80 #include "orte/types.h"
81
82 #include "orte/mca/mca.h"
83 #include "opal/mca/base/base.h"
84 #include "opal/mca/crs/crs.h"
85 #include "opal/mca/crs/base/base.h"
86
87 #include "opal/class/opal_object.h"
88 #include "opal/class/opal_pointer_array.h"
89 #include "opal/util/output.h"
90
91 #include "orte/mca/sstore/sstore.h"
92
93 BEGIN_C_DECLS
94
95 /**
96 * States that a process can be in while checkpointing
97 */
98 /* Reached an error */
99 #define ORTE_SNAPC_CKPT_STATE_ERROR 0
100
101 /* Doing no checkpoint -- Quiet state */
102 #define ORTE_SNAPC_CKPT_STATE_NONE 1
103 /* There has been a request for a checkpoint from one of the applications */
104 #define ORTE_SNAPC_CKPT_STATE_REQUEST 2
105 /* There is a Pending checkpoint for this process */
106 #define ORTE_SNAPC_CKPT_STATE_PENDING 3
107 /* Running the checkpoint */
108 #define ORTE_SNAPC_CKPT_STATE_RUNNING 4
109 /* INC Prep Finished */
110 #define ORTE_SNAPC_CKPT_STATE_INC_PREPED 5
111 /* All Processes have been stopped */
112 #define ORTE_SNAPC_CKPT_STATE_STOPPED 6
113 /* Finished the checkpoint locally */
114 #define ORTE_SNAPC_CKPT_STATE_FINISHED_LOCAL 7
115 /* Migrating */
116 #define ORTE_SNAPC_CKPT_STATE_MIGRATING 8
117 /* Finished establishing the checkpoint */
118 #define ORTE_SNAPC_CKPT_STATE_ESTABLISHED 9
119 /* Processes continuing or have been recovered (finished post-INC) */
120 #define ORTE_SNAPC_CKPT_STATE_RECOVERED 10
121 /* Unable to checkpoint this job */
122 #define ORTE_SNAPC_CKPT_STATE_NO_CKPT 11
123 /* Unable to restart this job */
124 #define ORTE_SNAPC_CKPT_STATE_NO_RESTART 12
125 #define ORTE_SNAPC_CKPT_MAX 13
126
127 /**
128 * Sufficiently high shift value to avoid colliding the process
129 * checkpointing states above with the ORTE process states
130 */
131 #define ORTE_SNAPC_CKPT_SHIFT 131072
132
133 /* Uniquely encode the SNAPC state */
134 #define ORTE_SNAPC_CKPT_NOTIFY(state) (ORTE_SNAPC_CKPT_SHIFT + state)
135
136 /* Decode the SNAPC state */
137 #define ORTE_SNAPC_CKPT_STATE(state) (state - ORTE_SNAPC_CKPT_SHIFT)
138
139 /* Check whether a state is a SNAPC state or not. */
140 #define CHECK_ORTE_SNAPC_CKPT_STATE(state) (state >= ORTE_SNAPC_CKPT_SHIFT)
141
142 /**
143 * Definition of a orte local snapshot.
144 * Similar to the opal_crs_base_snapshot_t except that it
145 * contains process contact information.
146 */
147 struct orte_snapc_base_local_snapshot_1_0_0_t {
148 /** List super object */
149 opal_list_item_t super;
150
151 /** ORTE Process name */
152 orte_process_name_t process_name;
153
154 /** State of the checkpoint */
155 int state;
156
157 /** Stable Storage Handle (must equal the global version) */
158 orte_sstore_base_handle_t ss_handle;
159 };
160 typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_1_0_0_t;
161 typedef struct orte_snapc_base_local_snapshot_1_0_0_t orte_snapc_base_local_snapshot_t;
162
163 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_local_snapshot_t);
164
165 /**
166 * Definition of the global snapshot.
167 * Each component is assumed to have extened this definition
168 * in the same way they extern the orte_snapc_base_compoinent_t below.
169 */
170 struct orte_snapc_base_global_snapshot_1_0_0_t {
171 /** This is an object, so must have super */
172 opal_list_item_t super;
173
174 /** A list of orte_snapc_base_snapshot_t's */
175 opal_list_t local_snapshots;
176
177 /** Checkpoint Options */
178 opal_crs_base_ckpt_options_t *options;
179
180 /** Stable Storage Handle */
181 orte_sstore_base_handle_t ss_handle;
182 };
183 typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_1_0_0_t;
184 typedef struct orte_snapc_base_global_snapshot_1_0_0_t orte_snapc_base_global_snapshot_t;
185
186 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_global_snapshot_t);
187
188 struct orte_snapc_base_quiesce_1_0_0_t {
189 /** Parent is an object type */
190 opal_object_t super;
191
192 /** Current epoch */
193 int epoch;
194 /** Requested CRS */
195 char * crs_name;
196 /** Handle for reference */
197 char * handle;
198 /** snapshot list */
199 orte_snapc_base_global_snapshot_t *snapshot;
200
201 /** Stable Storage Handle */
202 orte_sstore_base_handle_t ss_handle;
203 /** Stable Storage Snapshot list */
204 orte_sstore_base_global_snapshot_info_t *ss_snapshot;
205
206 /** Target Directory */
207 char * target_dir;
208 /** Command Line */
209 char * cmdline;
210 /** State of operation if checkpointing */
211 opal_crs_state_type_t cr_state;
212 /** Checkpointing? */
213 bool checkpointing;
214 /** Restarting? */
215 bool restarting;
216
217 /** Migrating? */
218 bool migrating;
219 /** List of migrating processes */
220 int num_migrating;
221 opal_pointer_array_t migrating_procs;
222 };
223 typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_1_0_0_t;
224 typedef struct orte_snapc_base_quiesce_1_0_0_t orte_snapc_base_quiesce_t;
225
226 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_quiesce_t);
227
228 /**
229 * Application request for a global checkpoint related operation
230 */
231 typedef enum {
232 ORTE_SNAPC_OP_NONE = 0,
233 ORTE_SNAPC_OP_INIT,
234 ORTE_SNAPC_OP_FIN,
235 ORTE_SNAPC_OP_FIN_ACK,
236 ORTE_SNAPC_OP_CHECKPOINT,
237 ORTE_SNAPC_OP_RESTART,
238 ORTE_SNAPC_OP_MIGRATE,
239 ORTE_SNAPC_OP_QUIESCE_START,
240 ORTE_SNAPC_OP_QUIESCE_CHECKPOINT,
241 ORTE_SNAPC_OP_QUIESCE_END
242 } orte_snapc_base_request_op_event_t;
243
244 struct orte_snapc_base_request_op_1_0_0_t {
245 /** Parent is an object type */
246 opal_object_t super;
247
248 /** Event to request */
249 orte_snapc_base_request_op_event_t event;
250
251 /** Is this request still active */
252 bool is_active;
253
254 /** Leader of the operation */
255 int leader;
256
257 /** Sequence Number */
258 int seq_num;
259
260 /** Global Handle */
261 char * global_handle;
262
263 /** Stable Storage Handle */
264 orte_sstore_base_handle_t ss_handle;
265
266 /** Migrating vpid list of participants */
267 int mig_num;
268 int *mig_vpids;
269
270 /** Migrating hostname preference list */
271 char (*mig_host_pref)[OPAL_MAX_PROCESSOR_NAME];
272
273 /** Migrating vpid preference list */
274 int *mig_vpid_pref;
275
276 /** Info key */
277 int *mig_off_node;
278 };
279 typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_1_0_0_t;
280 typedef struct orte_snapc_base_request_op_1_0_0_t orte_snapc_base_request_op_t;
281
282 ORTE_DECLSPEC OBJ_CLASS_DECLARATION(orte_snapc_base_request_op_t);
283
284 /**
285 * Module initialization function.
286 * Returns ORTE_SUCCESS
287 */
288 typedef int (*orte_snapc_base_module_init_fn_t)
289 (bool seed, bool app);
290
291 /**
292 * Module finalization function.
293 * Returns ORTE_SUCCESS
294 */
295 typedef int (*orte_snapc_base_module_finalize_fn_t)
296 (void);
297
298 /**
299 * Setup the necessary structures for this job
300 * Returns ORTE_SUCCESS
301 */
302 typedef int (*orte_snapc_base_setup_job_fn_t)
303 (orte_jobid_t jobid);
304
305 /**
306 * Setup the necessary structures for this job
307 * Returns ORTE_SUCCESS
308 */
309 typedef int (*orte_snapc_base_release_job_fn_t)
310 (orte_jobid_t jobid);
311
312
313 /**
314 * Handle fault tolerance updates
315 *
316 * @param[in] state Fault tolerance state update
317 *
318 * @retval ORTE_SUCCESS The operation completed successfully
319 * @retval ORTE_ERROR An unspecifed error occurred
320 */
321 typedef int (*orte_snapc_base_ft_event_fn_t)(int state);
322
323 /**
324 * Start a checkpoint originating from an internal source.
325 *
326 * This really only makes sense to call from an application, but in the future
327 * we may allow the checkpoint operation to use this function from the local
328 * coordinator.
329 *
330 * @param[out] epoch Epoch number to associate with this checkpoint operation
331 * Returns ORTE_SUCCESS
332 */
333 typedef int (*orte_snapc_base_start_checkpoint_fn_t)
334 (orte_snapc_base_quiesce_t *datum);
335
336 /**
337 * Signal end of checkpoint epoch originating from an internal source.
338 *
339 * @param[in] epoch Epoch number to associate with this checkpoint operation
340 * Returns ORTE_SUCCESS
341 */
342 typedef int (*orte_snapc_base_end_checkpoint_fn_t)
343 (orte_snapc_base_quiesce_t *datum);
344
345 /**
346 * Request a checkpoint related operation to take place
347 */
348 typedef int (*orte_snapc_base_request_op_fn_t)
349 (orte_snapc_base_request_op_t *datum);
350
351 /**
352 * Structure for SNAPC components.
353 */
354 struct orte_snapc_base_component_2_0_0_t {
355 /** MCA base component */
356 mca_base_component_t base_version;
357 /** MCA base data */
358 mca_base_component_data_t base_data;
359
360 /** Verbosity Level */
361 int verbose;
362 /** Output Handle for opal_output */
363 int output_handle;
364 /** Default Priority */
365 int priority;
366 };
367 typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_2_0_0_t;
368 typedef struct orte_snapc_base_component_2_0_0_t orte_snapc_base_component_t;
369
370 /**
371 * Structure for SNAPC modules
372 */
373 struct orte_snapc_base_module_1_0_0_t {
374 /** Initialization Function */
375 orte_snapc_base_module_init_fn_t snapc_init;
376 /** Finalization Function */
377 orte_snapc_base_module_finalize_fn_t snapc_finalize;
378 /** Setup structures for a job */
379 orte_snapc_base_setup_job_fn_t setup_job;
380 /** Release job */
381 orte_snapc_base_release_job_fn_t release_job;
382 /** Handle any FT Notifications */
383 orte_snapc_base_ft_event_fn_t ft_event;
384 /** Handle internal request for checkpoint */
385 orte_snapc_base_start_checkpoint_fn_t start_ckpt;
386 orte_snapc_base_end_checkpoint_fn_t end_ckpt;
387 /** Handle a checkpoint related request */
388 orte_snapc_base_request_op_fn_t request_op;
389 };
390 typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_1_0_0_t;
391 typedef struct orte_snapc_base_module_1_0_0_t orte_snapc_base_module_t;
392
393 ORTE_DECLSPEC extern orte_snapc_base_module_t orte_snapc;
394 ORTE_DECLSPEC extern orte_snapc_base_component_t orte_snapc_base_selected_component;
395
396 /**
397 * Macro for use in components that are of type SNAPC
398 */
399 #define ORTE_SNAPC_BASE_VERSION_2_0_0 \
400 ORTE_MCA_BASE_VERSION_2_1_0("snapc", 2, 0, 0)
401
402 END_C_DECLS
403
404 #endif /* ORTE_SNAPC_H */
405