1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */
2 /*
3 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana
4 * University Research and Technology
5 * Corporation. All rights reserved.
6 * Copyright (c) 2004-2005 The University of Tennessee and The University
7 * of Tennessee Research Foundation. All rights
8 * reserved.
9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart,
10 * University of Stuttgart. All rights reserved.
11 * Copyright (c) 2004-2005 The Regents of the University of California.
12 * All rights reserved.
13 * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved.
14 * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights
15 * reserved.
16 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved.
17 * $COPYRIGHT$
18 *
19 * Additional copyrights may follow
20 *
21 * $HEADER$
22 */
23 /**
24 * @file
25 *
26 * I/O Forwarding Service
 * The I/O forwarding service (IOF) is used to connect stdin, stdout, and
 * stderr file descriptor streams from MPI processes to the user.
29 *
30 * The design is fairly simple: when a proc is spawned, the IOF establishes
31 * connections between its stdin, stdout, and stderr to a
32 * corresponding IOF stream. In addition, the IOF designates a separate
33 * stream for passing OMPI/ORTE internal diagnostic/help output to mpirun.
34 * This is done specifically to separate such output from the user's
35 * stdout/err - basically, it allows us to present it to the user in
36 * a separate format for easier recognition. Data read from a source
37 * on any stream (e.g., printed to stdout by the proc) is relayed
38 * by the local daemon to the other end of the stream - i.e., stdin
39 * is relayed to the local proc, while stdout/err is relayed to mpirun.
40 * Thus, the eventual result is to connect ALL streams to/from
41 * the application process and mpirun.
42 *
43 * Note: By default, data read from stdin is forwarded -only- to rank=0.
44 * Stdin for all other procs is tied to "/dev/null".
45 *
46 * External tools can "pull" copies of stdout/err and
47 * the diagnostic stream from mpirun for any process. In this case,
48 * mpirun will send a copy of the output to the "pulling" process. Note that external tools
49 * cannot "push" something into stdin unless the user specifically directed
50 * that stdin remain open, nor under any conditions "pull" a copy of the
51 * stdin being sent to rank=0.
52 *
53 * Tools can exploit either of two mechanisms for this purpose:
54 *
55 * (a) call orte_init themselves and utilize the ORTE tool comm
56 * library to access the IOF. This also provides access to
57 * other tool library functions - e.g., to order that a job
58 * be spawned; or
59 *
60 * (b) fork/exec the "orte-iof" tool and let it serve as the interface
61 * to mpirun. This lets the tool avoid calling orte_init, and means
62 * the tool will not have to compile against the ORTE/OMPI libraries.
 *     However, the orte-iof tool is limited solely to interfacing
 *     stdio and cannot be used for other functions included in
 *     the tool comm library.
66 *
67 * Thus, mpirun acts as a "switchyard" for IO, taking input from stdin
68 * and passing it to rank=0 of the job, and taking stdout/err/diag from all
69 * ranks and passing it to its own stdout/err/diag plus any "pull"
70 * requestors.
71 *
72 * Streams are identified by ORTE process name (to include wildcards,
73 * such as "all processes in ORTE job X") and tag. There are
74 * currently only 4 allowed predefined tags:
75 *
76 * - ORTE_IOF_STDIN (value 0)
77 * - ORTE_IOF_STDOUT (value 1)
78 * - ORTE_IOF_STDERR (value 2)
79 * - ORTE_IOF_INTERNAL (value 3): for "internal" messages
80 * from the infrastructure, just to differentiate them from user job
81 * stdout/stderr
82 *
83 * Note that since streams are identified by ORTE process name, the
84 * caller has no idea whether the stream is on the local node or a
85 * remote node -- it's just a stream.
86 *
87 * IOF components are selected on a "one of many" basis, meaning that
88 * only one IOF component will be selected for a given process.
89 * Details for the various components are given in their source code
90 * bases.
91 *
92 * Each IOF component must support the following API:
93 *
94 * push: Tie a local file descriptor (*not* a stream!) to the stdin
95 * of the specified process. If the user has not specified that stdin
96 * of the specified process is to remain open, this will return an error.
97 *
 * pull: Tie a local file descriptor (*not* a stream!) to a stream.
 * Subsequent input that appears via the stream will
 * automatically be sent to the target file descriptor until the
 * stream is "closed" or an EOF is received on the local file descriptor.
 * Valid source values include ORTE_IOF_STDOUT, ORTE_IOF_STDERR, and
 * ORTE_IOF_INTERNAL.
104 *
105 * close: Closes a stream, flushing any pending data down it and
106 * terminating any "push/pull" connections against it. Unclear yet
107 * if this needs to be blocking, or can be done non-blocking.
108 *
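 * flush: Block until all pending data on all open streams has been
 * written down local file descriptors and/or completed sending across
 * the OOB to remote process targets.
 * NOTE(review): the module struct declared in this header has no
 * flush entry point; besides push, pull, and close it provides init,
 * output, complete, finalize, and ft_event. This API list appears to
 * be out of date and should be reconciled with the struct.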
112 *
113 */
114
115 #ifndef ORTE_IOF_H
116 #define ORTE_IOF_H
117
118 #include "orte_config.h"
119 #include "orte/types.h"
120
121 #include "orte/mca/mca.h"
122
123 #include "orte/runtime/orte_globals.h"
124
125 #include "iof_types.h"
126
127 BEGIN_C_DECLS
128
/**
 * Request a proxy PULL of IO on behalf of a tool that had the HNP
 * spawn a job.
 *
 * The macro builds a buffer containing, in order:
 *   1. the tag ORTE_IOF_STDOUTALL | ORTE_IOF_PULL | ORTE_IOF_EXCLUSIVE,
 *   2. the name of the source to pull from: the jobid of (a) combined
 *      with ORTE_VPID_WILDCARD,
 *   3. the name of the requesting tool, (b),
 * and sends it non-blocking to ORTE_PROC_MY_HNP on ORTE_RML_TAG_IOF_HNP.
 *
 * @param a  Pointer to the orte_job_t of the spawned job (only its
 *           jobid field is read).
 * @param b  Pointer to the name of the requesting tool.
 *
 * Usage notes:
 *  - The expansion is a single statement WITHOUT a trailing semicolon,
 *    so it must be invoked as "ORTE_IOF_PROXY_PULL(a, b);" and is safe
 *    as the body of an unbraced if/else.
 *  - Locals carry an "iof_pp_" prefix so that an argument expression
 *    such as "&nm" or "buf" refers to the caller's variable and is not
 *    captured by a macro-local of the same name.
 *  - If the send is rejected (negative return), the buffer is released
 *    here. NOTE(review): on a successful send the buffer is presumably
 *    released by orte_rml_send_callback - confirm against the RML.
 *  - The includer must have the OPAL DSS, OPAL object, and ORTE RML
 *    declarations in scope; this header does not include them itself.
 */
#define ORTE_IOF_PROXY_PULL(a, b)                                           \
    do {                                                                    \
        opal_buffer_t *iof_pp_buf;                                          \
        orte_iof_tag_t iof_pp_tag;                                          \
        orte_process_name_t iof_pp_src;                                     \
                                                                            \
        iof_pp_buf = OBJ_NEW(opal_buffer_t);                                \
                                                                            \
        /* setup the tag to pull from HNP */                                \
        iof_pp_tag = ORTE_IOF_STDOUTALL | ORTE_IOF_PULL |                   \
                     ORTE_IOF_EXCLUSIVE;                                    \
        opal_dss.pack(iof_pp_buf, &iof_pp_tag, 1, ORTE_IOF_TAG);            \
        /* pack the name of the source we want to pull */                   \
        iof_pp_src.jobid = (a)->jobid;                                      \
        iof_pp_src.vpid = ORTE_VPID_WILDCARD;                               \
        opal_dss.pack(iof_pp_buf, &iof_pp_src, 1, ORTE_NAME);               \
        /* pack the name of the tool */                                     \
        opal_dss.pack(iof_pp_buf, (b), 1, ORTE_NAME);                       \
                                                                            \
        /* send the buffer to the HNP; if the send is refused the   */      \
        /* completion callback never runs, so drop our reference    */      \
        if (0 > orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, iof_pp_buf,       \
                                        ORTE_RML_TAG_IOF_HNP,               \
                                        orte_rml_send_callback, NULL)) {    \
            OBJ_RELEASE(iof_pp_buf);                                        \
        }                                                                   \
    } while (0)
156
/**
 * Module entry point: initialize the selected IOF module.
 *
 * Takes no arguments.
 *
 * @return int status code.
 */
typedef int (*orte_iof_base_init_fn_t) (void);
159
160 /**
161 * Explicitly push data from the specified input file descriptor to
162 * the stdin of the indicated peer(s). The provided peer name can
163 * include wildcard values.
164 *
165 * @param peer Name of target peer(s)
166 * @param fd Local file descriptor for input.
167 */
168 typedef int (*orte_iof_base_push_fn_t)(const orte_process_name_t* peer,
169 orte_iof_tag_t src_tag, int fd);
170
171 /**
172 * Explicitly pull data from the specified set of SOURCE peers and
173 * dump to the indicated output file descriptor. Any fragments that
174 * arrive on the stream will automatically be written down the fd.
175 *
176 * @param peer Name used to qualify set of origin peers.
177 * @param source_tag Indicates the output streams to be forwarded
178 * @param fd Local file descriptor for output.
179 */
180 typedef int (*orte_iof_base_pull_fn_t)(const orte_process_name_t* peer,
181 orte_iof_tag_t source_tag,
182 int fd);
183
184 /**
185 * Close the specified iof stream(s) from the indicated peer(s)
186 */
187 typedef int (*orte_iof_base_close_fn_t)(const orte_process_name_t* peer,
188 orte_iof_tag_t source_tag);
189
190 /**
191 * Output something via the IOF subsystem
192 */
193 typedef int (*orte_iof_base_output_fn_t)(const orte_process_name_t* peer,
194 orte_iof_tag_t source_tag,
195 const char *msg);
196
197 /* Flag that a job is complete */
198 typedef void (*orte_iof_base_complete_fn_t)(const orte_job_t *jdata);
199
/**
 * Module entry point: finalize the selected IOF module.
 *
 * Takes no arguments.
 *
 * @return int status code.
 */
typedef int (*orte_iof_base_finalize_fn_t) (void);
202
/**
 * Module entry point: FT event notification.
 *
 * @param state  Integer identifying the event/state being reported.
 *               NOTE(review): the set of valid values is defined
 *               outside this header - confirm against the FT code.
 *
 * @return int status code.
 */
typedef int (*orte_iof_base_ft_event_fn_t) (int state);
207
208 /**
209 * IOF module.
210 */
211 struct orte_iof_base_module_2_0_0_t {
212 orte_iof_base_init_fn_t init;
213 orte_iof_base_push_fn_t push;
214 orte_iof_base_pull_fn_t pull;
215 orte_iof_base_close_fn_t close;
216 orte_iof_base_output_fn_t output;
217 orte_iof_base_complete_fn_t complete;
218 orte_iof_base_finalize_fn_t finalize;
219 orte_iof_base_ft_event_fn_t ft_event;
220 };
221
222 typedef struct orte_iof_base_module_2_0_0_t orte_iof_base_module_2_0_0_t;
223 typedef orte_iof_base_module_2_0_0_t orte_iof_base_module_t;
224 ORTE_DECLSPEC extern orte_iof_base_module_t orte_iof;
225
226 struct orte_iof_base_component_2_0_0_t {
227 mca_base_component_t iof_version;
228 mca_base_component_data_t iof_data;
229 };
230 typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_2_0_0_t;
231 typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_t;
232
233 END_C_DECLS
234
/*
 * Version macro for use in components of type iof. Expands to
 * ORTE_MCA_BASE_VERSION_2_1_0 invoked with ("iof", 2, 0, 0).
 */
#define ORTE_IOF_BASE_VERSION_2_0_0 ORTE_MCA_BASE_VERSION_2_1_0("iof", 2, 0, 0)
240
241 #endif /* ORTE_IOF_H */