1 /* -*- Mode: C; c-basic-offset:4 ; indent-tabs-mode:nil -*- */ 2 /* 3 * Copyright (c) 2004-2007 The Trustees of Indiana University and Indiana 4 * University Research and Technology 5 * Corporation. All rights reserved. 6 * Copyright (c) 2004-2005 The University of Tennessee and The University 7 * of Tennessee Research Foundation. All rights 8 * reserved. 9 * Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, 10 * University of Stuttgart. All rights reserved. 11 * Copyright (c) 2004-2005 The Regents of the University of California. 12 * All rights reserved. 13 * Copyright (c) 2007-2008 Cisco Systems, Inc. All rights reserved. 14 * Copyright (c) 2012-2015 Los Alamos National Security, LLC. All rights 15 * reserved. 16 * Copyright (c) 2014-2019 Intel, Inc. All rights reserved. 17 * $COPYRIGHT$ 18 * 19 * Additional copyrights may follow 20 * 21 * $HEADER$ 22 */ 23 /** 24 * @file 25 * 26 * I/O Forwarding Service 27 * The I/O forwarding service (IOF) is used to connect stdin, stdout, and 28 * stderr file descriptor streams from MPI processes to the user 29 * 30 * The design is fairly simple: when a proc is spawned, the IOF establishes 31 * connections between its stdin, stdout, and stderr to a 32 * corresponding IOF stream. In addition, the IOF designates a separate 33 * stream for passing OMPI/ORTE internal diagnostic/help output to mpirun. 34 * This is done specifically to separate such output from the user's 35 * stdout/err - basically, it allows us to present it to the user in 36 * a separate format for easier recognition. Data read from a source 37 * on any stream (e.g., printed to stdout by the proc) is relayed 38 * by the local daemon to the other end of the stream - i.e., stdin 39 * is relayed to the local proc, while stdout/err is relayed to mpirun. 40 * Thus, the eventual result is to connect ALL streams to/from 41 * the application process and mpirun. 42 * 43 * Note: By default, data read from stdin is forwarded -only- to rank=0. 44 * Stdin for all other procs is tied to "/dev/null". 45 * 46 * External tools can "pull" copies of stdout/err and 47 * the diagnostic stream from mpirun for any process. In this case, 48 * mpirun will send a copy of the output to the "pulling" process. Note that external tools 49 * cannot "push" something into stdin unless the user specifically directed 50 * that stdin remain open, nor under any conditions "pull" a copy of the 51 * stdin being sent to rank=0. 52 * 53 * Tools can exploit either of two mechanisms for this purpose: 54 * 55 * (a) call orte_init themselves and utilize the ORTE tool comm 56 * library to access the IOF. This also provides access to 57 * other tool library functions - e.g., to order that a job 58 * be spawned; or 59 * 60 * (b) fork/exec the "orte-iof" tool and let it serve as the interface 61 * to mpirun. This lets the tool avoid calling orte_init, and means 62 * the tool will not have to compile against the ORTE/OMPI libraries. 63 * However, the orte-iof tool is limited solely to interfacing 64 * stdio and cannot be used for other functions included in 65 * the tool comm library 66 * 67 * Thus, mpirun acts as a "switchyard" for IO, taking input from stdin 68 * and passing it to rank=0 of the job, and taking stdout/err/diag from all 69 * ranks and passing it to its own stdout/err/diag plus any "pull" 70 * requestors. 71 * 72 * Streams are identified by ORTE process name (to include wildcards, 73 * such as "all processes in ORTE job X") and tag. There are 74 * currently only 4 allowed predefined tags: 75 * 76 * - ORTE_IOF_STDIN (value 0) 77 * - ORTE_IOF_STDOUT (value 1) 78 * - ORTE_IOF_STDERR (value 2) 79 * - ORTE_IOF_INTERNAL (value 3): for "internal" messages 80 * from the infrastructure, just to differentiate them from user job 81 * stdout/stderr 82 * 83 * Note that since streams are identified by ORTE process name, the 84 * caller has no idea whether the stream is on the local node or a 85 * remote node -- it's just a stream. 86 * 87 * IOF components are selected on a "one of many" basis, meaning that 88 * only one IOF component will be selected for a given process. 89 * Details for the various components are given in their source code 90 * bases. 91 * 92 * Each IOF component must support the following API: 93 * 94 * push: Tie a local file descriptor (*not* a stream!) to the stdin 95 * of the specified process. If the user has not specified that stdin 96 * of the specified process is to remain open, this will return an error. 97 * 98 * pull: Tie a local file descriptor (*not* a stream!) to a stream. 99 * Subsequent input that appears via the stream will 100 * automatically be sent to the target file descriptor until the 101 * stream is "closed" or an EOF is received on the local file descriptor. 102 * Valid source values include ORTE_IOF_STDOUT, ORTE_IOF_STDERR, and 103 * ORTE_IOF_INTERNAL 104 * 105 * close: Closes a stream, flushing any pending data down it and 106 * terminating any "push/pull" connections against it. Unclear yet 107 * if this needs to be blocking, or can be done non-blocking. 108 * 109 * flush: Block until all pending data on all open streams has been 110 * written down local file descriptors and/or completed sending across 111 * the OOB to remote process targets. 112 * 113 */ 114 115 #ifndef ORTE_IOF_H 116 #define ORTE_IOF_H 117 118 #include "orte_config.h" 119 #include "orte/types.h" 120 121 #include "orte/mca/mca.h" 122 123 #include "orte/runtime/orte_globals.h" 124 125 #include "iof_types.h" 126 127 BEGIN_C_DECLS 128 129 /* define a macro for requesting a proxy PULL of IO on 130 * behalf of a tool that had the HNP spawn a job. First 131 * argument is the orte_job_t of the spawned job, second 132 * is a pointer to the name of the requesting tool */ 133 #define ORTE_IOF_PROXY_PULL(a, b) \ 134 do { \ 135 opal_buffer_t *buf; \ 136 orte_iof_tag_t tag; \ 137 orte_process_name_t nm; \ 138 \ 139 buf = OBJ_NEW(opal_buffer_t); \ 140 \ 141 /* setup the tag to pull from HNP */ \ 142 tag = ORTE_IOF_STDOUTALL | ORTE_IOF_PULL | ORTE_IOF_EXCLUSIVE; \ 143 opal_dss.pack(buf, &tag, 1, ORTE_IOF_TAG); \ 144 /* pack the name of the source we want to pull */ \ 145 nm.jobid = (a)->jobid; \ 146 nm.vpid = ORTE_VPID_WILDCARD; \ 147 opal_dss.pack(buf, &nm, 1, ORTE_NAME); \ 148 /* pack the name of the tool */ \ 149 opal_dss.pack(buf, (b), 1, ORTE_NAME); \ 150 \ 151 /* send the buffer to the HNP */ \ 152 orte_rml.send_buffer_nb(ORTE_PROC_MY_HNP, buf, \ 153 ORTE_RML_TAG_IOF_HNP, \ 154 orte_rml_send_callback, NULL); \ 155 } while(0); 156 157 /* Initialize the selected module */ 158 typedef int (*orte_iof_base_init_fn_t)(void); 159 160 /** 161 * Explicitly push data from the specified input file descriptor to 162 * the stdin of the indicated peer(s). The provided peer name can 163 * include wildcard values. 164 * 165 * @param peer Name of target peer(s) 166 * @param fd Local file descriptor for input. 167 */ 168 typedef int (*orte_iof_base_push_fn_t)(const orte_process_name_t* peer, 169 orte_iof_tag_t src_tag, int fd); 170 171 /** 172 * Explicitly pull data from the specified set of SOURCE peers and 173 * dump to the indicated output file descriptor. Any fragments that 174 * arrive on the stream will automatically be written down the fd. 175 * 176 * @param peer Name used to qualify set of origin peers. 177 * @param source_tag Indicates the output streams to be forwarded 178 * @param fd Local file descriptor for output. 179 */ 180 typedef int (*orte_iof_base_pull_fn_t)(const orte_process_name_t* peer, 181 orte_iof_tag_t source_tag, 182 int fd); 183 184 /** 185 * Close the specified iof stream(s) from the indicated peer(s) 186 */ 187 typedef int (*orte_iof_base_close_fn_t)(const orte_process_name_t* peer, 188 orte_iof_tag_t source_tag); 189 190 /** 191 * Output something via the IOF subsystem 192 */ 193 typedef int (*orte_iof_base_output_fn_t)(const orte_process_name_t* peer, 194 orte_iof_tag_t source_tag, 195 const char *msg); 196 197 /* Flag that a job is complete */ 198 typedef void (*orte_iof_base_complete_fn_t)(const orte_job_t *jdata); 199 200 /* finalize the selected module */ 201 typedef int (*orte_iof_base_finalize_fn_t)(void); 202 203 /** 204 * FT Event Notification 205 */ 206 typedef int (*orte_iof_base_ft_event_fn_t)(int state); 207 208 /** 209 * IOF module. 210 */ 211 struct orte_iof_base_module_2_0_0_t { 212 orte_iof_base_init_fn_t init; 213 orte_iof_base_push_fn_t push; 214 orte_iof_base_pull_fn_t pull; 215 orte_iof_base_close_fn_t close; 216 orte_iof_base_output_fn_t output; 217 orte_iof_base_complete_fn_t complete; 218 orte_iof_base_finalize_fn_t finalize; 219 orte_iof_base_ft_event_fn_t ft_event; 220 }; 221 222 typedef struct orte_iof_base_module_2_0_0_t orte_iof_base_module_2_0_0_t; 223 typedef orte_iof_base_module_2_0_0_t orte_iof_base_module_t; 224 ORTE_DECLSPEC extern orte_iof_base_module_t orte_iof; 225 226 struct orte_iof_base_component_2_0_0_t { 227 mca_base_component_t iof_version; 228 mca_base_component_data_t iof_data; 229 }; 230 typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_2_0_0_t; 231 typedef struct orte_iof_base_component_2_0_0_t orte_iof_base_component_t; 232 233 END_C_DECLS 234 235 /* 236 * Macro for use in components that are of type iof 237 */ 238 #define ORTE_IOF_BASE_VERSION_2_0_0 \ 239 ORTE_MCA_BASE_VERSION_2_1_0("iof", 2, 0, 0) 240 241 #endif /* ORTE_IOF_H */