This source file includes the following definitions.
- launched
- completed
- orterun
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include "orte_config.h"
28 #include "orte/constants.h"
29
30 #include <string.h>
31 #include <stdio.h>
32 #include <stdlib.h>
33 #ifdef HAVE_STRINGS_H
34 #include <strings.h>
35 #endif
36 #ifdef HAVE_UNISTD_H
37 #include <unistd.h>
38 #endif
39 #ifdef HAVE_SYS_PARAM_H
40 #include <sys/param.h>
41 #endif
42 #include <errno.h>
43 #include <signal.h>
44 #include <ctype.h>
45 #ifdef HAVE_SYS_TYPES_H
46 #include <sys/types.h>
47 #endif
48 #ifdef HAVE_SYS_WAIT_H
49 #include <sys/wait.h>
50 #endif
51 #ifdef HAVE_SYS_TIME_H
52 #include <sys/time.h>
53 #endif
54 #include <fcntl.h>
55 #ifdef HAVE_SYS_STAT_H
56 #include <sys/stat.h>
57 #endif
58
59 #include "opal/mca/event/event.h"
60 #include "opal/mca/installdirs/installdirs.h"
61 #include "opal/mca/hwloc/base/base.h"
62 #include "opal/mca/base/base.h"
63 #include "opal/util/argv.h"
64 #include "opal/util/output.h"
65 #include "opal/util/basename.h"
66 #include "opal/util/cmd_line.h"
67 #include "opal/util/opal_environ.h"
68 #include "opal/util/opal_getcwd.h"
69 #include "opal/util/show_help.h"
70 #include "opal/util/fd.h"
71 #include "opal/sys/atomic.h"
72 #if OPAL_ENABLE_FT_CR == 1
73 #include "opal/runtime/opal_cr.h"
74 #endif
75
76 #include "opal/version.h"
77 #include "opal/runtime/opal.h"
78 #include "opal/runtime/opal_info_support.h"
79 #include "opal/util/os_path.h"
80 #include "opal/util/path.h"
81 #include "opal/class/opal_pointer_array.h"
82 #include "opal/dss/dss.h"
83
84 #include "orte/mca/odls/odls.h"
85 #include "orte/mca/rml/rml.h"
86 #include "orte/mca/state/state.h"
87 #include "orte/util/proc_info.h"
88 #include "orte/util/session_dir.h"
89 #include "orte/util/show_help.h"
90 #include "orte/util/threads.h"
91
92 #include "orte/runtime/runtime.h"
93 #include "orte/runtime/orte_globals.h"
94 #include "orte/runtime/orte_wait.h"
95 #include "orte/runtime/orte_locks.h"
96 #include "orte/runtime/orte_quit.h"
97
98
99 #include "orte/orted/orted.h"
100 #include "orte/orted/orted_submit.h"
101 #include "orterun.h"
102
103
104 typedef struct {
105 int status;
106 volatile bool active;
107 orte_job_t *jdata;
108 } orte_submit_status_t;
109
110
111 static void launched(int index, orte_job_t *jdata, int ret, void *cbdata)
112 {
113 orte_submit_status_t *launchst = (orte_submit_status_t*)cbdata;
114 launchst->status = ret;
115 ORTE_UPDATE_EXIT_STATUS(ret);
116 OBJ_RETAIN(jdata);
117 launchst->jdata = jdata;
118 launchst->active = false;
119 }
120 static void completed(int index, orte_job_t *jdata, int ret, void *cbdata)
121 {
122 orte_submit_status_t *completest = (orte_submit_status_t*)cbdata;
123 completest->status = ret;
124 ORTE_UPDATE_EXIT_STATUS(ret);
125 OBJ_RETAIN(jdata);
126 completest->jdata = jdata;
127 completest->active = false;
128 }
129
130 int orterun(int argc, char *argv[])
131 {
132 orte_submit_status_t launchst, completest;
133
134
135
136 if (ORTE_SUCCESS != orte_submit_init(argc, argv, NULL)) {
137 exit(1);
138 }
139
140
141
142
143
144 if (0 == geteuid() && !orte_cmd_options.run_as_root) {
145 fprintf(stderr, "--------------------------------------------------------------------------\n");
146 if (NULL != orte_cmd_options.help) {
147 fprintf(stderr, "%s cannot provide the help message when run as root.\n", orte_basename);
148 } else {
149
150 fprintf(stderr, "%s has detected an attempt to run as root.\n", orte_basename);
151 }
152 fprintf(stderr, "Running at root is *strongly* discouraged as any mistake (e.g., in\n");
153 fprintf(stderr, "defining TMPDIR) or bug can result in catastrophic damage to the OS\n");
154 fprintf(stderr, "file system, leaving your system in an unusable state.\n\n");
155 fprintf(stderr, "You can override this protection by adding the --allow-run-as-root\n");
156 fprintf(stderr, "option to your cmd line. However, we reiterate our strong advice\n");
157 fprintf(stderr, "against doing so - please do so at your own risk.\n");
158 fprintf(stderr, "--------------------------------------------------------------------------\n");
159 exit(1);
160 }
161
162
163
164
165
166
167 orte_rml.recv_buffer_nb(ORTE_NAME_WILDCARD, ORTE_RML_TAG_DAEMON,
168 ORTE_RML_PERSISTENT, orte_daemon_recv, NULL);
169
170
171 if (orte_cmd_options.terminate_dvm) {
172 if (ORTE_ERR_OP_IN_PROGRESS != orte_submit_halt()) {
173 ORTE_UPDATE_EXIT_STATUS(1);
174 goto DONE;
175 }
176 while (orte_event_base_active) {
177 opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
178 }
179
180
181
182 orte_exit_status = 0;
183 goto DONE;
184 } else {
185
186 memset(&launchst, 0, sizeof(launchst));
187 memset(&completest, 0, sizeof(completest));
188 launchst.active = true;
189 completest.active = true;
190 if (ORTE_SUCCESS != orte_submit_job(argv, NULL,
191 launched, &launchst,
192 completed, &completest)) {
193 ORTE_UPDATE_EXIT_STATUS(1);
194 goto DONE;
195 }
196 }
197
198
199 while (orte_event_base_active && launchst.active) {
200 opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
201 }
202 ORTE_ACQUIRE_OBJECT(orte_event_base_active);
203 if (orte_debug_flag) {
204 opal_output(0, "Job %s has launched",
205 (NULL == launchst.jdata) ? "UNKNOWN" : ORTE_JOBID_PRINT(launchst.jdata->jobid));
206 }
207 if (!orte_event_base_active || ORTE_SUCCESS != launchst.status) {
208 goto DONE;
209 }
210
211 while (orte_event_base_active && completest.active) {
212 opal_event_loop(orte_event_base, OPAL_EVLOOP_ONCE);
213 }
214 ORTE_ACQUIRE_OBJECT(orte_event_base_active);
215
216 if (ORTE_PROC_IS_HNP) {
217
218 orte_odls.kill_local_procs(NULL);
219 }
220
221 DONE:
222
223 orte_submit_finalize();
224 orte_finalize();
225 orte_session_dir_cleanup(ORTE_JOBID_WILDCARD);
226
227 orte_proc_info_finalize();
228
229 if (orte_debug_flag) {
230 fprintf(stderr, "exiting with status %d\n", orte_exit_status);
231 }
232 exit(orte_exit_status);
233 }