This source file includes the following definitions.
- main
- parse_args
- orte_getline
- kill_procs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27 #include "orte_config.h"
28 #include "orte/constants.h"
29
30 #include <stdio.h>
31 #include <errno.h>
32 #ifdef HAVE_UNISTD_H
33 #include <unistd.h>
34 #endif
35 #include <stdlib.h>
36 #ifdef HAVE_SYS_STAT_H
37 #include <sys/stat.h>
38 #endif
39 #ifdef HAVE_SYS_TYPES_H
40 #include <sys/types.h>
41 #endif
42 #ifdef HAVE_SYS_WAIT_H
43 #include <sys/wait.h>
44 #endif
45 #ifdef HAVE_SYS_PARAM_H
46 #include <sys/param.h>
47 #endif
48 #include <string.h>
49 #ifdef HAVE_DIRENT_H
50 #include <dirent.h>
51 #endif
52 #include <signal.h>
53 #ifdef HAVE_PWD_H
54 #include <pwd.h>
55 #endif
56
57 #include "opal/util/cmd_line.h"
58 #include "opal/util/opal_environ.h"
59 #include "opal/util/os_dirpath.h"
60 #include "opal/util/basename.h"
61 #include "opal/util/error.h"
62 #include "opal/util/printf.h"
63 #include "opal/mca/base/base.h"
64 #include "opal/util/show_help.h"
65
66 #include "orte/util/proc_info.h"
67 #include "orte/util/show_help.h"
68
69 #include "opal/runtime/opal.h"
70 #if OPAL_ENABLE_FT_CR == 1
71 #include "opal/runtime/opal_cr.h"
72 #endif
73 #include "orte/runtime/runtime.h"
74
75
76
77
78 static int parse_args(int argc, char *argv[]);
79 static void kill_procs(void);
80
81
82
83
/*
 * Boolean switches controlling one run of orte-clean.  The option table
 * (cmd_line_opts) registers the address of each member, and the rest of
 * the file reads them to decide what to print.
 */
typedef struct {
    bool help;      /* set by -h / --help    */
    bool verbose;   /* set by -v / --verbose */
    bool debug;     /* set by -d / --debug   */
} orte_clean_globals_t;

/* The single instance used by this tool; every switch starts out off. */
orte_clean_globals_t orte_clean_globals = { false, false, false };
91
92 opal_cmd_line_init_t cmd_line_opts[] = {
93 { NULL,
94 'h', NULL, "help",
95 0,
96 &orte_clean_globals.help, OPAL_CMD_LINE_TYPE_BOOL,
97 "This help message" },
98
99 { NULL,
100 'v', NULL, "verbose",
101 0,
102 &orte_clean_globals.verbose, OPAL_CMD_LINE_TYPE_BOOL,
103 "Generate verbose output" },
104
105 { NULL,
106 'd', NULL, "debug",
107 0,
108 &orte_clean_globals.debug, OPAL_CMD_LINE_TYPE_BOOL,
109 "Extra debug output for developers to ensure that orte-clean is working" },
110
111
112 { NULL,
113 '\0', NULL, NULL,
114 0,
115 NULL, OPAL_CMD_LINE_TYPE_NULL,
116 NULL }
117 };
118
119
120
121
122
123
124
125 int
126 main(int argc, char *argv[])
127 {
128 int ret = ORTE_SUCCESS;
129 #if OPAL_ENABLE_FT_CR == 1
130 char *tmp_env_var;
131 #endif
132 char *legacy;
133
134
135 if (ORTE_SUCCESS != (ret = opal_init_util(&argc, &argv))) {
136 return ret;
137 }
138
139 if (ORTE_SUCCESS != (ret = parse_args(argc, argv))) {
140 return ret;
141 }
142
143 #if OPAL_ENABLE_FT_CR == 1
144
145
146
147
148 opal_cr_set_enabled(false);
149
150
151 (void) mca_base_var_env_name("crs", &tmp_env_var);
152 opal_setenv(tmp_env_var,
153 "none",
154 true, &environ);
155 free(tmp_env_var);
156 tmp_env_var = NULL;
157
158 (void) mca_base_var_env_name("opal_cr_is_tool", &tmp_env_var);
159 opal_setenv(tmp_env_var,
160 "1", true, NULL);
161 free(tmp_env_var);
162 #endif
163
164 if (ORTE_SUCCESS != (ret = orte_init(&argc, &argv, ORTE_PROC_TOOL))) {
165 return ret;
166 }
167
168
169
170
171
172
173 if (orte_clean_globals.verbose) {
174 fprintf(stderr, "orte-clean: cleaning session dir tree %s\n",
175 orte_process_info.top_session_dir);
176 }
177 opal_os_dirpath_destroy(orte_process_info.top_session_dir, true, NULL);
178
179
180 opal_asprintf(&legacy, "%s/openmpi-sessions-%d@%s_0",
181 orte_process_info.tmpdir_base,
182 (int)geteuid(), orte_process_info.nodename);
183 opal_os_dirpath_destroy(legacy, true, NULL);
184 free(legacy);
185
186
187 opal_asprintf(&legacy, "rm -rf %s/pmix*", orte_process_info.tmpdir_base);
188 system(legacy);
189 free(legacy);
190
191
192 kill_procs();
193
194 orte_finalize();
195
196 return ORTE_SUCCESS;
197 }
198
199
200
201
202 static int parse_args(int argc, char *argv[]) {
203 int ret;
204 opal_cmd_line_t cmd_line;
205 orte_clean_globals_t tmp = { false, false, false };
206
207
208
209
210 memcpy(&orte_clean_globals, &tmp, sizeof(tmp));
211
212
213
214
215 opal_cmd_line_create(&cmd_line, cmd_line_opts);
216 ret = opal_cmd_line_parse(&cmd_line, false, false, argc, argv);
217
218 if (OPAL_SUCCESS != ret) {
219 if (OPAL_ERR_SILENT != ret) {
220 fprintf(stderr, "%s: command line error (%s)\n", argv[0],
221 opal_strerror(ret));
222 }
223 return ret;
224 }
225
226
227
228
229 if (orte_clean_globals.help) {
230 char *str, *args = NULL;
231 args = opal_cmd_line_get_usage_msg(&cmd_line);
232 str = opal_show_help_string("help-orte-clean.txt", "usage", true,
233 args);
234 if (NULL != str) {
235 printf("%s", str);
236 free(str);
237 }
238 free(args);
239
240 exit(0);
241 }
242
243 OBJ_DESTRUCT(&cmd_line);
244
245 return ORTE_SUCCESS;
246 }
247
/*
 * Read one line from fp and return a heap copy with trailing whitespace
 * (space, tab, CR, LF) removed.
 *
 * At most 1023 characters are consumed per call; a longer physical line
 * is returned in several pieces.  A last line without a terminating
 * newline is returned intact, and a blank line yields an empty string.
 *
 * Returns NULL when fp is NULL, on end of file / read error, or when the
 * copy cannot be allocated.  The caller owns the result and must free()
 * it.
 */
static char *orte_getline(FILE *fp)
{
    char input[1024];
    char *copy;
    size_t len;

    if (NULL == fp || NULL == fgets(input, sizeof(input), fp)) {
        return NULL;
    }

    /* Trim from the end; this does not assume a newline is present. */
    len = strlen(input);
    while (len > 0 &&
           (' ' == input[len - 1] || '\t' == input[len - 1] ||
            '\n' == input[len - 1] || '\r' == input[len - 1])) {
        len--;
    }
    input[len] = '\0';

    copy = malloc(len + 1);
    if (NULL != copy) {
        memcpy(copy, input, len + 1);
    }
    return copy;
}
269
270
271
272
273
274
275 static
276 void kill_procs(void) {
277 int ortedpid;
278 char *fullprocname;
279 char *procname;
280 char *pidstr;
281 char *user;
282 int procpid;
283 FILE *psfile;
284 char *inputline;
285 char *this_user;
286 int uid;
287 char *separator = " \t";
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303 char command[] = ORTE_CLEAN_PS_CMD;
304 if (0 == strcmp("unknown", command)) {
305 return;
306 }
307
308 if (orte_clean_globals.verbose) {
309 fprintf(stderr, "orte-clean: killing any lingering procs\n");
310 }
311
312
313
314
315 ortedpid = getppid();
316
317
318 uid = getuid();
319 opal_asprintf(&this_user, "%d", uid);
320
321
322
323
324
325
326
327
328
329
330
331
332
333 sleep(1);
334
335 psfile = popen(command, "r");
336
337
338
339
340 if (NULL == (inputline = orte_getline(psfile))) {
341 free(this_user);
342 pclose(psfile);
343 return;
344 }
345 free(inputline);
346
347 while (NULL != (inputline = orte_getline(psfile))) {
348
349
350 fullprocname = strtok(inputline, separator);
351 pidstr = strtok(NULL, separator);
352 user = strtok(NULL, separator);
353
354 if (orte_clean_globals.debug) {
355 fprintf(stdout, "\norte-clean: user(pid)=%s, me=%s\n",
356 user, this_user);
357 }
358
359
360
361
362 if ((0 != strcmp(user, this_user)) && (0 != strcmp("0", this_user))) {
363
364 free(inputline);
365 continue;
366 }
367
368 procpid = atoi(pidstr);
369 procname = opal_basename(fullprocname);
370 if (orte_clean_globals.debug) {
371 fprintf(stdout, "orte-clean: fullname=%s, basename=%s, pid=%d\n",
372 fullprocname, procname, procpid);
373 }
374
375
376
377
378
379
380
381
382
383
384
385 if (0 == strncmp("orted", procname, strlen("orted")) ||
386 0 == strncmp("(orted)", procname, strlen("(orted)")) ||
387 0 == strncmp("orte-dvm", procname, strlen("orte-dvm")) ||
388 0 == strncmp("(orte-dvm)", procname, strlen("(orte-dvm)"))) {
389 if (procpid != ortedpid) {
390 if (orte_clean_globals.verbose) {
391 fprintf(stderr, "orte-clean: found potential rogue orted process"
392 " (pid=%d,uid=%s), sending SIGKILL...\n",
393 procpid, user);
394 }
395
396
397
398
399 (void)kill(procpid, SIGKILL);
400 }
401 }
402
403
404
405
406 if (0 == strncmp("orterun", procname, strlen("orterun")) ||
407 0 == strncmp("mpirun", procname, strlen("mpirun"))) {
408
409
410
411 if (procpid != ortedpid) {
412 if (orte_clean_globals.verbose) {
413 fprintf(stderr, "orte-clean: found potential rogue orterun process"
414 " (pid=%d,uid=%s), sending SIGKILL...\n",
415 procpid, user);
416
417 }
418
419 if (ORTE_PROC_IS_SINGLETON) {
420 if (procpid != orte_process_info.hnp_pid) {
421 (void)kill(procpid, SIGKILL);
422 }
423 } else {
424
425
426
427 (void)kill(procpid, SIGKILL);
428 }
429 }
430 }
431 free(inputline);
432 free(procname);
433 }
434 free(this_user);
435 pclose(psfile);
436 return;
437 }