This source file includes the following definitions.
- rsh_component_register
- rsh_component_open
- rsh_component_query
- rsh_component_close
- orte_plm_rsh_search
- rsh_launch_agent_lookup
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 #include "orte_config.h"
35 #include "orte/constants.h"
36
37 #include <stdlib.h>
38 #ifdef HAVE_UNISTD_H
39 #include <unistd.h>
40 #endif
41 #include <ctype.h>
42
43 #include "opal/util/opal_environ.h"
44 #include "opal/util/output.h"
45 #include "opal/util/argv.h"
46 #include "opal/util/basename.h"
47 #include "opal/util/path.h"
48 #include "opal/util/string_copy.h"
49
50 #include "orte/mca/state/state.h"
51 #include "orte/util/name_fns.h"
52 #include "orte/runtime/orte_globals.h"
53 #include "orte/util/show_help.h"
54
55 #include "orte/mca/plm/plm.h"
56 #include "orte/mca/plm/base/plm_private.h"
57 #include "orte/mca/plm/rsh/plm_rsh.h"
58
59
60
61
62 const char *mca_plm_rsh_component_version_string =
63 "Open MPI rsh plm MCA component version " ORTE_VERSION;
64
65
66 static int rsh_component_register(void);
67 static int rsh_component_open(void);
68 static int rsh_component_query(mca_base_module_t **module, int *priority);
69 static int rsh_component_close(void);
70 static int rsh_launch_agent_lookup(const char *agent_list, char *path);
71
72
73 static char *mca_plm_rsh_delay_string = NULL;
74 static int agent_var_id = -1;
75
76
77
78
79
80
/*
 * The single public instance of the rsh PLM component.  Only the embedded
 * base component (first member) is initialized explicitly here; the tunable
 * fields are filled in by rsh_component_register() and rsh_component_open().
 *
 * NOTE(review): the initializer mixes designated fields with positional,
 * macro-expanded ones, so the order of entries below must not be changed.
 */
orte_plm_rsh_component_t mca_plm_rsh_component = {
    {
        /* Generic MCA component description and callbacks. */
        .base_version = {
            /* PLM framework version marker (expands to positional fields). */
            ORTE_PLM_BASE_VERSION_2_0_0,

            /* Component name and version. */
            .mca_component_name = "rsh",
            MCA_BASE_MAKE_VERSION(component, ORTE_MAJOR_VERSION, ORTE_MINOR_VERSION,
                                  ORTE_RELEASE_VERSION),

            /* Lifecycle entry points implemented in this file. */
            .mca_open_component = rsh_component_open,
            .mca_close_component = rsh_component_close,
            .mca_query_component = rsh_component_query,
            .mca_register_component_params = rsh_component_register,
        },
        .base_data = {
            /* presumably flags the component as checkpoint-capable --
             * TODO confirm against the MCA base headers */
            MCA_BASE_METADATA_PARAM_CHECKPOINT
        },
    }
};
106
107 static int rsh_component_register(void)
108 {
109 mca_base_component_t *c = &mca_plm_rsh_component.super.base_version;
110 int var_id;
111
112 mca_plm_rsh_component.num_concurrent = 128;
113 (void) mca_base_component_var_register (c, "num_concurrent",
114 "How many plm_rsh_agent instances to invoke concurrently (must be > 0)",
115 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
116 OPAL_INFO_LVL_5,
117 MCA_BASE_VAR_SCOPE_READONLY,
118 &mca_plm_rsh_component.num_concurrent);
119
120 mca_plm_rsh_component.force_rsh = false;
121 (void) mca_base_component_var_register (c, "force_rsh", "Force the launcher to always use rsh",
122 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
123 OPAL_INFO_LVL_2,
124 MCA_BASE_VAR_SCOPE_READONLY,
125 &mca_plm_rsh_component.force_rsh);
126 mca_plm_rsh_component.disable_qrsh = false;
127 (void) mca_base_component_var_register (c, "disable_qrsh",
128 "Disable the use of qrsh when under the Grid Engine parallel environment",
129 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
130 OPAL_INFO_LVL_2,
131 MCA_BASE_VAR_SCOPE_READONLY,
132 &mca_plm_rsh_component.disable_qrsh);
133
134 mca_plm_rsh_component.daemonize_qrsh = false;
135 (void) mca_base_component_var_register (c, "daemonize_qrsh",
136 "Daemonize the orted under the Grid Engine parallel environment",
137 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
138 OPAL_INFO_LVL_2,
139 MCA_BASE_VAR_SCOPE_READONLY,
140 &mca_plm_rsh_component.daemonize_qrsh);
141
142 mca_plm_rsh_component.disable_llspawn = false;
143 (void) mca_base_component_var_register (c, "disable_llspawn",
144 "Disable the use of llspawn when under the LoadLeveler environment",
145 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
146 OPAL_INFO_LVL_2,
147 MCA_BASE_VAR_SCOPE_READONLY,
148 &mca_plm_rsh_component.disable_llspawn);
149
150 mca_plm_rsh_component.daemonize_llspawn = false;
151 (void) mca_base_component_var_register (c, "daemonize_llspawn",
152 "Daemonize the orted when under the LoadLeveler environment",
153 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
154 OPAL_INFO_LVL_2,
155 MCA_BASE_VAR_SCOPE_READONLY,
156 &mca_plm_rsh_component.daemonize_llspawn);
157
158 mca_plm_rsh_component.priority = 10;
159 (void) mca_base_component_var_register (c, "priority", "Priority of the rsh plm component",
160 MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
161 OPAL_INFO_LVL_9,
162 MCA_BASE_VAR_SCOPE_READONLY,
163 &mca_plm_rsh_component.priority);
164
165 mca_plm_rsh_delay_string = NULL;
166 (void) mca_base_component_var_register (c, "delay",
167 "Delay between invocations of the remote agent (sec[:usec])",
168 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
169 OPAL_INFO_LVL_4,
170 MCA_BASE_VAR_SCOPE_READONLY,
171 &mca_plm_rsh_delay_string);
172
173 mca_plm_rsh_component.no_tree_spawn = false;
174 (void) mca_base_component_var_register (c, "no_tree_spawn",
175 "If set to true, do not launch via a tree-based topology",
176 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
177 OPAL_INFO_LVL_5,
178 MCA_BASE_VAR_SCOPE_READONLY,
179 &mca_plm_rsh_component.no_tree_spawn);
180
181
182 mca_plm_rsh_component.agent = "ssh : rsh";
183 var_id = mca_base_component_var_register (c, "agent",
184 "The command used to launch executables on remote nodes (typically either \"ssh\" or \"rsh\")",
185 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
186 OPAL_INFO_LVL_2,
187 MCA_BASE_VAR_SCOPE_READONLY,
188 &mca_plm_rsh_component.agent);
189 (void) mca_base_var_register_synonym (var_id, "orte", "pls", NULL, "rsh_agent", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
190 (void) mca_base_var_register_synonym (var_id, "orte", "orte", NULL, "rsh_agent", MCA_BASE_VAR_SYN_FLAG_DEPRECATED);
191 agent_var_id = var_id;
192
193 mca_plm_rsh_component.assume_same_shell = true;
194 var_id = mca_base_component_var_register (c, "assume_same_shell",
195 "If set to true, assume that the shell on the remote node is the same as the shell on the local node. Otherwise, probe for what the remote shell [default: 1]",
196 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
197 OPAL_INFO_LVL_2,
198 MCA_BASE_VAR_SCOPE_READONLY,
199 &mca_plm_rsh_component.assume_same_shell);
200
201 (void) mca_base_var_register_synonym (var_id, "orte", "orte", NULL, "assume_same_shell", 0);
202
203 mca_plm_rsh_component.pass_environ_mca_params = true;
204 (void) mca_base_component_var_register (c, "pass_environ_mca_params",
205 "If set to false, do not include mca params from the environment on the orted cmd line",
206 MCA_BASE_VAR_TYPE_BOOL, NULL, 0, 0,
207 OPAL_INFO_LVL_2,
208 MCA_BASE_VAR_SCOPE_READONLY,
209 &mca_plm_rsh_component.pass_environ_mca_params);
210 mca_plm_rsh_component.ssh_args = NULL;
211 (void) mca_base_component_var_register (c, "args",
212 "Arguments to add to rsh/ssh",
213 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
214 OPAL_INFO_LVL_2,
215 MCA_BASE_VAR_SCOPE_READONLY,
216 &mca_plm_rsh_component.ssh_args);
217
218 mca_plm_rsh_component.pass_libpath = NULL;
219 (void) mca_base_component_var_register (c, "pass_libpath",
220 "Prepend the specified library path to the remote shell's LD_LIBRARY_PATH",
221 MCA_BASE_VAR_TYPE_STRING, NULL, 0, 0,
222 OPAL_INFO_LVL_2,
223 MCA_BASE_VAR_SCOPE_READONLY,
224 &mca_plm_rsh_component.pass_libpath);
225
226 return ORTE_SUCCESS;
227 }
228
229 static int rsh_component_open(void)
230 {
231 char *ctmp;
232
233
234 mca_plm_rsh_component.using_qrsh = false;
235 mca_plm_rsh_component.using_llspawn = false;
236 mca_plm_rsh_component.agent_argv = NULL;
237
238
239 if (mca_plm_rsh_component.num_concurrent <= 0) {
240 orte_show_help("help-plm-rsh.txt", "concurrency-less-than-zero",
241 true, mca_plm_rsh_component.num_concurrent);
242 mca_plm_rsh_component.num_concurrent = 1;
243 }
244
245 if (NULL != mca_plm_rsh_delay_string) {
246 mca_plm_rsh_component.delay.tv_sec = strtol(mca_plm_rsh_delay_string, &ctmp, 10);
247 if (ctmp == mca_plm_rsh_delay_string) {
248 mca_plm_rsh_component.delay.tv_sec = 0;
249 }
250 if (':' == ctmp[0]) {
251 mca_plm_rsh_component.delay.tv_nsec = 1000 * strtol (ctmp + 1, NULL, 10);
252 }
253 }
254
255 return ORTE_SUCCESS;
256 }
257
258
259 static int rsh_component_query(mca_base_module_t **module, int *priority)
260 {
261 char *tmp;
262
263
264
265
266
267 int ret;
268 mca_base_var_source_t source;
269 ret = mca_base_var_get_value(agent_var_id, NULL, &source, NULL);
270 if (OPAL_SUCCESS != ret) {
271 return ret;
272 }
273 if (MCA_BASE_VAR_SOURCE_DEFAULT != source) {
274
275
276 goto lookup;
277 }
278
279
280 if (!mca_plm_rsh_component.disable_qrsh &&
281 NULL != getenv("SGE_ROOT") && NULL != getenv("ARC") &&
282 NULL != getenv("PE_HOSTFILE") && NULL != getenv("JOB_ID")) {
283
284 opal_asprintf(&tmp, "%s/bin/%s", getenv("SGE_ROOT"), getenv("ARC"));
285
286 if (ORTE_SUCCESS != rsh_launch_agent_lookup("qrsh", tmp)) {
287
288 opal_output_verbose(1, orte_plm_base_framework.framework_output,
289 "%s plm:rsh: unable to be used: SGE indicated but cannot find path "
290 "or execution permissions not set for launching agent qrsh",
291 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
292 free(tmp);
293 *module = NULL;
294 return ORTE_ERROR;
295 }
296 mca_plm_rsh_component.agent = tmp;
297 mca_plm_rsh_component.using_qrsh = true;
298 goto success;
299 }
300
301
302 if (!mca_plm_rsh_component.disable_llspawn &&
303 NULL != getenv("LOADL_STEP_ID")) {
304
305 if (ORTE_SUCCESS != rsh_launch_agent_lookup("llspawn", NULL)) {
306 opal_output_verbose(1, orte_plm_base_framework.framework_output,
307 "%s plm:rsh: unable to be used: LoadLeveler "
308 "indicated but cannot find path or execution "
309 "permissions not set for launching agent llspawn",
310 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME));
311 *module = NULL;
312 return ORTE_ERROR;
313 }
314 mca_plm_rsh_component.agent = strdup("llspawn");
315 mca_plm_rsh_component.using_llspawn = true;
316 goto success;
317 }
318
319
320
321 lookup:
322 if (ORTE_SUCCESS != rsh_launch_agent_lookup(NULL, NULL)) {
323
324
325 if (NULL != mca_plm_rsh_component.agent) {
326 orte_show_help("help-plm-rsh.txt", "agent-not-found", true,
327 mca_plm_rsh_component.agent);
328 ORTE_FORCED_TERMINATE(ORTE_ERR_NOT_FOUND);
329 return ORTE_ERR_FATAL;
330 }
331
332 OPAL_OUTPUT_VERBOSE((1, orte_plm_base_framework.framework_output,
333 "%s plm:rsh: unable to be used: cannot find path "
334 "for launching agent \"%s\"\n",
335 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
336 mca_plm_rsh_component.agent));
337 *module = NULL;
338 return ORTE_ERROR;
339 }
340
341 success:
342
343 *priority = mca_plm_rsh_component.priority;
344 *module = (mca_base_module_t *) &orte_plm_rsh_module;
345 return ORTE_SUCCESS;
346 }
347
348
349 static int rsh_component_close(void)
350 {
351 return ORTE_SUCCESS;
352 }
353
354
355
356
357
358
359 char **orte_plm_rsh_search(const char* agent_list, const char *path)
360 {
361 int i, j;
362 char *line, **lines;
363 char **tokens, *tmp;
364 char cwd[OPAL_PATH_MAX];
365
366 if (NULL == path) {
367 getcwd(cwd, OPAL_PATH_MAX);
368 } else {
369 opal_string_copy(cwd, path, OPAL_PATH_MAX);
370 }
371 if (NULL == agent_list) {
372 lines = opal_argv_split(mca_plm_rsh_component.agent, ':');
373 } else {
374 lines = opal_argv_split(agent_list, ':');
375 }
376 for (i = 0; NULL != lines[i]; ++i) {
377 line = lines[i];
378
379
380 for (j = 0; '\0' != line[j] && isspace(line[j]); ++line) {
381 continue;
382 }
383 for (j = strlen(line) - 2; j > 0 && isspace(line[j]); ++j) {
384 line[j] = '\0';
385 }
386 if (strlen(line) <= 0) {
387 continue;
388 }
389
390
391 tokens = opal_argv_split(line, ' ');
392
393
394 tmp = opal_path_findv(tokens[0], X_OK, environ, cwd);
395 if (NULL != tmp) {
396 free(tokens[0]);
397 tokens[0] = tmp;
398 opal_argv_free(lines);
399 return tokens;
400 }
401
402
403 opal_argv_free(tokens);
404 }
405
406
407 opal_argv_free(lines);
408 return NULL;
409 }
410
411 static int rsh_launch_agent_lookup(const char *agent_list, char *path)
412 {
413 char *bname;
414 int i;
415
416 OPAL_OUTPUT_VERBOSE((5, orte_plm_base_framework.framework_output,
417 "%s plm:rsh_lookup on agent %s path %s",
418 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
419 (NULL == agent_list) ? mca_plm_rsh_component.agent : agent_list,
420 (NULL == path) ? "NULL" : path));
421 if (NULL == (mca_plm_rsh_component.agent_argv = orte_plm_rsh_search(agent_list, path))) {
422 return ORTE_ERR_NOT_FOUND;
423 }
424
425
426
427 mca_plm_rsh_component.agent_path = strdup(mca_plm_rsh_component.agent_argv[0]);
428 bname = opal_basename(mca_plm_rsh_component.agent_argv[0]);
429 if (NULL == bname) {
430 return ORTE_SUCCESS;
431 }
432
433 free(mca_plm_rsh_component.agent_argv[0]);
434 mca_plm_rsh_component.agent_argv[0] = bname;
435
436 if (0 == strcmp(bname, "ssh")) {
437
438 if (NULL != orte_xterm) {
439 opal_argv_append_unique_nosize(&mca_plm_rsh_component.agent_argv, "-X", false);
440 } else if (0 >= opal_output_get_verbosity(orte_plm_base_framework.framework_output)) {
441
442
443
444
445 for (i = 1; NULL != mca_plm_rsh_component.agent_argv[i]; ++i) {
446 if (0 == strcasecmp("-x", mca_plm_rsh_component.agent_argv[i])) {
447 break;
448 }
449 }
450 if (NULL == mca_plm_rsh_component.agent_argv[i]) {
451 opal_argv_append_nosize(&mca_plm_rsh_component.agent_argv, "-x");
452 }
453 }
454 }
455 return ORTE_SUCCESS;
456 }