This source file includes the following definitions.
- allocate
- finalize
- discover
- tm_getline
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#include "orte_config.h"
#include "orte/constants.h"
#include "orte/types.h"

#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include "orte/util/show_help.h"
#include "opal/util/os_path.h"
#include "opal/util/net.h"

#include "orte/mca/errmgr/errmgr.h"
#include "orte/runtime/orte_globals.h"
#include "orte/util/name_fns.h"

#include "orte/mca/ras/base/ras_private.h"
#include "ras_tm.h"
39
40
41
42
43
44 static int allocate(orte_job_t *jdata, opal_list_t *nodes);
45 static int finalize(void);
46
47 static int discover(opal_list_t* nodelist, char *pbs_jobid);
48 static char *tm_getline(FILE *fp);
49
50 #define TM_FILE_MAX_LINE_LENGTH 512
51
52 static char *filename;
53
54
55
56
57 orte_ras_base_module_t orte_ras_tm_module = {
58 NULL,
59 allocate,
60 NULL,
61 finalize
62 };
63
64
65
66
67
68
69
70 static int allocate(orte_job_t *jdata, opal_list_t *nodes)
71 {
72 int ret;
73 char *pbs_jobid;
74
75
76 if (NULL == (pbs_jobid = getenv("PBS_JOBID"))) {
77 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
78 return ORTE_ERR_NOT_FOUND;
79 }
80
81
82
83
84 orte_job_ident = strdup(pbs_jobid);
85
86 if (ORTE_SUCCESS != (ret = discover(nodes, pbs_jobid))) {
87 ORTE_ERROR_LOG(ret);
88 return ret;
89 }
90
91
92
93
94 if (opal_list_is_empty(nodes)) {
95 orte_show_help("help-ras-tm.txt", "no-nodes-found", true, filename);
96 return ORTE_ERR_NOT_FOUND;
97 }
98
99
100 return ORTE_SUCCESS;
101 }
102
103
104
105
106 static int finalize(void)
107 {
108 OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
109 "%s ras:tm:finalize: success (nothing to do)",
110 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
111 return ORTE_SUCCESS;
112 }
113
114
115
116
117
118
119
120
121
122
123
124 static int discover(opal_list_t* nodelist, char *pbs_jobid)
125 {
126 int32_t nodeid;
127 orte_node_t *node;
128 opal_list_item_t* item;
129 FILE *fp;
130 char *hostname, *cppn;
131 int ppn;
132 char *ptr;
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147 if (mca_ras_tm_component.smp_mode) {
148 if (NULL == (cppn = getenv("PBS_PPN"))) {
149 orte_show_help("help-ras-tm.txt", "smp-error", true);
150 return ORTE_ERR_NOT_FOUND;
151 }
152 ppn = strtol(cppn, NULL, 10);
153 } else {
154 ppn = 1;
155 }
156
157
158 filename = opal_os_path(false, mca_ras_tm_component.nodefile_dir,
159 pbs_jobid, NULL);
160 fp = fopen(filename, "r");
161 if (NULL == fp) {
162 ORTE_ERROR_LOG(ORTE_ERR_FILE_OPEN_FAILURE);
163 free(filename);
164 return ORTE_ERR_FILE_OPEN_FAILURE;
165 }
166
167
168
169
170
171
172 nodeid=0;
173 while (NULL != (hostname = tm_getline(fp))) {
174 if( !orte_keep_fqdn_hostnames && !opal_net_isaddr(hostname) ) {
175 if (NULL != (ptr = strchr(hostname, '.'))) {
176 *ptr = '\0';
177 }
178 }
179
180 OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
181 "%s ras:tm:allocate:discover: got hostname %s",
182 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), hostname));
183
184
185
186
187 for (item = opal_list_get_first(nodelist);
188 opal_list_get_end(nodelist) != item;
189 item = opal_list_get_next(item)) {
190 node = (orte_node_t*) item;
191 if (0 == strcmp(node->name, hostname)) {
192 if (mca_ras_tm_component.smp_mode) {
193
194 orte_show_help("help-ras-tm.txt", "smp-multi", true);
195 return ORTE_ERR_BAD_PARAM;
196 }
197 ++node->slots;
198
199 OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
200 "%s ras:tm:allocate:discover: found -- bumped slots to %d",
201 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->slots));
202
203 break;
204 }
205 }
206
207
208
209 if (opal_list_get_end(nodelist) == item) {
210
211
212
213 OPAL_OUTPUT_VERBOSE((1, orte_ras_base_framework.framework_output,
214 "%s ras:tm:allocate:discover: not found -- added to list",
215 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
216
217 node = OBJ_NEW(orte_node_t);
218 node->name = hostname;
219 orte_set_attribute(&node->attributes, ORTE_NODE_LAUNCH_ID, ORTE_ATTR_LOCAL, &nodeid, OPAL_INT32);
220 node->slots_inuse = 0;
221 node->slots_max = 0;
222 node->slots = ppn;
223 node->state = ORTE_NODE_STATE_UP;
224 opal_list_append(nodelist, &node->super);
225 } else {
226
227
228 free(hostname);
229 }
230
231
232 nodeid++;
233 }
234 fclose(fp);
235
236 return ORTE_SUCCESS;
237 }
238
/**
 * Read the next line from @p fp and return it as a heap-allocated string.
 *
 * At most TM_FILE_MAX_LINE_LENGTH - 1 characters are read per call, so a
 * longer line is returned in several pieces.  A trailing newline, if
 * present, is removed; a final line that has no newline is returned intact.
 *
 * @param fp  Open stream to read from.
 *
 * @return A newly allocated, NUL-terminated string the caller must free(),
 *         or NULL at end of file, on a read error, or if the copy cannot be
 *         allocated.
 */
static char *tm_getline(FILE *fp)
{
    char input[TM_FILE_MAX_LINE_LENGTH];
    char *buff;
    size_t len;

    if (NULL == fgets(input, TM_FILE_MAX_LINE_LENGTH, fp)) {
        return NULL;
    }

    /* Strip the newline only when one is actually there.  Blindly chopping
     * the last character would truncate an unterminated final line and
     * would write before the buffer if the string came back empty. */
    len = strlen(input);
    if (len > 0 && '\n' == input[len - 1]) {
        input[--len] = '\0';
    }

    /* The length is already known, so copy with malloc/memcpy; this also
     * keeps the function within ISO C (strdup is a POSIX extension). */
    buff = malloc(len + 1);
    if (NULL != buff) {
        memcpy(buff, input, len + 1);
    }
    return buff;
}
253