This source file includes the following definitions.
- orte_ras_base_node_insert
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24 #include "orte_config.h"
25 #include "orte/constants.h"
26
27 #include <string.h>
28
29 #include "opal/util/argv.h"
30 #include "opal/util/if.h"
31
32 #include "orte/mca/errmgr/errmgr.h"
33 #include "orte/mca/rmaps/base/base.h"
34 #include "orte/util/name_fns.h"
35 #include "orte/runtime/orte_globals.h"
36
37 #include "orte/mca/ras/base/ras_private.h"
38
39
40
41
42
43 int orte_ras_base_node_insert(opal_list_t* nodes, orte_job_t *jdata)
44 {
45 opal_list_item_t* item;
46 orte_std_cntr_t num_nodes;
47 int rc, i;
48 orte_node_t *node, *hnp_node, *nptr;
49 char *ptr;
50 bool hnp_alone = true, skiphnp = false;
51 orte_attribute_t *kv;
52 char **alias=NULL, **nalias;
53 orte_proc_t *daemon;
54 orte_job_t *djob;
55
56
57 num_nodes = (orte_std_cntr_t)opal_list_get_size(nodes);
58 if (0 == num_nodes) {
59 return ORTE_SUCCESS;
60 }
61
62 OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
63 "%s ras:base:node_insert inserting %ld nodes",
64 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
65 (long)num_nodes));
66
67
68 if (1 < orte_ras_base.multiplier) {
69 orte_set_attribute(&jdata->attributes, ORTE_JOB_MULTI_DAEMON_SIM,
70 ORTE_ATTR_GLOBAL, NULL, OPAL_BOOL);
71 }
72
73
74
75
76 if (ORTE_SUCCESS != (rc = opal_pointer_array_set_size(orte_node_pool, num_nodes * orte_ras_base.multiplier))) {
77 ORTE_ERROR_LOG(rc);
78 return rc;
79 }
80
81
82 djob = orte_get_job_data_object(ORTE_PROC_MY_NAME->jobid);
83
84
85 hnp_node = (orte_node_t*)opal_pointer_array_get_item(orte_node_pool, 0);
86
87 if ((orte_ras_base.launch_orted_on_hn == true) &&
88 (orte_managed_allocation)) {
89 if (NULL != hnp_node) {
90 OPAL_LIST_FOREACH(node, nodes, orte_node_t) {
91 if (orte_ifislocal(node->name)) {
92 orte_hnp_is_allocated = true;
93 break;
94 }
95 }
96 if (orte_hnp_is_allocated && !(ORTE_GET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping) &
97 ORTE_MAPPING_NO_USE_LOCAL)) {
98 hnp_node->name = strdup("mpirun");
99 skiphnp = true;
100 ORTE_SET_MAPPING_DIRECTIVE(orte_rmaps_base.mapping, ORTE_MAPPING_NO_USE_LOCAL);
101 }
102 }
103 }
104
105
106 while (NULL != (item = opal_list_remove_first(nodes))) {
107 node = (orte_node_t*)item;
108
109
110
111
112
113 if (!skiphnp && NULL != hnp_node && orte_ifislocal(node->name)) {
114 OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
115 "%s ras:base:node_insert updating HNP [%s] info to %ld slots",
116 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
117 node->name,
118 (long)node->slots));
119
120
121 orte_hnp_is_allocated = true;
122
123 orte_ras_base.total_slots_alloc += node->slots;
124
125 hnp_node->slots = node->slots;
126 hnp_node->slots_max = node->slots_max;
127
128 OPAL_LIST_FOREACH(kv, &node->attributes, orte_attribute_t) {
129 orte_set_attribute(&node->attributes, kv->key, ORTE_ATTR_LOCAL, &kv->data, kv->type);
130 }
131 if (orte_managed_allocation || ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
132
133
134
135 ORTE_FLAG_SET(hnp_node, ORTE_NODE_FLAG_SLOTS_GIVEN);
136 } else {
137 ORTE_FLAG_UNSET(hnp_node, ORTE_NODE_FLAG_SLOTS_GIVEN);
138 }
139
140
141
142
143 if (orte_show_resolved_nodenames) {
144
145 if (0 != strcmp(node->name, hnp_node->name)) {
146
147 ptr = NULL;
148 orte_get_attribute(&hnp_node->attributes, ORTE_NODE_ALIAS, (void**)&ptr, OPAL_STRING);
149 if (NULL != ptr) {
150 alias = opal_argv_split(ptr, ',');
151 free(ptr);
152 }
153
154 opal_argv_append_unique_nosize(&alias, node->name, false);
155 }
156 if (orte_get_attribute(&node->attributes, ORTE_NODE_ALIAS, (void**)&ptr, OPAL_STRING)) {
157 nalias = opal_argv_split(ptr, ',');
158
159 for (i=0; NULL != nalias[i]; i++) {
160 opal_argv_append_unique_nosize(&alias, nalias[i], false);
161 }
162 opal_argv_free(nalias);
163 }
164
165 if (0 < opal_argv_count(alias)) {
166 ptr = opal_argv_join(alias, ',');
167 orte_set_attribute(&hnp_node->attributes, ORTE_NODE_ALIAS, ORTE_ATTR_LOCAL, ptr, OPAL_STRING);
168 free(ptr);
169 }
170 opal_argv_free(alias);
171 }
172
173 OBJ_RELEASE(node);
174
175 for (i=1; i < orte_ras_base.multiplier; i++) {
176 opal_dss.copy((void**)&node, hnp_node, ORTE_NODE);
177 ORTE_FLAG_UNSET(node, ORTE_NODE_FLAG_DAEMON_LAUNCHED);
178 node->index = opal_pointer_array_add(orte_node_pool, node);
179 }
180 } else {
181
182 OPAL_OUTPUT_VERBOSE((5, orte_ras_base_framework.framework_output,
183 "%s ras:base:node_insert node %s slots %d",
184 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
185 (NULL == node->name) ? "NULL" : node->name,
186 node->slots));
187 if (orte_managed_allocation) {
188
189
190
191 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_SLOTS_GIVEN);
192 }
193
194 node->index = opal_pointer_array_add(orte_node_pool, (void*)node);
195 if (ORTE_SUCCESS > (rc = node->index)) {
196 ORTE_ERROR_LOG(rc);
197 return rc;
198 }
199 if (orte_do_not_launch) {
200
201
202
203 daemon = OBJ_NEW(orte_proc_t);
204 daemon->name.jobid = ORTE_PROC_MY_NAME->jobid;
205 daemon->name.vpid = node->index;
206 daemon->state = ORTE_PROC_STATE_RUNNING;
207 OBJ_RETAIN(node);
208 daemon->node = node;
209 opal_pointer_array_set_item(djob->procs, daemon->name.vpid, daemon);
210 djob->num_procs++;
211 OBJ_RETAIN(daemon);
212 node->daemon = daemon;
213 }
214
215 orte_ras_base.total_slots_alloc += node->slots;
216
217 if (NULL != strchr(node->name, '.')) {
218 orte_have_fqdn_allocation = true;
219 }
220
221 hnp_alone = false;
222 for (i=1; i < orte_ras_base.multiplier; i++) {
223 opal_dss.copy((void**)&nptr, node, ORTE_NODE);
224 nptr->index = opal_pointer_array_add(orte_node_pool, nptr);
225 }
226 }
227 }
228
229
230
231
232
233 if (NULL != hnp_node && !orte_have_fqdn_allocation && !hnp_alone) {
234 if (NULL != (ptr = strchr(hnp_node->name, '.'))) {
235 *ptr = '\0';
236 }
237 }
238
239 return ORTE_SUCCESS;
240 }