This source file includes the following definitions.
- orte_ess_base_proc_binding
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 #include "orte_config.h"
26 #include "orte/constants.h"
27
28 #ifdef HAVE_UNISTD_H
29 #include <unistd.h>
30 #endif
31 #include <stdlib.h>
32 #include <errno.h>
33
34 #include "opal/util/output.h"
35 #include "opal/mca/pmix/pmix.h"
36 #include "opal/mca/hwloc/base/base.h"
37
38 #include "orte/mca/errmgr/errmgr.h"
39 #include "orte/util/name_fns.h"
40 #include "orte/util/proc_info.h"
41 #include "orte/util/show_help.h"
42 #include "orte/runtime/orte_globals.h"
43
44 #include "orte/mca/ess/base/base.h"
45
46 int orte_ess_base_proc_binding(void)
47 {
48 hwloc_obj_t node, obj;
49 hwloc_cpuset_t cpus, nodeset;
50 hwloc_obj_type_t target;
51 unsigned int cache_level = 0;
52 struct hwloc_topology_support *support;
53 char *map;
54 int ret;
55 char *error=NULL;
56 hwloc_cpuset_t mycpus;
57
58
59
60 if (NULL != getenv(OPAL_MCA_PREFIX"orte_bound_at_launch")) {
61 orte_proc_is_bound = true;
62 if (NULL != (map = getenv(OPAL_MCA_PREFIX"orte_base_applied_binding"))) {
63 orte_proc_applied_binding = hwloc_bitmap_alloc();
64 if (0 != (ret = hwloc_bitmap_list_sscanf(orte_proc_applied_binding, map))) {
65 error = "applied_binding parse";
66 goto error;
67 }
68 }
69 if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
70
71 map = NULL;
72 OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
73 ORTE_PROC_MY_NAME, &map, OPAL_STRING);
74 if (OPAL_SUCCESS == ret && NULL != map) {
75 opal_output(0, "MCW rank %s bound to %s",
76 ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid), map);
77 free(map);
78 } else {
79 opal_output(0, "MCW rank %s not bound", ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid));
80 }
81 }
82 return ORTE_SUCCESS;
83 } else if (NULL != getenv(OPAL_MCA_PREFIX"orte_externally_bound")) {
84 orte_proc_is_bound = true;
85
86 map = NULL;
87 OPAL_MODEX_RECV_VALUE_OPTIONAL(ret, OPAL_PMIX_LOCALITY_STRING,
88 ORTE_PROC_MY_NAME, &map, OPAL_STRING);
89 if (OPAL_SUCCESS == ret && NULL != map) {
90
91 if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
92 opal_output(0, "MCW rank %s bound to %s",
93 ORTE_VPID_PRINT(ORTE_PROC_MY_NAME->vpid), map);
94 }
95 free(map);
96 return ORTE_SUCCESS;
97 }
98
99 }
100
101
102 if (OPAL_SUCCESS != opal_hwloc_base_get_topology()) {
103
104 return ORTE_SUCCESS;
105 }
106
107
108 if (!orte_proc_is_bound) {
109 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
110 "%s Not bound at launch",
111 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
112 support = (struct hwloc_topology_support*)hwloc_topology_get_support(opal_hwloc_topology);
113
114 node = hwloc_get_root_obj(opal_hwloc_topology);
115 nodeset = node->cpuset;
116
117 cpus = hwloc_bitmap_alloc();
118 if (hwloc_get_cpubind(opal_hwloc_topology, cpus, HWLOC_CPUBIND_PROCESS) < 0) {
119
120
121
122 hwloc_bitmap_free(cpus);
123 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
124 "%s Binding not supported",
125 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
126 goto MOVEON;
127 }
128
129
130
131 if (0 != hwloc_bitmap_compare(cpus, nodeset) ||
132 opal_hwloc_base_single_cpu(nodeset) ||
133 opal_hwloc_base_single_cpu(cpus)) {
134
135
136
137 orte_proc_is_bound = true;
138 hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
139 hwloc_bitmap_free(cpus);
140 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
141 "%s Process was externally bound",
142 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
143 } else if (support->cpubind->set_thisproc_cpubind &&
144 OPAL_BINDING_POLICY_IS_SET(opal_hwloc_binding_policy) &&
145 OPAL_BIND_TO_NONE != OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
146
147
148
149 hwloc_bitmap_zero(cpus);
150 if (OPAL_BIND_TO_CPUSET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
151 if (OPAL_SUCCESS != (ret = opal_hwloc_base_cpu_list_parse(opal_hwloc_base_cpu_list,
152 opal_hwloc_topology,
153 OPAL_HWLOC_LOGICAL, cpus))) {
154 error = "Setting processor affinity failed";
155 hwloc_bitmap_free(cpus);
156 goto error;
157 }
158 if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
159 error = "Setting processor affinity failed";
160 hwloc_bitmap_free(cpus);
161 goto error;
162 }
163 hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
164 hwloc_bitmap_free(cpus);
165 orte_proc_is_bound = true;
166 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
167 "%s Process bound according to slot_list",
168 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
169 } else {
170
171 hwloc_bitmap_free(cpus);
172
173 if (ORTE_NODE_RANK_INVALID == orte_process_info.my_node_rank) {
174
175
176
177
178 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
179 "%s Process not bound - no node rank available",
180 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
181 goto MOVEON;
182 }
183
184
185
186 if (OPAL_BIND_TO_HWTHREAD == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
187 if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_PU,
188 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
189 ret = ORTE_ERR_NOT_FOUND;
190 error = "Getting hwthread object";
191 goto error;
192 }
193 cpus = obj->cpuset;
194 if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
195 ret = ORTE_ERROR;
196 error = "Setting processor affinity failed";
197 goto error;
198 }
199 hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
200 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
201 "%s Process bound to hwthread",
202 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
203 } else if (OPAL_BIND_TO_CORE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
204
205
206
207 if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
208 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
209 ret = ORTE_ERR_NOT_FOUND;
210 error = "Getting core object";
211 goto error;
212 }
213 cpus = obj->cpuset;
214 if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
215 error = "Setting processor affinity failed";
216 ret = ORTE_ERROR;
217 goto error;
218 }
219 hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
220 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
221 "%s Process bound to core",
222 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
223 } else {
224
225
226
227 if (NULL == (obj = opal_hwloc_base_get_obj_by_type(opal_hwloc_topology, HWLOC_OBJ_CORE,
228 0, orte_process_info.my_node_rank, OPAL_HWLOC_LOGICAL))) {
229 ret = ORTE_ERR_NOT_FOUND;
230 error = "Getting core object";
231 goto error;
232 }
233 if (OPAL_BIND_TO_L1CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
234 OPAL_HWLOC_MAKE_OBJ_CACHE(1, target, cache_level);
235 } else if (OPAL_BIND_TO_L2CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
236 OPAL_HWLOC_MAKE_OBJ_CACHE(2, target, cache_level);
237 } else if (OPAL_BIND_TO_L3CACHE == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
238 OPAL_HWLOC_MAKE_OBJ_CACHE(3, target, cache_level);
239 } else if (OPAL_BIND_TO_SOCKET == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
240 target = HWLOC_OBJ_SOCKET;
241 } else if (OPAL_BIND_TO_NUMA == OPAL_GET_BINDING_POLICY(opal_hwloc_binding_policy)) {
242 target = HWLOC_OBJ_NODE;
243 } else {
244 ret = ORTE_ERR_NOT_FOUND;
245 error = "Binding policy not known";
246 goto error;
247 }
248 for (obj = obj->parent; NULL != obj; obj = obj->parent) {
249 if (target == obj->type) {
250 #if HWLOC_API_VERSION < 0x20000
251 if (HWLOC_OBJ_CACHE == target && cache_level != obj->attr->cache.depth) {
252 continue;
253 }
254 #else
255
256 ++cache_level;
257 #endif
258
259 cpus = obj->cpuset;
260 if (0 > hwloc_set_cpubind(opal_hwloc_topology, cpus, 0)) {
261 ret = ORTE_ERROR;
262 error = "Setting processor affinity failed";
263 goto error;
264 }
265 hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, cpus);
266 orte_proc_is_bound = true;
267 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
268 "%s Process bound to %s",
269 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME),
270 hwloc_obj_type_string(target)));
271 break;
272 }
273 }
274 if (!orte_proc_is_bound) {
275 ret = ORTE_ERROR;
276 error = "Setting processor affinity failed";
277 goto error;
278 }
279 }
280 }
281 }
282 } else {
283 OPAL_OUTPUT_VERBOSE((5, orte_ess_base_framework.framework_output,
284 "%s Process bound at launch",
285 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME)));
286 }
287
288 MOVEON:
289
290 mycpus = hwloc_bitmap_alloc();
291 if (hwloc_get_cpubind(opal_hwloc_topology,
292 mycpus,
293 HWLOC_CPUBIND_PROCESS) < 0) {
294 if (NULL != orte_process_info.cpuset) {
295 free(orte_process_info.cpuset);
296 orte_process_info.cpuset = NULL;
297 }
298 if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
299 opal_output(0, "MCW rank %d is not bound",
300 ORTE_PROC_MY_NAME->vpid);
301 }
302 } else {
303
304 if (NULL != orte_process_info.cpuset) {
305 free(orte_process_info.cpuset);
306 orte_process_info.cpuset = NULL;
307 }
308 hwloc_bitmap_list_asprintf(&orte_process_info.cpuset, mycpus);
309
310 if (opal_hwloc_report_bindings || 4 < opal_output_get_verbosity(orte_ess_base_framework.framework_output)) {
311 char tmp1[1024], tmp2[1024];
312 if (OPAL_ERR_NOT_BOUND == opal_hwloc_base_cset2str(tmp1, sizeof(tmp1), opal_hwloc_topology, mycpus)) {
313 opal_output(0, "MCW rank %d is not bound (or bound to all available processors)", ORTE_PROC_MY_NAME->vpid);
314 } else {
315 opal_hwloc_base_cset2mapstr(tmp2, sizeof(tmp2), opal_hwloc_topology, mycpus);
316 opal_output(0, "MCW rank %d bound to %s: %s",
317 ORTE_PROC_MY_NAME->vpid, tmp1, tmp2);
318 }
319 }
320 }
321 hwloc_bitmap_free(mycpus);
322
323 if (NULL != orte_process_info.cpuset) {
324 OPAL_MODEX_SEND_VALUE(ret, OPAL_PMIX_GLOBAL, OPAL_PMIX_CPUSET,
325 orte_process_info.cpuset, OPAL_STRING);
326 }
327 return ORTE_SUCCESS;
328
329 error:
330 if (ORTE_ERR_SILENT != ret) {
331 orte_show_help("help-orte-runtime",
332 "orte_init:startup:internal-failure",
333 true, error, ORTE_ERROR_NAME(ret), ret);
334 }
335
336 return ORTE_ERR_SILENT;
337 }