This source file includes the following definitions.
- orte_rmaps_rr_byslot
- orte_rmaps_rr_bynode
- orte_rmaps_rr_byobj
- byobj_span
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23 #include "orte_config.h"
24 #include "orte/constants.h"
25
26 #include <string.h>
27
28 #include "opal/util/output.h"
29 #include "opal/mca/hwloc/base/base.h"
30
31 #include "orte/util/show_help.h"
32 #include "orte/util/name_fns.h"
33 #include "orte/runtime/orte_globals.h"
34 #include "orte/mca/errmgr/errmgr.h"
35
36 #include "orte/mca/rmaps/base/rmaps_private.h"
37 #include "orte/mca/rmaps/base/base.h"
38 #include "rmaps_rr.h"
39
40 int orte_rmaps_rr_byslot(orte_job_t *jdata,
41 orte_app_context_t *app,
42 opal_list_t *node_list,
43 orte_std_cntr_t num_slots,
44 orte_vpid_t num_procs)
45 {
46 int i, nprocs_mapped;
47 orte_node_t *node;
48 orte_proc_t *proc;
49 int num_procs_to_assign, extra_procs_to_assign=0, nxtra_nodes=0;
50 hwloc_obj_t obj=NULL;
51 float balance;
52 bool add_one=false;
53
54 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
55 "mca:rmaps:rr: mapping by slot for job %s slots %d num_procs %lu",
56 ORTE_JOBID_PRINT(jdata->jobid), (int)num_slots, (unsigned long)num_procs);
57
58
59 if (num_slots < (int)app->num_procs) {
60 if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
61 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
62 true, app->num_procs, app->app, orte_process_info.nodename);
63 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
64 return ORTE_ERR_SILENT;
65 }
66 }
67
68
69
70
71 nprocs_mapped = 0;
72 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
73 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
74 "mca:rmaps:rr:slot working node %s",
75 node->name);
76
77
78
79 if (NULL != node->topology && NULL != node->topology->topo) {
80 obj = hwloc_get_root_obj(node->topology->topo);
81 }
82 if (node->slots <= node->slots_inuse) {
83 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
84 "mca:rmaps:rr:slot node %s is full - skipping",
85 node->name);
86 continue;
87 }
88 if (orte_rmaps_base_pernode) {
89 num_procs_to_assign = 1;
90 } else if (0 < orte_rmaps_base_n_pernode) {
91 num_procs_to_assign = orte_rmaps_base_n_pernode;
92 } else if (0 < orte_rmaps_base_n_persocket) {
93 if (NULL == node->topology) {
94 orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
95 true, node->name);
96 return ORTE_ERR_SILENT;
97 }
98 num_procs_to_assign = orte_rmaps_base_n_persocket * opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, 0, OPAL_HWLOC_AVAILABLE);
99 } else {
100
101 num_procs_to_assign = node->slots - node->slots_inuse;
102 }
103 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
104 "mca:rmaps:rr:slot assigning %d procs to node %s",
105 (int)num_procs_to_assign, node->name);
106
107 for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
108
109 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
110 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
111 OBJ_RETAIN(node);
112 opal_pointer_array_add(jdata->map->nodes, node);
113 ++(jdata->map->num_nodes);
114 }
115 if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
116 return ORTE_ERR_OUT_OF_RESOURCE;
117 }
118 nprocs_mapped++;
119 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
120 }
121 }
122
123 if (nprocs_mapped == app->num_procs) {
124
125 return ORTE_SUCCESS;
126 }
127
128 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
129 "mca:rmaps:rr:slot job %s is oversubscribed - performing second pass",
130 ORTE_JOBID_PRINT(jdata->jobid));
131
132
133
134
135
136 balance = (float)((int)app->num_procs - nprocs_mapped) / (float)opal_list_get_size(node_list);
137 extra_procs_to_assign = (int)balance;
138 if (0 < (balance - (float)extra_procs_to_assign)) {
139
140 nxtra_nodes = app->num_procs - nprocs_mapped - (extra_procs_to_assign * opal_list_get_size(node_list));
141
142
143
144 extra_procs_to_assign++;
145
146 add_one = true;
147 }
148
149 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
150 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
151 "mca:rmaps:rr:slot working node %s",
152 node->name);
153
154
155
156 if (NULL != node->topology && NULL != node->topology->topo) {
157 obj = hwloc_get_root_obj(node->topology->topo);
158 }
159
160
161 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
162 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
163 OBJ_RETAIN(node);
164 opal_pointer_array_add(jdata->map->nodes, node);
165 ++(jdata->map->num_nodes);
166 }
167 if (add_one) {
168 if (0 == nxtra_nodes) {
169 --extra_procs_to_assign;
170 add_one = false;
171 } else {
172 --nxtra_nodes;
173 }
174 }
175 if(node->slots <= node->slots_inuse) {
176
177 num_procs_to_assign = extra_procs_to_assign;
178 }
179 else {
180
181 num_procs_to_assign = node->slots - node->slots_inuse + extra_procs_to_assign;
182 }
183 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
184 "mca:rmaps:rr:slot adding up to %d procs to node %s",
185 num_procs_to_assign, node->name);
186 for (i=0; i < num_procs_to_assign && nprocs_mapped < app->num_procs; i++) {
187 if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
188 return ORTE_ERR_OUT_OF_RESOURCE;
189 }
190 nprocs_mapped++;
191 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
192 }
193
194
195
196 if (node->slots < (int)node->num_procs) {
197
198
199
200 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
201 ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
202
203 if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
204
205
206
207 if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
208 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
209 true, app->num_procs, app->app, orte_process_info.nodename);
210 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
211 return ORTE_ERR_SILENT;
212 } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
213
214 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
215 true, app->num_procs, app->app, orte_process_info.nodename);
216 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
217 return ORTE_ERR_SILENT;
218 }
219 }
220 }
221
222 if (nprocs_mapped == app->num_procs) {
223 break;
224 }
225 }
226 return ORTE_SUCCESS;
227 }
228
229 int orte_rmaps_rr_bynode(orte_job_t *jdata,
230 orte_app_context_t *app,
231 opal_list_t *node_list,
232 orte_std_cntr_t num_slots,
233 orte_vpid_t num_procs)
234 {
235 int j, nprocs_mapped, nnodes;
236 orte_node_t *node;
237 orte_proc_t *proc;
238 int num_procs_to_assign, navg;
239 int extra_procs_to_assign=0, nxtra_nodes=0;
240 hwloc_obj_t obj=NULL;
241 float balance;
242 bool add_one=false;
243 bool oversubscribed=false;
244
245 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
246 "mca:rmaps:rr: mapping by node for job %s app %d slots %d num_procs %lu",
247 ORTE_JOBID_PRINT(jdata->jobid), (int)app->idx,
248 (int)num_slots, (unsigned long)num_procs);
249
250
251 if (num_slots < (int)app->num_procs) {
252 if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
253 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
254 true, app->num_procs, app->app, orte_process_info.nodename);
255 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
256 return ORTE_ERR_SILENT;
257 }
258 oversubscribed = true;
259 }
260
261 nnodes = opal_list_get_size(node_list);
262 nprocs_mapped = 0;
263
264 do {
265
266
267
268
269
270
271
272 navg = ((int)app->num_procs - nprocs_mapped) / nnodes;
273 if (0 == navg) {
274
275
276
277 navg = 1;
278 }
279
280
281 balance = (float)(((int)app->num_procs - nprocs_mapped) - (navg * nnodes)) / (float)nnodes;
282 extra_procs_to_assign = (int)balance;
283 nxtra_nodes = 0;
284 add_one = false;
285 if (0 < (balance - (float)extra_procs_to_assign)) {
286
287 nxtra_nodes = ((int)app->num_procs - nprocs_mapped) - ((navg + extra_procs_to_assign) * nnodes);
288
289
290
291 extra_procs_to_assign++;
292
293 add_one = true;
294 }
295
296 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
297 "mca:rmaps:rr: mapping by node navg %d extra_procs %d extra_nodes %d",
298 navg, extra_procs_to_assign, nxtra_nodes);
299
300 nnodes = 0;
301 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
302
303
304
305 if (NULL != node->topology && NULL != node->topology->topo) {
306 obj = hwloc_get_root_obj(node->topology->topo);
307 }
308
309 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
310 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
311 OBJ_RETAIN(node);
312 opal_pointer_array_add(jdata->map->nodes, node);
313 ++(jdata->map->num_nodes);
314 }
315 if (orte_rmaps_base_pernode) {
316 num_procs_to_assign = 1;
317 } else if (0 < orte_rmaps_base_n_pernode) {
318 num_procs_to_assign = orte_rmaps_base_n_pernode;
319 } else if (0 < orte_rmaps_base_n_persocket) {
320 num_procs_to_assign = orte_rmaps_base_n_persocket * opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, 0, OPAL_HWLOC_AVAILABLE);
321 } else if (oversubscribed) {
322
323 if (add_one) {
324 if (0 == nxtra_nodes) {
325 --extra_procs_to_assign;
326 add_one = false;
327 } else {
328 --nxtra_nodes;
329 }
330 }
331
332 num_procs_to_assign = navg + extra_procs_to_assign;
333 } else if (node->slots <= node->slots_inuse) {
334
335 continue;
336 } else {
337
338
339
340
341
342
343
344
345 if (add_one) {
346 if (0 == nxtra_nodes) {
347 --extra_procs_to_assign;
348 add_one = false;
349 } else {
350 --nxtra_nodes;
351 }
352 }
353
354 if ((node->slots - node->slots_inuse) < (navg + extra_procs_to_assign)) {
355 num_procs_to_assign = node->slots - node->slots_inuse;
356
357 if (num_procs_to_assign == 0) {
358 continue;
359 }
360 } else {
361
362 num_procs_to_assign = navg + extra_procs_to_assign;
363 }
364 OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
365 "%s NODE %s AVG %d ASSIGN %d EXTRA %d",
366 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
367 navg, num_procs_to_assign, extra_procs_to_assign));
368 }
369 nnodes++;
370 OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
371 "%s NODE %s ASSIGNING %d",
372 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name,
373 num_procs_to_assign));
374 for (j=0; j < num_procs_to_assign && nprocs_mapped < app->num_procs; j++) {
375 if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
376 return ORTE_ERR_OUT_OF_RESOURCE;
377 }
378 nprocs_mapped++;
379 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
380 }
381
382
383
384 if (node->slots < (int)node->num_procs) {
385
386
387
388 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
389 ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
390
391 if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
392
393
394
395 if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
396 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
397 true, app->num_procs, app->app, orte_process_info.nodename);
398 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
399 return ORTE_ERR_SILENT;
400 } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
401
402 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
403 true, app->num_procs, app->app, orte_process_info.nodename);
404 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
405 return ORTE_ERR_SILENT;
406 }
407 }
408 }
409 if (nprocs_mapped == app->num_procs) {
410
411 break;
412 }
413 }
414 } while (nprocs_mapped < app->num_procs && 0 < nnodes);
415
416
417 while (nprocs_mapped < app->num_procs) {
418 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
419
420
421
422 if (NULL != node->topology && NULL != node->topology->topo) {
423 obj = hwloc_get_root_obj(node->topology->topo);
424 }
425
426 OPAL_OUTPUT_VERBOSE((20, orte_rmaps_base_framework.framework_output,
427 "%s ADDING PROC TO NODE %s",
428 ORTE_NAME_PRINT(ORTE_PROC_MY_NAME), node->name));
429 if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
430 return ORTE_ERR_OUT_OF_RESOURCE;
431 }
432 nprocs_mapped++;
433 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
434
435
436
437 if (node->slots < (int)node->num_procs) {
438
439
440
441 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
442 ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
443 }
444 if (nprocs_mapped == app->num_procs) {
445
446 break;
447 }
448 }
449 }
450
451 return ORTE_SUCCESS;
452 }
453
454 static int byobj_span(orte_job_t *jdata,
455 orte_app_context_t *app,
456 opal_list_t *node_list,
457 orte_std_cntr_t num_slots,
458 orte_vpid_t num_procs,
459 hwloc_obj_type_t target, unsigned cache_level);
460
461
462
463
464
465 int orte_rmaps_rr_byobj(orte_job_t *jdata,
466 orte_app_context_t *app,
467 opal_list_t *node_list,
468 orte_std_cntr_t num_slots,
469 orte_vpid_t num_procs,
470 hwloc_obj_type_t target, unsigned cache_level)
471 {
472 int i, nmapped, nprocs_mapped;
473 orte_node_t *node;
474 orte_proc_t *proc;
475 int nprocs, start;
476 hwloc_obj_t obj=NULL;
477 unsigned int nobjs;
478 bool add_one;
479 bool second_pass;
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495 if (ORTE_MAPPING_SPAN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
496 return byobj_span(jdata, app, node_list, num_slots,
497 num_procs, target, cache_level);
498 }
499
500 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
501 "mca:rmaps:rr: mapping no-span by %s for job %s slots %d num_procs %lu",
502 hwloc_obj_type_string(target),
503 ORTE_JOBID_PRINT(jdata->jobid),
504 (int)num_slots, (unsigned long)num_procs);
505
506
507 if (num_slots < app->num_procs) {
508 if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
509 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
510 true, app->num_procs, app->app, orte_process_info.nodename);
511 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
512 return ORTE_ERR_SILENT;
513 }
514 }
515
516
517
518
519
520
521 nprocs_mapped = 0;
522 second_pass = false;
523 do {
524 add_one = false;
525 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
526 if (NULL == node->topology || NULL == node->topology->topo) {
527 orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
528 true, node->name);
529 return ORTE_ERR_SILENT;
530 }
531 start = 0;
532
533 nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE);
534 if (0 == nobjs) {
535 continue;
536 }
537 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
538 "mca:rmaps:rr: found %u %s objects on node %s",
539 nobjs, hwloc_obj_type_string(target), node->name);
540
541
542
543 if (ORTE_JOBID_INVALID != jdata->originator.jobid) {
544 start = (jdata->bkmark_obj + 1) % nobjs;
545 }
546
547 if (orte_rmaps_base_pernode) {
548 nprocs = 1;
549 } else if (0 < orte_rmaps_base_n_pernode) {
550 nprocs = orte_rmaps_base_n_pernode;
551 } else if (0 < orte_rmaps_base_n_persocket) {
552 if (HWLOC_OBJ_PACKAGE == target) {
553 nprocs = orte_rmaps_base_n_persocket * nobjs;
554 } else {
555 nprocs = orte_rmaps_base_n_persocket * opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, 0, OPAL_HWLOC_AVAILABLE);
556 }
557 } else {
558 nprocs = node->slots - node->slots_inuse;
559 }
560 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
561 "mca:rmaps:rr: calculated nprocs %d", nprocs);
562 if (nprocs < 1) {
563 if (second_pass) {
564
565
566
567 nprocs = 1;
568
569
570
571 start = node->num_procs % nobjs;
572 } else {
573 continue;
574 }
575 }
576
577 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
578 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
579 OBJ_RETAIN(node);
580 opal_pointer_array_add(jdata->map->nodes, node);
581 ++(jdata->map->num_nodes);
582 }
583 nmapped = 0;
584 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
585 "mca:rmaps:rr: assigning nprocs %d", nprocs);
586 do {
587
588 for (i=0; i < (int)nobjs && nmapped < nprocs && nprocs_mapped < (int)app->num_procs; i++) {
589 opal_output_verbose(20, orte_rmaps_base_framework.framework_output,
590 "mca:rmaps:rr: assigning proc to object %d", (i+start) % nobjs);
591
592 if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, (i+start) % nobjs, OPAL_HWLOC_AVAILABLE))) {
593 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
594 return ORTE_ERR_NOT_FOUND;
595 }
596 if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) {
597 orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true,
598 orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj),
599 orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
600 return ORTE_ERR_SILENT;
601 }
602 if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
603 return ORTE_ERR_OUT_OF_RESOURCE;
604 }
605 nprocs_mapped++;
606 nmapped++;
607 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
608 }
609 } while (nmapped < nprocs && nprocs_mapped < (int)app->num_procs);
610 add_one = true;
611
612
613
614 if (node->slots < (int)node->num_procs) {
615
616
617
618 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
619 ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
620
621 if (ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_SLOTS_GIVEN)) {
622
623
624
625 if (!(ORTE_MAPPING_SUBSCRIBE_GIVEN & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping))) {
626 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
627 true, app->num_procs, app->app, orte_process_info.nodename);
628 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
629 return ORTE_ERR_SILENT;
630 } else if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
631
632 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
633 true, app->num_procs, app->app, orte_process_info.nodename);
634 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
635 return ORTE_ERR_SILENT;
636 }
637 }
638 }
639 if (nprocs_mapped == app->num_procs) {
640
641 break;
642 }
643 }
644 second_pass = true;
645 } while (add_one && nprocs_mapped < app->num_procs);
646
647 if (nprocs_mapped < app->num_procs) {
648
649 return ORTE_ERR_NOT_FOUND;
650 }
651
652 return ORTE_SUCCESS;
653 }
654
655 static int byobj_span(orte_job_t *jdata,
656 orte_app_context_t *app,
657 opal_list_t *node_list,
658 orte_std_cntr_t num_slots,
659 orte_vpid_t num_procs,
660 hwloc_obj_type_t target, unsigned cache_level)
661 {
662 int i, j, nprocs_mapped, navg;
663 orte_node_t *node;
664 orte_proc_t *proc;
665 int nprocs, nxtra_objs;
666 hwloc_obj_t obj=NULL;
667 unsigned int nobjs;
668
669 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
670 "mca:rmaps:rr: mapping span by %s for job %s slots %d num_procs %lu",
671 hwloc_obj_type_string(target),
672 ORTE_JOBID_PRINT(jdata->jobid),
673 (int)num_slots, (unsigned long)num_procs);
674
675
676 if (num_slots < (int)app->num_procs) {
677 if (ORTE_MAPPING_NO_OVERSUBSCRIBE & ORTE_GET_MAPPING_DIRECTIVE(jdata->map->mapping)) {
678 orte_show_help("help-orte-rmaps-base.txt", "orte-rmaps-base:alloc-error",
679 true, app->num_procs, app->app, orte_process_info.nodename);
680 ORTE_UPDATE_EXIT_STATUS(ORTE_ERROR_DEFAULT_EXIT_CODE);
681 return ORTE_ERR_SILENT;
682 }
683 }
684
685
686
687
688 nobjs = 0;
689 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
690 if (NULL == node->topology || NULL == node->topology->topo) {
691 orte_show_help("help-orte-rmaps-ppr.txt", "ppr-topo-missing",
692 true, node->name);
693 return ORTE_ERR_SILENT;
694 }
695
696 nobjs += opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE);
697 }
698
699 if (0 == nobjs) {
700 return ORTE_ERR_NOT_FOUND;
701 }
702
703
704 navg = app->num_procs / nobjs;
705 if (0 == navg) {
706
707
708
709 navg = 1;
710 }
711
712
713 if (0 > (nxtra_objs = app->num_procs - (navg * nobjs))) {
714 nxtra_objs = 0;
715 }
716
717 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
718 "mca:rmaps:rr: mapping by %s navg %d extra_objs %d",
719 hwloc_obj_type_string(target),
720 navg, nxtra_objs);
721
722 nprocs_mapped = 0;
723 OPAL_LIST_FOREACH(node, node_list, orte_node_t) {
724
725 if (!ORTE_FLAG_TEST(node, ORTE_NODE_FLAG_MAPPED)) {
726 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_MAPPED);
727 OBJ_RETAIN(node);
728 opal_pointer_array_add(jdata->map->nodes, node);
729 ++(jdata->map->num_nodes);
730 }
731
732 nobjs = opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, target, cache_level, OPAL_HWLOC_AVAILABLE);
733 opal_output_verbose(2, orte_rmaps_base_framework.framework_output,
734 "mca:rmaps:rr:byobj: found %d objs on node %s", nobjs, node->name);
735
736 for (i=0; i < (int)nobjs && nprocs_mapped < (int)app->num_procs; i++) {
737
738 if (NULL == (obj = opal_hwloc_base_get_obj_by_type(node->topology->topo, target, cache_level, i, OPAL_HWLOC_AVAILABLE))) {
739 ORTE_ERROR_LOG(ORTE_ERR_NOT_FOUND);
740 return ORTE_ERR_NOT_FOUND;
741 }
742 if (orte_rmaps_base.cpus_per_rank > (int)opal_hwloc_base_get_npus(node->topology->topo, obj)) {
743 orte_show_help("help-orte-rmaps-base.txt", "mapping-too-low", true,
744 orte_rmaps_base.cpus_per_rank, opal_hwloc_base_get_npus(node->topology->topo, obj),
745 orte_rmaps_base_print_mapping(orte_rmaps_base.mapping));
746 return ORTE_ERR_SILENT;
747 }
748
749 if (orte_rmaps_base_pernode) {
750 nprocs = 1;
751 } else if (0 < orte_rmaps_base_n_pernode) {
752 nprocs = orte_rmaps_base_n_pernode;
753 } else if (0 < orte_rmaps_base_n_persocket) {
754 if (HWLOC_OBJ_PACKAGE == target) {
755 nprocs = orte_rmaps_base_n_persocket * nobjs;
756 } else {
757 nprocs = orte_rmaps_base_n_persocket * opal_hwloc_base_get_nbobjs_by_type(node->topology->topo, HWLOC_OBJ_PACKAGE, 0, OPAL_HWLOC_AVAILABLE);
758 }
759 } else {
760 nprocs = navg;
761 }
762 if (0 < nxtra_objs) {
763 nprocs++;
764 nxtra_objs--;
765 }
766
767 for (j=0; j < nprocs && nprocs_mapped < app->num_procs; j++) {
768 if (NULL == (proc = orte_rmaps_base_setup_proc(jdata, node, app->idx))) {
769 return ORTE_ERR_OUT_OF_RESOURCE;
770 }
771 nprocs_mapped++;
772 orte_set_attribute(&proc->attributes, ORTE_PROC_HWLOC_LOCALE, ORTE_ATTR_LOCAL, obj, OPAL_PTR);
773 }
774
775 jdata->bookmark = node;
776 }
777
778
779
780 if (node->slots < (int)node->num_procs) {
781
782
783
784 ORTE_FLAG_SET(node, ORTE_NODE_FLAG_OVERSUBSCRIBED);
785 ORTE_FLAG_SET(jdata, ORTE_JOB_FLAG_OVERSUBSCRIBED);
786 }
787 if (nprocs_mapped == app->num_procs) {
788
789 break;
790 }
791 }
792
793 return ORTE_SUCCESS;
794 }