This source file includes the following definitions:
- cpuiddump_free
- cpuiddump_read
- cpuiddump_find_by_input
- cpuid_or_from_dump
- fill_amd_cache
- look_proc
- hwloc_x86_add_cpuinfos
- summarize
- look_procs
- hwloc_x86_os_state_save
- hwloc_x86_os_state_restore
- hwloc_x86_os_state_save
- hwloc_x86_os_state_restore
- fake_get_cpubind
- fake_set_cpubind
- hwloc_look_x86
- hwloc_x86_discover
- hwloc_x86_check_cpuiddump_input
- hwloc_x86_backend_disable
- hwloc_x86_component_instantiate

#include <private/autogen/config.h>
#include <hwloc.h>
#include <private/private.h>
#include <private/debug.h>
#include <private/misc.h>

#include <private/cpuid-x86.h>

#include <sys/types.h>
#ifdef HAVE_DIRENT_H
#include <dirent.h>
#endif
#ifdef HAVE_VALGRIND_VALGRIND_H
#include <valgrind/valgrind.h>
#endif

struct hwloc_x86_backend_data_s {
  unsigned nbprocs;
  hwloc_bitmap_t apicid_set;
  int apicid_unique;
  char *src_cpuiddump_path;
  int is_knl;
};

/************************************
 * Management of cpuid dumps as input
 * (used when HWLOC_CPUID_PATH is set)
 */
struct cpuiddump {
  unsigned nr;
  struct cpuiddump_entry {
    unsigned inmask; /* which of ineax etc. are significant inputs */
    unsigned ineax;
    unsigned inebx;
    unsigned inecx;
    unsigned inedx;
    unsigned outeax;
    unsigned outebx;
    unsigned outecx;
    unsigned outedx;
  } *entries;
};
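
/*
 * Each dump file (one per PU, named pu%u in the dump directory) contains
 * one line per CPUID query, in the format parsed by cpuiddump_read() below.
 * A hypothetical entry could look like:
 *
 *   0x5 0x00000004 0x0 0x00000002 0x0 => 0x1c004143 0x01c0003f 0x000001ff 0x00000000
 *
 * The first word is inmask (bit 0: EAX is a significant input, bit 1: EBX,
 * bit 2: ECX, bit 3: EDX), followed by the four input registers, then "=>"
 * and the four output registers. Lines starting with '#' are comments.
 */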

static void
cpuiddump_free(struct cpuiddump *cpuiddump)
{
  if (cpuiddump->nr)
    free(cpuiddump->entries);
  free(cpuiddump);
}

static struct cpuiddump *
cpuiddump_read(const char *dirpath, unsigned idx)
{
  struct cpuiddump *cpuiddump;
  struct cpuiddump_entry *cur;
  FILE *file;
  char line[128];
  unsigned nr;

  cpuiddump = malloc(sizeof(*cpuiddump));
  if (!cpuiddump) {
    fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
    goto out;
  }

  {
    size_t filenamelen = strlen(dirpath) + 15;
    HWLOC_VLA(char, filename, filenamelen);
    snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
    file = fopen(filename, "r");
    if (!file) {
      fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
      goto out_with_dump;
    }
  }

  /* count the lines to allocate the entries array, then rewind and parse */
  nr = 0;
  while (fgets(line, sizeof(line), file))
    nr++;
  cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry));
  if (!cpuiddump->entries) {
    fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
    goto out_with_file;
  }

  fseek(file, 0, SEEK_SET);
  cur = &cpuiddump->entries[0];
  nr = 0;
  while (fgets(line, sizeof(line), file)) {
    if (*line == '#')
      continue;
    if (sscanf(line, "%x %x %x %x %x => %x %x %x %x",
               &cur->inmask,
               &cur->ineax, &cur->inebx, &cur->inecx, &cur->inedx,
               &cur->outeax, &cur->outebx, &cur->outecx, &cur->outedx) == 9) {
      cur++;
      nr++;
    }
  }

  cpuiddump->nr = nr;
  fclose(file);
  return cpuiddump;

 out_with_file:
  fclose(file);
 out_with_dump:
  free(cpuiddump);
 out:
  return NULL;
}
static void
cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx, struct cpuiddump *cpuiddump)
{
  unsigned i;

  for(i=0; i<cpuiddump->nr; i++) {
    struct cpuiddump_entry *entry = &cpuiddump->entries[i];
    if ((entry->inmask & 0x1) && *eax != entry->ineax)
      continue;
    if ((entry->inmask & 0x2) && *ebx != entry->inebx)
      continue;
    if ((entry->inmask & 0x4) && *ecx != entry->inecx)
      continue;
    if ((entry->inmask & 0x8) && *edx != entry->inedx)
      continue;
    *eax = entry->outeax;
    *ebx = entry->outebx;
    *ecx = entry->outecx;
    *edx = entry->outedx;
    return;
  }

  fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
          *eax, *ebx, *ecx, *edx);
  *eax = 0;
  *ebx = 0;
  *ecx = 0;
  *edx = 0;
}

static void cpuid_or_from_dump(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *edx, struct cpuiddump *src_cpuiddump)
{
  if (src_cpuiddump) {
    cpuiddump_find_by_input(eax, ebx, ecx, edx, src_cpuiddump);
  } else {
    hwloc_x86_cpuid(eax, ebx, ecx, edx);
  }
}
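
/*
 * A minimal usage sketch: callers load the leaf (and subleaf if needed)
 * into eax/ecx before the call, and read all four registers afterwards,
 * whether they come from the live instruction or from a dump:
 *
 *   unsigned eax = 0x0, ebx, ecx = 0, edx;
 *   cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
 *   (eax now holds the highest supported standard leaf)
 */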

/*******************************
 * Core detection routines and structures
 */

#define has_topoext(features) ((features)[6] & (1 << 22))
#define has_x2apic(features) ((features)[4] & (1 << 21))
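
/*
 * The features[] array is filled in hwloc_look_x86() below, with (a subset
 * of) the same indexing as the Linux kernel cpufeature words: features[0]
 * and features[4] hold EDX and ECX of leaf 0x1, features[1] and features[6]
 * hold EDX and ECX of leaf 0x80000001, and features[9] holds EBX of leaf
 * 0x7. So has_topoext() tests the AMD TOPOEXT bit (leaf 0x80000001 ECX
 * bit 22) and has_x2apic() tests the x2APIC bit (leaf 0x1 ECX bit 21).
 */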

struct cacheinfo {
  hwloc_obj_cache_type_t type;
  unsigned level;
  unsigned nbthreads_sharing;
  unsigned cacheid;

  unsigned linesize;
  unsigned linepart;
  int inclusive;
  int ways;
  unsigned sets;
  unsigned long size;
};
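
/*
 * For caches enumerated through CPUID leaves 0x04 and 0x8000001d below,
 * these fields satisfy size = linesize * linepart * ways * sets
 * (with ways == -1 denoting a fully-associative cache).
 */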

struct procinfo {
  unsigned present;
  unsigned apicid;
  unsigned packageid;
  unsigned nodeid;
  unsigned unitid;
  unsigned threadid;
  unsigned coreid;
  unsigned *otherids;
  unsigned levels;
  unsigned numcaches;
  struct cacheinfo *cache;
  char cpuvendor[13];
  char cpumodel[3*4*4+1]; /* 3 leaves (0x80000002-4) * 4 registers * 4 bytes, plus ending \0 */
  unsigned cpustepping;
  unsigned cpumodelnumber;
  unsigned cpufamilynumber;
};

enum cpuid_type {
  intel,
  amd,
  zhaoxin,
  unknown
};

static void fill_amd_cache(struct procinfo *infos, unsigned level, hwloc_obj_cache_type_t type, unsigned nbthreads_sharing, unsigned cpuid)
{
  struct cacheinfo *cache, *tmpcaches;
  unsigned cachenum;
  unsigned long size = 0;

  if (level == 1)
    size = ((cpuid >> 24)) << 10; /* leaf 0x80000005 reports the size in KB */
  else if (level == 2)
    size = ((cpuid >> 16)) << 10; /* leaf 0x80000006 reports the L2 size in KB */
  else if (level == 3)
    size = ((cpuid >> 18)) << 19; /* leaf 0x80000006 reports the L3 size in 512KB units */
  if (!size)
    return;

  tmpcaches = realloc(infos->cache, (infos->numcaches+1)*sizeof(*infos->cache));
  if (!tmpcaches)
    /* failed to allocate, ignore that cache */
    return;
  infos->cache = tmpcaches;
  cachenum = infos->numcaches++;

  cache = &infos->cache[cachenum];

  cache->type = type;
  cache->level = level;
  cache->nbthreads_sharing = nbthreads_sharing;
  cache->linesize = cpuid & 0xff;
  cache->linepart = 0;
  cache->inclusive = 0; /* these legacy leaves do not report inclusiveness */

  if (level == 1) {
    cache->ways = (cpuid >> 16) & 0xff;
    if (cache->ways == 0xff)
      /* fully associative */
      cache->ways = -1;
  } else {
    static const unsigned ways_tab[] = { 0, 1, 2, 0, 4, 0, 8, 0, 16, 0, 32, 48, 64, 96, 128, -1 };
    unsigned ways = (cpuid >> 12) & 0xf;
    cache->ways = ways_tab[ways];
  }
  cache->size = size;
  cache->sets = 0;

  hwloc_debug("cache L%u t%u linesize %u ways %d size %luKB\n", cache->level, cache->nbthreads_sharing, cache->linesize, cache->ways, cache->size >> 10);
}
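
/*
 * A worked example of the decoding above, with a hypothetical register
 * value: if CPUID leaf 0x80000006 returned ECX = 0x02006140, then
 * fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx) would extract
 * linesize = 0x40 = 64 bytes, ways = ways_tab[(0x02006140 >> 12) & 0xf]
 * = ways_tab[6] = 8, and size = (0x02006140 >> 16) KB = 0x200 KB = 512KB
 * for the unified L2 cache.
 */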


static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, struct cpuiddump *src_cpuiddump)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned eax, ebx, ecx = 0, edx;
  unsigned cachenum;
  struct cacheinfo *cache;
  unsigned regs[4];
  unsigned legacy_max_log_proc;
  unsigned legacy_log_proc_id;
  unsigned _model, _extendedmodel, _family, _extendedfamily;

  infos->present = 1;

  /* Get apicid and legacy_max_log_proc from cpuid 0x01,
   * and deduce tentative package/thread ids. */
  eax = 0x01;
  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
  infos->apicid = ebx >> 24;
  if (edx & (1 << 28))
    legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
  else
    legacy_max_log_proc = 1;
  hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc);
  infos->packageid = infos->apicid / legacy_max_log_proc;
  legacy_log_proc_id = infos->apicid % legacy_max_log_proc;
  hwloc_debug("phys %u legacy thread %u\n", infos->packageid, legacy_log_proc_id);

  /* Get cpu model/family numbers from the same leaf. */
  _model          = (eax>>4) & 0xf;
  _extendedmodel  = (eax>>16) & 0xf;
  _family         = (eax>>8) & 0xf;
  _extendedfamily = (eax>>20) & 0xff;
  if ((cpuid_type == intel || cpuid_type == amd) && _family == 0xf) {
    infos->cpufamilynumber = _family + _extendedfamily;
  } else {
    infos->cpufamilynumber = _family;
  }
  if ((cpuid_type == intel && (_family == 0x6 || _family == 0xf))
      || (cpuid_type == amd && _family == 0xf)
      || (cpuid_type == zhaoxin && (_family == 0x6 || _family == 0x7))) {
    infos->cpumodelnumber = _model + (_extendedmodel << 4);
  } else {
    infos->cpumodelnumber = _model;
  }
  infos->cpustepping = eax & 0xf;

  if (cpuid_type == intel && infos->cpufamilynumber == 0x6 &&
      (infos->cpumodelnumber == 0x57 || infos->cpumodelnumber == 0x85))
    data->is_knl = 1; /* KNL or KNM */

  /* Get the cpu vendor string from cpuid 0x00:
   * it lies in ebx,edx,ecx, hence the regs[1..3] ordering below. */
  memset(regs, 0, sizeof(regs));
  regs[0] = 0;
  cpuid_or_from_dump(&regs[0], &regs[1], &regs[3], &regs[2], src_cpuiddump);
  memcpy(infos->cpuvendor, regs+1, 4*3);
  /* infos was calloc'ed, cpuvendor already ends with \0 */

  /* Get the cpu model string from cpuid 0x80000002-4. */
  if (highest_ext_cpuid >= 0x80000004) {
    memset(regs, 0, sizeof(regs));
    regs[0] = 0x80000002;
    cpuid_or_from_dump(&regs[0], &regs[1], &regs[2], &regs[3], src_cpuiddump);
    memcpy(infos->cpumodel, regs, 4*4);
    regs[0] = 0x80000003;
    cpuid_or_from_dump(&regs[0], &regs[1], &regs[2], &regs[3], src_cpuiddump);
    memcpy(infos->cpumodel + 4*4, regs, 4*4);
    regs[0] = 0x80000004;
    cpuid_or_from_dump(&regs[0], &regs[1], &regs[2], &regs[3], src_cpuiddump);
    memcpy(infos->cpumodel + 4*4*2, regs, 4*4);
    /* infos was calloc'ed, cpumodel already ends with \0 */
  }

  /* Get core/thread information from cpuid 0x80000008
   * (not supported on Intel nor Zhaoxin). */
  if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000008) {
    unsigned max_nbcores;
    unsigned max_nbthreads;
    unsigned coreidsize;
    unsigned logprocid;
    eax = 0x80000008;
    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
    coreidsize = (ecx >> 12) & 0xf;
    hwloc_debug("core ID size: %u\n", coreidsize);
    if (!coreidsize) {
      max_nbcores = (ecx & 0xff) + 1;
    } else
      max_nbcores = 1 << coreidsize;
    hwloc_debug("Thus max # of cores: %u\n", max_nbcores);

    /* this leaf does not report threads per core, assume 1;
     * SMT is detected through the x2APIC/topoext paths instead */
    max_nbthreads = 1;
    hwloc_debug("and max # of threads: %u\n", max_nbthreads);

    /* override the legacy package/thread ids computed from leaf 0x01 above */
    infos->packageid = infos->apicid / max_nbcores;
    logprocid = infos->apicid % max_nbcores;
    infos->threadid = logprocid % max_nbthreads;
    infos->coreid = logprocid / max_nbthreads;
    hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
  }

  infos->numcaches = 0;
  infos->cache = NULL;

  /* Get apicid, nodeid, unitid/coreid from cpuid 0x8000001e
   * (AMD topology extension). */
  if (cpuid_type != intel && cpuid_type != zhaoxin && has_topoext(features)) {
    unsigned apic_id, node_id, nodes_per_proc;

    /* the caches are reported through leaf 0x8000001d below instead */
    assert(!infos->numcaches);

    eax = 0x8000001e;
    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
    infos->apicid = apic_id = eax;

    if (infos->cpufamilynumber == 0x16) {
      /* ecx is reserved on family 0x16 */
      node_id = 0;
      nodes_per_proc = 1;
    } else {
      node_id = ecx & 0xff;
      nodes_per_proc = ((ecx >> 8) & 7) + 1;
    }
    infos->nodeid = node_id;
    if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
        || (infos->cpufamilynumber == 0x17 && nodes_per_proc > 4)) {
      hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
    }

    if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in family 0x15 and compute-units were only used in 0x15 and 0x16 */
      unsigned unit_id, cores_per_unit;
      infos->unitid = unit_id = ebx & 0xff;
      cores_per_unit = ((ebx >> 8) & 0xff) + 1;
      hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, node_id, cores_per_unit, unit_id);
      /* on these families the leaf reports compute units, not cores:
       * unitid is filled here while coreid is left to the other paths */
    } else {
      unsigned core_id, threads_per_core;
      infos->coreid = core_id = ebx & 0xff;
      threads_per_core = ((ebx >> 8) & 0xff) + 1;
      hwloc_debug("topoext %08x, %u nodes, node %u, %u threads in core %u\n", apic_id, nodes_per_proc, node_id, threads_per_core, core_id);
    }

    /* Get cache information from cpuid 0x8000001d (AMD topology extension). */
    for (cachenum = 0; ; cachenum++) {
      eax = 0x8000001d;
      ecx = cachenum;
      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
      if ((eax & 0x1f) == 0)
        break;
      infos->numcaches++;
    }

    cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
    if (cache) {
      for (cachenum = 0; ; cachenum++) {
        unsigned long linesize, linepart, ways, sets;
        eax = 0x8000001d;
        ecx = cachenum;
        cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);

        if ((eax & 0x1f) == 0)
          break;
        switch (eax & 0x1f) {
        case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
        case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
        default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
        }

        cache->level = (eax >> 5) & 0x7;

        cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;

        cache->linesize = linesize = (ebx & 0xfff) + 1;
        cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
        ways = ((ebx >> 22) & 0x3ff) + 1;

        if (eax & (1 << 9))
          /* fully associative */
          cache->ways = -1;
        else
          cache->ways = ways;
        cache->sets = sets = ecx + 1;
        cache->size = linesize * linepart * ways * sets;
        cache->inclusive = edx & 0x2;

        hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
                    cachenum, cache->level,
                    cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
                    cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);

        cache++;
      }
    } else {
      infos->numcaches = 0;
    }
  } else {
    /* No topoext support: get cache information from cpuid 0x80000005 and
     * 0x80000006 (not supported on Intel nor Zhaoxin). */
    if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000005) {
      eax = 0x80000005;
      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
      fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_DATA, 1, ecx); /* private L1d */
      fill_amd_cache(infos, 1, HWLOC_OBJ_CACHE_INSTRUCTION, 1, edx); /* private L1i */
    }
    if (cpuid_type != intel && cpuid_type != zhaoxin && highest_ext_cpuid >= 0x80000006) {
      eax = 0x80000006;
      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
      if (ecx & 0xf000)
        /* This is actually supported on Intel but LinePerTag isn't returned in bits 8-11.
         * Could be useful if some Intels (at least before Core micro-architecture)
         * support this leaf without leaf 0x04. */
        fill_amd_cache(infos, 2, HWLOC_OBJ_CACHE_UNIFIED, 1, ecx); /* private L2u */
      if (edx & 0xf000)
        fill_amd_cache(infos, 3, HWLOC_OBJ_CACHE_UNIFIED, legacy_max_log_proc, edx); /* package-wide L3u */
    }
  }

  /* Get thread/core and cache information from cpuid 0x04
   * (not supported on AMD). */
  if (cpuid_type != amd && highest_cpuid >= 0x04) {
    unsigned max_nbcores;
    unsigned max_nbthreads;
    unsigned level;
    struct cacheinfo *tmpcaches;
    unsigned oldnumcaches = infos->numcaches; /* in case we got caches above */

    for (cachenum = 0; ; cachenum++) {
      eax = 0x04;
      ecx = cachenum;
      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);

      hwloc_debug("cache %u type %u\n", cachenum, eax & 0x1f);
      if ((eax & 0x1f) == 0)
        break;
      level = (eax >> 5) & 0x7;
      if (data->is_knl && level == 3)
        /* KNL reports incorrect L3 information, ignore it */
        break;
      infos->numcaches++;

      if (!cachenum) {
        /* by the way, get thread/core information from the first cache */
        max_nbcores = ((eax >> 26) & 0x3f) + 1;
        max_nbthreads = legacy_max_log_proc / max_nbcores;
        hwloc_debug("thus %u threads\n", max_nbthreads);
        infos->threadid = legacy_log_proc_id % max_nbthreads;
        infos->coreid = legacy_log_proc_id / max_nbthreads;
        hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
      }
    }

    tmpcaches = realloc(infos->cache, infos->numcaches * sizeof(*infos->cache));
    if (tmpcaches) {
      infos->cache = tmpcaches;
      cache = &infos->cache[oldnumcaches];

      for (cachenum = 0; ; cachenum++) {
        unsigned long linesize, linepart, ways, sets;
        eax = 0x04;
        ecx = cachenum;
        cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);

        if ((eax & 0x1f) == 0)
          break;
        level = (eax >> 5) & 0x7;
        if (data->is_knl && level == 3)
          /* KNL reports incorrect L3 information, see above */
          break;
        switch (eax & 0x1f) {
        case 1: cache->type = HWLOC_OBJ_CACHE_DATA; break;
        case 2: cache->type = HWLOC_OBJ_CACHE_INSTRUCTION; break;
        default: cache->type = HWLOC_OBJ_CACHE_UNIFIED; break;
        }

        cache->level = level;
        cache->nbthreads_sharing = ((eax >> 14) & 0xfff) + 1;

        cache->linesize = linesize = (ebx & 0xfff) + 1;
        cache->linepart = linepart = ((ebx >> 12) & 0x3ff) + 1;
        ways = ((ebx >> 22) & 0x3ff) + 1;
        if (eax & (1 << 9))
          /* fully associative */
          cache->ways = -1;
        else
          cache->ways = ways;
        cache->sets = sets = ecx + 1;
        cache->size = linesize * linepart * ways * sets;
        cache->inclusive = edx & 0x2;

        hwloc_debug("cache %u L%u%c t%u linesize %lu linepart %lu ways %lu sets %lu, size %luKB\n",
                    cachenum, cache->level,
                    cache->type == HWLOC_OBJ_CACHE_DATA ? 'd' : cache->type == HWLOC_OBJ_CACHE_INSTRUCTION ? 'i' : 'u',
                    cache->nbthreads_sharing, linesize, linepart, ways, sets, cache->size >> 10);
        cache++;
      }
    }
  }

  /* Get package/core/thread information from cpuid 0x0b
   * (Intel x2APIC). */
  if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x0b && has_x2apic(features)) {
    unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
    /* count the valid subleaves first */
    for (level = 0; ; level++) {
      ecx = level;
      eax = 0x0b;
      cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
      if (!eax && !ebx)
        break;
    }
    if (level) {
      infos->otherids = malloc(level * sizeof(*infos->otherids));
      if (infos->otherids) {
        infos->levels = level;
        for (level = 0; ; level++) {
          ecx = level;
          eax = 0x0b;
          cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
          if (!eax && !ebx)
            break;
          apic_nextshift = eax & 0x1f;
          apic_number = ebx & 0xffff;
          apic_type = (ecx & 0xff00) >> 8;
          apic_id = edx;
          id = (apic_id >> apic_shift) & ((1 << (apic_nextshift - apic_shift)) - 1);
          hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
          infos->apicid = apic_id;
          infos->otherids[level] = UINT_MAX;
          switch (apic_type) {
          case 1:
            infos->threadid = id;
            /* apic_number is the actual number of threads per core */
            break;
          case 2:
            infos->coreid = id;
            /* apic_number is the actual number of threads per package */
            break;
          default:
            hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
            infos->otherids[level] = apic_id >> apic_shift;
            break;
          }
          apic_shift = apic_nextshift;
        }
        infos->apicid = apic_id;
        infos->packageid = apic_id >> apic_shift;
        hwloc_debug("x2APIC remainder: %u\n", infos->packageid);
        hwloc_debug("this is thread %u of core %u\n", infos->threadid, infos->coreid);
      }
    }
  }

  /* Now that we have all the info, compute cacheids and apply quirks. */
  for (cachenum = 0; cachenum < infos->numcaches; cachenum++) {
    cache = &infos->cache[cachenum];

    /* default cacheid value: the cache is shared by nbthreads_sharing contiguous APIC ids */
    cache->cacheid = infos->apicid / cache->nbthreads_sharing;

    if (cpuid_type == amd) {
      /* AMD quirks */
      if (infos->cpufamilynumber == 0x17
          && cache->level == 3 && cache->nbthreads_sharing == 6) {
        /* AMD family 0x17 reserves 8 APIC ids per L3 (one per CCX)
         * even when only 6 of them are actually used. */
        cache->cacheid = infos->apicid / 8;

      } else if (infos->cpufamilynumber == 0x10 && infos->cpumodelnumber == 0x9
          && cache->level == 3
          && (cache->ways == -1 || (cache->ways % 2 == 0)) && cache->nbthreads_sharing >= 8) {
        /* AMD family 0x10 model 0x9 (Magny-Cours) reports its dual-die L3
         * as a single cache; split size, ways and sharing in two. */
        if (cache->nbthreads_sharing == 16)
          cache->nbthreads_sharing = 12; /* nbthreads_sharing is a power of 2 but the processor actually has 8 or 12 cores */
        cache->nbthreads_sharing /= 2;
        cache->size /= 2;
        if (cache->ways != -1)
          cache->ways /= 2;
        /* APIC ids may not be aligned on multiples of nbthreads_sharing here,
         * so a plain apicid / nbthreads_sharing would not give unique ids;
         * compute the cacheid within the package instead. */
        cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
          + 2 * (infos->apicid / legacy_max_log_proc); /* add 2 caches per previous package */

      } else if (infos->cpufamilynumber == 0x15
          && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
          && cache->level == 3 && cache->nbthreads_sharing == 6) {
        /* AMD Bulldozer and Piledriver 12-core processors have the same
         * APIC-id layout as Magny-Cours above. */
        cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
          + 2 * (infos->apicid / legacy_max_log_proc); /* add 2 caches per previous package */
      }
    }
  }

  if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
    data->apicid_unique = 0;
  else
    hwloc_bitmap_set(data->apicid_set, infos->apicid);
}

static void
hwloc_x86_add_cpuinfos(hwloc_obj_t obj, struct procinfo *info, int replace)
{
  char number[8];
  if (info->cpuvendor[0])
    hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUVendor", info->cpuvendor, replace);
  snprintf(number, sizeof(number), "%u", info->cpufamilynumber);
  hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUFamilyNumber", number, replace);
  snprintf(number, sizeof(number), "%u", info->cpumodelnumber);
  hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUModelNumber", number, replace);
  if (info->cpumodel[0]) {
    const char *c = info->cpumodel;
    while (*c == ' ')
      c++;
    hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUModel", c, replace);
  }
  snprintf(number, sizeof(number), "%u", info->cpustepping);
  hwloc__add_info_nodup(&obj->infos, &obj->infos_count, "CPUStepping", number, replace);
}

/* Analyse the information stored in infos, and build/annotate topology levels accordingly */
static void summarize(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery)
{
  struct hwloc_topology *topology = backend->topology;
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned nbprocs = data->nbprocs;
  hwloc_bitmap_t complete_cpuset = hwloc_bitmap_alloc();
  unsigned i, j, l, level;
  int one = -1;
  hwloc_bitmap_t remaining_cpuset;
  int gotnuma = 0;

  for (i = 0; i < nbprocs; i++)
    if (infos[i].present) {
      hwloc_bitmap_set(complete_cpuset, i);
      one = i;
    }

  if (one == -1) {
    hwloc_bitmap_free(complete_cpuset);
    return;
  }

  remaining_cpuset = hwloc_bitmap_alloc();

  /* When fulldiscovery=0, we only annotate existing objects and add missing
   * caches: if the x86 and native backends disagree, we don't know which one
   * to trust, so we avoid creating other new objects. */

  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_PACKAGE)) {
    /* Look for packages */
    hwloc_obj_t package;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      if (fulldiscovery) {
        unsigned packageid = infos[i].packageid;
        hwloc_bitmap_t package_cpuset = hwloc_bitmap_alloc();

        for (j = i; j < nbprocs; j++) {
          if (infos[j].packageid == packageid) {
            hwloc_bitmap_set(package_cpuset, j);
            hwloc_bitmap_clr(remaining_cpuset, j);
          }
        }
        package = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PACKAGE, packageid);
        package->cpuset = package_cpuset;

        hwloc_x86_add_cpuinfos(package, &infos[i], 0);

        hwloc_debug_1arg_bitmap("os package %u has cpuset %s\n",
                                packageid, package_cpuset);
        hwloc_insert_object_by_cpuset(topology, package);

      } else {
        /* Annotate previously-existing packages */
        hwloc_bitmap_t set = hwloc_bitmap_alloc();
        hwloc_bitmap_set(set, i);
        package = hwloc_get_next_obj_covering_cpuset_by_type(topology, set, HWLOC_OBJ_PACKAGE, NULL);
        hwloc_bitmap_free(set);
        if (package) {
          /* Found the package above that PU, annotate it */
          hwloc_x86_add_cpuinfos(package, &infos[i], 1);
          hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, package->cpuset);
        } else {
          /* No package above that PU?! Annotate the root object instead. */
          hwloc_x86_add_cpuinfos(hwloc_get_root_obj(topology), &infos[i], 1);
          break;
        }
      }
    }
  }

  /* Look for NUMA nodes inside packages (cannot be filtered-out) */
  if (fulldiscovery) {
    hwloc_bitmap_t node_cpuset;
    hwloc_obj_t node;

    hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
    while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
      unsigned packageid = infos[i].packageid;
      unsigned nodeid = infos[i].nodeid;

      if (nodeid == (unsigned)-1) {
        hwloc_bitmap_clr(remaining_cpuset, i);
        continue;
      }

      node_cpuset = hwloc_bitmap_alloc();
      for (j = i; j < nbprocs; j++) {
        if (infos[j].nodeid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, j);
          continue;
        }

        if (infos[j].packageid == packageid && infos[j].nodeid == nodeid) {
          hwloc_bitmap_set(node_cpuset, j);
          hwloc_bitmap_clr(remaining_cpuset, j);
        }
      }
      node = hwloc_alloc_setup_object(topology, HWLOC_OBJ_NUMANODE, nodeid);
      node->cpuset = node_cpuset;
      node->nodeset = hwloc_bitmap_alloc();
      hwloc_bitmap_set(node->nodeset, nodeid);
      hwloc_debug_1arg_bitmap("os node %u has cpuset %s\n",
                              nodeid, node_cpuset);
      hwloc_insert_object_by_cpuset(topology, node);
      gotnuma++;
    }
  }
817
818 if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
819
820 if (fulldiscovery) {
821 hwloc_bitmap_t unit_cpuset;
822 hwloc_obj_t unit;
823
824 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
825 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
826 unsigned packageid = infos[i].packageid;
827 unsigned unitid = infos[i].unitid;
828
829 if (unitid == (unsigned)-1) {
830 hwloc_bitmap_clr(remaining_cpuset, i);
831 continue;
832 }
833
834 unit_cpuset = hwloc_bitmap_alloc();
835 for (j = i; j < nbprocs; j++) {
836 if (infos[j].unitid == (unsigned) -1) {
837 hwloc_bitmap_clr(remaining_cpuset, j);
838 continue;
839 }
840
841 if (infos[j].packageid == packageid && infos[j].unitid == unitid) {
842 hwloc_bitmap_set(unit_cpuset, j);
843 hwloc_bitmap_clr(remaining_cpuset, j);
844 }
845 }
846 unit = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unitid);
847 unit->cpuset = unit_cpuset;
848 unit->subtype = strdup("ComputeUnit");
849 unit->attr->group.kind = HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT;
850 hwloc_debug_1arg_bitmap("os unit %u has cpuset %s\n",
851 unitid, unit_cpuset);
852 hwloc_insert_object_by_cpuset(topology, unit);
853 }
854 }
855
856
857 if (infos[one].otherids) {
858 for (level = infos[one].levels-1; level <= infos[one].levels-1; level--) {
859 if (infos[one].otherids[level] != UINT_MAX) {
860 hwloc_bitmap_t unknown_cpuset;
861 hwloc_obj_t unknown_obj;
862
863 hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
864 while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
865 unsigned unknownid = infos[i].otherids[level];
866
867 unknown_cpuset = hwloc_bitmap_alloc();
868 for (j = i; j < nbprocs; j++) {
869 if (infos[j].otherids[level] == unknownid) {
870 hwloc_bitmap_set(unknown_cpuset, j);
871 hwloc_bitmap_clr(remaining_cpuset, j);
872 }
873 }
874 unknown_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, unknownid);
875 unknown_obj->cpuset = unknown_cpuset;
876 unknown_obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_X2APIC_UNKNOWN;
877 unknown_obj->attr->group.subkind = level;
878 hwloc_debug_2args_bitmap("os unknown%u %u has cpuset %s\n",
879 level, unknownid, unknown_cpuset);
880 hwloc_insert_object_by_cpuset(topology, unknown_obj);
881 }
882 }
883 }
884 }
885 }

  if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_CORE)) {
    /* Look for cores */
    if (fulldiscovery) {
      hwloc_bitmap_t core_cpuset;
      hwloc_obj_t core;

      hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
      while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
        unsigned packageid = infos[i].packageid;
        unsigned nodeid = infos[i].nodeid;
        unsigned coreid = infos[i].coreid;

        if (coreid == (unsigned) -1) {
          hwloc_bitmap_clr(remaining_cpuset, i);
          continue;
        }

        core_cpuset = hwloc_bitmap_alloc();
        for (j = i; j < nbprocs; j++) {
          if (infos[j].coreid == (unsigned) -1) {
            hwloc_bitmap_clr(remaining_cpuset, j);
            continue;
          }

          if (infos[j].packageid == packageid && infos[j].nodeid == nodeid && infos[j].coreid == coreid) {
            hwloc_bitmap_set(core_cpuset, j);
            hwloc_bitmap_clr(remaining_cpuset, j);
          }
        }
        core = hwloc_alloc_setup_object(topology, HWLOC_OBJ_CORE, coreid);
        core->cpuset = core_cpuset;
        hwloc_debug_1arg_bitmap("os core %u has cpuset %s\n",
                                coreid, core_cpuset);
        hwloc_insert_object_by_cpuset(topology, core);
      }
    }
  }

  /* Look for PUs (cannot be filtered-out) */
  if (fulldiscovery) {
    hwloc_debug("%s", "\n\n * CPU cpusets *\n\n");
    for (i=0; i<nbprocs; i++)
      if(infos[i].present) { /* Only add present PUs. We don't know if the others actually exist. */
        struct hwloc_obj *obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_PU, i);
        obj->cpuset = hwloc_bitmap_alloc();
        hwloc_bitmap_only(obj->cpuset, i);
        hwloc_debug_1arg_bitmap("PU %u has cpuset %s\n", i, obj->cpuset);
        hwloc_insert_object_by_cpuset(topology, obj);
      }
  }

  /* Look for caches */
  /* First find the highest level */
  level = 0;
  for (i = 0; i < nbprocs; i++)
    for (j = 0; j < infos[i].numcaches; j++)
      if (infos[i].cache[j].level > level)
        level = infos[i].cache[j].level;
  while (level > 0) {
    hwloc_obj_cache_type_t type;
    HWLOC_BUILD_ASSERT(HWLOC_OBJ_CACHE_DATA == HWLOC_OBJ_CACHE_UNIFIED+1);
    HWLOC_BUILD_ASSERT(HWLOC_OBJ_CACHE_INSTRUCTION == HWLOC_OBJ_CACHE_DATA+1);
    for (type = HWLOC_OBJ_CACHE_UNIFIED; type <= HWLOC_OBJ_CACHE_INSTRUCTION; type++) {
      /* unified, then data, then instruction */
      hwloc_obj_type_t otype;
      hwloc_obj_t cache;

      otype = hwloc_cache_type_by_depth_type(level, type);
      if (otype == HWLOC_OBJ_TYPE_NONE)
        continue;
      if (!hwloc_filter_check_keep_object_type(topology, otype))
        continue;

      hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
      while ((i = hwloc_bitmap_first(remaining_cpuset)) != (unsigned) -1) {
        hwloc_bitmap_t puset;

        for (l = 0; l < infos[i].numcaches; l++) {
          if (infos[i].cache[l].level == level && infos[i].cache[l].type == type)
            break;
        }
        if (l == infos[i].numcaches) {
          /* no cache of that level+type in PU i */
          hwloc_bitmap_clr(remaining_cpuset, i);
          continue;
        }

        puset = hwloc_bitmap_alloc();
        hwloc_bitmap_set(puset, i);
        cache = hwloc_get_next_obj_covering_cpuset_by_type(topology, puset, otype, NULL);
        hwloc_bitmap_free(puset);

        if (cache) {
          /* Found the cache above that PU, annotate it */
          if (!hwloc_obj_get_info_by_name(cache, "Inclusive"))
            hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
          hwloc_bitmap_andnot(remaining_cpuset, remaining_cpuset, cache->cpuset);
        } else {
          /* Add the missing cache */
          hwloc_bitmap_t cache_cpuset;
          unsigned packageid = infos[i].packageid;
          unsigned cacheid = infos[i].cache[l].cacheid;

          /* Look for others sharing it */
          cache_cpuset = hwloc_bitmap_alloc();
          for (j = i; j < nbprocs; j++) {
            unsigned l2;
            for (l2 = 0; l2 < infos[j].numcaches; l2++) {
              if (infos[j].cache[l2].level == level && infos[j].cache[l2].type == type)
                break;
            }
            if (l2 == infos[j].numcaches) {
              /* no cache of that level+type in PU j */
              hwloc_bitmap_clr(remaining_cpuset, j);
              continue;
            }
            if (infos[j].packageid == packageid && infos[j].cache[l2].cacheid == cacheid) {
              hwloc_bitmap_set(cache_cpuset, j);
              hwloc_bitmap_clr(remaining_cpuset, j);
            }
          }
          cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX);
          cache->attr->cache.depth = level;
          cache->attr->cache.size = infos[i].cache[l].size;
          cache->attr->cache.linesize = infos[i].cache[l].linesize;
          cache->attr->cache.associativity = infos[i].cache[l].ways;
          cache->attr->cache.type = infos[i].cache[l].type;
          cache->cpuset = cache_cpuset;
          hwloc_obj_add_info(cache, "Inclusive", infos[i].cache[l].inclusive ? "1" : "0");
          hwloc_debug_2args_bitmap("os L%u cache %u has cpuset %s\n",
                                   level, cacheid, cache_cpuset);
          hwloc_insert_object_by_cpuset(topology, cache);
        }
      }
    }
    level--;
  }

  hwloc_bitmap_free(remaining_cpuset);
  hwloc_bitmap_free(complete_cpuset);

  if (gotnuma)
    topology->support.discovery->numa = 1;
}

static int
look_procs(struct hwloc_backend *backend, struct procinfo *infos, int fulldiscovery,
           unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
           int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
           int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  unsigned nbprocs = data->nbprocs;
  hwloc_bitmap_t orig_cpuset = NULL;
  hwloc_bitmap_t set = NULL;
  unsigned i;

  if (!data->src_cpuiddump_path) {
    orig_cpuset = hwloc_bitmap_alloc();
    if (get_cpubind(topology, orig_cpuset, HWLOC_CPUBIND_STRICT)) {
      hwloc_bitmap_free(orig_cpuset);
      return -1;
    }
    set = hwloc_bitmap_alloc();
  }

  /* bind to each PU in turn and query cpuid locally, or read the dump */
  for (i = 0; i < nbprocs; i++) {
    struct cpuiddump *src_cpuiddump = NULL;
    if (data->src_cpuiddump_path) {
      src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i);
      if (!src_cpuiddump)
        continue;
    } else {
      hwloc_bitmap_only(set, i);
      hwloc_debug("binding to CPU%u\n", i);
      if (set_cpubind(topology, set, HWLOC_CPUBIND_STRICT)) {
        hwloc_debug("could not bind to CPU%u: %s\n", i, strerror(errno));
        continue;
      }
    }

    look_proc(backend, &infos[i], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);

    if (data->src_cpuiddump_path) {
      cpuiddump_free(src_cpuiddump);
    }
  }

  if (!data->src_cpuiddump_path) {
    set_cpubind(topology, orig_cpuset, 0);
    hwloc_bitmap_free(set);
    hwloc_bitmap_free(orig_cpuset);
  }

  if (!data->apicid_unique)
    fulldiscovery = 0;
  else
    summarize(backend, infos, fulldiscovery);
  return 0;
}

#if defined HWLOC_FREEBSD_SYS && defined HAVE_CPUSET_SETID
#include <sys/param.h>
#include <sys/cpuset.h>
typedef cpusetid_t hwloc_x86_os_state_t;
static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state, struct cpuiddump *src_cpuiddump)
{
  if (!src_cpuiddump) {
    /* temporarily make all cpus available during discovery */
    cpuset_getid(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, state);
    cpuset_setid(CPU_WHICH_PID, -1, 0);
  }
}
static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state, struct cpuiddump *src_cpuiddump)
{
  if (!src_cpuiddump) {
    /* restore initial cpuset */
    cpuset_setid(CPU_WHICH_PID, -1, *state);
  }
}
#else /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */
typedef void * hwloc_x86_os_state_t;
static void hwloc_x86_os_state_save(hwloc_x86_os_state_t *state __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump __hwloc_attribute_unused) { }
static void hwloc_x86_os_state_restore(hwloc_x86_os_state_t *state __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump __hwloc_attribute_unused) { }
#endif /* !defined HWLOC_FREEBSD_SYS || !defined HAVE_CPUSET_SETID */

/* GenuineIntel */
#define INTEL_EBX ('G' | ('e'<<8) | ('n'<<16) | ('u'<<24))
#define INTEL_EDX ('i' | ('n'<<8) | ('e'<<16) | ('I'<<24))
#define INTEL_ECX ('n' | ('t'<<8) | ('e'<<16) | ('l'<<24))

/* AuthenticAMD */
#define AMD_EBX ('A' | ('u'<<8) | ('t'<<16) | ('h'<<24))
#define AMD_EDX ('e' | ('n'<<8) | ('t'<<16) | ('i'<<24))
#define AMD_ECX ('c' | ('A'<<8) | ('M'<<16) | ('D'<<24))

/* (Zhaoxin) CentaurHauls */
#define ZX_EBX ('C' | ('e'<<8) | ('n'<<16) | ('t'<<24))
#define ZX_EDX ('a' | ('u'<<8) | ('r'<<16) | ('H'<<24))
#define ZX_ECX ('a' | ('u'<<8) | ('l'<<16) | ('s'<<24))
/* (Zhaoxin) "  Shanghai  " */
#define SH_EBX (' ' | (' '<<8) | ('S'<<16) | ('h'<<24))
#define SH_EDX ('a' | ('n'<<8) | ('g'<<16) | ('h'<<24))
#define SH_ECX ('a' | ('i'<<8) | (' '<<16) | (' '<<24))
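
/*
 * CPUID leaf 0x0 returns the 12-character vendor string split across
 * EBX, EDX and ECX (in that order), 4 little-endian bytes per register.
 * The macros above therefore spell "GenuineIntel", "AuthenticAMD",
 * "CentaurHauls" and "  Shanghai  " (the last two are both Zhaoxin);
 * they are compared against the registers fetched in hwloc_look_x86().
 */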

/* fake cpubind for when nbprocs=1 and no binding support */
static int fake_get_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
                            hwloc_cpuset_t set __hwloc_attribute_unused,
                            int flags __hwloc_attribute_unused)
{
  return 0;
}
static int fake_set_cpubind(hwloc_topology_t topology __hwloc_attribute_unused,
                            hwloc_const_cpuset_t set __hwloc_attribute_unused,
                            int flags __hwloc_attribute_unused)
{
  return 0;
}

static
int hwloc_look_x86(struct hwloc_backend *backend, int fulldiscovery)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  unsigned nbprocs = data->nbprocs;
  unsigned eax, ebx, ecx = 0, edx;
  unsigned i;
  unsigned highest_cpuid;
  unsigned highest_ext_cpuid;
  /* This stores cpuid features with the same indexing as Linux */
  unsigned features[10] = { 0 };
  struct procinfo *infos = NULL;
  enum cpuid_type cpuid_type = unknown;
  hwloc_x86_os_state_t os_state;
  struct hwloc_binding_hooks hooks;
  struct hwloc_topology_support support;
  struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
  int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL;
  int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL;
  struct cpuiddump *src_cpuiddump = NULL;
  int ret = -1;

  if (data->src_cpuiddump_path) {
    /* just read cpuid from the dump */
    src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0);
    if (!src_cpuiddump)
      goto out;

  } else {
    /* otherwise check if binding works */
    memset(&hooks, 0, sizeof(hooks));
    support.membind = &memsupport;
    hwloc_set_native_binding_hooks(&hooks, &support);
    if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
      get_cpubind = hooks.get_thisthread_cpubind;
      set_cpubind = hooks.set_thisthread_cpubind;
    } else if (hooks.get_thisproc_cpubind && hooks.set_thisproc_cpubind) {
      /* FIXME: if called by a multithreaded program, this will restore the
       * original process binding for each thread instead of its own
       * original thread binding. */
      get_cpubind = hooks.get_thisproc_cpubind;
      set_cpubind = hooks.set_thisproc_cpubind;
    } else {
      /* we need binding support if there are multiple PUs */
      if (nbprocs > 1)
        goto out;
      get_cpubind = fake_get_cpubind;
      set_cpubind = fake_set_cpubind;
    }
  }

  if (!src_cpuiddump && !hwloc_have_x86_cpuid())
    goto out;

  infos = calloc(nbprocs, sizeof(struct procinfo));
  if (NULL == infos)
    goto out;
  for (i = 0; i < nbprocs; i++) {
    infos[i].nodeid = (unsigned) -1;
    infos[i].packageid = (unsigned) -1;
    infos[i].unitid = (unsigned) -1;
    infos[i].coreid = (unsigned) -1;
    infos[i].threadid = (unsigned) -1;
  }

  eax = 0x00;
  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
  highest_cpuid = eax;
  if (ebx == INTEL_EBX && ecx == INTEL_ECX && edx == INTEL_EDX)
    cpuid_type = intel;
  else if (ebx == AMD_EBX && ecx == AMD_ECX && edx == AMD_EDX)
    cpuid_type = amd;
  else if ((ebx == ZX_EBX && ecx == ZX_ECX && edx == ZX_EDX)
           || (ebx == SH_EBX && ecx == SH_ECX && edx == SH_EDX))
    cpuid_type = zhaoxin;

  hwloc_debug("highest cpuid %x, cpuid type %u\n", highest_cpuid, cpuid_type);
  if (highest_cpuid < 0x01) {
    goto out_with_infos;
  }

  eax = 0x01;
  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
  features[0] = edx;
  features[4] = ecx;

  eax = 0x80000000;
  cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
  highest_ext_cpuid = eax;

  hwloc_debug("highest extended cpuid %x\n", highest_ext_cpuid);

  if (highest_cpuid >= 0x7) {
    eax = 0x7;
    ecx = 0;
    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
    features[9] = ebx;
  }

  if (cpuid_type != intel && highest_ext_cpuid >= 0x80000001) {
    eax = 0x80000001;
    cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
    features[1] = edx;
    features[6] = ecx;
  }

  hwloc_x86_os_state_save(&os_state, src_cpuiddump);

  ret = look_procs(backend, infos, fulldiscovery,
                   highest_cpuid, highest_ext_cpuid, features, cpuid_type,
                   get_cpubind, set_cpubind);
  if (!ret)
    /* success, we're done */
    goto out_with_os_state;

  if (nbprocs == 1) {
    /* binding failed but there is only one processor, just use it */
    look_proc(backend, &infos[0], highest_cpuid, highest_ext_cpuid, features, cpuid_type, src_cpuiddump);
    summarize(backend, infos, fulldiscovery);
    ret = 0;
  }

 out_with_os_state:
  hwloc_x86_os_state_restore(&os_state, src_cpuiddump);

 out_with_infos:
  if (NULL != infos) {
    for (i = 0; i < nbprocs; i++) {
      free(infos[i].cache);
      free(infos[i].otherids);
    }
    free(infos);
  }

 out:
  if (src_cpuiddump)
    cpuiddump_free(src_cpuiddump);
  return ret;
}

static int
hwloc_x86_discover(struct hwloc_backend *backend)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  struct hwloc_topology *topology = backend->topology;
  int alreadypus = 0;
  int ret;

#if HAVE_DECL_RUNNING_ON_VALGRIND
  if (RUNNING_ON_VALGRIND && !data->src_cpuiddump_path) {
    fprintf(stderr, "hwloc x86 backend cannot work under Valgrind, disabling.\n"
            "May be reenabled by dumping CPUIDs with hwloc-gather-cpuid\n"
            "and reloading them under Valgrind with HWLOC_CPUID_PATH.\n");
    return 0;
  }
#endif

  if (data->src_cpuiddump_path) {
    /* just read cpuid from the dump */
    assert(data->nbprocs > 0); /* enforced by hwloc_x86_component_instantiate() */
    topology->support.discovery->pu = 1;
  } else {
    int nbprocs = hwloc_fallback_nbprocessors(topology);
    if (nbprocs >= 1)
      topology->support.discovery->pu = 1;
    else
      nbprocs = 1;
    data->nbprocs = (unsigned) nbprocs;
  }

  if (topology->levels[0][0]->cpuset) {
    /* somebody else discovered things */
    if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
      /* only PUs were discovered, as many as we would discover ourselves,
       * complete the topology with everything else */
      alreadypus = 1;
      goto fulldiscovery;
    }

    /* several object types were added, we can't easily complete,
     * just do partial discovery */
    hwloc_topology_reconnect(topology, 0);
    ret = hwloc_look_x86(backend, 0);
    if (ret)
      hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
    return 0;
  } else {
    /* topology is empty, initialize it */
    hwloc_alloc_root_sets(topology->levels[0][0]);
  }

 fulldiscovery:
  if (hwloc_look_x86(backend, 1) < 0) {
    /* if failed, create PUs */
    if (!alreadypus)
      hwloc_setup_pu_level(topology, data->nbprocs);
  }

  hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");

  if (!data->src_cpuiddump_path) { /* uname/Architecture info only makes sense on the live system */
#ifdef HAVE_UNAME
    hwloc_add_uname_info(topology, NULL);
#else
    /* uname isn't available, manually setup the "Architecture" info */
#ifdef HWLOC_X86_64_ARCH
    hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86_64");
#else
    hwloc_obj_add_info(topology->levels[0][0], "Architecture", "x86");
#endif
#endif
  }

  return 1;
}

static int
hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t set)
{
#if !(defined HWLOC_WIN_SYS && !defined __MINGW32__)
  struct dirent *dirent;
  DIR *dir;
  FILE *file;
  char line[32];

  dir = opendir(src_cpuiddump_path);
  if (!dir)
    return -1;

  char path[strlen(src_cpuiddump_path) + strlen("/hwloc-cpuid-info") + 1];
  sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
  file = fopen(path, "r");
  if (!file) {
    fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path);
    goto out_with_dir;
  }
  if (!fgets(line, sizeof(line), file)) {
    fprintf(stderr, "Failed to read dumped cpuid summary %s\n", path);
    fclose(file);
    goto out_with_dir;
  }
  fclose(file);
  if (strcmp(line, "Architecture: x86\n")) {
    fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
    goto out_with_dir;
  }

  while ((dirent = readdir(dir)) != NULL) {
    if (!strncmp(dirent->d_name, "pu", 2)) {
      char *end;
      unsigned long idx = strtoul(dirent->d_name+2, &end, 10);
      if (!*end)
        hwloc_bitmap_set(set, idx);
      else
        fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
                dirent->d_name, src_cpuiddump_path);
    }
  }
  closedir(dir);

  if (hwloc_bitmap_iszero(set)) {
    fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
            src_cpuiddump_path);
    return -1;
  } else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) {
    /* the x86 backend enforces a contiguous set of PUs starting at 0 so far */
    fprintf(stderr, "Found non-contiguous pu%%u range in dumped cpuid directory `%s'\n",
            src_cpuiddump_path);
    return -1;
  }

  return 0;

 out_with_dir:
  closedir(dir);
#endif /* !defined HWLOC_WIN_SYS || defined __MINGW32__ */
  return -1;
}

static void
hwloc_x86_backend_disable(struct hwloc_backend *backend)
{
  struct hwloc_x86_backend_data_s *data = backend->private_data;
  hwloc_bitmap_free(data->apicid_set);
  free(data->src_cpuiddump_path);
  free(data);
}

static struct hwloc_backend *
hwloc_x86_component_instantiate(struct hwloc_disc_component *component,
                                const void *_data1 __hwloc_attribute_unused,
                                const void *_data2 __hwloc_attribute_unused,
                                const void *_data3 __hwloc_attribute_unused)
{
  struct hwloc_backend *backend;
  struct hwloc_x86_backend_data_s *data;
  const char *src_cpuiddump_path;

  backend = hwloc_backend_alloc(component);
  if (!backend)
    goto out;

  data = malloc(sizeof(*data));
  if (!data) {
    errno = ENOMEM;
    goto out_with_backend;
  }

  backend->private_data = data;
  backend->discover = hwloc_x86_discover;
  backend->disable = hwloc_x86_backend_disable;

  /* default values */
  data->is_knl = 0;
  data->apicid_set = hwloc_bitmap_alloc();
  data->apicid_unique = 1;
  data->src_cpuiddump_path = NULL;

  src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
  if (src_cpuiddump_path) {
    hwloc_bitmap_t set = hwloc_bitmap_alloc();
    if (!hwloc_x86_check_cpuiddump_input(src_cpuiddump_path, set)) {
      backend->is_thissystem = 0;
      data->src_cpuiddump_path = strdup(src_cpuiddump_path);
      assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */
      data->nbprocs = hwloc_bitmap_weight(set);
    } else {
      fprintf(stderr, "Ignoring dumped cpuid directory.\n");
    }
    hwloc_bitmap_free(set);
  }

  return backend;

 out_with_backend:
  free(backend);
 out:
  return NULL;
}

static struct hwloc_disc_component hwloc_x86_disc_component = {
  HWLOC_DISC_COMPONENT_TYPE_CPU,
  "x86",
  HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
  hwloc_x86_component_instantiate,
  45,
  1,
  NULL
};

const struct hwloc_component hwloc_x86_component = {
  HWLOC_COMPONENT_ABI,
  NULL, NULL,
  HWLOC_COMPONENT_TYPE_DISC,
  0,
  &hwloc_x86_disc_component
};