This source file includes the following definitions.
- hwloc_cuda_cores_per_MP
- hwloc_cuda_discover
- hwloc_cuda_component_instantiate
- hwloc_cuda_component_init
1
2
3
4
5
6
7 #include <private/autogen/config.h>
8 #include <hwloc.h>
9 #include <hwloc/plugins.h>
10 #include <hwloc/cudart.h>
11
12
13 #include <private/misc.h>
14 #include <private/debug.h>
15
16 #include <cuda_runtime_api.h>
17
/*
 * Return the number of FP32 CUDA cores per multiprocessor for the given
 * compute capability, or 0 when the capability is not in the table.
 *
 * major/minor: compute capability as reported in struct cudaDeviceProp.
 *
 * Values follow the per-architecture figures published in the
 * "Compute Capabilities" section of the CUDA C Programming Guide.
 * A return value of 0 tells the caller not to publish a core count.
 */
static unsigned hwloc_cuda_cores_per_MP(int major, int minor)
{
  switch (major) {
    case 1: /* Tesla */
      switch (minor) {
        case 0:
        case 1:
        case 2:
        case 3: return 8;
      }
      break;
    case 2: /* Fermi */
      switch (minor) {
        case 0: return 32;
        case 1: return 48;
      }
      break;
    case 3: /* Kepler */
      return 192;
    case 5: /* Maxwell */
      return 128;
    case 6: /* Pascal */
      switch (minor) {
        case 0: return 64;
        case 1:
        case 2: return 128;
      }
      break;
    case 7: /* Volta, Turing */
      return 64;
    case 8: /* Ampere, Ada Lovelace */
      switch (minor) {
        case 0: return 64;
        case 6:
        case 7:
        case 9: return 128;
      }
      break;
    case 9: /* Hopper */
      return 128;
  }
  /* Unknown capability: report it in debug builds and let the caller skip the attribute. */
  hwloc_debug("unknown compute capability %d.%d, disabling core display.\n", major, minor);
  return 0;
}
53
54 static int
55 hwloc_cuda_discover(struct hwloc_backend *backend)
56 {
57 struct hwloc_topology *topology = backend->topology;
58 enum hwloc_type_filter_e filter;
59 cudaError_t cures;
60 int nb, i;
61
62 hwloc_topology_get_type_filter(topology, HWLOC_OBJ_OS_DEVICE, &filter);
63 if (filter == HWLOC_TYPE_FILTER_KEEP_NONE)
64 return 0;
65
66 cures = cudaGetDeviceCount(&nb);
67 if (cures)
68 return -1;
69
70 for (i = 0; i < nb; i++) {
71 int domain, bus, dev;
72 char cuda_name[32];
73 char number[32];
74 struct cudaDeviceProp prop;
75 hwloc_obj_t cuda_device, parent;
76 unsigned cores;
77
78 cuda_device = hwloc_alloc_setup_object(topology, HWLOC_OBJ_OS_DEVICE, HWLOC_UNKNOWN_INDEX);
79 snprintf(cuda_name, sizeof(cuda_name), "cuda%d", i);
80 cuda_device->name = strdup(cuda_name);
81 cuda_device->depth = HWLOC_TYPE_DEPTH_UNKNOWN;
82 cuda_device->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
83
84 cuda_device->subtype = strdup("CUDA");
85 hwloc_obj_add_info(cuda_device, "Backend", "CUDA");
86 hwloc_obj_add_info(cuda_device, "GPUVendor", "NVIDIA Corporation");
87
88 cures = cudaGetDeviceProperties(&prop, i);
89 if (!cures && prop.name[0] != '\0')
90 hwloc_obj_add_info(cuda_device, "GPUModel", prop.name);
91
92 snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.totalGlobalMem) >> 10);
93 hwloc_obj_add_info(cuda_device, "CUDAGlobalMemorySize", number);
94
95 snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.l2CacheSize) >> 10);
96 hwloc_obj_add_info(cuda_device, "CUDAL2CacheSize", number);
97
98 snprintf(number, sizeof(number), "%d", prop.multiProcessorCount);
99 hwloc_obj_add_info(cuda_device, "CUDAMultiProcessors", number);
100
101 cores = hwloc_cuda_cores_per_MP(prop.major, prop.minor);
102 if (cores) {
103 snprintf(number, sizeof(number), "%u", cores);
104 hwloc_obj_add_info(cuda_device, "CUDACoresPerMP", number);
105 }
106
107 snprintf(number, sizeof(number), "%llu", ((unsigned long long) prop.sharedMemPerBlock) >> 10);
108 hwloc_obj_add_info(cuda_device, "CUDASharedMemorySizePerMP", number);
109
110 parent = NULL;
111 if (hwloc_cudart_get_device_pci_ids(NULL , i, &domain, &bus, &dev) == 0) {
112 parent = hwloc_pcidisc_find_by_busid(topology, domain, bus, dev, 0);
113 if (!parent)
114 parent = hwloc_pcidisc_find_busid_parent(topology, domain, bus, dev, 0);
115 }
116 if (!parent)
117 parent = hwloc_get_root_obj(topology);
118
119 hwloc_insert_object_by_parent(topology, parent, cuda_device);
120 }
121
122 return 0;
123 }
124
125 static struct hwloc_backend *
126 hwloc_cuda_component_instantiate(struct hwloc_disc_component *component,
127 const void *_data1 __hwloc_attribute_unused,
128 const void *_data2 __hwloc_attribute_unused,
129 const void *_data3 __hwloc_attribute_unused)
130 {
131 struct hwloc_backend *backend;
132
133 backend = hwloc_backend_alloc(component);
134 if (!backend)
135 return NULL;
136
137 backend->discover = hwloc_cuda_discover;
138 return backend;
139 }
140
141 static struct hwloc_disc_component hwloc_cuda_disc_component = {
142 HWLOC_DISC_COMPONENT_TYPE_MISC,
143 "cuda",
144 HWLOC_DISC_COMPONENT_TYPE_GLOBAL,
145 hwloc_cuda_component_instantiate,
146 10,
147 1,
148 NULL
149 };
150
/*
 * Init callback of the CUDA component.
 *
 * flags: must be 0; any other value is rejected.
 *
 * Returns 0 on success, -1 when flags is non-zero or when
 * hwloc_plugin_check_namespace() reports a failure for the
 * "hwloc_backend_alloc" symbol.
 */
static int
hwloc_cuda_component_init(unsigned long flags)
{
  /* Short-circuit keeps the original order: flags are checked before the namespace. */
  if (flags != 0UL
      || hwloc_plugin_check_namespace("cuda", "hwloc_backend_alloc") < 0)
    return -1;

  return 0;
}
160
161 #ifdef HWLOC_INSIDE_PLUGIN
162 HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
163 #endif
164
165 const struct hwloc_component hwloc_cuda_component = {
166 HWLOC_COMPONENT_ABI,
167 hwloc_cuda_component_init, NULL,
168 HWLOC_COMPONENT_TYPE_DISC,
169 0,
170 &hwloc_cuda_disc_component
171 };