1 /*
2 * Copyright © 2010-2017 Inria. All rights reserved.
3 * Copyright © 2010-2011 Université Bordeaux
4 * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
5 * See COPYING in top-level directory.
6 */
7
8 /** \file
 * \brief Inline helpers to ease interaction between hwloc and the CUDA Runtime API.
10 *
11 * Applications that use both hwloc and the CUDA Runtime API may want to
12 * include this file so as to get topology information for CUDA devices.
13 *
14 */
15
16 #ifndef HWLOC_CUDART_H
17 #define HWLOC_CUDART_H
18
19 #include <hwloc.h>
20 #include <hwloc/autogen/config.h>
21 #include <hwloc/helper.h>
22 #ifdef HWLOC_LINUX_SYS
23 #include <hwloc/linux.h>
24 #endif
25
26 #include <cuda.h> /* for CUDA_VERSION */
27 #include <cuda_runtime_api.h>
28
29
30 #ifdef __cplusplus
31 extern "C" {
32 #endif
33
34
35 /** \defgroup hwlocality_cudart Interoperability with the CUDA Runtime API
36 *
37 * This interface offers ways to retrieve topology information about
38 * CUDA devices when using the CUDA Runtime API.
39 *
40 * @{
41 */
42
43 /** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
44 *
45 * Device index \p idx must match the local machine.
46 */
47 static __hwloc_inline int
48 hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
49 int idx, int *domain, int *bus, int *dev)
50 {
51 cudaError_t cerr;
52 struct cudaDeviceProp prop;
53
54 cerr = cudaGetDeviceProperties(&prop, idx);
55 if (cerr) {
56 errno = ENOSYS;
57 return -1;
58 }
59
60 #if CUDA_VERSION >= 4000
61 *domain = prop.pciDomainID;
62 #else
63 *domain = 0;
64 #endif
65
66 *bus = prop.pciBusID;
67 *dev = prop.pciDeviceID;
68
69 return 0;
70 }
71
72 /** \brief Get the CPU set of logical processors that are physically
73 * close to device \p idx.
74 *
75 * Return the CPU set describing the locality of the CUDA device
76 * whose index is \p idx.
77 *
78 * Topology \p topology and device \p idx must match the local machine.
79 * I/O devices detection and the CUDA component are not needed in the topology.
80 *
81 * The function only returns the locality of the device.
82 * If more information about the device is needed, OS objects should
83 * be used instead, see hwloc_cudart_get_device_osdev_by_index().
84 *
85 * This function is currently only implemented in a meaningful way for
86 * Linux; other systems will simply get a full cpuset.
87 */
88 static __hwloc_inline int
89 hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
90 int idx, hwloc_cpuset_t set)
91 {
92 #ifdef HWLOC_LINUX_SYS
93 /* If we're on Linux, use the sysfs mechanism to get the local cpus */
94 #define HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX 128
95 char path[HWLOC_CUDART_DEVICE_SYSFS_PATH_MAX];
96 int domain, bus, dev;
97
98 if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
99 return -1;
100
101 if (!hwloc_topology_is_thissystem(topology)) {
102 errno = EINVAL;
103 return -1;
104 }
105
106 sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", (unsigned) domain, (unsigned) bus, (unsigned) dev);
107 if (hwloc_linux_read_path_as_cpumask(path, set) < 0
108 || hwloc_bitmap_iszero(set))
109 hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
110 #else
111 /* Non-Linux systems simply get a full cpuset */
112 hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
113 #endif
114 return 0;
115 }
116
117 /** \brief Get the hwloc PCI device object corresponding to the
118 * CUDA device whose index is \p idx.
119 *
120 * Return the PCI device object describing the CUDA device whose
121 * index is \p idx. Return NULL if there is none.
122 *
123 * Topology \p topology and device \p idx must match the local machine.
124 * I/O devices detection must be enabled in topology \p topology.
125 * The CUDA component is not needed in the topology.
126 */
127 static __hwloc_inline hwloc_obj_t
128 hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
129 {
130 int domain, bus, dev;
131
132 if (hwloc_cudart_get_device_pci_ids(topology, idx, &domain, &bus, &dev))
133 return NULL;
134
135 return hwloc_get_pcidev_by_busid(topology, domain, bus, dev, 0);
136 }
137
138 /** \brief Get the hwloc OS device object corresponding to the
139 * CUDA device whose index is \p idx.
140 *
141 * Return the OS device object describing the CUDA device whose
142 * index is \p idx. Return NULL if there is none.
143 *
144 * The topology \p topology does not necessarily have to match the current
145 * machine. For instance the topology may be an XML import of a remote host.
146 * I/O devices detection and the CUDA component must be enabled in the topology.
147 * If not, the locality of the object may still be found using
148 * hwloc_cudart_get_device_cpuset().
149 *
150 * \note The corresponding PCI device object can be obtained by looking
151 * at the OS device parent object (unless PCI devices are filtered out).
152 *
153 * \note This function is identical to hwloc_cuda_get_device_osdev_by_index().
154 */
155 static __hwloc_inline hwloc_obj_t
156 hwloc_cudart_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
157 {
158 hwloc_obj_t osdev = NULL;
159 while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
160 if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
161 && osdev->name
162 && !strncmp("cuda", osdev->name, 4)
163 && atoi(osdev->name + 4) == (int) idx)
164 return osdev;
165 }
166 return NULL;
167 }
168
169 /** @} */
170
171
172 #ifdef __cplusplus
173 } /* extern "C" */
174 #endif
175
176
177 #endif /* HWLOC_CUDART_H */