This source file includes the following definitions.
- get_distance_matrix
- find_numa_node
- find_my_numa_node
- find_device_numa
- opal_btl_usnic_hwloc_distance
1
2
3
4
5
6
7
8
9
10
11 #include "opal_config.h"
12
13 #include "opal/mca/hwloc/base/base.h"
14 #include "opal/constants.h"
15
16 #include "opal/mca/btl/base/base.h"
17
18 #include "btl_usnic_hwloc.h"
19
20
21
22
/*
 * File-level cache shared by the helpers below.
 */

/* NUMA node object found for this process's CPU binding; set by
 * find_my_numa_node() and NULL until that succeeds. */
static hwloc_obj_t my_numa_node = NULL;
/* Number of HWLOC_OBJ_NODE objects in the topology; set together with
 * my_numa_node by find_my_numa_node(). */
static int num_numa_nodes = 0;
/* NUMA distance matrix obtained from hwloc by get_distance_matrix(). */
static struct hwloc_distances_s *matrix = NULL;
#if HWLOC_API_VERSION >= 0x20000
/* In/out count handed to hwloc_distances_get_by_type() (hwloc >= 2.0 only). */
static unsigned int matrix_nr = 1;
#endif
29
30
31
32
33 static int get_distance_matrix(void)
34 {
35 #if HWLOC_API_VERSION < 0x20000
36
37
38
39 if (NULL == matrix) {
40 matrix = hwloc_get_whole_distance_matrix_by_type(opal_hwloc_topology,
41 HWLOC_OBJ_NODE);
42 }
43
44 return (NULL == matrix) ? OPAL_ERROR : OPAL_SUCCESS;
45 #else
46 if (0 != hwloc_distances_get_by_type(opal_hwloc_topology, HWLOC_OBJ_NODE,
47 &matrix_nr, &matrix,
48 HWLOC_DISTANCES_KIND_MEANS_LATENCY, 0) || 0 == matrix_nr) {
49 return OPAL_ERROR;
50 }
51 return OPAL_SUCCESS;
52 #endif
53 }
54
55
56
57
58 static hwloc_obj_t find_numa_node(hwloc_bitmap_t cpuset)
59 {
60 hwloc_obj_t obj;
61
62 obj =
63 hwloc_get_first_largest_obj_inside_cpuset(opal_hwloc_topology, cpuset);
64
65
66 while (obj->type > HWLOC_OBJ_NODE &&
67 NULL != obj->parent) {
68 obj = obj->parent;
69 }
70
71
72 if (obj->type != HWLOC_OBJ_NODE) {
73 opal_output_verbose(5, USNIC_OUT,
74 "btl:usnic:filter_numa: could not find NUMA node where this process is bound; filtering by NUMA distance not possible");
75 return NULL;
76 }
77
78
79
80 if (hwloc_get_nbobjs_inside_cpuset_by_type(opal_hwloc_topology,
81 cpuset, HWLOC_OBJ_NODE) > 1) {
82 opal_output_verbose(5, USNIC_OUT,
83 "btl:usnic:filter_numa: this process is bound to more than 1 NUMA node; filtering by NUMA distance not possible");
84 return NULL;
85 }
86
87 return obj;
88 }
89
90
91
92
93
94
95
96
97
98
99 static int find_my_numa_node(void)
100 {
101 hwloc_obj_t obj;
102 hwloc_bitmap_t cpuset;
103
104 if (NULL != my_numa_node) {
105 return OPAL_SUCCESS;
106 }
107
108
109 cpuset = hwloc_bitmap_alloc();
110 if (NULL == cpuset) {
111 return OPAL_ERR_OUT_OF_RESOURCE;
112 }
113 if (0 != hwloc_get_cpubind(opal_hwloc_topology, cpuset, 0)) {
114 hwloc_bitmap_free(cpuset);
115 return OPAL_ERR_NOT_AVAILABLE;
116 }
117
118
119 obj = find_numa_node(cpuset);
120 hwloc_bitmap_free(cpuset);
121 if (NULL == obj) {
122 return OPAL_ERR_NOT_AVAILABLE;
123 }
124
125
126 my_numa_node = obj;
127 num_numa_nodes = hwloc_get_nbobjs_by_type(opal_hwloc_topology,
128 HWLOC_OBJ_NODE);
129 return OPAL_SUCCESS;
130
131 }
132
133
134
135
136 static hwloc_obj_t find_device_numa(opal_btl_usnic_module_t *module)
137 {
138 struct fi_usnic_info *uip;
139 hwloc_obj_t obj;
140
141
142 assert(NULL != matrix);
143 assert(NULL != my_numa_node);
144
145 uip = &module->usnic_info;
146
147
148
149
150 obj = NULL;
151 while (NULL != (obj = hwloc_get_next_osdev(opal_hwloc_topology, obj))) {
152 assert(HWLOC_OBJ_OS_DEVICE == obj->type);
153 if (0 == strcmp(obj->name, uip->ui.v1.ui_ifname)) {
154 break;
155 }
156 }
157
158
159 if (NULL == obj) {
160 return NULL;
161 }
162
163
164
165 while (obj->type > HWLOC_OBJ_NODE &&
166 NULL != obj->parent) {
167 obj = obj->parent;
168 }
169
170
171 if (obj->type != HWLOC_OBJ_NODE) {
172 opal_output_verbose(5, USNIC_OUT,
173 "btl:usnic:filter_numa: could not find NUMA node for %s; filtering by NUMA distance not possible",
174 module->linux_device_name);
175 return NULL;
176 }
177
178 return obj;
179 }
180
181
182
183
184
185 int opal_btl_usnic_hwloc_distance(opal_btl_usnic_module_t *module)
186 {
187 int ret;
188 hwloc_obj_t dev_numa;
189
190
191 assert(NULL != module);
192
193
194 if (!proc_bound()) {
195 opal_output_verbose(5, USNIC_OUT,
196 "btl:usnic:filter_numa: not sorting devices by NUMA distance (process not bound)");
197 return OPAL_SUCCESS;
198 }
199
200 opal_output_verbose(5, USNIC_OUT,
201 "btl:usnic:filter_numa: filtering devices by NUMA distance");
202
203
204 if (OPAL_SUCCESS !=- opal_hwloc_base_get_topology()) {
205 opal_output_verbose(5, USNIC_OUT,
206 "btl:usnic:filter_numa: not sorting devices by NUMA distance (topology not available)");
207 return OPAL_SUCCESS;
208 }
209
210
211 if (OPAL_SUCCESS != (ret = get_distance_matrix())) {
212 return ret;
213 }
214
215
216 if (OPAL_SUCCESS != (ret = find_my_numa_node())) {
217 return ret;
218 }
219
220
221 if (NULL == my_numa_node) {
222 return OPAL_SUCCESS;
223 }
224
225
226 dev_numa = find_device_numa(module);
227
228
229
230 #if HWLOC_API_VERSION < 0x20000
231 if (NULL != dev_numa) {
232 module->numa_distance =
233 matrix->latency[dev_numa->logical_index * num_numa_nodes +
234 my_numa_node->logical_index];
235
236 opal_output_verbose(5, USNIC_OUT,
237 "btl:usnic:filter_numa: %s is distance %d from me",
238 module->linux_device_name,
239 module->numa_distance);
240 }
241 #else
242 if (NULL != dev_numa) {
243 int myindex, devindex;
244 unsigned int j;
245 myindex = -1;
246 for (j=0; j < matrix_nr; j++) {
247 if (my_numa_node == matrix->objs[j]) {
248 myindex = j;
249 break;
250 }
251 }
252 if (-1 == myindex) {
253 return OPAL_SUCCESS;
254 }
255 devindex = -1;
256 for (j=0; j < matrix_nr; j++) {
257 if (dev_numa == matrix->objs[j]) {
258 devindex = j;
259 break;
260 }
261 }
262 if (-1 == devindex) {
263 return OPAL_SUCCESS;
264 }
265
266 module->numa_distance =
267 matrix->values[(devindex * num_numa_nodes) + myindex];
268
269 opal_output_verbose(5, USNIC_OUT,
270 "btl:usnic:filter_numa: %s is distance %d from me",
271 module->linux_device_name,
272 module->numa_distance);
273 }
274 #endif
275
276 return OPAL_SUCCESS;
277 }