This source file includes the following definitions.
- mca_mpool_hugepage_register
- mca_mpool_hugepage_open
- mca_mpool_hugepage_close
- page_compare
- mca_mpool_hugepage_find_hugepages
- mca_mpool_hugepage_query
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28 #define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
29 #include "opal_config.h"
30 #include "opal/mca/base/base.h"
31 #include "opal/runtime/opal_params.h"
32 #include "opal/mca/base/mca_base_pvar.h"
33 #include "opal/mca/mpool/base/base.h"
34 #include "opal/mca/allocator/base/base.h"
35
36 #include "opal/util/argv.h"
37
38 #include "mpool_hugepage.h"
39
40 #ifdef HAVE_UNISTD_H
41 #include <unistd.h>
42 #endif
43 #ifdef HAVE_MALLOC_H
44 #include <malloc.h>
45 #endif
46 #ifdef HAVE_SYS_VFS_H
47 #include <sys/vfs.h>
48 #endif
49 #ifdef HAVE_SYS_MOUNT_H
50 #include <sys/mount.h>
51 #endif
52 #ifdef HAVE_SYS_PARAM_H
53 #include <sys/param.h>
54 #endif
55 #ifdef HAVE_SYS_MMAN_H
56 #include <sys/mman.h>
57 #endif
58 #ifdef HAVE_MNTENT_H
59 #include <mntent.h>
60 #endif
61
62 #include <fcntl.h>
63
64
65
66
67
68
69 #if defined(HAVE_STATFS) && (defined(HAVE_STRUCT_STATFS_F_FSTYPENAME) || \
70 defined(HAVE_STRUCT_STATFS_F_TYPE))
71 #define USE_STATFS 1
72 #endif
73
74
75
76
77
78 static int mca_mpool_hugepage_open (void);
79 static int mca_mpool_hugepage_close (void);
80 static int mca_mpool_hugepage_register (void);
81 static int mca_mpool_hugepage_query (const char *hints, int *priority,
82 mca_mpool_base_module_t **module);
83 static void mca_mpool_hugepage_find_hugepages (void);
84
85 static int mca_mpool_hugepage_priority;
86 static unsigned long mca_mpool_hugepage_page_size;
87
88 mca_mpool_hugepage_component_t mca_mpool_hugepage_component = {
89 {
90
91
92
93 .mpool_version ={
94 MCA_MPOOL_BASE_VERSION_3_0_0,
95
96 .mca_component_name = "hugepage",
97 MCA_BASE_MAKE_VERSION(component, OPAL_MAJOR_VERSION, OPAL_MINOR_VERSION,
98 OPAL_RELEASE_VERSION),
99 .mca_open_component = mca_mpool_hugepage_open,
100 .mca_close_component = mca_mpool_hugepage_close,
101 .mca_register_component_params = mca_mpool_hugepage_register,
102 },
103 .mpool_data = {
104
105 MCA_BASE_METADATA_PARAM_CHECKPOINT
106 },
107
108 .mpool_query = mca_mpool_hugepage_query,
109 },
110 };
111
112
113
114
115
116 static int mca_mpool_hugepage_register(void)
117 {
118 mca_mpool_hugepage_priority = 50;
119 (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
120 "priority", "Default priority of the hugepage mpool component "
121 "(default: 50)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
122 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
123 &mca_mpool_hugepage_priority);
124
125 mca_mpool_hugepage_page_size = 1 << 21;
126 (void) mca_base_component_var_register (&mca_mpool_hugepage_component.super.mpool_version,
127 "page_size", "Default huge page size of the hugepage mpool component "
128 "(default: 2M)", MCA_BASE_VAR_TYPE_INT, NULL, 0, 0,
129 OPAL_INFO_LVL_9, MCA_BASE_VAR_SCOPE_LOCAL,
130 &mca_mpool_hugepage_page_size);
131
132 mca_mpool_hugepage_component.bytes_allocated = 0;
133 (void) mca_base_component_pvar_register (&mca_mpool_hugepage_component.super.mpool_version,
134 "bytes_allocated", "Number of bytes currently allocated in the mpool "
135 "hugepage component", OPAL_INFO_LVL_3, MCA_BASE_PVAR_CLASS_SIZE,
136 MCA_BASE_VAR_TYPE_UNSIGNED_LONG, NULL, MCA_BASE_VAR_BIND_NO_OBJECT,
137 MCA_BASE_PVAR_FLAG_READONLY | MCA_BASE_PVAR_FLAG_CONTINUOUS,
138 NULL, NULL, NULL, (void *) &mca_mpool_hugepage_component.bytes_allocated);
139
140 return OPAL_SUCCESS;
141 }
142
143 static int mca_mpool_hugepage_open (void)
144 {
145 mca_mpool_hugepage_module_t *hugepage_module;
146 mca_mpool_hugepage_hugepage_t *hp;
147 int module_index, rc;
148
149 OBJ_CONSTRUCT(&mca_mpool_hugepage_component.huge_pages, opal_list_t);
150 mca_mpool_hugepage_find_hugepages ();
151
152 if (0 == opal_list_get_size (&mca_mpool_hugepage_component.huge_pages)) {
153 return OPAL_SUCCESS;
154 }
155
156 mca_mpool_hugepage_component.modules = (mca_mpool_hugepage_module_t *)
157 calloc (opal_list_get_size (&mca_mpool_hugepage_component.huge_pages),
158 sizeof (mca_mpool_hugepage_module_t));
159 if (NULL == mca_mpool_hugepage_component.modules) {
160 return OPAL_ERR_OUT_OF_RESOURCE;
161 }
162
163 module_index = 0;
164 OPAL_LIST_FOREACH(hp, &mca_mpool_hugepage_component.huge_pages, mca_mpool_hugepage_hugepage_t) {
165 hugepage_module = mca_mpool_hugepage_component.modules + module_index;
166 rc = mca_mpool_hugepage_module_init (hugepage_module, hp);
167 if (OPAL_SUCCESS != rc) {
168 continue;
169 }
170 module_index++;
171 }
172
173 mca_mpool_hugepage_component.module_count = module_index;
174
175 return OPAL_SUCCESS;
176 }
177
178 static int mca_mpool_hugepage_close (void)
179 {
180 OPAL_LIST_DESTRUCT(&mca_mpool_hugepage_component.huge_pages);
181
182 for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
183 mca_mpool_hugepage_module_t *module = mca_mpool_hugepage_component.modules + i;
184 module->super.mpool_finalize (&module->super);
185 }
186
187 free (mca_mpool_hugepage_component.modules);
188 mca_mpool_hugepage_component.modules = NULL;
189
190 return OPAL_SUCCESS;
191 }
192
193 #ifdef HAVE_MNTENT_H
194 static int page_compare (opal_list_item_t **a, opal_list_item_t **b) {
195 mca_mpool_hugepage_hugepage_t *pagea = (mca_mpool_hugepage_hugepage_t *) *a;
196 mca_mpool_hugepage_hugepage_t *pageb = (mca_mpool_hugepage_hugepage_t *) *b;
197 if (pagea->page_size > pageb->page_size) {
198 return 1;
199 } else if (pagea->page_size < pageb->page_size) {
200 return -1;
201 }
202
203 return 0;
204 }
205 #endif
206
#ifdef HAVE_MNTENT_H
/**
 * Parse a "pagesize=<n>[K|M|G]" mount option.
 *
 * hugetlbfs mount options may carry a unit suffix (e.g. "pagesize=2M"), so
 * the suffix is applied in the same way the page_size hint is handled in the
 * query function.  An unrecognized suffix is ignored.
 *
 * @param option  a single mount option token
 * @return the page size in bytes, or 0 if the token is not of the form
 *         "pagesize=<number>...".
 */
static unsigned long mca_mpool_hugepage_parse_pagesize (const char *option)
{
    static const char prefix[] = "pagesize=";
    unsigned long size;
    char *end;

    if (0 != strncmp (option, prefix, sizeof (prefix) - 1)) {
        return 0;
    }

    size = strtoul (option + sizeof (prefix) - 1, &end, 10);
    switch (*end) {
    case 'g':
    case 'G':
        size *= 1024;
        /* fallthrough */
    case 'm':
    case 'M':
        size *= 1024;
        /* fallthrough */
    case 'k':
    case 'K':
        size *= 1024;
        break;
    default:
        break;
    }

    return size;
}

/**
 * Ask the file system mounted at dir for its block size.
 *
 * @param dir  mount point to examine
 * @return the f_bsize reported by statfs/statvfs, or 0 if the call fails or
 *         neither interface is available.
 */
static unsigned long mca_mpool_hugepage_fs_block_size (const char *dir)
{
#if defined(USE_STATFS)
    struct statfs info;

    if (0 == statfs (dir, &info)) {
        return (unsigned long) info.f_bsize;
    }
#elif defined(HAVE_STATVFS)
    struct statvfs info;

    if (0 == statvfs (dir, &info)) {
        return (unsigned long) info.f_bsize;
    }
#else
    (void) dir;
#endif

    return 0;
}
#endif

/**
 * Scan /proc/mounts for hugetlbfs mounts and append one entry per usable
 * mount to mca_mpool_hugepage_component.huge_pages, sorted by ascending page
 * size.
 *
 * The page size is taken from the "pagesize" mount option when present and
 * from the file system block size otherwise.  Mounts whose page size cannot
 * be determined, or which are not readable and writable, are skipped.
 * Without mntent support this function does nothing.
 */
static void mca_mpool_hugepage_find_hugepages (void) {
#ifdef HAVE_MNTENT_H
    struct mntent *mntent;
    FILE *fh;

    fh = setmntent ("/proc/mounts", "r");
    if (NULL == fh) {
        return;
    }

    while (NULL != (mntent = getmntent(fh))) {
        mca_mpool_hugepage_hugepage_t *hp;
        unsigned long page_size;
        char *opts, *tok, *ctx;

        if (0 != strcmp(mntent->mnt_type, "hugetlbfs")) {
            continue;
        }

        /* strtok_r modifies its input, so work on a private copy */
        opts = strdup(mntent->mnt_opts);
        if (NULL == opts) {
            break;
        }

        /* An option string without any token leaves tok NULL right away; the
         * loop condition keeps strncmp from ever seeing a NULL pointer. */
        for (tok = strtok_r (opts, ",", &ctx) ; NULL != tok ; tok = strtok_r (NULL, ",", &ctx)) {
            if (0 == strncmp (tok, "pagesize", 8)) {
                break;
            }
        }

        if (NULL != tok) {
            page_size = mca_mpool_hugepage_parse_pagesize (tok);
        } else {
            page_size = mca_mpool_hugepage_fs_block_size (mntent->mnt_dir);
        }
        free(opts);

        if (0 == page_size) {
            /* could not determine the page size of this mount */
            continue;
        }

        hp = OBJ_NEW(mca_mpool_hugepage_hugepage_t);
        if (NULL == hp) {
            break;
        }

        hp->path = strdup (mntent->mnt_dir);
        if (NULL == hp->path) {
            /* out of memory: never hand a NULL path to access() */
            OBJ_RELEASE(hp);
            break;
        }
        hp->page_size = page_size;

        if(0 == access (hp->path, R_OK | W_OK)){
            opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                 "found huge page with size = %lu, path = %s, mmap flags = 0x%x, adding to list",
                                 hp->page_size, hp->path, hp->mmap_flags);
            opal_list_append (&mca_mpool_hugepage_component.huge_pages, &hp->super);
        } else {
            opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
                                 "found huge page with size = %lu, path = %s, mmap flags = 0x%x, with invalid "
                                 "permissions, skipping", hp->page_size, hp->path, hp->mmap_flags);
            OBJ_RELEASE(hp);
        }
    }

    opal_list_sort (&mca_mpool_hugepage_component.huge_pages, page_compare);

    endmntent (fh);
#endif
}
286
287 static int mca_mpool_hugepage_query (const char *hints, int *priority_out,
288 mca_mpool_base_module_t **module)
289 {
290 unsigned long page_size = 0;
291 char **hints_array;
292 int my_priority = mca_mpool_hugepage_priority;
293 char *tmp;
294 bool found = false;
295
296 if (0 == mca_mpool_hugepage_component.module_count) {
297 return OPAL_ERR_NOT_AVAILABLE;
298 }
299
300 if (hints) {
301 hints_array = opal_argv_split (hints, ',');
302 if (NULL == hints_array) {
303 return OPAL_ERR_OUT_OF_RESOURCE;
304 }
305
306 for (int i = 0 ; hints_array[i] ; ++i) {
307 char *key = hints_array[i];
308 char *value = NULL;
309
310 if (NULL != (tmp = strchr (key, '='))) {
311 value = tmp + 1;
312 *tmp = '\0';
313 }
314
315 if (0 == strcasecmp ("mpool", key)) {
316 if (value && 0 == strcasecmp ("hugepage", value)) {
317
318 my_priority = 100;
319 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
320 "hugepage mpool matches hint: %s=%s", key, value);
321 } else {
322
323 my_priority = 0;
324 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
325 "hugepage mpool does not match hint: %s=%s", key, value);
326 opal_argv_free (hints_array);
327 return OPAL_ERR_NOT_FOUND;
328 }
329 }
330
331 if (0 == strcasecmp ("page_size", key) && value) {
332 page_size = strtoul (value, &tmp, 0);
333 if (*tmp) {
334 switch (*tmp) {
335 case 'g':
336 case 'G':
337 page_size *= 1024;
338
339 case 'm':
340 case 'M':
341 page_size *= 1024;
342
343 case 'k':
344 case 'K':
345 page_size *= 1024;
346 break;
347 default:
348 page_size = -1;
349 }
350 }
351 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
352 "hugepage mpool requested page size: %lu", page_size);
353 }
354 }
355
356 opal_argv_free (hints_array);
357 }
358
359 if (0 == page_size) {
360
361 page_size = mca_mpool_hugepage_page_size;
362 if (my_priority < 100) {
363
364 my_priority = 0;
365 }
366 opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
367 "hugepage mpool did not match any hints: %s", hints);
368 }
369
370 for (int i = 0 ; i < mca_mpool_hugepage_component.module_count ; ++i) {
371 mca_mpool_hugepage_module_t *hugepage_module = mca_mpool_hugepage_component.modules + i;
372
373 if (hugepage_module->huge_page->page_size != page_size) {
374 continue;
375 }
376
377 my_priority = (my_priority < 80) ? my_priority + 20 : 100;
378
379 if (module) {
380 *module = &hugepage_module->super;
381 }
382
383 opal_output_verbose (MCA_BASE_VERBOSE_INFO, opal_mpool_base_framework.framework_output,
384 "matches page size hint. page size: %lu, path: %s, mmap flags: "
385 "0x%x", page_size, hugepage_module->huge_page->path,
386 hugepage_module->huge_page->mmap_flags);
387 found = true;
388 break;
389 }
390
391 if (!found) {
392 opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_output,
393 "could not find page matching page request: %lu", page_size);
394 return OPAL_ERR_NOT_FOUND;
395 }
396
397 if (priority_out) {
398 *priority_out = my_priority;
399 }
400
401 return OPAL_SUCCESS;
402 }