This source file includes the following definitions.
- mca_mpool_hugepage_hugepage_constructor
- mca_mpool_hugepage_hugepage_destructor
- mca_mpool_rb_hugepage_compare
- mca_mpool_hugepage_module_init
- mca_mpool_hugepage_seg_alloc
- mca_mpool_hugepage_seg_free
- mca_mpool_hugepage_alloc
- mca_mpool_hugepage_realloc
- mca_mpool_hugepage_free
- mca_mpool_hugepage_finalize
- mca_mpool_hugepage_ft_event
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 #define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
31 #include "opal_config.h"
32 #include "opal/align.h"
33 #include "mpool_hugepage.h"
34 #include <errno.h>
35 #include <string.h>
36 #ifdef HAVE_MALLOC_H
37 #include <malloc.h>
38 #endif
39 #include "opal/mca/mpool/base/base.h"
40 #include "opal/runtime/opal_params.h"
41 #include "opal/include/opal_stdint.h"
42 #include "opal/mca/allocator/base/base.h"
43 #include "opal/util/printf.h"
44
45 #include <fcntl.h>
46 #include <sys/mman.h>
47
48
49 static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size, size_t align,
50 uint32_t flags);
51 static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size);
52 static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr);
53 static void mca_mpool_hugepage_finalize (mca_mpool_base_module_t *mpool);
54 static int mca_mpool_hugepage_ft_event (int state);
55
56 static void mca_mpool_hugepage_hugepage_constructor (mca_mpool_hugepage_hugepage_t *huge_page)
57 {
58 memset ((char *)huge_page + sizeof(huge_page->super), 0, sizeof (*huge_page) - sizeof (huge_page->super));
59 }
60
61 static void mca_mpool_hugepage_hugepage_destructor (mca_mpool_hugepage_hugepage_t *huge_page)
62 {
63 free (huge_page->path);
64 }
65
/*
 * Class instance for mca_mpool_hugepage_hugepage_t: parent class
 * opal_list_item_t, with the constructor and destructor defined above.
 */
OBJ_CLASS_INSTANCE(mca_mpool_hugepage_hugepage_t,
                   opal_list_item_t,
                   mca_mpool_hugepage_hugepage_constructor,
                   mca_mpool_hugepage_hugepage_destructor);
69
/**
 * Comparison callback for the allocation tree: orders keys by address.
 *
 * The keys are base addresses of independent mappings. Applying `<` directly
 * to pointers into unrelated objects is undefined behavior in C, so the
 * addresses are converted to uintptr_t and compared as integers.
 *
 * @param key1  first key (an address)
 * @param key2  second key (an address)
 *
 * @return 0 when the keys are equal, -1 when key1 orders before key2,
 *         1 otherwise
 */
static int mca_mpool_rb_hugepage_compare (void *key1, void *key2)
{
    const uintptr_t lhs = (uintptr_t) key1;
    const uintptr_t rhs = (uintptr_t) key2;

    /* yields exactly -1, 0 or 1 without branching */
    return (lhs > rhs) - (lhs < rhs);
}
78
79
80
81
82 int mca_mpool_hugepage_module_init(mca_mpool_hugepage_module_t *mpool,
83 mca_mpool_hugepage_hugepage_t *huge_page)
84 {
85 mca_allocator_base_component_t *allocator_component;
86 int rc;
87
88 mpool->super.mpool_component = &mca_mpool_hugepage_component.super;
89 mpool->super.mpool_base = NULL;
90 mpool->super.mpool_alloc = mca_mpool_hugepage_alloc;
91 mpool->super.mpool_realloc = mca_mpool_hugepage_realloc;
92 mpool->super.mpool_free = mca_mpool_hugepage_free;
93 mpool->super.mpool_finalize = mca_mpool_hugepage_finalize;
94 mpool->super.mpool_ft_event = mca_mpool_hugepage_ft_event;
95 mpool->super.flags = MCA_MPOOL_FLAGS_MPI_ALLOC_MEM;
96
97 OBJ_CONSTRUCT(&mpool->lock, opal_mutex_t);
98
99 mpool->huge_page = huge_page;
100
101
102 allocator_component = mca_allocator_component_lookup ("bucket");
103 if (NULL == allocator_component) {
104 return OPAL_ERR_NOT_AVAILABLE;
105 }
106
107 mpool->allocator = allocator_component->allocator_init (true, mca_mpool_hugepage_seg_alloc,
108 mca_mpool_hugepage_seg_free, mpool);
109
110 OBJ_CONSTRUCT(&mpool->allocation_tree, opal_rb_tree_t);
111 rc = opal_rb_tree_init (&mpool->allocation_tree, mca_mpool_rb_hugepage_compare);
112 if (OPAL_SUCCESS != rc) {
113 OBJ_DESTRUCT(&mpool->allocation_tree);
114 return OPAL_ERR_NOT_AVAILABLE;
115 }
116
117 return OPAL_SUCCESS;
118 }
119
120 void *mca_mpool_hugepage_seg_alloc (void *ctx, size_t *sizep)
121 {
122 mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
123 mca_mpool_hugepage_hugepage_t *huge_page = hugepage_module->huge_page;
124 size_t size = *sizep;
125 void *base = NULL;
126 char *path = NULL;
127 int flags = MAP_PRIVATE;
128 int fd = -1;
129 int rc;
130
131 size = OPAL_ALIGN(size, huge_page->page_size, size_t);
132
133 if (huge_page->path) {
134 int32_t count;
135
136 count = opal_atomic_add_fetch_32 (&huge_page->count, 1);
137
138 rc = opal_asprintf (&path, "%s/hugepage.openmpi.%d.%d", huge_page->path,
139 getpid (), count);
140 if (0 > rc) {
141 return NULL;
142 }
143
144 fd = open (path, O_RDWR | O_CREAT, 0600);
145 if (-1 == fd) {
146 free (path);
147 return NULL;
148 }
149
150 if (0 != ftruncate (fd, size)) {
151 close (fd);
152 unlink (path);
153 free (path);
154 return NULL;
155 }
156 } else {
157 #if defined(MAP_ANONYMOUS)
158 flags |= MAP_ANONYMOUS;
159 #elif defined(MAP_ANON)
160
161 flags |= MAP_ANON;
162 #endif
163 }
164
165 base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags | huge_page->mmap_flags, fd, 0);
166 if (path) {
167 unlink (path);
168 free (path);
169 }
170
171 if (fd >= 0) {
172 close (fd);
173 }
174
175 if (MAP_FAILED == base) {
176 opal_output_verbose (MCA_BASE_VERBOSE_WARN, opal_mpool_base_framework.framework_verbose,
177 "could not allocate huge page(s). falling back on standard pages");
178
179 base = mmap (NULL, size, PROT_READ | PROT_WRITE, flags, -1, 0);
180 }
181
182 if (MAP_FAILED == base) {
183 return NULL;
184 }
185
186 opal_mutex_lock (&hugepage_module->lock);
187 opal_rb_tree_insert (&hugepage_module->allocation_tree, base, (void *) (intptr_t) size);
188 (void) opal_atomic_fetch_add_size_t (&mca_mpool_hugepage_component.bytes_allocated, size);
189 opal_mutex_unlock (&hugepage_module->lock);
190
191 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
192 "allocated segment %p of size %lu bytes", base, size));
193
194 *sizep = size;
195
196 return base;
197 }
198
199 void mca_mpool_hugepage_seg_free (void *ctx, void *addr)
200 {
201 mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) ctx;
202 size_t size;
203
204 opal_mutex_lock (&hugepage_module->lock);
205
206 size = (size_t) (intptr_t) opal_rb_tree_find (&hugepage_module->allocation_tree, addr);
207 if (size > 0) {
208 opal_rb_tree_delete (&hugepage_module->allocation_tree, addr);
209 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_mpool_base_framework.framework_verbose,
210 "freeing segment %p of size %lu bytes", addr, size));
211 munmap (addr, size);
212 (void) opal_atomic_fetch_add_size_t (&mca_mpool_hugepage_component.bytes_allocated, -size);
213 }
214
215 opal_mutex_unlock (&hugepage_module->lock);
216 }
217
218
219
220
221 static void *mca_mpool_hugepage_alloc (mca_mpool_base_module_t *mpool, size_t size,
222 size_t align, uint32_t flags)
223 {
224 mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
225 return hugepage_module->allocator->alc_alloc (hugepage_module->allocator, size, align);
226 }
227
228
229
230
231 static void *mca_mpool_hugepage_realloc (mca_mpool_base_module_t *mpool, void *addr, size_t size)
232 {
233 mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
234
235 return hugepage_module->allocator->alc_realloc (hugepage_module->allocator, addr, size);
236 }
237
238
239
240
241 static void mca_mpool_hugepage_free (mca_mpool_base_module_t *mpool, void *addr)
242 {
243 mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
244
245 hugepage_module->allocator->alc_free (hugepage_module->allocator, addr);
246 }
247
248 static void mca_mpool_hugepage_finalize (struct mca_mpool_base_module_t *mpool)
249 {
250 mca_mpool_hugepage_module_t *hugepage_module = (mca_mpool_hugepage_module_t *) mpool;
251
252 OBJ_DESTRUCT(&hugepage_module->lock);
253 OBJ_DESTRUCT(&hugepage_module->allocation_tree);
254
255 if (hugepage_module->allocator) {
256 (void) hugepage_module->allocator->alc_finalize (hugepage_module->allocator);
257 hugepage_module->allocator = NULL;
258 }
259 }
260
261 static int mca_mpool_hugepage_ft_event (int state) {
262 return OPAL_SUCCESS;
263 }