This source file includes the following definitions.
- registration_flags_cacheable
- registration_is_cacheable
- mca_rcache_grdma_cache_contructor
- mca_rcache_grdma_cache_destructor
- mca_rcache_grdma_module_init
- dereg_mem
- do_unregistration_gc
- mca_rcache_grdma_evict_lru_local
- mca_rcache_grdma_evict
- mca_rcache_grdma_add_to_lru
- mca_rcache_grdma_remove_from_lru
- mca_rcache_grdma_check_cached
- mca_rcache_grdma_register
- mca_rcache_grdma_find
- mca_rcache_grdma_deregister
- mca_rcache_grdma_add_to_gc
- gc_add
- mca_rcache_grdma_invalidate_range
- check_for_cuda_freed_memory
- mca_rcache_grdma_finalize
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30 #define OPAL_DISABLE_ENABLE_MEM_DEBUG 1
31 #include "opal_config.h"
32
33 #include <errno.h>
34 #include <string.h>
35 #include <stdlib.h>
36
37 #include "opal/align.h"
38
39 #include "opal/util/proc.h"
40 #if OPAL_CUDA_GDR_SUPPORT
41 #include "opal/mca/common/cuda/common_cuda.h"
42 #endif
43 #include "opal/mca/rcache/rcache.h"
44 #include "opal/mca/rcache/base/base.h"
45
46 #include "opal/util/sys_limits.h"
47 #include "opal/align.h"
48 #include "rcache_grdma.h"
49
50
51 static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *addr,
52 size_t size, uint32_t flags, int32_t access_flags,
53 mca_rcache_base_registration_t **reg);
54 static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
55 mca_rcache_base_registration_t *reg);
56 static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
57 size_t size, mca_rcache_base_registration_t **reg);
58 static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache, void *base,
59 size_t size);
60 static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache);
61 static bool mca_rcache_grdma_evict (mca_rcache_base_module_t *rcache);
62 static int mca_rcache_grdma_add_to_gc (mca_rcache_base_registration_t *grdma_reg);
63
64 static inline bool registration_flags_cacheable (uint32_t flags)
65 {
66 return (mca_rcache_grdma_component.leave_pinned &&
67 !(flags &
68 (MCA_RCACHE_FLAGS_CACHE_BYPASS |
69 MCA_RCACHE_FLAGS_PERSIST |
70 MCA_RCACHE_FLAGS_INVALID)));
71 }
72
73 static inline bool registration_is_cacheable(mca_rcache_base_registration_t *reg)
74 {
75 return registration_flags_cacheable (reg->flags);
76 }
77
#if OPAL_CUDA_GDR_SUPPORT
/* Defined later in this file; only compiled when OPAL_CUDA_GDR_SUPPORT is enabled. */
static int check_for_cuda_freed_memory (mca_rcache_base_module_t *rcache,
                                        void *addr, size_t size);
#endif
81 static void mca_rcache_grdma_cache_contructor (mca_rcache_grdma_cache_t *cache)
82 {
83 memset ((void *)((uintptr_t)cache + sizeof (cache->super)), 0, sizeof (*cache) - sizeof (cache->super));
84
85 OBJ_CONSTRUCT(&cache->lru_list, opal_list_t);
86 OBJ_CONSTRUCT(&cache->gc_lifo, opal_lifo_t);
87
88 cache->vma_module = mca_rcache_base_vma_module_alloc ();
89 }
90
91 static void mca_rcache_grdma_cache_destructor (mca_rcache_grdma_cache_t *cache)
92 {
93
94 while (NULL != opal_list_remove_first (&cache->lru_list));
95
96 OBJ_DESTRUCT(&cache->lru_list);
97 OBJ_DESTRUCT(&cache->gc_lifo);
98 if (cache->vma_module) {
99 OBJ_RELEASE(cache->vma_module);
100 }
101
102 free (cache->cache_name);
103 }
104
/* Class instance for mca_rcache_grdma_cache_t: the parent class is
 * opal_list_item_t and the constructor/destructor are the two functions
 * defined above. */
OBJ_CLASS_INSTANCE(mca_rcache_grdma_cache_t,
                   opal_list_item_t,
                   mca_rcache_grdma_cache_contructor,
                   mca_rcache_grdma_cache_destructor);
108
109
110
111
112 void mca_rcache_grdma_module_init(mca_rcache_grdma_module_t* rcache, mca_rcache_grdma_cache_t *cache)
113 {
114 OBJ_RETAIN(cache);
115 rcache->cache = cache;
116
117 rcache->super.rcache_component = &mca_rcache_grdma_component.super;
118 rcache->super.rcache_register = mca_rcache_grdma_register;
119 rcache->super.rcache_find = mca_rcache_grdma_find;
120 rcache->super.rcache_deregister = mca_rcache_grdma_deregister;
121 rcache->super.rcache_invalidate_range = mca_rcache_grdma_invalidate_range;
122 rcache->super.rcache_finalize = mca_rcache_grdma_finalize;
123 rcache->super.rcache_evict = mca_rcache_grdma_evict;
124
125 rcache->stat_cache_hit = rcache->stat_cache_miss = rcache->stat_evicted = 0;
126 rcache->stat_cache_found = rcache->stat_cache_notfound = 0;
127
128 OBJ_CONSTRUCT(&rcache->reg_list, opal_free_list_t);
129 opal_free_list_init (&rcache->reg_list, rcache->resources.sizeof_reg,
130 opal_cache_line_size,
131 OBJ_CLASS(mca_rcache_base_registration_t),
132 0, opal_cache_line_size, 0, -1, 32, NULL, 0,
133 NULL, NULL, NULL);
134 }
135
136 static inline int dereg_mem(mca_rcache_base_registration_t *reg)
137 {
138 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) reg->rcache;
139 int rc;
140
141 reg->ref_count = 0;
142
143 if (!(reg->flags & MCA_RCACHE_FLAGS_CACHE_BYPASS)) {
144 mca_rcache_base_vma_delete (rcache_grdma->cache->vma_module, reg);
145 }
146
147 rc = rcache_grdma->resources.deregister_mem (rcache_grdma->resources.reg_data, reg);
148 if (OPAL_LIKELY(OPAL_SUCCESS == rc)) {
149 opal_free_list_return_mt (&rcache_grdma->reg_list,
150 (opal_free_list_item_t *) reg);
151 }
152
153 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
154 "registration %p destroyed", (void *) reg));
155 return rc;
156 }
157
158 static inline void do_unregistration_gc (mca_rcache_base_module_t *rcache)
159 {
160 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
161 opal_list_item_t *item;
162
163
164 while (NULL != (item = opal_lifo_pop_atomic (&rcache_grdma->cache->gc_lifo))) {
165 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
166 "deleting stale registration %p", (void *) item));
167 dereg_mem ((mca_rcache_base_registration_t *) item);
168 }
169 }
170 static inline bool mca_rcache_grdma_evict_lru_local (mca_rcache_grdma_cache_t *cache)
171 {
172 mca_rcache_grdma_module_t *rcache_grdma;
173 mca_rcache_base_registration_t *old_reg;
174
175 opal_mutex_lock (&cache->vma_module->vma_lock);
176 old_reg = (mca_rcache_base_registration_t *)
177 opal_list_remove_first (&cache->lru_list);
178 if (NULL == old_reg) {
179 opal_mutex_unlock (&cache->vma_module->vma_lock);
180 return false;
181 }
182
183 rcache_grdma = (mca_rcache_grdma_module_t *) old_reg->rcache;
184
185 (void) dereg_mem (old_reg);
186 opal_mutex_unlock (&cache->vma_module->vma_lock);
187
188 rcache_grdma->stat_evicted++;
189
190 return true;
191 }
192
193 static bool mca_rcache_grdma_evict (mca_rcache_base_module_t *rcache)
194 {
195 return mca_rcache_grdma_evict_lru_local (((mca_rcache_grdma_module_t *) rcache)->cache);
196 }
197
198 struct mca_rcache_base_find_args_t {
199 mca_rcache_base_registration_t *reg;
200 mca_rcache_grdma_module_t *rcache_grdma;
201 unsigned char *base;
202 unsigned char *bound;
203 int access_flags;
204 };
205
206 typedef struct mca_rcache_base_find_args_t mca_rcache_base_find_args_t;
207
208 static inline void mca_rcache_grdma_add_to_lru (mca_rcache_grdma_module_t *rcache_grdma, mca_rcache_base_registration_t *grdma_reg)
209 {
210 opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
211
212 opal_list_append(&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
213
214
215 opal_atomic_wmb ();
216
217
218 opal_atomic_fetch_or_32 ((opal_atomic_int32_t *) &grdma_reg->flags, MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU);
219
220 opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
221 }
222
223 static inline void mca_rcache_grdma_remove_from_lru (mca_rcache_grdma_module_t *rcache_grdma, mca_rcache_base_registration_t *grdma_reg)
224 {
225
226
227
228
229 while (!(grdma_reg->flags & MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU));
230
231
232 opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
233
234 opal_list_remove_item (&rcache_grdma->cache->lru_list, (opal_list_item_t *) grdma_reg);
235
236 grdma_reg->flags &= ~MCA_RCACHE_GRDMA_REG_FLAG_IN_LRU;
237
238 opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
239 }
240
241 static int mca_rcache_grdma_check_cached (mca_rcache_base_registration_t *grdma_reg, void *ctx)
242 {
243 mca_rcache_base_find_args_t *args = (mca_rcache_base_find_args_t *) ctx;
244 mca_rcache_grdma_module_t *rcache_grdma = args->rcache_grdma;
245
246 if ((grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) || &rcache_grdma->super != grdma_reg->rcache ||
247 grdma_reg->base > args->base || grdma_reg->bound < args->bound) {
248 return 0;
249 }
250
251 if (OPAL_UNLIKELY((args->access_flags & grdma_reg->access_flags) != args->access_flags)) {
252 args->access_flags |= grdma_reg->access_flags;
253
254
255 return mca_rcache_grdma_add_to_gc (grdma_reg);
256 }
257
258 int32_t ref_cnt = opal_atomic_fetch_add_32 (&grdma_reg->ref_count, 1);
259 args->reg = grdma_reg;
260
261 if (0 == ref_cnt) {
262 mca_rcache_grdma_remove_from_lru (rcache_grdma, grdma_reg);
263 }
264
265
266 (void) opal_atomic_fetch_add_32 ((opal_atomic_int32_t *) &rcache_grdma->stat_cache_hit, 1);
267 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
268 "returning existing registration %p. references %d", (void *) grdma_reg, ref_cnt));
269 return 1;
270 }
271
272
273
274
275 static int mca_rcache_grdma_register (mca_rcache_base_module_t *rcache, void *addr,
276 size_t size, uint32_t flags, int32_t access_flags,
277 mca_rcache_base_registration_t **reg)
278 {
279 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t*)rcache;
280 const bool bypass_cache = !!(flags & MCA_RCACHE_FLAGS_CACHE_BYPASS);
281 const bool persist = !!(flags & MCA_RCACHE_FLAGS_PERSIST);
282 mca_rcache_base_registration_t *grdma_reg;
283 opal_free_list_item_t *item;
284 unsigned char *base, *bound;
285 unsigned int page_size = opal_getpagesize ();
286 int rc;
287
288 *reg = NULL;
289
290
291 base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
292 bound = OPAL_ALIGN_PTR((intptr_t) addr + size, page_size, unsigned char *) - 1;
293
294 #if OPAL_CUDA_GDR_SUPPORT
295 if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
296 size_t psize;
297 mca_common_cuda_get_address_range(&base, &psize, addr);
298 bound = base + psize - 1;
299
300
301 check_for_cuda_freed_memory(rcache, base, psize);
302 }
303 #endif
304
305 do_unregistration_gc (rcache);
306
307
308
309 if (!(bypass_cache || persist)) {
310 mca_rcache_base_find_args_t find_args = {.reg = NULL, .rcache_grdma = rcache_grdma,
311 .base = base, .bound = bound,
312 .access_flags = access_flags};
313
314 rc = mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, false,
315 mca_rcache_grdma_check_cached, (void *) &find_args);
316 if (1 == rc) {
317 *reg = find_args.reg;
318 return OPAL_SUCCESS;
319 }
320
321
322 access_flags = find_args.access_flags;
323
324 OPAL_THREAD_ADD_FETCH32((opal_atomic_int32_t *) &rcache_grdma->stat_cache_miss, 1);
325 }
326
327 item = opal_free_list_get_mt (&rcache_grdma->reg_list);
328 if(NULL == item) {
329 return OPAL_ERR_OUT_OF_RESOURCE;
330 }
331 grdma_reg = (mca_rcache_base_registration_t*)item;
332
333 grdma_reg->rcache = rcache;
334 grdma_reg->base = base;
335 grdma_reg->bound = bound;
336 grdma_reg->flags = flags;
337 grdma_reg->access_flags = access_flags;
338 grdma_reg->ref_count = 1;
339 #if OPAL_CUDA_GDR_SUPPORT
340 if (flags & MCA_RCACHE_FLAGS_CUDA_GPU_MEM) {
341 mca_common_cuda_get_buffer_id(grdma_reg);
342 }
343 #endif
344
345 while (OPAL_ERR_OUT_OF_RESOURCE ==
346 (rc = rcache_grdma->resources.register_mem(rcache_grdma->resources.reg_data,
347 base, bound - base + 1, grdma_reg))) {
348
349 if (!mca_rcache_grdma_evict (rcache)) {
350 break;
351 }
352 }
353
354 if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
355 opal_free_list_return_mt (&rcache_grdma->reg_list, item);
356 return rc;
357 }
358
359 if (false == bypass_cache) {
360
361
362
363
364
365 rc = mca_rcache_base_vma_insert (rcache_grdma->cache->vma_module, grdma_reg, 0);
366 if (OPAL_UNLIKELY(rc != OPAL_SUCCESS)) {
367 rcache_grdma->resources.deregister_mem (rcache_grdma->resources.reg_data, grdma_reg);
368 opal_free_list_return_mt (&rcache_grdma->reg_list, item);
369 return rc;
370 }
371 }
372
373 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
374 "created new registration %p for region {%p, %p} with flags 0x%x",
375 (void *)grdma_reg, (void*)base, (void*)bound, grdma_reg->flags));
376
377 *reg = grdma_reg;
378
379 return OPAL_SUCCESS;
380 }
381
382 static int mca_rcache_grdma_find (mca_rcache_base_module_t *rcache, void *addr,
383 size_t size, mca_rcache_base_registration_t **reg)
384 {
385 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t*)rcache;
386 unsigned long page_size = opal_getpagesize ();
387 unsigned char *base, *bound;
388 int rc;
389
390 base = OPAL_DOWN_ALIGN_PTR(addr, page_size, unsigned char *);
391 bound = OPAL_ALIGN_PTR((intptr_t) addr + size - 1, page_size, unsigned char *);
392
393 opal_mutex_lock (&rcache_grdma->cache->vma_module->vma_lock);
394
395 rc = mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, base, bound - base + 1, reg);
396 if(NULL != *reg &&
397 (mca_rcache_grdma_component.leave_pinned ||
398 ((*reg)->flags & MCA_RCACHE_FLAGS_PERSIST) ||
399 ((*reg)->base == base && (*reg)->bound == bound))) {
400 assert(((void*)(*reg)->bound) >= addr);
401 if(0 == (*reg)->ref_count &&
402 mca_rcache_grdma_component.leave_pinned) {
403 opal_list_remove_item(&rcache_grdma->cache->lru_list,
404 (opal_list_item_t*)(*reg));
405 }
406 rcache_grdma->stat_cache_found++;
407 opal_atomic_add_fetch_32 (&(*reg)->ref_count, 1);
408 } else {
409 rcache_grdma->stat_cache_notfound++;
410 }
411
412 opal_mutex_unlock (&rcache_grdma->cache->vma_module->vma_lock);
413
414 return rc;
415 }
416
417 static int mca_rcache_grdma_deregister (mca_rcache_base_module_t *rcache,
418 mca_rcache_base_registration_t *reg)
419 {
420 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
421 int32_t ref_count;
422
423 ref_count = opal_atomic_add_fetch_32 (®->ref_count, -1);
424
425 OPAL_OUTPUT_VERBOSE((MCA_BASE_VERBOSE_TRACE, opal_rcache_base_framework.framework_output,
426 "returning registration %p, remaining references %d", (void *) reg, ref_count));
427
428 assert (ref_count >= 0);
429 if (ref_count > 0) {
430 return OPAL_SUCCESS;
431 }
432
433 if (registration_is_cacheable(reg)) {
434 mca_rcache_grdma_add_to_lru (rcache_grdma, reg);
435 return OPAL_SUCCESS;
436 }
437
438 return dereg_mem (reg);
439 }
440
/**
 * Context passed to gc_add() when iterating over a range of the VMA module.
 */
typedef struct gc_add_args_t {
    /** start of the range being invalidated; gc_add() compares it with the
     *  base of registrations that are still referenced */
    void *base;
    /** length of the range in bytes */
    size_t size;
} gc_add_args_t;
446
447 static int mca_rcache_grdma_add_to_gc (mca_rcache_base_registration_t *grdma_reg)
448 {
449 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) grdma_reg->rcache;
450 uint32_t flags = opal_atomic_fetch_or_32 ((opal_atomic_int32_t *) &grdma_reg->flags, MCA_RCACHE_FLAGS_INVALID);
451
452 if ((flags & MCA_RCACHE_FLAGS_INVALID) || 0 != grdma_reg->ref_count) {
453
454 return OPAL_SUCCESS;
455 }
456
457
458
459
460 if (registration_flags_cacheable (flags)) {
461 mca_rcache_grdma_remove_from_lru (rcache_grdma, grdma_reg);
462 }
463
464 opal_lifo_push_atomic (&rcache_grdma->cache->gc_lifo, (opal_list_item_t *) grdma_reg);
465
466 return OPAL_SUCCESS;
467 }
468
469 static int gc_add (mca_rcache_base_registration_t *grdma_reg, void *ctx)
470 {
471 gc_add_args_t *args = (gc_add_args_t *) ctx;
472
473 if (grdma_reg->flags & MCA_RCACHE_FLAGS_INVALID) {
474
475 return OPAL_SUCCESS;
476 }
477
478 if (grdma_reg->ref_count && grdma_reg->base == args->base) {
479
480
481
482
483
484
485 return OPAL_ERROR;
486 }
487
488 return mca_rcache_grdma_add_to_gc (grdma_reg);
489 }
490
491 static int mca_rcache_grdma_invalidate_range (mca_rcache_base_module_t *rcache,
492 void *base, size_t size)
493 {
494 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
495 gc_add_args_t args = {.base = base, .size = size};
496 return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, base, size, true, gc_add, &args);
497 }
498
499
500
501
502
#if OPAL_CUDA_GDR_SUPPORT

/**
 * Retire cached registrations for a range of GPU memory if the registration
 * found for it refers to memory that was previously freed.
 *
 * @param rcache  grdma module
 * @param addr    start of the range
 * @param size    length of the range in bytes
 *
 * @return OPAL_SUCCESS when no registration is found or
 *         mca_common_cuda_previously_freed_memory() reports false for it;
 *         otherwise the value returned by mca_rcache_base_vma_iterate().
 */
static int check_for_cuda_freed_memory (mca_rcache_base_module_t *rcache, void *addr, size_t size)
{
    mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t *) rcache;
    mca_rcache_base_registration_t *reg = NULL;
    /* gc_add() reads its context when it meets a registration that is still
     * referenced, so it must be given a real argument block.  The base is
     * left NULL so that the base-equality test in gc_add() does not match and
     * every registration visited is handed to mca_rcache_grdma_add_to_gc(). */
    gc_add_args_t gc_args = {.base = NULL, .size = size};

    mca_rcache_base_vma_find (rcache_grdma->cache->vma_module, addr, size, &reg);
    if (NULL == reg) {
        /* nothing cached for this range */
        return OPAL_SUCCESS;
    }

    if (!(mca_common_cuda_previously_freed_memory(reg))) {
        /* the registration found is still good */
        return OPAL_SUCCESS;
    }

    /* retire every registration the VMA module reports for the range */
    return mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, addr, size, true,
                                        gc_add, &gc_args);
}
#endif
526
527 static void mca_rcache_grdma_finalize (mca_rcache_base_module_t *rcache)
528 {
529 mca_rcache_grdma_module_t *rcache_grdma = (mca_rcache_grdma_module_t*)rcache;
530
531
532 if (true == mca_rcache_grdma_component.print_stats) {
533 opal_output(0, "%s grdma: stats "
534 "(hit/miss/found/not found/evicted/tree size): %d/%d/%d/%d/%d/%ld\n",
535 OPAL_NAME_PRINT(OPAL_PROC_MY_NAME),
536 rcache_grdma->stat_cache_hit, rcache_grdma->stat_cache_miss,
537 rcache_grdma->stat_cache_found, rcache_grdma->stat_cache_notfound,
538 rcache_grdma->stat_evicted, (long) mca_rcache_base_vma_size (rcache_grdma->cache->vma_module));
539 }
540
541 do_unregistration_gc (&rcache_grdma->super);
542
543 (void) mca_rcache_base_vma_iterate (rcache_grdma->cache->vma_module, NULL, (size_t) -1, true,
544 gc_add, (void *) rcache);
545 do_unregistration_gc (rcache);
546
547 OBJ_RELEASE(rcache_grdma->cache);
548
549 OBJ_DESTRUCT(&rcache_grdma->reg_list);
550
551
552 free(rcache);
553 }