1 #define JEMALLOC_BASE_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4
5 #include "jemalloc/internal/assert.h"
6 #include "jemalloc/internal/extent_mmap.h"
7 #include "jemalloc/internal/mutex.h"
8 #include "jemalloc/internal/sz.h"
9
10 /******************************************************************************/
11 /* Data. */
12
13 static base_t *b0;
14
15 metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;
16
17 const char *metadata_thp_mode_names[] = {
18 "disabled",
19 "auto",
20 "always"
21 };
22
23 /******************************************************************************/
24
25 static inline bool
metadata_thp_madvise(void)26 metadata_thp_madvise(void) {
27 return (metadata_thp_enabled() &&
28 (init_system_thp_mode == thp_mode_default));
29 }
30
31 static void *
base_map(tsdn_t * tsdn,extent_hooks_t * extent_hooks,unsigned ind,size_t size)32 base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
33 void *addr;
34 bool zero = true;
35 bool commit = true;
36
37 /* Use huge page sizes and alignment regardless of opt_metadata_thp. */
38 assert(size == HUGEPAGE_CEILING(size));
39 size_t alignment = HUGEPAGE;
40 if (extent_hooks == &extent_hooks_default) {
41 addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
42 } else {
43 /* No arena context as we are creating new arenas. */
44 tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
45 pre_reentrancy(tsd, NULL);
46 addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
47 &zero, &commit, ind);
48 post_reentrancy(tsd);
49 }
50
51 return addr;
52 }
53
54 static void
base_unmap(tsdn_t * tsdn,extent_hooks_t * extent_hooks,unsigned ind,void * addr,size_t size)55 base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
56 size_t size) {
57 /*
58 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
59 * stopping at first success. This cascade is performed for consistency
60 * with the cascade in extent_dalloc_wrapper() because an application's
61 * custom hooks may not support e.g. dalloc. This function is only ever
62 * called as a side effect of arena destruction, so although it might
63 * seem pointless to do anything besides dalloc here, the application
64 * may in fact want the end state of all associated virtual memory to be
65 * in some consistent-but-allocated state.
66 */
67 if (extent_hooks == &extent_hooks_default) {
68 if (!extent_dalloc_mmap(addr, size)) {
69 goto label_done;
70 }
71 if (!pages_decommit(addr, size)) {
72 goto label_done;
73 }
74 if (!pages_purge_forced(addr, size)) {
75 goto label_done;
76 }
77 if (!pages_purge_lazy(addr, size)) {
78 goto label_done;
79 }
80 /* Nothing worked. This should never happen. */
81 not_reached();
82 } else {
83 tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
84 pre_reentrancy(tsd, NULL);
85 if (extent_hooks->dalloc != NULL &&
86 !extent_hooks->dalloc(extent_hooks, addr, size, true,
87 ind)) {
88 goto label_post_reentrancy;
89 }
90 if (extent_hooks->decommit != NULL &&
91 !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
92 ind)) {
93 goto label_post_reentrancy;
94 }
95 if (extent_hooks->purge_forced != NULL &&
96 !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
97 size, ind)) {
98 goto label_post_reentrancy;
99 }
100 if (extent_hooks->purge_lazy != NULL &&
101 !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
102 ind)) {
103 goto label_post_reentrancy;
104 }
105 /* Nothing worked. That's the application's problem. */
106 label_post_reentrancy:
107 post_reentrancy(tsd);
108 }
109 label_done:
110 if (metadata_thp_madvise()) {
111 /* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
112 assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
113 (size & HUGEPAGE_MASK) == 0);
114 pages_nohuge(addr, size);
115 }
116 }
117
118 static void
base_extent_init(size_t * extent_sn_next,extent_t * extent,void * addr,size_t size)119 base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
120 size_t size) {
121 size_t sn;
122
123 sn = *extent_sn_next;
124 (*extent_sn_next)++;
125
126 extent_binit(extent, addr, size, sn);
127 }
128
129 static size_t
base_get_num_blocks(base_t * base,bool with_new_block)130 base_get_num_blocks(base_t *base, bool with_new_block) {
131 base_block_t *b = base->blocks;
132 assert(b != NULL);
133
134 size_t n_blocks = with_new_block ? 2 : 1;
135 while (b->next != NULL) {
136 n_blocks++;
137 b = b->next;
138 }
139
140 return n_blocks;
141 }
142
143 static void
base_auto_thp_switch(tsdn_t * tsdn,base_t * base)144 base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
145 assert(opt_metadata_thp == metadata_thp_auto);
146 malloc_mutex_assert_owner(tsdn, &base->mtx);
147 if (base->auto_thp_switched) {
148 return;
149 }
150 /* Called when adding a new block. */
151 bool should_switch;
152 if (base_ind_get(base) != 0) {
153 should_switch = (base_get_num_blocks(base, true) ==
154 BASE_AUTO_THP_THRESHOLD);
155 } else {
156 should_switch = (base_get_num_blocks(base, true) ==
157 BASE_AUTO_THP_THRESHOLD_A0);
158 }
159 if (!should_switch) {
160 return;
161 }
162
163 base->auto_thp_switched = true;
164 assert(!config_stats || base->n_thp == 0);
165 /* Make the initial blocks THP lazily. */
166 base_block_t *block = base->blocks;
167 while (block != NULL) {
168 assert((block->size & HUGEPAGE_MASK) == 0);
169 pages_huge(block, block->size);
170 if (config_stats) {
171 base->n_thp += HUGEPAGE_CEILING(block->size -
172 extent_bsize_get(&block->extent)) >> LG_HUGEPAGE;
173 }
174 block = block->next;
175 assert(block == NULL || (base_ind_get(base) == 0));
176 }
177 }
178
179 static void *
base_extent_bump_alloc_helper(extent_t * extent,size_t * gap_size,size_t size,size_t alignment)180 base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
181 size_t alignment) {
182 void *ret;
183
184 assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
185 assert(size == ALIGNMENT_CEILING(size, alignment));
186
187 *gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent),
188 alignment) - (uintptr_t)extent_addr_get(extent);
189 ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size);
190 assert(extent_bsize_get(extent) >= *gap_size + size);
191 extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) +
192 *gap_size + size), extent_bsize_get(extent) - *gap_size - size,
193 extent_sn_get(extent));
194 return ret;
195 }
196
197 static void
base_extent_bump_alloc_post(base_t * base,extent_t * extent,size_t gap_size,void * addr,size_t size)198 base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size,
199 void *addr, size_t size) {
200 if (extent_bsize_get(extent) > 0) {
201 /*
202 * Compute the index for the largest size class that does not
203 * exceed extent's size.
204 */
205 szind_t index_floor =
206 sz_size2index(extent_bsize_get(extent) + 1) - 1;
207 extent_heap_insert(&base->avail[index_floor], extent);
208 }
209
210 if (config_stats) {
211 base->allocated += size;
212 /*
213 * Add one PAGE to base_resident for every page boundary that is
214 * crossed by the new allocation. Adjust n_thp similarly when
215 * metadata_thp is enabled.
216 */
217 base->resident += PAGE_CEILING((uintptr_t)addr + size) -
218 PAGE_CEILING((uintptr_t)addr - gap_size);
219 assert(base->allocated <= base->resident);
220 assert(base->resident <= base->mapped);
221 if (metadata_thp_madvise() && (opt_metadata_thp ==
222 metadata_thp_always || base->auto_thp_switched)) {
223 base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
224 - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
225 LG_HUGEPAGE;
226 assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
227 }
228 }
229 }
230
231 static void *
base_extent_bump_alloc(base_t * base,extent_t * extent,size_t size,size_t alignment)232 base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size,
233 size_t alignment) {
234 void *ret;
235 size_t gap_size;
236
237 ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
238 base_extent_bump_alloc_post(base, extent, gap_size, ret, size);
239 return ret;
240 }
241
242 /*
243 * Allocate a block of virtual memory that is large enough to start with a
244 * base_block_t header, followed by an object of specified size and alignment.
245 * On success a pointer to the initialized base_block_t header is returned.
246 */
247 static base_block_t *
base_block_alloc(tsdn_t * tsdn,base_t * base,extent_hooks_t * extent_hooks,unsigned ind,pszind_t * pind_last,size_t * extent_sn_next,size_t size,size_t alignment)248 base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
249 unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
250 size_t alignment) {
251 alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
252 size_t usize = ALIGNMENT_CEILING(size, alignment);
253 size_t header_size = sizeof(base_block_t);
254 size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
255 header_size;
256 /*
257 * Create increasingly larger blocks in order to limit the total number
258 * of disjoint virtual memory ranges. Choose the next size in the page
259 * size class series (skipping size classes that are not a multiple of
260 * HUGEPAGE), or a size large enough to satisfy the requested size and
261 * alignment, whichever is larger.
262 */
263 size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
264 + usize));
265 pszind_t pind_next = (*pind_last + 1 < sz_psz2ind(SC_LARGE_MAXCLASS)) ?
266 *pind_last + 1 : *pind_last;
267 size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
268 size_t block_size = (min_block_size > next_block_size) ? min_block_size
269 : next_block_size;
270 base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
271 block_size);
272 if (block == NULL) {
273 return NULL;
274 }
275
276 if (metadata_thp_madvise()) {
277 void *addr = (void *)block;
278 assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
279 (block_size & HUGEPAGE_MASK) == 0);
280 if (opt_metadata_thp == metadata_thp_always) {
281 pages_huge(addr, block_size);
282 } else if (opt_metadata_thp == metadata_thp_auto &&
283 base != NULL) {
284 /* base != NULL indicates this is not a new base. */
285 malloc_mutex_lock(tsdn, &base->mtx);
286 base_auto_thp_switch(tsdn, base);
287 if (base->auto_thp_switched) {
288 pages_huge(addr, block_size);
289 }
290 malloc_mutex_unlock(tsdn, &base->mtx);
291 }
292 }
293
294 *pind_last = sz_psz2ind(block_size);
295 block->size = block_size;
296 block->next = NULL;
297 assert(block_size >= header_size);
298 base_extent_init(extent_sn_next, &block->extent,
299 (void *)((uintptr_t)block + header_size), block_size - header_size);
300 return block;
301 }
302
303 /*
304 * Allocate an extent that is at least as large as specified size, with
305 * specified alignment.
306 */
307 static extent_t *
base_extent_alloc(tsdn_t * tsdn,base_t * base,size_t size,size_t alignment)308 base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
309 malloc_mutex_assert_owner(tsdn, &base->mtx);
310
311 extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
312 /*
313 * Drop mutex during base_block_alloc(), because an extent hook will be
314 * called.
315 */
316 malloc_mutex_unlock(tsdn, &base->mtx);
317 base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
318 base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
319 alignment);
320 malloc_mutex_lock(tsdn, &base->mtx);
321 if (block == NULL) {
322 return NULL;
323 }
324 block->next = base->blocks;
325 base->blocks = block;
326 if (config_stats) {
327 base->allocated += sizeof(base_block_t);
328 base->resident += PAGE_CEILING(sizeof(base_block_t));
329 base->mapped += block->size;
330 if (metadata_thp_madvise() &&
331 !(opt_metadata_thp == metadata_thp_auto
332 && !base->auto_thp_switched)) {
333 assert(base->n_thp > 0);
334 base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
335 LG_HUGEPAGE;
336 }
337 assert(base->allocated <= base->resident);
338 assert(base->resident <= base->mapped);
339 assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
340 }
341 return &block->extent;
342 }
343
344 base_t *
b0get(void)345 b0get(void) {
346 return b0;
347 }
348
349 base_t *
base_new(tsdn_t * tsdn,unsigned ind,extent_hooks_t * extent_hooks)350 base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
351 pszind_t pind_last = 0;
352 size_t extent_sn_next = 0;
353 base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
354 &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
355 if (block == NULL) {
356 return NULL;
357 }
358
359 size_t gap_size;
360 size_t base_alignment = CACHELINE;
361 size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
362 base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent,
363 &gap_size, base_size, base_alignment);
364 base->ind = ind;
365 atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
366 if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
367 malloc_mutex_rank_exclusive)) {
368 base_unmap(tsdn, extent_hooks, ind, block, block->size);
369 return NULL;
370 }
371 base->pind_last = pind_last;
372 base->extent_sn_next = extent_sn_next;
373 base->blocks = block;
374 base->auto_thp_switched = false;
375 for (szind_t i = 0; i < SC_NSIZES; i++) {
376 extent_heap_new(&base->avail[i]);
377 }
378 if (config_stats) {
379 base->allocated = sizeof(base_block_t);
380 base->resident = PAGE_CEILING(sizeof(base_block_t));
381 base->mapped = block->size;
382 base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
383 metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
384 >> LG_HUGEPAGE : 0;
385 assert(base->allocated <= base->resident);
386 assert(base->resident <= base->mapped);
387 assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
388 }
389 base_extent_bump_alloc_post(base, &block->extent, gap_size, base,
390 base_size);
391
392 return base;
393 }
394
395 void
base_delete(tsdn_t * tsdn,base_t * base)396 base_delete(tsdn_t *tsdn, base_t *base) {
397 extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
398 base_block_t *next = base->blocks;
399 do {
400 base_block_t *block = next;
401 next = block->next;
402 base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
403 block->size);
404 } while (next != NULL);
405 }
406
407 extent_hooks_t *
base_extent_hooks_get(base_t * base)408 base_extent_hooks_get(base_t *base) {
409 return (extent_hooks_t *)atomic_load_p(&base->extent_hooks,
410 ATOMIC_ACQUIRE);
411 }
412
413 extent_hooks_t *
base_extent_hooks_set(base_t * base,extent_hooks_t * extent_hooks)414 base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
415 extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base);
416 atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE);
417 return old_extent_hooks;
418 }
419
420 static void *
base_alloc_impl(tsdn_t * tsdn,base_t * base,size_t size,size_t alignment,size_t * esn)421 base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
422 size_t *esn) {
423 alignment = QUANTUM_CEILING(alignment);
424 size_t usize = ALIGNMENT_CEILING(size, alignment);
425 size_t asize = usize + alignment - QUANTUM;
426
427 extent_t *extent = NULL;
428 malloc_mutex_lock(tsdn, &base->mtx);
429 for (szind_t i = sz_size2index(asize); i < SC_NSIZES; i++) {
430 extent = extent_heap_remove_first(&base->avail[i]);
431 if (extent != NULL) {
432 /* Use existing space. */
433 break;
434 }
435 }
436 if (extent == NULL) {
437 /* Try to allocate more space. */
438 extent = base_extent_alloc(tsdn, base, usize, alignment);
439 }
440 void *ret;
441 if (extent == NULL) {
442 ret = NULL;
443 goto label_return;
444 }
445
446 ret = base_extent_bump_alloc(base, extent, usize, alignment);
447 if (esn != NULL) {
448 *esn = extent_sn_get(extent);
449 }
450 label_return:
451 malloc_mutex_unlock(tsdn, &base->mtx);
452 return ret;
453 }
454
455 /*
456 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
457 * auto arenas, in order to make multi-page sparse data structures such as radix
458 * tree nodes efficient with respect to physical memory usage. Upon success a
459 * pointer to at least size bytes with specified alignment is returned. Note
460 * that size is rounded up to the nearest multiple of alignment to avoid false
461 * sharing.
462 */
463 void *
base_alloc(tsdn_t * tsdn,base_t * base,size_t size,size_t alignment)464 base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
465 return base_alloc_impl(tsdn, base, size, alignment, NULL);
466 }
467
468 extent_t *
base_alloc_extent(tsdn_t * tsdn,base_t * base)469 base_alloc_extent(tsdn_t *tsdn, base_t *base) {
470 size_t esn;
471 extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t),
472 CACHELINE, &esn);
473 if (extent == NULL) {
474 return NULL;
475 }
476 extent_esn_set(extent, esn);
477 return extent;
478 }
479
480 void
base_stats_get(tsdn_t * tsdn,base_t * base,size_t * allocated,size_t * resident,size_t * mapped,size_t * n_thp)481 base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
482 size_t *mapped, size_t *n_thp) {
483 cassert(config_stats);
484
485 malloc_mutex_lock(tsdn, &base->mtx);
486 assert(base->allocated <= base->resident);
487 assert(base->resident <= base->mapped);
488 *allocated = base->allocated;
489 *resident = base->resident;
490 *mapped = base->mapped;
491 *n_thp = base->n_thp;
492 malloc_mutex_unlock(tsdn, &base->mtx);
493 }
494
495 void
base_prefork(tsdn_t * tsdn,base_t * base)496 base_prefork(tsdn_t *tsdn, base_t *base) {
497 malloc_mutex_prefork(tsdn, &base->mtx);
498 }
499
500 void
base_postfork_parent(tsdn_t * tsdn,base_t * base)501 base_postfork_parent(tsdn_t *tsdn, base_t *base) {
502 malloc_mutex_postfork_parent(tsdn, &base->mtx);
503 }
504
505 void
base_postfork_child(tsdn_t * tsdn,base_t * base)506 base_postfork_child(tsdn_t *tsdn, base_t *base) {
507 malloc_mutex_postfork_child(tsdn, &base->mtx);
508 }
509
510 bool
base_boot(tsdn_t * tsdn)511 base_boot(tsdn_t *tsdn) {
512 b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
513 return (b0 == NULL);
514 }
515