#define JEMALLOC_BASE_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/extent_mmap.h"
#include "jemalloc/internal/mutex.h"
#include "jemalloc/internal/sz.h"

/******************************************************************************/
/* Data. */

static base_t *b0;

metadata_thp_mode_t opt_metadata_thp = METADATA_THP_DEFAULT;

const char *metadata_thp_mode_names[] = {
	"disabled",
	"auto",
	"always"
};

/******************************************************************************/

static inline bool
metadata_thp_madvise(void) {
	return (metadata_thp_enabled() &&
	    (init_system_thp_mode == thp_mode_default));
}

static void *
base_map(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, size_t size) {
	void *addr;
	bool zero = true;
	bool commit = true;

	/* Use huge page sizes and alignment regardless of opt_metadata_thp. */
	assert(size == HUGEPAGE_CEILING(size));
	size_t alignment = HUGEPAGE;
	if (extent_hooks == &extent_hooks_default) {
		addr = extent_alloc_mmap(NULL, size, alignment, &zero, &commit);
	} else {
		/* No arena context as we are creating new arenas. */
		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
		pre_reentrancy(tsd, NULL);
		addr = extent_hooks->alloc(extent_hooks, NULL, size, alignment,
		    &zero, &commit, ind);
		post_reentrancy(tsd);
	}

	return addr;
}

static void
base_unmap(tsdn_t *tsdn, extent_hooks_t *extent_hooks, unsigned ind, void *addr,
    size_t size) {
	/*
	 * Cascade through dalloc, decommit, purge_forced, and purge_lazy,
	 * stopping at first success. This cascade is performed for consistency
	 * with the cascade in extent_dalloc_wrapper() because an application's
	 * custom hooks may not support e.g. dalloc. This function is only ever
	 * called as a side effect of arena destruction, so although it might
	 * seem pointless to do anything besides dalloc here, the application
	 * may in fact want the end state of all associated virtual memory to be
	 * in some consistent-but-unallocated state.
	 */
	if (extent_hooks == &extent_hooks_default) {
		if (!extent_dalloc_mmap(addr, size)) {
			goto label_done;
		}
		if (!pages_decommit(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_forced(addr, size)) {
			goto label_done;
		}
		if (!pages_purge_lazy(addr, size)) {
			goto label_done;
		}
		/* Nothing worked. This should never happen. */
		not_reached();
	} else {
		tsd_t *tsd = tsdn_null(tsdn) ? tsd_fetch() : tsdn_tsd(tsdn);
		pre_reentrancy(tsd, NULL);
		if (extent_hooks->dalloc != NULL &&
		    !extent_hooks->dalloc(extent_hooks, addr, size, true,
		    ind)) {
			goto label_post_reentrancy;
		}
		if (extent_hooks->decommit != NULL &&
		    !extent_hooks->decommit(extent_hooks, addr, size, 0, size,
		    ind)) {
			goto label_post_reentrancy;
		}
		if (extent_hooks->purge_forced != NULL &&
		    !extent_hooks->purge_forced(extent_hooks, addr, size, 0,
		    size, ind)) {
			goto label_post_reentrancy;
		}
		if (extent_hooks->purge_lazy != NULL &&
		    !extent_hooks->purge_lazy(extent_hooks, addr, size, 0, size,
		    ind)) {
			goto label_post_reentrancy;
		}
		/* Nothing worked. That's the application's problem. */
	label_post_reentrancy:
		post_reentrancy(tsd);
	}
label_done:
	if (metadata_thp_madvise()) {
		/* Set NOHUGEPAGE after unmap to avoid kernel defrag. */
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (size & HUGEPAGE_MASK) == 0);
		pages_nohuge(addr, size);
	}
}

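/*
 * Initialize a base extent to manage the range [addr, addr + size), assigning
 * it the next extent serial number.
 */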
static void
base_extent_init(size_t *extent_sn_next, extent_t *extent, void *addr,
    size_t size) {
	size_t sn;

	sn = *extent_sn_next;
	(*extent_sn_next)++;

	extent_binit(extent, addr, size, sn);
}

static size_t
base_get_num_blocks(base_t *base, bool with_new_block) {
	base_block_t *b = base->blocks;
	assert(b != NULL);

	size_t n_blocks = with_new_block ? 2 : 1;
	while (b->next != NULL) {
		n_blocks++;
		b = b->next;
	}

	return n_blocks;
}

static void
base_auto_thp_switch(tsdn_t *tsdn, base_t *base) {
	assert(opt_metadata_thp == metadata_thp_auto);
	malloc_mutex_assert_owner(tsdn, &base->mtx);
	if (base->auto_thp_switched) {
		return;
	}
	/* Called when adding a new block. */
	bool should_switch;
	if (base_ind_get(base) != 0) {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD);
	} else {
		should_switch = (base_get_num_blocks(base, true) ==
		    BASE_AUTO_THP_THRESHOLD_A0);
	}
	if (!should_switch) {
		return;
	}

	base->auto_thp_switched = true;
	assert(!config_stats || base->n_thp == 0);
	/* Make the initial blocks THP lazily. */
	base_block_t *block = base->blocks;
	while (block != NULL) {
		assert((block->size & HUGEPAGE_MASK) == 0);
		pages_huge(block, block->size);
		if (config_stats) {
			base->n_thp += HUGEPAGE_CEILING(block->size -
			    extent_bsize_get(&block->extent)) >> LG_HUGEPAGE;
		}
		block = block->next;
		assert(block == NULL || (base_ind_get(base) == 0));
	}
}

static void *
base_extent_bump_alloc_helper(extent_t *extent, size_t *gap_size, size_t size,
    size_t alignment) {
	void *ret;

	assert(alignment == ALIGNMENT_CEILING(alignment, QUANTUM));
	assert(size == ALIGNMENT_CEILING(size, alignment));

	*gap_size = ALIGNMENT_CEILING((uintptr_t)extent_addr_get(extent),
	    alignment) - (uintptr_t)extent_addr_get(extent);
	ret = (void *)((uintptr_t)extent_addr_get(extent) + *gap_size);
	assert(extent_bsize_get(extent) >= *gap_size + size);
	extent_binit(extent, (void *)((uintptr_t)extent_addr_get(extent) +
	    *gap_size + size), extent_bsize_get(extent) - *gap_size - size,
	    extent_sn_get(extent));
	return ret;
}

static void
base_extent_bump_alloc_post(base_t *base, extent_t *extent, size_t gap_size,
    void *addr, size_t size) {
	if (extent_bsize_get(extent) > 0) {
		/*
		 * Compute the index for the largest size class that does not
		 * exceed extent's size.
		 */
		szind_t index_floor =
		    sz_size2index(extent_bsize_get(extent) + 1) - 1;
		extent_heap_insert(&base->avail[index_floor], extent);
	}

	if (config_stats) {
		base->allocated += size;
		/*
		 * Add one PAGE to base_resident for every page boundary that is
		 * crossed by the new allocation. Adjust n_thp similarly when
		 * metadata_thp is enabled.
		 */
		base->resident += PAGE_CEILING((uintptr_t)addr + size) -
		    PAGE_CEILING((uintptr_t)addr - gap_size);
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		if (metadata_thp_madvise() && (opt_metadata_thp ==
		    metadata_thp_always || base->auto_thp_switched)) {
			base->n_thp += (HUGEPAGE_CEILING((uintptr_t)addr + size)
			    - HUGEPAGE_CEILING((uintptr_t)addr - gap_size)) >>
			    LG_HUGEPAGE;
			assert(base->mapped >= base->n_thp << LG_HUGEPAGE);
		}
	}
}

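/*
 * Bump-allocate size bytes with the given alignment from extent, then update
 * stats and return the unused remainder of extent to the avail heaps.
 */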
static void *
base_extent_bump_alloc(base_t *base, extent_t *extent, size_t size,
    size_t alignment) {
	void *ret;
	size_t gap_size;

	ret = base_extent_bump_alloc_helper(extent, &gap_size, size, alignment);
	base_extent_bump_alloc_post(base, extent, gap_size, ret, size);
	return ret;
}

/*
 * Allocate a block of virtual memory that is large enough to start with a
 * base_block_t header, followed by an object of specified size and alignment.
 * On success a pointer to the initialized base_block_t header is returned.
 */
static base_block_t *
base_block_alloc(tsdn_t *tsdn, base_t *base, extent_hooks_t *extent_hooks,
    unsigned ind, pszind_t *pind_last, size_t *extent_sn_next, size_t size,
    size_t alignment) {
	alignment = ALIGNMENT_CEILING(alignment, QUANTUM);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t header_size = sizeof(base_block_t);
	size_t gap_size = ALIGNMENT_CEILING(header_size, alignment) -
	    header_size;
	/*
	 * Create increasingly larger blocks in order to limit the total number
	 * of disjoint virtual memory ranges. Choose the next size in the page
	 * size class series (skipping size classes that are not a multiple of
	 * HUGEPAGE), or a size large enough to satisfy the requested size and
	 * alignment, whichever is larger.
	 */
	size_t min_block_size = HUGEPAGE_CEILING(sz_psz2u(header_size + gap_size
	    + usize));
	pszind_t pind_next = (*pind_last + 1 < NPSIZES) ? *pind_last + 1 :
	    *pind_last;
	size_t next_block_size = HUGEPAGE_CEILING(sz_pind2sz(pind_next));
	size_t block_size = (min_block_size > next_block_size) ? min_block_size
	    : next_block_size;
	base_block_t *block = (base_block_t *)base_map(tsdn, extent_hooks, ind,
	    block_size);
	if (block == NULL) {
		return NULL;
	}

	if (metadata_thp_madvise()) {
		void *addr = (void *)block;
		assert(((uintptr_t)addr & HUGEPAGE_MASK) == 0 &&
		    (block_size & HUGEPAGE_MASK) == 0);
		if (opt_metadata_thp == metadata_thp_always) {
			pages_huge(addr, block_size);
		} else if (opt_metadata_thp == metadata_thp_auto &&
		    base != NULL) {
			/* base != NULL indicates this is not a new base. */
			malloc_mutex_lock(tsdn, &base->mtx);
			base_auto_thp_switch(tsdn, base);
			if (base->auto_thp_switched) {
				pages_huge(addr, block_size);
			}
			malloc_mutex_unlock(tsdn, &base->mtx);
		}
	}

	*pind_last = sz_psz2ind(block_size);
	block->size = block_size;
	block->next = NULL;
	assert(block_size >= header_size);
	base_extent_init(extent_sn_next, &block->extent,
	    (void *)((uintptr_t)block + header_size), block_size - header_size);
	return block;
}

/*
 * Allocate an extent that is at least as large as specified size, with
 * specified alignment.
 */
static extent_t *
base_extent_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	malloc_mutex_assert_owner(tsdn, &base->mtx);

	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
	/*
	 * Drop mutex during base_block_alloc(), because an extent hook will be
	 * called.
	 */
	malloc_mutex_unlock(tsdn, &base->mtx);
	base_block_t *block = base_block_alloc(tsdn, base, extent_hooks,
	    base_ind_get(base), &base->pind_last, &base->extent_sn_next, size,
	    alignment);
	malloc_mutex_lock(tsdn, &base->mtx);
	if (block == NULL) {
		return NULL;
	}
	block->next = base->blocks;
	base->blocks = block;
	if (config_stats) {
		base->allocated += sizeof(base_block_t);
		base->resident += PAGE_CEILING(sizeof(base_block_t));
		base->mapped += block->size;
		if (metadata_thp_madvise() &&
		    !(opt_metadata_thp == metadata_thp_auto
		      && !base->auto_thp_switched)) {
			assert(base->n_thp > 0);
			base->n_thp += HUGEPAGE_CEILING(sizeof(base_block_t)) >>
			    LG_HUGEPAGE;
		}
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	return &block->extent;
}

base_t *
b0get(void) {
	return b0;
}

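/*
 * Create a new base allocator, bootstrapping the base_t itself out of the
 * first block mapped through extent_hooks. base_boot() creates b0 this way
 * with the default extent hooks.
 */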
base_t *
base_new(tsdn_t *tsdn, unsigned ind, extent_hooks_t *extent_hooks) {
	pszind_t pind_last = 0;
	size_t extent_sn_next = 0;
	base_block_t *block = base_block_alloc(tsdn, NULL, extent_hooks, ind,
	    &pind_last, &extent_sn_next, sizeof(base_t), QUANTUM);
	if (block == NULL) {
		return NULL;
	}

	size_t gap_size;
	size_t base_alignment = CACHELINE;
	size_t base_size = ALIGNMENT_CEILING(sizeof(base_t), base_alignment);
	base_t *base = (base_t *)base_extent_bump_alloc_helper(&block->extent,
	    &gap_size, base_size, base_alignment);
	base->ind = ind;
	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELAXED);
	if (malloc_mutex_init(&base->mtx, "base", WITNESS_RANK_BASE,
	    malloc_mutex_rank_exclusive)) {
		base_unmap(tsdn, extent_hooks, ind, block, block->size);
		return NULL;
	}
	base->pind_last = pind_last;
	base->extent_sn_next = extent_sn_next;
	base->blocks = block;
	base->auto_thp_switched = false;
	for (szind_t i = 0; i < NSIZES; i++) {
		extent_heap_new(&base->avail[i]);
	}
	if (config_stats) {
		base->allocated = sizeof(base_block_t);
		base->resident = PAGE_CEILING(sizeof(base_block_t));
		base->mapped = block->size;
		base->n_thp = (opt_metadata_thp == metadata_thp_always) &&
		    metadata_thp_madvise() ? HUGEPAGE_CEILING(sizeof(base_block_t))
		    >> LG_HUGEPAGE : 0;
		assert(base->allocated <= base->resident);
		assert(base->resident <= base->mapped);
		assert(base->n_thp << LG_HUGEPAGE <= base->mapped);
	}
	base_extent_bump_alloc_post(base, &block->extent, gap_size, base,
	    base_size);

	return base;
}

void
base_delete(tsdn_t *tsdn, base_t *base) {
	extent_hooks_t *extent_hooks = base_extent_hooks_get(base);
	base_block_t *next = base->blocks;
	do {
		base_block_t *block = next;
		next = block->next;
		base_unmap(tsdn, extent_hooks, base_ind_get(base), block,
		    block->size);
	} while (next != NULL);
}

extent_hooks_t *
base_extent_hooks_get(base_t *base) {
	return (extent_hooks_t *)atomic_load_p(&base->extent_hooks,
	    ATOMIC_ACQUIRE);
}

extent_hooks_t *
base_extent_hooks_set(base_t *base, extent_hooks_t *extent_hooks) {
	extent_hooks_t *old_extent_hooks = base_extent_hooks_get(base);
	atomic_store_p(&base->extent_hooks, extent_hooks, ATOMIC_RELEASE);
	return old_extent_hooks;
}

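/*
 * Common implementation of base_alloc() and base_alloc_extent(): reuse space
 * tracked in the avail heaps when possible, and fall back to allocating a new
 * block otherwise. If esn is non-NULL, it is set to the extent serial number
 * of the space backing the allocation.
 */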
static void *
base_alloc_impl(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment,
    size_t *esn) {
	alignment = QUANTUM_CEILING(alignment);
	size_t usize = ALIGNMENT_CEILING(size, alignment);
	size_t asize = usize + alignment - QUANTUM;

	extent_t *extent = NULL;
	malloc_mutex_lock(tsdn, &base->mtx);
	for (szind_t i = sz_size2index(asize); i < NSIZES; i++) {
		extent = extent_heap_remove_first(&base->avail[i]);
		if (extent != NULL) {
			/* Use existing space. */
			break;
		}
	}
	if (extent == NULL) {
		/* Try to allocate more space. */
		extent = base_extent_alloc(tsdn, base, usize, alignment);
	}
	void *ret;
	if (extent == NULL) {
		ret = NULL;
		goto label_return;
	}

	ret = base_extent_bump_alloc(base, extent, usize, alignment);
	if (esn != NULL) {
		*esn = extent_sn_get(extent);
	}
label_return:
	malloc_mutex_unlock(tsdn, &base->mtx);
	return ret;
}

/*
 * base_alloc() returns zeroed memory, which is always demand-zeroed for the
 * auto arenas, in order to make multi-page sparse data structures such as radix
 * tree nodes efficient with respect to physical memory usage. Upon success a
 * pointer to at least size bytes with specified alignment is returned. Note
 * that size is rounded up to the nearest multiple of alignment to avoid false
 * sharing.
 */
void *
base_alloc(tsdn_t *tsdn, base_t *base, size_t size, size_t alignment) {
	return base_alloc_impl(tsdn, base, size, alignment, NULL);
}

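/*
 * Allocate a cacheline-aligned extent_t from base and record the serial number
 * of the space backing it.
 */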
extent_t *
base_alloc_extent(tsdn_t *tsdn, base_t *base) {
	size_t esn;
	extent_t *extent = base_alloc_impl(tsdn, base, sizeof(extent_t),
	    CACHELINE, &esn);
	if (extent == NULL) {
		return NULL;
	}
	extent_esn_set(extent, esn);
	return extent;
}

void
base_stats_get(tsdn_t *tsdn, base_t *base, size_t *allocated, size_t *resident,
    size_t *mapped, size_t *n_thp) {
	cassert(config_stats);

	malloc_mutex_lock(tsdn, &base->mtx);
	assert(base->allocated <= base->resident);
	assert(base->resident <= base->mapped);
	*allocated = base->allocated;
	*resident = base->resident;
	*mapped = base->mapped;
	*n_thp = base->n_thp;
	malloc_mutex_unlock(tsdn, &base->mtx);
}

void
base_prefork(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_prefork(tsdn, &base->mtx);
}

void
base_postfork_parent(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_parent(tsdn, &base->mtx);
}

void
base_postfork_child(tsdn_t *tsdn, base_t *base) {
	malloc_mutex_postfork_child(tsdn, &base->mtx);
}

bool
base_boot(tsdn_t *tsdn) {
	b0 = base_new(tsdn, 0, (extent_hooks_t *)&extent_hooks_default);
	return (b0 == NULL);
}
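
/*
 * Illustrative usage sketch (my_hooks, my_md_t, and ind are placeholder names
 * supplied by an embedder; tsdn can be obtained via e.g. tsdn_fetch()):
 * metadata can be carved out of a dedicated base roughly as follows.
 *
 *	base_t *b = base_new(tsdn, ind, &my_hooks);
 *	if (b != NULL) {
 *		my_md_t *md = base_alloc(tsdn, b, sizeof(my_md_t), CACHELINE);
 *		...
 *		base_delete(tsdn, b);
 *	}
 */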