1 #include "jemalloc/internal/jemalloc_preamble.h" 2 #include "jemalloc/internal/jemalloc_internal_includes.h" 3 4 #include "jemalloc/internal/pac.h" 5 #include "jemalloc/internal/san.h" 6 7 static edata_t *pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, 8 size_t alignment, bool zero, bool guarded, bool frequent_reuse, 9 bool *deferred_work_generated); 10 static bool pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, 11 size_t old_size, size_t new_size, bool zero, bool *deferred_work_generated); 12 static bool pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, 13 size_t old_size, size_t new_size, bool *deferred_work_generated); 14 static void pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, 15 bool *deferred_work_generated); 16 static uint64_t pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self); 17 18 static inline void 19 pac_decay_data_get(pac_t *pac, extent_state_t state, 20 decay_t **r_decay, pac_decay_stats_t **r_decay_stats, ecache_t **r_ecache) { 21 switch(state) { 22 case extent_state_dirty: 23 *r_decay = &pac->decay_dirty; 24 *r_decay_stats = &pac->stats->decay_dirty; 25 *r_ecache = &pac->ecache_dirty; 26 return; 27 case extent_state_muzzy: 28 *r_decay = &pac->decay_muzzy; 29 *r_decay_stats = &pac->stats->decay_muzzy; 30 *r_ecache = &pac->ecache_muzzy; 31 return; 32 default: 33 unreachable(); 34 } 35 } 36 37 bool 38 pac_init(tsdn_t *tsdn, pac_t *pac, base_t *base, emap_t *emap, 39 edata_cache_t *edata_cache, nstime_t *cur_time, 40 size_t pac_oversize_threshold, ssize_t dirty_decay_ms, 41 ssize_t muzzy_decay_ms, pac_stats_t *pac_stats, malloc_mutex_t *stats_mtx) { 42 unsigned ind = base_ind_get(base); 43 /* 44 * Delay coalescing for dirty extents despite the disruptive effect on 45 * memory layout for best-fit extent allocation, since cached extents 46 * are likely to be reused soon after deallocation, and the cost of 47 * merging/splitting extents is non-trivial. 48 */ 49 if (ecache_init(tsdn, &pac->ecache_dirty, extent_state_dirty, ind, 50 /* delay_coalesce */ true)) { 51 return true; 52 } 53 /* 54 * Coalesce muzzy extents immediately, because operations on them are in 55 * the critical path much less often than for dirty extents. 56 */ 57 if (ecache_init(tsdn, &pac->ecache_muzzy, extent_state_muzzy, ind, 58 /* delay_coalesce */ false)) { 59 return true; 60 } 61 /* 62 * Coalesce retained extents immediately, in part because they will 63 * never be evicted (and therefore there's no opportunity for delayed 64 * coalescing), but also because operations on retained extents are not 65 * in the critical path. 66 */ 67 if (ecache_init(tsdn, &pac->ecache_retained, extent_state_retained, 68 ind, /* delay_coalesce */ false)) { 69 return true; 70 } 71 exp_grow_init(&pac->exp_grow); 72 if (malloc_mutex_init(&pac->grow_mtx, "extent_grow", 73 WITNESS_RANK_EXTENT_GROW, malloc_mutex_rank_exclusive)) { 74 return true; 75 } 76 atomic_store_zu(&pac->oversize_threshold, pac_oversize_threshold, 77 ATOMIC_RELAXED); 78 if (decay_init(&pac->decay_dirty, cur_time, dirty_decay_ms)) { 79 return true; 80 } 81 if (decay_init(&pac->decay_muzzy, cur_time, muzzy_decay_ms)) { 82 return true; 83 } 84 if (san_bump_alloc_init(&pac->sba)) { 85 return true; 86 } 87 88 pac->base = base; 89 pac->emap = emap; 90 pac->edata_cache = edata_cache; 91 pac->stats = pac_stats; 92 pac->stats_mtx = stats_mtx; 93 atomic_store_zu(&pac->extent_sn_next, 0, ATOMIC_RELAXED); 94 95 pac->pai.alloc = &pac_alloc_impl; 96 pac->pai.alloc_batch = &pai_alloc_batch_default; 97 pac->pai.expand = &pac_expand_impl; 98 pac->pai.shrink = &pac_shrink_impl; 99 pac->pai.dalloc = &pac_dalloc_impl; 100 pac->pai.dalloc_batch = &pai_dalloc_batch_default; 101 pac->pai.time_until_deferred_work = &pac_time_until_deferred_work; 102 103 return false; 104 } 105 106 static inline bool 107 pac_may_have_muzzy(pac_t *pac) { 108 return pac_decay_ms_get(pac, extent_state_muzzy) != 0; 109 } 110 111 static edata_t * 112 pac_alloc_real(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, 113 size_t alignment, bool zero, bool guarded) { 114 assert(!guarded || alignment <= PAGE); 115 116 edata_t *edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, 117 NULL, size, alignment, zero, guarded); 118 119 if (edata == NULL && pac_may_have_muzzy(pac)) { 120 edata = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, 121 NULL, size, alignment, zero, guarded); 122 } 123 if (edata == NULL) { 124 edata = ecache_alloc_grow(tsdn, pac, ehooks, 125 &pac->ecache_retained, NULL, size, alignment, zero, 126 guarded); 127 if (config_stats && edata != NULL) { 128 atomic_fetch_add_zu(&pac->stats->pac_mapped, size, 129 ATOMIC_RELAXED); 130 } 131 } 132 133 return edata; 134 } 135 136 static edata_t * 137 pac_alloc_new_guarded(tsdn_t *tsdn, pac_t *pac, ehooks_t *ehooks, size_t size, 138 size_t alignment, bool zero, bool frequent_reuse) { 139 assert(alignment <= PAGE); 140 141 edata_t *edata; 142 if (san_bump_enabled() && frequent_reuse) { 143 edata = san_bump_alloc(tsdn, &pac->sba, pac, ehooks, size, 144 zero); 145 } else { 146 size_t size_with_guards = san_two_side_guarded_sz(size); 147 /* Alloc a non-guarded extent first.*/ 148 edata = pac_alloc_real(tsdn, pac, ehooks, size_with_guards, 149 /* alignment */ PAGE, zero, /* guarded */ false); 150 if (edata != NULL) { 151 /* Add guards around it. */ 152 assert(edata_size_get(edata) == size_with_guards); 153 san_guard_pages_two_sided(tsdn, ehooks, edata, 154 pac->emap, true); 155 } 156 } 157 assert(edata == NULL || (edata_guarded_get(edata) && 158 edata_size_get(edata) == size)); 159 160 return edata; 161 } 162 163 static edata_t * 164 pac_alloc_impl(tsdn_t *tsdn, pai_t *self, size_t size, size_t alignment, 165 bool zero, bool guarded, bool frequent_reuse, 166 bool *deferred_work_generated) { 167 pac_t *pac = (pac_t *)self; 168 ehooks_t *ehooks = pac_ehooks_get(pac); 169 170 edata_t *edata = NULL; 171 /* 172 * The condition is an optimization - not frequently reused guarded 173 * allocations are never put in the ecache. pac_alloc_real also 174 * doesn't grow retained for guarded allocations. So pac_alloc_real 175 * for such allocations would always return NULL. 176 * */ 177 if (!guarded || frequent_reuse) { 178 edata = pac_alloc_real(tsdn, pac, ehooks, size, alignment, 179 zero, guarded); 180 } 181 if (edata == NULL && guarded) { 182 /* No cached guarded extents; creating a new one. */ 183 edata = pac_alloc_new_guarded(tsdn, pac, ehooks, size, 184 alignment, zero, frequent_reuse); 185 } 186 187 return edata; 188 } 189 190 static bool 191 pac_expand_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, 192 size_t new_size, bool zero, bool *deferred_work_generated) { 193 pac_t *pac = (pac_t *)self; 194 ehooks_t *ehooks = pac_ehooks_get(pac); 195 196 size_t mapped_add = 0; 197 size_t expand_amount = new_size - old_size; 198 199 if (ehooks_merge_will_fail(ehooks)) { 200 return true; 201 } 202 edata_t *trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_dirty, 203 edata, expand_amount, PAGE, zero, /* guarded*/ false); 204 if (trail == NULL) { 205 trail = ecache_alloc(tsdn, pac, ehooks, &pac->ecache_muzzy, 206 edata, expand_amount, PAGE, zero, /* guarded*/ false); 207 } 208 if (trail == NULL) { 209 trail = ecache_alloc_grow(tsdn, pac, ehooks, 210 &pac->ecache_retained, edata, expand_amount, PAGE, zero, 211 /* guarded */ false); 212 mapped_add = expand_amount; 213 } 214 if (trail == NULL) { 215 return true; 216 } 217 if (extent_merge_wrapper(tsdn, pac, ehooks, edata, trail)) { 218 extent_dalloc_wrapper(tsdn, pac, ehooks, trail); 219 return true; 220 } 221 if (config_stats && mapped_add > 0) { 222 atomic_fetch_add_zu(&pac->stats->pac_mapped, mapped_add, 223 ATOMIC_RELAXED); 224 } 225 return false; 226 } 227 228 static bool 229 pac_shrink_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, size_t old_size, 230 size_t new_size, bool *deferred_work_generated) { 231 pac_t *pac = (pac_t *)self; 232 ehooks_t *ehooks = pac_ehooks_get(pac); 233 234 size_t shrink_amount = old_size - new_size; 235 236 if (ehooks_split_will_fail(ehooks)) { 237 return true; 238 } 239 240 edata_t *trail = extent_split_wrapper(tsdn, pac, ehooks, edata, 241 new_size, shrink_amount, /* holding_core_locks */ false); 242 if (trail == NULL) { 243 return true; 244 } 245 ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, trail); 246 *deferred_work_generated = true; 247 return false; 248 } 249 250 static void 251 pac_dalloc_impl(tsdn_t *tsdn, pai_t *self, edata_t *edata, 252 bool *deferred_work_generated) { 253 pac_t *pac = (pac_t *)self; 254 ehooks_t *ehooks = pac_ehooks_get(pac); 255 256 if (edata_guarded_get(edata)) { 257 /* 258 * Because cached guarded extents do exact fit only, large 259 * guarded extents are restored on dalloc eagerly (otherwise 260 * they will not be reused efficiently). Slab sizes have a 261 * limited number of size classes, and tend to cycle faster. 262 * 263 * In the case where coalesce is restrained (VirtualFree on 264 * Windows), guarded extents are also not cached -- otherwise 265 * during arena destroy / reset, the retained extents would not 266 * be whole regions (i.e. they are split between regular and 267 * guarded). 268 */ 269 if (!edata_slab_get(edata) || !maps_coalesce) { 270 assert(edata_size_get(edata) >= SC_LARGE_MINCLASS || 271 !maps_coalesce); 272 san_unguard_pages_two_sided(tsdn, ehooks, edata, 273 pac->emap); 274 } 275 } 276 277 ecache_dalloc(tsdn, pac, ehooks, &pac->ecache_dirty, edata); 278 /* Purging of deallocated pages is deferred */ 279 *deferred_work_generated = true; 280 } 281 282 static inline uint64_t 283 pac_ns_until_purge(tsdn_t *tsdn, decay_t *decay, size_t npages) { 284 if (malloc_mutex_trylock(tsdn, &decay->mtx)) { 285 /* Use minimal interval if decay is contended. */ 286 return BACKGROUND_THREAD_DEFERRED_MIN; 287 } 288 uint64_t result = decay_ns_until_purge(decay, npages, 289 ARENA_DEFERRED_PURGE_NPAGES_THRESHOLD); 290 291 malloc_mutex_unlock(tsdn, &decay->mtx); 292 return result; 293 } 294 295 static uint64_t 296 pac_time_until_deferred_work(tsdn_t *tsdn, pai_t *self) { 297 uint64_t time; 298 pac_t *pac = (pac_t *)self; 299 300 time = pac_ns_until_purge(tsdn, 301 &pac->decay_dirty, 302 ecache_npages_get(&pac->ecache_dirty)); 303 if (time == BACKGROUND_THREAD_DEFERRED_MIN) { 304 return time; 305 } 306 307 uint64_t muzzy = pac_ns_until_purge(tsdn, 308 &pac->decay_muzzy, 309 ecache_npages_get(&pac->ecache_muzzy)); 310 if (muzzy < time) { 311 time = muzzy; 312 } 313 return time; 314 } 315 316 bool 317 pac_retain_grow_limit_get_set(tsdn_t *tsdn, pac_t *pac, size_t *old_limit, 318 size_t *new_limit) { 319 pszind_t new_ind JEMALLOC_CC_SILENCE_INIT(0); 320 if (new_limit != NULL) { 321 size_t limit = *new_limit; 322 /* Grow no more than the new limit. */ 323 if ((new_ind = sz_psz2ind(limit + 1) - 1) >= SC_NPSIZES) { 324 return true; 325 } 326 } 327 328 malloc_mutex_lock(tsdn, &pac->grow_mtx); 329 if (old_limit != NULL) { 330 *old_limit = sz_pind2sz(pac->exp_grow.limit); 331 } 332 if (new_limit != NULL) { 333 pac->exp_grow.limit = new_ind; 334 } 335 malloc_mutex_unlock(tsdn, &pac->grow_mtx); 336 337 return false; 338 } 339 340 static size_t 341 pac_stash_decayed(tsdn_t *tsdn, pac_t *pac, ecache_t *ecache, 342 size_t npages_limit, size_t npages_decay_max, 343 edata_list_inactive_t *result) { 344 witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), 345 WITNESS_RANK_CORE, 0); 346 ehooks_t *ehooks = pac_ehooks_get(pac); 347 348 /* Stash extents according to npages_limit. */ 349 size_t nstashed = 0; 350 while (nstashed < npages_decay_max) { 351 edata_t *edata = ecache_evict(tsdn, pac, ehooks, ecache, 352 npages_limit); 353 if (edata == NULL) { 354 break; 355 } 356 edata_list_inactive_append(result, edata); 357 nstashed += edata_size_get(edata) >> LG_PAGE; 358 } 359 return nstashed; 360 } 361 362 static size_t 363 pac_decay_stashed(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 364 pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, 365 edata_list_inactive_t *decay_extents) { 366 bool err; 367 368 size_t nmadvise = 0; 369 size_t nunmapped = 0; 370 size_t npurged = 0; 371 372 ehooks_t *ehooks = pac_ehooks_get(pac); 373 374 bool try_muzzy = !fully_decay 375 && pac_decay_ms_get(pac, extent_state_muzzy) != 0; 376 377 for (edata_t *edata = edata_list_inactive_first(decay_extents); edata != 378 NULL; edata = edata_list_inactive_first(decay_extents)) { 379 edata_list_inactive_remove(decay_extents, edata); 380 381 size_t size = edata_size_get(edata); 382 size_t npages = size >> LG_PAGE; 383 384 nmadvise++; 385 npurged += npages; 386 387 switch (ecache->state) { 388 case extent_state_active: 389 not_reached(); 390 case extent_state_dirty: 391 if (try_muzzy) { 392 err = extent_purge_lazy_wrapper(tsdn, ehooks, 393 edata, /* offset */ 0, size); 394 if (!err) { 395 ecache_dalloc(tsdn, pac, ehooks, 396 &pac->ecache_muzzy, edata); 397 break; 398 } 399 } 400 JEMALLOC_FALLTHROUGH; 401 case extent_state_muzzy: 402 extent_dalloc_wrapper(tsdn, pac, ehooks, edata); 403 nunmapped += npages; 404 break; 405 case extent_state_retained: 406 default: 407 not_reached(); 408 } 409 } 410 411 if (config_stats) { 412 LOCKEDINT_MTX_LOCK(tsdn, *pac->stats_mtx); 413 locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), 414 &decay_stats->npurge, 1); 415 locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), 416 &decay_stats->nmadvise, nmadvise); 417 locked_inc_u64(tsdn, LOCKEDINT_MTX(*pac->stats_mtx), 418 &decay_stats->purged, npurged); 419 LOCKEDINT_MTX_UNLOCK(tsdn, *pac->stats_mtx); 420 atomic_fetch_sub_zu(&pac->stats->pac_mapped, 421 nunmapped << LG_PAGE, ATOMIC_RELAXED); 422 } 423 424 return npurged; 425 } 426 427 /* 428 * npages_limit: Decay at most npages_decay_max pages without violating the 429 * invariant: (ecache_npages_get(ecache) >= npages_limit). We need an upper 430 * bound on number of pages in order to prevent unbounded growth (namely in 431 * stashed), otherwise unbounded new pages could be added to extents during the 432 * current decay run, so that the purging thread never finishes. 433 */ 434 static void 435 pac_decay_to_limit(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 436 pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay, 437 size_t npages_limit, size_t npages_decay_max) { 438 witness_assert_depth_to_rank(tsdn_witness_tsdp_get(tsdn), 439 WITNESS_RANK_CORE, 1); 440 441 if (decay->purging || npages_decay_max == 0) { 442 return; 443 } 444 decay->purging = true; 445 malloc_mutex_unlock(tsdn, &decay->mtx); 446 447 edata_list_inactive_t decay_extents; 448 edata_list_inactive_init(&decay_extents); 449 size_t npurge = pac_stash_decayed(tsdn, pac, ecache, npages_limit, 450 npages_decay_max, &decay_extents); 451 if (npurge != 0) { 452 size_t npurged = pac_decay_stashed(tsdn, pac, decay, 453 decay_stats, ecache, fully_decay, &decay_extents); 454 assert(npurged == npurge); 455 } 456 457 malloc_mutex_lock(tsdn, &decay->mtx); 458 decay->purging = false; 459 } 460 461 void 462 pac_decay_all(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 463 pac_decay_stats_t *decay_stats, ecache_t *ecache, bool fully_decay) { 464 malloc_mutex_assert_owner(tsdn, &decay->mtx); 465 pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, fully_decay, 466 /* npages_limit */ 0, ecache_npages_get(ecache)); 467 } 468 469 static void 470 pac_decay_try_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 471 pac_decay_stats_t *decay_stats, ecache_t *ecache, 472 size_t current_npages, size_t npages_limit) { 473 if (current_npages > npages_limit) { 474 pac_decay_to_limit(tsdn, pac, decay, decay_stats, ecache, 475 /* fully_decay */ false, npages_limit, 476 current_npages - npages_limit); 477 } 478 } 479 480 bool 481 pac_maybe_decay_purge(tsdn_t *tsdn, pac_t *pac, decay_t *decay, 482 pac_decay_stats_t *decay_stats, ecache_t *ecache, 483 pac_purge_eagerness_t eagerness) { 484 malloc_mutex_assert_owner(tsdn, &decay->mtx); 485 486 /* Purge all or nothing if the option is disabled. */ 487 ssize_t decay_ms = decay_ms_read(decay); 488 if (decay_ms <= 0) { 489 if (decay_ms == 0) { 490 pac_decay_to_limit(tsdn, pac, decay, decay_stats, 491 ecache, /* fully_decay */ false, 492 /* npages_limit */ 0, ecache_npages_get(ecache)); 493 } 494 return false; 495 } 496 497 /* 498 * If the deadline has been reached, advance to the current epoch and 499 * purge to the new limit if necessary. Note that dirty pages created 500 * during the current epoch are not subject to purge until a future 501 * epoch, so as a result purging only happens during epoch advances, or 502 * being triggered by background threads (scheduled event). 503 */ 504 nstime_t time; 505 nstime_init_update(&time); 506 size_t npages_current = ecache_npages_get(ecache); 507 bool epoch_advanced = decay_maybe_advance_epoch(decay, &time, 508 npages_current); 509 if (eagerness == PAC_PURGE_ALWAYS 510 || (epoch_advanced && eagerness == PAC_PURGE_ON_EPOCH_ADVANCE)) { 511 size_t npages_limit = decay_npages_limit_get(decay); 512 pac_decay_try_purge(tsdn, pac, decay, decay_stats, ecache, 513 npages_current, npages_limit); 514 } 515 516 return epoch_advanced; 517 } 518 519 bool 520 pac_decay_ms_set(tsdn_t *tsdn, pac_t *pac, extent_state_t state, 521 ssize_t decay_ms, pac_purge_eagerness_t eagerness) { 522 decay_t *decay; 523 pac_decay_stats_t *decay_stats; 524 ecache_t *ecache; 525 pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); 526 527 if (!decay_ms_valid(decay_ms)) { 528 return true; 529 } 530 531 malloc_mutex_lock(tsdn, &decay->mtx); 532 /* 533 * Restart decay backlog from scratch, which may cause many dirty pages 534 * to be immediately purged. It would conceptually be possible to map 535 * the old backlog onto the new backlog, but there is no justification 536 * for such complexity since decay_ms changes are intended to be 537 * infrequent, either between the {-1, 0, >0} states, or a one-time 538 * arbitrary change during initial arena configuration. 539 */ 540 nstime_t cur_time; 541 nstime_init_update(&cur_time); 542 decay_reinit(decay, &cur_time, decay_ms); 543 pac_maybe_decay_purge(tsdn, pac, decay, decay_stats, ecache, eagerness); 544 malloc_mutex_unlock(tsdn, &decay->mtx); 545 546 return false; 547 } 548 549 ssize_t 550 pac_decay_ms_get(pac_t *pac, extent_state_t state) { 551 decay_t *decay; 552 pac_decay_stats_t *decay_stats; 553 ecache_t *ecache; 554 pac_decay_data_get(pac, state, &decay, &decay_stats, &ecache); 555 return decay_ms_read(decay); 556 } 557 558 void 559 pac_reset(tsdn_t *tsdn, pac_t *pac) { 560 /* 561 * No-op for now; purging is still done at the arena-level. It should 562 * get moved in here, though. 563 */ 564 (void)tsdn; 565 (void)pac; 566 } 567 568 void 569 pac_destroy(tsdn_t *tsdn, pac_t *pac) { 570 assert(ecache_npages_get(&pac->ecache_dirty) == 0); 571 assert(ecache_npages_get(&pac->ecache_muzzy) == 0); 572 /* 573 * Iterate over the retained extents and destroy them. This gives the 574 * extent allocator underlying the extent hooks an opportunity to unmap 575 * all retained memory without having to keep its own metadata 576 * structures. In practice, virtual memory for dss-allocated extents is 577 * leaked here, so best practice is to avoid dss for arenas to be 578 * destroyed, or provide custom extent hooks that track retained 579 * dss-based extents for later reuse. 580 */ 581 ehooks_t *ehooks = pac_ehooks_get(pac); 582 edata_t *edata; 583 while ((edata = ecache_evict(tsdn, pac, ehooks, 584 &pac->ecache_retained, 0)) != NULL) { 585 extent_destroy_wrapper(tsdn, pac, ehooks, edata); 586 } 587 } 588