#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/thread_event.h"

/*
 * Signatures for event specific functions. These functions should be defined
 * by the modules owning each event. The signatures here verify that the
 * definitions follow the right format.
 *
 * The first two are functions computing new / postponed event wait time. New
 * event wait time is the time till the next event if an event is currently
 * being triggered; postponed event wait time is the time till the next event
 * if an event should be triggered but needs to be postponed, e.g. when the TSD
 * is not nominal or during reentrancy.
 *
 * The third is the event handler function, which is called whenever an event
 * is triggered. The parameter is the elapsed time since the last time an
 * event of the same type was triggered.
 */
#define E(event, condition_unused, is_alloc_event_unused) \
        uint64_t event##_new_event_wait(tsd_t *tsd); \
        uint64_t event##_postponed_event_wait(tsd_t *tsd); \
        void event##_event_handler(tsd_t *tsd, uint64_t elapsed);

ITERATE_OVER_ALL_EVENTS
#undef E
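
/*
 * As an illustration (the event list lives behind ITERATE_OVER_ALL_EVENTS;
 * see thread_event.h): the tcache_gc entry makes the block above declare
 *
 *     uint64_t tcache_gc_new_event_wait(tsd_t *tsd);
 *     uint64_t tcache_gc_postponed_event_wait(tsd_t *tsd);
 *     void tcache_gc_event_handler(tsd_t *tsd, uint64_t elapsed);
 *
 * and similarly for the other events (tcache_gc_dalloc, prof_sample,
 * stats_interval, peak_alloc, peak_dalloc).
 */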

/* Signatures for internal functions fetching elapsed time. */
#define E(event, condition_unused, is_alloc_event_unused) \
        static uint64_t event##_fetch_elapsed(tsd_t *tsd);

ITERATE_OVER_ALL_EVENTS
#undef E
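
/*
 * Definitions follow. Events that do not track a meaningful elapsed value
 * return TE_INVALID_ELAPSED; prof_sample and stats_interval report the bytes
 * allocated since the last event of their own type.
 */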

static uint64_t
tcache_gc_fetch_elapsed(tsd_t *tsd) {
        return TE_INVALID_ELAPSED;
}

static uint64_t
tcache_gc_dalloc_fetch_elapsed(tsd_t *tsd) {
        return TE_INVALID_ELAPSED;
}

static uint64_t
prof_sample_fetch_elapsed(tsd_t *tsd) {
        uint64_t last_event = thread_allocated_last_event_get(tsd);
        uint64_t last_sample_event = prof_sample_last_event_get(tsd);
        prof_sample_last_event_set(tsd, last_event);
        return last_event - last_sample_event;
}

static uint64_t
stats_interval_fetch_elapsed(tsd_t *tsd) {
        uint64_t last_event = thread_allocated_last_event_get(tsd);
        uint64_t last_stats_event = stats_interval_last_event_get(tsd);
        stats_interval_last_event_set(tsd, last_event);
        return last_event - last_stats_event;
}

static uint64_t
peak_alloc_fetch_elapsed(tsd_t *tsd) {
        return TE_INVALID_ELAPSED;
}

static uint64_t
peak_dalloc_fetch_elapsed(tsd_t *tsd) {
        return TE_INVALID_ELAPSED;
}

/* Per event facilities done. */

static bool
te_ctx_has_active_events(te_ctx_t *ctx) {
        assert(config_debug);
#define E(event, condition, alloc_event) \
        if (condition && alloc_event == ctx->is_alloc) { \
                return true; \
        }
        ITERATE_OVER_ALL_EVENTS
#undef E
        return false;
}

static uint64_t
te_next_event_compute(tsd_t *tsd, bool is_alloc) {
        uint64_t wait = TE_MAX_START_WAIT;
#define E(event, condition, alloc_event) \
        if (is_alloc == alloc_event && condition) { \
                uint64_t event_wait = \
                    event##_event_wait_get(tsd); \
                assert(event_wait <= TE_MAX_START_WAIT); \
                if (event_wait > 0U && event_wait < wait) { \
                        wait = event_wait; \
                } \
        }

        ITERATE_OVER_ALL_EVENTS
#undef E
        assert(wait <= TE_MAX_START_WAIT);
        return wait;
}
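
/*
 * Invariants checked below, for each of the alloc and dalloc contexts:
 * last_event <= current_bytes < next_event (modulo wraparound), and
 * next_event_fast equals next_event when next_event fits under
 * TE_NEXT_EVENT_FAST_MAX and the tsd is fast, and is zero otherwise.
 */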
static void
te_assert_invariants_impl(tsd_t *tsd, te_ctx_t *ctx) {
        uint64_t current_bytes = te_ctx_current_bytes_get(ctx);
        uint64_t last_event = te_ctx_last_event_get(ctx);
        uint64_t next_event = te_ctx_next_event_get(ctx);
        uint64_t next_event_fast = te_ctx_next_event_fast_get(ctx);

        assert(last_event != next_event);
        if (next_event > TE_NEXT_EVENT_FAST_MAX || !tsd_fast(tsd)) {
                assert(next_event_fast == 0U);
        } else {
                assert(next_event_fast == next_event);
        }

        /* The subtraction is intentionally susceptible to underflow. */
        uint64_t interval = next_event - last_event;

        /* The subtraction is intentionally susceptible to underflow. */
        assert(current_bytes - last_event < interval);
        uint64_t min_wait = te_next_event_compute(tsd, te_ctx_is_alloc(ctx));
        /*
         * next_event should only have been pushed up, except when no event is
         * active and the TSD has just been initialized. The last_event == 0U
         * guard below is stronger than needed, but having an exactly accurate
         * guard is more complicated to implement.
         */
        assert((!te_ctx_has_active_events(ctx) && last_event == 0U) ||
            interval == min_wait ||
            (interval < min_wait && interval == TE_MAX_INTERVAL));
}

void
te_assert_invariants_debug(tsd_t *tsd) {
        te_ctx_t ctx;
        te_ctx_get(tsd, &ctx, true);
        te_assert_invariants_impl(tsd, &ctx);

        te_ctx_get(tsd, &ctx, false);
        te_assert_invariants_impl(tsd, &ctx);
}

/*
 * Synchronization around the fast threshold in tsd --
 * There are two threads to consider in the synchronization here:
 * - The owner of the tsd being updated by a slow path change
 * - The remote thread, doing that slow path change.
 *
 * As a design constraint, we want to ensure that a slow-path transition cannot
 * be ignored for arbitrarily long, and that if the remote thread causes a
 * slow-path transition and then communicates with the owner thread that it has
 * occurred, then the owner will go down the slow path on the next allocator
 * operation (that is, we don't want to just wait until the owner hits its
 * slow-path reset condition on its own).
 *
 * Here's our strategy to do that:
 *
 * The remote thread will update the slow-path stores to TSD variables, issue a
 * SEQ_CST fence, and then update the TSD next_event_fast counter. The owner
 * thread will update next_event_fast, issue a SEQ_CST fence, and then check
 * its TSD to see if it's on the slow path.
 *
 * This is fairly straightforward when 64-bit atomics are supported. Assume
 * that the remote fence is sandwiched between two owner fences in the reset
 * pathway. The case where there is no preceding or trailing owner fence (i.e.
 * because the owner thread is near the beginning or end of its life) can be
 * analyzed similarly. The owner store to next_event_fast preceding the earlier
 * owner fence will be earlier in coherence order than the remote store to it,
 * so that the owner thread will go down the slow path once the store becomes
 * visible to it, which is no later than the time of the second fence.
 *
 * The case where we don't support 64-bit atomics is trickier, since word
 * tearing is possible. We'll repeat the same analysis, and look at the two
 * owner fences sandwiching the remote fence. The next_event_fast stores done
 * alongside the earlier owner fence cannot overwrite any of the remote stores
 * (since they precede the earlier owner fence in sb, which precedes the remote
 * fence in sc, which precedes the remote stores in sb). After the second owner
 * fence there will be a re-check of the slow-path variables anyway, so the
 * "owner will notice that it's on the slow path eventually" guarantee is
 * satisfied. To make sure that the out-of-band-messaging constraint is as
 * well, note that either the message passing is sequenced before the second
 * owner fence (in which case the remote stores happen before the second set of
 * owner stores, so malloc sees a value of zero for next_event_fast and goes
 * down the slow path), or it is not (in which case the owner sees the tsd
 * slow-path writes on its previous update). This leaves open the possibility
 * that the remote thread will (at some arbitrary point in the future) zero out
 * one half of the owner thread's next_event_fast, but that's always safe (it
 * just sends it down the slow path earlier).
 */
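
/*
 * next_event_fast mirrors next_event for fast-path checks; it is zeroed when
 * next_event exceeds TE_NEXT_EVENT_FAST_MAX (and, via
 * te_next_event_fast_set_non_nominal, when the tsd is not nominal), so those
 * cases always fall through to the slow path.
 */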
static void
te_ctx_next_event_fast_update(te_ctx_t *ctx) {
        uint64_t next_event = te_ctx_next_event_get(ctx);
        uint64_t next_event_fast = (next_event <= TE_NEXT_EVENT_FAST_MAX) ?
            next_event : 0U;
        te_ctx_next_event_fast_set(ctx, next_event_fast);
}

void
te_recompute_fast_threshold(tsd_t *tsd) {
        if (tsd_state_get(tsd) != tsd_state_nominal) {
                /* Check first because this is also called on purgatory. */
                te_next_event_fast_set_non_nominal(tsd);
                return;
        }

        te_ctx_t ctx;
        te_ctx_get(tsd, &ctx, true);
        te_ctx_next_event_fast_update(&ctx);
        te_ctx_get(tsd, &ctx, false);
        te_ctx_next_event_fast_update(&ctx);

        atomic_fence(ATOMIC_SEQ_CST);
        if (tsd_state_get(tsd) != tsd_state_nominal) {
                te_next_event_fast_set_non_nominal(tsd);
        }
}

static void
te_adjust_thresholds_helper(tsd_t *tsd, te_ctx_t *ctx,
    uint64_t wait) {
        /*
         * The next threshold based on future events can only be adjusted
         * after progressing the last_event counter (which is set to current).
         */
        assert(te_ctx_current_bytes_get(ctx) == te_ctx_last_event_get(ctx));
        assert(wait <= TE_MAX_START_WAIT);

        uint64_t next_event = te_ctx_last_event_get(ctx) + (wait <=
            TE_MAX_INTERVAL ? wait : TE_MAX_INTERVAL);
        te_ctx_next_event_set(tsd, ctx, next_event);
}

static uint64_t
te_clip_event_wait(uint64_t event_wait) {
        assert(event_wait > 0U);
        if (TE_MIN_START_WAIT > 1U &&
            unlikely(event_wait < TE_MIN_START_WAIT)) {
                event_wait = TE_MIN_START_WAIT;
        }
        if (TE_MAX_START_WAIT < UINT64_MAX &&
            unlikely(event_wait > TE_MAX_START_WAIT)) {
                event_wait = TE_MAX_START_WAIT;
        }
        return event_wait;
}
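
/*
 * The trigger path: accumulate the bytes since last_event, advance last_event,
 * and then for each active event either deduct the accumulated bytes from its
 * wait, postpone it (when the tsd is not nominal or we are in a reentrant
 * call), or mark it triggered and fetch a new wait. Handlers run only after
 * all thresholds have been updated and the invariants re-checked.
 */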
void
te_event_trigger(tsd_t *tsd, te_ctx_t *ctx) {
        /* usize has already been added to thread_allocated. */
        uint64_t bytes_after = te_ctx_current_bytes_get(ctx);
        /* The subtraction is intentionally susceptible to underflow. */
        uint64_t accumbytes = bytes_after - te_ctx_last_event_get(ctx);

        te_ctx_last_event_set(ctx, bytes_after);

        bool allow_event_trigger = tsd_nominal(tsd) &&
            tsd_reentrancy_level_get(tsd) == 0;
        bool is_alloc = ctx->is_alloc;
        uint64_t wait = TE_MAX_START_WAIT;

#define E(event, condition, alloc_event) \
        bool is_##event##_triggered = false; \
        if (is_alloc == alloc_event && condition) { \
                uint64_t event_wait = event##_event_wait_get(tsd); \
                assert(event_wait <= TE_MAX_START_WAIT); \
                if (event_wait > accumbytes) { \
                        event_wait -= accumbytes; \
                } else if (!allow_event_trigger) { \
                        event_wait = event##_postponed_event_wait(tsd); \
                } else { \
                        is_##event##_triggered = true; \
                        event_wait = event##_new_event_wait(tsd); \
                } \
                event_wait = te_clip_event_wait(event_wait); \
                event##_event_wait_set(tsd, event_wait); \
                if (event_wait < wait) { \
                        wait = event_wait; \
                } \
        }

        ITERATE_OVER_ALL_EVENTS
#undef E

        assert(wait <= TE_MAX_START_WAIT);
        te_adjust_thresholds_helper(tsd, ctx, wait);
        te_assert_invariants(tsd);

#define E(event, condition, alloc_event) \
        if (is_alloc == alloc_event && condition && \
            is_##event##_triggered) { \
                assert(allow_event_trigger); \
                uint64_t elapsed = event##_fetch_elapsed(tsd); \
                event##_event_handler(tsd, elapsed); \
        }

        ITERATE_OVER_ALL_EVENTS
#undef E

        te_assert_invariants(tsd);
}
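
/*
 * (Re)initialize event accounting for one context (alloc or dalloc): reset
 * last_event to the current byte count, seed each active event's wait with
 * its new_event_wait, and derive the initial next_event threshold.
 */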
static void
te_init(tsd_t *tsd, bool is_alloc) {
        te_ctx_t ctx;
        te_ctx_get(tsd, &ctx, is_alloc);
        /*
         * Reset the last event to current, which starts the events from a
         * clean state. This is necessary when re-initializing the tsd event
         * counters.
         *
         * The event counters maintain a relationship with the current bytes:
         * last_event <= current < next_event. When a reinit happens (e.g. a
         * reincarnated tsd), the last event needs progressing because all
         * events start fresh from the current bytes.
         */
        te_ctx_last_event_set(&ctx, te_ctx_current_bytes_get(&ctx));

        uint64_t wait = TE_MAX_START_WAIT;
#define E(event, condition, alloc_event) \
        if (is_alloc == alloc_event && condition) { \
                uint64_t event_wait = event##_new_event_wait(tsd); \
                event_wait = te_clip_event_wait(event_wait); \
                event##_event_wait_set(tsd, event_wait); \
                if (event_wait < wait) { \
                        wait = event_wait; \
                } \
        }

        ITERATE_OVER_ALL_EVENTS
#undef E
        te_adjust_thresholds_helper(tsd, &ctx, wait);
}

void
tsd_te_init(tsd_t *tsd) {
        /* Make sure no overflow for the bytes accumulated on event_trigger. */
        assert(TE_MAX_INTERVAL <= UINT64_MAX - SC_LARGE_MAXCLASS + 1);
        te_init(tsd, true);
        te_init(tsd, false);
        te_assert_invariants(tsd);
}