#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/buf_writer.h"
#include "jemalloc/internal/emitter.h"
#include "jemalloc/internal/prof_data.h"
#include "jemalloc/internal/prof_recent.h"

ssize_t opt_prof_recent_alloc_max = PROF_RECENT_ALLOC_MAX_DEFAULT;
malloc_mutex_t prof_recent_alloc_mtx; /* Protects the fields below */
static atomic_zd_t prof_recent_alloc_max;
static ssize_t prof_recent_alloc_count = 0;
prof_recent_list_t prof_recent_alloc_list;

malloc_mutex_t prof_recent_dump_mtx; /* Protects dumping. */

static void
prof_recent_alloc_max_init() {
	atomic_store_zd(&prof_recent_alloc_max, opt_prof_recent_alloc_max,
	    ATOMIC_RELAXED);
}

static inline ssize_t
prof_recent_alloc_max_get_no_lock() {
	return atomic_load_zd(&prof_recent_alloc_max, ATOMIC_RELAXED);
}

static inline ssize_t
prof_recent_alloc_max_get(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	return prof_recent_alloc_max_get_no_lock();
}

static inline ssize_t
prof_recent_alloc_max_update(tsd_t *tsd, ssize_t max) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	ssize_t old_max = prof_recent_alloc_max_get(tsd);
	atomic_store_zd(&prof_recent_alloc_max, max, ATOMIC_RELAXED);
	return old_max;
}

static prof_recent_t *
prof_recent_allocate_node(tsdn_t *tsdn) {
	return (prof_recent_t *)iallocztm(tsdn, sizeof(prof_recent_t),
	    sz_size2index(sizeof(prof_recent_t)), false, NULL, true,
	    arena_get(tsdn, 0, false), true);
}

static void
prof_recent_free_node(tsdn_t *tsdn, prof_recent_t *node) {
	assert(node != NULL);
	assert(isalloc(tsdn, node) == sz_s2u(sizeof(prof_recent_t)));
	idalloctm(tsdn, node, NULL, NULL, true, true);
}

static inline void
increment_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	++tctx->recent_count;
	assert(tctx->recent_count > 0);
}

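/*
 * Illustrative caller sequence (a hedged sketch, not a verbatim copy of the
 * sampling path; see the callers of prof_recent_alloc_prepare() for the real
 * thing):
 *
 *	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
 *	bool record_recent = prof_recent_alloc_prepare(tsd, tctx);
 *	malloc_mutex_unlock(tsd_tsdn(tsd), tctx->tdata->lock);
 *	if (record_recent) {
 *		prof_recent_alloc(tsd, edata, size, usize);
 *	}
 *
 * The prepare step pins the tctx while tdata->lock is held; the actual list
 * manipulation happens later in prof_recent_alloc() under
 * prof_recent_alloc_mtx, with tdata->lock already released.
 */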
bool
prof_recent_alloc_prepare(tsd_t *tsd, prof_tctx_t *tctx) {
	cassert(config_prof);
	assert(opt_prof && prof_booted);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	/*
	 * Check whether last-N mode is turned on without trying to acquire the
	 * lock, so as to optimize for the following two scenarios:
	 * (1) Last-N mode is switched off;
	 * (2) Dumping, during which last-N mode is temporarily turned off so
	 * as not to block sampled allocations.
	 */
	if (prof_recent_alloc_max_get_no_lock() == 0) {
		return false;
	}

	/*
	 * Increment recent_count to hold the tctx so that it won't be gone
	 * even after tctx->tdata->lock is released.  This acts as a
	 * "placeholder"; the real recording of the allocation requires a lock
	 * on prof_recent_alloc_mtx and is done in prof_recent_alloc (when
	 * tctx->tdata->lock has been released).
	 */
	increment_recent_count(tsd, tctx);
	return true;
}

static void
decrement_recent_count(tsd_t *tsd, prof_tctx_t *tctx) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(tctx != NULL);
	malloc_mutex_lock(tsd_tsdn(tsd), tctx->tdata->lock);
	assert(tctx->recent_count > 0);
	--tctx->recent_count;
	prof_tctx_try_destroy(tsd, tctx);
}

static inline edata_t *
prof_recent_alloc_edata_get_no_lock(const prof_recent_t *n) {
	return (edata_t *)atomic_load_p(&n->alloc_edata, ATOMIC_ACQUIRE);
}

edata_t *
prof_recent_alloc_edata_get_no_lock_test(const prof_recent_t *n) {
	cassert(config_prof);
	return prof_recent_alloc_edata_get_no_lock(n);
}

static inline edata_t *
prof_recent_alloc_edata_get(tsd_t *tsd, const prof_recent_t *n) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	return prof_recent_alloc_edata_get_no_lock(n);
}

static void
prof_recent_alloc_edata_set(tsd_t *tsd, prof_recent_t *n, edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	atomic_store_p(&n->alloc_edata, edata, ATOMIC_RELEASE);
}

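/*
 * A note on the orderings above: prof_recent_alloc_edata_set() publishes the
 * edata pointer with ATOMIC_RELEASE, and the no-lock getter reads it with
 * ATOMIC_ACQUIRE, so a reader that observes a non-NULL edata also observes
 * all writes made to the record before publication.  Lock-holding callers
 * read the same values but rely on prof_recent_alloc_mtx for
 * synchronization.
 */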
void
edata_prof_recent_alloc_init(edata_t *edata) {
	cassert(config_prof);
	edata_prof_recent_alloc_set_dont_call_directly(edata, NULL);
}

static inline prof_recent_t *
edata_prof_recent_alloc_get_no_lock(const edata_t *edata) {
	cassert(config_prof);
	return edata_prof_recent_alloc_get_dont_call_directly(edata);
}

prof_recent_t *
edata_prof_recent_alloc_get_no_lock_test(const edata_t *edata) {
	cassert(config_prof);
	return edata_prof_recent_alloc_get_no_lock(edata);
}

static inline prof_recent_t *
edata_prof_recent_alloc_get(tsd_t *tsd, const edata_t *edata) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *recent_alloc =
	    edata_prof_recent_alloc_get_no_lock(edata);
	assert(recent_alloc == NULL ||
	    prof_recent_alloc_edata_get(tsd, recent_alloc) == edata);
	return recent_alloc;
}

static prof_recent_t *
edata_prof_recent_alloc_update_internal(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_get(tsd, edata);
	edata_prof_recent_alloc_set_dont_call_directly(edata, recent_alloc);
	return old_recent_alloc;
}

static void
edata_prof_recent_alloc_set(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(recent_alloc != NULL);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_update_internal(tsd, edata, recent_alloc);
	assert(old_recent_alloc == NULL);
	prof_recent_alloc_edata_set(tsd, recent_alloc, edata);
}

static void
edata_prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata,
    prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	assert(recent_alloc != NULL);
	prof_recent_t *old_recent_alloc =
	    edata_prof_recent_alloc_update_internal(tsd, edata, NULL);
	assert(old_recent_alloc == recent_alloc);
	assert(edata == prof_recent_alloc_edata_get(tsd, recent_alloc));
	prof_recent_alloc_edata_set(tsd, recent_alloc, NULL);
}

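/*
 * The two helpers above maintain a two-way association: an edata points to
 * its recent allocation record and the record points back at the edata.
 * edata_prof_recent_alloc_set() establishes both directions from NULL, and
 * edata_prof_recent_alloc_reset() tears both down, as the asserts verify.
 */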
/*
 * This function should be called right before an allocation is released, so
 * that the associated recent allocation record can contain the following
 * information:
 * (1) The allocation is released;
 * (2) The time of the deallocation; and
 * (3) The prof_tctx associated with the deallocation.
 */
void
prof_recent_alloc_reset(tsd_t *tsd, edata_t *edata) {
	cassert(config_prof);
	/*
	 * Check whether the recent allocation record still exists without
	 * trying to acquire the lock.
	 */
	if (edata_prof_recent_alloc_get_no_lock(edata) == NULL) {
		return;
	}

	prof_tctx_t *dalloc_tctx = prof_tctx_create(tsd);
	/*
	 * In case dalloc_tctx is NULL, e.g. due to OOM, we will not record the
	 * deallocation time / tctx, which is handled later, after we check
	 * again when holding the lock.
	 */

	if (dalloc_tctx != NULL) {
		malloc_mutex_lock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
		increment_recent_count(tsd, dalloc_tctx);
		dalloc_tctx->prepared = false;
		malloc_mutex_unlock(tsd_tsdn(tsd), dalloc_tctx->tdata->lock);
	}

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	/* Check again after acquiring the lock. */
	prof_recent_t *recent = edata_prof_recent_alloc_get(tsd, edata);
	if (recent != NULL) {
		assert(nstime_equals_zero(&recent->dalloc_time));
		assert(recent->dalloc_tctx == NULL);
		if (dalloc_tctx != NULL) {
			nstime_prof_update(&recent->dalloc_time);
			recent->dalloc_tctx = dalloc_tctx;
			dalloc_tctx = NULL;
		}
		edata_prof_recent_alloc_reset(tsd, edata, recent);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (dalloc_tctx != NULL) {
		/* We lost the race - the allocation record was just gone. */
		decrement_recent_count(tsd, dalloc_tctx);
	}
}

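/*
 * A sketch of the interleaving behind the "lost the race" case above
 * (illustrative only):
 *
 *	deallocating thread:               sampling thread:
 *	  no-lock check sees a record
 *	  creates and pins dalloc_tctx
 *	                                     evicts the record from the edata
 *	                                     while holding the lock
 *	  locks prof_recent_alloc_mtx
 *	  record is gone; unpins
 *	  dalloc_tctx below
 */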
static void
prof_recent_alloc_evict_edata(tsd_t *tsd, prof_recent_t *recent_alloc) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	edata_t *edata = prof_recent_alloc_edata_get(tsd, recent_alloc);
	if (edata != NULL) {
		edata_prof_recent_alloc_reset(tsd, edata, recent_alloc);
	}
}

static bool
prof_recent_alloc_is_empty(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (ql_empty(&prof_recent_alloc_list)) {
		assert(prof_recent_alloc_count == 0);
		return true;
	} else {
		assert(prof_recent_alloc_count > 0);
		return false;
	}
}

static void
prof_recent_alloc_assert_count(tsd_t *tsd) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (!config_debug) {
		return;
	}
	ssize_t count = 0;
	prof_recent_t *n;
	ql_foreach(n, &prof_recent_alloc_list, link) {
		++count;
	}
	assert(count == prof_recent_alloc_count);
	assert(prof_recent_alloc_max_get(tsd) == -1 ||
	    count <= prof_recent_alloc_max_get(tsd));
}

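/*
 * Lifecycle of a record, for orientation (a summary of the code below, not
 * an authoritative spec): prof_recent_alloc() fills size/usize, the
 * allocation time and alloc_tctx, and links the record to its edata;
 * dalloc_time stays zero and dalloc_tctx stays NULL until
 * prof_recent_alloc_reset() fills them at deallocation; eviction (by
 * rotation when the limit is hit, by a shrink of the limit, or after a dump)
 * finally unpins the tctxs via decrement_recent_count().
 */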
void
prof_recent_alloc(tsd_t *tsd, edata_t *edata, size_t size, size_t usize) {
	cassert(config_prof);
	assert(edata != NULL);
	prof_tctx_t *tctx = edata_prof_tctx_get(edata);

	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), tctx->tdata->lock);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);

	/*
	 * Reserve a new prof_recent_t node if needed.  If needed, we release
	 * the prof_recent_alloc_mtx lock and allocate.  Then, rather than
	 * immediately checking for OOM, we regain the lock and try to make use
	 * of the reserve node if needed.  There are six scenarios:
	 *
	 *          \ now | no need | need but OOMed | need and allocated
	 *  later   \     |         |                |
	 * ------------------------------------------------------------
	 *  no need       |   (1)   |      (2)       |        (3)
	 * ------------------------------------------------------------
	 *  need          |   (4)   |      (5)       |        (6)
	 *
	 * First, "(4)" never happens, because we don't release the lock in the
	 * middle if there's no need for a new node; in such cases "(1)" always
	 * takes place, which is trivial.
	 *
	 * Out of the remaining four scenarios, "(6)" is the common case and is
	 * trivial.  "(5)" is also trivial, in which case we'll roll back the
	 * effect of prof_recent_alloc_prepare() as expected.
	 *
	 * "(2)" / "(3)" occurs when the need for a new node is gone after we
	 * regain the lock.  If the new node is successfully allocated, i.e. in
	 * the case of "(3)", we'll release it in the end; otherwise, i.e. in
	 * the case of "(2)", we do nothing - we're lucky that the OOM ends up
	 * doing no harm at all.
	 *
	 * Therefore, the only performance cost of the "release lock" ->
	 * "allocate" -> "regain lock" design is the "(3)" case, but it happens
	 * very rarely, so the cost is relatively small compared to the gain of
	 * not having to have the lock order of prof_recent_alloc_mtx above all
	 * the allocation locks.
	 */
	prof_recent_t *reserve = NULL;
	if (prof_recent_alloc_max_get(tsd) == -1 ||
	    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd)) {
		assert(prof_recent_alloc_max_get(tsd) != 0);
		malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		reserve = prof_recent_allocate_node(tsd_tsdn(tsd));
		malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
		prof_recent_alloc_assert_count(tsd);
	}

	if (prof_recent_alloc_max_get(tsd) == 0) {
		assert(prof_recent_alloc_is_empty(tsd));
		goto label_rollback;
	}

	prof_tctx_t *old_alloc_tctx, *old_dalloc_tctx;
	if (prof_recent_alloc_count == prof_recent_alloc_max_get(tsd)) {
		/* If upper limit is reached, rotate the head. */
		assert(prof_recent_alloc_max_get(tsd) != -1);
		assert(!prof_recent_alloc_is_empty(tsd));
		prof_recent_t *head = ql_first(&prof_recent_alloc_list);
		old_alloc_tctx = head->alloc_tctx;
		assert(old_alloc_tctx != NULL);
		old_dalloc_tctx = head->dalloc_tctx;
		prof_recent_alloc_evict_edata(tsd, head);
		ql_rotate(&prof_recent_alloc_list, link);
	} else {
		/* Otherwise make use of the new node. */
		assert(prof_recent_alloc_max_get(tsd) == -1 ||
		    prof_recent_alloc_count < prof_recent_alloc_max_get(tsd));
		if (reserve == NULL) {
			goto label_rollback;
		}
		ql_elm_new(reserve, link);
		ql_tail_insert(&prof_recent_alloc_list, reserve, link);
		reserve = NULL;
		old_alloc_tctx = NULL;
		old_dalloc_tctx = NULL;
		++prof_recent_alloc_count;
	}

	/* Fill content into the tail node. */
	prof_recent_t *tail = ql_last(&prof_recent_alloc_list, link);
	assert(tail != NULL);
	tail->size = size;
	tail->usize = usize;
	nstime_copy(&tail->alloc_time, edata_prof_alloc_time_get(edata));
	tail->alloc_tctx = tctx;
	nstime_init_zero(&tail->dalloc_time);
	tail->dalloc_tctx = NULL;
	edata_prof_recent_alloc_set(tsd, edata, tail);

	assert(!prof_recent_alloc_is_empty(tsd));
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	if (reserve != NULL) {
		prof_recent_free_node(tsd_tsdn(tsd), reserve);
	}

	/*
	 * Asynchronously handle the tctx of the old node, so that there is no
	 * simultaneous holding of prof_recent_alloc_mtx and tdata->lock.
	 * In the worst case this may delay the tctx release but it's better
	 * than holding prof_recent_alloc_mtx for longer.
	 */
	if (old_alloc_tctx != NULL) {
		decrement_recent_count(tsd, old_alloc_tctx);
	}
	if (old_dalloc_tctx != NULL) {
		decrement_recent_count(tsd, old_dalloc_tctx);
	}
	return;

label_rollback:
	assert(edata_prof_recent_alloc_get(tsd, edata) == NULL);
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	if (reserve != NULL) {
		prof_recent_free_node(tsd_tsdn(tsd), reserve);
	}
	decrement_recent_count(tsd, tctx);
}

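/*
 * The two _ctl entry points below back an experimental mallctl.  A hedged
 * usage sketch from application code (the mallctl name follows the
 * convention in ctl.c of recent jemalloc versions; verify against the build
 * you target):
 *
 *	ssize_t old_max, new_max = 16;
 *	size_t sz = sizeof(old_max);
 *	mallctl("experimental.prof_recent.alloc_max", &old_max, &sz,
 *	    &new_max, sizeof(new_max));
 */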
ssize_t
prof_recent_alloc_max_ctl_read() {
	cassert(config_prof);
	/* Don't bother to acquire the lock. */
	return prof_recent_alloc_max_get_no_lock();
}

static void
prof_recent_alloc_restore_locked(tsd_t *tsd, prof_recent_list_t *to_delete) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	ssize_t max = prof_recent_alloc_max_get(tsd);
	if (max == -1 || prof_recent_alloc_count <= max) {
		/* Easy case - no need to alter the list. */
		ql_new(to_delete);
		prof_recent_alloc_assert_count(tsd);
		return;
	}

	prof_recent_t *node;
	ql_foreach(node, &prof_recent_alloc_list, link) {
		if (prof_recent_alloc_count == max) {
			break;
		}
		prof_recent_alloc_evict_edata(tsd, node);
		--prof_recent_alloc_count;
	}
	assert(prof_recent_alloc_count == max);

	ql_move(to_delete, &prof_recent_alloc_list);
	if (max == 0) {
		assert(node == NULL);
	} else {
		assert(node != NULL);
		ql_split(to_delete, node, &prof_recent_alloc_list, link);
	}
	assert(!ql_empty(to_delete));
	prof_recent_alloc_assert_count(tsd);
}

static void
prof_recent_alloc_async_cleanup(tsd_t *tsd, prof_recent_list_t *to_delete) {
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_dump_mtx);
	malloc_mutex_assert_not_owner(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	while (!ql_empty(to_delete)) {
		prof_recent_t *node = ql_first(to_delete);
		ql_remove(to_delete, node, link);
		decrement_recent_count(tsd, node->alloc_tctx);
		if (node->dalloc_tctx != NULL) {
			decrement_recent_count(tsd, node->dalloc_tctx);
		}
		prof_recent_free_node(tsd_tsdn(tsd), node);
	}
}

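/*
 * Shrinking proceeds in two phases so that no tctx lock is taken while
 * prof_recent_alloc_mtx is held: prof_recent_alloc_restore_locked() detaches
 * the surplus records under the lock, and prof_recent_alloc_async_cleanup()
 * unpins and frees them afterwards, outside the lock.
 */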
ssize_t
prof_recent_alloc_max_ctl_write(tsd_t *tsd, ssize_t max) {
	cassert(config_prof);
	assert(max >= -1);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	const ssize_t old_max = prof_recent_alloc_max_update(tsd, max);
	prof_recent_list_t to_delete;
	prof_recent_alloc_restore_locked(tsd, &to_delete);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_async_cleanup(tsd, &to_delete);
	return old_max;
}

static void
prof_recent_alloc_dump_bt(emitter_t *emitter, prof_tctx_t *tctx) {
	char bt_buf[2 * sizeof(intptr_t) + 3];
	char *s = bt_buf;
	assert(tctx != NULL);
	prof_bt_t *bt = &tctx->gctx->bt;
	for (size_t i = 0; i < bt->len; ++i) {
		malloc_snprintf(bt_buf, sizeof(bt_buf), "%p", bt->vec[i]);
		emitter_json_value(emitter, emitter_type_string, &s);
	}
}

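/*
 * Shape of one dumped record, reconstructed from the emitter calls below
 * (field values are made-up examples):
 *
 *	{"size": 4096, "usize": 4096, "released": true,
 *	 "alloc_thread_uid": 1, "alloc_thread_name": "worker",
 *	 "alloc_time": 123456789, "alloc_trace": ["0x401234", ...],
 *	 "dalloc_thread_uid": 2, "dalloc_time": 123456999,
 *	 "dalloc_trace": ["0x405678", ...]}
 *
 * The dalloc_* fields appear only when the allocation has been released with
 * a recorded deallocation tctx, and the thread names only when set.
 */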
static void
prof_recent_alloc_dump_node(emitter_t *emitter, prof_recent_t *node) {
	emitter_json_object_begin(emitter);

	emitter_json_kv(emitter, "size", emitter_type_size, &node->size);
	emitter_json_kv(emitter, "usize", emitter_type_size, &node->usize);
	bool released = prof_recent_alloc_edata_get_no_lock(node) == NULL;
	emitter_json_kv(emitter, "released", emitter_type_bool, &released);

	emitter_json_kv(emitter, "alloc_thread_uid", emitter_type_uint64,
	    &node->alloc_tctx->thr_uid);
	prof_tdata_t *alloc_tdata = node->alloc_tctx->tdata;
	assert(alloc_tdata != NULL);
	if (alloc_tdata->thread_name != NULL) {
		emitter_json_kv(emitter, "alloc_thread_name",
		    emitter_type_string, &alloc_tdata->thread_name);
	}
	uint64_t alloc_time_ns = nstime_ns(&node->alloc_time);
	emitter_json_kv(emitter, "alloc_time", emitter_type_uint64,
	    &alloc_time_ns);
	emitter_json_array_kv_begin(emitter, "alloc_trace");
	prof_recent_alloc_dump_bt(emitter, node->alloc_tctx);
	emitter_json_array_end(emitter);

	if (released && node->dalloc_tctx != NULL) {
		emitter_json_kv(emitter, "dalloc_thread_uid",
		    emitter_type_uint64, &node->dalloc_tctx->thr_uid);
		prof_tdata_t *dalloc_tdata = node->dalloc_tctx->tdata;
		assert(dalloc_tdata != NULL);
		if (dalloc_tdata->thread_name != NULL) {
			emitter_json_kv(emitter, "dalloc_thread_name",
			    emitter_type_string, &dalloc_tdata->thread_name);
		}
		assert(!nstime_equals_zero(&node->dalloc_time));
		uint64_t dalloc_time_ns = nstime_ns(&node->dalloc_time);
		emitter_json_kv(emitter, "dalloc_time", emitter_type_uint64,
		    &dalloc_time_ns);
		emitter_json_array_kv_begin(emitter, "dalloc_trace");
		prof_recent_alloc_dump_bt(emitter, node->dalloc_tctx);
		emitter_json_array_end(emitter);
	}

	emitter_json_object_end(emitter);
}

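/*
 * Dumping detaches the whole list (and zeroes the count) while holding
 * prof_recent_alloc_mtx, then emits without holding that lock, so sampled
 * allocations are not blocked while the dump is written; they simply see an
 * empty list.  prof_recent_dump_mtx serializes concurrent dumpers.
 * Afterwards the list is spliced back and trimmed in case the max shrank in
 * the meantime.
 */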
#define PROF_RECENT_PRINT_BUFSIZE 65536
JEMALLOC_COLD
void
prof_recent_alloc_dump(tsd_t *tsd, write_cb_t *write_cb, void *cbopaque) {
	cassert(config_prof);
	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_dump_mtx);
	buf_writer_t buf_writer;
	buf_writer_init(tsd_tsdn(tsd), &buf_writer, write_cb, cbopaque, NULL,
	    PROF_RECENT_PRINT_BUFSIZE);
	emitter_t emitter;
	emitter_init(&emitter, emitter_output_json_compact, buf_writer_cb,
	    &buf_writer);
	prof_recent_list_t temp_list;

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	ssize_t dump_max = prof_recent_alloc_max_get(tsd);
	ql_move(&temp_list, &prof_recent_alloc_list);
	ssize_t dump_count = prof_recent_alloc_count;
	prof_recent_alloc_count = 0;
	prof_recent_alloc_assert_count(tsd);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	emitter_begin(&emitter);
	uint64_t sample_interval = (uint64_t)1U << lg_prof_sample;
	emitter_json_kv(&emitter, "sample_interval", emitter_type_uint64,
	    &sample_interval);
	emitter_json_kv(&emitter, "recent_alloc_max", emitter_type_ssize,
	    &dump_max);
	emitter_json_array_kv_begin(&emitter, "recent_alloc");
	prof_recent_t *node;
	ql_foreach(node, &temp_list, link) {
		prof_recent_alloc_dump_node(&emitter, node);
	}
	emitter_json_array_end(&emitter);
	emitter_end(&emitter);

	malloc_mutex_lock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);
	prof_recent_alloc_assert_count(tsd);
	ql_concat(&temp_list, &prof_recent_alloc_list, link);
	ql_move(&prof_recent_alloc_list, &temp_list);
	prof_recent_alloc_count += dump_count;
	prof_recent_alloc_restore_locked(tsd, &temp_list);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_alloc_mtx);

	buf_writer_terminate(tsd_tsdn(tsd), &buf_writer);
	malloc_mutex_unlock(tsd_tsdn(tsd), &prof_recent_dump_mtx);

	prof_recent_alloc_async_cleanup(tsd, &temp_list);
}
#undef PROF_RECENT_PRINT_BUFSIZE

bool
prof_recent_init() {
	cassert(config_prof);
	prof_recent_alloc_max_init();

	if (malloc_mutex_init(&prof_recent_alloc_mtx, "prof_recent_alloc",
	    WITNESS_RANK_PROF_RECENT_ALLOC, malloc_mutex_rank_exclusive)) {
		return true;
	}

	if (malloc_mutex_init(&prof_recent_dump_mtx, "prof_recent_dump",
	    WITNESS_RANK_PROF_RECENT_DUMP, malloc_mutex_rank_exclusive)) {
		return true;
	}

	ql_new(&prof_recent_alloc_list);

	return false;
}

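/*
 * End-to-end usage sketch (hedged: the conf option name matches
 * opt_prof_recent_alloc_max above, and dumping is exposed through an
 * experimental mallctl defined in ctl.c that forwards a write callback and
 * its opaque argument to prof_recent_alloc_dump()):
 *
 *	MALLOC_CONF="prof:true,prof_recent_alloc_max:10" ./app
 */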