1 #include "jemalloc/internal/jemalloc_preamble.h" 2 #include "jemalloc/internal/jemalloc_internal_includes.h" 3 4 #include "jemalloc/internal/hpdata.h" 5 6 static int 7 hpdata_age_comp(const hpdata_t *a, const hpdata_t *b) { 8 uint64_t a_age = hpdata_age_get(a); 9 uint64_t b_age = hpdata_age_get(b); 10 /* 11 * hpdata ages are operation counts in the psset; no two should be the 12 * same. 13 */ 14 assert(a_age != b_age); 15 return (a_age > b_age) - (a_age < b_age); 16 } 17 18 ph_gen(, hpdata_age_heap, hpdata_t, age_link, hpdata_age_comp) 19 20 void 21 hpdata_init(hpdata_t *hpdata, void *addr, uint64_t age) { 22 hpdata_addr_set(hpdata, addr); 23 hpdata_age_set(hpdata, age); 24 hpdata->h_huge = false; 25 hpdata->h_alloc_allowed = true; 26 hpdata->h_in_psset_alloc_container = false; 27 hpdata->h_purge_allowed = false; 28 hpdata->h_hugify_allowed = false; 29 hpdata->h_in_psset_hugify_container = false; 30 hpdata->h_mid_purge = false; 31 hpdata->h_mid_hugify = false; 32 hpdata->h_updating = false; 33 hpdata->h_in_psset = false; 34 hpdata_longest_free_range_set(hpdata, HUGEPAGE_PAGES); 35 hpdata->h_nactive = 0; 36 fb_init(hpdata->active_pages, HUGEPAGE_PAGES); 37 hpdata->h_ntouched = 0; 38 fb_init(hpdata->touched_pages, HUGEPAGE_PAGES); 39 40 hpdata_assert_consistent(hpdata); 41 } 42 43 void * 44 hpdata_reserve_alloc(hpdata_t *hpdata, size_t sz) { 45 hpdata_assert_consistent(hpdata); 46 /* 47 * This is a metadata change; the hpdata should therefore either not be 48 * in the psset, or should have explicitly marked itself as being 49 * mid-update. 50 */ 51 assert(!hpdata->h_in_psset || hpdata->h_updating); 52 assert(hpdata->h_alloc_allowed); 53 assert((sz & PAGE_MASK) == 0); 54 size_t npages = sz >> LG_PAGE; 55 assert(npages <= hpdata_longest_free_range_get(hpdata)); 56 57 size_t result; 58 59 size_t start = 0; 60 /* 61 * These are dead stores, but the compiler will issue warnings on them 62 * since it can't tell statically that found is always true below. 63 */ 64 size_t begin = 0; 65 size_t len = 0; 66 67 size_t largest_unchosen_range = 0; 68 while (true) { 69 bool found = fb_urange_iter(hpdata->active_pages, 70 HUGEPAGE_PAGES, start, &begin, &len); 71 /* 72 * A precondition to this function is that hpdata must be able 73 * to serve the allocation. 74 */ 75 assert(found); 76 assert(len <= hpdata_longest_free_range_get(hpdata)); 77 if (len >= npages) { 78 /* 79 * We use first-fit within the page slabs; this gives 80 * bounded worst-case fragmentation within a slab. It's 81 * not necessarily right; we could experiment with 82 * various other options. 83 */ 84 break; 85 } 86 if (len > largest_unchosen_range) { 87 largest_unchosen_range = len; 88 } 89 start = begin + len; 90 } 91 /* We found a range; remember it. */ 92 result = begin; 93 fb_set_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); 94 hpdata->h_nactive += npages; 95 96 /* 97 * We might be about to dirty some memory for the first time; update our 98 * count if so. 99 */ 100 size_t new_dirty = fb_ucount(hpdata->touched_pages, HUGEPAGE_PAGES, 101 result, npages); 102 fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, result, npages); 103 hpdata->h_ntouched += new_dirty; 104 105 /* 106 * If we allocated out of a range that was the longest in the hpdata, it 107 * might be the only one of that size and we'll have to adjust the 108 * metadata. 109 */ 110 if (len == hpdata_longest_free_range_get(hpdata)) { 111 start = begin + npages; 112 while (start < HUGEPAGE_PAGES) { 113 bool found = fb_urange_iter(hpdata->active_pages, 114 HUGEPAGE_PAGES, start, &begin, &len); 115 if (!found) { 116 break; 117 } 118 assert(len <= hpdata_longest_free_range_get(hpdata)); 119 if (len == hpdata_longest_free_range_get(hpdata)) { 120 largest_unchosen_range = len; 121 break; 122 } 123 if (len > largest_unchosen_range) { 124 largest_unchosen_range = len; 125 } 126 start = begin + len; 127 } 128 hpdata_longest_free_range_set(hpdata, largest_unchosen_range); 129 } 130 131 hpdata_assert_consistent(hpdata); 132 return (void *)( 133 (uintptr_t)hpdata_addr_get(hpdata) + (result << LG_PAGE)); 134 } 135 136 void 137 hpdata_unreserve(hpdata_t *hpdata, void *addr, size_t sz) { 138 hpdata_assert_consistent(hpdata); 139 /* See the comment in reserve. */ 140 assert(!hpdata->h_in_psset || hpdata->h_updating); 141 assert(((uintptr_t)addr & PAGE_MASK) == 0); 142 assert((sz & PAGE_MASK) == 0); 143 size_t begin = ((uintptr_t)addr - (uintptr_t)hpdata_addr_get(hpdata)) 144 >> LG_PAGE; 145 assert(begin < HUGEPAGE_PAGES); 146 size_t npages = sz >> LG_PAGE; 147 size_t old_longest_range = hpdata_longest_free_range_get(hpdata); 148 149 fb_unset_range(hpdata->active_pages, HUGEPAGE_PAGES, begin, npages); 150 /* We might have just created a new, larger range. */ 151 size_t new_begin = (fb_fls(hpdata->active_pages, HUGEPAGE_PAGES, 152 begin) + 1); 153 size_t new_end = fb_ffs(hpdata->active_pages, HUGEPAGE_PAGES, 154 begin + npages - 1); 155 size_t new_range_len = new_end - new_begin; 156 157 if (new_range_len > old_longest_range) { 158 hpdata_longest_free_range_set(hpdata, new_range_len); 159 } 160 161 hpdata->h_nactive -= npages; 162 163 hpdata_assert_consistent(hpdata); 164 } 165 166 size_t 167 hpdata_purge_begin(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { 168 hpdata_assert_consistent(hpdata); 169 /* 170 * See the comment below; we might purge any inactive extent, so it's 171 * unsafe for any other thread to turn any inactive extent active while 172 * we're operating on it. 173 */ 174 assert(!hpdata_alloc_allowed_get(hpdata)); 175 176 purge_state->npurged = 0; 177 purge_state->next_purge_search_begin = 0; 178 179 /* 180 * Initialize to_purge. 181 * 182 * It's possible to end up in situations where two dirty extents are 183 * separated by a retained extent: 184 * - 1 page allocated. 185 * - 1 page allocated. 186 * - 1 pages allocated. 187 * 188 * If the middle page is freed and purged, and then the first and third 189 * pages are freed, and then another purge pass happens, the hpdata 190 * looks like this: 191 * - 1 page dirty. 192 * - 1 page retained. 193 * - 1 page dirty. 194 * 195 * But it's safe to do a single 3-page purge. 196 * 197 * We do this by first computing the dirty pages, and then filling in 198 * any gaps by extending each range in the dirty bitmap to extend until 199 * the next active page. This purges more pages, but the expensive part 200 * of purging is the TLB shootdowns, rather than the kernel state 201 * tracking; doing a little bit more of the latter is fine if it saves 202 * us from doing some of the former. 203 */ 204 205 /* 206 * The dirty pages are those that are touched but not active. Note that 207 * in a normal-ish case, HUGEPAGE_PAGES is something like 512 and the 208 * fb_group_t is 64 bits, so this is 64 bytes, spread across 8 209 * fb_group_ts. 210 */ 211 fb_group_t dirty_pages[FB_NGROUPS(HUGEPAGE_PAGES)]; 212 fb_init(dirty_pages, HUGEPAGE_PAGES); 213 fb_bit_not(dirty_pages, hpdata->active_pages, HUGEPAGE_PAGES); 214 fb_bit_and(dirty_pages, dirty_pages, hpdata->touched_pages, 215 HUGEPAGE_PAGES); 216 217 fb_init(purge_state->to_purge, HUGEPAGE_PAGES); 218 size_t next_bit = 0; 219 while (next_bit < HUGEPAGE_PAGES) { 220 size_t next_dirty = fb_ffs(dirty_pages, HUGEPAGE_PAGES, 221 next_bit); 222 /* Recall that fb_ffs returns nbits if no set bit is found. */ 223 if (next_dirty == HUGEPAGE_PAGES) { 224 break; 225 } 226 size_t next_active = fb_ffs(hpdata->active_pages, 227 HUGEPAGE_PAGES, next_dirty); 228 /* 229 * Don't purge past the end of the dirty extent, into retained 230 * pages. This helps the kernel a tiny bit, but honestly it's 231 * mostly helpful for testing (where we tend to write test cases 232 * that think in terms of the dirty ranges). 233 */ 234 ssize_t last_dirty = fb_fls(dirty_pages, HUGEPAGE_PAGES, 235 next_active - 1); 236 assert(last_dirty >= 0); 237 assert((size_t)last_dirty >= next_dirty); 238 assert((size_t)last_dirty - next_dirty + 1 <= HUGEPAGE_PAGES); 239 240 fb_set_range(purge_state->to_purge, HUGEPAGE_PAGES, next_dirty, 241 last_dirty - next_dirty + 1); 242 next_bit = next_active + 1; 243 } 244 245 /* We should purge, at least, everything dirty. */ 246 size_t ndirty = hpdata->h_ntouched - hpdata->h_nactive; 247 purge_state->ndirty_to_purge = ndirty; 248 assert(ndirty <= fb_scount( 249 purge_state->to_purge, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); 250 assert(ndirty == fb_scount(dirty_pages, HUGEPAGE_PAGES, 0, 251 HUGEPAGE_PAGES)); 252 253 hpdata_assert_consistent(hpdata); 254 255 return ndirty; 256 } 257 258 bool 259 hpdata_purge_next(hpdata_t *hpdata, hpdata_purge_state_t *purge_state, 260 void **r_purge_addr, size_t *r_purge_size) { 261 /* 262 * Note that we don't have a consistency check here; we're accessing 263 * hpdata without synchronization, and therefore have no right to expect 264 * a consistent state. 265 */ 266 assert(!hpdata_alloc_allowed_get(hpdata)); 267 268 if (purge_state->next_purge_search_begin == HUGEPAGE_PAGES) { 269 return false; 270 } 271 size_t purge_begin; 272 size_t purge_len; 273 bool found_range = fb_srange_iter(purge_state->to_purge, HUGEPAGE_PAGES, 274 purge_state->next_purge_search_begin, &purge_begin, &purge_len); 275 if (!found_range) { 276 return false; 277 } 278 279 *r_purge_addr = (void *)( 280 (uintptr_t)hpdata_addr_get(hpdata) + purge_begin * PAGE); 281 *r_purge_size = purge_len * PAGE; 282 283 purge_state->next_purge_search_begin = purge_begin + purge_len; 284 purge_state->npurged += purge_len; 285 assert(purge_state->npurged <= HUGEPAGE_PAGES); 286 287 return true; 288 } 289 290 void 291 hpdata_purge_end(hpdata_t *hpdata, hpdata_purge_state_t *purge_state) { 292 assert(!hpdata_alloc_allowed_get(hpdata)); 293 hpdata_assert_consistent(hpdata); 294 /* See the comment in reserve. */ 295 assert(!hpdata->h_in_psset || hpdata->h_updating); 296 297 assert(purge_state->npurged == fb_scount(purge_state->to_purge, 298 HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES)); 299 assert(purge_state->npurged >= purge_state->ndirty_to_purge); 300 301 fb_bit_not(purge_state->to_purge, purge_state->to_purge, 302 HUGEPAGE_PAGES); 303 fb_bit_and(hpdata->touched_pages, hpdata->touched_pages, 304 purge_state->to_purge, HUGEPAGE_PAGES); 305 assert(hpdata->h_ntouched >= purge_state->ndirty_to_purge); 306 hpdata->h_ntouched -= purge_state->ndirty_to_purge; 307 308 hpdata_assert_consistent(hpdata); 309 } 310 311 void 312 hpdata_hugify(hpdata_t *hpdata) { 313 hpdata_assert_consistent(hpdata); 314 hpdata->h_huge = true; 315 fb_set_range(hpdata->touched_pages, HUGEPAGE_PAGES, 0, HUGEPAGE_PAGES); 316 hpdata->h_ntouched = HUGEPAGE_PAGES; 317 hpdata_assert_consistent(hpdata); 318 } 319 320 void 321 hpdata_dehugify(hpdata_t *hpdata) { 322 hpdata_assert_consistent(hpdata); 323 hpdata->h_huge = false; 324 hpdata_assert_consistent(hpdata); 325 } 326