#define JEMALLOC_BACKGROUND_THREAD_C_
#include "jemalloc/internal/jemalloc_preamble.h"
#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"

JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS

/******************************************************************************/
/* Data. */

/* This option should be opt-in only. */
#define BACKGROUND_THREAD_DEFAULT false
/* Read-only after initialization. */
bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1;
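/*
 * Illustrative note: besides the opt.background_thread option, background
 * threads can be toggled at run time through the "background_thread" mallctl,
 * e.g. (public prefix depends on the build configuration):
 *
 *     bool enable = true;
 *     mallctl("background_thread", NULL, NULL, &enable, sizeof(enable));
 *
 * which lands in background_threads_enable() / _disable() below.
 */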

/* Used for thread creation, termination and stats. */
malloc_mutex_t background_thread_lock;
/* Indicates global state.  Atomic because decay reads this w/o locking. */
atomic_b_t background_thread_enabled_state;
size_t n_background_threads;
size_t max_background_threads;
/* Thread info per-index. */
background_thread_info_t *background_thread_info;

/******************************************************************************/

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER

static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);

static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
    if (!isthreaded) {
        isthreaded = true;
    }
#endif
}

int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
    pthread_create_wrapper_init();

    return pthread_create_fptr(thread, attr, start_routine, arg);
}
#endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */

#ifndef JEMALLOC_BACKGROUND_THREAD
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
#else

static bool background_thread_enabled_at_fork;

static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
    background_thread_wakeup_time_set(tsdn, info, 0);
    info->npages_to_purge_new = 0;
    if (config_stats) {
        info->tot_n_runs = 0;
        nstime_init(&info->tot_sleep_time, 0);
    }
}

static inline bool
set_current_thread_affinity(int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
    cpu_set_t cpuset;
    CPU_ZERO(&cpuset);
    CPU_SET(cpu, &cpuset);
    int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

    return (ret != 0);
#else
    return false;
#endif
}

/* Threshold for determining when to wake up the background thread. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
#define BILLION UINT64_C(1000000000)
/* Minimal sleep interval 100 ms. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)

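/*
 * Estimate how many dirty pages the decay curve will release over the next
 * `interval' epochs.  Backlog slot 0 is the oldest epoch; slots below
 * `interval' age out entirely and contribute whatever they may still retain
 * (backlog[i] * h_steps[i]), while newer slots slide `interval' positions
 * toward expiry and contribute the resulting drop in their retention limit
 * (h_steps[i] - h_steps[i - interval]).  The sum is fixed point, hence the
 * final SMOOTHSTEP_BFP shift.
 */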
static inline size_t
decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
    size_t i;
    uint64_t sum = 0;
    for (i = 0; i < interval; i++) {
        sum += decay->backlog[i] * h_steps[i];
    }
    for (; i < SMOOTHSTEP_NSTEPS; i++) {
        sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
    }

    return (size_t)(sum >> SMOOTHSTEP_BFP);
}

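/*
 * Pick a sleep interval for one decay policy: binary search for the smallest
 * number of epochs after which roughly BACKGROUND_THREAD_NPAGES_THRESHOLD
 * pages become purgeable.  As a worked example (assuming the default
 * dirty_decay_ms of 10000 and SMOOTHSTEP_NSTEPS of 200), one epoch is 50 ms,
 * so the 100 ms minimum sleep corresponds to the lower bound of two epochs
 * enforced below.
 */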
static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
    if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
        /* Use minimal interval if decay is contended. */
        return BACKGROUND_THREAD_MIN_INTERVAL_NS;
    }

    uint64_t interval;
    ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
    if (decay_time <= 0) {
        /* Purging is eagerly done or disabled currently. */
        interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
        goto label_done;
    }

    uint64_t decay_interval_ns = nstime_ns(&decay->interval);
    assert(decay_interval_ns > 0);
    size_t npages = extents_npages_get(extents);
    if (npages == 0) {
        unsigned i;
        for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
            if (decay->backlog[i] > 0) {
                break;
            }
        }
        if (i == SMOOTHSTEP_NSTEPS) {
            /* No dirty pages recorded.  Sleep indefinitely. */
            interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
            goto label_done;
        }
    }
    if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        /* Use max interval. */
        interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
        goto label_done;
    }

    size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
    size_t ub = SMOOTHSTEP_NSTEPS;
    /* Minimal 2 intervals to ensure reaching next epoch deadline. */
    lb = (lb < 2) ? 2 : lb;
    if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
        (lb + 2 > ub)) {
        interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
        goto label_done;
    }

    assert(lb + 2 <= ub);
    size_t npurge_lb, npurge_ub;
    npurge_lb = decay_npurge_after_interval(decay, lb);
    if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        interval = decay_interval_ns * lb;
        goto label_done;
    }
    npurge_ub = decay_npurge_after_interval(decay, ub);
    if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        interval = decay_interval_ns * ub;
        goto label_done;
    }

    unsigned n_search = 0;
    size_t target, npurge;
    while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
        && (lb + 2 < ub)) {
        target = (lb + ub) / 2;
        npurge = decay_npurge_after_interval(decay, target);
        if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
            ub = target;
            npurge_ub = npurge;
        } else {
            lb = target;
            npurge_lb = npurge;
        }
        assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
    }
    interval = decay_interval_ns * (ub + lb) / 2;
label_done:
    interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
        BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
    malloc_mutex_unlock(tsdn, &decay->mtx);

    return interval;
}

/* Compute purge interval for background threads. */
static uint64_t
arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
    uint64_t i1, i2;
    i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
        &arena->extents_dirty);
    if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
        return i1;
    }
    i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
        &arena->extents_muzzy);

    return i1 < i2 ? i1 : i2;
}

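/*
 * Sleep on info->cond until the next computed wakeup time (or indefinitely
 * when there is nothing to purge).  The wall clock drives the condvar
 * deadline and the sleep-time stats, while the wakeup time advertised to
 * other threads uses the malloc clock.
 */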
static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
    if (config_stats) {
        info->tot_n_runs++;
    }
    info->npages_to_purge_new = 0;

    struct timeval tv;
    /* Specific clock required by timedwait. */
    gettimeofday(&tv, NULL);
    nstime_t before_sleep;
    nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

    int ret;
    if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
        assert(background_thread_indefinite_sleep(info));
        ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
        assert(ret == 0);
    } else {
        assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
            interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
        /* We need malloc clock (can be different from tv). */
        nstime_t next_wakeup;
        nstime_init(&next_wakeup, 0);
        nstime_update(&next_wakeup);
        nstime_iadd(&next_wakeup, interval);
        assert(nstime_ns(&next_wakeup) <
            BACKGROUND_THREAD_INDEFINITE_SLEEP);
        background_thread_wakeup_time_set(tsdn, info,
            nstime_ns(&next_wakeup));

        nstime_t ts_wakeup;
        nstime_copy(&ts_wakeup, &before_sleep);
        nstime_iadd(&ts_wakeup, interval);
        struct timespec ts;
        ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
        ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

        assert(!background_thread_indefinite_sleep(info));
        ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
        assert(ret == ETIMEDOUT || ret == 0);
        background_thread_wakeup_time_set(tsdn, info,
            BACKGROUND_THREAD_INDEFINITE_SLEEP);
    }
    if (config_stats) {
        gettimeofday(&tv, NULL);
        nstime_t after_sleep;
        nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
        if (nstime_compare(&after_sleep, &before_sleep) > 0) {
            nstime_subtract(&after_sleep, &before_sleep);
            nstime_add(&info->tot_sleep_time, &after_sleep);
        }
    }
}

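/*
 * Check whether this thread has been paused (e.g. for arena reset).  If so,
 * drop info->mtx and briefly contend on the global background_thread_lock,
 * which the pausing control thread holds until the pause is over; return
 * true so the caller restarts its loop.
 */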
static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
    if (unlikely(info->state == background_thread_paused)) {
        malloc_mutex_unlock(tsdn, &info->mtx);
        /* Wait on global lock to update status. */
        malloc_mutex_lock(tsdn, &background_thread_lock);
        malloc_mutex_unlock(tsdn, &background_thread_lock);
        malloc_mutex_lock(tsdn, &info->mtx);
        return true;
    }

    return false;
}

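/*
 * One round of background work: decay each arena assigned to this thread
 * (arena i is handled by thread i % max_background_threads), then sleep for
 * the shortest purge interval any of those arenas reported.
 */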
static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info,
    unsigned ind) {
    uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
    unsigned narenas = narenas_total_get();

    for (unsigned i = ind; i < narenas; i += max_background_threads) {
        arena_t *arena = arena_get(tsdn, i, false);
        if (!arena) {
            continue;
        }
        arena_decay(tsdn, arena, true, false);
        if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
            /* Min interval will be used. */
            continue;
        }
        uint64_t interval = arena_decay_compute_purge_interval(tsdn,
            arena);
        assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
        if (min_interval > interval) {
            min_interval = interval;
        }
    }
    background_thread_sleep(tsdn, info, min_interval);
}

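/*
 * Signal a single thread to stop and join it.  Returns true on error (i.e.
 * pthread_join failure).  Thread 0 must be handled with the global lock
 * held, since it is the thread that terminates the others.
 */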
static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
    if (info == &background_thread_info[0]) {
        malloc_mutex_assert_owner(tsd_tsdn(tsd),
            &background_thread_lock);
    } else {
        malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
            &background_thread_lock);
    }

    pre_reentrancy(tsd, NULL);
    malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
    bool has_thread;
    assert(info->state != background_thread_paused);
    if (info->state == background_thread_started) {
        has_thread = true;
        info->state = background_thread_stopped;
        pthread_cond_signal(&info->cond);
    } else {
        has_thread = false;
    }
    malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

    if (!has_thread) {
        post_reentrancy(tsd);
        return false;
    }
    void *ret;
    if (pthread_join(info->thread, &ret)) {
        post_reentrancy(tsd);
        return true;
    }
    assert(ret == NULL);
    n_background_threads--;
    post_reentrancy(tsd);

    return false;
}

static void *background_thread_entry(void *ind_arg);

static int
background_thread_create_signals_masked(pthread_t *thread,
    const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
    /*
     * Mask signals during thread creation so that the thread inherits
     * an empty signal set.
     */
    sigset_t set;
    sigfillset(&set);
    sigset_t oldset;
    int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
    if (mask_err != 0) {
        return mask_err;
    }
    int create_err = pthread_create_wrapper(thread, attr, start_routine,
        arg);
    /*
     * Restore the signal mask.  Failure to restore the signal mask here
     * changes program behavior.
     */
    int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
    if (restore_err != 0) {
        malloc_printf("<jemalloc>: background thread creation "
            "failed (%d), and signal mask restoration failed "
            "(%d)\n", create_err, restore_err);
        if (opt_abort) {
            abort();
        }
    }
    return create_err;
}

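/*
 * Called by thread 0 with background_thread_info[0].mtx held: launch any
 * threads whose info slots are marked started but have no live pthread yet.
 * info[0].mtx is dropped while creating, and at most one thread is created
 * per call; returns true after an attempt so the caller restarts its loop.
 */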
static bool
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
    bool ret = false;
    if (likely(*n_created == n_background_threads)) {
        return ret;
    }

    tsdn_t *tsdn = tsd_tsdn(tsd);
    malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
    for (unsigned i = 1; i < max_background_threads; i++) {
        if (created_threads[i]) {
            continue;
        }
        background_thread_info_t *info = &background_thread_info[i];
        malloc_mutex_lock(tsdn, &info->mtx);
        /*
         * In case of the background_thread_paused state because of
         * arena reset, delay the creation.
         */
        bool create = (info->state == background_thread_started);
        malloc_mutex_unlock(tsdn, &info->mtx);
        if (!create) {
            continue;
        }

        pre_reentrancy(tsd, NULL);
        int err = background_thread_create_signals_masked(&info->thread,
            NULL, background_thread_entry, (void *)(uintptr_t)i);
        post_reentrancy(tsd);

        if (err == 0) {
            (*n_created)++;
            created_threads[i] = true;
        } else {
            malloc_printf("<jemalloc>: background thread "
                "creation failed (%d)\n", err);
            if (opt_abort) {
                abort();
            }
        }
        /* Return to restart the loop since we unlocked. */
        ret = true;
        break;
    }
    malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);

    return ret;
}

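/*
 * Main loop for thread 0, which, in addition to its own decay work, launches
 * the remaining threads on demand and tears them all down once background
 * threads are disabled.
 */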
static void
background_thread0_work(tsd_t *tsd) {
    /* Thread0 is also responsible for launching / terminating threads. */
    VARIABLE_ARRAY(bool, created_threads, max_background_threads);
    unsigned i;
    for (i = 1; i < max_background_threads; i++) {
        created_threads[i] = false;
    }
    /* Start working, and create more threads when asked. */
    unsigned n_created = 1;
    while (background_thread_info[0].state != background_thread_stopped) {
        if (background_thread_pause_check(tsd_tsdn(tsd),
            &background_thread_info[0])) {
            continue;
        }
        if (check_background_thread_creation(tsd, &n_created,
            (bool *)&created_threads)) {
            continue;
        }
        background_work_sleep_once(tsd_tsdn(tsd),
            &background_thread_info[0], 0);
    }

    /*
     * Shut down other threads at exit.  Note that the ctl thread is
     * holding the global background_thread mutex and is waiting for us.
     */
    assert(!background_thread_enabled());
    for (i = 1; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        assert(info->state != background_thread_paused);
        if (created_threads[i]) {
            background_threads_disable_single(tsd, info);
        } else {
            malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
            if (info->state != background_thread_stopped) {
                /* The thread was not created. */
                assert(info->state ==
                    background_thread_started);
                n_background_threads--;
                info->state = background_thread_stopped;
            }
            malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
        }
    }
    background_thread_info[0].state = background_thread_stopped;
    assert(n_background_threads == 1);
}

static void
background_work(tsd_t *tsd, unsigned ind) {
    background_thread_info_t *info = &background_thread_info[ind];

    malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
    background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
        BACKGROUND_THREAD_INDEFINITE_SLEEP);
    if (ind == 0) {
        background_thread0_work(tsd);
    } else {
        while (info->state != background_thread_stopped) {
            if (background_thread_pause_check(tsd_tsdn(tsd),
                info)) {
                continue;
            }
            background_work_sleep_once(tsd_tsdn(tsd), info, ind);
        }
    }
    assert(info->state == background_thread_stopped);
    background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
    malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}

static void *
background_thread_entry(void *ind_arg) {
    unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
    assert(thread_ind < max_background_threads);
#ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
    pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
#elif defined(__FreeBSD__)
    pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
#endif
    if (opt_percpu_arena != percpu_arena_disabled) {
        set_current_thread_affinity((int)thread_ind);
    }
    /*
     * Start periodic background work.  We use internal tsd which avoids
     * side effects, for example triggering new arena creation (which in
     * turn triggers another background thread creation).
     */
    background_work(tsd_internal_fetch(), thread_ind);
    assert(pthread_equal(pthread_self(),
        background_thread_info[thread_ind].thread));

    return NULL;
}

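/*
 * Mark a thread slot as started and reset its bookkeeping; the caller is
 * responsible for actually creating (or signaling thread 0 to create) the
 * pthread.
 */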
static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
    info->state = background_thread_started;
    background_thread_info_init(tsd_tsdn(tsd), info);
    n_background_threads++;
}

static bool
background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
    assert(have_background_thread);
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

    /* We create at most NCPUs threads. */
    size_t thread_ind = arena_ind % max_background_threads;
    background_thread_info_t *info = &background_thread_info[thread_ind];

    bool need_new_thread;
    malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
    need_new_thread = background_thread_enabled() &&
        (info->state == background_thread_stopped);
    if (need_new_thread) {
        background_thread_init(tsd, info);
    }
    malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
    if (!need_new_thread) {
        return false;
    }
    if (arena_ind != 0) {
        /* Threads are created asynchronously by Thread 0. */
        background_thread_info_t *t0 = &background_thread_info[0];
        malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
        assert(t0->state == background_thread_started);
        pthread_cond_signal(&t0->cond);
        malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

        return false;
    }

    pre_reentrancy(tsd, NULL);
    /*
     * To avoid complications (besides reentrancy), create internal
     * background threads with the underlying pthread_create.
     */
    int err = background_thread_create_signals_masked(&info->thread, NULL,
        background_thread_entry, (void *)thread_ind);
    post_reentrancy(tsd);

    if (err != 0) {
        malloc_printf("<jemalloc>: arena 0 background thread creation "
            "failed (%d)\n", err);
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        info->state = background_thread_stopped;
        n_background_threads--;
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

        return true;
    }

    return false;
}

/* Create a new background thread if needed. */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
    assert(have_background_thread);

    bool ret;
    malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
    ret = background_thread_create_locked(tsd, arena_ind);
    malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);

    return ret;
}

bool
background_threads_enable(tsd_t *tsd) {
    assert(n_background_threads == 0);
    assert(background_thread_enabled());
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

    VARIABLE_ARRAY(bool, marked, max_background_threads);
    unsigned i, nmarked;
    for (i = 0; i < max_background_threads; i++) {
        marked[i] = false;
    }
    nmarked = 0;
    /* Thread 0 is required and created at the end. */
    marked[0] = true;
    /* Mark the threads we need to create for thread 0. */
    unsigned n = narenas_total_get();
    for (i = 1; i < n; i++) {
        if (marked[i % max_background_threads] ||
            arena_get(tsd_tsdn(tsd), i, false) == NULL) {
            continue;
        }
        background_thread_info_t *info = &background_thread_info[
            i % max_background_threads];
        malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
        assert(info->state == background_thread_stopped);
        background_thread_init(tsd, info);
        malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
        marked[i % max_background_threads] = true;
        if (++nmarked == max_background_threads) {
            break;
        }
    }

    return background_thread_create_locked(tsd, 0);
}

bool
background_threads_disable(tsd_t *tsd) {
    assert(!background_thread_enabled());
    malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

    /* Thread 0 will be responsible for terminating other threads. */
    if (background_threads_disable_single(tsd,
        &background_thread_info[0])) {
        return true;
    }
    assert(n_background_threads == 0);

    return false;
}

/* Check if we need to signal the background thread early. */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
    background_thread_info_t *info = arena_background_thread_info_get(
        arena);
    if (malloc_mutex_trylock(tsdn, &info->mtx)) {
        /*
         * The background thread may hold this mutex for a long time;
         * to avoid adding latency variance to application threads,
         * keep this path non-blocking and leave the work to a future
         * epoch.
         */
        return;
    }

    if (info->state != background_thread_started) {
        goto label_done;
    }
    if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
        goto label_done;
    }

    ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
    if (decay_time <= 0) {
        /* Purging is eagerly done or disabled currently. */
        goto label_done_unlock2;
    }
    uint64_t decay_interval_ns = nstime_ns(&decay->interval);
    assert(decay_interval_ns > 0);

    nstime_t diff;
    nstime_init(&diff, background_thread_wakeup_time_get(info));
    if (nstime_compare(&diff, &decay->epoch) <= 0) {
        goto label_done_unlock2;
    }
    nstime_subtract(&diff, &decay->epoch);
    if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
        goto label_done_unlock2;
    }

    if (npages_new > 0) {
        size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
        /*
         * Compute how many new pages we would need to purge by the
         * next wakeup, which is used to determine if we should signal
         * the background thread.
         */
        uint64_t npurge_new;
        if (n_epoch >= SMOOTHSTEP_NSTEPS) {
            npurge_new = npages_new;
        } else {
            uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
            assert(h_steps_max >=
                h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
            npurge_new = npages_new * (h_steps_max -
                h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
            npurge_new >>= SMOOTHSTEP_BFP;
        }
        info->npages_to_purge_new += npurge_new;
    }

    bool should_signal;
    if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
        should_signal = true;
    } else if (unlikely(background_thread_indefinite_sleep(info)) &&
        (extents_npages_get(&arena->extents_dirty) > 0 ||
        extents_npages_get(&arena->extents_muzzy) > 0 ||
        info->npages_to_purge_new > 0)) {
        should_signal = true;
    } else {
        should_signal = false;
    }

    if (should_signal) {
        info->npages_to_purge_new = 0;
        pthread_cond_signal(&info->cond);
    }
label_done_unlock2:
    malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
    malloc_mutex_unlock(tsdn, &info->mtx);
}

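/*
 * Fork handling: the global lock is taken in prefork0 and the per-thread
 * mutexes in prefork1, so the child inherits them in a consistent state.
 * Background threads do not survive fork(), so the child resets all state
 * to disabled/stopped and reinitializes the condition variables.
 */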
void
background_thread_prefork0(tsdn_t *tsdn) {
    malloc_mutex_prefork(tsdn, &background_thread_lock);
    background_thread_enabled_at_fork = background_thread_enabled();
}

void
background_thread_prefork1(tsdn_t *tsdn) {
    for (unsigned i = 0; i < max_background_threads; i++) {
        malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
    }
}

void
background_thread_postfork_parent(tsdn_t *tsdn) {
    for (unsigned i = 0; i < max_background_threads; i++) {
        malloc_mutex_postfork_parent(tsdn,
            &background_thread_info[i].mtx);
    }
    malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
}

void
background_thread_postfork_child(tsdn_t *tsdn) {
    for (unsigned i = 0; i < max_background_threads; i++) {
        malloc_mutex_postfork_child(tsdn,
            &background_thread_info[i].mtx);
    }
    malloc_mutex_postfork_child(tsdn, &background_thread_lock);
    if (!background_thread_enabled_at_fork) {
        return;
    }

    /* Clear background_thread state (reset to disabled for child). */
    malloc_mutex_lock(tsdn, &background_thread_lock);
    n_background_threads = 0;
    background_thread_enabled_set(tsdn, false);
    for (unsigned i = 0; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        malloc_mutex_lock(tsdn, &info->mtx);
        info->state = background_thread_stopped;
        int ret = pthread_cond_init(&info->cond, NULL);
        assert(ret == 0);
        background_thread_info_init(tsdn, info);
        malloc_mutex_unlock(tsdn, &info->mtx);
    }
    malloc_mutex_unlock(tsdn, &background_thread_lock);
}

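/*
 * Report the aggregate run count and average run interval.  Returns true
 * (error) when background threads are disabled; threads whose mutex is busy
 * are skipped rather than waited on.
 */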
bool
background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
    assert(config_stats);
    malloc_mutex_lock(tsdn, &background_thread_lock);
    if (!background_thread_enabled()) {
        malloc_mutex_unlock(tsdn, &background_thread_lock);
        return true;
    }

    stats->num_threads = n_background_threads;
    uint64_t num_runs = 0;
    nstime_init(&stats->run_interval, 0);
    for (unsigned i = 0; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        if (malloc_mutex_trylock(tsdn, &info->mtx)) {
            /*
             * Each background thread run may take a long time;
             * avoid waiting on the stats if the thread is active.
             */
            continue;
        }
        if (info->state != background_thread_stopped) {
            num_runs += info->tot_n_runs;
            nstime_add(&stats->run_interval, &info->tot_sleep_time);
        }
        malloc_mutex_unlock(tsdn, &info->mtx);
    }
    stats->num_runs = num_runs;
    if (num_runs > 0) {
        nstime_idivide(&stats->run_interval, num_runs);
    }
    malloc_mutex_unlock(tsdn, &background_thread_lock);

    return false;
}

#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
#undef BILLION
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS

#ifdef JEMALLOC_HAVE_DLSYM
#include <dlfcn.h>
#endif

static bool
pthread_create_fptr_init(void) {
    if (pthread_create_fptr != NULL) {
        return false;
    }
    /*
     * Try the next symbol first, because 1) when lazy_lock is in use we
     * have a wrapper for pthread_create, and 2) the application may
     * define its own wrapper as well (and may call malloc from within
     * that wrapper).
     */
#ifdef JEMALLOC_HAVE_DLSYM
    pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
#else
    pthread_create_fptr = NULL;
#endif
    if (pthread_create_fptr == NULL) {
        if (config_lazy_lock) {
            malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
                "\"pthread_create\")\n");
            abort();
        } else {
            /* Fall back to the default symbol. */
            pthread_create_fptr = pthread_create;
        }
    }

    return false;
}

/*
 * When lazy lock is enabled, make sure isthreaded is set before taking any
 * background_thread lock.  This is called early in ctl (instead of waiting
 * for a pthread_create call to trigger it) because the mutex is required
 * before creating background threads.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
    malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
    pthread_create_fptr_init();
    pthread_create_wrapper_init();
#endif
}

#endif /* defined(JEMALLOC_BACKGROUND_THREAD) */

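/*
 * Boot phase 0 runs before any threads are created and resolves the real
 * pthread_create; boot phase 1 runs once arenas exist and sets up the global
 * lock plus the per-thread info array.
 */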
bool
background_thread_boot0(void) {
    if (!have_background_thread && opt_background_thread) {
        malloc_printf("<jemalloc>: option background_thread currently "
            "supports pthread only\n");
        return true;
    }
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
    if ((config_lazy_lock || opt_background_thread) &&
        pthread_create_fptr_init()) {
        return true;
    }
#endif
    return false;
}

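/*
 * Clamp opt_max_background_threads (its out-of-range initial value of
 * MAX_BACKGROUND_THREAD_LIMIT + 1 marks "unset"), then initialize the global
 * lock and the per-thread info array.
 */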
bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
    assert(have_background_thread);
    assert(narenas_total_get() > 0);

    if (opt_max_background_threads > MAX_BACKGROUND_THREAD_LIMIT) {
        opt_max_background_threads = DEFAULT_NUM_BACKGROUND_THREAD;
    }
    max_background_threads = opt_max_background_threads;

    background_thread_enabled_set(tsdn, opt_background_thread);
    if (malloc_mutex_init(&background_thread_lock,
        "background_thread_global",
        WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
        malloc_mutex_rank_exclusive)) {
        return true;
    }

    background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
        b0get(), opt_max_background_threads *
        sizeof(background_thread_info_t), CACHELINE);
    if (background_thread_info == NULL) {
        return true;
    }

    for (unsigned i = 0; i < max_background_threads; i++) {
        background_thread_info_t *info = &background_thread_info[i];
        /* Thread mutex is rank_inclusive because of thread0. */
        if (malloc_mutex_init(&info->mtx, "background_thread",
            WITNESS_RANK_BACKGROUND_THREAD,
            malloc_mutex_address_ordered)) {
            return true;
        }
        if (pthread_cond_init(&info->cond, NULL)) {
            return true;
        }
        malloc_mutex_lock(tsdn, &info->mtx);
        info->state = background_thread_stopped;
        background_thread_info_init(tsdn, info);
        malloc_mutex_unlock(tsdn, &info->mtx);
    }
#endif

    return false;
}