xref: /freebsd/contrib/jemalloc/src/background_thread.c (revision 69a3d9e7dc9336be9d10f1736b505391033695cb)
1 #define JEMALLOC_BACKGROUND_THREAD_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 
7 /******************************************************************************/
8 /* Data. */
9 
10 /* This option should be opt-in only. */
11 #define BACKGROUND_THREAD_DEFAULT false
12 /* Read-only after initialization. */
13 bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
14 
15 /* Used for thread creation, termination and stats. */
16 malloc_mutex_t background_thread_lock;
17 /* Indicates global state.  Atomic because decay reads this w/o locking. */
18 atomic_b_t background_thread_enabled_state;
19 size_t n_background_threads;
20 /* Thread info per-index. */
21 background_thread_info_t *background_thread_info;
22 
23 /* False if no necessary runtime support. */
24 bool can_enable_background_thread;
25 
26 /******************************************************************************/
27 
28 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
29 #include <dlfcn.h>
30 
31 static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
32     void *(*)(void *), void *__restrict);
33 static pthread_once_t once_control = PTHREAD_ONCE_INIT;
34 
/*
 * One-time hook run before the first wrapped pthread_create() call (see
 * pthread_create_wrapper below).
 */
static void
pthread_create_wrapper_once(void) {
#ifdef JEMALLOC_LAZY_LOCK
	/*
	 * The process is about to become multi-threaded; enable real locking
	 * for lazily-initialized locks.
	 */
	isthreaded = true;
#endif
}
41 
/*
 * Wrapper around the real pthread_create(): guarantees the once-hook above
 * has run before any thread is spawned, then forwards to the underlying
 * implementation resolved via dlsym(RTLD_NEXT) in background_thread_boot0().
 */
int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
	pthread_once(&once_control, pthread_create_wrapper_once);

	return pthread_create_fptr(thread, attr, start_routine, arg);
}
49 #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
50 
#ifndef JEMALLOC_BACKGROUND_THREAD
/*
 * Builds without background-thread support still need these symbols to link,
 * but they must never be invoked at runtime; each stub body aborts via
 * not_reached().
 */
#define NOT_REACHED { not_reached(); }
bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
bool background_threads_enable(tsd_t *tsd) NOT_REACHED
bool background_threads_disable(tsd_t *tsd) NOT_REACHED
void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) NOT_REACHED
void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
bool background_thread_stats_read(tsdn_t *tsdn,
    background_thread_stats_t *stats) NOT_REACHED
void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
#undef NOT_REACHED
66 #else
67 
68 static bool background_thread_enabled_at_fork;
69 
/*
 * Reset a thread's per-info bookkeeping: wakeup time (0 == not sleeping),
 * the pending-purge page counter, and (when stats are enabled) the
 * cumulative run/sleep statistics.
 */
static void
background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
	background_thread_wakeup_time_set(tsdn, info, 0);
	info->npages_to_purge_new = 0;
	if (config_stats) {
		info->tot_n_runs = 0;
		nstime_init(&info->tot_sleep_time, 0);
	}
}
79 
/*
 * Pin the calling thread to the given CPU.  Returns true on failure.  On
 * platforms without sched_setaffinity() this is a no-op that reports
 * success.
 */
static inline bool
set_current_thread_affinity(UNUSED int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
	cpu_set_t cpuset;
	CPU_ZERO(&cpuset);
	CPU_SET(cpu, &cpuset);
	/* pid 0 == current thread for sched_setaffinity(). */
	int ret = sched_setaffinity(0, sizeof(cpu_set_t), &cpuset);

	return (ret != 0);
#else
	return false;
#endif
}
93 
94 /* Threshold for determining when to wake up the background thread. */
95 #define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
96 #define BILLION UINT64_C(1000000000)
97 /* Minimal sleep interval 100 ms. */
98 #define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
99 
100 static inline size_t
101 decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
102 	size_t i;
103 	uint64_t sum = 0;
104 	for (i = 0; i < interval; i++) {
105 		sum += decay->backlog[i] * h_steps[i];
106 	}
107 	for (; i < SMOOTHSTEP_NSTEPS; i++) {
108 		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
109 	}
110 
111 	return (size_t)(sum >> SMOOTHSTEP_BFP);
112 }
113 
/*
 * Compute how long the background thread may sleep before it should purge
 * pages tracked by `decay`/`extents`, in nanoseconds.  Returns the minimal
 * interval when contended, and BACKGROUND_THREAD_INDEFINITE_SLEEP when
 * there is provably nothing to purge.
 */
static uint64_t
arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
    extents_t *extents) {
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		/* Use minimal interval if decay is contended. */
		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
	}

	uint64_t interval;
	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is eagerly done or disabled currently. */
		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
		goto label_done;
	}

	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);
	size_t npages = extents_npages_get(extents);
	if (npages == 0) {
		/* No pages mapped now; check whether any backlog remains. */
		unsigned i;
		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
			if (decay->backlog[i] > 0) {
				break;
			}
		}
		if (i == SMOOTHSTEP_NSTEPS) {
			/* No dirty pages recorded.  Sleep indefinitely. */
			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
			goto label_done;
		}
	}
	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		/* Use max interval. */
		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
		goto label_done;
	}

	/*
	 * Binary-search [lb, ub] (in units of decay epochs) for the smallest
	 * interval after which at least NPAGES_THRESHOLD pages would be
	 * purged.
	 */
	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
	size_t ub = SMOOTHSTEP_NSTEPS;
	/* Minimal 2 intervals to ensure reaching next epoch deadline. */
	lb = (lb < 2) ? 2 : lb;
	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
	    (lb + 2 > ub)) {
		/* Search range degenerate; fall back to the minimal interval. */
		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
		goto label_done;
	}

	assert(lb + 2 <= ub);
	size_t npurge_lb, npurge_ub;
	npurge_lb = decay_npurge_after_interval(decay, lb);
	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		/* Even the shortest candidate purges enough; use it. */
		interval = decay_interval_ns * lb;
		goto label_done;
	}
	npurge_ub = decay_npurge_after_interval(decay, ub);
	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		/* Even the longest candidate purges too little; use it. */
		interval = decay_interval_ns * ub;
		goto label_done;
	}

	unsigned n_search = 0;
	size_t target, npurge;
	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
	    && (lb + 2 < ub)) {
		target = (lb + ub) / 2;
		npurge = decay_npurge_after_interval(decay, target);
		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
			ub = target;
			npurge_ub = npurge;
		} else {
			lb = target;
			npurge_lb = npurge;
		}
		/* The search halves the range; bounded by lg(NSTEPS) + 1. */
		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
	}
	interval = decay_interval_ns * (ub + lb) / 2;
label_done:
	/* Never sleep less than the minimal interval. */
	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
	malloc_mutex_unlock(tsdn, &decay->mtx);

	return interval;
}
198 
199 /* Compute purge interval for background threads. */
200 static uint64_t
201 arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
202 	uint64_t i1, i2;
203 	i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
204 	    &arena->extents_dirty);
205 	if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
206 		return i1;
207 	}
208 	i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
209 	    &arena->extents_muzzy);
210 
211 	return i1 < i2 ? i1 : i2;
212 }
213 
/*
 * Sleep on info->cond for `interval` nanoseconds (or indefinitely for
 * BACKGROUND_THREAD_INDEFINITE_SLEEP), with info->mtx held on entry and
 * exit.  Also maintains the advertised wakeup time and, when stats are
 * enabled, the run/sleep statistics.
 */
static void
background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
    uint64_t interval) {
	if (config_stats) {
		info->tot_n_runs++;
	}
	info->npages_to_purge_new = 0;

	struct timeval tv;
	/* Specific clock required by timedwait. */
	gettimeofday(&tv, NULL);
	nstime_t before_sleep;
	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);

	int ret;
	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
		assert(background_thread_indefinite_sleep(info));
		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
		assert(ret == 0);
	} else {
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
		/* We need malloc clock (can be different from tv). */
		nstime_t next_wakeup;
		nstime_init(&next_wakeup, 0);
		nstime_update(&next_wakeup);
		nstime_iadd(&next_wakeup, interval);
		assert(nstime_ns(&next_wakeup) <
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
		/* Publish the wakeup time so interval_check can see it. */
		background_thread_wakeup_time_set(tsdn, info,
		    nstime_ns(&next_wakeup));

		/* Absolute deadline for timedwait, on the gettimeofday clock. */
		nstime_t ts_wakeup;
		nstime_copy(&ts_wakeup, &before_sleep);
		nstime_iadd(&ts_wakeup, interval);
		struct timespec ts;
		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);

		assert(!background_thread_indefinite_sleep(info));
		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
		assert(ret == ETIMEDOUT || ret == 0);
		/* Mark as not scheduled to wake until the next sleep. */
		background_thread_wakeup_time_set(tsdn, info,
		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	}
	if (config_stats) {
		gettimeofday(&tv, NULL);
		nstime_t after_sleep;
		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
		/* Guard against the wall clock going backwards. */
		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
			nstime_subtract(&after_sleep, &before_sleep);
			nstime_add(&info->tot_sleep_time, &after_sleep);
		}
	}
}
269 
/*
 * If the thread has been paused (e.g. around fork), block until the pauser
 * releases the global lock, then resume.  Returns true when a pause was
 * observed (caller should re-check state), false otherwise.  Called and
 * returns with info->mtx held.
 */
static bool
background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
	if (unlikely(info->state == background_thread_paused)) {
		/* Drop info->mtx first to respect lock ordering. */
		malloc_mutex_unlock(tsdn, &info->mtx);
		/* Wait on global lock to update status. */
		malloc_mutex_lock(tsdn, &background_thread_lock);
		malloc_mutex_unlock(tsdn, &background_thread_lock);
		malloc_mutex_lock(tsdn, &info->mtx);
		return true;
	}

	return false;
}
283 
/*
 * One iteration of a background thread's work loop: decay each arena this
 * thread is responsible for (arena indices congruent to `ind` mod ncpus),
 * compute the soonest time any of them needs purging, and sleep until then.
 * Called with info->mtx held.
 */
static inline void
background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
	unsigned narenas = narenas_total_get();

	for (unsigned i = ind; i < narenas; i += ncpus) {
		arena_t *arena = arena_get(tsdn, i, false);
		if (!arena) {
			continue;
		}
		arena_decay(tsdn, arena, true, false);
		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
			/* Min interval will be used. */
			continue;
		}
		uint64_t interval = arena_decay_compute_purge_interval(tsdn,
		    arena);
		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
		if (min_interval > interval) {
			min_interval = interval;
		}
	}
	background_thread_sleep(tsdn, info, min_interval);
}
308 
/*
 * Stop a single background thread: mark it stopped, wake it, and join it.
 * Returns true on failure (pthread_join error), false otherwise.  The
 * global lock must be held iff the target is thread 0 (only thread 0 is
 * stopped via the ctl path; others are stopped by thread 0 itself).
 */
static bool
background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
	if (info == &background_thread_info[0]) {
		malloc_mutex_assert_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	} else {
		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
		    &background_thread_lock);
	}

	pre_reentrancy(tsd);
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	bool has_thread;
	assert(info->state != background_thread_paused);
	if (info->state == background_thread_started) {
		has_thread = true;
		info->state = background_thread_stopped;
		/* Wake the thread so it can observe the stopped state. */
		pthread_cond_signal(&info->cond);
	} else {
		has_thread = false;
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

	if (!has_thread) {
		post_reentrancy(tsd);
		return false;
	}
	void *ret;
	if (pthread_join(info->thread, &ret)) {
		post_reentrancy(tsd);
		return true;
	}
	assert(ret == NULL);
	n_background_threads--;
	post_reentrancy(tsd);

	return false;
}
347 
348 static void *background_thread_entry(void *ind_arg);
349 
/*
 * Thread 0's helper: create any background threads whose info slots have
 * been marked started but which have not been spawned yet.  `*n_created`
 * and `created_threads[]` track thread 0's view of what it has launched.
 * Called with background_thread_info[0].mtx held; temporarily drops it.
 */
static void
check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
    bool *created_threads) {
	/* Fast path: nothing new requested. */
	if (likely(*n_created == n_background_threads)) {
		return;
	}

	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
label_restart:
	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
	for (unsigned i = 1; i < ncpus; i++) {
		if (created_threads[i]) {
			continue;
		}
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		assert(info->state != background_thread_paused);
		bool create = (info->state == background_thread_started);
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		if (!create) {
			continue;
		}

		/*
		 * To avoid deadlock with prefork handlers (which waits for the
		 * mutex held here), unlock before calling pthread_create().
		 */
		malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);

		pre_reentrancy(tsd);
		int err = pthread_create_wrapper(&info->thread, NULL,
		    background_thread_entry, (void *)(uintptr_t)i);
		post_reentrancy(tsd);

		if (err == 0) {
			(*n_created)++;
			created_threads[i] = true;
		} else {
			malloc_printf("<jemalloc>: background thread "
			    "creation failed (%d)\n", err);
			if (opt_abort) {
				abort();
			}
		}
		/* Restart since we unlocked. */
		goto label_restart;
	}
	/* Re-acquire in the caller's expected order: info[0] before global. */
	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_info[0].mtx);
	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
}
400 
/*
 * Main loop of background thread 0.  Besides doing regular decay work, it
 * spawns the other background threads on demand and tears them all down
 * when disabled.  Called with background_thread_info[0].mtx held.
 */
static void
background_thread0_work(tsd_t *tsd) {
	/* Thread0 is also responsible for launching / terminating threads. */
	VARIABLE_ARRAY(bool, created_threads, ncpus);
	unsigned i;
	for (i = 1; i < ncpus; i++) {
		created_threads[i] = false;
	}
	/* Start working, and create more threads when asked. */
	unsigned n_created = 1;
	while (background_thread_info[0].state != background_thread_stopped) {
		if (background_thread_pause_check(tsd_tsdn(tsd),
		    &background_thread_info[0])) {
			continue;
		}
		check_background_thread_creation(tsd, &n_created,
		    (bool *)&created_threads);
		background_work_sleep_once(tsd_tsdn(tsd),
		    &background_thread_info[0], 0);
	}

	/*
	 * Shut down other threads at exit.  Note that the ctl thread is holding
	 * the global background_thread mutex (and is waiting) for us.
	 */
	assert(!background_thread_enabled());
	for (i = 1; i < ncpus; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		assert(info->state != background_thread_paused);
		if (created_threads[i]) {
			background_threads_disable_single(tsd, info);
		} else {
			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
			/* Clear in case the thread wasn't created. */
			info->state = background_thread_stopped;
			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
		}
	}
	background_thread_info[0].state = background_thread_stopped;
	/* Only thread 0 remains; the ctl thread will join it. */
	assert(n_background_threads == 1);
}
442 
/*
 * Body shared by all background threads: run the appropriate work loop
 * (thread 0 gets the manager loop) until the state becomes stopped, then
 * clear the advertised wakeup time.
 */
static void
background_work(tsd_t *tsd, unsigned ind) {
	background_thread_info_t *info = &background_thread_info[ind];

	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
	if (ind == 0) {
		background_thread0_work(tsd);
	} else {
		while (info->state != background_thread_stopped) {
			if (background_thread_pause_check(tsd_tsdn(tsd),
			    info)) {
				continue;
			}
			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
		}
	}
	assert(info->state == background_thread_stopped);
	/* Wakeup time 0 marks the thread as no longer running. */
	background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
}
465 
466 static void *
467 background_thread_entry(void *ind_arg) {
468 	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
469 	assert(thread_ind < ncpus);
470 
471 	if (opt_percpu_arena != percpu_arena_disabled) {
472 		set_current_thread_affinity((int)thread_ind);
473 	}
474 	/*
475 	 * Start periodic background work.  We use internal tsd which avoids
476 	 * side effects, for example triggering new arena creation (which in
477 	 * turn triggers another background thread creation).
478 	 */
479 	background_work(tsd_internal_fetch(), thread_ind);
480 	assert(pthread_equal(pthread_self(),
481 	    background_thread_info[thread_ind].thread));
482 
483 	return NULL;
484 }
485 
/*
 * Mark a thread slot as started and reset its bookkeeping; bumps the global
 * thread count.  Requires the global background_thread_lock.
 */
static void
background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
	info->state = background_thread_started;
	background_thread_info_init(tsd_tsdn(tsd), info);
	n_background_threads++;
}
493 
/*
 * Create (or request creation of) the background thread serving arena
 * `arena_ind`.  Thread 0 is created synchronously here; other indices are
 * created asynchronously by thread 0.  Returns true on failure.  Requires
 * the global background_thread_lock.
 */
bool
background_thread_create(tsd_t *tsd, unsigned arena_ind) {
	assert(have_background_thread);
	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);

	/* We create at most NCPUs threads. */
	size_t thread_ind = arena_ind % ncpus;
	background_thread_info_t *info = &background_thread_info[thread_ind];

	bool need_new_thread;
	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
	need_new_thread = background_thread_enabled() &&
	    (info->state == background_thread_stopped);
	if (need_new_thread) {
		background_thread_init(tsd, info);
	}
	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
	if (!need_new_thread) {
		return false;
	}
	if (arena_ind != 0) {
		/* Threads are created asynchronously by Thread 0. */
		background_thread_info_t *t0 = &background_thread_info[0];
		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
		assert(t0->state == background_thread_started);
		/* Wake thread 0 so it notices the new started slot. */
		pthread_cond_signal(&t0->cond);
		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);

		return false;
	}

	pre_reentrancy(tsd);
	/*
	 * To avoid complications (besides reentrancy), create internal
	 * background threads with the underlying pthread_create.
	 */
	int err = pthread_create_wrapper(&info->thread, NULL,
	    background_thread_entry, (void *)thread_ind);
	post_reentrancy(tsd);

	if (err != 0) {
		malloc_printf("<jemalloc>: arena 0 background thread creation "
		    "failed (%d)\n", err);
		/* Roll back the init performed above. */
		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
		info->state = background_thread_stopped;
		n_background_threads--;
		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);

		return true;
	}

	return false;
}
548 
549 bool
550 background_threads_enable(tsd_t *tsd) {
551 	assert(n_background_threads == 0);
552 	assert(background_thread_enabled());
553 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
554 
555 	VARIABLE_ARRAY(bool, marked, ncpus);
556 	unsigned i, nmarked;
557 	for (i = 0; i < ncpus; i++) {
558 		marked[i] = false;
559 	}
560 	nmarked = 0;
561 	/* Mark the threads we need to create for thread 0. */
562 	unsigned n = narenas_total_get();
563 	for (i = 1; i < n; i++) {
564 		if (marked[i % ncpus] ||
565 		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
566 			continue;
567 		}
568 		background_thread_info_t *info = &background_thread_info[i];
569 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
570 		assert(info->state == background_thread_stopped);
571 		background_thread_init(tsd, info);
572 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
573 		marked[i % ncpus] = true;
574 		if (++nmarked == ncpus) {
575 			break;
576 		}
577 	}
578 
579 	return background_thread_create(tsd, 0);
580 }
581 
582 bool
583 background_threads_disable(tsd_t *tsd) {
584 	assert(!background_thread_enabled());
585 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
586 
587 	/* Thread 0 will be responsible for terminating other threads. */
588 	if (background_threads_disable_single(tsd,
589 	    &background_thread_info[0])) {
590 		return true;
591 	}
592 	assert(n_background_threads == 0);
593 
594 	return false;
595 }
596 
/*
 * Check if we need to signal the background thread early.  Called from
 * application threads on the decay path; deliberately uses trylocks only so
 * it never blocks the caller.  `npages_new` is the number of pages newly
 * added to the decay backlog this epoch.
 */
void
background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
    arena_decay_t *decay, size_t npages_new) {
	background_thread_info_t *info = arena_background_thread_info_get(
	    arena);
	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
		/*
		 * Background thread may hold the mutex for a long period of
		 * time.  We'd like to avoid the variance on application
		 * threads.  So keep this non-blocking, and leave the work to a
		 * future epoch.
		 */
		return;
	}

	if (info->state != background_thread_started) {
		goto label_done;
	}
	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
		goto label_done;
	}

	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
	if (decay_time <= 0) {
		/* Purging is eagerly done or disabled currently. */
		goto label_done_unlock2;
	}
	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
	assert(decay_interval_ns > 0);

	/* diff = scheduled wakeup time - current decay epoch. */
	nstime_t diff;
	nstime_init(&diff, background_thread_wakeup_time_get(info));
	if (nstime_compare(&diff, &decay->epoch) <= 0) {
		goto label_done_unlock2;
	}
	nstime_subtract(&diff, &decay->epoch);
	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
		/* Wakeup is imminent anyway; no need to signal. */
		goto label_done_unlock2;
	}

	if (npages_new > 0) {
		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
		/*
		 * Compute how many new pages we would need to purge by the next
		 * wakeup, which is used to determine if we should signal the
		 * background thread.
		 */
		uint64_t npurge_new;
		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
			npurge_new = npages_new;
		} else {
			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
			assert(h_steps_max >=
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new = npages_new * (h_steps_max -
			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
			npurge_new >>= SMOOTHSTEP_BFP;
		}
		info->npages_to_purge_new += npurge_new;
	}

	bool should_signal;
	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
		should_signal = true;
	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
	    (extents_npages_get(&arena->extents_dirty) > 0 ||
	    extents_npages_get(&arena->extents_muzzy) > 0 ||
	    info->npages_to_purge_new > 0)) {
		/* The thread is sleeping forever but there is work pending. */
		should_signal = true;
	} else {
		should_signal = false;
	}

	if (should_signal) {
		info->npages_to_purge_new = 0;
		pthread_cond_signal(&info->cond);
	}
label_done_unlock2:
	malloc_mutex_unlock(tsdn, &decay->mtx);
label_done:
	malloc_mutex_unlock(tsdn, &info->mtx);
}
680 
/*
 * First prefork stage: take the global lock and record whether background
 * threads were enabled, so the child can decide whether to reset state.
 */
void
background_thread_prefork0(tsdn_t *tsdn) {
	malloc_mutex_prefork(tsdn, &background_thread_lock);
	background_thread_enabled_at_fork = background_thread_enabled();
}
686 
687 void
688 background_thread_prefork1(tsdn_t *tsdn) {
689 	for (unsigned i = 0; i < ncpus; i++) {
690 		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
691 	}
692 }
693 
694 void
695 background_thread_postfork_parent(tsdn_t *tsdn) {
696 	for (unsigned i = 0; i < ncpus; i++) {
697 		malloc_mutex_postfork_parent(tsdn,
698 		    &background_thread_info[i].mtx);
699 	}
700 	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
701 }
702 
/*
 * Child-side postfork: reinitialize all background-thread mutexes, and (if
 * threads were enabled in the parent) reset the global state to disabled —
 * the parent's threads do not exist in the child.
 */
void
background_thread_postfork_child(tsdn_t *tsdn) {
	for (unsigned i = 0; i < ncpus; i++) {
		malloc_mutex_postfork_child(tsdn,
		    &background_thread_info[i].mtx);
	}
	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
	if (!background_thread_enabled_at_fork) {
		return;
	}

	/* Clear background_thread state (reset to disabled for child). */
	malloc_mutex_lock(tsdn, &background_thread_lock);
	n_background_threads = 0;
	background_thread_enabled_set(tsdn, false);
	for (unsigned i = 0; i < ncpus; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		/* The parent's condvar state is meaningless in the child. */
		int ret = pthread_cond_init(&info->cond, NULL);
		assert(ret == 0);
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
	malloc_mutex_unlock(tsdn, &background_thread_lock);
}
729 
730 bool
731 background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
732 	assert(config_stats);
733 	malloc_mutex_lock(tsdn, &background_thread_lock);
734 	if (!background_thread_enabled()) {
735 		malloc_mutex_unlock(tsdn, &background_thread_lock);
736 		return true;
737 	}
738 
739 	stats->num_threads = n_background_threads;
740 	uint64_t num_runs = 0;
741 	nstime_init(&stats->run_interval, 0);
742 	for (unsigned i = 0; i < ncpus; i++) {
743 		background_thread_info_t *info = &background_thread_info[i];
744 		malloc_mutex_lock(tsdn, &info->mtx);
745 		if (info->state != background_thread_stopped) {
746 			num_runs += info->tot_n_runs;
747 			nstime_add(&stats->run_interval, &info->tot_sleep_time);
748 		}
749 		malloc_mutex_unlock(tsdn, &info->mtx);
750 	}
751 	stats->num_runs = num_runs;
752 	if (num_runs > 0) {
753 		nstime_idivide(&stats->run_interval, num_runs);
754 	}
755 	malloc_mutex_unlock(tsdn, &background_thread_lock);
756 
757 	return false;
758 }
759 
760 #undef BACKGROUND_THREAD_NPAGES_THRESHOLD
761 #undef BILLION
762 #undef BACKGROUND_THREAD_MIN_INTERVAL_NS
763 
764 /*
765  * When lazy lock is enabled, we need to make sure setting isthreaded before
766  * taking any background_thread locks.  This is called early in ctl (instead of
767  * wait for the pthread_create calls to trigger) because the mutex is required
768  * before creating background threads.
769  */
/*
 * Run the pthread_create once-hook early from ctl, so that (with lazy lock)
 * isthreaded is set before any background_thread lock is taken.
 */
void
background_thread_ctl_init(tsdn_t *tsdn) {
	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	pthread_once(&once_control, pthread_create_wrapper_once);
#endif
}
777 
778 #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
779 
/*
 * Early boot: validate that the platform supports the background_thread
 * option and (when the wrapper is used) resolve the real pthread_create via
 * dlsym(RTLD_NEXT).  Returns true on failure.
 */
bool
background_thread_boot0(void) {
	if (!have_background_thread && opt_background_thread) {
		malloc_printf("<jemalloc>: option background_thread currently "
		    "supports pthread only\n");
		return true;
	}

#ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
	if (pthread_create_fptr == NULL) {
		can_enable_background_thread = false;
		if (config_lazy_lock || opt_background_thread) {
			/* Required for correctness in these configs; abort. */
			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
			    "\"pthread_create\")\n");
			abort();
		}
	} else {
		can_enable_background_thread = true;
	}
#endif
	return false;
}
803 
/*
 * Late boot: initialize the global lock and allocate/initialize the
 * per-CPU background_thread_info array (ncpus entries).  Returns true on
 * failure.  No-op (returns false) when built without background threads.
 */
bool
background_thread_boot1(tsdn_t *tsdn) {
#ifdef JEMALLOC_BACKGROUND_THREAD
	assert(have_background_thread);
	assert(narenas_total_get() > 0);

	background_thread_enabled_set(tsdn, opt_background_thread);
	if (malloc_mutex_init(&background_thread_lock,
	    "background_thread_global",
	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
	    malloc_mutex_rank_exclusive)) {
		return true;
	}
	if (opt_background_thread) {
		background_thread_ctl_init(tsdn);
	}

	/* One info slot per CPU; arenas map onto slots modulo ncpus. */
	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
	    b0get(), ncpus * sizeof(background_thread_info_t), CACHELINE);
	if (background_thread_info == NULL) {
		return true;
	}

	for (unsigned i = 0; i < ncpus; i++) {
		background_thread_info_t *info = &background_thread_info[i];
		/* Thread mutex is rank_inclusive because of thread0. */
		if (malloc_mutex_init(&info->mtx, "background_thread",
		    WITNESS_RANK_BACKGROUND_THREAD,
		    malloc_mutex_address_ordered)) {
			return true;
		}
		if (pthread_cond_init(&info->cond, NULL)) {
			return true;
		}
		malloc_mutex_lock(tsdn, &info->mtx);
		info->state = background_thread_stopped;
		background_thread_info_init(tsdn, info);
		malloc_mutex_unlock(tsdn, &info->mtx);
	}
#endif

	return false;
}
847