xref: /freebsd/contrib/jemalloc/src/background_thread.c (revision f078c492a9b57877c723586db26d789cda1b98ea)
1 #define JEMALLOC_BACKGROUND_THREAD_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 #include "jemalloc/internal/jemalloc_internal_includes.h"
4 
5 #include "jemalloc/internal/assert.h"
6 
7 JEMALLOC_DIAGNOSTIC_DISABLE_SPURIOUS
8 
9 /******************************************************************************/
10 /* Data. */
11 
12 /* This option should be opt-in only. */
13 #define BACKGROUND_THREAD_DEFAULT false
14 /* Read-only after initialization. */
15 bool opt_background_thread = BACKGROUND_THREAD_DEFAULT;
16 size_t opt_max_background_threads = MAX_BACKGROUND_THREAD_LIMIT + 1;
17 
18 /* Used for thread creation, termination and stats. */
19 malloc_mutex_t background_thread_lock;
20 /* Indicates global state.  Atomic because decay reads this w/o locking. */
21 atomic_b_t background_thread_enabled_state;
22 size_t n_background_threads;
23 size_t max_background_threads;
24 /* Thread info per-index. */
25 background_thread_info_t *background_thread_info;
26 
27 /******************************************************************************/
28 
29 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
30 
/*
 * The pthread_create implementation that pthread_create_wrapper() forwards
 * to.  Filled in by pthread_create_fptr_init().
 */
static int (*pthread_create_fptr)(pthread_t *__restrict, const pthread_attr_t *,
    void *(*)(void *), void *__restrict);
33 
/*
 * With JEMALLOC_LAZY_LOCK, make sure isthreaded is set.  Without it this
 * function does nothing.
 */
static void
pthread_create_wrapper_init(void) {
#ifdef JEMALLOC_LAZY_LOCK
	if (isthreaded) {
		/* Already flagged by an earlier call. */
		return;
	}
	isthreaded = true;
#endif
}
42 
/*
 * Run the wrapper initialization, then forward all arguments unchanged to
 * pthread_create_fptr and return its result.
 */
int
pthread_create_wrapper(pthread_t *__restrict thread, const pthread_attr_t *attr,
    void *(*start_routine)(void *), void *__restrict arg) {
	pthread_create_wrapper_init();

	int err = pthread_create_fptr(thread, attr, start_routine, arg);
	return err;
}
50 #endif /* JEMALLOC_PTHREAD_CREATE_WRAPPER */
51 
52 #ifndef JEMALLOC_BACKGROUND_THREAD
53 #define NOT_REACHED { not_reached(); }
54 bool background_thread_create(tsd_t *tsd, unsigned arena_ind) NOT_REACHED
55 bool background_threads_enable(tsd_t *tsd) NOT_REACHED
56 bool background_threads_disable(tsd_t *tsd) NOT_REACHED
57 void background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
58     arena_decay_t *decay, size_t npages_new) NOT_REACHED
59 void background_thread_prefork0(tsdn_t *tsdn) NOT_REACHED
60 void background_thread_prefork1(tsdn_t *tsdn) NOT_REACHED
61 void background_thread_postfork_parent(tsdn_t *tsdn) NOT_REACHED
62 void background_thread_postfork_child(tsdn_t *tsdn) NOT_REACHED
63 bool background_thread_stats_read(tsdn_t *tsdn,
64     background_thread_stats_t *stats) NOT_REACHED
65 void background_thread_ctl_init(tsdn_t *tsdn) NOT_REACHED
66 #undef NOT_REACHED
67 #else
68 
/*
 * Value of background_thread_enabled() recorded by
 * background_thread_prefork0() and consulted by
 * background_thread_postfork_child().
 */
static bool background_thread_enabled_at_fork;
70 
71 static void
72 background_thread_info_init(tsdn_t *tsdn, background_thread_info_t *info) {
73 	background_thread_wakeup_time_set(tsdn, info, 0);
74 	info->npages_to_purge_new = 0;
75 	if (config_stats) {
76 		info->tot_n_runs = 0;
77 		nstime_init(&info->tot_sleep_time, 0);
78 	}
79 }
80 
/*
 * Pin the calling thread to the given CPU when sched_setaffinity is available.
 * Returns true on failure and false on success; without
 * JEMALLOC_HAVE_SCHED_SETAFFINITY it always returns false.
 */
static inline bool
set_current_thread_affinity(int cpu) {
#if defined(JEMALLOC_HAVE_SCHED_SETAFFINITY)
	cpu_set_t mask;
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	if (sched_setaffinity(0, sizeof(cpu_set_t), &mask) != 0) {
		return true;
	}
	return false;
#else
	return false;
#endif
}
94 
/* Page count above which the background thread is worth waking up. */
#define BACKGROUND_THREAD_NPAGES_THRESHOLD UINT64_C(1024)
/* Nanoseconds per second. */
#define BILLION UINT64_C(1000000000)
/* Shortest sleep interval: 100 ms, expressed in nanoseconds. */
#define BACKGROUND_THREAD_MIN_INTERVAL_NS (BILLION / 10)
100 
101 static inline size_t
102 decay_npurge_after_interval(arena_decay_t *decay, size_t interval) {
103 	size_t i;
104 	uint64_t sum = 0;
105 	for (i = 0; i < interval; i++) {
106 		sum += decay->backlog[i] * h_steps[i];
107 	}
108 	for (; i < SMOOTHSTEP_NSTEPS; i++) {
109 		sum += decay->backlog[i] * (h_steps[i] - h_steps[i - interval]);
110 	}
111 
112 	return (size_t)(sum >> SMOOTHSTEP_BFP);
113 }
114 
115 static uint64_t
116 arena_decay_compute_purge_interval_impl(tsdn_t *tsdn, arena_decay_t *decay,
117     extents_t *extents) {
118 	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
119 		/* Use minimal interval if decay is contended. */
120 		return BACKGROUND_THREAD_MIN_INTERVAL_NS;
121 	}
122 
123 	uint64_t interval;
124 	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
125 	if (decay_time <= 0) {
126 		/* Purging is eagerly done or disabled currently. */
127 		interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
128 		goto label_done;
129 	}
130 
131 	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
132 	assert(decay_interval_ns > 0);
133 	size_t npages = extents_npages_get(extents);
134 	if (npages == 0) {
135 		unsigned i;
136 		for (i = 0; i < SMOOTHSTEP_NSTEPS; i++) {
137 			if (decay->backlog[i] > 0) {
138 				break;
139 			}
140 		}
141 		if (i == SMOOTHSTEP_NSTEPS) {
142 			/* No dirty pages recorded.  Sleep indefinitely. */
143 			interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
144 			goto label_done;
145 		}
146 	}
147 	if (npages <= BACKGROUND_THREAD_NPAGES_THRESHOLD) {
148 		/* Use max interval. */
149 		interval = decay_interval_ns * SMOOTHSTEP_NSTEPS;
150 		goto label_done;
151 	}
152 
153 	size_t lb = BACKGROUND_THREAD_MIN_INTERVAL_NS / decay_interval_ns;
154 	size_t ub = SMOOTHSTEP_NSTEPS;
155 	/* Minimal 2 intervals to ensure reaching next epoch deadline. */
156 	lb = (lb < 2) ? 2 : lb;
157 	if ((decay_interval_ns * ub <= BACKGROUND_THREAD_MIN_INTERVAL_NS) ||
158 	    (lb + 2 > ub)) {
159 		interval = BACKGROUND_THREAD_MIN_INTERVAL_NS;
160 		goto label_done;
161 	}
162 
163 	assert(lb + 2 <= ub);
164 	size_t npurge_lb, npurge_ub;
165 	npurge_lb = decay_npurge_after_interval(decay, lb);
166 	if (npurge_lb > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
167 		interval = decay_interval_ns * lb;
168 		goto label_done;
169 	}
170 	npurge_ub = decay_npurge_after_interval(decay, ub);
171 	if (npurge_ub < BACKGROUND_THREAD_NPAGES_THRESHOLD) {
172 		interval = decay_interval_ns * ub;
173 		goto label_done;
174 	}
175 
176 	unsigned n_search = 0;
177 	size_t target, npurge;
178 	while ((npurge_lb + BACKGROUND_THREAD_NPAGES_THRESHOLD < npurge_ub)
179 	    && (lb + 2 < ub)) {
180 		target = (lb + ub) / 2;
181 		npurge = decay_npurge_after_interval(decay, target);
182 		if (npurge > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
183 			ub = target;
184 			npurge_ub = npurge;
185 		} else {
186 			lb = target;
187 			npurge_lb = npurge;
188 		}
189 		assert(n_search++ < lg_floor(SMOOTHSTEP_NSTEPS) + 1);
190 	}
191 	interval = decay_interval_ns * (ub + lb) / 2;
192 label_done:
193 	interval = (interval < BACKGROUND_THREAD_MIN_INTERVAL_NS) ?
194 	    BACKGROUND_THREAD_MIN_INTERVAL_NS : interval;
195 	malloc_mutex_unlock(tsdn, &decay->mtx);
196 
197 	return interval;
198 }
199 
200 /* Compute purge interval for background threads. */
201 static uint64_t
202 arena_decay_compute_purge_interval(tsdn_t *tsdn, arena_t *arena) {
203 	uint64_t i1, i2;
204 	i1 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_dirty,
205 	    &arena->extents_dirty);
206 	if (i1 == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
207 		return i1;
208 	}
209 	i2 = arena_decay_compute_purge_interval_impl(tsdn, &arena->decay_muzzy,
210 	    &arena->extents_muzzy);
211 
212 	return i1 < i2 ? i1 : i2;
213 }
214 
215 static void
216 background_thread_sleep(tsdn_t *tsdn, background_thread_info_t *info,
217     uint64_t interval) {
218 	if (config_stats) {
219 		info->tot_n_runs++;
220 	}
221 	info->npages_to_purge_new = 0;
222 
223 	struct timeval tv;
224 	/* Specific clock required by timedwait. */
225 	gettimeofday(&tv, NULL);
226 	nstime_t before_sleep;
227 	nstime_init2(&before_sleep, tv.tv_sec, tv.tv_usec * 1000);
228 
229 	int ret;
230 	if (interval == BACKGROUND_THREAD_INDEFINITE_SLEEP) {
231 		assert(background_thread_indefinite_sleep(info));
232 		ret = pthread_cond_wait(&info->cond, &info->mtx.lock);
233 		assert(ret == 0);
234 	} else {
235 		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS &&
236 		    interval <= BACKGROUND_THREAD_INDEFINITE_SLEEP);
237 		/* We need malloc clock (can be different from tv). */
238 		nstime_t next_wakeup;
239 		nstime_init(&next_wakeup, 0);
240 		nstime_update(&next_wakeup);
241 		nstime_iadd(&next_wakeup, interval);
242 		assert(nstime_ns(&next_wakeup) <
243 		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
244 		background_thread_wakeup_time_set(tsdn, info,
245 		    nstime_ns(&next_wakeup));
246 
247 		nstime_t ts_wakeup;
248 		nstime_copy(&ts_wakeup, &before_sleep);
249 		nstime_iadd(&ts_wakeup, interval);
250 		struct timespec ts;
251 		ts.tv_sec = (size_t)nstime_sec(&ts_wakeup);
252 		ts.tv_nsec = (size_t)nstime_nsec(&ts_wakeup);
253 
254 		assert(!background_thread_indefinite_sleep(info));
255 		ret = pthread_cond_timedwait(&info->cond, &info->mtx.lock, &ts);
256 		assert(ret == ETIMEDOUT || ret == 0);
257 		background_thread_wakeup_time_set(tsdn, info,
258 		    BACKGROUND_THREAD_INDEFINITE_SLEEP);
259 	}
260 	if (config_stats) {
261 		gettimeofday(&tv, NULL);
262 		nstime_t after_sleep;
263 		nstime_init2(&after_sleep, tv.tv_sec, tv.tv_usec * 1000);
264 		if (nstime_compare(&after_sleep, &before_sleep) > 0) {
265 			nstime_subtract(&after_sleep, &before_sleep);
266 			nstime_add(&info->tot_sleep_time, &after_sleep);
267 		}
268 	}
269 }
270 
271 static bool
272 background_thread_pause_check(tsdn_t *tsdn, background_thread_info_t *info) {
273 	if (unlikely(info->state == background_thread_paused)) {
274 		malloc_mutex_unlock(tsdn, &info->mtx);
275 		/* Wait on global lock to update status. */
276 		malloc_mutex_lock(tsdn, &background_thread_lock);
277 		malloc_mutex_unlock(tsdn, &background_thread_lock);
278 		malloc_mutex_lock(tsdn, &info->mtx);
279 		return true;
280 	}
281 
282 	return false;
283 }
284 
285 static inline void
286 background_work_sleep_once(tsdn_t *tsdn, background_thread_info_t *info, unsigned ind) {
287 	uint64_t min_interval = BACKGROUND_THREAD_INDEFINITE_SLEEP;
288 	unsigned narenas = narenas_total_get();
289 
290 	for (unsigned i = ind; i < narenas; i += max_background_threads) {
291 		arena_t *arena = arena_get(tsdn, i, false);
292 		if (!arena) {
293 			continue;
294 		}
295 		arena_decay(tsdn, arena, true, false);
296 		if (min_interval == BACKGROUND_THREAD_MIN_INTERVAL_NS) {
297 			/* Min interval will be used. */
298 			continue;
299 		}
300 		uint64_t interval = arena_decay_compute_purge_interval(tsdn,
301 		    arena);
302 		assert(interval >= BACKGROUND_THREAD_MIN_INTERVAL_NS);
303 		if (min_interval > interval) {
304 			min_interval = interval;
305 		}
306 	}
307 	background_thread_sleep(tsdn, info, min_interval);
308 }
309 
310 static bool
311 background_threads_disable_single(tsd_t *tsd, background_thread_info_t *info) {
312 	if (info == &background_thread_info[0]) {
313 		malloc_mutex_assert_owner(tsd_tsdn(tsd),
314 		    &background_thread_lock);
315 	} else {
316 		malloc_mutex_assert_not_owner(tsd_tsdn(tsd),
317 		    &background_thread_lock);
318 	}
319 
320 	pre_reentrancy(tsd, NULL);
321 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
322 	bool has_thread;
323 	assert(info->state != background_thread_paused);
324 	if (info->state == background_thread_started) {
325 		has_thread = true;
326 		info->state = background_thread_stopped;
327 		pthread_cond_signal(&info->cond);
328 	} else {
329 		has_thread = false;
330 	}
331 	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
332 
333 	if (!has_thread) {
334 		post_reentrancy(tsd);
335 		return false;
336 	}
337 	void *ret;
338 	if (pthread_join(info->thread, &ret)) {
339 		post_reentrancy(tsd);
340 		return true;
341 	}
342 	assert(ret == NULL);
343 	n_background_threads--;
344 	post_reentrancy(tsd);
345 
346 	return false;
347 }
348 
/* Thread start routine; declared here because it is used before its definition. */
static void *background_thread_entry(void *ind_arg);
350 
351 static int
352 background_thread_create_signals_masked(pthread_t *thread,
353     const pthread_attr_t *attr, void *(*start_routine)(void *), void *arg) {
354 	/*
355 	 * Mask signals during thread creation so that the thread inherits
356 	 * an empty signal set.
357 	 */
358 	sigset_t set;
359 	sigfillset(&set);
360 	sigset_t oldset;
361 	int mask_err = pthread_sigmask(SIG_SETMASK, &set, &oldset);
362 	if (mask_err != 0) {
363 		return mask_err;
364 	}
365 	int create_err = pthread_create_wrapper(thread, attr, start_routine,
366 	    arg);
367 	/*
368 	 * Restore the signal mask.  Failure to restore the signal mask here
369 	 * changes program behavior.
370 	 */
371 	int restore_err = pthread_sigmask(SIG_SETMASK, &oldset, NULL);
372 	if (restore_err != 0) {
373 		malloc_printf("<jemalloc>: background thread creation "
374 		    "failed (%d), and signal mask restoration failed "
375 		    "(%d)\n", create_err, restore_err);
376 		if (opt_abort) {
377 			abort();
378 		}
379 	}
380 	return create_err;
381 }
382 
383 static bool
384 check_background_thread_creation(tsd_t *tsd, unsigned *n_created,
385     bool *created_threads) {
386 	bool ret = false;
387 	if (likely(*n_created == n_background_threads)) {
388 		return ret;
389 	}
390 
391 	tsdn_t *tsdn = tsd_tsdn(tsd);
392 	malloc_mutex_unlock(tsdn, &background_thread_info[0].mtx);
393 	for (unsigned i = 1; i < max_background_threads; i++) {
394 		if (created_threads[i]) {
395 			continue;
396 		}
397 		background_thread_info_t *info = &background_thread_info[i];
398 		malloc_mutex_lock(tsdn, &info->mtx);
399 		/*
400 		 * In case of the background_thread_paused state because of
401 		 * arena reset, delay the creation.
402 		 */
403 		bool create = (info->state == background_thread_started);
404 		malloc_mutex_unlock(tsdn, &info->mtx);
405 		if (!create) {
406 			continue;
407 		}
408 
409 		pre_reentrancy(tsd, NULL);
410 		int err = background_thread_create_signals_masked(&info->thread,
411 		    NULL, background_thread_entry, (void *)(uintptr_t)i);
412 		post_reentrancy(tsd);
413 
414 		if (err == 0) {
415 			(*n_created)++;
416 			created_threads[i] = true;
417 		} else {
418 			malloc_printf("<jemalloc>: background thread "
419 			    "creation failed (%d)\n", err);
420 			if (opt_abort) {
421 				abort();
422 			}
423 		}
424 		/* Return to restart the loop since we unlocked. */
425 		ret = true;
426 		break;
427 	}
428 	malloc_mutex_lock(tsdn, &background_thread_info[0].mtx);
429 
430 	return ret;
431 }
432 
433 static void
434 background_thread0_work(tsd_t *tsd) {
435 	/* Thread0 is also responsible for launching / terminating threads. */
436 	VARIABLE_ARRAY(bool, created_threads, max_background_threads);
437 	unsigned i;
438 	for (i = 1; i < max_background_threads; i++) {
439 		created_threads[i] = false;
440 	}
441 	/* Start working, and create more threads when asked. */
442 	unsigned n_created = 1;
443 	while (background_thread_info[0].state != background_thread_stopped) {
444 		if (background_thread_pause_check(tsd_tsdn(tsd),
445 		    &background_thread_info[0])) {
446 			continue;
447 		}
448 		if (check_background_thread_creation(tsd, &n_created,
449 		    (bool *)&created_threads)) {
450 			continue;
451 		}
452 		background_work_sleep_once(tsd_tsdn(tsd),
453 		    &background_thread_info[0], 0);
454 	}
455 
456 	/*
457 	 * Shut down other threads at exit.  Note that the ctl thread is holding
458 	 * the global background_thread mutex (and is waiting) for us.
459 	 */
460 	assert(!background_thread_enabled());
461 	for (i = 1; i < max_background_threads; i++) {
462 		background_thread_info_t *info = &background_thread_info[i];
463 		assert(info->state != background_thread_paused);
464 		if (created_threads[i]) {
465 			background_threads_disable_single(tsd, info);
466 		} else {
467 			malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
468 			if (info->state != background_thread_stopped) {
469 				/* The thread was not created. */
470 				assert(info->state ==
471 				    background_thread_started);
472 				n_background_threads--;
473 				info->state = background_thread_stopped;
474 			}
475 			malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
476 		}
477 	}
478 	background_thread_info[0].state = background_thread_stopped;
479 	assert(n_background_threads == 1);
480 }
481 
482 static void
483 background_work(tsd_t *tsd, unsigned ind) {
484 	background_thread_info_t *info = &background_thread_info[ind];
485 
486 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
487 	background_thread_wakeup_time_set(tsd_tsdn(tsd), info,
488 	    BACKGROUND_THREAD_INDEFINITE_SLEEP);
489 	if (ind == 0) {
490 		background_thread0_work(tsd);
491 	} else {
492 		while (info->state != background_thread_stopped) {
493 			if (background_thread_pause_check(tsd_tsdn(tsd),
494 			    info)) {
495 				continue;
496 			}
497 			background_work_sleep_once(tsd_tsdn(tsd), info, ind);
498 		}
499 	}
500 	assert(info->state == background_thread_stopped);
501 	background_thread_wakeup_time_set(tsd_tsdn(tsd), info, 0);
502 	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
503 }
504 
505 static void *
506 background_thread_entry(void *ind_arg) {
507 	unsigned thread_ind = (unsigned)(uintptr_t)ind_arg;
508 	assert(thread_ind < max_background_threads);
509 #ifdef JEMALLOC_HAVE_PTHREAD_SETNAME_NP
510 	pthread_setname_np(pthread_self(), "jemalloc_bg_thd");
511 #elif defined(__FreeBSD__)
512 	pthread_set_name_np(pthread_self(), "jemalloc_bg_thd");
513 #endif
514 	if (opt_percpu_arena != percpu_arena_disabled) {
515 		set_current_thread_affinity((int)thread_ind);
516 	}
517 	/*
518 	 * Start periodic background work.  We use internal tsd which avoids
519 	 * side effects, for example triggering new arena creation (which in
520 	 * turn triggers another background thread creation).
521 	 */
522 	background_work(tsd_internal_fetch(), thread_ind);
523 	assert(pthread_equal(pthread_self(),
524 	    background_thread_info[thread_ind].thread));
525 
526 	return NULL;
527 }
528 
529 static void
530 background_thread_init(tsd_t *tsd, background_thread_info_t *info) {
531 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
532 	info->state = background_thread_started;
533 	background_thread_info_init(tsd_tsdn(tsd), info);
534 	n_background_threads++;
535 }
536 
537 static bool
538 background_thread_create_locked(tsd_t *tsd, unsigned arena_ind) {
539 	assert(have_background_thread);
540 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
541 
542 	/* We create at most NCPUs threads. */
543 	size_t thread_ind = arena_ind % max_background_threads;
544 	background_thread_info_t *info = &background_thread_info[thread_ind];
545 
546 	bool need_new_thread;
547 	malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
548 	need_new_thread = background_thread_enabled() &&
549 	    (info->state == background_thread_stopped);
550 	if (need_new_thread) {
551 		background_thread_init(tsd, info);
552 	}
553 	malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
554 	if (!need_new_thread) {
555 		return false;
556 	}
557 	if (arena_ind != 0) {
558 		/* Threads are created asynchronously by Thread 0. */
559 		background_thread_info_t *t0 = &background_thread_info[0];
560 		malloc_mutex_lock(tsd_tsdn(tsd), &t0->mtx);
561 		assert(t0->state == background_thread_started);
562 		pthread_cond_signal(&t0->cond);
563 		malloc_mutex_unlock(tsd_tsdn(tsd), &t0->mtx);
564 
565 		return false;
566 	}
567 
568 	pre_reentrancy(tsd, NULL);
569 	/*
570 	 * To avoid complications (besides reentrancy), create internal
571 	 * background threads with the underlying pthread_create.
572 	 */
573 	int err = background_thread_create_signals_masked(&info->thread, NULL,
574 	    background_thread_entry, (void *)thread_ind);
575 	post_reentrancy(tsd);
576 
577 	if (err != 0) {
578 		malloc_printf("<jemalloc>: arena 0 background thread creation "
579 		    "failed (%d)\n", err);
580 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
581 		info->state = background_thread_stopped;
582 		n_background_threads--;
583 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
584 
585 		return true;
586 	}
587 
588 	return false;
589 }
590 
591 /* Create a new background thread if needed. */
592 bool
593 background_thread_create(tsd_t *tsd, unsigned arena_ind) {
594 	assert(have_background_thread);
595 
596 	bool ret;
597 	malloc_mutex_lock(tsd_tsdn(tsd), &background_thread_lock);
598 	ret = background_thread_create_locked(tsd, arena_ind);
599 	malloc_mutex_unlock(tsd_tsdn(tsd), &background_thread_lock);
600 
601 	return ret;
602 }
603 
604 bool
605 background_threads_enable(tsd_t *tsd) {
606 	assert(n_background_threads == 0);
607 	assert(background_thread_enabled());
608 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
609 
610 	VARIABLE_ARRAY(bool, marked, max_background_threads);
611 	unsigned i, nmarked;
612 	for (i = 0; i < max_background_threads; i++) {
613 		marked[i] = false;
614 	}
615 	nmarked = 0;
616 	/* Thread 0 is required and created at the end. */
617 	marked[0] = true;
618 	/* Mark the threads we need to create for thread 0. */
619 	unsigned n = narenas_total_get();
620 	for (i = 1; i < n; i++) {
621 		if (marked[i % max_background_threads] ||
622 		    arena_get(tsd_tsdn(tsd), i, false) == NULL) {
623 			continue;
624 		}
625 		background_thread_info_t *info = &background_thread_info[
626 		    i % max_background_threads];
627 		malloc_mutex_lock(tsd_tsdn(tsd), &info->mtx);
628 		assert(info->state == background_thread_stopped);
629 		background_thread_init(tsd, info);
630 		malloc_mutex_unlock(tsd_tsdn(tsd), &info->mtx);
631 		marked[i % max_background_threads] = true;
632 		if (++nmarked == max_background_threads) {
633 			break;
634 		}
635 	}
636 
637 	return background_thread_create_locked(tsd, 0);
638 }
639 
640 bool
641 background_threads_disable(tsd_t *tsd) {
642 	assert(!background_thread_enabled());
643 	malloc_mutex_assert_owner(tsd_tsdn(tsd), &background_thread_lock);
644 
645 	/* Thread 0 will be responsible for terminating other threads. */
646 	if (background_threads_disable_single(tsd,
647 	    &background_thread_info[0])) {
648 		return true;
649 	}
650 	assert(n_background_threads == 0);
651 
652 	return false;
653 }
654 
655 /* Check if we need to signal the background thread early. */
656 void
657 background_thread_interval_check(tsdn_t *tsdn, arena_t *arena,
658     arena_decay_t *decay, size_t npages_new) {
659 	background_thread_info_t *info = arena_background_thread_info_get(
660 	    arena);
661 	if (malloc_mutex_trylock(tsdn, &info->mtx)) {
662 		/*
663 		 * Background thread may hold the mutex for a long period of
664 		 * time.  We'd like to avoid the variance on application
665 		 * threads.  So keep this non-blocking, and leave the work to a
666 		 * future epoch.
667 		 */
668 		return;
669 	}
670 
671 	if (info->state != background_thread_started) {
672 		goto label_done;
673 	}
674 	if (malloc_mutex_trylock(tsdn, &decay->mtx)) {
675 		goto label_done;
676 	}
677 
678 	ssize_t decay_time = atomic_load_zd(&decay->time_ms, ATOMIC_RELAXED);
679 	if (decay_time <= 0) {
680 		/* Purging is eagerly done or disabled currently. */
681 		goto label_done_unlock2;
682 	}
683 	uint64_t decay_interval_ns = nstime_ns(&decay->interval);
684 	assert(decay_interval_ns > 0);
685 
686 	nstime_t diff;
687 	nstime_init(&diff, background_thread_wakeup_time_get(info));
688 	if (nstime_compare(&diff, &decay->epoch) <= 0) {
689 		goto label_done_unlock2;
690 	}
691 	nstime_subtract(&diff, &decay->epoch);
692 	if (nstime_ns(&diff) < BACKGROUND_THREAD_MIN_INTERVAL_NS) {
693 		goto label_done_unlock2;
694 	}
695 
696 	if (npages_new > 0) {
697 		size_t n_epoch = (size_t)(nstime_ns(&diff) / decay_interval_ns);
698 		/*
699 		 * Compute how many new pages we would need to purge by the next
700 		 * wakeup, which is used to determine if we should signal the
701 		 * background thread.
702 		 */
703 		uint64_t npurge_new;
704 		if (n_epoch >= SMOOTHSTEP_NSTEPS) {
705 			npurge_new = npages_new;
706 		} else {
707 			uint64_t h_steps_max = h_steps[SMOOTHSTEP_NSTEPS - 1];
708 			assert(h_steps_max >=
709 			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
710 			npurge_new = npages_new * (h_steps_max -
711 			    h_steps[SMOOTHSTEP_NSTEPS - 1 - n_epoch]);
712 			npurge_new >>= SMOOTHSTEP_BFP;
713 		}
714 		info->npages_to_purge_new += npurge_new;
715 	}
716 
717 	bool should_signal;
718 	if (info->npages_to_purge_new > BACKGROUND_THREAD_NPAGES_THRESHOLD) {
719 		should_signal = true;
720 	} else if (unlikely(background_thread_indefinite_sleep(info)) &&
721 	    (extents_npages_get(&arena->extents_dirty) > 0 ||
722 	    extents_npages_get(&arena->extents_muzzy) > 0 ||
723 	    info->npages_to_purge_new > 0)) {
724 		should_signal = true;
725 	} else {
726 		should_signal = false;
727 	}
728 
729 	if (should_signal) {
730 		info->npages_to_purge_new = 0;
731 		pthread_cond_signal(&info->cond);
732 	}
733 label_done_unlock2:
734 	malloc_mutex_unlock(tsdn, &decay->mtx);
735 label_done:
736 	malloc_mutex_unlock(tsdn, &info->mtx);
737 }
738 
739 void
740 background_thread_prefork0(tsdn_t *tsdn) {
741 	malloc_mutex_prefork(tsdn, &background_thread_lock);
742 	background_thread_enabled_at_fork = background_thread_enabled();
743 }
744 
745 void
746 background_thread_prefork1(tsdn_t *tsdn) {
747 	for (unsigned i = 0; i < max_background_threads; i++) {
748 		malloc_mutex_prefork(tsdn, &background_thread_info[i].mtx);
749 	}
750 }
751 
752 void
753 background_thread_postfork_parent(tsdn_t *tsdn) {
754 	for (unsigned i = 0; i < max_background_threads; i++) {
755 		malloc_mutex_postfork_parent(tsdn,
756 		    &background_thread_info[i].mtx);
757 	}
758 	malloc_mutex_postfork_parent(tsdn, &background_thread_lock);
759 }
760 
761 void
762 background_thread_postfork_child(tsdn_t *tsdn) {
763 	for (unsigned i = 0; i < max_background_threads; i++) {
764 		malloc_mutex_postfork_child(tsdn,
765 		    &background_thread_info[i].mtx);
766 	}
767 	malloc_mutex_postfork_child(tsdn, &background_thread_lock);
768 	if (!background_thread_enabled_at_fork) {
769 		return;
770 	}
771 
772 	/* Clear background_thread state (reset to disabled for child). */
773 	malloc_mutex_lock(tsdn, &background_thread_lock);
774 	n_background_threads = 0;
775 	background_thread_enabled_set(tsdn, false);
776 	for (unsigned i = 0; i < max_background_threads; i++) {
777 		background_thread_info_t *info = &background_thread_info[i];
778 		malloc_mutex_lock(tsdn, &info->mtx);
779 		info->state = background_thread_stopped;
780 		int ret = pthread_cond_init(&info->cond, NULL);
781 		assert(ret == 0);
782 		background_thread_info_init(tsdn, info);
783 		malloc_mutex_unlock(tsdn, &info->mtx);
784 	}
785 	malloc_mutex_unlock(tsdn, &background_thread_lock);
786 }
787 
788 bool
789 background_thread_stats_read(tsdn_t *tsdn, background_thread_stats_t *stats) {
790 	assert(config_stats);
791 	malloc_mutex_lock(tsdn, &background_thread_lock);
792 	if (!background_thread_enabled()) {
793 		malloc_mutex_unlock(tsdn, &background_thread_lock);
794 		return true;
795 	}
796 
797 	stats->num_threads = n_background_threads;
798 	uint64_t num_runs = 0;
799 	nstime_init(&stats->run_interval, 0);
800 	for (unsigned i = 0; i < max_background_threads; i++) {
801 		background_thread_info_t *info = &background_thread_info[i];
802 		if (malloc_mutex_trylock(tsdn, &info->mtx)) {
803 			/*
804 			 * Each background thread run may take a long time;
805 			 * avoid waiting on the stats if the thread is active.
806 			 */
807 			continue;
808 		}
809 		if (info->state != background_thread_stopped) {
810 			num_runs += info->tot_n_runs;
811 			nstime_add(&stats->run_interval, &info->tot_sleep_time);
812 		}
813 		malloc_mutex_unlock(tsdn, &info->mtx);
814 	}
815 	stats->num_runs = num_runs;
816 	if (num_runs > 0) {
817 		nstime_idivide(&stats->run_interval, num_runs);
818 	}
819 	malloc_mutex_unlock(tsdn, &background_thread_lock);
820 
821 	return false;
822 }
823 
/* The tuning macros defined above are not needed past this point. */
#undef BACKGROUND_THREAD_MIN_INTERVAL_NS
#undef BILLION
#undef BACKGROUND_THREAD_NPAGES_THRESHOLD
827 
828 #ifdef JEMALLOC_HAVE_DLSYM
829 #include <dlfcn.h>
830 #endif
831 
832 static bool
833 pthread_create_fptr_init(void) {
834 	if (pthread_create_fptr != NULL) {
835 		return false;
836 	}
837 	/*
838 	 * Try the next symbol first, because 1) when use lazy_lock we have a
839 	 * wrapper for pthread_create; and 2) application may define its own
840 	 * wrapper as well (and can call malloc within the wrapper).
841 	 */
842 #ifdef JEMALLOC_HAVE_DLSYM
843 	pthread_create_fptr = dlsym(RTLD_NEXT, "pthread_create");
844 #else
845 	pthread_create_fptr = NULL;
846 #endif
847 	if (pthread_create_fptr == NULL) {
848 		if (config_lazy_lock) {
849 			malloc_write("<jemalloc>: Error in dlsym(RTLD_NEXT, "
850 			    "\"pthread_create\")\n");
851 			abort();
852 		} else {
853 			/* Fall back to the default symbol. */
854 			pthread_create_fptr = pthread_create;
855 		}
856 	}
857 
858 	return false;
859 }
860 
861 /*
862  * When lazy lock is enabled, we need to make sure setting isthreaded before
863  * taking any background_thread locks.  This is called early in ctl (instead of
864  * wait for the pthread_create calls to trigger) because the mutex is required
865  * before creating background threads.
866  */
867 void
868 background_thread_ctl_init(tsdn_t *tsdn) {
869 	malloc_mutex_assert_not_owner(tsdn, &background_thread_lock);
870 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
871 	pthread_create_fptr_init();
872 	pthread_create_wrapper_init();
873 #endif
874 }
875 
876 #endif /* defined(JEMALLOC_BACKGROUND_THREAD) */
877 
878 bool
879 background_thread_boot0(void) {
880 	if (!have_background_thread && opt_background_thread) {
881 		malloc_printf("<jemalloc>: option background_thread currently "
882 		    "supports pthread only\n");
883 		return true;
884 	}
885 #ifdef JEMALLOC_PTHREAD_CREATE_WRAPPER
886 	if ((config_lazy_lock || opt_background_thread) &&
887 	    pthread_create_fptr_init()) {
888 		return true;
889 	}
890 #endif
891 	return false;
892 }
893 
894 bool
895 background_thread_boot1(tsdn_t *tsdn) {
896 #ifdef JEMALLOC_BACKGROUND_THREAD
897 	assert(have_background_thread);
898 	assert(narenas_total_get() > 0);
899 
900 	if (opt_max_background_threads > MAX_BACKGROUND_THREAD_LIMIT) {
901 		opt_max_background_threads = DEFAULT_NUM_BACKGROUND_THREAD;
902 	}
903 	max_background_threads = opt_max_background_threads;
904 
905 	background_thread_enabled_set(tsdn, opt_background_thread);
906 	if (malloc_mutex_init(&background_thread_lock,
907 	    "background_thread_global",
908 	    WITNESS_RANK_BACKGROUND_THREAD_GLOBAL,
909 	    malloc_mutex_rank_exclusive)) {
910 		return true;
911 	}
912 
913 	background_thread_info = (background_thread_info_t *)base_alloc(tsdn,
914 	    b0get(), opt_max_background_threads *
915 	    sizeof(background_thread_info_t), CACHELINE);
916 	if (background_thread_info == NULL) {
917 		return true;
918 	}
919 
920 	for (unsigned i = 0; i < max_background_threads; i++) {
921 		background_thread_info_t *info = &background_thread_info[i];
922 		/* Thread mutex is rank_inclusive because of thread0. */
923 		if (malloc_mutex_init(&info->mtx, "background_thread",
924 		    WITNESS_RANK_BACKGROUND_THREAD,
925 		    malloc_mutex_address_ordered)) {
926 			return true;
927 		}
928 		if (pthread_cond_init(&info->cond, NULL)) {
929 			return true;
930 		}
931 		malloc_mutex_lock(tsdn, &info->mtx);
932 		info->state = background_thread_stopped;
933 		background_thread_info_init(tsdn, info);
934 		malloc_mutex_unlock(tsdn, &info->mtx);
935 	}
936 #endif
937 
938 	return false;
939 }
940