xref: /freebsd/contrib/jemalloc/src/pages.c (revision c43cad87172039ccf38172129c79755ea79e6102)
1 #include "jemalloc/internal/jemalloc_preamble.h"
2 
3 #include "jemalloc/internal/pages.h"
4 
5 #include "jemalloc/internal/jemalloc_internal_includes.h"
6 
7 #include "jemalloc/internal/assert.h"
8 #include "jemalloc/internal/malloc_io.h"
9 
10 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
11 #include <sys/sysctl.h>
12 #ifdef __FreeBSD__
13 #include <sys/auxv.h>
14 #include <vm/vm_param.h>
15 #include <vm/vm.h>
16 #endif
17 #endif
18 #ifdef __NetBSD__
19 #include <sys/bitops.h>	/* ilog2 */
20 #endif
21 #ifdef JEMALLOC_HAVE_VM_MAKE_TAG
22 #define PAGES_FD_TAG VM_MAKE_TAG(101U)
23 #else
24 #define PAGES_FD_TAG -1
25 #endif
26 
27 /******************************************************************************/
28 /* Data. */
29 
30 /* Actual operating system page size, detected during bootstrap, <= PAGE. */
31 static size_t	os_page;
32 
33 #ifndef _WIN32
34 #  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
35 #  define PAGES_PROT_DECOMMIT (PROT_NONE)
36 static int	mmap_flags;
37 #endif
38 static bool	os_overcommits;
39 
40 const char *thp_mode_names[] = {
41 	"default",
42 	"always",
43 	"never",
44 	"not supported"
45 };
46 thp_mode_t opt_thp = THP_MODE_DEFAULT;
47 thp_mode_t init_system_thp_mode;
48 
49 /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
50 static bool pages_can_purge_lazy_runtime = true;
51 
52 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
53 static int madvise_dont_need_zeros_is_faulty = -1;
54 /**
55  * Check that MADV_DONTNEED will actually zero pages on subsequent access.
56  *
 57  * Since qemu does not yet support this [1], you can hit a very confusing
 58  * assertion failure if you run a program that uses jemalloc under qemu:
59  *
60  *     <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0"
61  *
62  *   [1]: https://patchwork.kernel.org/patch/10576637/
63  */
 64 static int
 65 madvise_MADV_DONTNEED_zeroes_pages(void) {
66 	int works = -1;
67 	size_t size = PAGE;
68 
 69 	void *addr = mmap(NULL, size, PROT_READ|PROT_WRITE,
70 	    MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
71 
 72 	if (addr == MAP_FAILED) {
 73 		malloc_write("<jemalloc>: Cannot allocate memory for "
 74 		    "MADV_DONTNEED check\n");
 75 		if (opt_abort) {
 76 			abort();
 77 		}
 78 		/* Cannot run the check; treat MADV_DONTNEED as not zeroing. */
 79 		return 0;
 	}
80 	memset(addr, 'A', size);
81 	if (madvise(addr, size, MADV_DONTNEED) == 0) {
82 		works = memchr(addr, 'A', size) == NULL;
83 	} else {
84 		/*
 85 		 * If madvise() does not support MADV_DONTNEED, then we can
 86 		 * call it anyway and use its return code.
87 		 */
88 		works = 1;
89 	}
90 
91 	if (munmap(addr, size) != 0) {
92 		malloc_write("<jemalloc>: Cannot deallocate memory for "
93 		    "MADV_DONTNEED check\n");
94 		if (opt_abort) {
95 			abort();
96 		}
97 	}
98 
99 	return works;
100 }
101 #endif
102 
103 /******************************************************************************/
104 /*
105  * Function prototypes for static functions that are referenced prior to
106  * definition.
107  */
108 
109 static void os_pages_unmap(void *addr, size_t size);
110 
111 /******************************************************************************/
112 
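/*
 * Reserve size bytes of address space, optionally at addr.  On entry *commit
 * indicates whether the caller wants the pages committed; when the OS
 * overcommits it is forced to true, since every mapping is implicitly
 * committed.  Returns NULL on failure (never MAP_FAILED), including when a
 * requested addr could not be honored.
 */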
113 static void *
114 os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
115 	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
116 	assert(ALIGNMENT_CEILING(size, os_page) == size);
117 	assert(size != 0);
118 
119 	if (os_overcommits) {
120 		*commit = true;
121 	}
122 
123 	void *ret;
124 #ifdef _WIN32
125 	/*
126 	 * If VirtualAlloc can't allocate at the given address when one is
127 	 * given, it fails and returns NULL.
128 	 */
129 	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
130 	    PAGE_READWRITE);
131 #else
132 	/*
133 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
134 	 * of existing mappings, and we only want to create new mappings.
135 	 */
136 	{
137 #ifdef __NetBSD__
138 		/*
 139 		 * On NetBSD, PAGE for a platform is defined as the
 140 		 * maximum page size of all machine architectures
 141 		 * for that platform, so that the same binaries can
 142 		 * be used across all machine architectures.
143 		 */
144 		if (alignment > os_page || PAGE > os_page) {
145 			unsigned int a = ilog2(MAX(alignment, PAGE));
146 			mmap_flags |= MAP_ALIGNED(a);
147 		}
148 #endif
149 		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
150 
151 		ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0);
152 	}
153 	assert(ret != NULL);
154 
155 	if (ret == MAP_FAILED) {
156 		ret = NULL;
157 	} else if (addr != NULL && ret != addr) {
158 		/*
159 		 * We succeeded in mapping memory, but not in the right place.
160 		 */
161 		os_pages_unmap(ret, size);
162 		ret = NULL;
163 	}
164 #endif
165 	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
166 	    ret == addr));
167 	return ret;
168 }
169 
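/*
 * Trim an over-sized mapping at addr down to the size bytes starting at
 * addr + leadsize.  On Windows a reservation can only be released in full,
 * so the whole allocation is unmapped and the wanted range mapped again;
 * elsewhere the leading and trailing excess is simply unmapped.
 */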
170 static void *
171 os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
172     bool *commit) {
173 	void *ret = (void *)((uintptr_t)addr + leadsize);
174 
175 	assert(alloc_size >= leadsize + size);
176 #ifdef _WIN32
177 	os_pages_unmap(addr, alloc_size);
178 	void *new_addr = os_pages_map(ret, size, PAGE, commit);
179 	if (new_addr == ret) {
180 		return ret;
181 	}
182 	if (new_addr != NULL) {
183 		os_pages_unmap(new_addr, size);
184 	}
185 	return NULL;
186 #else
187 	size_t trailsize = alloc_size - leadsize - size;
188 
189 	if (leadsize != 0) {
190 		os_pages_unmap(addr, leadsize);
191 	}
192 	if (trailsize != 0) {
193 		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
194 	}
195 	return ret;
196 #endif
197 }
198 
199 static void
200 os_pages_unmap(void *addr, size_t size) {
201 	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
202 	assert(ALIGNMENT_CEILING(size, os_page) == size);
203 
204 #ifdef _WIN32
205 	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
206 #else
207 	if (munmap(addr, size) == -1)
208 #endif
209 	{
210 		char buf[BUFERROR_BUF];
211 
212 		buferror(get_errno(), buf, sizeof(buf));
213 		malloc_printf("<jemalloc>: Error in "
214 #ifdef _WIN32
215 		    "VirtualFree"
216 #else
217 		    "munmap"
218 #endif
219 		    "(): %s\n", buf);
220 		if (opt_abort) {
221 			abort();
222 		}
223 	}
224 }
225 
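/*
 * Reliable but slow path for aligned mappings: over-allocate by
 * (alignment - os_page) so that an aligned run of size bytes must fit, then
 * trim to it.  The loop only repeats on Windows, where re-mapping the
 * trimmed range can fail if another thread claims the address first.
 */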
226 static void *
227 pages_map_slow(size_t size, size_t alignment, bool *commit) {
228 	size_t alloc_size = size + alignment - os_page;
229 	/* Beware size_t wrap-around. */
230 	if (alloc_size < size) {
231 		return NULL;
232 	}
233 
234 	void *ret;
235 	do {
236 		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
237 		if (pages == NULL) {
238 			return NULL;
239 		}
240 		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
241 		    - (uintptr_t)pages;
242 		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
243 	} while (ret == NULL);
244 
245 	assert(ret != NULL);
246 	assert(PAGE_ADDR2BASE(ret) == ret);
247 	return ret;
248 }
249 
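/*
 * Map size bytes aligned to alignment (>= PAGE).  If addr is non-NULL, the
 * mapping is created exactly at addr or not at all.  *commit is updated as
 * in os_pages_map().  Returns NULL on failure.
 */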
250 void *
251 pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
252 	assert(alignment >= PAGE);
253 	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
254 
255 #if defined(__FreeBSD__) && defined(MAP_EXCL)
256 	/*
 257 	 * FreeBSD has mechanisms both to mmap at a specific address without
 258 	 * touching existing mappings, and to mmap with a specific alignment.
259 	 */
260 	{
261 		if (os_overcommits) {
262 			*commit = true;
263 		}
264 
265 		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
266 		int flags = mmap_flags;
267 
268 		if (addr != NULL) {
269 			flags |= MAP_FIXED | MAP_EXCL;
270 		} else {
271 			unsigned alignment_bits = ffs_zu(alignment);
272 			assert(alignment_bits > 0);
273 			flags |= MAP_ALIGNED(alignment_bits);
274 		}
275 
276 		void *ret = mmap(addr, size, prot, flags, -1, 0);
277 		if (ret == MAP_FAILED) {
278 			ret = NULL;
279 		}
280 
281 		return ret;
282 	}
283 #endif
284 	/*
285 	 * Ideally, there would be a way to specify alignment to mmap() (like
286 	 * NetBSD has), but in the absence of such a feature, we have to work
287 	 * hard to efficiently create aligned mappings.  The reliable, but
288 	 * slow method is to create a mapping that is over-sized, then trim the
289 	 * excess.  However, that always results in one or two calls to
290 	 * os_pages_unmap(), and it can leave holes in the process's virtual
291 	 * memory map if memory grows downward.
292 	 *
293 	 * Optimistically try mapping precisely the right amount before falling
294 	 * back to the slow method, with the expectation that the optimistic
295 	 * approach works most of the time.
296 	 */
297 
298 	void *ret = os_pages_map(addr, size, os_page, commit);
299 	if (ret == NULL || ret == addr) {
300 		return ret;
301 	}
302 	assert(addr == NULL);
303 	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
304 		os_pages_unmap(ret, size);
305 		return pages_map_slow(size, alignment, commit);
306 	}
307 
308 	assert(PAGE_ADDR2BASE(ret) == ret);
309 	return ret;
310 }
311 
312 void
313 pages_unmap(void *addr, size_t size) {
314 	assert(PAGE_ADDR2BASE(addr) == addr);
315 	assert(PAGE_CEILING(size) == size);
316 
317 	os_pages_unmap(addr, size);
318 }
319 
320 static bool
321 os_pages_commit(void *addr, size_t size, bool commit) {
322 	assert(PAGE_ADDR2BASE(addr) == addr);
323 	assert(PAGE_CEILING(size) == size);
324 
325 #ifdef _WIN32
326 	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
327 	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
328 #else
329 	{
330 		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
331 		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
332 		    PAGES_FD_TAG, 0);
333 		if (result == MAP_FAILED) {
334 			return true;
335 		}
336 		if (result != addr) {
337 			/*
338 			 * We succeeded in mapping memory, but not in the right
339 			 * place.
340 			 */
341 			os_pages_unmap(result, size);
342 			return true;
343 		}
344 		return false;
345 	}
346 #endif
347 }
348 
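/*
 * Return true if the requested commit/decommit did not take effect.  When
 * the OS overcommits, both operations are skipped and true is returned, so
 * the commit state is left alone (mappings are treated as always committed).
 */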
349 static bool
350 pages_commit_impl(void *addr, size_t size, bool commit) {
351 	if (os_overcommits) {
352 		return true;
353 	}
354 
355 	return os_pages_commit(addr, size, commit);
356 }
357 
358 bool
359 pages_commit(void *addr, size_t size) {
360 	return pages_commit_impl(addr, size, true);
361 }
362 
363 bool
364 pages_decommit(void *addr, size_t size) {
365 	return pages_commit_impl(addr, size, false);
366 }
367 
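/*
 * Make the head and/or tail guard page inaccessible so that any access
 * faults.  Where mprotect() is unavailable, decommitting provides the same
 * protection.
 */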
368 void
369 pages_mark_guards(void *head, void *tail) {
370 	assert(head != NULL || tail != NULL);
371 	assert(head == NULL || tail == NULL ||
372 	    (uintptr_t)head < (uintptr_t)tail);
373 #ifdef JEMALLOC_HAVE_MPROTECT
374 	if (head != NULL) {
375 		mprotect(head, PAGE, PROT_NONE);
376 	}
377 	if (tail != NULL) {
378 		mprotect(tail, PAGE, PROT_NONE);
379 	}
380 #else
381 	/* Decommit sets to PROT_NONE / MEM_DECOMMIT. */
382 	if (head != NULL) {
383 		os_pages_commit(head, PAGE, false);
384 	}
385 	if (tail != NULL) {
386 		os_pages_commit(tail, PAGE, false);
387 	}
388 #endif
389 }
390 
391 void
392 pages_unmark_guards(void *head, void *tail) {
393 	assert(head != NULL || tail != NULL);
394 	assert(head == NULL || tail == NULL ||
395 	    (uintptr_t)head < (uintptr_t)tail);
396 #ifdef JEMALLOC_HAVE_MPROTECT
397 	bool head_and_tail = (head != NULL) && (tail != NULL);
398 	size_t range = head_and_tail ?
399 	    (uintptr_t)tail - (uintptr_t)head + PAGE :
400 	    SIZE_T_MAX;
401 	/*
402 	 * The amount of work that the kernel does in mprotect depends on the
403 	 * range argument.  SC_LARGE_MINCLASS is an arbitrary threshold chosen
 404 	 * to prevent the kernel from doing too much work that would outweigh
 405 	 * the savings of performing one less system call.
406 	 */
407 	bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS;
408 	if (ranged_mprotect) {
409 		mprotect(head, range, PROT_READ | PROT_WRITE);
410 	} else {
411 		if (head != NULL) {
412 			mprotect(head, PAGE, PROT_READ | PROT_WRITE);
413 		}
414 		if (tail != NULL) {
415 			mprotect(tail, PAGE, PROT_READ | PROT_WRITE);
416 		}
417 	}
418 #else
419 	if (head != NULL) {
420 		os_pages_commit(head, PAGE, true);
421 	}
422 	if (tail != NULL) {
423 		os_pages_commit(tail, PAGE, true);
424 	}
425 #endif
426 }
427 
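/*
 * Lazy purge: hint that the pages' contents are no longer needed while
 * leaving the mapping intact, so the kernel can reclaim the physical pages
 * at its convenience (e.g. MADV_FREE).  Returns true if the pages could not
 * be purged.
 */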
428 bool
429 pages_purge_lazy(void *addr, size_t size) {
430 	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
431 	assert(PAGE_CEILING(size) == size);
432 
433 	if (!pages_can_purge_lazy) {
434 		return true;
435 	}
436 	if (!pages_can_purge_lazy_runtime) {
437 		/*
438 		 * Built with lazy purge enabled, but detected it was not
439 		 * supported on the current system.
440 		 */
441 		return true;
442 	}
443 
444 #ifdef _WIN32
445 	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
446 	return false;
447 #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
448 	return (madvise(addr, size,
449 #  ifdef MADV_FREE
450 	    MADV_FREE
451 #  else
452 	    JEMALLOC_MADV_FREE
453 #  endif
454 	    ) != 0);
455 #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
456     !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
457 	return (madvise(addr, size, MADV_DONTNEED) != 0);
458 #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
459     !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
460 	return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
461 #else
462 	not_reached();
463 #endif
464 }
465 
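/*
 * Forced purge: immediately discard the pages' contents so that they read
 * back as zero on next access.  Returns true on failure.
 */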
466 bool
467 pages_purge_forced(void *addr, size_t size) {
468 	assert(PAGE_ADDR2BASE(addr) == addr);
469 	assert(PAGE_CEILING(size) == size);
470 
471 	if (!pages_can_purge_forced) {
472 		return true;
473 	}
474 
475 #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
476     defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
477 	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
478 	    madvise(addr, size, MADV_DONTNEED) != 0);
479 #elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
480     defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
481 	return (unlikely(madvise_dont_need_zeros_is_faulty) ||
482 	    posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
483 #elif defined(JEMALLOC_MAPS_COALESCE)
484 	/* Try to overlay a new demand-zeroed mapping. */
485 	return pages_commit(addr, size);
486 #else
487 	not_reached();
488 #endif
489 }
490 
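/*
 * Advise the kernel to back [addr, addr+size) with huge pages.  The
 * unaligned variants skip the HUGEPAGE alignment asserts and are used when
 * applying opt.thp to arbitrary extents.  Returns true on failure.
 */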
491 static bool
492 pages_huge_impl(void *addr, size_t size, bool aligned) {
493 	if (aligned) {
494 		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
495 		assert(HUGEPAGE_CEILING(size) == size);
496 	}
497 #if defined(JEMALLOC_HAVE_MADVISE_HUGE)
498 	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
499 #elif defined(JEMALLOC_HAVE_MEMCNTL)
500 	struct memcntl_mha m = {0};
501 	m.mha_cmd = MHA_MAPSIZE_VA;
502 	m.mha_pagesize = HUGEPAGE;
503 	return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) == 0);
504 #else
505 	return true;
506 #endif
507 }
508 
509 bool
510 pages_huge(void *addr, size_t size) {
511 	return pages_huge_impl(addr, size, true);
512 }
513 
514 static bool
515 pages_huge_unaligned(void *addr, size_t size) {
516 	return pages_huge_impl(addr, size, false);
517 }
518 
519 static bool
520 pages_nohuge_impl(void *addr, size_t size, bool aligned) {
521 	if (aligned) {
522 		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
523 		assert(HUGEPAGE_CEILING(size) == size);
524 	}
525 
526 #ifdef JEMALLOC_HAVE_MADVISE_HUGE
527 	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
528 #else
529 	return false;
530 #endif
531 }
532 
533 bool
534 pages_nohuge(void *addr, size_t size) {
535 	return pages_nohuge_impl(addr, size, true);
536 }
537 
538 static bool
539 pages_nohuge_unaligned(void *addr, size_t size) {
540 	return pages_nohuge_impl(addr, size, false);
541 }
542 
543 bool
544 pages_dontdump(void *addr, size_t size) {
545 	assert(PAGE_ADDR2BASE(addr) == addr);
546 	assert(PAGE_CEILING(size) == size);
547 #if defined(JEMALLOC_MADVISE_DONTDUMP)
548 	return madvise(addr, size, MADV_DONTDUMP) != 0;
549 #elif defined(JEMALLOC_MADVISE_NOCORE)
550 	return madvise(addr, size, MADV_NOCORE) != 0;
551 #else
552 	return false;
553 #endif
554 }
555 
556 bool
557 pages_dodump(void *addr, size_t size) {
558 	assert(PAGE_ADDR2BASE(addr) == addr);
559 	assert(PAGE_CEILING(size) == size);
560 #if defined(JEMALLOC_MADVISE_DONTDUMP)
561 	return madvise(addr, size, MADV_DODUMP) != 0;
562 #elif defined(JEMALLOC_MADVISE_NOCORE)
563 	return madvise(addr, size, MADV_CORE) != 0;
564 #else
565 	return false;
566 #endif
567 }
568 
569 
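/* Detect the actual OS page size; it must not exceed the compile-time PAGE. */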
570 static size_t
571 os_page_detect(void) {
572 #ifdef _WIN32
573 	SYSTEM_INFO si;
574 	GetSystemInfo(&si);
575 	return si.dwPageSize;
576 #elif defined(__FreeBSD__)
577 	/*
578 	 * This returns the value obtained from
579 	 * the auxv vector, avoiding a syscall.
580 	 */
581 	return getpagesize();
582 #else
583 	long result = sysconf(_SC_PAGESIZE);
 584 	if (result == -1) {
 585 		return PAGE;	/* Fall back to the compile-time page size. */
 586 	}
587 	return (size_t)result;
588 #endif
589 }
590 
591 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
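/*
 * Determine whether the kernel overcommits, preferably from the ELF BSD
 * flags auxv entry (no syscall needed), otherwise from the vm.overcommit
 * sysctl.  Overcommit is in effect unless swap reservation is enforced.
 */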
592 static bool
593 os_overcommits_sysctl(void) {
594 	int vm_overcommit;
595 	size_t sz;
596 	int bsdflags;
597 
598 	if (_elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags)) == 0)
599 		return ((bsdflags & ELF_BSDF_VMNOOVERCOMMIT) == 0);
600 
601 	sz = sizeof(vm_overcommit);
602 #if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
603 	int mib[2];
604 
605 	mib[0] = CTL_VM;
606 	mib[1] = VM_OVERCOMMIT;
607 	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
608 		return false; /* Error. */
609 	}
610 #else
611 	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
612 		return false; /* Error. */
613 	}
614 #endif
615 
616 	return ((vm_overcommit & (SWAP_RESERVE_FORCE_ON |
617 	    SWAP_RESERVE_RLIMIT_ON)) == 0);
618 }
619 #endif
620 
621 #ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
622 /*
623  * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
624  * reentry during bootstrapping if another library has interposed system call
625  * wrappers.
626  */
627 static bool
628 os_overcommits_proc(void) {
629 	int fd;
630 	char buf[1];
631 
632 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
633 	#if defined(O_CLOEXEC)
634 		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
635 			O_CLOEXEC);
636 	#else
637 		fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
638 		if (fd != -1) {
639 			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
640 		}
641 	#endif
642 #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
643 	#if defined(O_CLOEXEC)
644 		fd = (int)syscall(SYS_openat,
645 			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
646 	#else
647 		fd = (int)syscall(SYS_openat,
648 			AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
649 		if (fd != -1) {
650 			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
651 		}
652 	#endif
653 #else
654 	#if defined(O_CLOEXEC)
655 		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
656 	#else
657 		fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
658 		if (fd != -1) {
659 			fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
660 		}
661 	#endif
662 #endif
663 
664 	if (fd == -1) {
665 		return false; /* Error. */
666 	}
667 
668 	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
669 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
670 	syscall(SYS_close, fd);
671 #else
672 	close(fd);
673 #endif
674 
675 	if (nread < 1) {
676 		return false; /* Error. */
677 	}
678 	/*
679 	 * /proc/sys/vm/overcommit_memory meanings:
680 	 * 0: Heuristic overcommit.
681 	 * 1: Always overcommit.
682 	 * 2: Never overcommit.
683 	 */
684 	return (buf[0] == '0' || buf[0] == '1');
685 }
686 #endif
687 
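/*
 * Apply opt.thp to a mapping when it differs from the system default THP
 * mode that the mapping would otherwise inherit.
 */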
688 void
 689 pages_set_thp_state(void *ptr, size_t size) {
690 	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
691 		return;
692 	}
693 	assert(opt_thp != thp_mode_not_supported &&
694 	    init_system_thp_mode != thp_mode_not_supported);
695 
696 	if (opt_thp == thp_mode_always
697 	    && init_system_thp_mode != thp_mode_never) {
698 		assert(init_system_thp_mode == thp_mode_default);
699 		pages_huge_unaligned(ptr, size);
700 	} else if (opt_thp == thp_mode_never) {
701 		assert(init_system_thp_mode == thp_mode_default ||
702 		    init_system_thp_mode == thp_mode_always);
703 		pages_nohuge_unaligned(ptr, size);
704 	}
705 }
706 
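/*
 * Record the system-wide transparent huge page mode (on Linux by parsing
 * /sys/kernel/mm/transparent_hugepage/enabled) so that opt.thp can later
 * decide whether an explicit madvise() is needed.  If the mode cannot be
 * determined, THP is marked as not supported.
 */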
707 static void
708 init_thp_state(void) {
709 	if (!have_madvise_huge && !have_memcntl) {
710 		if (metadata_thp_enabled() && opt_abort) {
711 			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
712 			abort();
713 		}
714 		goto label_error;
715 	}
716 #if defined(JEMALLOC_HAVE_MADVISE_HUGE)
717 	static const char sys_state_madvise[] = "always [madvise] never\n";
718 	static const char sys_state_always[] = "[always] madvise never\n";
719 	static const char sys_state_never[] = "always madvise [never]\n";
720 	char buf[sizeof(sys_state_madvise)];
721 
722 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
723 	int fd = (int)syscall(SYS_open,
724 	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
725 #elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
726 	int fd = (int)syscall(SYS_openat,
727 		    AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
728 #else
729 	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
730 #endif
731 	if (fd == -1) {
732 		goto label_error;
733 	}
734 
735 	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
736 #if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
737 	syscall(SYS_close, fd);
738 #else
739 	close(fd);
740 #endif
741 
 742 	if (nread < 0) {
 743 		goto label_error;
 744 	}
745 
746 	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
747 		init_system_thp_mode = thp_mode_default;
748 	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
749 		init_system_thp_mode = thp_mode_always;
750 	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
751 		init_system_thp_mode = thp_mode_never;
752 	} else {
753 		goto label_error;
754 	}
755 	return;
756 #elif defined(JEMALLOC_HAVE_MEMCNTL)
757 	init_system_thp_mode = thp_mode_default;
758 	return;
759 #endif
760 label_error:
761 	opt_thp = init_system_thp_mode = thp_mode_not_supported;
762 }
763 
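/*
 * One-time initialization: detect the OS page size, verify MADV_DONTNEED
 * semantics, determine whether the OS overcommits, record the system THP
 * mode, and probe runtime support for lazy purging.  Returns true on error.
 */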
764 bool
765 pages_boot(void) {
766 	os_page = os_page_detect();
767 	if (os_page > PAGE) {
768 		malloc_write("<jemalloc>: Unsupported system page size\n");
769 		if (opt_abort) {
770 			abort();
771 		}
772 		return true;
773 	}
774 
775 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
776 	if (!opt_trust_madvise) {
777 		madvise_dont_need_zeros_is_faulty = !madvise_MADV_DONTNEED_zeroes_pages();
778 		if (madvise_dont_need_zeros_is_faulty) {
779 			malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
780 			malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
781 		}
782 	} else {
 783 		/* opt_trust_madvise is enabled, so skip the
 784 		 * runtime check. */
785 		madvise_dont_need_zeros_is_faulty = 0;
786 	}
787 #endif
788 
789 #ifndef _WIN32
790 	mmap_flags = MAP_PRIVATE | MAP_ANON;
791 #endif
792 
793 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
794 	os_overcommits = os_overcommits_sysctl();
795 #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
796 	os_overcommits = os_overcommits_proc();
797 #  ifdef MAP_NORESERVE
798 	if (os_overcommits) {
799 		mmap_flags |= MAP_NORESERVE;
800 	}
801 #  endif
802 #elif defined(__NetBSD__)
803 	os_overcommits = true;
804 #else
805 	os_overcommits = false;
806 #endif
807 
808 	init_thp_state();
809 
810 #ifdef __FreeBSD__
811 	/*
812 	 * FreeBSD doesn't need the check; madvise(2) is known to work.
813 	 */
814 #else
815 	/* Detect lazy purge runtime support. */
816 	if (pages_can_purge_lazy) {
817 		bool committed = false;
818 		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
819 		if (madv_free_page == NULL) {
820 			return true;
821 		}
822 		assert(pages_can_purge_lazy_runtime);
823 		if (pages_purge_lazy(madv_free_page, PAGE)) {
824 			pages_can_purge_lazy_runtime = false;
825 		}
826 		os_pages_unmap(madv_free_page, PAGE);
827 	}
828 #endif
829 
830 	return false;
831 }
832