1 #include "jemalloc/internal/jemalloc_preamble.h" 2 3 #include "jemalloc/internal/pages.h" 4 5 #include "jemalloc/internal/jemalloc_internal_includes.h" 6 7 #include "jemalloc/internal/assert.h" 8 #include "jemalloc/internal/malloc_io.h" 9 10 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT 11 #include <sys/sysctl.h> 12 #ifdef __FreeBSD__ 13 #include <sys/auxv.h> 14 #include <vm/vm_param.h> 15 #include <vm/vm.h> 16 #endif 17 #endif 18 #ifdef __NetBSD__ 19 #include <sys/bitops.h> /* ilog2 */ 20 #endif 21 #ifdef JEMALLOC_HAVE_VM_MAKE_TAG 22 #define PAGES_FD_TAG VM_MAKE_TAG(101U) 23 #else 24 #define PAGES_FD_TAG -1 25 #endif 26 27 /******************************************************************************/ 28 /* Data. */ 29 30 /* Actual operating system page size, detected during bootstrap, <= PAGE. */ 31 static size_t os_page; 32 33 #ifndef _WIN32 34 # define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE) 35 # define PAGES_PROT_DECOMMIT (PROT_NONE) 36 static int mmap_flags; 37 #endif 38 static bool os_overcommits; 39 40 const char *thp_mode_names[] = { 41 "default", 42 "always", 43 "never", 44 "not supported" 45 }; 46 thp_mode_t opt_thp = THP_MODE_DEFAULT; 47 thp_mode_t init_system_thp_mode; 48 49 /* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */ 50 static bool pages_can_purge_lazy_runtime = true; 51 52 #ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS 53 static int madvise_dont_need_zeros_is_faulty = -1; 54 /** 55 * Check that MADV_DONTNEED will actually zero pages on subsequent access. 56 * 57 * Since qemu does not support this, yet [1], and you can get very tricky 58 * assert if you will run program with jemalloc in use under qemu: 59 * 60 * <jemalloc>: ../contrib/jemalloc/src/extent.c:1195: Failed assertion: "p[i] == 0" 61 * 62 * [1]: https://patchwork.kernel.org/patch/10576637/ 63 */ 64 static int madvise_MADV_DONTNEED_zeroes_pages() 65 { 66 int works = -1; 67 size_t size = PAGE; 68 69 void * addr = mmap(NULL, size, PROT_READ|PROT_WRITE, 70 MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); 71 72 if (addr == MAP_FAILED) { 73 malloc_write("<jemalloc>: Cannot allocate memory for " 74 "MADV_DONTNEED check\n"); 75 if (opt_abort) { 76 abort(); 77 } 78 } 79 80 memset(addr, 'A', size); 81 if (madvise(addr, size, MADV_DONTNEED) == 0) { 82 works = memchr(addr, 'A', size) == NULL; 83 } else { 84 /* 85 * If madvise() does not support MADV_DONTNEED, then we can 86 * call it anyway, and use it's return code. 87 */ 88 works = 1; 89 } 90 91 if (munmap(addr, size) != 0) { 92 malloc_write("<jemalloc>: Cannot deallocate memory for " 93 "MADV_DONTNEED check\n"); 94 if (opt_abort) { 95 abort(); 96 } 97 } 98 99 return works; 100 } 101 #endif 102 103 /******************************************************************************/ 104 /* 105 * Function prototypes for static functions that are referenced prior to 106 * definition. 107 */ 108 109 static void os_pages_unmap(void *addr, size_t size); 110 111 /******************************************************************************/ 112 113 static void * 114 os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) { 115 assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr); 116 assert(ALIGNMENT_CEILING(size, os_page) == size); 117 assert(size != 0); 118 119 if (os_overcommits) { 120 *commit = true; 121 } 122 123 void *ret; 124 #ifdef _WIN32 125 /* 126 * If VirtualAlloc can't allocate at the given address when one is 127 * given, it fails and returns NULL. 128 */ 129 ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? 
/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
    assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
    assert(ALIGNMENT_CEILING(size, os_page) == size);
    assert(size != 0);

    if (os_overcommits) {
        *commit = true;
    }

    void *ret;
#ifdef _WIN32
    /*
     * If VirtualAlloc can't allocate at the given address when one is
     * given, it fails and returns NULL.
     */
    ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
        PAGE_READWRITE);
#else
    /*
     * We don't use MAP_FIXED here, because it can cause the *replacement*
     * of existing mappings, and we only want to create new mappings.
     */
    {
#ifdef __NetBSD__
        /*
         * On NetBSD PAGE for a platform is defined to the
         * maximum page size of all machine architectures
         * for that platform, so that we can use the same
         * binaries across all machine architectures.
         */
        if (alignment > os_page || PAGE > os_page) {
            unsigned int a = ilog2(MAX(alignment, PAGE));
            mmap_flags |= MAP_ALIGNED(a);
        }
#endif
        int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

        ret = mmap(addr, size, prot, mmap_flags, PAGES_FD_TAG, 0);
    }
    assert(ret != NULL);

    if (ret == MAP_FAILED) {
        ret = NULL;
    } else if (addr != NULL && ret != addr) {
        /*
         * We succeeded in mapping memory, but not in the right place.
         */
        os_pages_unmap(ret, size);
        ret = NULL;
    }
#endif
    assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
        ret == addr));
    return ret;
}

static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
    void *ret = (void *)((uintptr_t)addr + leadsize);

    assert(alloc_size >= leadsize + size);
#ifdef _WIN32
    os_pages_unmap(addr, alloc_size);
    void *new_addr = os_pages_map(ret, size, PAGE, commit);
    if (new_addr == ret) {
        return ret;
    }
    if (new_addr != NULL) {
        os_pages_unmap(new_addr, size);
    }
    return NULL;
#else
    size_t trailsize = alloc_size - leadsize - size;

    if (leadsize != 0) {
        os_pages_unmap(addr, leadsize);
    }
    if (trailsize != 0) {
        os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
    }
    return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
    assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
    assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
    if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
    if (munmap(addr, size) == -1)
#endif
    {
        char buf[BUFERROR_BUF];

        buferror(get_errno(), buf, sizeof(buf));
        malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
            "VirtualFree"
#else
            "munmap"
#endif
            "(): %s\n", buf);
        if (opt_abort) {
            abort();
        }
    }
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
    size_t alloc_size = size + alignment - os_page;
    /* Beware size_t wrap-around. */
    if (alloc_size < size) {
        return NULL;
    }

    void *ret;
    do {
        void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
        if (pages == NULL) {
            return NULL;
        }
        size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
            - (uintptr_t)pages;
        ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
    } while (ret == NULL);

    assert(ret != NULL);
    assert(PAGE_ADDR2BASE(ret) == ret);
    return ret;
}
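/*
 * Worked example for pages_map_slow(), with hypothetical numbers: suppose
 * size = 2 MiB, alignment = 2 MiB, and os_page = 4 KiB.  Then alloc_size =
 * 2 MiB + 2 MiB - 4 KiB = 0x3ff000.  If os_pages_map() happens to return
 * 0x7f0000202000, the next 2 MiB boundary is 0x7f0000400000, so leadsize =
 * 0x1fe000 and trailsize = alloc_size - leadsize - size = 0x1000.
 * os_pages_trim() then unmaps the lead and trail slices (or, on Windows,
 * remaps exactly [0x7f0000400000, 0x7f0000600000)), leaving a 2 MiB
 * mapping aligned to 2 MiB.
 */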
void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
    assert(alignment >= PAGE);
    assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
    /*
     * FreeBSD has mechanisms both to mmap at a specific address without
     * touching existing mappings, and to mmap with specific alignment.
     */
    {
        if (os_overcommits) {
            *commit = true;
        }

        int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
        int flags = mmap_flags;

        if (addr != NULL) {
            flags |= MAP_FIXED | MAP_EXCL;
        } else {
            unsigned alignment_bits = ffs_zu(alignment);
            assert(alignment_bits > 0);
            flags |= MAP_ALIGNED(alignment_bits);
        }

        void *ret = mmap(addr, size, prot, flags, -1, 0);
        if (ret == MAP_FAILED) {
            ret = NULL;
        }

        return ret;
    }
#endif
    /*
     * Ideally, there would be a way to specify alignment to mmap() (like
     * NetBSD has), but in the absence of such a feature, we have to work
     * hard to efficiently create aligned mappings.  The reliable, but
     * slow method is to create a mapping that is over-sized, then trim
     * the excess.  However, that always results in one or two calls to
     * os_pages_unmap(), and it can leave holes in the process's virtual
     * memory map if memory grows downward.
     *
     * Optimistically try mapping precisely the right amount before
     * falling back to the slow method, with the expectation that the
     * optimistic approach works most of the time.
     */

    void *ret = os_pages_map(addr, size, os_page, commit);
    if (ret == NULL || ret == addr) {
        return ret;
    }
    assert(addr == NULL);
    if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
        os_pages_unmap(ret, size);
        return pages_map_slow(size, alignment, commit);
    }

    assert(PAGE_ADDR2BASE(ret) == ret);
    return ret;
}

void
pages_unmap(void *addr, size_t size) {
    assert(PAGE_ADDR2BASE(addr) == addr);
    assert(PAGE_CEILING(size) == size);

    os_pages_unmap(addr, size);
}

static bool
os_pages_commit(void *addr, size_t size, bool commit) {
    assert(PAGE_ADDR2BASE(addr) == addr);
    assert(PAGE_CEILING(size) == size);

#ifdef _WIN32
    return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
        PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
    {
        int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
        void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
            PAGES_FD_TAG, 0);
        if (result == MAP_FAILED) {
            return true;
        }
        if (result != addr) {
            /*
             * We succeeded in mapping memory, but not in the right
             * place.
             */
            os_pages_unmap(result, size);
            return true;
        }
        return false;
    }
#endif
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
    if (os_overcommits) {
        return true;
    }

    return os_pages_commit(addr, size, commit);
}

bool
pages_commit(void *addr, size_t size) {
    return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
    return pages_commit_impl(addr, size, false);
}
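/*
 * Note the return convention used by the helpers above and below: false
 * means success and true means failure, following jemalloc's usual style.
 * For example, with os_overcommits set, pages_commit()/pages_decommit()
 * report failure without touching the mapping, and callers must cope with
 * that.
 */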
void
pages_mark_guards(void *head, void *tail) {
    assert(head != NULL || tail != NULL);
    assert(head == NULL || tail == NULL ||
        (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
    if (head != NULL) {
        mprotect(head, PAGE, PROT_NONE);
    }
    if (tail != NULL) {
        mprotect(tail, PAGE, PROT_NONE);
    }
#else
    /* Decommit sets the protection to PROT_NONE / MEM_DECOMMIT. */
    if (head != NULL) {
        os_pages_commit(head, PAGE, false);
    }
    if (tail != NULL) {
        os_pages_commit(tail, PAGE, false);
    }
#endif
}

void
pages_unmark_guards(void *head, void *tail) {
    assert(head != NULL || tail != NULL);
    assert(head == NULL || tail == NULL ||
        (uintptr_t)head < (uintptr_t)tail);
#ifdef JEMALLOC_HAVE_MPROTECT
    bool head_and_tail = (head != NULL) && (tail != NULL);
    size_t range = head_and_tail ?
        (uintptr_t)tail - (uintptr_t)head + PAGE :
        SIZE_T_MAX;
    /*
     * The amount of work that the kernel does in mprotect depends on the
     * range argument.  SC_LARGE_MINCLASS is an arbitrary threshold chosen
     * to prevent the kernel from doing too much work that would outweigh
     * the savings of performing one less system call.
     */
    bool ranged_mprotect = head_and_tail && range <= SC_LARGE_MINCLASS;
    if (ranged_mprotect) {
        mprotect(head, range, PROT_READ | PROT_WRITE);
    } else {
        if (head != NULL) {
            mprotect(head, PAGE, PROT_READ | PROT_WRITE);
        }
        if (tail != NULL) {
            mprotect(tail, PAGE, PROT_READ | PROT_WRITE);
        }
    }
#else
    if (head != NULL) {
        os_pages_commit(head, PAGE, true);
    }
    if (tail != NULL) {
        os_pages_commit(tail, PAGE, true);
    }
#endif
}

bool
pages_purge_lazy(void *addr, size_t size) {
    assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
    assert(PAGE_CEILING(size) == size);

    if (!pages_can_purge_lazy) {
        return true;
    }
    if (!pages_can_purge_lazy_runtime) {
        /*
         * Built with lazy purge enabled, but detected it was not
         * supported on the current system.
         */
        return true;
    }

#ifdef _WIN32
    VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
    return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
    return (madvise(addr, size,
# ifdef MADV_FREE
        MADV_FREE
# else
        JEMALLOC_MADV_FREE
# endif
        ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
    return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
    return (posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#else
    not_reached();
#endif
}
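/*
 * Background on the two purge flavors (general OS semantics, not specific
 * to this file): a lazy purge (MADV_FREE / MEM_RESET) only tells the
 * kernel it may reclaim the pages, so their contents stay readable until
 * reclaim actually happens, and a later read may see either the old data
 * or zeroes.  A forced purge (MADV_DONTNEED on Linux, or overlaying a
 * fresh mapping) drops the contents immediately, and subsequent reads
 * fault in zeroed pages.
 */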
bool
pages_purge_forced(void *addr, size_t size) {
    assert(PAGE_ADDR2BASE(addr) == addr);
    assert(PAGE_CEILING(size) == size);

    if (!pages_can_purge_forced) {
        return true;
    }

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
    return (unlikely(madvise_dont_need_zeros_is_faulty) ||
        madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_POSIX_MADVISE_DONTNEED_ZEROS)
    return (unlikely(madvise_dont_need_zeros_is_faulty) ||
        posix_madvise(addr, size, POSIX_MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
    /* Try to overlay a new, demand-zeroed mapping. */
    return pages_commit(addr, size);
#else
    not_reached();
#endif
}

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
    if (aligned) {
        assert(HUGEPAGE_ADDR2BASE(addr) == addr);
        assert(HUGEPAGE_CEILING(size) == size);
    }
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
    return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#elif defined(JEMALLOC_HAVE_MEMCNTL)
    struct memcntl_mha m = {0};
    m.mha_cmd = MHA_MAPSIZE_VA;
    m.mha_pagesize = HUGEPAGE;
    /* memcntl() returns 0 on success; report failure (true) otherwise. */
    return (memcntl(addr, size, MC_HAT_ADVISE, (caddr_t)&m, 0, 0) != 0);
#else
    return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
    return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
    return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
    if (aligned) {
        assert(HUGEPAGE_ADDR2BASE(addr) == addr);
        assert(HUGEPAGE_CEILING(size) == size);
    }

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
    return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
    return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
    return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
    return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
    assert(PAGE_ADDR2BASE(addr) == addr);
    assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
    return madvise(addr, size, MADV_DONTDUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
    return madvise(addr, size, MADV_NOCORE) != 0;
#else
    return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
    assert(PAGE_ADDR2BASE(addr) == addr);
    assert(PAGE_CEILING(size) == size);
#if defined(JEMALLOC_MADVISE_DONTDUMP)
    return madvise(addr, size, MADV_DODUMP) != 0;
#elif defined(JEMALLOC_MADVISE_NOCORE)
    return madvise(addr, size, MADV_CORE) != 0;
#else
    return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
    SYSTEM_INFO si;
    GetSystemInfo(&si);
    return si.dwPageSize;
#elif defined(__FreeBSD__)
    /*
     * This returns the value obtained from
     * the auxv vector, avoiding a syscall.
     */
    return getpagesize();
#else
    long result = sysconf(_SC_PAGESIZE);
    if (result == -1) {
        /* Fall back to the compile-time page size, not its logarithm. */
        return PAGE;
    }
    return (size_t)result;
#endif
}
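/*
 * Relationship between os_page and PAGE, with a hypothetical example:
 * PAGE (= 1 << LG_PAGE) is fixed at build time, while os_page is whatever
 * os_page_detect() reports at run time, and pages_boot() below rejects
 * os_page > PAGE.  So a binary built with LG_PAGE = 12 (4 KiB) runs on a
 * 4 KiB-page kernel, but fails to bootstrap on a 16 KiB-page arm64
 * kernel; a binary built with LG_PAGE = 14 would run on both.
 */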
#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
    int vm_overcommit;
    size_t sz;
    int bsdflags;

    if (_elf_aux_info(AT_BSDFLAGS, &bsdflags, sizeof(bsdflags)) == 0) {
        return ((bsdflags & ELF_BSDF_VMNOOVERCOMMIT) == 0);
    }

    sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
    int mib[2];

    mib[0] = CTL_VM;
    mib[1] = VM_OVERCOMMIT;
    if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
        return false; /* Error. */
    }
#else
    if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
        return false; /* Error. */
    }
#endif

    return ((vm_overcommit & (SWAP_RESERVE_FORCE_ON |
        SWAP_RESERVE_RLIMIT_ON)) == 0);
}
#endif
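/*
 * For reference (summarizing the bitmask test above): on FreeBSD,
 * vm.overcommit is a flag word in which SWAP_RESERVE_FORCE_ON and
 * SWAP_RESERVE_RLIMIT_ON enable strict swap-reserve accounting.  jemalloc
 * assumes the system overcommits only when neither enforcement bit is
 * set.
 */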
#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system
 * call wrappers.
 */
static bool
os_overcommits_proc(void) {
    int fd;
    char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
    fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
        O_CLOEXEC);
#else
    fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
    if (fd != -1) {
        fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
    }
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
    fd = (int)syscall(SYS_openat,
        AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
    fd = (int)syscall(SYS_openat,
        AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
    if (fd != -1) {
        fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
    }
#endif
#else
#if defined(O_CLOEXEC)
    fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
    fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
    if (fd != -1) {
        fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
    }
#endif
#endif

    if (fd == -1) {
        return false; /* Error. */
    }

    ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
    syscall(SYS_close, fd);
#else
    close(fd);
#endif

    if (nread < 1) {
        return false; /* Error. */
    }
    /*
     * /proc/sys/vm/overcommit_memory meanings:
     * 0: Heuristic overcommit.
     * 1: Always overcommit.
     * 2: Never overcommit.
     */
    return (buf[0] == '0' || buf[0] == '1');
}
#endif

void
pages_set_thp_state(void *ptr, size_t size) {
    if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
        return;
    }
    assert(opt_thp != thp_mode_not_supported &&
        init_system_thp_mode != thp_mode_not_supported);

    if (opt_thp == thp_mode_always
        && init_system_thp_mode != thp_mode_never) {
        assert(init_system_thp_mode == thp_mode_default);
        pages_huge_unaligned(ptr, size);
    } else if (opt_thp == thp_mode_never) {
        assert(init_system_thp_mode == thp_mode_default ||
            init_system_thp_mode == thp_mode_always);
        pages_nohuge_unaligned(ptr, size);
    }
}

static void
init_thp_state(void) {
    if (!have_madvise_huge && !have_memcntl) {
        if (metadata_thp_enabled() && opt_abort) {
            malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
            abort();
        }
        goto label_error;
    }
#if defined(JEMALLOC_HAVE_MADVISE_HUGE)
    static const char sys_state_madvise[] = "always [madvise] never\n";
    static const char sys_state_always[] = "[always] madvise never\n";
    static const char sys_state_never[] = "always madvise [never]\n";
    char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
    int fd = (int)syscall(SYS_open,
        "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
    int fd = (int)syscall(SYS_openat,
        AT_FDCWD, "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
    int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
    if (fd == -1) {
        goto label_error;
    }

    ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
    syscall(SYS_close, fd);
#else
    close(fd);
#endif

    if (nread < 0) {
        goto label_error;
    }

    if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
        init_system_thp_mode = thp_mode_default;
    } else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
        init_system_thp_mode = thp_mode_always;
    } else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
        init_system_thp_mode = thp_mode_never;
    } else {
        goto label_error;
    }
    return;
#elif defined(JEMALLOC_HAVE_MEMCNTL)
    init_system_thp_mode = thp_mode_default;
    return;
#endif
label_error:
    opt_thp = init_system_thp_mode = thp_mode_not_supported;
}
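/*
 * The sys_state_* strings above mirror the Linux sysfs format, in which
 * brackets mark the active mode.  For example:
 *
 *     $ cat /sys/kernel/mm/transparent_hugepage/enabled
 *     always [madvise] never
 *
 * maps to thp_mode_default (huge pages only where madvise'd), while
 * "[always] madvise never" maps to thp_mode_always.
 */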
bool
pages_boot(void) {
    os_page = os_page_detect();
    if (os_page > PAGE) {
        malloc_write("<jemalloc>: Unsupported system page size\n");
        if (opt_abort) {
            abort();
        }
        return true;
    }

#ifdef JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS
    if (!opt_trust_madvise) {
        madvise_dont_need_zeros_is_faulty =
            !madvise_MADV_DONTNEED_zeroes_pages();
        if (madvise_dont_need_zeros_is_faulty) {
            malloc_write("<jemalloc>: MADV_DONTNEED does not work (memset will be used instead)\n");
            malloc_write("<jemalloc>: (This is the expected behaviour if you are running under QEMU)\n");
        }
    } else {
        /* opt_trust_madvise is enabled, so skip the runtime check. */
        madvise_dont_need_zeros_is_faulty = 0;
    }
#endif

#ifndef _WIN32
    mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
    os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
    os_overcommits = os_overcommits_proc();
# ifdef MAP_NORESERVE
    if (os_overcommits) {
        mmap_flags |= MAP_NORESERVE;
    }
# endif
#elif defined(__NetBSD__)
    os_overcommits = true;
#else
    os_overcommits = false;
#endif

    init_thp_state();

#ifdef __FreeBSD__
    /*
     * FreeBSD doesn't need the check; madvise(2) is known to work.
     */
#else
    /* Detect lazy purge runtime support. */
    if (pages_can_purge_lazy) {
        bool committed = false;
        void *madv_free_page = os_pages_map(NULL, PAGE, PAGE, &committed);
        if (madv_free_page == NULL) {
            return true;
        }
        assert(pages_can_purge_lazy_runtime);
        if (pages_purge_lazy(madv_free_page, PAGE)) {
            pages_can_purge_lazy_runtime = false;
        }
        os_pages_unmap(madv_free_page, PAGE);
    }
#endif

    return false;
}
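/*
 * End-to-end usage sketch (hypothetical caller; jemalloc's own callers go
 * through the extent layer rather than using these helpers directly):
 *
 *     bool commit = false;
 *     if (pages_boot()) {
 *         // Bootstrap failed; the helpers must not be used.
 *     }
 *     void *p = pages_map(NULL, 16 * PAGE, PAGE, &commit);
 *     if (p != NULL) {
 *         if (!commit && pages_commit(p, 16 * PAGE)) {
 *             // Commit failed (e.g. reported as such under overcommit).
 *         }
 *         pages_purge_lazy(p, 16 * PAGE);
 *         pages_unmap(p, 16 * PAGE);
 *     }
 */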