#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}
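
/*
 * Editorial sketch (excluded from the build) of the os_pages_map() calling
 * convention; the "example_" name is hypothetical.  On return, the result is
 * either NULL, or exactly the hinted address when a hint was given, per the
 * trailing assertion above.
 */
#if 0
static void *
example_reserve_page(void) {
	bool commit = false;
	/* NULL hint: the kernel picks the placement. */
	void *p = os_pages_map(NULL, os_page, os_page, &commit);
	if (p == NULL) {
		return NULL;
	}
	/*
	 * Without overcommit the reservation stays PROT_NONE until committed;
	 * with overcommit, os_pages_map() forced commit to true.
	 */
	assert(commit == os_overcommits);
	return p;
}
#endif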

static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment,
		    commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages,
		    alignment) - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
	/*
	 * FreeBSD has mechanisms both to mmap at a specific address without
	 * touching existing mappings, and to mmap with a specific alignment.
	 */
	{
		if (os_overcommits) {
			*commit = true;
		}

		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		int flags = mmap_flags;

		if (addr != NULL) {
			flags |= MAP_FIXED | MAP_EXCL;
		} else {
			unsigned alignment_bits = ffs_zu(alignment);
			assert(alignment_bits > 1);
			flags |= MAP_ALIGNED(alignment_bits - 1);
		}

		void *ret = mmap(addr, size, prot, flags, -1, 0);
		if (ret == MAP_FAILED) {
			ret = NULL;
		}

		return ret;
	}
#endif
	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim
	 * the excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before
	 * falling back to the slow method, with the expectation that the
	 * optimistic approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
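
/*
 * Editorial worked example for pages_map_slow() above (addresses are
 * illustrative): with size = 8 KiB, alignment = 2 MiB, and os_page = 4 KiB,
 * alloc_size = 8 KiB + 2 MiB - 4 KiB = 0x201000.  If the kernel returns
 * pages = 0x7f0000001000, then leadsize = ALIGNMENT_CEILING(pages, 2 MiB) -
 * pages = 0x7f0000200000 - 0x7f0000001000 = 0x1ff000, trailsize = 0x201000 -
 * 0x1ff000 - 0x2000 = 0, and os_pages_trim() unmaps only the lead, leaving
 * an 8 KiB mapping aligned to 2 MiB.
 */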

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the
			 * right place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}

bool
pages_purge_lazy(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}
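
/*
 * Editorial sketch (excluded from the build; the helper name is hypothetical)
 * contrasting the two purge flavors on Linux anonymous mappings, assuming a
 * committed page: after a lazy purge (MADV_FREE), reads may still observe the
 * old contents until the kernel reclaims the page; after a forced purge
 * (MADV_DONTNEED with zeroing semantics), reads observe zeroes.
 */
#if 0
static void
example_purge(void *page) {
	((char *)page)[0] = 1;
	if (!pages_purge_lazy(page, PAGE)) {
		/* May read 1 or 0: contents are undefined but mapped. */
	}
	((char *)page)[0] = 1;
	if (!pages_purge_forced(page, PAGE)) {
		/* Page was replaced with demand-zeroed memory. */
		assert(((char *)page)[0] == 0);
	}
}
#endif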

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	/*
	 * This returns the value obtained from
	 * the auxv vector, avoiding a syscall.
	 */
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/*
		 * Fall back to the configured page size.  Returning LG_PAGE
		 * (the base-2 logarithm) here would be wrong, since callers
		 * expect a size in bytes.
		 */
		return PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
#endif
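
/*
 * Editorial note on the mask above (a reading of the check, not upstream
 * documentation): (vm_overcommit & 0x3) tests the two low accounting bits of
 * FreeBSD's vm.overcommit sysctl.  When either bit is set, the kernel
 * reserves backing store at mmap() time, i.e. it does not overcommit, so
 * only a value with both bits clear is treated as overcommit here.
 */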

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory",
	    O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory",
	    O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#else
#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}
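
/*
 * Editorial summary of what pages_set_thp_state() above does, given opt_thp
 * (rows) versus the boot-time system THP mode (columns):
 *
 *                      system: default    always            never
 *   opt_thp = always   MADV_HUGEPAGE      no-op (equal)     no-op (ignored)
 *   opt_thp = never    MADV_NOHUGEPAGE    MADV_NOHUGEPAGE   no-op (equal)
 *   opt_thp = default  no-op              no-op             no-op
 *
 * "no-op (ignored)" reflects that a system-wide "never" makes the hint moot.
 */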

static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#else
	os_overcommits = false;
#endif

	init_thp_state();

#ifdef __FreeBSD__
	/*
	 * FreeBSD doesn't need the check; madvise(2) is known to work.
	 */
#else
	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}
#endif

	return false;
}
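
/*
 * End-to-end editorial sketch (excluded from the build; the function name is
 * hypothetical) of the bootstrap-then-map lifecycle this file provides.
 * pages_boot() must succeed before any other pages_* call, since it detects
 * os_page, overcommit, and THP state.
 */
#if 0
static bool
example_bootstrap_and_map(void) {
	if (pages_boot()) {
		return true; /* Unsupported page size or probe failure. */
	}
	bool commit = true;
	void *p = pages_map(NULL, PAGE, PAGE, &commit);
	if (p == NULL) {
		return true;
	}
	pages_unmap(p, PAGE);
	return false;
}
#endif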