#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
# define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
# define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}
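/*
 * Illustrative sketch (not part of jemalloc; compiled out): the
 * hint-but-verify pattern that os_pages_map() uses above, in isolation.
 * A non-NULL first argument to mmap() without MAP_FIXED is only a hint;
 * the kernel may place the mapping elsewhere, so the caller must compare
 * the result against the hint and unmap on mismatch rather than silently
 * accept a mapping at the wrong address. The function name is hypothetical.
 */
#if 0
#include <stddef.h>
#include <sys/mman.h>

static void *
map_at_hint(void *hint, size_t size) {
	void *p = mmap(hint, size, PROT_READ | PROT_WRITE,
	    MAP_PRIVATE | MAP_ANON, -1, 0);
	if (p == MAP_FAILED) {
		return NULL;
	}
	if (hint != NULL && p != hint) {
		/* Mapped, but not where requested; undo and fail. */
		munmap(p, size);
		return NULL;
	}
	return p;
}
#endif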
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment,
		    commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages,
		    alignment) - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

#if defined(__FreeBSD__) && defined(MAP_EXCL)
	/*
	 * FreeBSD has mechanisms both to mmap at a specific address without
	 * touching existing mappings, and to mmap with a specific alignment.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		int flags = mmap_flags;

		if (addr != NULL) {
			flags |= MAP_FIXED | MAP_EXCL;
		} else {
			unsigned alignment_bits = ffs_zu(alignment);
			assert(alignment_bits > 1);
			flags |= MAP_ALIGNED(alignment_bits - 1);
		}

		void *ret = mmap(addr, size, prot, flags, -1, 0);
		if (ret == MAP_FAILED) {
			ret = NULL;
		}

		return ret;
	}
#endif
	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings. The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim the
	 * excess. However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before falling
	 * back to the slow method, with the expectation that the optimistic
	 * approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}
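/*
 * Illustrative sketch (not part of jemalloc; compiled out): how a caller
 * drives the commit in/out parameter of pages_map(). The caller sets
 * *commit to request committed (readable/writable) or merely reserved
 * (PROT_NONE) pages; os_pages_map() overwrites it with true when the OS
 * overcommits, since decommit is meaningless there. The value left in
 * `commit` therefore tells the caller which state the mapping actually
 * ended up in. Size must be a multiple of PAGE and alignment >= PAGE,
 * matching the asserts above. The function name is hypothetical.
 */
#if 0
static void *
reserve_aligned_chunk(size_t size, size_t alignment) {
	bool commit = false;	/* Request reserved-only pages if possible. */
	void *addr = pages_map(NULL, size, alignment, &commit);
	if (addr != NULL && !commit) {
		/* Pages must be committed before first touch. */
		if (pages_commit(addr, size)) {
			pages_unmap(addr, size);
			return NULL;
		}
	}
	return addr;
}
#endif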
void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}

bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
# ifdef MADV_FREE
	    MADV_FREE
# else
	    JEMALLOC_MADV_FREE
# endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}
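/*
 * Illustrative sketch (not part of jemalloc; compiled out): the semantic
 * difference the two purge flavors above rely on. MADV_FREE (lazy) only
 * marks pages reclaimable; their contents remain readable until the kernel
 * actually needs the memory, so reads may still return old data. Linux's
 * MADV_DONTNEED (forced, the DONTNEED_ZEROS case) drops anonymous pages
 * immediately, and the next read is guaranteed to observe zeroes.
 */
#if 0
#include <string.h>
#include <sys/mman.h>

static void
purge_demo(unsigned char *page, size_t size) {
	memset(page, 0xaa, size);
#ifdef MADV_FREE
	madvise(page, size, MADV_FREE);
	/* page[0] may still read back 0xaa here: reclamation is deferred. */
#endif
	madvise(page, size, MADV_DONTNEED);
	/* On Linux, page[0] now reads back 0 for anonymous mappings. */
}
#endif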
static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* Fall back to the compile-time page size. */
		return PAGE;
	}
	return (size_t)result;
#endif
}

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
#endif
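/*
 * Illustrative note (not part of jemalloc; compiled out): the 0x3 mask
 * above tests the two low FreeBSD swap-accounting flags. The flag names
 * below are assumed to match FreeBSD's <vm/vm_param.h>; only the bit
 * positions matter to the check. Overcommit is considered enabled only
 * when neither form of swap reservation enforcement is switched on.
 */
#if 0
#define SWAP_RESERVE_FORCE_ON	(1 << 0)	/* Enforce swap accounting. */
#define SWAP_RESERVE_RLIMIT_ON	(1 << 1)	/* Enforce per-uid swap limit. */

static bool
overcommit_from_flags(int vm_overcommit) {
	return (vm_overcommit &
	    (SWAP_RESERVE_FORCE_ON | SWAP_RESERVE_RLIMIT_ON)) == 0;
}
#endif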
#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
	    O_CLOEXEC);
#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#else
#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}

static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	/* Guard against read errors before using nread as a length. */
	if (nread < 0) {
		goto label_error;
	}

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}
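/*
 * Summary of how the two THP knobs above combine (editorial note derived
 * from pages_set_thp_state(); not upstream jemalloc commentary). No
 * madvise() call is issued when opt_thp is "default" or already matches
 * the system-wide mode detected by init_thp_state():
 *
 *   opt_thp    system mode    action on new extents
 *   -------    -----------    ---------------------
 *   always     default        MADV_HUGEPAGE
 *   always     never          (none; system forbids THP)
 *   never      default        MADV_NOHUGEPAGE
 *   never      always         MADV_NOHUGEPAGE
 *   default    any            (none; defer to the system setting)
 */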
bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
# ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
# endif
#else
	os_overcommits = false;
#endif

	init_thp_state();

#ifdef __FreeBSD__
	/*
	 * FreeBSD doesn't need the check; madvise(2) is known to work.
	 */
#else
	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}
#endif

	return false;
}
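/*
 * Illustrative sketch (not part of jemalloc; compiled out): the intended
 * bootstrap order for this module. pages_boot() must run once, before any
 * other pages_* call, since it detects the OS page size, overcommit
 * behavior, THP state, and lazy-purge support that the functions above
 * consult. The function name is hypothetical.
 */
#if 0
static bool
pages_module_demo(void) {
	if (pages_boot()) {
		return true;	/* Unsupported page size, or probe failed. */
	}
	bool commit = true;
	void *p = pages_map(NULL, PAGE, PAGE, &commit);
	if (p == NULL) {
		return true;
	}
	pages_purge_lazy(p, PAGE);	/* Hint: contents no longer needed. */
	pages_unmap(p, PAGE);
	return false;
}
#endif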