#define JEMALLOC_PAGES_C_
#include "jemalloc/internal/jemalloc_preamble.h"

#include "jemalloc/internal/pages.h"

#include "jemalloc/internal/jemalloc_internal_includes.h"

#include "jemalloc/internal/assert.h"
#include "jemalloc/internal/malloc_io.h"

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
#include <sys/sysctl.h>
#ifdef __FreeBSD__
#include <vm/vm_param.h>
#endif
#endif

/******************************************************************************/
/* Data. */

/* Actual operating system page size, detected during bootstrap, <= PAGE. */
static size_t os_page;

#ifndef _WIN32
#  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
#  define PAGES_PROT_DECOMMIT (PROT_NONE)
static int mmap_flags;
#endif
static bool os_overcommits;

const char *thp_mode_names[] = {
	"default",
	"always",
	"never",
	"not supported"
};
thp_mode_t opt_thp = THP_MODE_DEFAULT;
thp_mode_t init_system_thp_mode;

/* Runtime support for lazy purge. Irrelevant when !pages_can_purge_lazy. */
static bool pages_can_purge_lazy_runtime = true;

/******************************************************************************/
/*
 * Function prototypes for static functions that are referenced prior to
 * definition.
 */

static void os_pages_unmap(void *addr, size_t size);

/******************************************************************************/

static void *
os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);
	assert(size != 0);

	if (os_overcommits) {
		*commit = true;
	}

	void *ret;
#ifdef _WIN32
	/*
	 * If VirtualAlloc can't allocate at the given address when one is
	 * given, it fails and returns NULL.
	 */
	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
	    PAGE_READWRITE);
#else
	/*
	 * We don't use MAP_FIXED here, because it can cause the *replacement*
	 * of existing mappings, and we only want to create new mappings.
	 */
	{
		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;

		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
	}
	assert(ret != NULL);

	if (ret == MAP_FAILED) {
		ret = NULL;
	} else if (addr != NULL && ret != addr) {
		/*
		 * We succeeded in mapping memory, but not in the right place.
		 */
		os_pages_unmap(ret, size);
		ret = NULL;
	}
#endif
	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
	    ret == addr));
	return ret;
}
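
/*
 * Trim an over-sized mapping down to the aligned subrange
 * [addr + leadsize, addr + leadsize + size), unmapping the excess head and
 * tail.  On Windows, an address-space reservation can only be released as a
 * whole, so the trim is implemented as a full unmap followed by a re-map of
 * the target subrange; that re-map can fail if another thread claims the
 * range in between, in which case NULL is returned and callers such as
 * pages_map_slow() retry.
 */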

static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	void *ret = (void *)((uintptr_t)addr + leadsize);

	assert(alloc_size >= leadsize + size);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *new_addr = os_pages_map(ret, size, PAGE, commit);
	if (new_addr == ret) {
		return ret;
	}
	if (new_addr != NULL) {
		os_pages_unmap(new_addr, size);
	}
	return NULL;
#else
	size_t trailsize = alloc_size - leadsize - size;

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		os_pages_unmap((void *)((uintptr_t)ret + size), trailsize);
	}
	return ret;
#endif
}

static void
os_pages_unmap(void *addr, size_t size) {
	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
	assert(ALIGNMENT_CEILING(size, os_page) == size);

#ifdef _WIN32
	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
#else
	if (munmap(addr, size) == -1)
#endif
	{
		char buf[BUFERROR_BUF];

		buferror(get_errno(), buf, sizeof(buf));
		malloc_printf("<jemalloc>: Error in "
#ifdef _WIN32
		    "VirtualFree"
#else
		    "munmap"
#endif
		    "(): %s\n", buf);
		if (opt_abort) {
			abort();
		}
	}
}
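
/*
 * Slow path for aligned mapping: over-allocate by (alignment - os_page)
 * bytes so that an alignment-aligned base is guaranteed to lie within the
 * mapping, then trim the excess.  Illustrative numbers (not from any
 * particular system): with os_page = 4 KiB, alignment = 2 MiB, and
 * size = 2 MiB:
 *
 *   alloc_size = size + alignment - os_page           (just under 4 MiB)
 *   leadsize   = ALIGNMENT_CEILING(pages, alignment) - pages
 *             <= alignment - os_page
 *   trailsize  = alloc_size - leadsize - size         (>= 0)
 *
 * so the aligned subrange [pages + leadsize, pages + leadsize + size)
 * always fits.  The retry loop only matters on Windows, where
 * os_pages_trim() can fail transiently (see above); on POSIX systems the
 * trim always succeeds.
 */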

static void *
pages_map_slow(size_t size, size_t alignment, bool *commit) {
	size_t alloc_size = size + alignment - os_page;
	/* Beware size_t wrap-around. */
	if (alloc_size < size) {
		return NULL;
	}

	void *ret;
	do {
		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
		if (pages == NULL) {
			return NULL;
		}
		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
		    - (uintptr_t)pages;
		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
	} while (ret == NULL);

	assert(ret != NULL);
	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void *
pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
	assert(alignment >= PAGE);
	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);

	/*
	 * Ideally, there would be a way to specify alignment to mmap() (like
	 * NetBSD has), but in the absence of such a feature, we have to work
	 * hard to efficiently create aligned mappings.  The reliable, but
	 * slow method is to create a mapping that is over-sized, then trim
	 * the excess.  However, that always results in one or two calls to
	 * os_pages_unmap(), and it can leave holes in the process's virtual
	 * memory map if memory grows downward.
	 *
	 * Optimistically try mapping precisely the right amount before
	 * falling back to the slow method, with the expectation that the
	 * optimistic approach works most of the time.
	 */

	void *ret = os_pages_map(addr, size, os_page, commit);
	if (ret == NULL || ret == addr) {
		return ret;
	}
	assert(addr == NULL);
	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
		os_pages_unmap(ret, size);
		return pages_map_slow(size, alignment, commit);
	}

	assert(PAGE_ADDR2BASE(ret) == ret);
	return ret;
}

void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	os_pages_unmap(addr, size);
}

static bool
pages_commit_impl(void *addr, size_t size, bool commit) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (os_overcommits) {
		return true;
	}

#ifdef _WIN32
	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
#else
	{
		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
		    -1, 0);
		if (result == MAP_FAILED) {
			return true;
		}
		if (result != addr) {
			/*
			 * We succeeded in mapping memory, but not in the right
			 * place.
			 */
			os_pages_unmap(result, size);
			return true;
		}
		return false;
	}
#endif
}

bool
pages_commit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, true);
}

bool
pages_decommit(void *addr, size_t size) {
	return pages_commit_impl(addr, size, false);
}
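
/*
 * Purging semantics: lazy purging (MADV_FREE, or MEM_RESET on Windows)
 * tells the kernel the pages' contents are disposable, but the pages may
 * still read back their old contents until the kernel actually reclaims
 * them.  Forced purging (MADV_DONTNEED where it has zeroing semantics)
 * discards the contents immediately, and subsequent reads return
 * zero-filled pages.  Both functions return false on success and true if
 * purging is unsupported or fails.
 */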

bool
pages_purge_lazy(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_lazy) {
		return true;
	}
	if (!pages_can_purge_lazy_runtime) {
		/*
		 * Built with lazy purge enabled, but detected it was not
		 * supported on the current system.
		 */
		return true;
	}

#ifdef _WIN32
	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
	return false;
#elif defined(JEMALLOC_PURGE_MADVISE_FREE)
	return (madvise(addr, size,
#  ifdef MADV_FREE
	    MADV_FREE
#  else
	    JEMALLOC_MADV_FREE
#  endif
	    ) != 0);
#elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#else
	not_reached();
#endif
}

bool
pages_purge_forced(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);

	if (!pages_can_purge_forced) {
		return true;
	}

#if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
    defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
	return (madvise(addr, size, MADV_DONTNEED) != 0);
#elif defined(JEMALLOC_MAPS_COALESCE)
	/* Try to overlay a new demand-zeroed mapping. */
	return pages_commit(addr, size);
#else
	not_reached();
#endif
}

static bool
pages_huge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}
#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_HUGEPAGE) != 0);
#else
	return true;
#endif
}

bool
pages_huge(void *addr, size_t size) {
	return pages_huge_impl(addr, size, true);
}

static bool
pages_huge_unaligned(void *addr, size_t size) {
	return pages_huge_impl(addr, size, false);
}

static bool
pages_nohuge_impl(void *addr, size_t size, bool aligned) {
	if (aligned) {
		assert(HUGEPAGE_ADDR2BASE(addr) == addr);
		assert(HUGEPAGE_CEILING(size) == size);
	}

#ifdef JEMALLOC_HAVE_MADVISE_HUGE
	return (madvise(addr, size, MADV_NOHUGEPAGE) != 0);
#else
	return false;
#endif
}

bool
pages_nohuge(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, true);
}

static bool
pages_nohuge_unaligned(void *addr, size_t size) {
	return pages_nohuge_impl(addr, size, false);
}

bool
pages_dontdump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DONTDUMP) != 0;
#else
	return false;
#endif
}

bool
pages_dodump(void *addr, size_t size) {
	assert(PAGE_ADDR2BASE(addr) == addr);
	assert(PAGE_CEILING(size) == size);
#ifdef JEMALLOC_MADVISE_DONTDUMP
	return madvise(addr, size, MADV_DODUMP) != 0;
#else
	return false;
#endif
}

static size_t
os_page_detect(void) {
#ifdef _WIN32
	SYSTEM_INFO si;
	GetSystemInfo(&si);
	return si.dwPageSize;
#elif defined(__FreeBSD__)
	return getpagesize();
#else
	long result = sysconf(_SC_PAGESIZE);
	if (result == -1) {
		/* Detection failed; fall back to the compile-time page size. */
		return PAGE;
	}
	return (size_t)result;
#endif
}
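
/*
 * On FreeBSD, the low two bits of the vm.overcommit sysctl control swap
 * accounting (SWAP_RESERVE_FORCE_ON and SWAP_RESERVE_RATIO_ON in
 * <vm/vm_param.h>); if either bit is set, the kernel reserves swap for
 * mappings up front, i.e. it does not overcommit.
 */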

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz;

	sz = sizeof(vm_overcommit);
#if defined(__FreeBSD__) && defined(VM_OVERCOMMIT)
	int mib[2];

	mib[0] = CTL_VM;
	mib[1] = VM_OVERCOMMIT;
	if (sysctl(mib, 2, &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#else
	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) != 0) {
		return false; /* Error. */
	}
#endif

	return ((vm_overcommit & 0x3) == 0);
}
#endif

#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	int fd;
	char buf[1];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY |
	    O_CLOEXEC);
#else
	fd = (int)syscall(SYS_open, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
#if defined(O_CLOEXEC)
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = (int)syscall(SYS_openat,
	    AT_FDCWD, "/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#else
#if defined(O_CLOEXEC)
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY | O_CLOEXEC);
#else
	fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY);
	if (fd != -1) {
		fcntl(fd, F_SETFD, fcntl(fd, F_GETFD) | FD_CLOEXEC);
	}
#endif
#endif

	if (fd == -1) {
		return false; /* Error. */
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (buf[0] == '0' || buf[0] == '1');
}
#endif

void
pages_set_thp_state(void *ptr, size_t size) {
	if (opt_thp == thp_mode_default || opt_thp == init_system_thp_mode) {
		return;
	}
	assert(opt_thp != thp_mode_not_supported &&
	    init_system_thp_mode != thp_mode_not_supported);

	if (opt_thp == thp_mode_always
	    && init_system_thp_mode != thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default);
		pages_huge_unaligned(ptr, size);
	} else if (opt_thp == thp_mode_never) {
		assert(init_system_thp_mode == thp_mode_default ||
		    init_system_thp_mode == thp_mode_always);
		pages_nohuge_unaligned(ptr, size);
	}
}
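
/*
 * Determine the system-wide THP mode by reading
 * /sys/kernel/mm/transparent_hugepage/enabled, which reports all modes on
 * one line with the active one bracketed, e.g. "always [madvise] never".
 * The three expected strings are matched verbatim; an unrecognized format
 * (or a kernel without THP support) disables THP handling entirely.
 */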

static void
init_thp_state(void) {
	if (!have_madvise_huge) {
		if (metadata_thp_enabled() && opt_abort) {
			malloc_write("<jemalloc>: no MADV_HUGEPAGE support\n");
			abort();
		}
		goto label_error;
	}

	static const char sys_state_madvise[] = "always [madvise] never\n";
	static const char sys_state_always[] = "[always] madvise never\n";
	static const char sys_state_never[] = "always madvise [never]\n";
	char buf[sizeof(sys_state_madvise)];

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	int fd = (int)syscall(SYS_open,
	    "/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#else
	int fd = open("/sys/kernel/mm/transparent_hugepage/enabled", O_RDONLY);
#endif
	if (fd == -1) {
		goto label_error;
	}

	ssize_t nread = malloc_read_fd(fd, &buf, sizeof(buf));
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (strncmp(buf, sys_state_madvise, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_default;
	} else if (strncmp(buf, sys_state_always, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_always;
	} else if (strncmp(buf, sys_state_never, (size_t)nread) == 0) {
		init_system_thp_mode = thp_mode_never;
	} else {
		goto label_error;
	}
	return;
label_error:
	opt_thp = init_system_thp_mode = thp_mode_not_supported;
}

bool
pages_boot(void) {
	os_page = os_page_detect();
	if (os_page > PAGE) {
		malloc_write("<jemalloc>: Unsupported system page size\n");
		if (opt_abort) {
			abort();
		}
		return true;
	}

#ifndef _WIN32
	mmap_flags = MAP_PRIVATE | MAP_ANON;
#endif

#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
	os_overcommits = os_overcommits_sysctl();
#elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
	os_overcommits = os_overcommits_proc();
#  ifdef MAP_NORESERVE
	if (os_overcommits) {
		mmap_flags |= MAP_NORESERVE;
	}
#  endif
#else
	os_overcommits = false;
#endif

	init_thp_state();

	/* Detect lazy purge runtime support. */
	if (pages_can_purge_lazy) {
		bool committed = false;
		void *madv_free_page = os_pages_map(NULL, PAGE, PAGE,
		    &committed);
		if (madv_free_page == NULL) {
			return true;
		}
		assert(pages_can_purge_lazy_runtime);
		if (pages_purge_lazy(madv_free_page, PAGE)) {
			pages_can_purge_lazy_runtime = false;
		}
		os_pages_unmap(madv_free_page, PAGE);
	}

	return false;
}