xref: /freebsd/contrib/jemalloc/src/pages.c (revision b37f6c9805edb4b89f0a8c2b78f78a3dcfc0647b)
1 #define JEMALLOC_PAGES_C_
2 #include "jemalloc/internal/jemalloc_preamble.h"
3 
4 #include "jemalloc/internal/pages.h"
5 
6 #include "jemalloc/internal/jemalloc_internal_includes.h"
7 
8 #include "jemalloc/internal/assert.h"
9 #include "jemalloc/internal/malloc_io.h"
10 
11 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
12 #include <sys/sysctl.h>
13 #endif
14 
15 /******************************************************************************/
16 /* Data. */
17 
18 /* Actual operating system page size, detected during bootstrap, <= PAGE. */
19 static size_t	os_page;
20 
21 #ifndef _WIN32
22 #  define PAGES_PROT_COMMIT (PROT_READ | PROT_WRITE)
23 #  define PAGES_PROT_DECOMMIT (PROT_NONE)
24 static int	mmap_flags;
25 #endif
26 static bool	os_overcommits;
27 
28 /******************************************************************************/
29 /*
30  * Function prototypes for static functions that are referenced prior to
31  * definition.
32  */
33 
34 static void os_pages_unmap(void *addr, size_t size);
35 
36 /******************************************************************************/
37 
38 static void *
39 os_pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
40 	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
41 	assert(ALIGNMENT_CEILING(size, os_page) == size);
42 	assert(size != 0);
43 
44 	if (os_overcommits) {
45 		*commit = true;
46 	}
47 
48 	void *ret;
49 #ifdef _WIN32
50 	/*
51 	 * If VirtualAlloc can't allocate at the given address when one is
52 	 * given, it fails and returns NULL.
53 	 */
54 	ret = VirtualAlloc(addr, size, MEM_RESERVE | (*commit ? MEM_COMMIT : 0),
55 	    PAGE_READWRITE);
56 #else
57 	/*
58 	 * We don't use MAP_FIXED here, because it can cause the *replacement*
59 	 * of existing mappings, and we only want to create new mappings.
60 	 */
61 	{
62 		int prot = *commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
63 
64 		ret = mmap(addr, size, prot, mmap_flags, -1, 0);
65 	}
66 	assert(ret != NULL);
67 
68 	if (ret == MAP_FAILED) {
69 		ret = NULL;
70 	} else if (addr != NULL && ret != addr) {
71 		/*
72 		 * We succeeded in mapping memory, but not in the right place.
73 		 */
74 		os_pages_unmap(ret, size);
75 		ret = NULL;
76 	}
77 #endif
78 	assert(ret == NULL || (addr == NULL && ret != addr) || (addr != NULL &&
79 	    ret == addr));
80 	return ret;
81 }
82 
/*
 * Cut an over-sized mapping [addr, addr + alloc_size) down to the size bytes
 * that begin leadsize bytes past addr, and return the start of that range.
 *
 * On Windows the entire mapping is released and the wanted range is mapped
 * again; NULL is returned if that re-mapping does not land at the expected
 * address.  Elsewhere the leading and trailing excess are unmapped in place
 * and the call always returns the trimmed address.
 */
static void *
os_pages_trim(void *addr, size_t alloc_size, size_t leadsize, size_t size,
    bool *commit) {
	assert(alloc_size >= leadsize + size);

	uintptr_t base = (uintptr_t)addr;
	void *ret = (void *)(base + leadsize);
#ifdef _WIN32
	os_pages_unmap(addr, alloc_size);
	void *remapped = os_pages_map(ret, size, PAGE, commit);
	if (remapped != ret) {
		/* Landed elsewhere (or failed): undo and report failure. */
		if (remapped != NULL) {
			os_pages_unmap(remapped, size);
		}
		return NULL;
	}
	return ret;
#else
	size_t trailsize = alloc_size - (leadsize + size);

	if (leadsize != 0) {
		os_pages_unmap(addr, leadsize);
	}
	if (trailsize != 0) {
		void *trail = (void *)((uintptr_t)ret + size);
		os_pages_unmap(trail, trailsize);
	}
	return ret;
#endif
}
111 
112 static void
113 os_pages_unmap(void *addr, size_t size) {
114 	assert(ALIGNMENT_ADDR2BASE(addr, os_page) == addr);
115 	assert(ALIGNMENT_CEILING(size, os_page) == size);
116 
117 #ifdef _WIN32
118 	if (VirtualFree(addr, 0, MEM_RELEASE) == 0)
119 #else
120 	if (munmap(addr, size) == -1)
121 #endif
122 	{
123 		char buf[BUFERROR_BUF];
124 
125 		buferror(get_errno(), buf, sizeof(buf));
126 		malloc_printf("<jemalloc>: Error in "
127 #ifdef _WIN32
128 		    "VirtualFree"
129 #else
130 		    "munmap"
131 #endif
132 		    "(): %s\n", buf);
133 		if (opt_abort) {
134 			abort();
135 		}
136 	}
137 }
138 
139 static void *
140 pages_map_slow(size_t size, size_t alignment, bool *commit) {
141 	size_t alloc_size = size + alignment - os_page;
142 	/* Beware size_t wrap-around. */
143 	if (alloc_size < size) {
144 		return NULL;
145 	}
146 
147 	void *ret;
148 	do {
149 		void *pages = os_pages_map(NULL, alloc_size, alignment, commit);
150 		if (pages == NULL) {
151 			return NULL;
152 		}
153 		size_t leadsize = ALIGNMENT_CEILING((uintptr_t)pages, alignment)
154 		    - (uintptr_t)pages;
155 		ret = os_pages_trim(pages, alloc_size, leadsize, size, commit);
156 	} while (ret == NULL);
157 
158 	assert(ret != NULL);
159 	assert(PAGE_ADDR2BASE(ret) == ret);
160 	return ret;
161 }
162 
163 void *
164 pages_map(void *addr, size_t size, size_t alignment, bool *commit) {
165 	assert(alignment >= PAGE);
166 	assert(ALIGNMENT_ADDR2BASE(addr, alignment) == addr);
167 
168 	/*
169 	 * Ideally, there would be a way to specify alignment to mmap() (like
170 	 * NetBSD has), but in the absence of such a feature, we have to work
171 	 * hard to efficiently create aligned mappings.  The reliable, but
172 	 * slow method is to create a mapping that is over-sized, then trim the
173 	 * excess.  However, that always results in one or two calls to
174 	 * os_pages_unmap(), and it can leave holes in the process's virtual
175 	 * memory map if memory grows downward.
176 	 *
177 	 * Optimistically try mapping precisely the right amount before falling
178 	 * back to the slow method, with the expectation that the optimistic
179 	 * approach works most of the time.
180 	 */
181 
182 	void *ret = os_pages_map(addr, size, os_page, commit);
183 	if (ret == NULL || ret == addr) {
184 		return ret;
185 	}
186 	assert(addr == NULL);
187 	if (ALIGNMENT_ADDR2OFFSET(ret, alignment) != 0) {
188 		os_pages_unmap(ret, size);
189 		return pages_map_slow(size, alignment, commit);
190 	}
191 
192 	assert(PAGE_ADDR2BASE(ret) == ret);
193 	return ret;
194 }
195 
/*
 * Public entry point for releasing a mapping.  addr must be PAGE-aligned and
 * size a multiple of PAGE; the actual work is delegated to os_pages_unmap().
 */
void
pages_unmap(void *addr, size_t size) {
	assert(PAGE_CEILING(size) == size);
	assert(PAGE_ADDR2BASE(addr) == addr);

	os_pages_unmap(addr, size);
}
203 
204 static bool
205 pages_commit_impl(void *addr, size_t size, bool commit) {
206 	assert(PAGE_ADDR2BASE(addr) == addr);
207 	assert(PAGE_CEILING(size) == size);
208 
209 	if (os_overcommits) {
210 		return true;
211 	}
212 
213 #ifdef _WIN32
214 	return (commit ? (addr != VirtualAlloc(addr, size, MEM_COMMIT,
215 	    PAGE_READWRITE)) : (!VirtualFree(addr, size, MEM_DECOMMIT)));
216 #else
217 	{
218 		int prot = commit ? PAGES_PROT_COMMIT : PAGES_PROT_DECOMMIT;
219 		void *result = mmap(addr, size, prot, mmap_flags | MAP_FIXED,
220 		    -1, 0);
221 		if (result == MAP_FAILED) {
222 			return true;
223 		}
224 		if (result != addr) {
225 			/*
226 			 * We succeeded in mapping memory, but not in the right
227 			 * place.
228 			 */
229 			os_pages_unmap(result, size);
230 			return true;
231 		}
232 		return false;
233 	}
234 #endif
235 }
236 
/*
 * Commit the pages in [addr, addr + size).  Forwards to pages_commit_impl()
 * and returns its result unchanged (false means the commit was performed).
 */
bool
pages_commit(void *addr, size_t size) {
	bool failed = pages_commit_impl(addr, size, true);
	return failed;
}
241 
/*
 * Decommit the pages in [addr, addr + size).  Forwards to
 * pages_commit_impl() and returns its result unchanged (false means the
 * decommit was performed).
 */
bool
pages_decommit(void *addr, size_t size) {
	bool failed = pages_commit_impl(addr, size, false);
	return failed;
}
246 
247 bool
248 pages_purge_lazy(void *addr, size_t size) {
249 	assert(PAGE_ADDR2BASE(addr) == addr);
250 	assert(PAGE_CEILING(size) == size);
251 
252 	if (!pages_can_purge_lazy) {
253 		return true;
254 	}
255 
256 #ifdef _WIN32
257 	VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE);
258 	return false;
259 #elif defined(JEMALLOC_PURGE_MADVISE_FREE)
260 	return (madvise(addr, size, MADV_FREE) != 0);
261 #elif defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
262     !defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
263 	return (madvise(addr, size, MADV_DONTNEED) != 0);
264 #else
265 	not_reached();
266 #endif
267 }
268 
269 bool
270 pages_purge_forced(void *addr, size_t size) {
271 	assert(PAGE_ADDR2BASE(addr) == addr);
272 	assert(PAGE_CEILING(size) == size);
273 
274 	if (!pages_can_purge_forced) {
275 		return true;
276 	}
277 
278 #if defined(JEMALLOC_PURGE_MADVISE_DONTNEED) && \
279     defined(JEMALLOC_PURGE_MADVISE_DONTNEED_ZEROS)
280 	return (madvise(addr, size, MADV_DONTNEED) != 0);
281 #elif defined(JEMALLOC_MAPS_COALESCE)
282 	/* Try to overlay a new demand-zeroed mapping. */
283 	return pages_commit(addr, size);
284 #else
285 	not_reached();
286 #endif
287 }
288 
/*
 * Apply MADV_HUGEPAGE to [addr, addr + size); addr must be aligned to, and
 * size a multiple of, the huge page size.
 *
 * Returns true if madvise() fails.  Builds without JEMALLOC_THP always
 * return true.
 */
bool
pages_huge(void *addr, size_t size) {
	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
	assert(HUGEPAGE_CEILING(size) == size);

#ifdef JEMALLOC_THP
	int err = madvise(addr, size, MADV_HUGEPAGE);
	return (err != 0);
#else
	return true;
#endif
}
300 
/*
 * Apply MADV_NOHUGEPAGE to [addr, addr + size); addr must be aligned to, and
 * size a multiple of, the huge page size.
 *
 * Returns true if madvise() fails.  Builds without JEMALLOC_THP always
 * return false.
 */
bool
pages_nohuge(void *addr, size_t size) {
	assert(HUGEPAGE_ADDR2BASE(addr) == addr);
	assert(HUGEPAGE_CEILING(size) == size);

#ifdef JEMALLOC_THP
	int err = madvise(addr, size, MADV_NOHUGEPAGE);
	return (err != 0);
#else
	return false;
#endif
}
312 
313 static size_t
314 os_page_detect(void) {
315 #ifdef _WIN32
316 	SYSTEM_INFO si;
317 	GetSystemInfo(&si);
318 	return si.dwPageSize;
319 #else
320 	long result = sysconf(_SC_PAGESIZE);
321 	if (result == -1) {
322 		return LG_PAGE;
323 	}
324 	return (size_t)result;
325 #endif
326 }
327 
#ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
/*
 * Read the "vm.overcommit" sysctl and report whether the OS overcommits.
 *
 * Returns true when the two low-order bits of the value are both clear, and
 * false when either is set or the sysctl cannot be read.
 */
static bool
os_overcommits_sysctl(void) {
	int vm_overcommit;
	size_t sz = sizeof(vm_overcommit);

	if (sysctlbyname("vm.overcommit", &vm_overcommit, &sz, NULL, 0) == 0) {
		return ((vm_overcommit & 0x3) == 0);
	}

	return false; /* Error. */
}
#endif
342 
#ifdef JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY
/*
 * Read the first byte of /proc/sys/vm/overcommit_memory and report whether
 * the OS overcommits.  Returns false if the file cannot be opened or read.
 *
 * Use syscall(2) rather than {open,read,close}(2) when possible to avoid
 * reentry during bootstrapping if another library has interposed system call
 * wrappers.
 */
static bool
os_overcommits_proc(void) {
	const char *path = "/proc/sys/vm/overcommit_memory";
	int oflags = O_RDONLY | O_CLOEXEC;
	int fd;

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_open)
	fd = (int)syscall(SYS_open, path, oflags);
#elif defined(JEMALLOC_USE_SYSCALL) && defined(SYS_openat)
	fd = (int)syscall(SYS_openat, AT_FDCWD, path, oflags);
#else
	fd = open(path, oflags);
#endif
	if (fd == -1) {
		return false; /* Error. */
	}

	/* Only the first character of the file is needed. */
	char mode;
	ssize_t nread;
#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_read)
	nread = (ssize_t)syscall(SYS_read, fd, &mode, sizeof(mode));
#else
	nread = read(fd, &mode, sizeof(mode));
#endif

#if defined(JEMALLOC_USE_SYSCALL) && defined(SYS_close)
	syscall(SYS_close, fd);
#else
	close(fd);
#endif

	if (nread < 1) {
		return false; /* Error. */
	}
	/*
	 * /proc/sys/vm/overcommit_memory meanings:
	 * 0: Heuristic overcommit.
	 * 1: Always overcommit.
	 * 2: Never overcommit.
	 */
	return (mode == '0' || mode == '1');
}
#endif
392 
393 bool
394 pages_boot(void) {
395 	os_page = os_page_detect();
396 	if (os_page > PAGE) {
397 		malloc_write("<jemalloc>: Unsupported system page size\n");
398 		if (opt_abort) {
399 			abort();
400 		}
401 		return true;
402 	}
403 
404 #ifndef _WIN32
405 	mmap_flags = MAP_PRIVATE | MAP_ANON;
406 #endif
407 
408 #ifdef JEMALLOC_SYSCTL_VM_OVERCOMMIT
409 	os_overcommits = os_overcommits_sysctl();
410 #elif defined(JEMALLOC_PROC_SYS_VM_OVERCOMMIT_MEMORY)
411 	os_overcommits = os_overcommits_proc();
412 #  ifdef MAP_NORESERVE
413 	if (os_overcommits) {
414 		mmap_flags |= MAP_NORESERVE;
415 	}
416 #  endif
417 #else
418 	os_overcommits = false;
419 #endif
420 
421 	return false;
422 }
423