/*-
 * Copyright (c) 2004 Tim J. Robbins
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 2000 Marcel Moolenaar
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/fcntl.h>
#include <sys/file.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>

#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>

#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_persona.h>
#include <compat/linux/linux_util.h>

#define STACK_SIZE  (2 * 1024 * 1024)
#define GUARD_SIZE  (4 * PAGE_SIZE)

Chagin 5897d06da6SDmitry Chagin #if defined(__amd64__) 5997d06da6SDmitry Chagin static void linux_fixup_prot(struct thread *td, int *prot); 6097d06da6SDmitry Chagin #endif 6197d06da6SDmitry Chagin 6218348a23SKyle Evans static int 6318348a23SKyle Evans linux_mmap_check_fp(struct file *fp, int flags, int prot, int maxprot) 6418348a23SKyle Evans { 6518348a23SKyle Evans 6618348a23SKyle Evans /* Linux mmap() just fails for O_WRONLY files */ 6718348a23SKyle Evans if ((fp->f_flag & FREAD) == 0) 6818348a23SKyle Evans return (EACCES); 6918348a23SKyle Evans 7018348a23SKyle Evans return (0); 7118348a23SKyle Evans } 7297d06da6SDmitry Chagin 7397d06da6SDmitry Chagin int 7497d06da6SDmitry Chagin linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot, 7597d06da6SDmitry Chagin int flags, int fd, off_t pos) 7697d06da6SDmitry Chagin { 77d718de81SBrooks Davis struct mmap_req mr, mr_fixed; 7897d06da6SDmitry Chagin struct proc *p = td->td_proc; 7997d06da6SDmitry Chagin struct vmspace *vms = td->td_proc->p_vmspace; 8069cdfcefSEdward Tomasz Napierala int bsd_flags, error; 8197d06da6SDmitry Chagin 8297d06da6SDmitry Chagin LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx", 8397d06da6SDmitry Chagin addr, len, prot, flags, fd, pos); 8497d06da6SDmitry Chagin 8597d06da6SDmitry Chagin error = 0; 8669cdfcefSEdward Tomasz Napierala bsd_flags = 0; 8797d06da6SDmitry Chagin 8897d06da6SDmitry Chagin /* 8997d06da6SDmitry Chagin * Linux mmap(2): 9097d06da6SDmitry Chagin * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 9197d06da6SDmitry Chagin */ 9297d06da6SDmitry Chagin if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 9397d06da6SDmitry Chagin return (EINVAL); 9497d06da6SDmitry Chagin 9597d06da6SDmitry Chagin if (flags & LINUX_MAP_SHARED) 9669cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_SHARED; 9797d06da6SDmitry Chagin if (flags & LINUX_MAP_PRIVATE) 9869cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_PRIVATE; 9997d06da6SDmitry Chagin if (flags & 
LINUX_MAP_FIXED) 10069cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_FIXED; 10197d06da6SDmitry Chagin if (flags & LINUX_MAP_ANON) { 10297d06da6SDmitry Chagin /* Enforce pos to be on page boundary, then ignore. */ 10397d06da6SDmitry Chagin if ((pos & PAGE_MASK) != 0) 10497d06da6SDmitry Chagin return (EINVAL); 10597d06da6SDmitry Chagin pos = 0; 10669cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_ANON; 10797d06da6SDmitry Chagin } else 10869cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_NOSYNC; 10997d06da6SDmitry Chagin if (flags & LINUX_MAP_GROWSDOWN) 11069cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_STACK; 11197d06da6SDmitry Chagin 112618b55c2SEdward Tomasz Napierala #if defined(__amd64__) 113618b55c2SEdward Tomasz Napierala /* 114618b55c2SEdward Tomasz Napierala * According to the Linux mmap(2) man page, "MAP_32BIT flag 115618b55c2SEdward Tomasz Napierala * is ignored when MAP_FIXED is set." 116618b55c2SEdward Tomasz Napierala */ 117618b55c2SEdward Tomasz Napierala if ((flags & LINUX_MAP_32BIT) && (flags & LINUX_MAP_FIXED) == 0) 118618b55c2SEdward Tomasz Napierala bsd_flags |= MAP_32BIT; 119618b55c2SEdward Tomasz Napierala 12097d06da6SDmitry Chagin /* 12197d06da6SDmitry Chagin * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 12297d06da6SDmitry Chagin * on Linux/i386 if the binary requires executable stack. 12397d06da6SDmitry Chagin * We do this only for IA32 emulation as on native i386 this is does not 12497d06da6SDmitry Chagin * make sense without PAE. 12597d06da6SDmitry Chagin * 12697d06da6SDmitry Chagin * XXX. Linux checks that the file system is not mounted with noexec. 12797d06da6SDmitry Chagin */ 12869cdfcefSEdward Tomasz Napierala linux_fixup_prot(td, &prot); 12997d06da6SDmitry Chagin #endif 13097d06da6SDmitry Chagin 13197d06da6SDmitry Chagin /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 13269cdfcefSEdward Tomasz Napierala fd = (bsd_flags & MAP_ANON) ? 
-1 : fd; 13397d06da6SDmitry Chagin if (flags & LINUX_MAP_GROWSDOWN) { 13497d06da6SDmitry Chagin /* 13597d06da6SDmitry Chagin * The Linux MAP_GROWSDOWN option does not limit auto 13697d06da6SDmitry Chagin * growth of the region. Linux mmap with this option 13797d06da6SDmitry Chagin * takes as addr the initial BOS, and as len, the initial 13897d06da6SDmitry Chagin * region size. It can then grow down from addr without 13997d06da6SDmitry Chagin * limit. However, Linux threads has an implicit internal 14097d06da6SDmitry Chagin * limit to stack size of STACK_SIZE. Its just not 14197d06da6SDmitry Chagin * enforced explicitly in Linux. But, here we impose 14297d06da6SDmitry Chagin * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 14397d06da6SDmitry Chagin * region, since we can do this with our mmap. 14497d06da6SDmitry Chagin * 14597d06da6SDmitry Chagin * Our mmap with MAP_STACK takes addr as the maximum 14697d06da6SDmitry Chagin * downsize limit on BOS, and as len the max size of 14797d06da6SDmitry Chagin * the region. It then maps the top SGROWSIZ bytes, 14897d06da6SDmitry Chagin * and auto grows the region down, up to the limit 14997d06da6SDmitry Chagin * in addr. 15097d06da6SDmitry Chagin * 15197d06da6SDmitry Chagin * If we don't use the MAP_STACK option, the effect 15297d06da6SDmitry Chagin * of this code is to allocate a stack region of a 15397d06da6SDmitry Chagin * fixed size of (STACK_SIZE - GUARD_SIZE). 15497d06da6SDmitry Chagin */ 15597d06da6SDmitry Chagin 15697d06da6SDmitry Chagin if ((caddr_t)addr + len > vms->vm_maxsaddr) { 15797d06da6SDmitry Chagin /* 15897d06da6SDmitry Chagin * Some Linux apps will attempt to mmap 15997d06da6SDmitry Chagin * thread stacks near the top of their 16097d06da6SDmitry Chagin * address space. 
If their TOS is greater 16197d06da6SDmitry Chagin * than vm_maxsaddr, vm_map_growstack() 16297d06da6SDmitry Chagin * will confuse the thread stack with the 16397d06da6SDmitry Chagin * process stack and deliver a SEGV if they 16497d06da6SDmitry Chagin * attempt to grow the thread stack past their 16597d06da6SDmitry Chagin * current stacksize rlimit. To avoid this, 16697d06da6SDmitry Chagin * adjust vm_maxsaddr upwards to reflect 16797d06da6SDmitry Chagin * the current stacksize rlimit rather 16897d06da6SDmitry Chagin * than the maximum possible stacksize. 16997d06da6SDmitry Chagin * It would be better to adjust the 17097d06da6SDmitry Chagin * mmap'ed region, but some apps do not check 17197d06da6SDmitry Chagin * mmap's return value. 17297d06da6SDmitry Chagin */ 17397d06da6SDmitry Chagin PROC_LOCK(p); 174becaf643SJohn Baldwin vms->vm_maxsaddr = (char *)round_page(vms->vm_stacktop) - 17597d06da6SDmitry Chagin lim_cur_proc(p, RLIMIT_STACK); 17697d06da6SDmitry Chagin PROC_UNLOCK(p); 17797d06da6SDmitry Chagin } 17897d06da6SDmitry Chagin 17997d06da6SDmitry Chagin /* 18097d06da6SDmitry Chagin * This gives us our maximum stack size and a new BOS. 18197d06da6SDmitry Chagin * If we're using VM_STACK, then mmap will just map 18297d06da6SDmitry Chagin * the top SGROWSIZ bytes, and let the stack grow down 18397d06da6SDmitry Chagin * to the limit at BOS. If we're not using VM_STACK 18497d06da6SDmitry Chagin * we map the full stack, since we don't have a way 18597d06da6SDmitry Chagin * to autogrow it. 
18697d06da6SDmitry Chagin */ 18769cdfcefSEdward Tomasz Napierala if (len <= STACK_SIZE - GUARD_SIZE) { 18869cdfcefSEdward Tomasz Napierala addr = addr - (STACK_SIZE - GUARD_SIZE - len); 18969cdfcefSEdward Tomasz Napierala len = STACK_SIZE - GUARD_SIZE; 19097d06da6SDmitry Chagin } 19197d06da6SDmitry Chagin } 19297d06da6SDmitry Chagin 193c6d57d30SEdward Tomasz Napierala /* 194c6d57d30SEdward Tomasz Napierala * FreeBSD is free to ignore the address hint if MAP_FIXED wasn't 195c6d57d30SEdward Tomasz Napierala * passed. However, some Linux applications, like the ART runtime, 196c6d57d30SEdward Tomasz Napierala * depend on the hint. If the MAP_FIXED wasn't passed, but the 197c6d57d30SEdward Tomasz Napierala * address is not zero, try with MAP_FIXED and MAP_EXCL first, 198c6d57d30SEdward Tomasz Napierala * and fall back to the normal behaviour if that fails. 199c6d57d30SEdward Tomasz Napierala */ 200d718de81SBrooks Davis mr = (struct mmap_req) { 201d718de81SBrooks Davis .mr_hint = addr, 202d718de81SBrooks Davis .mr_len = len, 203d718de81SBrooks Davis .mr_prot = prot, 204d718de81SBrooks Davis .mr_flags = bsd_flags, 205d718de81SBrooks Davis .mr_fd = fd, 206d718de81SBrooks Davis .mr_pos = pos, 207d718de81SBrooks Davis .mr_check_fp_fn = linux_mmap_check_fp, 208d718de81SBrooks Davis }; 209c6d57d30SEdward Tomasz Napierala if (addr != 0 && (bsd_flags & MAP_FIXED) == 0 && 210c6d57d30SEdward Tomasz Napierala (bsd_flags & MAP_EXCL) == 0) { 211d718de81SBrooks Davis mr_fixed = mr; 212d718de81SBrooks Davis mr_fixed.mr_flags |= MAP_FIXED | MAP_EXCL; 2137a1591c1SBrooks Davis error = kern_mmap(td, &mr_fixed); 214c6d57d30SEdward Tomasz Napierala if (error == 0) 215c6d57d30SEdward Tomasz Napierala goto out; 216c6d57d30SEdward Tomasz Napierala } 21797d06da6SDmitry Chagin 2187a1591c1SBrooks Davis error = kern_mmap(td, &mr); 219c6d57d30SEdward Tomasz Napierala out: 22097d06da6SDmitry Chagin LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]); 22197d06da6SDmitry Chagin 
22297d06da6SDmitry Chagin return (error); 22397d06da6SDmitry Chagin } 22497d06da6SDmitry Chagin 22597d06da6SDmitry Chagin int 22697d06da6SDmitry Chagin linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot) 22797d06da6SDmitry Chagin { 2289b65fa69SKonstantin Belousov int flags = 0; 22997d06da6SDmitry Chagin 2309b65fa69SKonstantin Belousov /* XXX Ignore PROT_GROWSUP for now. */ 2319b65fa69SKonstantin Belousov prot &= ~LINUX_PROT_GROWSUP; 2329b65fa69SKonstantin Belousov if ((prot & ~(LINUX_PROT_GROWSDOWN | PROT_READ | PROT_WRITE | 2339b65fa69SKonstantin Belousov PROT_EXEC)) != 0) 234e2fba140STijl Coosemans return (EINVAL); 2359b65fa69SKonstantin Belousov if ((prot & LINUX_PROT_GROWSDOWN) != 0) { 2369b65fa69SKonstantin Belousov prot &= ~LINUX_PROT_GROWSDOWN; 2379b65fa69SKonstantin Belousov flags |= VM_MAP_PROTECT_GROWSDOWN; 2389b65fa69SKonstantin Belousov } 239e2fba140STijl Coosemans 24097d06da6SDmitry Chagin #if defined(__amd64__) 24169cdfcefSEdward Tomasz Napierala linux_fixup_prot(td, &prot); 24297d06da6SDmitry Chagin #endif 2439b65fa69SKonstantin Belousov return (kern_mprotect(td, addr, len, prot, flags)); 24497d06da6SDmitry Chagin } 24597d06da6SDmitry Chagin 246f4134e3dSMark Johnston /* 247f4134e3dSMark Johnston * Implement Linux madvise(MADV_DONTNEED), which has unusual semantics: for 248f4134e3dSMark Johnston * anonymous memory, pages in the range are immediately discarded. 
249f4134e3dSMark Johnston */ 250f4134e3dSMark Johnston static int 251f4134e3dSMark Johnston linux_madvise_dontneed(struct thread *td, vm_offset_t start, vm_offset_t end) 252f4134e3dSMark Johnston { 253f4134e3dSMark Johnston vm_map_t map; 254f4134e3dSMark Johnston vm_map_entry_t entry; 255f4134e3dSMark Johnston vm_object_t backing_object, object; 256f4134e3dSMark Johnston vm_offset_t estart, eend; 257f4134e3dSMark Johnston vm_pindex_t pstart, pend; 258f4134e3dSMark Johnston int error; 259f4134e3dSMark Johnston 260f4134e3dSMark Johnston map = &td->td_proc->p_vmspace->vm_map; 261f4134e3dSMark Johnston 262f4134e3dSMark Johnston if (!vm_map_range_valid(map, start, end)) 263f4134e3dSMark Johnston return (EINVAL); 264f4134e3dSMark Johnston start = trunc_page(start); 265f4134e3dSMark Johnston end = round_page(end); 266f4134e3dSMark Johnston 267f4134e3dSMark Johnston error = 0; 268f4134e3dSMark Johnston vm_map_lock_read(map); 269f4134e3dSMark Johnston if (!vm_map_lookup_entry(map, start, &entry)) 270f4134e3dSMark Johnston entry = vm_map_entry_succ(entry); 271f4134e3dSMark Johnston for (; entry->start < end; entry = vm_map_entry_succ(entry)) { 272f4134e3dSMark Johnston if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) 273f4134e3dSMark Johnston continue; 274f4134e3dSMark Johnston 275f4134e3dSMark Johnston if (entry->wired_count != 0) { 276f4134e3dSMark Johnston error = EINVAL; 277f4134e3dSMark Johnston break; 278f4134e3dSMark Johnston } 279f4134e3dSMark Johnston 280f4134e3dSMark Johnston object = entry->object.vm_object; 281f4134e3dSMark Johnston if (object == NULL) 282f4134e3dSMark Johnston continue; 28330dcce27SMark Johnston if ((object->flags & (OBJ_UNMANAGED | OBJ_FICTITIOUS)) != 0) 28430dcce27SMark Johnston continue; 285f4134e3dSMark Johnston 286f4134e3dSMark Johnston pstart = OFF_TO_IDX(entry->offset); 287f4134e3dSMark Johnston if (start > entry->start) { 288f4134e3dSMark Johnston pstart += atop(start - entry->start); 289f4134e3dSMark Johnston estart = start; 
290f4134e3dSMark Johnston } else { 291f4134e3dSMark Johnston estart = entry->start; 292f4134e3dSMark Johnston } 293f4134e3dSMark Johnston pend = OFF_TO_IDX(entry->offset) + 294f4134e3dSMark Johnston atop(entry->end - entry->start); 295f4134e3dSMark Johnston if (entry->end > end) { 296f4134e3dSMark Johnston pend -= atop(entry->end - end); 297f4134e3dSMark Johnston eend = end; 298f4134e3dSMark Johnston } else { 299f4134e3dSMark Johnston eend = entry->end; 300f4134e3dSMark Johnston } 301f4134e3dSMark Johnston 302f4134e3dSMark Johnston if ((object->flags & (OBJ_ANON | OBJ_ONEMAPPING)) == 303f4134e3dSMark Johnston (OBJ_ANON | OBJ_ONEMAPPING)) { 304f4134e3dSMark Johnston /* 305f4134e3dSMark Johnston * Singly-mapped anonymous memory is discarded. This 306f4134e3dSMark Johnston * does not match Linux's semantics when the object 307f4134e3dSMark Johnston * belongs to a shadow chain of length > 1, since 308f4134e3dSMark Johnston * subsequent faults may retrieve pages from an 309f4134e3dSMark Johnston * intermediate anonymous object. However, handling 310f4134e3dSMark Johnston * this case correctly introduces a fair bit of 311f4134e3dSMark Johnston * complexity. 
312f4134e3dSMark Johnston */ 313f4134e3dSMark Johnston VM_OBJECT_WLOCK(object); 314f4134e3dSMark Johnston if ((object->flags & OBJ_ONEMAPPING) != 0) { 315f4134e3dSMark Johnston vm_object_collapse(object); 316f4134e3dSMark Johnston vm_object_page_remove(object, pstart, pend, 0); 317f4134e3dSMark Johnston backing_object = object->backing_object; 318f4134e3dSMark Johnston if (backing_object != NULL && 319f4134e3dSMark Johnston (backing_object->flags & OBJ_ANON) != 0) 320f4134e3dSMark Johnston linux_msg(td, 321f4134e3dSMark Johnston "possibly incorrect MADV_DONTNEED"); 322f4134e3dSMark Johnston VM_OBJECT_WUNLOCK(object); 323f4134e3dSMark Johnston continue; 324f4134e3dSMark Johnston } 325f4134e3dSMark Johnston VM_OBJECT_WUNLOCK(object); 326f4134e3dSMark Johnston } 327f4134e3dSMark Johnston 328f4134e3dSMark Johnston /* 329f4134e3dSMark Johnston * Handle shared mappings. Remove them outright instead of 330f4134e3dSMark Johnston * calling pmap_advise(), for consistency with Linux. 331f4134e3dSMark Johnston */ 332f4134e3dSMark Johnston pmap_remove(map->pmap, estart, eend); 333f4134e3dSMark Johnston vm_object_madvise(object, pstart, pend, MADV_DONTNEED); 334f4134e3dSMark Johnston } 335f4134e3dSMark Johnston vm_map_unlock_read(map); 336f4134e3dSMark Johnston 337f4134e3dSMark Johnston return (error); 338f4134e3dSMark Johnston } 339f4134e3dSMark Johnston 34052c81be1SEdward Tomasz Napierala int 34152c81be1SEdward Tomasz Napierala linux_madvise_common(struct thread *td, uintptr_t addr, size_t len, int behav) 34252c81be1SEdward Tomasz Napierala { 34352c81be1SEdward Tomasz Napierala 34452c81be1SEdward Tomasz Napierala switch (behav) { 34552c81be1SEdward Tomasz Napierala case LINUX_MADV_NORMAL: 34652c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_NORMAL)); 34752c81be1SEdward Tomasz Napierala case LINUX_MADV_RANDOM: 34852c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_RANDOM)); 34952c81be1SEdward Tomasz Napierala case 
LINUX_MADV_SEQUENTIAL: 35052c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_SEQUENTIAL)); 35152c81be1SEdward Tomasz Napierala case LINUX_MADV_WILLNEED: 35252c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_WILLNEED)); 35352c81be1SEdward Tomasz Napierala case LINUX_MADV_DONTNEED: 354f4134e3dSMark Johnston return (linux_madvise_dontneed(td, addr, addr + len)); 35552c81be1SEdward Tomasz Napierala case LINUX_MADV_FREE: 35652c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_FREE)); 35752c81be1SEdward Tomasz Napierala case LINUX_MADV_REMOVE: 35852c81be1SEdward Tomasz Napierala linux_msg(curthread, "unsupported madvise MADV_REMOVE"); 35952c81be1SEdward Tomasz Napierala return (EINVAL); 36052c81be1SEdward Tomasz Napierala case LINUX_MADV_DONTFORK: 36152c81be1SEdward Tomasz Napierala return (kern_minherit(td, addr, len, INHERIT_NONE)); 36252c81be1SEdward Tomasz Napierala case LINUX_MADV_DOFORK: 36352c81be1SEdward Tomasz Napierala return (kern_minherit(td, addr, len, INHERIT_COPY)); 36452c81be1SEdward Tomasz Napierala case LINUX_MADV_MERGEABLE: 36552c81be1SEdward Tomasz Napierala linux_msg(curthread, "unsupported madvise MADV_MERGEABLE"); 36652c81be1SEdward Tomasz Napierala return (EINVAL); 36752c81be1SEdward Tomasz Napierala case LINUX_MADV_UNMERGEABLE: 36852c81be1SEdward Tomasz Napierala /* We don't merge anyway. */ 36952c81be1SEdward Tomasz Napierala return (0); 37052c81be1SEdward Tomasz Napierala case LINUX_MADV_HUGEPAGE: 37152c81be1SEdward Tomasz Napierala /* Ignored; on FreeBSD huge pages are always on. */ 37252c81be1SEdward Tomasz Napierala return (0); 37352c81be1SEdward Tomasz Napierala case LINUX_MADV_NOHUGEPAGE: 37409c4e43dSEdward Tomasz Napierala #if 0 37509c4e43dSEdward Tomasz Napierala /* 37609c4e43dSEdward Tomasz Napierala * Don't warn - Firefox uses it a lot, and in real Linux it's 37709c4e43dSEdward Tomasz Napierala * an optional feature. 
37809c4e43dSEdward Tomasz Napierala */ 37952c81be1SEdward Tomasz Napierala linux_msg(curthread, "unsupported madvise MADV_NOHUGEPAGE"); 38009c4e43dSEdward Tomasz Napierala #endif 38152c81be1SEdward Tomasz Napierala return (EINVAL); 38252c81be1SEdward Tomasz Napierala case LINUX_MADV_DONTDUMP: 38352c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_NOCORE)); 38452c81be1SEdward Tomasz Napierala case LINUX_MADV_DODUMP: 38552c81be1SEdward Tomasz Napierala return (kern_madvise(td, addr, len, MADV_CORE)); 38652c81be1SEdward Tomasz Napierala case LINUX_MADV_WIPEONFORK: 38752c81be1SEdward Tomasz Napierala return (kern_minherit(td, addr, len, INHERIT_ZERO)); 38852c81be1SEdward Tomasz Napierala case LINUX_MADV_KEEPONFORK: 38952c81be1SEdward Tomasz Napierala return (kern_minherit(td, addr, len, INHERIT_COPY)); 39052c81be1SEdward Tomasz Napierala case LINUX_MADV_HWPOISON: 39152c81be1SEdward Tomasz Napierala linux_msg(curthread, "unsupported madvise MADV_HWPOISON"); 39252c81be1SEdward Tomasz Napierala return (EINVAL); 39352c81be1SEdward Tomasz Napierala case LINUX_MADV_SOFT_OFFLINE: 39452c81be1SEdward Tomasz Napierala linux_msg(curthread, "unsupported madvise MADV_SOFT_OFFLINE"); 39552c81be1SEdward Tomasz Napierala return (EINVAL); 396ae9cafd9SConrad Meyer case -1: 397ae9cafd9SConrad Meyer /* 398ae9cafd9SConrad Meyer * -1 is sometimes used as a dummy value to detect simplistic 399ae9cafd9SConrad Meyer * madvise(2) stub implementations. This safeguard is used by 400ae9cafd9SConrad Meyer * BoringSSL, for example, before assuming MADV_WIPEONFORK is 401ae9cafd9SConrad Meyer * safe to use. Don't produce an "unsupported" error message 402ae9cafd9SConrad Meyer * for this special dummy value, which is unlikely to be used 403ae9cafd9SConrad Meyer * by any new advisory behavior feature. 
404ae9cafd9SConrad Meyer */ 405ae9cafd9SConrad Meyer return (EINVAL); 40652c81be1SEdward Tomasz Napierala default: 40752c81be1SEdward Tomasz Napierala linux_msg(curthread, "unsupported madvise behav %d", behav); 40852c81be1SEdward Tomasz Napierala return (EINVAL); 40952c81be1SEdward Tomasz Napierala } 41052c81be1SEdward Tomasz Napierala } 41152c81be1SEdward Tomasz Napierala 41297d06da6SDmitry Chagin #if defined(__amd64__) 41397d06da6SDmitry Chagin static void 41497d06da6SDmitry Chagin linux_fixup_prot(struct thread *td, int *prot) 41597d06da6SDmitry Chagin { 41697d06da6SDmitry Chagin struct linux_pemuldata *pem; 41797d06da6SDmitry Chagin 41897d06da6SDmitry Chagin if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) { 41997d06da6SDmitry Chagin pem = pem_find(td->td_proc); 42097d06da6SDmitry Chagin if (pem->persona & LINUX_READ_IMPLIES_EXEC) 42197d06da6SDmitry Chagin *prot |= PROT_EXEC; 42297d06da6SDmitry Chagin } 42397d06da6SDmitry Chagin 42497d06da6SDmitry Chagin } 42597d06da6SDmitry Chagin #endif 426