/*-
 * Copyright (c) 2004 Tim J. Robbins
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 2000 Marcel Moolenaar
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/capsicum.h>
#include <sys/file.h>
#include <sys/imgact.h>
#include <sys/ktr.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>

#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>

#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_mmap.h>
#include <compat/linux/linux_persona.h>
#include <compat/linux/linux_util.h>


#define STACK_SIZE  (2 * 1024 * 1024)
5997d06da6SDmitry Chagin #define GUARD_SIZE (4 * PAGE_SIZE) 6097d06da6SDmitry Chagin 6197d06da6SDmitry Chagin #if defined(__amd64__) 6297d06da6SDmitry Chagin static void linux_fixup_prot(struct thread *td, int *prot); 6397d06da6SDmitry Chagin #endif 6497d06da6SDmitry Chagin 6597d06da6SDmitry Chagin 6697d06da6SDmitry Chagin int 6797d06da6SDmitry Chagin linux_mmap_common(struct thread *td, uintptr_t addr, size_t len, int prot, 6897d06da6SDmitry Chagin int flags, int fd, off_t pos) 6997d06da6SDmitry Chagin { 7097d06da6SDmitry Chagin struct proc *p = td->td_proc; 7197d06da6SDmitry Chagin struct vmspace *vms = td->td_proc->p_vmspace; 7269cdfcefSEdward Tomasz Napierala int bsd_flags, error; 7397d06da6SDmitry Chagin struct file *fp; 7497d06da6SDmitry Chagin 7597d06da6SDmitry Chagin LINUX_CTR6(mmap2, "0x%lx, %ld, %ld, 0x%08lx, %ld, 0x%lx", 7697d06da6SDmitry Chagin addr, len, prot, flags, fd, pos); 7797d06da6SDmitry Chagin 7897d06da6SDmitry Chagin error = 0; 7969cdfcefSEdward Tomasz Napierala bsd_flags = 0; 8097d06da6SDmitry Chagin fp = NULL; 8197d06da6SDmitry Chagin 8297d06da6SDmitry Chagin /* 8397d06da6SDmitry Chagin * Linux mmap(2): 8497d06da6SDmitry Chagin * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 8597d06da6SDmitry Chagin */ 8697d06da6SDmitry Chagin if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 8797d06da6SDmitry Chagin return (EINVAL); 8897d06da6SDmitry Chagin 8997d06da6SDmitry Chagin if (flags & LINUX_MAP_SHARED) 9069cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_SHARED; 9197d06da6SDmitry Chagin if (flags & LINUX_MAP_PRIVATE) 9269cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_PRIVATE; 9397d06da6SDmitry Chagin if (flags & LINUX_MAP_FIXED) 9469cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_FIXED; 9597d06da6SDmitry Chagin if (flags & LINUX_MAP_ANON) { 9697d06da6SDmitry Chagin /* Enforce pos to be on page boundary, then ignore. 
*/ 9797d06da6SDmitry Chagin if ((pos & PAGE_MASK) != 0) 9897d06da6SDmitry Chagin return (EINVAL); 9997d06da6SDmitry Chagin pos = 0; 10069cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_ANON; 10197d06da6SDmitry Chagin } else 10269cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_NOSYNC; 10397d06da6SDmitry Chagin if (flags & LINUX_MAP_GROWSDOWN) 10469cdfcefSEdward Tomasz Napierala bsd_flags |= MAP_STACK; 10597d06da6SDmitry Chagin 10697d06da6SDmitry Chagin /* 10797d06da6SDmitry Chagin * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 10897d06da6SDmitry Chagin * on Linux/i386 if the binary requires executable stack. 10997d06da6SDmitry Chagin * We do this only for IA32 emulation as on native i386 this is does not 11097d06da6SDmitry Chagin * make sense without PAE. 11197d06da6SDmitry Chagin * 11297d06da6SDmitry Chagin * XXX. Linux checks that the file system is not mounted with noexec. 11397d06da6SDmitry Chagin */ 11497d06da6SDmitry Chagin #if defined(__amd64__) 11569cdfcefSEdward Tomasz Napierala linux_fixup_prot(td, &prot); 11697d06da6SDmitry Chagin #endif 11797d06da6SDmitry Chagin 11897d06da6SDmitry Chagin /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 11969cdfcefSEdward Tomasz Napierala fd = (bsd_flags & MAP_ANON) ? -1 : fd; 12069cdfcefSEdward Tomasz Napierala if (fd != -1) { 12197d06da6SDmitry Chagin /* 12297d06da6SDmitry Chagin * Linux follows Solaris mmap(2) description: 12397d06da6SDmitry Chagin * The file descriptor fildes is opened with 12497d06da6SDmitry Chagin * read permission, regardless of the 12597d06da6SDmitry Chagin * protection options specified. 
12697d06da6SDmitry Chagin */ 12797d06da6SDmitry Chagin 128cbd92ce6SMatt Macy error = fget(td, fd, &cap_mmap_rights, &fp); 12997d06da6SDmitry Chagin if (error != 0) 13097d06da6SDmitry Chagin return (error); 13107c757ecSHans Petter Selasky if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_DEV) { 13297d06da6SDmitry Chagin fdrop(fp, td); 13397d06da6SDmitry Chagin return (EINVAL); 13497d06da6SDmitry Chagin } 13597d06da6SDmitry Chagin 13697d06da6SDmitry Chagin /* Linux mmap() just fails for O_WRONLY files */ 13797d06da6SDmitry Chagin if (!(fp->f_flag & FREAD)) { 13897d06da6SDmitry Chagin fdrop(fp, td); 13997d06da6SDmitry Chagin return (EACCES); 14097d06da6SDmitry Chagin } 14197d06da6SDmitry Chagin 14297d06da6SDmitry Chagin fdrop(fp, td); 14397d06da6SDmitry Chagin } 14497d06da6SDmitry Chagin 14597d06da6SDmitry Chagin if (flags & LINUX_MAP_GROWSDOWN) { 14697d06da6SDmitry Chagin /* 14797d06da6SDmitry Chagin * The Linux MAP_GROWSDOWN option does not limit auto 14897d06da6SDmitry Chagin * growth of the region. Linux mmap with this option 14997d06da6SDmitry Chagin * takes as addr the initial BOS, and as len, the initial 15097d06da6SDmitry Chagin * region size. It can then grow down from addr without 15197d06da6SDmitry Chagin * limit. However, Linux threads has an implicit internal 15297d06da6SDmitry Chagin * limit to stack size of STACK_SIZE. Its just not 15397d06da6SDmitry Chagin * enforced explicitly in Linux. But, here we impose 15497d06da6SDmitry Chagin * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 15597d06da6SDmitry Chagin * region, since we can do this with our mmap. 15697d06da6SDmitry Chagin * 15797d06da6SDmitry Chagin * Our mmap with MAP_STACK takes addr as the maximum 15897d06da6SDmitry Chagin * downsize limit on BOS, and as len the max size of 15997d06da6SDmitry Chagin * the region. It then maps the top SGROWSIZ bytes, 16097d06da6SDmitry Chagin * and auto grows the region down, up to the limit 16197d06da6SDmitry Chagin * in addr. 
16297d06da6SDmitry Chagin * 16397d06da6SDmitry Chagin * If we don't use the MAP_STACK option, the effect 16497d06da6SDmitry Chagin * of this code is to allocate a stack region of a 16597d06da6SDmitry Chagin * fixed size of (STACK_SIZE - GUARD_SIZE). 16697d06da6SDmitry Chagin */ 16797d06da6SDmitry Chagin 16897d06da6SDmitry Chagin if ((caddr_t)addr + len > vms->vm_maxsaddr) { 16997d06da6SDmitry Chagin /* 17097d06da6SDmitry Chagin * Some Linux apps will attempt to mmap 17197d06da6SDmitry Chagin * thread stacks near the top of their 17297d06da6SDmitry Chagin * address space. If their TOS is greater 17397d06da6SDmitry Chagin * than vm_maxsaddr, vm_map_growstack() 17497d06da6SDmitry Chagin * will confuse the thread stack with the 17597d06da6SDmitry Chagin * process stack and deliver a SEGV if they 17697d06da6SDmitry Chagin * attempt to grow the thread stack past their 17797d06da6SDmitry Chagin * current stacksize rlimit. To avoid this, 17897d06da6SDmitry Chagin * adjust vm_maxsaddr upwards to reflect 17997d06da6SDmitry Chagin * the current stacksize rlimit rather 18097d06da6SDmitry Chagin * than the maximum possible stacksize. 18197d06da6SDmitry Chagin * It would be better to adjust the 18297d06da6SDmitry Chagin * mmap'ed region, but some apps do not check 18397d06da6SDmitry Chagin * mmap's return value. 18497d06da6SDmitry Chagin */ 18597d06da6SDmitry Chagin PROC_LOCK(p); 18697d06da6SDmitry Chagin vms->vm_maxsaddr = (char *)p->p_sysent->sv_usrstack - 18797d06da6SDmitry Chagin lim_cur_proc(p, RLIMIT_STACK); 18897d06da6SDmitry Chagin PROC_UNLOCK(p); 18997d06da6SDmitry Chagin } 19097d06da6SDmitry Chagin 19197d06da6SDmitry Chagin /* 19297d06da6SDmitry Chagin * This gives us our maximum stack size and a new BOS. 19397d06da6SDmitry Chagin * If we're using VM_STACK, then mmap will just map 19497d06da6SDmitry Chagin * the top SGROWSIZ bytes, and let the stack grow down 19597d06da6SDmitry Chagin * to the limit at BOS. 
If we're not using VM_STACK 19697d06da6SDmitry Chagin * we map the full stack, since we don't have a way 19797d06da6SDmitry Chagin * to autogrow it. 19897d06da6SDmitry Chagin */ 19969cdfcefSEdward Tomasz Napierala if (len <= STACK_SIZE - GUARD_SIZE) { 20069cdfcefSEdward Tomasz Napierala addr = addr - (STACK_SIZE - GUARD_SIZE - len); 20169cdfcefSEdward Tomasz Napierala len = STACK_SIZE - GUARD_SIZE; 20297d06da6SDmitry Chagin } 20397d06da6SDmitry Chagin } 20497d06da6SDmitry Chagin 205c6d57d30SEdward Tomasz Napierala /* 206c6d57d30SEdward Tomasz Napierala * FreeBSD is free to ignore the address hint if MAP_FIXED wasn't 207c6d57d30SEdward Tomasz Napierala * passed. However, some Linux applications, like the ART runtime, 208c6d57d30SEdward Tomasz Napierala * depend on the hint. If the MAP_FIXED wasn't passed, but the 209c6d57d30SEdward Tomasz Napierala * address is not zero, try with MAP_FIXED and MAP_EXCL first, 210c6d57d30SEdward Tomasz Napierala * and fall back to the normal behaviour if that fails. 
211c6d57d30SEdward Tomasz Napierala */ 212c6d57d30SEdward Tomasz Napierala if (addr != 0 && (bsd_flags & MAP_FIXED) == 0 && 213c6d57d30SEdward Tomasz Napierala (bsd_flags & MAP_EXCL) == 0) { 214c6d57d30SEdward Tomasz Napierala error = kern_mmap(td, addr, len, prot, 215c6d57d30SEdward Tomasz Napierala bsd_flags | MAP_FIXED | MAP_EXCL, fd, pos); 216c6d57d30SEdward Tomasz Napierala if (error == 0) 217c6d57d30SEdward Tomasz Napierala goto out; 218c6d57d30SEdward Tomasz Napierala } 21997d06da6SDmitry Chagin 220c6d57d30SEdward Tomasz Napierala error = kern_mmap(td, addr, len, prot, bsd_flags, fd, pos); 221c6d57d30SEdward Tomasz Napierala out: 22297d06da6SDmitry Chagin LINUX_CTR2(mmap2, "return: %d (%p)", error, td->td_retval[0]); 22397d06da6SDmitry Chagin 22497d06da6SDmitry Chagin return (error); 22597d06da6SDmitry Chagin } 22697d06da6SDmitry Chagin 22797d06da6SDmitry Chagin int 22897d06da6SDmitry Chagin linux_mprotect_common(struct thread *td, uintptr_t addr, size_t len, int prot) 22997d06da6SDmitry Chagin { 23097d06da6SDmitry Chagin 231*e2fba140STijl Coosemans /* XXX Ignore PROT_GROWSDOWN and PROT_GROWSUP for now. 
*/ 232*e2fba140STijl Coosemans prot &= ~(LINUX_PROT_GROWSDOWN | LINUX_PROT_GROWSUP); 233*e2fba140STijl Coosemans if ((prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC)) != 0) 234*e2fba140STijl Coosemans return (EINVAL); 235*e2fba140STijl Coosemans 23697d06da6SDmitry Chagin #if defined(__amd64__) 23769cdfcefSEdward Tomasz Napierala linux_fixup_prot(td, &prot); 23897d06da6SDmitry Chagin #endif 239496ab053SKonstantin Belousov return (kern_mprotect(td, addr, len, prot)); 24097d06da6SDmitry Chagin } 24197d06da6SDmitry Chagin 24297d06da6SDmitry Chagin #if defined(__amd64__) 24397d06da6SDmitry Chagin static void 24497d06da6SDmitry Chagin linux_fixup_prot(struct thread *td, int *prot) 24597d06da6SDmitry Chagin { 24697d06da6SDmitry Chagin struct linux_pemuldata *pem; 24797d06da6SDmitry Chagin 24897d06da6SDmitry Chagin if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && *prot & PROT_READ) { 24997d06da6SDmitry Chagin pem = pem_find(td->td_proc); 25097d06da6SDmitry Chagin if (pem->persona & LINUX_READ_IMPLIES_EXEC) 25197d06da6SDmitry Chagin *prot |= PROT_EXEC; 25297d06da6SDmitry Chagin } 25397d06da6SDmitry Chagin 25497d06da6SDmitry Chagin } 25597d06da6SDmitry Chagin #endif 256