1ad2056f2SAlexander Leidinger /*- 24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause 37f2d13d6SPedro F. Giffuni * 4b267239dSEd Maste * Copyright (c) 1994-1996 Søren Schmidt 5ad2056f2SAlexander Leidinger * Copyright (c) 2006 Roman Divacky 6ad2056f2SAlexander Leidinger * All rights reserved. 71ca6b15bSDmitry Chagin * Copyright (c) 2013 Dmitry Chagin <dchagin@FreeBSD.org> 8ad2056f2SAlexander Leidinger * 9ad2056f2SAlexander Leidinger * Redistribution and use in source and binary forms, with or without 10ad2056f2SAlexander Leidinger * modification, are permitted provided that the following conditions 11ad2056f2SAlexander Leidinger * are met: 12ad2056f2SAlexander Leidinger * 1. Redistributions of source code must retain the above copyright 13023b850bSEd Maste * notice, this list of conditions and the following disclaimer. 14ad2056f2SAlexander Leidinger * 2. Redistributions in binary form must reproduce the above copyright 15ad2056f2SAlexander Leidinger * notice, this list of conditions and the following disclaimer in the 16ad2056f2SAlexander Leidinger * documentation and/or other materials provided with the distribution. 17ad2056f2SAlexander Leidinger * 18023b850bSEd Maste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19023b850bSEd Maste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20023b850bSEd Maste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21023b850bSEd Maste * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22023b850bSEd Maste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23023b850bSEd Maste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24023b850bSEd Maste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25023b850bSEd Maste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26023b850bSEd Maste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27023b850bSEd Maste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28023b850bSEd Maste * SUCH DAMAGE. 29ad2056f2SAlexander Leidinger */ 30ad2056f2SAlexander Leidinger 31ad2056f2SAlexander Leidinger #include <sys/cdefs.h> 32ad2056f2SAlexander Leidinger __FBSDID("$FreeBSD$"); 33ad2056f2SAlexander Leidinger 34ad2056f2SAlexander Leidinger #include <sys/param.h> 35b267239dSEd Maste #include <sys/fcntl.h> 36ad2056f2SAlexander Leidinger #include <sys/imgact.h> 3781338031SDmitry Chagin #include <sys/ktr.h> 38ad2056f2SAlexander Leidinger #include <sys/lock.h> 39ad2056f2SAlexander Leidinger #include <sys/malloc.h> 40ad2056f2SAlexander Leidinger #include <sys/mutex.h> 41ad2056f2SAlexander Leidinger #include <sys/proc.h> 428c5059e9SEdward Tomasz Napierala #include <sys/resourcevar.h> 43d8e53d94SDmitry Chagin #include <sys/sx.h> 44ad2056f2SAlexander Leidinger #include <sys/syscallsubr.h> 4595353459SDimitry Andric #include <sys/sysent.h> 46ad2056f2SAlexander Leidinger 474732e446SRoman Divacky #include <compat/linux/linux_emul.h> 488c5059e9SEdward Tomasz Napierala #include <compat/linux/linux_mib.h> 49d825ce0aSJohn Baldwin #include <compat/linux/linux_misc.h> 5023e8912cSDmitry Chagin #include <compat/linux/linux_persona.h> 5181338031SDmitry Chagin #include <compat/linux/linux_util.h> 524732e446SRoman Divacky 53b267239dSEd Maste #if BYTE_ORDER == LITTLE_ENDIAN 54b267239dSEd Maste #define SHELLMAGIC 0x2123 /* #! */ 55b267239dSEd Maste #else 56b267239dSEd Maste #define SHELLMAGIC 0x2321 57b267239dSEd Maste #endif 5819e252baSAlexander Leidinger 5981338031SDmitry Chagin /* 60bc273677SDmitry Chagin * This returns reference to the thread emuldata entry (if found) 6181338031SDmitry Chagin * 6281338031SDmitry Chagin * Hold PROC_LOCK when referencing emuldata from other threads. 6381338031SDmitry Chagin */ 64ad2056f2SAlexander Leidinger struct linux_emuldata * 6581338031SDmitry Chagin em_find(struct thread *td) 66ad2056f2SAlexander Leidinger { 67ad2056f2SAlexander Leidinger struct linux_emuldata *em; 68ad2056f2SAlexander Leidinger 6981338031SDmitry Chagin em = td->td_emuldata; 70ad2056f2SAlexander Leidinger 71ad2056f2SAlexander Leidinger return (em); 72ad2056f2SAlexander Leidinger } 73ad2056f2SAlexander Leidinger 74bc273677SDmitry Chagin /* 75bc273677SDmitry Chagin * This returns reference to the proc pemuldata entry (if found) 76bc273677SDmitry Chagin * 77bc273677SDmitry Chagin * Hold PROC_LOCK when referencing proc pemuldata from other threads. 78bc273677SDmitry Chagin * Hold LINUX_PEM_LOCK wher referencing pemuldata members. 79bc273677SDmitry Chagin */ 80bc273677SDmitry Chagin struct linux_pemuldata * 81bc273677SDmitry Chagin pem_find(struct proc *p) 82bc273677SDmitry Chagin { 83bc273677SDmitry Chagin struct linux_pemuldata *pem; 84bc273677SDmitry Chagin 85bc273677SDmitry Chagin pem = p->p_emuldata; 86bc273677SDmitry Chagin 87bc273677SDmitry Chagin return (pem); 88bc273677SDmitry Chagin } 89bc273677SDmitry Chagin 908c5059e9SEdward Tomasz Napierala /* 918c5059e9SEdward Tomasz Napierala * Linux apps generally expect the soft open file limit to be set 928c5059e9SEdward Tomasz Napierala * to 1024, often iterating over all the file descriptors up to that 938c5059e9SEdward Tomasz Napierala * limit instead of using closefrom(2). Give them what they want, 948c5059e9SEdward Tomasz Napierala * unless there already is a resource limit in place. 958c5059e9SEdward Tomasz Napierala */ 968c5059e9SEdward Tomasz Napierala static void 978c5059e9SEdward Tomasz Napierala linux_set_default_openfiles(struct thread *td, struct proc *p) 988c5059e9SEdward Tomasz Napierala { 998c5059e9SEdward Tomasz Napierala struct rlimit rlim; 1000c8d7eebSMateusz Guzik int error __diagused; 1018c5059e9SEdward Tomasz Napierala 1028c5059e9SEdward Tomasz Napierala if (linux_default_openfiles < 0) 1038c5059e9SEdward Tomasz Napierala return; 1048c5059e9SEdward Tomasz Napierala 1058c5059e9SEdward Tomasz Napierala PROC_LOCK(p); 1068c5059e9SEdward Tomasz Napierala lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim); 1078c5059e9SEdward Tomasz Napierala PROC_UNLOCK(p); 1088c5059e9SEdward Tomasz Napierala if (rlim.rlim_cur != rlim.rlim_max || 1098c5059e9SEdward Tomasz Napierala rlim.rlim_cur <= linux_default_openfiles) 1108c5059e9SEdward Tomasz Napierala return; 1118c5059e9SEdward Tomasz Napierala rlim.rlim_cur = linux_default_openfiles; 1128c5059e9SEdward Tomasz Napierala error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim); 1138c5059e9SEdward Tomasz Napierala KASSERT(error == 0, ("kern_proc_setrlimit failed")); 1148c5059e9SEdward Tomasz Napierala } 1158c5059e9SEdward Tomasz Napierala 1161c34dcb5SEdward Tomasz Napierala /* 1171c34dcb5SEdward Tomasz Napierala * The default stack size limit in Linux is 8MB. 1181c34dcb5SEdward Tomasz Napierala */ 1191c34dcb5SEdward Tomasz Napierala static void 1201c34dcb5SEdward Tomasz Napierala linux_set_default_stacksize(struct thread *td, struct proc *p) 1211c34dcb5SEdward Tomasz Napierala { 1221c34dcb5SEdward Tomasz Napierala struct rlimit rlim; 1230c8d7eebSMateusz Guzik int error __diagused; 1241c34dcb5SEdward Tomasz Napierala 1251c34dcb5SEdward Tomasz Napierala if (linux_default_stacksize < 0) 1261c34dcb5SEdward Tomasz Napierala return; 1271c34dcb5SEdward Tomasz Napierala 1281c34dcb5SEdward Tomasz Napierala PROC_LOCK(p); 1291c34dcb5SEdward Tomasz Napierala lim_rlimit_proc(p, RLIMIT_STACK, &rlim); 1301c34dcb5SEdward Tomasz Napierala PROC_UNLOCK(p); 1311c34dcb5SEdward Tomasz Napierala if (rlim.rlim_cur != rlim.rlim_max || 1321c34dcb5SEdward Tomasz Napierala rlim.rlim_cur <= linux_default_stacksize) 1331c34dcb5SEdward Tomasz Napierala return; 1341c34dcb5SEdward Tomasz Napierala rlim.rlim_cur = linux_default_stacksize; 1351c34dcb5SEdward Tomasz Napierala error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim); 1361c34dcb5SEdward Tomasz Napierala KASSERT(error == 0, ("kern_proc_setrlimit failed")); 1371c34dcb5SEdward Tomasz Napierala } 1381c34dcb5SEdward Tomasz Napierala 13981338031SDmitry Chagin void 1400a4b664aSDmitry Chagin linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread) 141ad2056f2SAlexander Leidinger { 14281338031SDmitry Chagin struct linux_emuldata *em; 143bc273677SDmitry Chagin struct linux_pemuldata *pem; 14468cf0367SDmitry Chagin struct proc *p; 14519e252baSAlexander Leidinger 14681338031SDmitry Chagin if (newtd != NULL) { 14768cf0367SDmitry Chagin p = newtd->td_proc; 14868cf0367SDmitry Chagin 14981338031SDmitry Chagin /* non-exec call */ 150*b834497cSDmitry Chagin em = malloc(sizeof(*em), M_LINUX, M_WAITOK | M_ZERO); 1510a4b664aSDmitry Chagin if (init_thread) { 152e16fe1c7SDmitry Chagin LINUX_CTR1(proc_init, "thread newtd(%d)", 153e16fe1c7SDmitry Chagin newtd->td_tid); 154e16fe1c7SDmitry Chagin 15581338031SDmitry Chagin em->em_tid = newtd->td_tid; 15681338031SDmitry Chagin } else { 15768cf0367SDmitry Chagin LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid); 15819e252baSAlexander Leidinger 15968cf0367SDmitry Chagin em->em_tid = p->p_pid; 160bc273677SDmitry Chagin 161e0d3ea8cSDmitry Chagin pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO); 162bc273677SDmitry Chagin sx_init(&pem->pem_sx, "lpemlk"); 16368cf0367SDmitry Chagin p->p_emuldata = pem; 164ad2056f2SAlexander Leidinger } 16581338031SDmitry Chagin newtd->td_emuldata = em; 1668c5059e9SEdward Tomasz Napierala 1678c5059e9SEdward Tomasz Napierala linux_set_default_openfiles(td, p); 1681c34dcb5SEdward Tomasz Napierala linux_set_default_stacksize(td, p); 169ad2056f2SAlexander Leidinger } else { 17068cf0367SDmitry Chagin p = td->td_proc; 17168cf0367SDmitry Chagin 17219e252baSAlexander Leidinger /* exec */ 17368cf0367SDmitry Chagin LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid); 17419e252baSAlexander Leidinger 175ad2056f2SAlexander Leidinger /* lookup the old one */ 17681338031SDmitry Chagin em = em_find(td); 1774e1e8346SDmitry Chagin KASSERT(em != NULL, ("proc_init: thread emuldata not found.\n")); 17881338031SDmitry Chagin 17968cf0367SDmitry Chagin em->em_tid = p->p_pid; 18097cfa5c8SDmitry Chagin em->flags = 0; 18197cfa5c8SDmitry Chagin em->robust_futexes = NULL; 18297cfa5c8SDmitry Chagin em->child_clear_tid = NULL; 18397cfa5c8SDmitry Chagin em->child_set_tid = NULL; 184e16fe1c7SDmitry Chagin 18568cf0367SDmitry Chagin pem = pem_find(p); 1864e1e8346SDmitry Chagin KASSERT(pem != NULL, ("proc_init: proc emuldata not found.\n")); 18723e8912cSDmitry Chagin pem->persona = 0; 188b7df7b98SDmitry Chagin pem->oom_score_adj = 0; 189ad2056f2SAlexander Leidinger } 190ad2056f2SAlexander Leidinger } 191ad2056f2SAlexander Leidinger 192ad2056f2SAlexander Leidinger void 1934815f175SKonstantin Belousov linux_on_exit(struct proc *p) 194ad2056f2SAlexander Leidinger { 195bc273677SDmitry Chagin struct linux_pemuldata *pem; 19681338031SDmitry Chagin struct thread *td = curthread; 197ad2056f2SAlexander Leidinger 1984815f175SKonstantin Belousov MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX); 199bc273677SDmitry Chagin 2007d96520bSDmitry Chagin LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p", 2017d96520bSDmitry Chagin td->td_tid, p->p_pid, p); 2027d96520bSDmitry Chagin 203bc273677SDmitry Chagin pem = pem_find(p); 204bc273677SDmitry Chagin if (pem == NULL) 205bc273677SDmitry Chagin return; 20681338031SDmitry Chagin (p->p_sysent->sv_thread_detach)(td); 207bc273677SDmitry Chagin 208bc273677SDmitry Chagin p->p_emuldata = NULL; 209bc273677SDmitry Chagin 210bc273677SDmitry Chagin sx_destroy(&pem->pem_sx); 211e0d3ea8cSDmitry Chagin free(pem, M_LINUX); 212e8b8b834SAlexander Leidinger } 213ad2056f2SAlexander Leidinger 21481338031SDmitry Chagin int 21581338031SDmitry Chagin linux_common_execve(struct thread *td, struct image_args *eargs) 21681338031SDmitry Chagin { 217bc273677SDmitry Chagin struct linux_pemuldata *pem; 218d707582fSDmitry Chagin struct vmspace *oldvmspace; 21981338031SDmitry Chagin struct linux_emuldata *em; 22081338031SDmitry Chagin struct proc *p; 22181338031SDmitry Chagin int error; 222ad2056f2SAlexander Leidinger 22381338031SDmitry Chagin p = td->td_proc; 224ad2056f2SAlexander Leidinger 225d707582fSDmitry Chagin error = pre_execve(td, &oldvmspace); 226d707582fSDmitry Chagin if (error != 0) 227d707582fSDmitry Chagin return (error); 22881338031SDmitry Chagin 229aaf78c16SKonstantin Belousov error = kern_execve(td, eargs, NULL, oldvmspace); 230d707582fSDmitry Chagin post_execve(td, error, oldvmspace); 231814629ddSEd Schouten if (error != EJUSTRETURN) 23281338031SDmitry Chagin return (error); 23381338031SDmitry Chagin 23481338031SDmitry Chagin /* 23581338031SDmitry Chagin * In a case of transition from Linux binary execing to 236eae594f7SEd Maste * FreeBSD binary we destroy Linux emuldata thread & proc entries. 23781338031SDmitry Chagin */ 23881338031SDmitry Chagin if (SV_CURPROC_ABI() != SV_ABI_LINUX) { 239fd745e1dSDmitry Chagin 240fd745e1dSDmitry Chagin /* Clear ABI root directory if set. */ 241fd745e1dSDmitry Chagin linux_pwd_onexec_native(td); 242fd745e1dSDmitry Chagin 24381338031SDmitry Chagin PROC_LOCK(p); 24481338031SDmitry Chagin em = em_find(td); 245bc273677SDmitry Chagin KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n")); 24681338031SDmitry Chagin td->td_emuldata = NULL; 247bc273677SDmitry Chagin 248bc273677SDmitry Chagin pem = pem_find(p); 249bc273677SDmitry Chagin KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n")); 250bc273677SDmitry Chagin p->p_emuldata = NULL; 25181338031SDmitry Chagin PROC_UNLOCK(p); 25281338031SDmitry Chagin 253*b834497cSDmitry Chagin free(em, M_LINUX); 254e0d3ea8cSDmitry Chagin free(pem, M_LINUX); 25581338031SDmitry Chagin } 256814629ddSEd Schouten return (EJUSTRETURN); 25781338031SDmitry Chagin } 25881338031SDmitry Chagin 259fd745e1dSDmitry Chagin int 2604815f175SKonstantin Belousov linux_on_exec(struct proc *p, struct image_params *imgp) 26181338031SDmitry Chagin { 2628a15ac83SKonstantin Belousov struct thread *td; 26332ba368bSDmitry Chagin struct thread *othertd; 26423e8912cSDmitry Chagin #if defined(__amd64__) 26523e8912cSDmitry Chagin struct linux_pemuldata *pem; 26623e8912cSDmitry Chagin #endif 267fd745e1dSDmitry Chagin int error; 26832ba368bSDmitry Chagin 2698a15ac83SKonstantin Belousov td = curthread; 2704815f175SKonstantin Belousov MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX); 27181338031SDmitry Chagin 27281338031SDmitry Chagin /* 2734815f175SKonstantin Belousov * When execing to Linux binary, we create Linux emuldata 2744815f175SKonstantin Belousov * thread entry. 27581338031SDmitry Chagin */ 2768a15ac83SKonstantin Belousov if (SV_PROC_ABI(p) == SV_ABI_LINUX) { 2778a15ac83SKonstantin Belousov /* 2788a15ac83SKonstantin Belousov * Process already was under Linuxolator 2798a15ac83SKonstantin Belousov * before exec. Update emuldata to reflect 2808a15ac83SKonstantin Belousov * single-threaded cleaned state after exec. 2818a15ac83SKonstantin Belousov */ 2820a4b664aSDmitry Chagin linux_proc_init(td, NULL, false); 2838a15ac83SKonstantin Belousov } else { 2848a15ac83SKonstantin Belousov /* 2858a15ac83SKonstantin Belousov * We are switching the process to Linux emulator. 2868a15ac83SKonstantin Belousov */ 2870a4b664aSDmitry Chagin linux_proc_init(td, td, false); 2888a15ac83SKonstantin Belousov 2898a15ac83SKonstantin Belousov /* 2908a15ac83SKonstantin Belousov * Create a transient td_emuldata for all suspended 2918a15ac83SKonstantin Belousov * threads, so that p->p_sysent->sv_thread_detach() == 2928a15ac83SKonstantin Belousov * linux_thread_detach() can find expected but unused 2938a15ac83SKonstantin Belousov * emuldata. 2948a15ac83SKonstantin Belousov */ 2958a15ac83SKonstantin Belousov FOREACH_THREAD_IN_PROC(td->td_proc, othertd) { 2964815f175SKonstantin Belousov if (othertd == td) 2974815f175SKonstantin Belousov continue; 2980a4b664aSDmitry Chagin linux_proc_init(td, othertd, true); 2998a15ac83SKonstantin Belousov } 300fd745e1dSDmitry Chagin 301fd745e1dSDmitry Chagin /* Set ABI root directory. */ 302fd745e1dSDmitry Chagin if ((error = linux_pwd_onexec(td)) != 0) 303fd745e1dSDmitry Chagin return (error); 3048a15ac83SKonstantin Belousov } 30523e8912cSDmitry Chagin #if defined(__amd64__) 30623e8912cSDmitry Chagin /* 30723e8912cSDmitry Chagin * An IA32 executable which has executable stack will have the 30823e8912cSDmitry Chagin * READ_IMPLIES_EXEC personality flag set automatically. 30923e8912cSDmitry Chagin */ 31023e8912cSDmitry Chagin if (SV_PROC_FLAG(td->td_proc, SV_ILP32) && 31123e8912cSDmitry Chagin imgp->stack_prot & VM_PROT_EXECUTE) { 31223e8912cSDmitry Chagin pem = pem_find(p); 31323e8912cSDmitry Chagin pem->persona |= LINUX_READ_IMPLIES_EXEC; 31423e8912cSDmitry Chagin } 31523e8912cSDmitry Chagin #endif 316fd745e1dSDmitry Chagin return (0); 31781338031SDmitry Chagin } 31881338031SDmitry Chagin 31981338031SDmitry Chagin void 3204815f175SKonstantin Belousov linux_thread_dtor(struct thread *td) 321ad2056f2SAlexander Leidinger { 322ad2056f2SAlexander Leidinger struct linux_emuldata *em; 323ad2056f2SAlexander Leidinger 32481338031SDmitry Chagin em = em_find(td); 32581338031SDmitry Chagin if (em == NULL) 32681338031SDmitry Chagin return; 32781338031SDmitry Chagin td->td_emuldata = NULL; 328ad2056f2SAlexander Leidinger 3297d96520bSDmitry Chagin LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid); 330ad2056f2SAlexander Leidinger 331*b834497cSDmitry Chagin free(em, M_LINUX); 332ad2056f2SAlexander Leidinger } 333ad2056f2SAlexander Leidinger 334ad2056f2SAlexander Leidinger void 335e5d81ef1SDmitry Chagin linux_schedtail(struct thread *td) 336ad2056f2SAlexander Leidinger { 337ad2056f2SAlexander Leidinger struct linux_emuldata *em; 33874a0e24fSMateusz Guzik #ifdef KTR 33974a0e24fSMateusz Guzik int error; 34074a0e24fSMateusz Guzik #else 34174a0e24fSMateusz Guzik int error __unused; 34274a0e24fSMateusz Guzik #endif 343ad2056f2SAlexander Leidinger int *child_set_tid; 344ad2056f2SAlexander Leidinger 34581338031SDmitry Chagin em = em_find(td); 346bc273677SDmitry Chagin KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n")); 347ad2056f2SAlexander Leidinger child_set_tid = em->child_set_tid; 348ad2056f2SAlexander Leidinger 34919e252baSAlexander Leidinger if (child_set_tid != NULL) { 350e0327ddbSDmitry Chagin error = copyout(&em->em_tid, child_set_tid, 35181338031SDmitry Chagin sizeof(em->em_tid)); 3527d96520bSDmitry Chagin LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d", 35381338031SDmitry Chagin td->td_tid, child_set_tid, em->em_tid, error); 35481338031SDmitry Chagin } else 3557d96520bSDmitry Chagin LINUX_CTR1(schedtail, "thread(%d)", em->em_tid); 356bb63fddeSAlexander Leidinger } 357