1ad2056f2SAlexander Leidinger /*-
24d846d26SWarner Losh * SPDX-License-Identifier: BSD-2-Clause
37f2d13d6SPedro F. Giffuni *
4b267239dSEd Maste * Copyright (c) 1994-1996 Søren Schmidt
5ad2056f2SAlexander Leidinger * Copyright (c) 2006 Roman Divacky
6ad2056f2SAlexander Leidinger * All rights reserved.
71ca6b15bSDmitry Chagin * Copyright (c) 2013 Dmitry Chagin <dchagin@FreeBSD.org>
8ad2056f2SAlexander Leidinger *
9ad2056f2SAlexander Leidinger * Redistribution and use in source and binary forms, with or without
10ad2056f2SAlexander Leidinger * modification, are permitted provided that the following conditions
11ad2056f2SAlexander Leidinger * are met:
12ad2056f2SAlexander Leidinger * 1. Redistributions of source code must retain the above copyright
13023b850bSEd Maste * notice, this list of conditions and the following disclaimer.
14ad2056f2SAlexander Leidinger * 2. Redistributions in binary form must reproduce the above copyright
15ad2056f2SAlexander Leidinger * notice, this list of conditions and the following disclaimer in the
16ad2056f2SAlexander Leidinger * documentation and/or other materials provided with the distribution.
17ad2056f2SAlexander Leidinger *
18023b850bSEd Maste * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19023b850bSEd Maste * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20023b850bSEd Maste * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21023b850bSEd Maste * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22023b850bSEd Maste * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23023b850bSEd Maste * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24023b850bSEd Maste * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25023b850bSEd Maste * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26023b850bSEd Maste * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27023b850bSEd Maste * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28023b850bSEd Maste * SUCH DAMAGE.
29ad2056f2SAlexander Leidinger */
30ad2056f2SAlexander Leidinger
31ad2056f2SAlexander Leidinger #include <sys/param.h>
32b267239dSEd Maste #include <sys/fcntl.h>
33ad2056f2SAlexander Leidinger #include <sys/imgact.h>
3481338031SDmitry Chagin #include <sys/ktr.h>
35ad2056f2SAlexander Leidinger #include <sys/lock.h>
36ad2056f2SAlexander Leidinger #include <sys/malloc.h>
37ad2056f2SAlexander Leidinger #include <sys/mutex.h>
38ad2056f2SAlexander Leidinger #include <sys/proc.h>
398c5059e9SEdward Tomasz Napierala #include <sys/resourcevar.h>
40d8e53d94SDmitry Chagin #include <sys/sx.h>
41ad2056f2SAlexander Leidinger #include <sys/syscallsubr.h>
4295353459SDimitry Andric #include <sys/sysent.h>
43ad2056f2SAlexander Leidinger
444732e446SRoman Divacky #include <compat/linux/linux_emul.h>
458c5059e9SEdward Tomasz Napierala #include <compat/linux/linux_mib.h>
46d825ce0aSJohn Baldwin #include <compat/linux/linux_misc.h>
4723e8912cSDmitry Chagin #include <compat/linux/linux_persona.h>
4881338031SDmitry Chagin #include <compat/linux/linux_util.h>
494732e446SRoman Divacky
/*
 * The two characters "#!" ('#' is 0x23, '!' is 0x21) viewed as a single
 * 16-bit word.  On a little-endian host the first byte lands in the low
 * half of the word (0x2123); on any other byte order the value is 0x2321.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define	SHELLMAGIC	0x2321
#else
#define	SHELLMAGIC	0x2123	/* #! */
#endif
5519e252baSAlexander Leidinger
5681338031SDmitry Chagin /*
57bc273677SDmitry Chagin * This returns reference to the thread emuldata entry (if found)
5881338031SDmitry Chagin *
5981338031SDmitry Chagin * Hold PROC_LOCK when referencing emuldata from other threads.
6081338031SDmitry Chagin */
61ad2056f2SAlexander Leidinger struct linux_emuldata *
em_find(struct thread * td)6281338031SDmitry Chagin em_find(struct thread *td)
63ad2056f2SAlexander Leidinger {
64ad2056f2SAlexander Leidinger struct linux_emuldata *em;
65ad2056f2SAlexander Leidinger
6681338031SDmitry Chagin em = td->td_emuldata;
67ad2056f2SAlexander Leidinger
68ad2056f2SAlexander Leidinger return (em);
69ad2056f2SAlexander Leidinger }
70ad2056f2SAlexander Leidinger
71bc273677SDmitry Chagin /*
72bc273677SDmitry Chagin * This returns reference to the proc pemuldata entry (if found)
73bc273677SDmitry Chagin *
74bc273677SDmitry Chagin * Hold PROC_LOCK when referencing proc pemuldata from other threads.
75bc273677SDmitry Chagin * Hold LINUX_PEM_LOCK wher referencing pemuldata members.
76bc273677SDmitry Chagin */
77bc273677SDmitry Chagin struct linux_pemuldata *
pem_find(struct proc * p)78bc273677SDmitry Chagin pem_find(struct proc *p)
79bc273677SDmitry Chagin {
80bc273677SDmitry Chagin struct linux_pemuldata *pem;
81bc273677SDmitry Chagin
82bc273677SDmitry Chagin pem = p->p_emuldata;
83bc273677SDmitry Chagin
84bc273677SDmitry Chagin return (pem);
85bc273677SDmitry Chagin }
86bc273677SDmitry Chagin
878c5059e9SEdward Tomasz Napierala /*
888c5059e9SEdward Tomasz Napierala * Linux apps generally expect the soft open file limit to be set
898c5059e9SEdward Tomasz Napierala * to 1024, often iterating over all the file descriptors up to that
908c5059e9SEdward Tomasz Napierala * limit instead of using closefrom(2). Give them what they want,
918c5059e9SEdward Tomasz Napierala * unless there already is a resource limit in place.
928c5059e9SEdward Tomasz Napierala */
938c5059e9SEdward Tomasz Napierala static void
linux_set_default_openfiles(struct thread * td,struct proc * p)948c5059e9SEdward Tomasz Napierala linux_set_default_openfiles(struct thread *td, struct proc *p)
958c5059e9SEdward Tomasz Napierala {
968c5059e9SEdward Tomasz Napierala struct rlimit rlim;
970c8d7eebSMateusz Guzik int error __diagused;
988c5059e9SEdward Tomasz Napierala
998c5059e9SEdward Tomasz Napierala if (linux_default_openfiles < 0)
1008c5059e9SEdward Tomasz Napierala return;
1018c5059e9SEdward Tomasz Napierala
1028c5059e9SEdward Tomasz Napierala PROC_LOCK(p);
1038c5059e9SEdward Tomasz Napierala lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim);
1048c5059e9SEdward Tomasz Napierala PROC_UNLOCK(p);
1058c5059e9SEdward Tomasz Napierala if (rlim.rlim_cur != rlim.rlim_max ||
1068c5059e9SEdward Tomasz Napierala rlim.rlim_cur <= linux_default_openfiles)
1078c5059e9SEdward Tomasz Napierala return;
1088c5059e9SEdward Tomasz Napierala rlim.rlim_cur = linux_default_openfiles;
1098c5059e9SEdward Tomasz Napierala error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim);
1108c5059e9SEdward Tomasz Napierala KASSERT(error == 0, ("kern_proc_setrlimit failed"));
1118c5059e9SEdward Tomasz Napierala }
1128c5059e9SEdward Tomasz Napierala
1131c34dcb5SEdward Tomasz Napierala /*
1141c34dcb5SEdward Tomasz Napierala * The default stack size limit in Linux is 8MB.
1151c34dcb5SEdward Tomasz Napierala */
1161c34dcb5SEdward Tomasz Napierala static void
linux_set_default_stacksize(struct thread * td,struct proc * p)1171c34dcb5SEdward Tomasz Napierala linux_set_default_stacksize(struct thread *td, struct proc *p)
1181c34dcb5SEdward Tomasz Napierala {
1191c34dcb5SEdward Tomasz Napierala struct rlimit rlim;
1200c8d7eebSMateusz Guzik int error __diagused;
1211c34dcb5SEdward Tomasz Napierala
1221c34dcb5SEdward Tomasz Napierala if (linux_default_stacksize < 0)
1231c34dcb5SEdward Tomasz Napierala return;
1241c34dcb5SEdward Tomasz Napierala
1251c34dcb5SEdward Tomasz Napierala PROC_LOCK(p);
1261c34dcb5SEdward Tomasz Napierala lim_rlimit_proc(p, RLIMIT_STACK, &rlim);
1271c34dcb5SEdward Tomasz Napierala PROC_UNLOCK(p);
1281c34dcb5SEdward Tomasz Napierala if (rlim.rlim_cur != rlim.rlim_max ||
1291c34dcb5SEdward Tomasz Napierala rlim.rlim_cur <= linux_default_stacksize)
1301c34dcb5SEdward Tomasz Napierala return;
1311c34dcb5SEdward Tomasz Napierala rlim.rlim_cur = linux_default_stacksize;
1321c34dcb5SEdward Tomasz Napierala error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim);
1331c34dcb5SEdward Tomasz Napierala KASSERT(error == 0, ("kern_proc_setrlimit failed"));
1341c34dcb5SEdward Tomasz Napierala }
1351c34dcb5SEdward Tomasz Napierala
13681338031SDmitry Chagin void
linux_proc_init(struct thread * td,struct thread * newtd,bool init_thread)1370a4b664aSDmitry Chagin linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread)
138ad2056f2SAlexander Leidinger {
13981338031SDmitry Chagin struct linux_emuldata *em;
140bc273677SDmitry Chagin struct linux_pemuldata *pem;
14168cf0367SDmitry Chagin struct proc *p;
14219e252baSAlexander Leidinger
14381338031SDmitry Chagin if (newtd != NULL) {
14468cf0367SDmitry Chagin p = newtd->td_proc;
14568cf0367SDmitry Chagin
14681338031SDmitry Chagin /* non-exec call */
147*b834497cSDmitry Chagin em = malloc(sizeof(*em), M_LINUX, M_WAITOK | M_ZERO);
1480a4b664aSDmitry Chagin if (init_thread) {
149e16fe1c7SDmitry Chagin LINUX_CTR1(proc_init, "thread newtd(%d)",
150e16fe1c7SDmitry Chagin newtd->td_tid);
151e16fe1c7SDmitry Chagin
15281338031SDmitry Chagin em->em_tid = newtd->td_tid;
15381338031SDmitry Chagin } else {
15468cf0367SDmitry Chagin LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
15519e252baSAlexander Leidinger
15668cf0367SDmitry Chagin em->em_tid = p->p_pid;
157bc273677SDmitry Chagin
158e0d3ea8cSDmitry Chagin pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
159bc273677SDmitry Chagin sx_init(&pem->pem_sx, "lpemlk");
16068cf0367SDmitry Chagin p->p_emuldata = pem;
161ad2056f2SAlexander Leidinger }
16281338031SDmitry Chagin newtd->td_emuldata = em;
1638c5059e9SEdward Tomasz Napierala
1648c5059e9SEdward Tomasz Napierala linux_set_default_openfiles(td, p);
1651c34dcb5SEdward Tomasz Napierala linux_set_default_stacksize(td, p);
166ad2056f2SAlexander Leidinger } else {
16768cf0367SDmitry Chagin p = td->td_proc;
16868cf0367SDmitry Chagin
16919e252baSAlexander Leidinger /* exec */
17068cf0367SDmitry Chagin LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
17119e252baSAlexander Leidinger
172ad2056f2SAlexander Leidinger /* lookup the old one */
17381338031SDmitry Chagin em = em_find(td);
1744e1e8346SDmitry Chagin KASSERT(em != NULL, ("proc_init: thread emuldata not found.\n"));
17581338031SDmitry Chagin
17668cf0367SDmitry Chagin em->em_tid = p->p_pid;
17797cfa5c8SDmitry Chagin em->flags = 0;
17897cfa5c8SDmitry Chagin em->robust_futexes = NULL;
17997cfa5c8SDmitry Chagin em->child_clear_tid = NULL;
18097cfa5c8SDmitry Chagin em->child_set_tid = NULL;
181e16fe1c7SDmitry Chagin
18268cf0367SDmitry Chagin pem = pem_find(p);
1834e1e8346SDmitry Chagin KASSERT(pem != NULL, ("proc_init: proc emuldata not found.\n"));
18423e8912cSDmitry Chagin pem->persona = 0;
185b7df7b98SDmitry Chagin pem->oom_score_adj = 0;
186ad2056f2SAlexander Leidinger }
187ad2056f2SAlexander Leidinger }
188ad2056f2SAlexander Leidinger
189ad2056f2SAlexander Leidinger void
linux_on_exit(struct proc * p)1904815f175SKonstantin Belousov linux_on_exit(struct proc *p)
191ad2056f2SAlexander Leidinger {
192bc273677SDmitry Chagin struct linux_pemuldata *pem;
19381338031SDmitry Chagin struct thread *td = curthread;
194ad2056f2SAlexander Leidinger
1954815f175SKonstantin Belousov MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
196bc273677SDmitry Chagin
1977d96520bSDmitry Chagin LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
1987d96520bSDmitry Chagin td->td_tid, p->p_pid, p);
1997d96520bSDmitry Chagin
200bc273677SDmitry Chagin pem = pem_find(p);
201bc273677SDmitry Chagin if (pem == NULL)
202bc273677SDmitry Chagin return;
20381338031SDmitry Chagin (p->p_sysent->sv_thread_detach)(td);
204bc273677SDmitry Chagin
205bc273677SDmitry Chagin p->p_emuldata = NULL;
206bc273677SDmitry Chagin
207bc273677SDmitry Chagin sx_destroy(&pem->pem_sx);
208e0d3ea8cSDmitry Chagin free(pem, M_LINUX);
209e8b8b834SAlexander Leidinger }
210ad2056f2SAlexander Leidinger
21181338031SDmitry Chagin int
linux_common_execve(struct thread * td,struct image_args * eargs)21281338031SDmitry Chagin linux_common_execve(struct thread *td, struct image_args *eargs)
21381338031SDmitry Chagin {
214bc273677SDmitry Chagin struct linux_pemuldata *pem;
215d707582fSDmitry Chagin struct vmspace *oldvmspace;
21681338031SDmitry Chagin struct linux_emuldata *em;
21781338031SDmitry Chagin struct proc *p;
21881338031SDmitry Chagin int error;
219ad2056f2SAlexander Leidinger
22081338031SDmitry Chagin p = td->td_proc;
221ad2056f2SAlexander Leidinger
222d707582fSDmitry Chagin error = pre_execve(td, &oldvmspace);
223d707582fSDmitry Chagin if (error != 0)
224d707582fSDmitry Chagin return (error);
22581338031SDmitry Chagin
226aaf78c16SKonstantin Belousov error = kern_execve(td, eargs, NULL, oldvmspace);
227d707582fSDmitry Chagin post_execve(td, error, oldvmspace);
228814629ddSEd Schouten if (error != EJUSTRETURN)
22981338031SDmitry Chagin return (error);
23081338031SDmitry Chagin
23181338031SDmitry Chagin /*
23281338031SDmitry Chagin * In a case of transition from Linux binary execing to
233eae594f7SEd Maste * FreeBSD binary we destroy Linux emuldata thread & proc entries.
23481338031SDmitry Chagin */
23581338031SDmitry Chagin if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
236fd745e1dSDmitry Chagin
237fd745e1dSDmitry Chagin /* Clear ABI root directory if set. */
238fd745e1dSDmitry Chagin linux_pwd_onexec_native(td);
239fd745e1dSDmitry Chagin
24081338031SDmitry Chagin PROC_LOCK(p);
24181338031SDmitry Chagin em = em_find(td);
242bc273677SDmitry Chagin KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
24381338031SDmitry Chagin td->td_emuldata = NULL;
244bc273677SDmitry Chagin
245bc273677SDmitry Chagin pem = pem_find(p);
246bc273677SDmitry Chagin KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
247bc273677SDmitry Chagin p->p_emuldata = NULL;
24881338031SDmitry Chagin PROC_UNLOCK(p);
24981338031SDmitry Chagin
250*b834497cSDmitry Chagin free(em, M_LINUX);
251e0d3ea8cSDmitry Chagin free(pem, M_LINUX);
25281338031SDmitry Chagin }
253814629ddSEd Schouten return (EJUSTRETURN);
25481338031SDmitry Chagin }
25581338031SDmitry Chagin
256fd745e1dSDmitry Chagin int
linux_on_exec(struct proc * p,struct image_params * imgp)2574815f175SKonstantin Belousov linux_on_exec(struct proc *p, struct image_params *imgp)
25881338031SDmitry Chagin {
2598a15ac83SKonstantin Belousov struct thread *td;
26032ba368bSDmitry Chagin struct thread *othertd;
26123e8912cSDmitry Chagin #if defined(__amd64__)
26223e8912cSDmitry Chagin struct linux_pemuldata *pem;
26323e8912cSDmitry Chagin #endif
264fd745e1dSDmitry Chagin int error;
26532ba368bSDmitry Chagin
2668a15ac83SKonstantin Belousov td = curthread;
2674815f175SKonstantin Belousov MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
26881338031SDmitry Chagin
26981338031SDmitry Chagin /*
2704815f175SKonstantin Belousov * When execing to Linux binary, we create Linux emuldata
2714815f175SKonstantin Belousov * thread entry.
27281338031SDmitry Chagin */
2738a15ac83SKonstantin Belousov if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
2748a15ac83SKonstantin Belousov /*
2758a15ac83SKonstantin Belousov * Process already was under Linuxolator
2768a15ac83SKonstantin Belousov * before exec. Update emuldata to reflect
2778a15ac83SKonstantin Belousov * single-threaded cleaned state after exec.
2788a15ac83SKonstantin Belousov */
2790a4b664aSDmitry Chagin linux_proc_init(td, NULL, false);
2808a15ac83SKonstantin Belousov } else {
2818a15ac83SKonstantin Belousov /*
2828a15ac83SKonstantin Belousov * We are switching the process to Linux emulator.
2838a15ac83SKonstantin Belousov */
2840a4b664aSDmitry Chagin linux_proc_init(td, td, false);
2858a15ac83SKonstantin Belousov
2868a15ac83SKonstantin Belousov /*
2878a15ac83SKonstantin Belousov * Create a transient td_emuldata for all suspended
2888a15ac83SKonstantin Belousov * threads, so that p->p_sysent->sv_thread_detach() ==
2898a15ac83SKonstantin Belousov * linux_thread_detach() can find expected but unused
2908a15ac83SKonstantin Belousov * emuldata.
2918a15ac83SKonstantin Belousov */
2928a15ac83SKonstantin Belousov FOREACH_THREAD_IN_PROC(td->td_proc, othertd) {
2934815f175SKonstantin Belousov if (othertd == td)
2944815f175SKonstantin Belousov continue;
2950a4b664aSDmitry Chagin linux_proc_init(td, othertd, true);
2968a15ac83SKonstantin Belousov }
297fd745e1dSDmitry Chagin
298fd745e1dSDmitry Chagin /* Set ABI root directory. */
299fd745e1dSDmitry Chagin if ((error = linux_pwd_onexec(td)) != 0)
300fd745e1dSDmitry Chagin return (error);
3018a15ac83SKonstantin Belousov }
30223e8912cSDmitry Chagin #if defined(__amd64__)
30323e8912cSDmitry Chagin /*
30423e8912cSDmitry Chagin * An IA32 executable which has executable stack will have the
30523e8912cSDmitry Chagin * READ_IMPLIES_EXEC personality flag set automatically.
30623e8912cSDmitry Chagin */
30723e8912cSDmitry Chagin if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
30823e8912cSDmitry Chagin imgp->stack_prot & VM_PROT_EXECUTE) {
30923e8912cSDmitry Chagin pem = pem_find(p);
31023e8912cSDmitry Chagin pem->persona |= LINUX_READ_IMPLIES_EXEC;
31123e8912cSDmitry Chagin }
31223e8912cSDmitry Chagin #endif
313fd745e1dSDmitry Chagin return (0);
31481338031SDmitry Chagin }
31581338031SDmitry Chagin
31681338031SDmitry Chagin void
linux_thread_dtor(struct thread * td)3174815f175SKonstantin Belousov linux_thread_dtor(struct thread *td)
318ad2056f2SAlexander Leidinger {
319ad2056f2SAlexander Leidinger struct linux_emuldata *em;
320ad2056f2SAlexander Leidinger
32181338031SDmitry Chagin em = em_find(td);
32281338031SDmitry Chagin if (em == NULL)
32381338031SDmitry Chagin return;
32481338031SDmitry Chagin td->td_emuldata = NULL;
325ad2056f2SAlexander Leidinger
3267d96520bSDmitry Chagin LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
327ad2056f2SAlexander Leidinger
328*b834497cSDmitry Chagin free(em, M_LINUX);
329ad2056f2SAlexander Leidinger }
330ad2056f2SAlexander Leidinger
331ad2056f2SAlexander Leidinger void
linux_schedtail(struct thread * td)332e5d81ef1SDmitry Chagin linux_schedtail(struct thread *td)
333ad2056f2SAlexander Leidinger {
334ad2056f2SAlexander Leidinger struct linux_emuldata *em;
33574a0e24fSMateusz Guzik #ifdef KTR
33674a0e24fSMateusz Guzik int error;
33774a0e24fSMateusz Guzik #else
33874a0e24fSMateusz Guzik int error __unused;
33974a0e24fSMateusz Guzik #endif
340ad2056f2SAlexander Leidinger int *child_set_tid;
341ad2056f2SAlexander Leidinger
34281338031SDmitry Chagin em = em_find(td);
343bc273677SDmitry Chagin KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
344ad2056f2SAlexander Leidinger child_set_tid = em->child_set_tid;
345ad2056f2SAlexander Leidinger
34619e252baSAlexander Leidinger if (child_set_tid != NULL) {
347e0327ddbSDmitry Chagin error = copyout(&em->em_tid, child_set_tid,
34881338031SDmitry Chagin sizeof(em->em_tid));
3497d96520bSDmitry Chagin LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
35081338031SDmitry Chagin td->td_tid, child_set_tid, em->em_tid, error);
35181338031SDmitry Chagin } else
3527d96520bSDmitry Chagin LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
353bb63fddeSAlexander Leidinger }
354