xref: /freebsd/sys/compat/linux/linux_emul.c (revision 3460fab5fced39c7ea597cc7de0ebc3e4c88989a)
1  /*-
2   * SPDX-License-Identifier: BSD-2-Clause
3   *
4   * Copyright (c) 1994-1996 Søren Schmidt
5   * Copyright (c) 2006 Roman Divacky
6   * All rights reserved.
7   * Copyright (c) 2013 Dmitry Chagin <dchagin@FreeBSD.org>
8   *
9   * Redistribution and use in source and binary forms, with or without
10   * modification, are permitted provided that the following conditions
11   * are met:
12   * 1. Redistributions of source code must retain the above copyright
13   *    notice, this list of conditions and the following disclaimer.
14   * 2. Redistributions in binary form must reproduce the above copyright
15   *    notice, this list of conditions and the following disclaimer in the
16   *    documentation and/or other materials provided with the distribution.
17   *
18   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21   * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22   * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28   * SUCH DAMAGE.
29   */
30  
31  #include <sys/param.h>
32  #include <sys/fcntl.h>
33  #include <sys/imgact.h>
34  #include <sys/ktr.h>
35  #include <sys/lock.h>
36  #include <sys/malloc.h>
37  #include <sys/mutex.h>
38  #include <sys/proc.h>
39  #include <sys/resourcevar.h>
40  #include <sys/sx.h>
41  #include <sys/syscallsubr.h>
42  #include <sys/sysent.h>
43  
44  #include <compat/linux/linux_emul.h>
45  #include <compat/linux/linux_mib.h>
46  #include <compat/linux/linux_misc.h>
47  #include <compat/linux/linux_persona.h>
48  #include <compat/linux/linux_util.h>
49  
/*
 * The two characters "#!" ('#' is 0x23, '!' is 0x21) expressed as a single
 * 16-bit constant.  The value is selected by BYTE_ORDER so that it matches
 * those two bytes, in that order, when they are read as one 16-bit quantity.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif
55  
56  /*
57   * This returns reference to the thread emuldata entry (if found)
58   *
59   * Hold PROC_LOCK when referencing emuldata from other threads.
60   */
61  struct linux_emuldata *
em_find(struct thread * td)62  em_find(struct thread *td)
63  {
64  	struct linux_emuldata *em;
65  
66  	em = td->td_emuldata;
67  
68  	return (em);
69  }
70  
71  /*
72   * This returns reference to the proc pemuldata entry (if found)
73   *
74   * Hold PROC_LOCK when referencing proc pemuldata from other threads.
75   * Hold LINUX_PEM_LOCK wher referencing pemuldata members.
76   */
77  struct linux_pemuldata *
pem_find(struct proc * p)78  pem_find(struct proc *p)
79  {
80  	struct linux_pemuldata *pem;
81  
82  	pem = p->p_emuldata;
83  
84  	return (pem);
85  }
86  
87  /*
88   * Linux apps generally expect the soft open file limit to be set
89   * to 1024, often iterating over all the file descriptors up to that
90   * limit instead of using closefrom(2).  Give them what they want,
91   * unless there already is a resource limit in place.
92   */
93  static void
linux_set_default_openfiles(struct thread * td,struct proc * p)94  linux_set_default_openfiles(struct thread *td, struct proc *p)
95  {
96  	struct rlimit rlim;
97  	int error __diagused;
98  
99  	if (linux_default_openfiles < 0)
100  		return;
101  
102  	PROC_LOCK(p);
103  	lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim);
104  	PROC_UNLOCK(p);
105  	if (rlim.rlim_cur != rlim.rlim_max ||
106  	    rlim.rlim_cur <= linux_default_openfiles)
107  		return;
108  	rlim.rlim_cur = linux_default_openfiles;
109  	error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim);
110  	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
111  }
112  
113  /*
114   * The default stack size limit in Linux is 8MB.
115   */
116  static void
linux_set_default_stacksize(struct thread * td,struct proc * p)117  linux_set_default_stacksize(struct thread *td, struct proc *p)
118  {
119  	struct rlimit rlim;
120  	int error __diagused;
121  
122  	if (linux_default_stacksize < 0)
123  		return;
124  
125  	PROC_LOCK(p);
126  	lim_rlimit_proc(p, RLIMIT_STACK, &rlim);
127  	PROC_UNLOCK(p);
128  	if (rlim.rlim_cur != rlim.rlim_max ||
129  	    rlim.rlim_cur <= linux_default_stacksize)
130  		return;
131  	rlim.rlim_cur = linux_default_stacksize;
132  	error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim);
133  	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
134  }
135  
136  void
linux_proc_init(struct thread * td,struct thread * newtd,bool init_thread)137  linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread)
138  {
139  	struct linux_emuldata *em;
140  	struct linux_pemuldata *pem;
141  	struct proc *p;
142  
143  	if (newtd != NULL) {
144  		p = newtd->td_proc;
145  
146  		/* non-exec call */
147  		em = malloc(sizeof(*em), M_LINUX, M_WAITOK | M_ZERO);
148  		if (init_thread) {
149  			LINUX_CTR1(proc_init, "thread newtd(%d)",
150  			    newtd->td_tid);
151  
152  			em->em_tid = newtd->td_tid;
153  		} else {
154  			LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
155  
156  			em->em_tid = p->p_pid;
157  
158  			pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
159  			sx_init(&pem->pem_sx, "lpemlk");
160  			p->p_emuldata = pem;
161  		}
162  		newtd->td_emuldata = em;
163  
164  		linux_set_default_openfiles(td, p);
165  		linux_set_default_stacksize(td, p);
166  	} else {
167  		p = td->td_proc;
168  
169  		/* exec */
170  		LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
171  
172  		/* lookup the old one */
173  		em = em_find(td);
174  		KASSERT(em != NULL, ("proc_init: thread emuldata not found.\n"));
175  
176  		em->em_tid = p->p_pid;
177  		em->flags = 0;
178  		em->robust_futexes = NULL;
179  		em->child_clear_tid = NULL;
180  		em->child_set_tid = NULL;
181  
182  		pem = pem_find(p);
183  		KASSERT(pem != NULL, ("proc_init: proc emuldata not found.\n"));
184  		pem->persona = 0;
185  		pem->oom_score_adj = 0;
186  	}
187  }
188  
189  void
linux_on_exit(struct proc * p)190  linux_on_exit(struct proc *p)
191  {
192  	struct linux_pemuldata *pem;
193  	struct thread *td = curthread;
194  
195  	MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
196  
197  	LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
198  	    td->td_tid, p->p_pid, p);
199  
200  	pem = pem_find(p);
201  	if (pem == NULL)
202  		return;
203  	(p->p_sysent->sv_thread_detach)(td);
204  
205  	p->p_emuldata = NULL;
206  
207  	sx_destroy(&pem->pem_sx);
208  	free(pem, M_LINUX);
209  }
210  
211  int
linux_common_execve(struct thread * td,struct image_args * eargs)212  linux_common_execve(struct thread *td, struct image_args *eargs)
213  {
214  	struct linux_pemuldata *pem;
215  	struct vmspace *oldvmspace;
216  	struct linux_emuldata *em;
217  	struct proc *p;
218  	int error;
219  
220  	p = td->td_proc;
221  
222  	error = pre_execve(td, &oldvmspace);
223  	if (error != 0)
224  		return (error);
225  
226  	error = kern_execve(td, eargs, NULL, oldvmspace);
227  	post_execve(td, error, oldvmspace);
228  	if (error != EJUSTRETURN)
229  		return (error);
230  
231  	/*
232  	 * In a case of transition from Linux binary execing to
233  	 * FreeBSD binary we destroy Linux emuldata thread & proc entries.
234  	 */
235  	if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
236  
237  		/* Clear ABI root directory if set. */
238  		linux_pwd_onexec_native(td);
239  
240  		PROC_LOCK(p);
241  		em = em_find(td);
242  		KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
243  		td->td_emuldata = NULL;
244  
245  		pem = pem_find(p);
246  		KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
247  		p->p_emuldata = NULL;
248  		PROC_UNLOCK(p);
249  
250  		free(em, M_LINUX);
251  		free(pem, M_LINUX);
252  	}
253  	return (EJUSTRETURN);
254  }
255  
256  int
linux_on_exec(struct proc * p,struct image_params * imgp)257  linux_on_exec(struct proc *p, struct image_params *imgp)
258  {
259  	struct thread *td;
260  	struct thread *othertd;
261  #if defined(__amd64__)
262  	struct linux_pemuldata *pem;
263  #endif
264  	int error;
265  
266  	td = curthread;
267  	MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
268  
269  	/*
270  	 * When execing to Linux binary, we create Linux emuldata
271  	 * thread entry.
272  	 */
273  	if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
274  		/*
275  		 * Process already was under Linuxolator
276  		 * before exec.  Update emuldata to reflect
277  		 * single-threaded cleaned state after exec.
278  		 */
279  		linux_proc_init(td, NULL, false);
280  	} else {
281  		/*
282  		 * We are switching the process to Linux emulator.
283  		 */
284  		linux_proc_init(td, td, false);
285  
286  		/*
287  		 * Create a transient td_emuldata for all suspended
288  		 * threads, so that p->p_sysent->sv_thread_detach() ==
289  		 * linux_thread_detach() can find expected but unused
290  		 * emuldata.
291  		 */
292  		FOREACH_THREAD_IN_PROC(td->td_proc, othertd) {
293  			if (othertd == td)
294  				continue;
295  			linux_proc_init(td, othertd, true);
296  		}
297  
298  		/* Set ABI root directory. */
299  		if ((error = linux_pwd_onexec(td)) != 0)
300  			return (error);
301  	}
302  #if defined(__amd64__)
303  	/*
304  	 * An IA32 executable which has executable stack will have the
305  	 * READ_IMPLIES_EXEC personality flag set automatically.
306  	 */
307  	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
308  	    imgp->stack_prot & VM_PROT_EXECUTE) {
309  		pem = pem_find(p);
310  		pem->persona |= LINUX_READ_IMPLIES_EXEC;
311  	}
312  #endif
313  	return (0);
314  }
315  
316  void
linux_thread_dtor(struct thread * td)317  linux_thread_dtor(struct thread *td)
318  {
319  	struct linux_emuldata *em;
320  
321  	em = em_find(td);
322  	if (em == NULL)
323  		return;
324  	td->td_emuldata = NULL;
325  
326  	LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
327  
328  	free(em, M_LINUX);
329  }
330  
331  void
linux_schedtail(struct thread * td)332  linux_schedtail(struct thread *td)
333  {
334  	struct linux_emuldata *em;
335  #ifdef KTR
336  	int error;
337  #else
338  	int error __unused;
339  #endif
340  	int *child_set_tid;
341  
342  	em = em_find(td);
343  	KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
344  	child_set_tid = em->child_set_tid;
345  
346  	if (child_set_tid != NULL) {
347  		error = copyout(&em->em_tid, child_set_tid,
348  		    sizeof(em->em_tid));
349  		LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
350  		    td->td_tid, child_set_tid, em->em_tid, error);
351  	} else
352  		LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
353  }
354