xref: /freebsd/sys/compat/linux/linux_emul.c (revision cd8537910406e68d4719136a5b0cf6d23bb1b23b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * Copyright (c) 2006 Roman Divacky
6  * Copyright (c) 2013 Dmitry Chagin
7  * All rights reserved.
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/fcntl.h>
37 #include <sys/imgact.h>
38 #include <sys/kernel.h>
39 #include <sys/ktr.h>
40 #include <sys/lock.h>
41 #include <sys/malloc.h>
42 #include <sys/mutex.h>
43 #include <sys/sx.h>
44 #include <sys/proc.h>
45 #include <sys/resourcevar.h>
46 #include <sys/syscallsubr.h>
47 #include <sys/sysent.h>
48 
49 #include <compat/linux/linux_emul.h>
50 #include <compat/linux/linux_mib.h>
51 #include <compat/linux/linux_misc.h>
52 #include <compat/linux/linux_persona.h>
53 #include <compat/linux/linux_util.h>
54 
/*
 * The two characters "#!" that start an interpreter script, expressed
 * as the 16-bit value obtained when they are read as one short in host
 * byte order.
 */
#if BYTE_ORDER != LITTLE_ENDIAN
#define SHELLMAGIC	0x2321
#else
#define SHELLMAGIC	0x2123 /* #! */
#endif
60 
61 /*
62  * This returns reference to the thread emuldata entry (if found)
63  *
64  * Hold PROC_LOCK when referencing emuldata from other threads.
65  */
66 struct linux_emuldata *
67 em_find(struct thread *td)
68 {
69 	struct linux_emuldata *em;
70 
71 	em = td->td_emuldata;
72 
73 	return (em);
74 }
75 
76 /*
77  * This returns reference to the proc pemuldata entry (if found)
78  *
79  * Hold PROC_LOCK when referencing proc pemuldata from other threads.
80  * Hold LINUX_PEM_LOCK wher referencing pemuldata members.
81  */
82 struct linux_pemuldata *
83 pem_find(struct proc *p)
84 {
85 	struct linux_pemuldata *pem;
86 
87 	pem = p->p_emuldata;
88 
89 	return (pem);
90 }
91 
92 /*
93  * Linux apps generally expect the soft open file limit to be set
94  * to 1024, often iterating over all the file descriptors up to that
95  * limit instead of using closefrom(2).  Give them what they want,
96  * unless there already is a resource limit in place.
97  */
98 static void
99 linux_set_default_openfiles(struct thread *td, struct proc *p)
100 {
101 	struct rlimit rlim;
102 	int error;
103 
104 	if (linux_default_openfiles < 0)
105 		return;
106 
107 	PROC_LOCK(p);
108 	lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim);
109 	PROC_UNLOCK(p);
110 	if (rlim.rlim_cur != rlim.rlim_max ||
111 	    rlim.rlim_cur <= linux_default_openfiles)
112 		return;
113 	rlim.rlim_cur = linux_default_openfiles;
114 	error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim);
115 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
116 }
117 
118 /*
119  * The default stack size limit in Linux is 8MB.
120  */
121 static void
122 linux_set_default_stacksize(struct thread *td, struct proc *p)
123 {
124 	struct rlimit rlim;
125 	int error;
126 
127 	if (linux_default_stacksize < 0)
128 		return;
129 
130 	PROC_LOCK(p);
131 	lim_rlimit_proc(p, RLIMIT_STACK, &rlim);
132 	PROC_UNLOCK(p);
133 	if (rlim.rlim_cur != rlim.rlim_max ||
134 	    rlim.rlim_cur <= linux_default_stacksize)
135 		return;
136 	rlim.rlim_cur = linux_default_stacksize;
137 	error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim);
138 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
139 }
140 
141 void
142 linux_proc_init(struct thread *td, struct thread *newtd, int flags)
143 {
144 	struct linux_emuldata *em;
145 	struct linux_pemuldata *pem;
146 	struct epoll_emuldata *emd;
147 	struct proc *p;
148 
149 	if (newtd != NULL) {
150 		p = newtd->td_proc;
151 
152 		/* non-exec call */
153 		em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO);
154 		if (flags & LINUX_CLONE_THREAD) {
155 			LINUX_CTR1(proc_init, "thread newtd(%d)",
156 			    newtd->td_tid);
157 
158 			em->em_tid = newtd->td_tid;
159 		} else {
160 			LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
161 
162 			em->em_tid = p->p_pid;
163 
164 			pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
165 			sx_init(&pem->pem_sx, "lpemlk");
166 			p->p_emuldata = pem;
167 		}
168 		newtd->td_emuldata = em;
169 
170 		linux_set_default_openfiles(td, p);
171 		linux_set_default_stacksize(td, p);
172 	} else {
173 		p = td->td_proc;
174 
175 		/* exec */
176 		LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
177 
178 		/* lookup the old one */
179 		em = em_find(td);
180 		KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
181 
182 		em->em_tid = p->p_pid;
183 		em->flags = 0;
184 		em->robust_futexes = NULL;
185 		em->child_clear_tid = NULL;
186 		em->child_set_tid = NULL;
187 
188 		 /* epoll should be destroyed in a case of exec. */
189 		pem = pem_find(p);
190 		KASSERT(pem != NULL, ("proc_exit: proc emuldata not found.\n"));
191 		pem->persona = 0;
192 		if (pem->epoll != NULL) {
193 			emd = pem->epoll;
194 			pem->epoll = NULL;
195 			free(emd, M_EPOLL);
196 		}
197 	}
198 
199 }
200 
201 void
202 linux_on_exit(struct proc *p)
203 {
204 	struct linux_pemuldata *pem;
205 	struct epoll_emuldata *emd;
206 	struct thread *td = curthread;
207 
208 	MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
209 
210 	LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
211 	    td->td_tid, p->p_pid, p);
212 
213 	pem = pem_find(p);
214 	if (pem == NULL)
215 		return;
216 	(p->p_sysent->sv_thread_detach)(td);
217 
218 	p->p_emuldata = NULL;
219 
220 	if (pem->epoll != NULL) {
221 		emd = pem->epoll;
222 		pem->epoll = NULL;
223 		free(emd, M_EPOLL);
224 	}
225 
226 	sx_destroy(&pem->pem_sx);
227 	free(pem, M_LINUX);
228 }
229 
230 /*
231  * If a Linux binary is exec'ing something, try this image activator
232  * first.  We override standard shell script execution in order to
233  * be able to modify the interpreter path.  We only do this if a Linux
234  * binary is doing the exec, so we do not create an EXEC module for it.
235  */
236 int
237 linux_exec_imgact_try(struct image_params *imgp)
238 {
239 	const char *head = (const char *)imgp->image_header;
240 	char *rpath;
241 	int error = -1;
242 
243 	/*
244 	 * The interpreter for shell scripts run from a Linux binary needs
245 	 * to be located in /compat/linux if possible in order to recursively
246 	 * maintain Linux path emulation.
247 	 */
248 	if (((const short *)head)[0] == SHELLMAGIC) {
249 		/*
250 		 * Run our normal shell image activator.  If it succeeds attempt
251 		 * to use the alternate path for the interpreter.  If an
252 		 * alternate path is found, use our stringspace to store it.
253 		 */
254 		if ((error = exec_shell_imgact(imgp)) == 0) {
255 			linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc),
256 			    imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0,
257 			    AT_FDCWD);
258 			if (rpath != NULL)
259 				imgp->args->fname_buf =
260 				    imgp->interpreter_name = rpath;
261 		}
262 	}
263 	return (error);
264 }
265 
266 int
267 linux_common_execve(struct thread *td, struct image_args *eargs)
268 {
269 	struct linux_pemuldata *pem;
270 	struct epoll_emuldata *emd;
271 	struct vmspace *oldvmspace;
272 	struct linux_emuldata *em;
273 	struct proc *p;
274 	int error;
275 
276 	p = td->td_proc;
277 
278 	error = pre_execve(td, &oldvmspace);
279 	if (error != 0)
280 		return (error);
281 
282 	error = kern_execve(td, eargs, NULL, oldvmspace);
283 	post_execve(td, error, oldvmspace);
284 	if (error != EJUSTRETURN)
285 		return (error);
286 
287 	/*
288 	 * In a case of transition from Linux binary execing to
289 	 * FreeBSD binary we destroy Linux emuldata thread & proc entries.
290 	 */
291 	if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
292 		PROC_LOCK(p);
293 		em = em_find(td);
294 		KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
295 		td->td_emuldata = NULL;
296 
297 		pem = pem_find(p);
298 		KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
299 		p->p_emuldata = NULL;
300 		PROC_UNLOCK(p);
301 
302 		if (pem->epoll != NULL) {
303 			emd = pem->epoll;
304 			pem->epoll = NULL;
305 			free(emd, M_EPOLL);
306 		}
307 
308 		free(em, M_TEMP);
309 		free(pem, M_LINUX);
310 	}
311 	return (EJUSTRETURN);
312 }
313 
314 void
315 linux_on_exec(struct proc *p, struct image_params *imgp)
316 {
317 	struct thread *td;
318 	struct thread *othertd;
319 #if defined(__amd64__)
320 	struct linux_pemuldata *pem;
321 #endif
322 
323 	td = curthread;
324 	MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
325 
326 	/*
327 	 * When execing to Linux binary, we create Linux emuldata
328 	 * thread entry.
329 	 */
330 	if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
331 		/*
332 		 * Process already was under Linuxolator
333 		 * before exec.  Update emuldata to reflect
334 		 * single-threaded cleaned state after exec.
335 		 */
336 		linux_proc_init(td, NULL, 0);
337 	} else {
338 		/*
339 		 * We are switching the process to Linux emulator.
340 		 */
341 		linux_proc_init(td, td, 0);
342 
343 		/*
344 		 * Create a transient td_emuldata for all suspended
345 		 * threads, so that p->p_sysent->sv_thread_detach() ==
346 		 * linux_thread_detach() can find expected but unused
347 		 * emuldata.
348 		 */
349 		FOREACH_THREAD_IN_PROC(td->td_proc, othertd) {
350 			if (othertd == td)
351 				continue;
352 			linux_proc_init(td, othertd, LINUX_CLONE_THREAD);
353 		}
354 	}
355 #if defined(__amd64__)
356 	/*
357 	 * An IA32 executable which has executable stack will have the
358 	 * READ_IMPLIES_EXEC personality flag set automatically.
359 	 */
360 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
361 	    imgp->stack_prot & VM_PROT_EXECUTE) {
362 		pem = pem_find(p);
363 		pem->persona |= LINUX_READ_IMPLIES_EXEC;
364 	}
365 #endif
366 }
367 
368 void
369 linux_thread_dtor(struct thread *td)
370 {
371 	struct linux_emuldata *em;
372 
373 	em = em_find(td);
374 	if (em == NULL)
375 		return;
376 	td->td_emuldata = NULL;
377 
378 	LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
379 
380 	free(em, M_TEMP);
381 }
382 
383 void
384 linux_schedtail(struct thread *td)
385 {
386 	struct linux_emuldata *em;
387 	struct proc *p;
388 	int error = 0;
389 	int *child_set_tid;
390 
391 	p = td->td_proc;
392 
393 	em = em_find(td);
394 	KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
395 	child_set_tid = em->child_set_tid;
396 
397 	if (child_set_tid != NULL) {
398 		error = copyout(&em->em_tid, child_set_tid,
399 		    sizeof(em->em_tid));
400 		LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
401 		    td->td_tid, child_set_tid, em->em_tid, error);
402 	} else
403 		LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
404 }
405