xref: /freebsd/sys/compat/linux/linux_emul.c (revision bdd1243df58e60e85101c09001d9812a789b6bc4)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * Copyright (c) 2006 Roman Divacky
6  * All rights reserved.
7  * Copyright (c) 2013 Dmitry Chagin <dchagin@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <sys/param.h>
35 #include <sys/fcntl.h>
36 #include <sys/imgact.h>
37 #include <sys/ktr.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/proc.h>
42 #include <sys/resourcevar.h>
43 #include <sys/sx.h>
44 #include <sys/syscallsubr.h>
45 #include <sys/sysent.h>
46 
47 #include <compat/linux/linux_emul.h>
48 #include <compat/linux/linux_mib.h>
49 #include <compat/linux/linux_misc.h>
50 #include <compat/linux/linux_persona.h>
51 #include <compat/linux/linux_util.h>
52 
/*
 * The two characters "#!" expressed as a single 16-bit value.  '#' is
 * 0x23 and '!' is 0x21, so the constant depends on the host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif
58 
59 /*
60  * This returns reference to the thread emuldata entry (if found)
61  *
62  * Hold PROC_LOCK when referencing emuldata from other threads.
63  */
64 struct linux_emuldata *
65 em_find(struct thread *td)
66 {
67 	struct linux_emuldata *em;
68 
69 	em = td->td_emuldata;
70 
71 	return (em);
72 }
73 
74 /*
75  * This returns reference to the proc pemuldata entry (if found)
76  *
77  * Hold PROC_LOCK when referencing proc pemuldata from other threads.
78  * Hold LINUX_PEM_LOCK wher referencing pemuldata members.
79  */
80 struct linux_pemuldata *
81 pem_find(struct proc *p)
82 {
83 	struct linux_pemuldata *pem;
84 
85 	pem = p->p_emuldata;
86 
87 	return (pem);
88 }
89 
90 /*
91  * Linux apps generally expect the soft open file limit to be set
92  * to 1024, often iterating over all the file descriptors up to that
93  * limit instead of using closefrom(2).  Give them what they want,
94  * unless there already is a resource limit in place.
95  */
96 static void
97 linux_set_default_openfiles(struct thread *td, struct proc *p)
98 {
99 	struct rlimit rlim;
100 	int error __diagused;
101 
102 	if (linux_default_openfiles < 0)
103 		return;
104 
105 	PROC_LOCK(p);
106 	lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim);
107 	PROC_UNLOCK(p);
108 	if (rlim.rlim_cur != rlim.rlim_max ||
109 	    rlim.rlim_cur <= linux_default_openfiles)
110 		return;
111 	rlim.rlim_cur = linux_default_openfiles;
112 	error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim);
113 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
114 }
115 
116 /*
117  * The default stack size limit in Linux is 8MB.
118  */
119 static void
120 linux_set_default_stacksize(struct thread *td, struct proc *p)
121 {
122 	struct rlimit rlim;
123 	int error __diagused;
124 
125 	if (linux_default_stacksize < 0)
126 		return;
127 
128 	PROC_LOCK(p);
129 	lim_rlimit_proc(p, RLIMIT_STACK, &rlim);
130 	PROC_UNLOCK(p);
131 	if (rlim.rlim_cur != rlim.rlim_max ||
132 	    rlim.rlim_cur <= linux_default_stacksize)
133 		return;
134 	rlim.rlim_cur = linux_default_stacksize;
135 	error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim);
136 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
137 }
138 
139 void
140 linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread)
141 {
142 	struct linux_emuldata *em;
143 	struct linux_pemuldata *pem;
144 	struct proc *p;
145 
146 	if (newtd != NULL) {
147 		p = newtd->td_proc;
148 
149 		/* non-exec call */
150 		em = malloc(sizeof(*em), M_TEMP, M_WAITOK | M_ZERO);
151 		if (init_thread) {
152 			LINUX_CTR1(proc_init, "thread newtd(%d)",
153 			    newtd->td_tid);
154 
155 			em->em_tid = newtd->td_tid;
156 		} else {
157 			LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
158 
159 			em->em_tid = p->p_pid;
160 
161 			pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
162 			sx_init(&pem->pem_sx, "lpemlk");
163 			p->p_emuldata = pem;
164 		}
165 		newtd->td_emuldata = em;
166 
167 		linux_set_default_openfiles(td, p);
168 		linux_set_default_stacksize(td, p);
169 	} else {
170 		p = td->td_proc;
171 
172 		/* exec */
173 		LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
174 
175 		/* lookup the old one */
176 		em = em_find(td);
177 		KASSERT(em != NULL, ("proc_init: thread emuldata not found.\n"));
178 
179 		em->em_tid = p->p_pid;
180 		em->flags = 0;
181 		em->robust_futexes = NULL;
182 		em->child_clear_tid = NULL;
183 		em->child_set_tid = NULL;
184 
185 		pem = pem_find(p);
186 		KASSERT(pem != NULL, ("proc_init: proc emuldata not found.\n"));
187 		pem->persona = 0;
188 		pem->oom_score_adj = 0;
189 	}
190 }
191 
192 void
193 linux_on_exit(struct proc *p)
194 {
195 	struct linux_pemuldata *pem;
196 	struct thread *td = curthread;
197 
198 	MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
199 
200 	LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
201 	    td->td_tid, p->p_pid, p);
202 
203 	pem = pem_find(p);
204 	if (pem == NULL)
205 		return;
206 	(p->p_sysent->sv_thread_detach)(td);
207 
208 	p->p_emuldata = NULL;
209 
210 	sx_destroy(&pem->pem_sx);
211 	free(pem, M_LINUX);
212 }
213 
214 int
215 linux_common_execve(struct thread *td, struct image_args *eargs)
216 {
217 	struct linux_pemuldata *pem;
218 	struct vmspace *oldvmspace;
219 	struct linux_emuldata *em;
220 	struct proc *p;
221 	int error;
222 
223 	p = td->td_proc;
224 
225 	error = pre_execve(td, &oldvmspace);
226 	if (error != 0)
227 		return (error);
228 
229 	error = kern_execve(td, eargs, NULL, oldvmspace);
230 	post_execve(td, error, oldvmspace);
231 	if (error != EJUSTRETURN)
232 		return (error);
233 
234 	/*
235 	 * In a case of transition from Linux binary execing to
236 	 * FreeBSD binary we destroy Linux emuldata thread & proc entries.
237 	 */
238 	if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
239 
240 		/* Clear ABI root directory if set. */
241 		linux_pwd_onexec_native(td);
242 
243 		PROC_LOCK(p);
244 		em = em_find(td);
245 		KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
246 		td->td_emuldata = NULL;
247 
248 		pem = pem_find(p);
249 		KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
250 		p->p_emuldata = NULL;
251 		PROC_UNLOCK(p);
252 
253 		free(em, M_TEMP);
254 		free(pem, M_LINUX);
255 	}
256 	return (EJUSTRETURN);
257 }
258 
259 int
260 linux_on_exec(struct proc *p, struct image_params *imgp)
261 {
262 	struct thread *td;
263 	struct thread *othertd;
264 #if defined(__amd64__)
265 	struct linux_pemuldata *pem;
266 #endif
267 	int error;
268 
269 	td = curthread;
270 	MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
271 
272 	/*
273 	 * When execing to Linux binary, we create Linux emuldata
274 	 * thread entry.
275 	 */
276 	if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
277 		/*
278 		 * Process already was under Linuxolator
279 		 * before exec.  Update emuldata to reflect
280 		 * single-threaded cleaned state after exec.
281 		 */
282 		linux_proc_init(td, NULL, false);
283 	} else {
284 		/*
285 		 * We are switching the process to Linux emulator.
286 		 */
287 		linux_proc_init(td, td, false);
288 
289 		/*
290 		 * Create a transient td_emuldata for all suspended
291 		 * threads, so that p->p_sysent->sv_thread_detach() ==
292 		 * linux_thread_detach() can find expected but unused
293 		 * emuldata.
294 		 */
295 		FOREACH_THREAD_IN_PROC(td->td_proc, othertd) {
296 			if (othertd == td)
297 				continue;
298 			linux_proc_init(td, othertd, true);
299 		}
300 
301 		/* Set ABI root directory. */
302 		if ((error = linux_pwd_onexec(td)) != 0)
303 			return (error);
304 	}
305 #if defined(__amd64__)
306 	/*
307 	 * An IA32 executable which has executable stack will have the
308 	 * READ_IMPLIES_EXEC personality flag set automatically.
309 	 */
310 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
311 	    imgp->stack_prot & VM_PROT_EXECUTE) {
312 		pem = pem_find(p);
313 		pem->persona |= LINUX_READ_IMPLIES_EXEC;
314 	}
315 #endif
316 	return (0);
317 }
318 
319 void
320 linux_thread_dtor(struct thread *td)
321 {
322 	struct linux_emuldata *em;
323 
324 	em = em_find(td);
325 	if (em == NULL)
326 		return;
327 	td->td_emuldata = NULL;
328 
329 	LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
330 
331 	free(em, M_TEMP);
332 }
333 
334 void
335 linux_schedtail(struct thread *td)
336 {
337 	struct linux_emuldata *em;
338 #ifdef KTR
339 	int error;
340 #else
341 	int error __unused;
342 #endif
343 	int *child_set_tid;
344 
345 	em = em_find(td);
346 	KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
347 	child_set_tid = em->child_set_tid;
348 
349 	if (child_set_tid != NULL) {
350 		error = copyout(&em->em_tid, child_set_tid,
351 		    sizeof(em->em_tid));
352 		LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
353 		    td->td_tid, child_set_tid, em->em_tid, error);
354 	} else
355 		LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
356 }
357