xref: /freebsd/sys/compat/linux/linux_emul.c (revision 214e3e09b3381e44bf5d9c1dcd19c4b1b923a796)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * Copyright (c) 2006 Roman Divacky
6  * All rights reserved.
7  * Copyright (c) 2013 Dmitry Chagin <dchagin@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/cdefs.h>
32 #include <sys/param.h>
33 #include <sys/fcntl.h>
34 #include <sys/imgact.h>
35 #include <sys/ktr.h>
36 #include <sys/lock.h>
37 #include <sys/malloc.h>
38 #include <sys/mutex.h>
39 #include <sys/proc.h>
40 #include <sys/resourcevar.h>
41 #include <sys/sx.h>
42 #include <sys/syscallsubr.h>
43 #include <sys/sysent.h>
44 
45 #include <compat/linux/linux_emul.h>
46 #include <compat/linux/linux_mib.h>
47 #include <compat/linux/linux_misc.h>
48 #include <compat/linux/linux_persona.h>
49 #include <compat/linux/linux_util.h>
50 
/*
 * The two characters "#!" ('#' is 0x23, '!' is 0x21) expressed as a 16-bit
 * value in host byte order, so the constant differs between little-endian
 * and other byte orders.
 * NOTE(review): nothing in the code visible here references SHELLMAGIC;
 * confirm whether it is still needed.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif
56 
57 /*
58  * This returns reference to the thread emuldata entry (if found)
59  *
60  * Hold PROC_LOCK when referencing emuldata from other threads.
61  */
62 struct linux_emuldata *
63 em_find(struct thread *td)
64 {
65 	struct linux_emuldata *em;
66 
67 	em = td->td_emuldata;
68 
69 	return (em);
70 }
71 
72 /*
73  * This returns reference to the proc pemuldata entry (if found)
74  *
75  * Hold PROC_LOCK when referencing proc pemuldata from other threads.
76  * Hold LINUX_PEM_LOCK wher referencing pemuldata members.
77  */
78 struct linux_pemuldata *
79 pem_find(struct proc *p)
80 {
81 	struct linux_pemuldata *pem;
82 
83 	pem = p->p_emuldata;
84 
85 	return (pem);
86 }
87 
88 /*
89  * Linux apps generally expect the soft open file limit to be set
90  * to 1024, often iterating over all the file descriptors up to that
91  * limit instead of using closefrom(2).  Give them what they want,
92  * unless there already is a resource limit in place.
93  */
94 static void
95 linux_set_default_openfiles(struct thread *td, struct proc *p)
96 {
97 	struct rlimit rlim;
98 	int error __diagused;
99 
100 	if (linux_default_openfiles < 0)
101 		return;
102 
103 	PROC_LOCK(p);
104 	lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim);
105 	PROC_UNLOCK(p);
106 	if (rlim.rlim_cur != rlim.rlim_max ||
107 	    rlim.rlim_cur <= linux_default_openfiles)
108 		return;
109 	rlim.rlim_cur = linux_default_openfiles;
110 	error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim);
111 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
112 }
113 
114 /*
115  * The default stack size limit in Linux is 8MB.
116  */
117 static void
118 linux_set_default_stacksize(struct thread *td, struct proc *p)
119 {
120 	struct rlimit rlim;
121 	int error __diagused;
122 
123 	if (linux_default_stacksize < 0)
124 		return;
125 
126 	PROC_LOCK(p);
127 	lim_rlimit_proc(p, RLIMIT_STACK, &rlim);
128 	PROC_UNLOCK(p);
129 	if (rlim.rlim_cur != rlim.rlim_max ||
130 	    rlim.rlim_cur <= linux_default_stacksize)
131 		return;
132 	rlim.rlim_cur = linux_default_stacksize;
133 	error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim);
134 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
135 }
136 
137 void
138 linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread)
139 {
140 	struct linux_emuldata *em;
141 	struct linux_pemuldata *pem;
142 	struct proc *p;
143 
144 	if (newtd != NULL) {
145 		p = newtd->td_proc;
146 
147 		/* non-exec call */
148 		em = malloc(sizeof(*em), M_LINUX, M_WAITOK | M_ZERO);
149 		if (init_thread) {
150 			LINUX_CTR1(proc_init, "thread newtd(%d)",
151 			    newtd->td_tid);
152 
153 			em->em_tid = newtd->td_tid;
154 		} else {
155 			LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
156 
157 			em->em_tid = p->p_pid;
158 
159 			pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
160 			sx_init(&pem->pem_sx, "lpemlk");
161 			p->p_emuldata = pem;
162 		}
163 		newtd->td_emuldata = em;
164 
165 		linux_set_default_openfiles(td, p);
166 		linux_set_default_stacksize(td, p);
167 	} else {
168 		p = td->td_proc;
169 
170 		/* exec */
171 		LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
172 
173 		/* lookup the old one */
174 		em = em_find(td);
175 		KASSERT(em != NULL, ("proc_init: thread emuldata not found.\n"));
176 
177 		em->em_tid = p->p_pid;
178 		em->flags = 0;
179 		em->robust_futexes = NULL;
180 		em->child_clear_tid = NULL;
181 		em->child_set_tid = NULL;
182 
183 		pem = pem_find(p);
184 		KASSERT(pem != NULL, ("proc_init: proc emuldata not found.\n"));
185 		pem->persona = 0;
186 		pem->oom_score_adj = 0;
187 	}
188 }
189 
190 void
191 linux_on_exit(struct proc *p)
192 {
193 	struct linux_pemuldata *pem;
194 	struct thread *td = curthread;
195 
196 	MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
197 
198 	LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
199 	    td->td_tid, p->p_pid, p);
200 
201 	pem = pem_find(p);
202 	if (pem == NULL)
203 		return;
204 	(p->p_sysent->sv_thread_detach)(td);
205 
206 	p->p_emuldata = NULL;
207 
208 	sx_destroy(&pem->pem_sx);
209 	free(pem, M_LINUX);
210 }
211 
212 int
213 linux_common_execve(struct thread *td, struct image_args *eargs)
214 {
215 	struct linux_pemuldata *pem;
216 	struct vmspace *oldvmspace;
217 	struct linux_emuldata *em;
218 	struct proc *p;
219 	int error;
220 
221 	p = td->td_proc;
222 
223 	error = pre_execve(td, &oldvmspace);
224 	if (error != 0)
225 		return (error);
226 
227 	error = kern_execve(td, eargs, NULL, oldvmspace);
228 	post_execve(td, error, oldvmspace);
229 	if (error != EJUSTRETURN)
230 		return (error);
231 
232 	/*
233 	 * In a case of transition from Linux binary execing to
234 	 * FreeBSD binary we destroy Linux emuldata thread & proc entries.
235 	 */
236 	if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
237 
238 		/* Clear ABI root directory if set. */
239 		linux_pwd_onexec_native(td);
240 
241 		PROC_LOCK(p);
242 		em = em_find(td);
243 		KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
244 		td->td_emuldata = NULL;
245 
246 		pem = pem_find(p);
247 		KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
248 		p->p_emuldata = NULL;
249 		PROC_UNLOCK(p);
250 
251 		free(em, M_LINUX);
252 		free(pem, M_LINUX);
253 	}
254 	return (EJUSTRETURN);
255 }
256 
257 int
258 linux_on_exec(struct proc *p, struct image_params *imgp)
259 {
260 	struct thread *td;
261 	struct thread *othertd;
262 #if defined(__amd64__)
263 	struct linux_pemuldata *pem;
264 #endif
265 	int error;
266 
267 	td = curthread;
268 	MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
269 
270 	/*
271 	 * When execing to Linux binary, we create Linux emuldata
272 	 * thread entry.
273 	 */
274 	if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
275 		/*
276 		 * Process already was under Linuxolator
277 		 * before exec.  Update emuldata to reflect
278 		 * single-threaded cleaned state after exec.
279 		 */
280 		linux_proc_init(td, NULL, false);
281 	} else {
282 		/*
283 		 * We are switching the process to Linux emulator.
284 		 */
285 		linux_proc_init(td, td, false);
286 
287 		/*
288 		 * Create a transient td_emuldata for all suspended
289 		 * threads, so that p->p_sysent->sv_thread_detach() ==
290 		 * linux_thread_detach() can find expected but unused
291 		 * emuldata.
292 		 */
293 		FOREACH_THREAD_IN_PROC(td->td_proc, othertd) {
294 			if (othertd == td)
295 				continue;
296 			linux_proc_init(td, othertd, true);
297 		}
298 
299 		/* Set ABI root directory. */
300 		if ((error = linux_pwd_onexec(td)) != 0)
301 			return (error);
302 	}
303 #if defined(__amd64__)
304 	/*
305 	 * An IA32 executable which has executable stack will have the
306 	 * READ_IMPLIES_EXEC personality flag set automatically.
307 	 */
308 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
309 	    imgp->stack_prot & VM_PROT_EXECUTE) {
310 		pem = pem_find(p);
311 		pem->persona |= LINUX_READ_IMPLIES_EXEC;
312 	}
313 #endif
314 	return (0);
315 }
316 
317 void
318 linux_thread_dtor(struct thread *td)
319 {
320 	struct linux_emuldata *em;
321 
322 	em = em_find(td);
323 	if (em == NULL)
324 		return;
325 	td->td_emuldata = NULL;
326 
327 	LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
328 
329 	free(em, M_LINUX);
330 }
331 
332 void
333 linux_schedtail(struct thread *td)
334 {
335 	struct linux_emuldata *em;
336 #ifdef KTR
337 	int error;
338 #else
339 	int error __unused;
340 #endif
341 	int *child_set_tid;
342 
343 	em = em_find(td);
344 	KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
345 	child_set_tid = em->child_set_tid;
346 
347 	if (child_set_tid != NULL) {
348 		error = copyout(&em->em_tid, child_set_tid,
349 		    sizeof(em->em_tid));
350 		LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
351 		    td->td_tid, child_set_tid, em->em_tid, error);
352 	} else
353 		LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
354 }
355