xref: /freebsd/sys/compat/linux/linux_emul.c (revision aa1a8ff2d6dbc51ef058f46f3db5a8bb77967145)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause
3  *
4  * Copyright (c) 1994-1996 Søren Schmidt
5  * Copyright (c) 2006 Roman Divacky
6  * All rights reserved.
7  * Copyright (c) 2013 Dmitry Chagin <dchagin@FreeBSD.org>
8  *
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1. Redistributions of source code must retain the above copyright
13  *    notice, this list of conditions and the following disclaimer.
14  * 2. Redistributions in binary form must reproduce the above copyright
15  *    notice, this list of conditions and the following disclaimer in the
16  *    documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  */
30 
31 #include <sys/param.h>
32 #include <sys/fcntl.h>
33 #include <sys/imgact.h>
34 #include <sys/ktr.h>
35 #include <sys/lock.h>
36 #include <sys/malloc.h>
37 #include <sys/mutex.h>
38 #include <sys/proc.h>
39 #include <sys/resourcevar.h>
40 #include <sys/sx.h>
41 #include <sys/syscallsubr.h>
42 #include <sys/sysent.h>
43 
44 #include <compat/linux/linux_emul.h>
45 #include <compat/linux/linux_mib.h>
46 #include <compat/linux/linux_misc.h>
47 #include <compat/linux/linux_persona.h>
48 #include <compat/linux/linux_util.h>
49 
/*
 * SHELLMAGIC is the two-byte "#!" sequence ('#' is 0x23, '!' is 0x21)
 * expressed as a 16-bit value in the host byte order, so it can be compared
 * against a 16-bit word loaded from the start of a script.
 * NOTE(review): no user of SHELLMAGIC is visible in this part of the file.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
/* Any other byte order: '#' ends up in the high byte. */
#define SHELLMAGIC	0x2321
#endif
55 
56 /*
57  * This returns reference to the thread emuldata entry (if found)
58  *
59  * Hold PROC_LOCK when referencing emuldata from other threads.
60  */
61 struct linux_emuldata *
62 em_find(struct thread *td)
63 {
64 	struct linux_emuldata *em;
65 
66 	em = td->td_emuldata;
67 
68 	return (em);
69 }
70 
71 /*
72  * This returns reference to the proc pemuldata entry (if found)
73  *
74  * Hold PROC_LOCK when referencing proc pemuldata from other threads.
75  * Hold LINUX_PEM_LOCK wher referencing pemuldata members.
76  */
77 struct linux_pemuldata *
78 pem_find(struct proc *p)
79 {
80 	struct linux_pemuldata *pem;
81 
82 	pem = p->p_emuldata;
83 
84 	return (pem);
85 }
86 
87 /*
88  * Linux apps generally expect the soft open file limit to be set
89  * to 1024, often iterating over all the file descriptors up to that
90  * limit instead of using closefrom(2).  Give them what they want,
91  * unless there already is a resource limit in place.
92  */
93 static void
94 linux_set_default_openfiles(struct thread *td, struct proc *p)
95 {
96 	struct rlimit rlim;
97 	int error __diagused;
98 
99 	if (linux_default_openfiles < 0)
100 		return;
101 
102 	PROC_LOCK(p);
103 	lim_rlimit_proc(p, RLIMIT_NOFILE, &rlim);
104 	PROC_UNLOCK(p);
105 	if (rlim.rlim_cur != rlim.rlim_max ||
106 	    rlim.rlim_cur <= linux_default_openfiles)
107 		return;
108 	rlim.rlim_cur = linux_default_openfiles;
109 	error = kern_proc_setrlimit(td, p, RLIMIT_NOFILE, &rlim);
110 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
111 }
112 
113 /*
114  * The default stack size limit in Linux is 8MB.
115  */
116 static void
117 linux_set_default_stacksize(struct thread *td, struct proc *p)
118 {
119 	struct rlimit rlim;
120 	int error __diagused;
121 
122 	if (linux_default_stacksize < 0)
123 		return;
124 
125 	PROC_LOCK(p);
126 	lim_rlimit_proc(p, RLIMIT_STACK, &rlim);
127 	PROC_UNLOCK(p);
128 	if (rlim.rlim_cur != rlim.rlim_max ||
129 	    rlim.rlim_cur <= linux_default_stacksize)
130 		return;
131 	rlim.rlim_cur = linux_default_stacksize;
132 	error = kern_proc_setrlimit(td, p, RLIMIT_STACK, &rlim);
133 	KASSERT(error == 0, ("kern_proc_setrlimit failed"));
134 }
135 
136 void
137 linux_proc_init(struct thread *td, struct thread *newtd, bool init_thread)
138 {
139 	struct linux_emuldata *em;
140 	struct linux_pemuldata *pem;
141 	struct proc *p;
142 
143 	if (newtd != NULL) {
144 		p = newtd->td_proc;
145 
146 		/* non-exec call */
147 		em = malloc(sizeof(*em), M_LINUX, M_WAITOK | M_ZERO);
148 		if (init_thread) {
149 			LINUX_CTR1(proc_init, "thread newtd(%d)",
150 			    newtd->td_tid);
151 
152 			em->em_tid = newtd->td_tid;
153 		} else {
154 			LINUX_CTR1(proc_init, "fork newtd(%d)", p->p_pid);
155 
156 			em->em_tid = p->p_pid;
157 
158 			pem = malloc(sizeof(*pem), M_LINUX, M_WAITOK | M_ZERO);
159 			sx_init(&pem->pem_sx, "lpemlk");
160 			p->p_emuldata = pem;
161 		}
162 		newtd->td_emuldata = em;
163 
164 		linux_set_default_openfiles(td, p);
165 		linux_set_default_stacksize(td, p);
166 	} else {
167 		p = td->td_proc;
168 
169 		/* exec */
170 		LINUX_CTR1(proc_init, "exec newtd(%d)", p->p_pid);
171 
172 		/* lookup the old one */
173 		em = em_find(td);
174 		KASSERT(em != NULL, ("proc_init: thread emuldata not found.\n"));
175 
176 		em->em_tid = p->p_pid;
177 		em->flags = 0;
178 		em->robust_futexes = NULL;
179 		em->child_clear_tid = NULL;
180 		em->child_set_tid = NULL;
181 
182 		pem = pem_find(p);
183 		KASSERT(pem != NULL, ("proc_init: proc emuldata not found.\n"));
184 		pem->persona = 0;
185 		pem->oom_score_adj = 0;
186 	}
187 }
188 
189 void
190 linux_on_exit(struct proc *p)
191 {
192 	struct linux_pemuldata *pem;
193 	struct thread *td = curthread;
194 
195 	MPASS(SV_CURPROC_ABI() == SV_ABI_LINUX);
196 
197 	LINUX_CTR3(proc_exit, "thread(%d) proc(%d) p %p",
198 	    td->td_tid, p->p_pid, p);
199 
200 	pem = pem_find(p);
201 	if (pem == NULL)
202 		return;
203 	(p->p_sysent->sv_thread_detach)(td);
204 
205 	p->p_emuldata = NULL;
206 
207 	sx_destroy(&pem->pem_sx);
208 	free(pem, M_LINUX);
209 }
210 
211 int
212 linux_common_execve(struct thread *td, struct image_args *eargs)
213 {
214 	struct linux_pemuldata *pem;
215 	struct vmspace *oldvmspace;
216 	struct linux_emuldata *em;
217 	struct proc *p;
218 	int error;
219 
220 	p = td->td_proc;
221 
222 	error = pre_execve(td, &oldvmspace);
223 	if (error != 0)
224 		return (error);
225 
226 	error = kern_execve(td, eargs, NULL, oldvmspace);
227 	post_execve(td, error, oldvmspace);
228 	if (error != EJUSTRETURN)
229 		return (error);
230 
231 	/*
232 	 * In a case of transition from Linux binary execing to
233 	 * FreeBSD binary we destroy Linux emuldata thread & proc entries.
234 	 */
235 	if (SV_CURPROC_ABI() != SV_ABI_LINUX) {
236 
237 		/* Clear ABI root directory if set. */
238 		linux_pwd_onexec_native(td);
239 
240 		PROC_LOCK(p);
241 		em = em_find(td);
242 		KASSERT(em != NULL, ("proc_exec: thread emuldata not found.\n"));
243 		td->td_emuldata = NULL;
244 
245 		pem = pem_find(p);
246 		KASSERT(pem != NULL, ("proc_exec: proc pemuldata not found.\n"));
247 		p->p_emuldata = NULL;
248 		PROC_UNLOCK(p);
249 
250 		free(em, M_LINUX);
251 		free(pem, M_LINUX);
252 	}
253 	return (EJUSTRETURN);
254 }
255 
256 int
257 linux_on_exec(struct proc *p, struct image_params *imgp)
258 {
259 	struct thread *td;
260 	struct thread *othertd;
261 #if defined(__amd64__)
262 	struct linux_pemuldata *pem;
263 #endif
264 	int error;
265 
266 	td = curthread;
267 	MPASS((imgp->sysent->sv_flags & SV_ABI_MASK) == SV_ABI_LINUX);
268 
269 	/*
270 	 * When execing to Linux binary, we create Linux emuldata
271 	 * thread entry.
272 	 */
273 	if (SV_PROC_ABI(p) == SV_ABI_LINUX) {
274 		/*
275 		 * Process already was under Linuxolator
276 		 * before exec.  Update emuldata to reflect
277 		 * single-threaded cleaned state after exec.
278 		 */
279 		linux_proc_init(td, NULL, false);
280 	} else {
281 		/*
282 		 * We are switching the process to Linux emulator.
283 		 */
284 		linux_proc_init(td, td, false);
285 
286 		/*
287 		 * Create a transient td_emuldata for all suspended
288 		 * threads, so that p->p_sysent->sv_thread_detach() ==
289 		 * linux_thread_detach() can find expected but unused
290 		 * emuldata.
291 		 */
292 		FOREACH_THREAD_IN_PROC(td->td_proc, othertd) {
293 			if (othertd == td)
294 				continue;
295 			linux_proc_init(td, othertd, true);
296 		}
297 
298 		/* Set ABI root directory. */
299 		if ((error = linux_pwd_onexec(td)) != 0)
300 			return (error);
301 	}
302 #if defined(__amd64__)
303 	/*
304 	 * An IA32 executable which has executable stack will have the
305 	 * READ_IMPLIES_EXEC personality flag set automatically.
306 	 */
307 	if (SV_PROC_FLAG(td->td_proc, SV_ILP32) &&
308 	    imgp->stack_prot & VM_PROT_EXECUTE) {
309 		pem = pem_find(p);
310 		pem->persona |= LINUX_READ_IMPLIES_EXEC;
311 	}
312 #endif
313 	return (0);
314 }
315 
316 void
317 linux_thread_dtor(struct thread *td)
318 {
319 	struct linux_emuldata *em;
320 
321 	em = em_find(td);
322 	if (em == NULL)
323 		return;
324 	td->td_emuldata = NULL;
325 
326 	LINUX_CTR1(thread_dtor, "thread(%d)", em->em_tid);
327 
328 	free(em, M_LINUX);
329 }
330 
331 void
332 linux_schedtail(struct thread *td)
333 {
334 	struct linux_emuldata *em;
335 #ifdef KTR
336 	int error;
337 #else
338 	int error __unused;
339 #endif
340 	int *child_set_tid;
341 
342 	em = em_find(td);
343 	KASSERT(em != NULL, ("linux_schedtail: thread emuldata not found.\n"));
344 	child_set_tid = em->child_set_tid;
345 
346 	if (child_set_tid != NULL) {
347 		error = copyout(&em->em_tid, child_set_tid,
348 		    sizeof(em->em_tid));
349 		LINUX_CTR4(schedtail, "thread(%d) %p stored %d error %d",
350 		    td->td_tid, child_set_tid, em->em_tid, error);
351 	} else
352 		LINUX_CTR1(schedtail, "thread(%d)", em->em_tid);
353 }
354