xref: /freebsd/sys/compat/linux/linux_emul.c (revision 6e660824a82f590542932de52f128db584029893)
1 /*-
2  * Copyright (c) 2006 Roman Divacky
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 #include "opt_kdtrace.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/imgact.h>
38 #include <sys/kernel.h>
39 #include <sys/lock.h>
40 #include <sys/malloc.h>
41 #include <sys/mutex.h>
42 #include <sys/sdt.h>
43 #include <sys/sx.h>
44 #include <sys/proc.h>
45 #include <sys/syscallsubr.h>
46 #include <sys/sysent.h>
47 #include <sys/sysproto.h>
48 #include <sys/unistd.h>
49 
50 #ifdef COMPAT_LINUX32
51 #include <machine/../linux32/linux.h>
52 #include <machine/../linux32/linux32_proto.h>
53 #else
54 #include <machine/../linux/linux.h>
55 #include <machine/../linux/linux_proto.h>
56 #endif
57 
58 #include <compat/linux/linux_dtrace.h>
59 #include <compat/linux/linux_emul.h>
60 #include <compat/linux/linux_futex.h>
61 #include <compat/linux/linux_misc.h>
62 
63 /**
64  * Special DTrace provider for the linuxulator.
65  *
66  * In this file we define the provider for the entire linuxulator. All
67  * modules (= files of the linuxulator) use it.
68  *
69  * We define a different name depending on the emulated bitsize, see
70  * ../../<ARCH>/linux{,32}/linux.h, e.g.:
71  *      native bitsize          = linuxulator
72  *      amd64, 32bit emulation  = linuxulator32
73  */
74 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);
75 
76 /**
77  * Special DTrace module "locks", it covers some linuxulator internal
78  * locks.
79  */
80 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, locked, "struct mtx *");
81 LIN_SDT_PROBE_DEFINE1(locks, emul_lock, unlock, "struct mtx *");
82 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, locked, "struct sx *");
83 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_rlock, unlock, "struct sx *");
84 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, locked, "struct sx *");
85 LIN_SDT_PROBE_DEFINE1(locks, emul_shared_wlock, unlock, "struct sx *");
86 
87 /**
88  * DTrace probes in this module.
89  */
90 LIN_SDT_PROBE_DEFINE2(emul, em_find, entry, "struct proc *", "int");
91 LIN_SDT_PROBE_DEFINE0(emul, em_find, return);
92 LIN_SDT_PROBE_DEFINE3(emul, proc_init, entry, "struct thread *", "pid_t",
93     "int");
94 LIN_SDT_PROBE_DEFINE0(emul, proc_init, create_thread);
95 LIN_SDT_PROBE_DEFINE0(emul, proc_init, fork);
96 LIN_SDT_PROBE_DEFINE0(emul, proc_init, exec);
97 LIN_SDT_PROBE_DEFINE0(emul, proc_init, return);
98 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, entry, "struct proc *");
99 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, futex_failed);
100 LIN_SDT_PROBE_DEFINE3(emul, proc_exit, reparent, "pid_t", "pid_t",
101     "struct proc *");
102 LIN_SDT_PROBE_DEFINE1(emul, proc_exit, child_clear_tid_error, "int");
103 LIN_SDT_PROBE_DEFINE0(emul, proc_exit, return);
104 LIN_SDT_PROBE_DEFINE2(emul, proc_exec, entry, "struct proc *",
105     "struct image_params *");
106 LIN_SDT_PROBE_DEFINE0(emul, proc_exec, return);
107 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, entry);
108 LIN_SDT_PROBE_DEFINE1(emul, linux_schedtail, copyout_error, "int");
109 LIN_SDT_PROBE_DEFINE0(emul, linux_schedtail, return);
110 LIN_SDT_PROBE_DEFINE1(emul, linux_set_tid_address, entry, "int *");
111 LIN_SDT_PROBE_DEFINE0(emul, linux_set_tid_address, return);
112 LIN_SDT_PROBE_DEFINE2(emul, linux_kill_threads, entry, "struct thread *",
113     "int");
114 LIN_SDT_PROBE_DEFINE1(emul, linux_kill_threads, kill, "pid_t");
115 LIN_SDT_PROBE_DEFINE0(emul, linux_kill_threads, return);
116 
117 struct sx	emul_shared_lock;
118 struct mtx	emul_lock;
119 
120 /* this returns locked reference to the emuldata entry (if found) */
121 struct linux_emuldata *
122 em_find(struct proc *p, int locked)
123 {
124 	struct linux_emuldata *em;
125 
126 	LIN_SDT_PROBE2(emul, em_find, entry, p, locked);
127 
128 	if (locked == EMUL_DOLOCK)
129 		EMUL_LOCK(&emul_lock);
130 
131 	em = p->p_emuldata;
132 
133 	if (em == NULL && locked == EMUL_DOLOCK)
134 		EMUL_UNLOCK(&emul_lock);
135 
136 	LIN_SDT_PROBE1(emul, em_find, return, em);
137 	return (em);
138 }
139 
140 int
141 linux_proc_init(struct thread *td, pid_t child, int flags)
142 {
143 	struct linux_emuldata *em, *p_em;
144 	struct proc *p;
145 
146 	LIN_SDT_PROBE3(emul, proc_init, entry, td, child, flags);
147 
148 	if (child != 0) {
149 		/* fork or create a thread */
150 		em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
151 		em->pid = child;
152 		em->pdeath_signal = 0;
153 		em->flags = 0;
154 		em->robust_futexes = NULL;
155 		if (flags & LINUX_CLONE_THREAD) {
156 			/* handled later in the code */
157 			LIN_SDT_PROBE0(emul, proc_init, create_thread);
158 		} else {
159 			struct linux_emuldata_shared *s;
160 
161 			LIN_SDT_PROBE0(emul, proc_init, fork);
162 
163 			s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
164 			s->refs = 1;
165 			s->group_pid = child;
166 
167 			LIST_INIT(&s->threads);
168 			em->shared = s;
169 		}
170 	} else {
171 		/* exec */
172 		LIN_SDT_PROBE0(emul, proc_init, exec);
173 
174 		/* lookup the old one */
175 		em = em_find(td->td_proc, EMUL_DOLOCK);
176 		KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
177 	}
178 
179 	em->child_clear_tid = NULL;
180 	em->child_set_tid = NULL;
181 
182 	/*
183 	 * allocate the shared struct only in clone()/fork cases in the case
184 	 * of clone() td = calling proc and child = pid of the newly created
185 	 * proc
186 	 */
187 	if (child != 0) {
188 		if (flags & LINUX_CLONE_THREAD) {
189 			/* lookup the parent */
190 			/*
191 			 * we dont have to lock the p_em because
192 			 * its waiting for us in linux_clone so
193 			 * there is no chance of it changing the
194 			 * p_em->shared address
195 			 */
196 			p_em = em_find(td->td_proc, EMUL_DONTLOCK);
197 			KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
198 			em->shared = p_em->shared;
199 			EMUL_SHARED_WLOCK(&emul_shared_lock);
200 			em->shared->refs++;
201 			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
202 		} else {
203 			/*
204 			 * handled earlier to avoid malloc(M_WAITOK) with
205 			 * rwlock held
206 			 */
207 		}
208 
209 		EMUL_SHARED_WLOCK(&emul_shared_lock);
210 		LIST_INSERT_HEAD(&em->shared->threads, em, threads);
211 		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
212 
213 		p = pfind(child);
214 		KASSERT(p != NULL, ("process not found in proc_init\n"));
215 		p->p_emuldata = em;
216 		PROC_UNLOCK(p);
217 	} else
218 		EMUL_UNLOCK(&emul_lock);
219 
220 	LIN_SDT_PROBE0(emul, proc_init, return);
221 	return (0);
222 }
223 
224 void
225 linux_proc_exit(void *arg __unused, struct proc *p)
226 {
227 	struct linux_emuldata *em;
228 	int error, shared_flags, shared_xstat;
229 	struct thread *td = FIRST_THREAD_IN_PROC(p);
230 	int *child_clear_tid;
231 	struct proc *q, *nq;
232 
233 	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
234 		return;
235 
236 	LIN_SDT_PROBE1(emul, proc_exit, entry, p);
237 
238 	release_futexes(p);
239 
240 	/* find the emuldata */
241 	em = em_find(p, EMUL_DOLOCK);
242 
243 	KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
244 
245 	/* reparent all procs that are not a thread leader to initproc */
246 	if (em->shared->group_pid != p->p_pid) {
247 		LIN_SDT_PROBE3(emul, proc_exit, reparent,
248 		    em->shared->group_pid, p->p_pid, p);
249 
250 		child_clear_tid = em->child_clear_tid;
251 		EMUL_UNLOCK(&emul_lock);
252 		sx_xlock(&proctree_lock);
253 		wakeup(initproc);
254 		PROC_LOCK(p);
255 		proc_reparent(p, initproc);
256 		p->p_sigparent = SIGCHLD;
257 		PROC_UNLOCK(p);
258 		sx_xunlock(&proctree_lock);
259 	} else {
260 		child_clear_tid = em->child_clear_tid;
261 		EMUL_UNLOCK(&emul_lock);
262 	}
263 
264 	EMUL_SHARED_WLOCK(&emul_shared_lock);
265 	shared_flags = em->shared->flags;
266 	shared_xstat = em->shared->xstat;
267 	LIST_REMOVE(em, threads);
268 
269 	em->shared->refs--;
270 	if (em->shared->refs == 0) {
271 		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
272 		free(em->shared, M_LINUX);
273 	} else
274 		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
275 
276 	if ((shared_flags & EMUL_SHARED_HASXSTAT) != 0)
277 		p->p_xstat = shared_xstat;
278 
279 	if (child_clear_tid != NULL) {
280 		struct linux_sys_futex_args cup;
281 		int null = 0;
282 
283 		error = copyout(&null, child_clear_tid, sizeof(null));
284 		if (error) {
285 			LIN_SDT_PROBE1(emul, proc_exit,
286 			    child_clear_tid_error, error);
287 
288 			free(em, M_LINUX);
289 
290 			LIN_SDT_PROBE0(emul, proc_exit, return);
291 			return;
292 		}
293 
294 		/* futexes stuff */
295 		cup.uaddr = child_clear_tid;
296 		cup.op = LINUX_FUTEX_WAKE;
297 		cup.val = 0x7fffffff;	/* Awake everyone */
298 		cup.timeout = NULL;
299 		cup.uaddr2 = NULL;
300 		cup.val3 = 0;
301 		error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
302 		/*
303 		 * this cannot happen at the moment and if this happens it
304 		 * probably means there is a user space bug
305 		 */
306 		if (error) {
307 			LIN_SDT_PROBE0(emul, proc_exit, futex_failed);
308 			printf(LMSG("futex stuff in proc_exit failed.\n"));
309 		}
310 	}
311 
312 	/* clean the stuff up */
313 	free(em, M_LINUX);
314 
315 	/* this is a little weird but rewritten from exit1() */
316 	sx_xlock(&proctree_lock);
317 	q = LIST_FIRST(&p->p_children);
318 	for (; q != NULL; q = nq) {
319 		nq = LIST_NEXT(q, p_sibling);
320 		if (q->p_flag & P_WEXIT)
321 			continue;
322 		if (__predict_false(q->p_sysent != &elf_linux_sysvec))
323 			continue;
324 		em = em_find(q, EMUL_DOLOCK);
325 		KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
326 		PROC_LOCK(q);
327 		if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) {
328 			kern_psignal(q, em->pdeath_signal);
329 		}
330 		PROC_UNLOCK(q);
331 		EMUL_UNLOCK(&emul_lock);
332 	}
333 	sx_xunlock(&proctree_lock);
334 
335 	LIN_SDT_PROBE0(emul, proc_exit, return);
336 }
337 
338 /*
339  * This is used in a case of transition from FreeBSD binary execing to linux binary
340  * in this case we create linux emuldata proc entry with the pid of the currently running
341  * process.
342  */
343 void
344 linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
345 {
346 	if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
347 		LIN_SDT_PROBE2(emul, proc_exec, entry, p, imgp);
348 	}
349 	if (__predict_false(imgp->sysent == &elf_linux_sysvec
350 	    && p->p_sysent != &elf_linux_sysvec))
351 		linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
352 	if (__predict_false((p->p_sysent->sv_flags & SV_ABI_MASK) ==
353 	    SV_ABI_LINUX))
354 		/* Kill threads regardless of imgp->sysent value */
355 		linux_kill_threads(FIRST_THREAD_IN_PROC(p), SIGKILL);
356 	if (__predict_false(imgp->sysent != &elf_linux_sysvec
357 	    && p->p_sysent == &elf_linux_sysvec)) {
358 		struct linux_emuldata *em;
359 
360 		/*
361 		 * XXX:There's a race because here we assign p->p_emuldata NULL
362 		 * but the process is still counted as linux one for a short
363  		 * time so some other process might reference it and try to
364  		 * access its p->p_emuldata and panicing on a NULL reference.
365 		 */
366 		em = em_find(p, EMUL_DONTLOCK);
367 
368 		KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
369 
370 		EMUL_SHARED_WLOCK(&emul_shared_lock);
371 		LIST_REMOVE(em, threads);
372 
373 		PROC_LOCK(p);
374 		p->p_emuldata = NULL;
375 		PROC_UNLOCK(p);
376 
377 		em->shared->refs--;
378 		if (em->shared->refs == 0) {
379 			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
380 			free(em->shared, M_LINUX);
381 		} else
382 			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
383 
384 		free(em, M_LINUX);
385 	}
386 
387 	if (__predict_false(imgp->sysent == &elf_linux_sysvec)) {
388 		LIN_SDT_PROBE0(emul, proc_exec, return);
389 	}
390 }
391 
392 void
393 linux_schedtail(struct thread *td)
394 {
395 	struct linux_emuldata *em;
396 	struct proc *p;
397 	int error = 0;
398 	int *child_set_tid;
399 
400 	p = td->td_proc;
401 
402 	LIN_SDT_PROBE1(emul, linux_schedtail, entry, p);
403 
404 	/* find the emuldata */
405 	em = em_find(p, EMUL_DOLOCK);
406 
407 	KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
408 	child_set_tid = em->child_set_tid;
409 	EMUL_UNLOCK(&emul_lock);
410 
411 	if (child_set_tid != NULL) {
412 		error = copyout(&p->p_pid, (int *)child_set_tid,
413 		    sizeof(p->p_pid));
414 
415 		if (error != 0) {
416 			LIN_SDT_PROBE1(emul, linux_schedtail, copyout_error,
417 			    error);
418 		}
419 	}
420 
421 	LIN_SDT_PROBE0(emul, linux_schedtail, return);
422 
423 	return;
424 }
425 
426 int
427 linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
428 {
429 	struct linux_emuldata *em;
430 
431 	LIN_SDT_PROBE1(emul, linux_set_tid_address, entry, args->tidptr);
432 
433 	/* find the emuldata */
434 	em = em_find(td->td_proc, EMUL_DOLOCK);
435 
436 	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
437 
438 	em->child_clear_tid = args->tidptr;
439 	td->td_retval[0] = td->td_proc->p_pid;
440 
441 	EMUL_UNLOCK(&emul_lock);
442 
443 	LIN_SDT_PROBE0(emul, linux_set_tid_address, return);
444 	return 0;
445 }
446 
447 void
448 linux_kill_threads(struct thread *td, int sig)
449 {
450 	struct linux_emuldata *em, *td_em, *tmp_em;
451 	struct proc *sp;
452 
453 	LIN_SDT_PROBE2(emul, linux_kill_threads, entry, td, sig);
454 
455 	td_em = em_find(td->td_proc, EMUL_DONTLOCK);
456 
457 	KASSERT(td_em != NULL, ("linux_kill_threads: emuldata not found.\n"));
458 
459 	EMUL_SHARED_RLOCK(&emul_shared_lock);
460 	LIST_FOREACH_SAFE(em, &td_em->shared->threads, threads, tmp_em) {
461 		if (em->pid == td_em->pid)
462 			continue;
463 
464 		sp = pfind(em->pid);
465 		if ((sp->p_flag & P_WEXIT) == 0)
466 			kern_psignal(sp, sig);
467 		PROC_UNLOCK(sp);
468 
469 		LIN_SDT_PROBE1(emul, linux_kill_threads, kill, em->pid);
470 	}
471 	EMUL_SHARED_RUNLOCK(&emul_shared_lock);
472 
473 	LIN_SDT_PROBE0(emul, linux_kill_threads, return);
474 }
475