xref: /freebsd/sys/compat/linux/linux_emul.c (revision 4848dd0858385db46fa4e0192a134605ee42ab01)
1 /*-
2  * Copyright (c) 2006 Roman Divacky
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  */
28 
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_compat.h"
33 
34 #include <sys/param.h>
35 #include <sys/systm.h>
36 #include <sys/imgact.h>
37 #include <sys/kernel.h>
38 #include <sys/lock.h>
39 #include <sys/malloc.h>
40 #include <sys/mutex.h>
41 #include <sys/sx.h>
42 #include <sys/proc.h>
43 #include <sys/syscallsubr.h>
44 #include <sys/sysproto.h>
45 #include <sys/unistd.h>
46 
47 #ifdef COMPAT_LINUX32
48 #include <machine/../linux32/linux.h>
49 #include <machine/../linux32/linux32_proto.h>
50 #else
51 #include <machine/../linux/linux.h>
52 #include <machine/../linux/linux_proto.h>
53 #endif
54 
55 #include <compat/linux/linux_emul.h>
56 #include <compat/linux/linux_futex.h>
57 
58 struct sx	emul_shared_lock;
59 struct mtx	emul_lock;
60 
61 /* this returns locked reference to the emuldata entry (if found) */
62 struct linux_emuldata *
63 em_find(struct proc *p, int locked)
64 {
65 	struct linux_emuldata *em;
66 
67 	if (locked == EMUL_DOLOCK)
68 		EMUL_LOCK(&emul_lock);
69 
70 	em = p->p_emuldata;
71 
72 	if (em == NULL && locked == EMUL_DOLOCK)
73 		EMUL_UNLOCK(&emul_lock);
74 
75 	return (em);
76 }
77 
78 int
79 linux_proc_init(struct thread *td, pid_t child, int flags)
80 {
81 	struct linux_emuldata *em, *p_em;
82 	struct proc *p;
83 
84 	if (child != 0) {
85 		/* non-exec call */
86 		em = malloc(sizeof *em, M_LINUX, M_WAITOK | M_ZERO);
87 		em->pid = child;
88 		em->pdeath_signal = 0;
89 		em->robust_futexes = NULL;
90 		if (flags & LINUX_CLONE_THREAD) {
91 			/* handled later in the code */
92 		} else {
93 			struct linux_emuldata_shared *s;
94 
95 			s = malloc(sizeof *s, M_LINUX, M_WAITOK | M_ZERO);
96 			s->refs = 1;
97 			s->group_pid = child;
98 
99 			LIST_INIT(&s->threads);
100 			em->shared = s;
101 		}
102 	} else {
103 		/* lookup the old one */
104 		em = em_find(td->td_proc, EMUL_DOLOCK);
105 		KASSERT(em != NULL, ("proc_init: emuldata not found in exec case.\n"));
106 	}
107 
108 	em->child_clear_tid = NULL;
109 	em->child_set_tid = NULL;
110 
111 	/*
112 	 * allocate the shared struct only in clone()/fork cases in the case
113 	 * of clone() td = calling proc and child = pid of the newly created
114 	 * proc
115 	 */
116 	if (child != 0) {
117 		if (flags & LINUX_CLONE_THREAD) {
118 			/* lookup the parent */
119 			/*
120 			 * we dont have to lock the p_em because
121 			 * its waiting for us in linux_clone so
122 			 * there is no chance of it changing the
123 			 * p_em->shared address
124 			 */
125 			p_em = em_find(td->td_proc, EMUL_DONTLOCK);
126 			KASSERT(p_em != NULL, ("proc_init: parent emuldata not found for CLONE_THREAD\n"));
127 			em->shared = p_em->shared;
128 			EMUL_SHARED_WLOCK(&emul_shared_lock);
129 			em->shared->refs++;
130 			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
131 		} else {
132 			/*
133 			 * handled earlier to avoid malloc(M_WAITOK) with
134 			 * rwlock held
135 			 */
136 		}
137 	}
138 	if (child != 0) {
139 		EMUL_SHARED_WLOCK(&emul_shared_lock);
140 		LIST_INSERT_HEAD(&em->shared->threads, em, threads);
141 		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
142 
143 		p = pfind(child);
144 		KASSERT(p != NULL, ("process not found in proc_init\n"));
145 		p->p_emuldata = em;
146 		PROC_UNLOCK(p);
147 	} else
148 		EMUL_UNLOCK(&emul_lock);
149 
150 	return (0);
151 }
152 
153 void
154 linux_proc_exit(void *arg __unused, struct proc *p)
155 {
156 	struct linux_emuldata *em;
157 	int error;
158 	struct thread *td = FIRST_THREAD_IN_PROC(p);
159 	int *child_clear_tid;
160 	struct proc *q, *nq;
161 
162 	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
163 		return;
164 
165 	release_futexes(p);
166 
167 	/* find the emuldata */
168 	em = em_find(p, EMUL_DOLOCK);
169 
170 	KASSERT(em != NULL, ("proc_exit: emuldata not found.\n"));
171 
172 	/* reparent all procs that are not a thread leader to initproc */
173 	if (em->shared->group_pid != p->p_pid) {
174 		child_clear_tid = em->child_clear_tid;
175 		EMUL_UNLOCK(&emul_lock);
176 		sx_xlock(&proctree_lock);
177 		wakeup(initproc);
178 		PROC_LOCK(p);
179 		proc_reparent(p, initproc);
180 		p->p_sigparent = SIGCHLD;
181 		PROC_UNLOCK(p);
182 		sx_xunlock(&proctree_lock);
183 	} else {
184 		child_clear_tid = em->child_clear_tid;
185 		EMUL_UNLOCK(&emul_lock);
186 	}
187 
188 	EMUL_SHARED_WLOCK(&emul_shared_lock);
189 	LIST_REMOVE(em, threads);
190 
191 	em->shared->refs--;
192 	if (em->shared->refs == 0) {
193 		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
194 		free(em->shared, M_LINUX);
195 	} else
196 		EMUL_SHARED_WUNLOCK(&emul_shared_lock);
197 
198 	if (child_clear_tid != NULL) {
199 		struct linux_sys_futex_args cup;
200 		int null = 0;
201 
202 		error = copyout(&null, child_clear_tid, sizeof(null));
203 		if (error) {
204 			free(em, M_LINUX);
205 			return;
206 		}
207 
208 		/* futexes stuff */
209 		cup.uaddr = child_clear_tid;
210 		cup.op = LINUX_FUTEX_WAKE;
211 		cup.val = 0x7fffffff;	/* Awake everyone */
212 		cup.timeout = NULL;
213 		cup.uaddr2 = NULL;
214 		cup.val3 = 0;
215 		error = linux_sys_futex(FIRST_THREAD_IN_PROC(p), &cup);
216 		/*
217 		 * this cannot happen at the moment and if this happens it
218 		 * probably means there is a user space bug
219 		 */
220 		if (error)
221 			printf(LMSG("futex stuff in proc_exit failed.\n"));
222 	}
223 
224 	/* clean the stuff up */
225 	free(em, M_LINUX);
226 
227 	/* this is a little weird but rewritten from exit1() */
228 	sx_xlock(&proctree_lock);
229 	q = LIST_FIRST(&p->p_children);
230 	for (; q != NULL; q = nq) {
231 		nq = LIST_NEXT(q, p_sibling);
232 		if (q->p_flag & P_WEXIT)
233 			continue;
234 		if (__predict_false(q->p_sysent != &elf_linux_sysvec))
235 			continue;
236 		em = em_find(q, EMUL_DOLOCK);
237 		KASSERT(em != NULL, ("linux_reparent: emuldata not found: %i\n", q->p_pid));
238 		PROC_LOCK(q);
239 		if ((q->p_flag & P_WEXIT) == 0 && em->pdeath_signal != 0) {
240 			psignal(q, em->pdeath_signal);
241 		}
242 		PROC_UNLOCK(q);
243 		EMUL_UNLOCK(&emul_lock);
244 	}
245 	sx_xunlock(&proctree_lock);
246 }
247 
248 /*
249  * This is used in a case of transition from FreeBSD binary execing to linux binary
250  * in this case we create linux emuldata proc entry with the pid of the currently running
251  * process.
252  */
253 void
254 linux_proc_exec(void *arg __unused, struct proc *p, struct image_params *imgp)
255 {
256 	if (__predict_false(imgp->sysent == &elf_linux_sysvec
257 	    && p->p_sysent != &elf_linux_sysvec))
258 		linux_proc_init(FIRST_THREAD_IN_PROC(p), p->p_pid, 0);
259 	if (__predict_false(imgp->sysent != &elf_linux_sysvec
260 	    && p->p_sysent == &elf_linux_sysvec)) {
261 		struct linux_emuldata *em;
262 
263 		/*
264 		 * XXX:There's a race because here we assign p->p_emuldata NULL
265 		 * but the process is still counted as linux one for a short
266  		 * time so some other process might reference it and try to
267  		 * access its p->p_emuldata and panicing on a NULL reference.
268 		 */
269 		em = em_find(p, EMUL_DONTLOCK);
270 
271 		KASSERT(em != NULL, ("proc_exec: emuldata not found.\n"));
272 
273 		EMUL_SHARED_WLOCK(&emul_shared_lock);
274 		LIST_REMOVE(em, threads);
275 
276 		PROC_LOCK(p);
277 		p->p_emuldata = NULL;
278 		PROC_UNLOCK(p);
279 
280 		em->shared->refs--;
281 		if (em->shared->refs == 0) {
282 			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
283 			free(em->shared, M_LINUX);
284 		} else
285 			EMUL_SHARED_WUNLOCK(&emul_shared_lock);
286 
287 		free(em, M_LINUX);
288 	}
289 }
290 
291 void
292 linux_schedtail(void *arg __unused, struct proc *p)
293 {
294 	struct linux_emuldata *em;
295 	int error = 0;
296 	int *child_set_tid;
297 
298 	if (__predict_true(p->p_sysent != &elf_linux_sysvec))
299 		return;
300 
301 	/* find the emuldata */
302 	em = em_find(p, EMUL_DOLOCK);
303 
304 	KASSERT(em != NULL, ("linux_schedtail: emuldata not found.\n"));
305 	child_set_tid = em->child_set_tid;
306 	EMUL_UNLOCK(&emul_lock);
307 
308 	if (child_set_tid != NULL)
309 		error = copyout(&p->p_pid, (int *)child_set_tid,
310 		    sizeof(p->p_pid));
311 
312 	return;
313 }
314 
315 int
316 linux_set_tid_address(struct thread *td, struct linux_set_tid_address_args *args)
317 {
318 	struct linux_emuldata *em;
319 
320 #ifdef DEBUG
321 	if (ldebug(set_tid_address))
322 		printf(ARGS(set_tid_address, "%p"), args->tidptr);
323 #endif
324 
325 	/* find the emuldata */
326 	em = em_find(td->td_proc, EMUL_DOLOCK);
327 
328 	KASSERT(em != NULL, ("set_tid_address: emuldata not found.\n"));
329 
330 	em->child_clear_tid = args->tidptr;
331 	td->td_retval[0] = td->td_proc->p_pid;
332 
333 	EMUL_UNLOCK(&emul_lock);
334 	return 0;
335 }
336