xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 6e478b81541b96b1c700702157f261d7a984cd18)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/proc.h>
41 #include <sys/sysent.h>
42 #include <sys/imgact.h>
43 #include <sys/imgact_aout.h>
44 #include <sys/imgact_elf.h>
45 #include <sys/signalvar.h>
46 #include <sys/malloc.h>
47 #include <vm/vm.h>
48 #include <vm/vm_param.h>
49 #include <vm/vm_page.h>
50 #include <vm/vm_extern.h>
51 #include <sys/exec.h>
52 #include <sys/kernel.h>
53 #include <sys/module.h>
54 #include <machine/cpu.h>
55 
56 #include <i386/linux/linux.h>
57 #include <i386/linux/linux_proto.h>
58 
/* Defines the M_LINUX malloc type ("linux": "Linux mode structures"). */
MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * Signal code and its size, defined outside this file.  Both are placed in
 * the sysentvec initializers further down.
 */
extern char linux_sigcode[];
extern int linux_szsigcode;

/* Linux system call table, defined outside this file. */
extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

/*
 * Linker set passed to linux_ioctl_register_handlers() and
 * linux_ioctl_unregister_handlers() by the module event handler.
 */
extern struct linker_set linux_ioctl_handler_set;

/*
 * Forward declarations of the static hooks implemented in this file; they
 * are referenced from the sysentvec initializers.
 */
static int	linux_fixup __P((register_t **stack_base,
				 struct image_params *iparams));
static int	elf_linux_fixup __P((register_t **stack_base,
				     struct image_params *iparams));
static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
				       u_int *code, caddr_t *params));
static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
				   u_long code));
76 
77 /*
78  * Linux syscalls return negative errno's, we do positive and map them
79  */
80 static int bsd_to_linux_errno[ELAST + 1] = {
81   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
82  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
83  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
84  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
85  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
86 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
87 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
88 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
89   	-6, -6, -43, -42, -75, -6, -84
90 };
91 
92 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
93 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
94 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
95 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
96 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
97 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
98 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
99 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
100 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
101 };
102 
103 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
104 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
105 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
106 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
107 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
108 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
109 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
110 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
111 	SIGIO, SIGURG, 0
112 };
113 
114 /*
115  * If FreeBSD & Linux have a difference of opinion about what a trap
116  * means, deal with it here.
117  */
118 static int
119 translate_traps(int signal, int trap_code)
120 {
121 	if (signal != SIGBUS)
122 		return signal;
123 	switch (trap_code) {
124 	case T_PROTFLT:
125 	case T_TSSFLT:
126 	case T_DOUBLEFLT:
127 	case T_PAGEFLT:
128 		return SIGSEGV;
129 	default:
130 		return signal;
131 	}
132 }
133 
134 static int
135 linux_fixup(register_t **stack_base, struct image_params *imgp)
136 {
137 	register_t *argv, *envp;
138 
139 	argv = *stack_base;
140 	envp = *stack_base + (imgp->argc + 1);
141 	(*stack_base)--;
142 	**stack_base = (intptr_t)(void *)envp;
143 	(*stack_base)--;
144 	**stack_base = (intptr_t)(void *)argv;
145 	(*stack_base)--;
146 	**stack_base = imgp->argc;
147 	return 0;
148 }
149 
150 static int
151 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
152 {
153 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
154 	register_t *pos;
155 
156 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
157 
158 	if (args->trace) {
159 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
160 	}
161 	if (args->execfd != -1) {
162 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
163 	}
164 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
165 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
166 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
167 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
168 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
169 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
170 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
171 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
172 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
173 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
174 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
175 	AUXARGS_ENTRY(pos, AT_NULL, 0);
176 
177 	free(imgp->auxargs, M_TEMP);
178 	imgp->auxargs = NULL;
179 
180 	(*stack_base)--;
181 	**stack_base = (long)imgp->argc;
182 	return 0;
183 }
184 
/*
 * User code and data segment selectors, defined outside this file.
 * linux_sendsig() loads them into the trapframe before entering a handler.
 */
extern int _ucodesel, _udatasel;
186 
187 /*
188  * Send an interrupt to process.
189  *
190  * Stack is set up to allow sigcode stored
191  * in u. to call routine, followed by kcall
192  * to sigreturn routine below.  After sigreturn
193  * resets the signal mask, the stack, and the
194  * frame pointer, it returns to the user
195  * specified pc, psl.
196  */
197 
198 static void
199 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
200 {
201 	register struct proc *p = curproc;
202 	register struct trapframe *regs;
203 	struct linux_sigframe *fp, frame;
204 	struct sigacts *psp = p->p_sigacts;
205 	int oonstack;
206 
207 	regs = p->p_md.md_regs;
208 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
209 
210 #ifdef DEBUG
211 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
212 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
213 #endif
214 	/*
215 	 * Allocate space for the signal handler context.
216 	 */
217 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
218 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
219 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
220 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
221 		p->p_sigstk.ss_flags |= SS_ONSTACK;
222 	} else {
223 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
224 	}
225 
226 	/*
227 	 * grow() will return FALSE if the fp will not fit inside the stack
228 	 *	and the stack can not be grown. useracc will return FALSE
229 	 *	if access is denied.
230 	 */
231 	if ((grow_stack (p, (int)fp) == FALSE) ||
232 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
233 	    VM_PROT_WRITE)) {
234 		/*
235 		 * Process has trashed its stack; give it an illegal
236 		 * instruction to halt it in its tracks.
237 		 */
238 		SIGACTION(p, SIGILL) = SIG_DFL;
239 		SIGDELSET(p->p_sigignore, SIGILL);
240 		SIGDELSET(p->p_sigcatch, SIGILL);
241 		SIGDELSET(p->p_sigmask, SIGILL);
242 		psignal(p, SIGILL);
243 		return;
244 	}
245 
246 	/*
247 	 * Build the argument list for the signal handler.
248 	 */
249 	if (p->p_sysent->sv_sigtbl)
250 		if (sig <= p->p_sysent->sv_sigsize)
251 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
252 
253 	frame.sf_handler = catcher;
254 	frame.sf_sig = sig;
255 
256 	/*
257 	 * Build the signal context to be used by sigreturn.
258 	 */
259 	frame.sf_sc.sc_mask   = mask->__bits[0];
260 	frame.sf_sc.sc_gs     = rgs();
261 	frame.sf_sc.sc_fs     = regs->tf_fs;
262 	frame.sf_sc.sc_es     = regs->tf_es;
263 	frame.sf_sc.sc_ds     = regs->tf_ds;
264 	frame.sf_sc.sc_edi    = regs->tf_edi;
265 	frame.sf_sc.sc_esi    = regs->tf_esi;
266 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
267 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
268 	frame.sf_sc.sc_edx    = regs->tf_edx;
269 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
270 	frame.sf_sc.sc_eax    = regs->tf_eax;
271 	frame.sf_sc.sc_eip    = regs->tf_eip;
272 	frame.sf_sc.sc_cs     = regs->tf_cs;
273 	frame.sf_sc.sc_eflags = regs->tf_eflags;
274 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
275 	frame.sf_sc.sc_ss     = regs->tf_ss;
276 	frame.sf_sc.sc_err    = regs->tf_err;
277 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
278 
279 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
280 		/*
281 		 * Process has trashed its stack; give it an illegal
282 		 * instruction to halt it in its tracks.
283 		 */
284 		sigexit(p, SIGILL);
285 		/* NOTREACHED */
286 	}
287 
288 	/*
289 	 * Build context to run handler in.
290 	 */
291 	regs->tf_esp = (int)fp;
292 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
293 	regs->tf_eflags &= ~PSL_VM;
294 	regs->tf_cs = _ucodesel;
295 	regs->tf_ds = _udatasel;
296 	regs->tf_es = _udatasel;
297 	regs->tf_fs = _udatasel;
298 	load_gs(_udatasel);
299 	regs->tf_ss = _udatasel;
300 }
301 
302 /*
303  * System call to cleanup state after a signal
304  * has been taken.  Reset signal mask and
305  * stack state from context left by sendsig (above).
306  * Return to previous pc and psl as specified by
307  * context left by sendsig. Check carefully to
308  * make sure that the user has not modified the
309  * psl to gain improper privileges or to cause
310  * a machine fault.
311  */
312 int
313 linux_sigreturn(p, args)
314 	struct proc *p;
315 	struct linux_sigreturn_args *args;
316 {
317 	struct linux_sigcontext *scp, context;
318 	register struct trapframe *regs;
319 	int eflags;
320 
321 	regs = p->p_md.md_regs;
322 
323 #ifdef DEBUG
324 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
325 	    (long)p->p_pid, (void *)args->scp);
326 #endif
327 	/*
328 	 * The trampoline code hands us the context.
329 	 * It is unsafe to keep track of it ourselves, in the event that a
330 	 * program jumps out of a signal handler.
331 	 */
332 	scp = SCARG(args,scp);
333 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
334 		return (EFAULT);
335 
336 	/*
337 	 * Check for security violations.
338 	 */
339 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
340 	eflags = context.sc_eflags;
341 	/*
342 	 * XXX do allow users to change the privileged flag PSL_RF.  The
343 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
344 	 * sometimes set it there too.  tf_eflags is kept in the signal
345 	 * context during signal handling and there is no other place
346 	 * to remember it, so the PSL_RF bit may be corrupted by the
347 	 * signal handler without us knowing.  Corruption of the PSL_RF
348 	 * bit at worst causes one more or one less debugger trap, so
349 	 * allowing it is fairly harmless.
350 	 */
351 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
352     		return(EINVAL);
353 	}
354 
355 	/*
356 	 * Don't allow users to load a valid privileged %cs.  Let the
357 	 * hardware check for invalid selectors, excess privilege in
358 	 * other selectors, invalid %eip's and invalid %esp's.
359 	 */
360 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
361 	if (!CS_SECURE(context.sc_cs)) {
362 		trapsignal(p, SIGBUS, T_PROTFLT);
363 		return(EINVAL);
364 	}
365 
366 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
367 	SIGSETOLD(p->p_sigmask, context.sc_mask);
368 	SIG_CANTMASK(p->p_sigmask);
369 
370 	/*
371 	 * Restore signal context.
372 	 */
373 	/* %gs was restored by the trampoline. */
374 	regs->tf_fs     = context.sc_fs;
375 	regs->tf_es     = context.sc_es;
376 	regs->tf_ds     = context.sc_ds;
377 	regs->tf_edi    = context.sc_edi;
378 	regs->tf_esi    = context.sc_esi;
379 	regs->tf_ebp    = context.sc_ebp;
380 	regs->tf_ebx    = context.sc_ebx;
381 	regs->tf_edx    = context.sc_edx;
382 	regs->tf_ecx    = context.sc_ecx;
383 	regs->tf_eax    = context.sc_eax;
384 	regs->tf_eip    = context.sc_eip;
385 	regs->tf_cs     = context.sc_cs;
386 	regs->tf_eflags = eflags;
387 	regs->tf_esp    = context.sc_esp_at_signal;
388 	regs->tf_ss     = context.sc_ss;
389 
390 	return (EJUSTRETURN);
391 }
392 
393 static void
394 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
395 {
396 	args[0] = tf->tf_ebx;
397 	args[1] = tf->tf_ecx;
398 	args[2] = tf->tf_edx;
399 	args[3] = tf->tf_esi;
400 	args[4] = tf->tf_edi;
401 	*params = NULL;		/* no copyin */
402 }
403 
/*
 * System entry vector named "Linux a.out".  The initializer is positional;
 * the note beside each entry describes the value supplied, since the member
 * declarations of struct sysentvec are not visible in this file.
 */
struct sysentvec linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* dimension of linux_sysent[] */
	linux_sysent,		/* Linux system call table */
	0xff,			/* NOTE(review): presumably a mask; confirm */
	LINUX_SIGTBLSZ,		/* dimension of the signal table */
	bsd_to_linux_signal,	/* signal translation table */
	ELAST + 1,		/* dimension of the errno table */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,	/* trap/signal adjustment hook */
	linux_fixup,		/* stack fixup hook (a.out flavour) */
	linux_sendsig,		/* signal delivery hook */
	linux_sigcode,		/* signal code */
	&linux_szsigcode,	/* size of the signal code */
	linux_prepsyscall,	/* system call argument hook */
	"Linux a.out",		/* name string */
	aout_coredump		/* core dump routine, defined elsewhere */
};
421 
/*
 * System entry vector named "Linux ELF".  It matches linux_sysvec except
 * for the stack fixup hook, the name string and the core dump routine.
 * The initializer is positional; the notes describe the values supplied.
 */
struct sysentvec elf_linux_sysvec = {
	LINUX_SYS_MAXSYSCALL,	/* dimension of linux_sysent[] */
	linux_sysent,		/* Linux system call table */
	0xff,			/* NOTE(review): presumably a mask; confirm */
	LINUX_SIGTBLSZ,		/* dimension of the signal table */
	bsd_to_linux_signal,	/* signal translation table */
	ELAST + 1,		/* dimension of the errno table */
	bsd_to_linux_errno,	/* errno translation table */
	translate_traps,	/* trap/signal adjustment hook */
	elf_linux_fixup,	/* stack fixup hook (ELF flavour) */
	linux_sendsig,		/* signal delivery hook */
	linux_sigcode,		/* signal code */
	&linux_szsigcode,	/* size of the signal code */
	linux_prepsyscall,	/* system call argument hook */
	"Linux ELF",		/* name string */
	elf_coredump		/* core dump routine, defined elsewhere */
};
439 
/*
 * ELF brand entry tying ELFOSABI_LINUX and "/lib/ld-linux.so.1" to
 * elf_linux_sysvec.  The initializer is positional.
 * NOTE(review): "/compat/linux" is presumably the emulation root and the
 * .so path the interpreter name -- confirm against Elf32_Brandinfo.
 */
static Elf32_Brandinfo linux_brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.1",
					&elf_linux_sysvec
				 };
446 
/*
 * ELF brand entry identical to linux_brand except that it names
 * "/lib/ld-linux.so.2".  The initializer is positional.
 * NOTE(review): the strings are presumably the emulation root and the
 * interpreter name -- confirm against Elf32_Brandinfo.
 */
static Elf32_Brandinfo linux_glibc2brand = {
					ELFOSABI_LINUX,
					"/compat/linux",
					"/lib/ld-linux.so.2",
					&elf_linux_sysvec
				 };
453 
/*
 * NULL-terminated list of the brand entries above.  linux_elf_modevent()
 * walks it to insert, probe and remove the brands.
 */
Elf32_Brandinfo *linux_brandlist[] = {
					&linux_brand,
					&linux_glibc2brand,
					NULL
				};
459 
460 static int
461 linux_elf_modevent(module_t mod, int type, void *data)
462 {
463 	Elf32_Brandinfo **brandinfo;
464 	int error;
465 
466 	error = 0;
467 
468 	switch(type) {
469 	case MOD_LOAD:
470 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
471 		     ++brandinfo)
472 			if (elf_insert_brand_entry(*brandinfo) < 0)
473 				error = EINVAL;
474 		if (error)
475 			printf("cannot insert Linux elf brand handler\n");
476 		else {
477 			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
478 			if (bootverbose)
479 				printf("Linux-ELF exec handler installed\n");
480 		}
481 		break;
482 	case MOD_UNLOAD:
483 		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
484 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
485 		     ++brandinfo)
486 			if (elf_brand_inuse(*brandinfo))
487 				error = EBUSY;
488 
489 		if (error == 0) {
490 			for (brandinfo = &linux_brandlist[0];
491 			     *brandinfo != NULL; ++brandinfo)
492 				if (elf_remove_brand_entry(*brandinfo) < 0)
493 					error = EINVAL;
494 		}
495 		if (error)
496 			printf("Could not deinstall ELF interpreter entry\n");
497 		else if (bootverbose)
498 			printf("Linux-elf exec handler removed\n");
499 		break;
500 	default:
501 		break;
502 	}
503 	return error;
504 }
/*
 * Module description: the name "linuxelf", the event handler
 * linux_elf_modevent, and 0 for the remaining member.
 */
static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};
/* Declare the module with subsystem SI_SUB_EXEC and order SI_ORDER_ANY. */
DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
511