xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision daf1cffce2e07931f27c6c6998652e90df6ba87e)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software without specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  * $FreeBSD$
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #include "opt_compat.h"
33 
34 #ifndef COMPAT_43
35 #error "Unable to compile Linux-emulator due to missing COMPAT_43 option!"
36 #endif
37 
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/buf.h>
41 #include <sys/proc.h>
42 #include <sys/sysent.h>
43 #include <sys/imgact.h>
44 #include <sys/imgact_aout.h>
45 #include <sys/imgact_elf.h>
46 #include <sys/signalvar.h>
47 #include <sys/malloc.h>
48 #include <vm/vm.h>
49 #include <vm/vm_param.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_extern.h>
52 #include <sys/exec.h>
53 #include <sys/kernel.h>
54 #include <sys/module.h>
55 #include <machine/cpu.h>
56 
57 #include <i386/linux/linux.h>
58 #include <i386/linux/linux_proto.h>
59 
60 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");
61 
62 extern char linux_sigcode[];
63 extern int linux_szsigcode;
64 
65 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];
66 
67 extern struct linker_set linux_ioctl_handler_set;
68 
69 static int	linux_fixup __P((register_t **stack_base,
70 				 struct image_params *iparams));
71 static int	elf_linux_fixup __P((register_t **stack_base,
72 				     struct image_params *iparams));
73 static void	linux_prepsyscall __P((struct trapframe *tf, int *args,
74 				       u_int *code, caddr_t *params));
75 static void     linux_sendsig __P((sig_t catcher, int sig, sigset_t *mask,
76 				   u_long code));
77 
78 /*
79  * Linux syscalls return negative errno's, we do positive and map them
80  */
81 static int bsd_to_linux_errno[ELAST + 1] = {
82   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
83  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
84  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
85  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
86  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
87 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
88 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
89 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
90   	-6, -6, -43, -42, -75, -6, -84
91 };
92 
93 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = {
94 	LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL,
95 	LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE,
96 	LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 0,
97 	LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG,
98 	LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD,
99 	LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU,
100 	LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH,
101 	0, LINUX_SIGUSR1, LINUX_SIGUSR2
102 };
103 
104 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = {
105 	SIGHUP, SIGINT, SIGQUIT, SIGILL,
106 	SIGTRAP, SIGABRT, SIGBUS, SIGFPE,
107 	SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2,
108 	SIGPIPE, SIGALRM, SIGTERM, SIGBUS,
109 	SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP,
110 	SIGTTIN, SIGTTOU, SIGURG, SIGXCPU,
111 	SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH,
112 	SIGIO, SIGURG, 0
113 };
114 
115 /*
116  * If FreeBSD & Linux have a difference of opinion about what a trap
117  * means, deal with it here.
118  */
119 static int
120 translate_traps(int signal, int trap_code)
121 {
122 	if (signal != SIGBUS)
123 		return signal;
124 	switch (trap_code) {
125 	case T_PROTFLT:
126 	case T_TSSFLT:
127 	case T_DOUBLEFLT:
128 	case T_PAGEFLT:
129 		return SIGSEGV;
130 	default:
131 		return signal;
132 	}
133 }
134 
135 static int
136 linux_fixup(register_t **stack_base, struct image_params *imgp)
137 {
138 	register_t *argv, *envp;
139 
140 	argv = *stack_base;
141 	envp = *stack_base + (imgp->argc + 1);
142 	(*stack_base)--;
143 	**stack_base = (intptr_t)(void *)envp;
144 	(*stack_base)--;
145 	**stack_base = (intptr_t)(void *)argv;
146 	(*stack_base)--;
147 	**stack_base = imgp->argc;
148 	return 0;
149 }
150 
151 static int
152 elf_linux_fixup(register_t **stack_base, struct image_params *imgp)
153 {
154 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
155 	register_t *pos;
156 
157 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
158 
159 	if (args->trace) {
160 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
161 	}
162 	if (args->execfd != -1) {
163 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
164 	}
165 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
166 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
167 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
168 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
169 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
170 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
171 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
172 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
173 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
174 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
175 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
176 	AUXARGS_ENTRY(pos, AT_NULL, 0);
177 
178 	free(imgp->auxargs, M_TEMP);
179 	imgp->auxargs = NULL;
180 
181 	(*stack_base)--;
182 	**stack_base = (long)imgp->argc;
183 	return 0;
184 }
185 
186 extern int _ucodesel, _udatasel;
187 
188 /*
189  * Send an interrupt to process.
190  *
191  * Stack is set up to allow sigcode stored
192  * in u. to call routine, followed by kcall
193  * to sigreturn routine below.  After sigreturn
194  * resets the signal mask, the stack, and the
195  * frame pointer, it returns to the user
196  * specified pc, psl.
197  */
198 
199 static void
200 linux_sendsig(sig_t catcher, int sig, sigset_t *mask, u_long code)
201 {
202 	register struct proc *p = curproc;
203 	register struct trapframe *regs;
204 	struct linux_sigframe *fp, frame;
205 	struct sigacts *psp = p->p_sigacts;
206 	int oonstack;
207 
208 	regs = p->p_md.md_regs;
209 	oonstack = p->p_sigstk.ss_flags & SS_ONSTACK;
210 
211 #ifdef DEBUG
212 	printf("Linux-emul(%ld): linux_sendsig(%p, %d, %p, %lu)\n",
213 	    (long)p->p_pid, catcher, sig, (void*)mask, code);
214 #endif
215 	/*
216 	 * Allocate space for the signal handler context.
217 	 */
218 	if ((p->p_flag & P_ALTSTACK) && !oonstack &&
219 	    SIGISMEMBER(psp->ps_sigonstack, sig)) {
220 		fp = (struct linux_sigframe *)(p->p_sigstk.ss_sp +
221 		    p->p_sigstk.ss_size - sizeof(struct linux_sigframe));
222 		p->p_sigstk.ss_flags |= SS_ONSTACK;
223 	} else {
224 		fp = (struct linux_sigframe *)regs->tf_esp - 1;
225 	}
226 
227 	/*
228 	 * grow() will return FALSE if the fp will not fit inside the stack
229 	 *	and the stack can not be grown. useracc will return FALSE
230 	 *	if access is denied.
231 	 */
232 	if ((grow_stack (p, (int)fp) == FALSE) ||
233 	    !useracc((caddr_t)fp, sizeof (struct linux_sigframe),
234 	    VM_PROT_WRITE)) {
235 		/*
236 		 * Process has trashed its stack; give it an illegal
237 		 * instruction to halt it in its tracks.
238 		 */
239 		SIGACTION(p, SIGILL) = SIG_DFL;
240 		SIGDELSET(p->p_sigignore, SIGILL);
241 		SIGDELSET(p->p_sigcatch, SIGILL);
242 		SIGDELSET(p->p_sigmask, SIGILL);
243 		psignal(p, SIGILL);
244 		return;
245 	}
246 
247 	/*
248 	 * Build the argument list for the signal handler.
249 	 */
250 	if (p->p_sysent->sv_sigtbl)
251 		if (sig <= p->p_sysent->sv_sigsize)
252 			sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)];
253 
254 	frame.sf_handler = catcher;
255 	frame.sf_sig = sig;
256 
257 	/*
258 	 * Build the signal context to be used by sigreturn.
259 	 */
260 	frame.sf_sc.sc_mask   = mask->__bits[0];
261 	frame.sf_sc.sc_gs     = rgs();
262 	frame.sf_sc.sc_fs     = regs->tf_fs;
263 	frame.sf_sc.sc_es     = regs->tf_es;
264 	frame.sf_sc.sc_ds     = regs->tf_ds;
265 	frame.sf_sc.sc_edi    = regs->tf_edi;
266 	frame.sf_sc.sc_esi    = regs->tf_esi;
267 	frame.sf_sc.sc_ebp    = regs->tf_ebp;
268 	frame.sf_sc.sc_ebx    = regs->tf_ebx;
269 	frame.sf_sc.sc_edx    = regs->tf_edx;
270 	frame.sf_sc.sc_ecx    = regs->tf_ecx;
271 	frame.sf_sc.sc_eax    = regs->tf_eax;
272 	frame.sf_sc.sc_eip    = regs->tf_eip;
273 	frame.sf_sc.sc_cs     = regs->tf_cs;
274 	frame.sf_sc.sc_eflags = regs->tf_eflags;
275 	frame.sf_sc.sc_esp_at_signal = regs->tf_esp;
276 	frame.sf_sc.sc_ss     = regs->tf_ss;
277 	frame.sf_sc.sc_err    = regs->tf_err;
278 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
279 
280 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
281 		/*
282 		 * Process has trashed its stack; give it an illegal
283 		 * instruction to halt it in its tracks.
284 		 */
285 		sigexit(p, SIGILL);
286 		/* NOTREACHED */
287 	}
288 
289 	/*
290 	 * Build context to run handler in.
291 	 */
292 	regs->tf_esp = (int)fp;
293 	regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode);
294 	regs->tf_eflags &= ~PSL_VM;
295 	regs->tf_cs = _ucodesel;
296 	regs->tf_ds = _udatasel;
297 	regs->tf_es = _udatasel;
298 	regs->tf_fs = _udatasel;
299 	load_gs(_udatasel);
300 	regs->tf_ss = _udatasel;
301 }
302 
303 /*
304  * System call to cleanup state after a signal
305  * has been taken.  Reset signal mask and
306  * stack state from context left by sendsig (above).
307  * Return to previous pc and psl as specified by
308  * context left by sendsig. Check carefully to
309  * make sure that the user has not modified the
310  * psl to gain improper privileges or to cause
311  * a machine fault.
312  */
313 int
314 linux_sigreturn(p, args)
315 	struct proc *p;
316 	struct linux_sigreturn_args *args;
317 {
318 	struct linux_sigcontext *scp, context;
319 	register struct trapframe *regs;
320 	int eflags;
321 
322 	regs = p->p_md.md_regs;
323 
324 #ifdef DEBUG
325 	printf("Linux-emul(%ld): linux_sigreturn(%p)\n",
326 	    (long)p->p_pid, (void *)args->scp);
327 #endif
328 	/*
329 	 * The trampoline code hands us the context.
330 	 * It is unsafe to keep track of it ourselves, in the event that a
331 	 * program jumps out of a signal handler.
332 	 */
333 	scp = SCARG(args,scp);
334 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
335 		return (EFAULT);
336 
337 	/*
338 	 * Check for security violations.
339 	 */
340 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
341 	eflags = context.sc_eflags;
342 	/*
343 	 * XXX do allow users to change the privileged flag PSL_RF.  The
344 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
345 	 * sometimes set it there too.  tf_eflags is kept in the signal
346 	 * context during signal handling and there is no other place
347 	 * to remember it, so the PSL_RF bit may be corrupted by the
348 	 * signal handler without us knowing.  Corruption of the PSL_RF
349 	 * bit at worst causes one more or one less debugger trap, so
350 	 * allowing it is fairly harmless.
351 	 */
352 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) {
353     		return(EINVAL);
354 	}
355 
356 	/*
357 	 * Don't allow users to load a valid privileged %cs.  Let the
358 	 * hardware check for invalid selectors, excess privilege in
359 	 * other selectors, invalid %eip's and invalid %esp's.
360 	 */
361 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
362 	if (!CS_SECURE(context.sc_cs)) {
363 		trapsignal(p, SIGBUS, T_PROTFLT);
364 		return(EINVAL);
365 	}
366 
367 	p->p_sigstk.ss_flags &= ~SS_ONSTACK;
368 	SIGSETOLD(p->p_sigmask, context.sc_mask);
369 	SIG_CANTMASK(p->p_sigmask);
370 
371 	/*
372 	 * Restore signal context.
373 	 */
374 	/* %gs was restored by the trampoline. */
375 	regs->tf_fs     = context.sc_fs;
376 	regs->tf_es     = context.sc_es;
377 	regs->tf_ds     = context.sc_ds;
378 	regs->tf_edi    = context.sc_edi;
379 	regs->tf_esi    = context.sc_esi;
380 	regs->tf_ebp    = context.sc_ebp;
381 	regs->tf_ebx    = context.sc_ebx;
382 	regs->tf_edx    = context.sc_edx;
383 	regs->tf_ecx    = context.sc_ecx;
384 	regs->tf_eax    = context.sc_eax;
385 	regs->tf_eip    = context.sc_eip;
386 	regs->tf_cs     = context.sc_cs;
387 	regs->tf_eflags = eflags;
388 	regs->tf_esp    = context.sc_esp_at_signal;
389 	regs->tf_ss     = context.sc_ss;
390 
391 	return (EJUSTRETURN);
392 }
393 
394 static void
395 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
396 {
397 	args[0] = tf->tf_ebx;
398 	args[1] = tf->tf_ecx;
399 	args[2] = tf->tf_edx;
400 	args[3] = tf->tf_esi;
401 	args[4] = tf->tf_edi;
402 	*params = NULL;		/* no copyin */
403 }
404 
405 struct sysentvec linux_sysvec = {
406 	LINUX_SYS_MAXSYSCALL,
407 	linux_sysent,
408 	0xff,
409 	LINUX_SIGTBLSZ,
410 	bsd_to_linux_signal,
411 	ELAST + 1,
412 	bsd_to_linux_errno,
413 	translate_traps,
414 	linux_fixup,
415 	linux_sendsig,
416 	linux_sigcode,
417 	&linux_szsigcode,
418 	linux_prepsyscall,
419 	"Linux a.out",
420 	aout_coredump
421 };
422 
423 struct sysentvec elf_linux_sysvec = {
424 	LINUX_SYS_MAXSYSCALL,
425 	linux_sysent,
426 	0xff,
427 	LINUX_SIGTBLSZ,
428 	bsd_to_linux_signal,
429 	ELAST + 1,
430 	bsd_to_linux_errno,
431 	translate_traps,
432 	elf_linux_fixup,
433 	linux_sendsig,
434 	linux_sigcode,
435 	&linux_szsigcode,
436 	linux_prepsyscall,
437 	"Linux ELF",
438 	elf_coredump
439 };
440 
441 static Elf32_Brandinfo linux_brand = {
442 					"Linux",
443 					"/compat/linux",
444 					"/lib/ld-linux.so.1",
445 					&elf_linux_sysvec
446 				 };
447 
448 static Elf32_Brandinfo linux_glibc2brand = {
449 					"Linux",
450 					"/compat/linux",
451 					"/lib/ld-linux.so.2",
452 					&elf_linux_sysvec
453 				 };
454 
455 Elf32_Brandinfo *linux_brandlist[] = {
456 					&linux_brand,
457 					&linux_glibc2brand,
458 					NULL
459 				};
460 
461 static int
462 linux_elf_modevent(module_t mod, int type, void *data)
463 {
464 	Elf32_Brandinfo **brandinfo;
465 	int error;
466 
467 	error = 0;
468 
469 	switch(type) {
470 	case MOD_LOAD:
471 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
472 		     ++brandinfo)
473 			if (elf_insert_brand_entry(*brandinfo) < 0)
474 				error = EINVAL;
475 		if (error)
476 			printf("cannot insert Linux elf brand handler\n");
477 		else {
478 			linux_ioctl_register_handlers(&linux_ioctl_handler_set);
479 			if (bootverbose)
480 				printf("Linux-ELF exec handler installed\n");
481 		}
482 		break;
483 	case MOD_UNLOAD:
484 		linux_ioctl_unregister_handlers(&linux_ioctl_handler_set);
485 		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
486 		     ++brandinfo)
487 			if (elf_brand_inuse(*brandinfo))
488 				error = EBUSY;
489 
490 		if (error == 0) {
491 			for (brandinfo = &linux_brandlist[0];
492 			     *brandinfo != NULL; ++brandinfo)
493 				if (elf_remove_brand_entry(*brandinfo) < 0)
494 					error = EINVAL;
495 		}
496 		if (error)
497 			printf("Could not deinstall ELF interpreter entry\n");
498 		else if (bootverbose)
499 			printf("Linux-elf exec handler removed\n");
500 		break;
501 	default:
502 		break;
503 	}
504 	return error;
505 }
506 static moduledata_t linux_elf_mod = {
507 	"linuxelf",
508 	linux_elf_modevent,
509 	0
510 };
511 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
512