xref: /freebsd/sys/i386/linux/linux_sysvec.c (revision 0ddf9be1f0723916ebd4feb7313d64dffab0c2bb)
1 /*-
2  * Copyright (c) 1994-1996 Søren Schmidt
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer
10  *    in this position and unchanged.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  * 3. The name of the author may not be used to endorse or promote products
15  *    derived from this software withough specific prior written permission
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27  *
28  *  $Id: linux_sysvec.c,v 1.12 1997/03/29 10:50:27 peter Exp $
29  */
30 
31 /* XXX we use functions that might not exist. */
32 #define	COMPAT_43	1
33 
34 #include <sys/param.h>
35 #include <sys/buf.h>
36 #include <sys/proc.h>
37 #include <sys/systm.h>
38 #include <sys/sysproto.h>
39 #include <sys/sysent.h>
40 #include <sys/imgact.h>
41 #include <sys/imgact_elf.h>
42 #include <sys/signalvar.h>
43 #include <sys/malloc.h>
44 #include <vm/vm.h>
45 #include <vm/vm_param.h>
46 #include <vm/vm_prot.h>
47 #include <sys/lock.h>
48 #include <vm/vm_kern.h>
49 #include <vm/vm_object.h>
50 #include <vm/vm_page.h>
51 #include <vm/vm_map.h>
52 #include <vm/vm_pager.h>
53 #include <vm/vm_extern.h>
54 #include <sys/user.h>
55 #include <sys/exec.h>
56 #include <sys/kernel.h>
57 #include <machine/cpu.h>
58 #include <machine/frame.h>
59 #include <machine/reg.h>
60 #include <machine/specialreg.h>
61 #include <machine/psl.h>
62 #include <machine/sysarch.h>
63 #include <machine/md_var.h>
64 
65 #include <i386/linux/linux.h>
66 #include <i386/linux/linux_proto.h>
67 
68 int	linux_fixup __P((int **stack_base, struct image_params *iparams));
69 int	elf_linux_fixup __P((int **stack_base, struct image_params *iparams));
70 void	linux_prepsyscall __P((struct trapframe *tf, int *args, u_int *code, caddr_t *params));
71 void    linux_sendsig __P((sig_t catcher, int sig, int mask, u_long code));
72 static void linux_elf_init __P((void *dummy));
73 
74 /*
75  * Linux syscalls return negative errno's, we do positive and map them
76  */
77 int bsd_to_linux_errno[ELAST] = {
78   	-0,  -1,  -2,  -3,  -4,  -5,  -6,  -7,  -8,  -9,
79  	-10, -35, -12, -13, -14, -15, -16, -17, -18, -19,
80  	-20, -21, -22, -23, -24, -25, -26, -27, -28, -29,
81  	-30, -31, -32, -33, -34, -11,-115,-114, -88, -89,
82  	-90, -91, -92, -93, -94, -95, -96, -97, -98, -99,
83 	-100,-101,-102,-103,-104,-105,-106,-107,-108,-109,
84 	-110,-111, -40, -36,-112,-113, -39, -11, -87,-122,
85 	-116, -66,  -6,  -6,  -6,  -6,  -6, -37, -38,  -9,
86   	-6,
87 };
88 
89 int bsd_to_linux_signal[NSIG] = {
90 	0, LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT,
91 	LINUX_SIGILL, LINUX_SIGTRAP, LINUX_SIGABRT, 0,
92 	LINUX_SIGFPE, LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV,
93 	0, LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM,
94 	LINUX_SIGURG, LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT,
95 	LINUX_SIGCHLD, LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO,
96 	LINUX_SIGXCPU, LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF,
97 	LINUX_SIGWINCH, 0, LINUX_SIGUSR1, LINUX_SIGUSR2
98 };
99 
100 int linux_to_bsd_signal[LINUX_NSIG] = {
101 	0, SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGTRAP, SIGABRT, SIGEMT,
102 	SIGFPE, SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, SIGPIPE, SIGALRM, SIGTERM,
103 	SIGBUS, SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGIO,
104 	SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, SIGURG, SIGURG, 0
105 };
106 
107 int linux_fixup(int **stack_base, struct image_params *imgp)
108 {
109 	int *argv, *envp;
110 
111 	argv = *stack_base;
112 	envp = *stack_base + (imgp->argc + 1);
113 	(*stack_base)--;
114 	**stack_base = (int)envp;
115 	(*stack_base)--;
116 	**stack_base = (int)argv;
117 	(*stack_base)--;
118 	**stack_base = (int)imgp->argc;
119 	return 0;
120 }
121 
122 int elf_linux_fixup(int **stack_base, struct image_params *imgp)
123 {
124 	Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs;
125 	int *pos;
126 
127 	pos = *stack_base + (imgp->argc + imgp->envc + 2);
128 
129 	if (args->trace) {
130 		AUXARGS_ENTRY(pos, AT_DEBUG, 1);
131 	}
132 	if (args->execfd != -1) {
133 		AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd);
134 	}
135 	AUXARGS_ENTRY(pos, AT_PHDR, args->phdr);
136 	AUXARGS_ENTRY(pos, AT_PHENT, args->phent);
137 	AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum);
138 	AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz);
139 	AUXARGS_ENTRY(pos, AT_FLAGS, args->flags);
140 	AUXARGS_ENTRY(pos, AT_ENTRY, args->entry);
141 	AUXARGS_ENTRY(pos, AT_BASE, args->base);
142 	AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid);
143 	AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid);
144 	AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid);
145 	AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid);
146 	AUXARGS_ENTRY(pos, AT_NULL, 0);
147 
148 	free(imgp->auxargs, M_TEMP);
149 	imgp->auxargs = NULL;
150 
151 	(*stack_base)--;
152 	**stack_base = (int)imgp->argc;
153 	return 0;
154 }
155 
/*
 * Segment selector values defined outside this file.  linux_sendsig()
 * loads _ucodesel into the saved %cs and _udatasel into the saved
 * %ds, %es and %ss before the signal handler runs.
 */
extern int _ucodesel;
extern int _udatasel;
157 
158 /*
159  * Send an interrupt to process.
160  *
161  * Stack is set up to allow sigcode stored
162  * in u. to call routine, followed by kcall
163  * to sigreturn routine below.  After sigreturn
164  * resets the signal mask, the stack, and the
165  * frame pointer, it returns to the user
166  * specified pc, psl.
167  */
168 
169 void
170 linux_sendsig(sig_t catcher, int sig, int mask, u_long code)
171 {
172 	register struct proc *p = curproc;
173 	register int *regs;
174 	struct linux_sigframe *fp, frame;
175 	struct sigacts *psp = p->p_sigacts;
176 	int oonstack;
177 
178 	regs = p->p_md.md_regs;
179 	oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK;
180 
181 #ifdef DEBUG
182 	printf("Linux-emul(%d): linux_sendsig(%8x, %d, %d, %ld)\n",
183 		p->p_pid, catcher, sig, mask, code);
184 #endif
185 	/*
186 	 * Allocate space for the signal handler context.
187 	 */
188 	if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack &&
189 	    (psp->ps_sigonstack & sigmask(sig))) {
190 		fp = (struct linux_sigframe *)(psp->ps_sigstk.ss_sp +
191 		    psp->ps_sigstk.ss_size - sizeof(struct linux_sigframe));
192 		psp->ps_sigstk.ss_flags |= SS_ONSTACK;
193 	} else {
194 		fp = (struct linux_sigframe *)regs[tESP] - 1;
195 	}
196 
197 	/*
198 	 * grow() will return FALSE if the fp will not fit inside the stack
199 	 *	and the stack can not be grown. useracc will return FALSE
200 	 *	if access is denied.
201 	 */
202 	if ((grow(p, (int)fp) == FALSE) ||
203 	    (useracc((caddr_t)fp, sizeof (struct linux_sigframe), B_WRITE) == FALSE)) {
204 		/*
205 		 * Process has trashed its stack; give it an illegal
206 		 * instruction to halt it in its tracks.
207 		 */
208 		SIGACTION(p, SIGILL) = SIG_DFL;
209 		sig = sigmask(SIGILL);
210 		p->p_sigignore &= ~sig;
211 		p->p_sigcatch &= ~sig;
212 		p->p_sigmask &= ~sig;
213 		psignal(p, SIGILL);
214 		return;
215 	}
216 
217 	/*
218 	 * Build the argument list for the signal handler.
219 	 */
220 	if (p->p_sysent->sv_sigtbl) {
221 		if (sig < p->p_sysent->sv_sigsize)
222 			sig = p->p_sysent->sv_sigtbl[sig];
223 		else
224 			sig = p->p_sysent->sv_sigsize + 1;
225 	}
226 
227 	frame.sf_handler = catcher;
228 	frame.sf_sig = sig;
229 
230 	/*
231 	 * Build the signal context to be used by sigreturn.
232 	 */
233 	frame.sf_sc.sc_mask   = mask;
234 	__asm("movl %%gs,%w0" : "=r" (frame.sf_sc.sc_gs));
235 	__asm("movl %%fs,%w0" : "=r" (frame.sf_sc.sc_fs));
236 	frame.sf_sc.sc_es     = regs[tES];
237 	frame.sf_sc.sc_ds     = regs[tDS];
238 	frame.sf_sc.sc_edi    = regs[tEDI];
239 	frame.sf_sc.sc_esi    = regs[tESI];
240 	frame.sf_sc.sc_ebp    = regs[tEBP];
241 	frame.sf_sc.sc_ebx    = regs[tEBX];
242 	frame.sf_sc.sc_edx    = regs[tEDX];
243 	frame.sf_sc.sc_ecx    = regs[tECX];
244 	frame.sf_sc.sc_eax    = regs[tEAX];
245 	frame.sf_sc.sc_eip    = regs[tEIP];
246 	frame.sf_sc.sc_cs     = regs[tCS];
247 	frame.sf_sc.sc_eflags = regs[tEFLAGS];
248 	frame.sf_sc.sc_esp_at_signal = regs[tESP];
249 	frame.sf_sc.sc_ss     = regs[tSS];
250 	frame.sf_sc.sc_err    = regs[tERR];
251 	frame.sf_sc.sc_trapno = code;	/* XXX ???? */
252 
253 	if (copyout(&frame, fp, sizeof(frame)) != 0) {
254 		/*
255 		 * Process has trashed its stack; give it an illegal
256 		 * instruction to halt it in its tracks.
257 		 */
258 		sigexit(p, SIGILL);
259 		/* NOTREACHED */
260 	}
261 
262 	/*
263 	 * Build context to run handler in.
264 	 */
265 	regs[tESP] = (int)fp;
266 	regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode));
267 	regs[tEFLAGS] &= ~PSL_VM;
268 	regs[tCS] = _ucodesel;
269 	regs[tDS] = _udatasel;
270 	regs[tES] = _udatasel;
271 	regs[tSS] = _udatasel;
272 }
273 
274 /*
275  * System call to cleanup state after a signal
276  * has been taken.  Reset signal mask and
277  * stack state from context left by sendsig (above).
278  * Return to previous pc and psl as specified by
279  * context left by sendsig. Check carefully to
280  * make sure that the user has not modified the
281  * psl to gain improper privileges or to cause
282  * a machine fault.
283  */
284 int
285 linux_sigreturn(p, args, retval)
286 	struct proc *p;
287 	struct linux_sigreturn_args *args;
288 	int *retval;
289 {
290 	struct linux_sigcontext *scp, context;
291 	register int *regs;
292 	int eflags;
293 
294 	regs = p->p_md.md_regs;
295 
296 #ifdef DEBUG
297 	printf("Linux-emul(%d): linux_sigreturn(%8x)\n", p->p_pid, args->scp);
298 #endif
299 	/*
300 	 * The trampoline code hands us the context.
301 	 * It is unsafe to keep track of it ourselves, in the event that a
302 	 * program jumps out of a signal handler.
303 	 */
304 	scp = args->scp;
305 	if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0)
306 		return (EFAULT);
307 
308 	/*
309 	 * Check for security violations.
310 	 */
311 #define	EFLAGS_SECURE(ef, oef)	((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
312 	eflags = context.sc_eflags;
313 	/*
314 	 * XXX do allow users to change the privileged flag PSL_RF.  The
315 	 * cpu sets PSL_RF in tf_eflags for faults.  Debuggers should
316 	 * sometimes set it there too.  tf_eflags is kept in the signal
317 	 * context during signal handling and there is no other place
318 	 * to remember it, so the PSL_RF bit may be corrupted by the
319 	 * signal handler without us knowing.  Corruption of the PSL_RF
320 	 * bit at worst causes one more or one less debugger trap, so
321 	 * allowing it is fairly harmless.
322 	 */
323 	if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) {
324     		return(EINVAL);
325 	}
326 
327 	/*
328 	 * Don't allow users to load a valid privileged %cs.  Let the
329 	 * hardware check for invalid selectors, excess privilege in
330 	 * other selectors, invalid %eip's and invalid %esp's.
331 	 */
332 #define	CS_SECURE(cs)	(ISPL(cs) == SEL_UPL)
333 	if (!CS_SECURE(context.sc_cs)) {
334 		trapsignal(p, SIGBUS, T_PROTFLT);
335 		return(EINVAL);
336 	}
337 
338 	p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK;
339 	p->p_sigmask = context.sc_mask &~
340 		(sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP));
341 	/*
342 	 * Restore signal context.
343 	 */
344 	/* %fs and %gs were restored by the trampoline. */
345 	regs[tES]     = context.sc_es;
346 	regs[tDS]     = context.sc_ds;
347 	regs[tEDI]    = context.sc_edi;
348 	regs[tESI]    = context.sc_esi;
349 	regs[tEBP]    = context.sc_ebp;
350 	regs[tEBX]    = context.sc_ebx;
351 	regs[tEDX]    = context.sc_edx;
352 	regs[tECX]    = context.sc_ecx;
353 	regs[tEAX]    = context.sc_eax;
354 	regs[tEIP]    = context.sc_eip;
355 	regs[tCS]     = context.sc_cs;
356 	regs[tEFLAGS] = eflags;
357 	regs[tESP]    = context.sc_esp_at_signal;
358 	regs[tSS]     = context.sc_ss;
359 
360 	return (EJUSTRETURN);
361 }
362 
363 void
364 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params)
365 {
366 	args[0] = tf->tf_ebx;
367 	args[1] = tf->tf_ecx;
368 	args[2] = tf->tf_edx;
369 	args[3] = tf->tf_esi;
370 	args[4] = tf->tf_edi;
371 	*params = NULL;		/* no copyin */
372 }
373 
374 struct sysentvec linux_sysvec = {
375 	LINUX_SYS_MAXSYSCALL,
376 	linux_sysent,
377 	0xff,
378 	NSIG,
379 	bsd_to_linux_signal,
380 	ELAST,
381 	bsd_to_linux_errno,
382 	linux_fixup,
383 	linux_sendsig,
384 	linux_sigcode,
385 	&linux_szsigcode,
386 	linux_prepsyscall,
387 	"Linux a.out"
388 };
389 
390 struct sysentvec elf_linux_sysvec = {
391         LINUX_SYS_MAXSYSCALL,
392         linux_sysent,
393         0xff,
394         NSIG,
395         bsd_to_linux_signal,
396         ELAST,
397         bsd_to_linux_errno,
398         elf_linux_fixup,
399         linux_sendsig,
400         linux_sigcode,
401         &linux_szsigcode,
402         linux_prepsyscall,
403 	"Linux ELF"
404 };
405 
406 /*
407  * Installed either via SYSINIT() or via LKM stubs.
408  */
409 Elf32_Brandinfo linux_brand = {
410 					"Linux",
411 					"/compat/linux",
412 					"/lib/ld-linux.so.1",
413 					&elf_linux_sysvec
414 				 };
415 
416 #ifndef LKM
417 /*
418  * XXX: this is WRONG, it needs to be SI_SUB_EXEC, but this is just at the
419  * "proof of concept" stage and will be fixed shortly
420  */
421 static void
422 linux_elf_init(dummy)
423 	void *dummy;
424 {
425 	if (elf_insert_brand_entry(&linux_brand) < 0)
426 		printf("cannot insert Linux elf brand handler\n");
427 	else if (bootverbose)
428 		printf("Linux-ELF exec handler installed\n");
429 }
430 
431 SYSINIT(linuxelf, SI_SUB_VFS, SI_ORDER_ANY, linux_elf_init, NULL);
432 #endif
433