/*-
 * Copyright (c) 1994-1996 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software withough specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 *  $Id: linux_sysvec.c,v 1.12 1997/03/29 10:50:27 peter Exp $
 */

/* XXX we use functions that might not exist.
*/ 32 #define COMPAT_43 1 33 34 #include <sys/param.h> 35 #include <sys/buf.h> 36 #include <sys/proc.h> 37 #include <sys/systm.h> 38 #include <sys/sysproto.h> 39 #include <sys/sysent.h> 40 #include <sys/imgact.h> 41 #include <sys/imgact_elf.h> 42 #include <sys/signalvar.h> 43 #include <sys/malloc.h> 44 #include <vm/vm.h> 45 #include <vm/vm_param.h> 46 #include <vm/vm_prot.h> 47 #include <sys/lock.h> 48 #include <vm/vm_kern.h> 49 #include <vm/vm_object.h> 50 #include <vm/vm_page.h> 51 #include <vm/vm_map.h> 52 #include <vm/vm_pager.h> 53 #include <vm/vm_extern.h> 54 #include <sys/user.h> 55 #include <sys/exec.h> 56 #include <sys/kernel.h> 57 #include <machine/cpu.h> 58 #include <machine/frame.h> 59 #include <machine/reg.h> 60 #include <machine/specialreg.h> 61 #include <machine/psl.h> 62 #include <machine/sysarch.h> 63 #include <machine/md_var.h> 64 65 #include <i386/linux/linux.h> 66 #include <i386/linux/linux_proto.h> 67 68 int linux_fixup __P((int **stack_base, struct image_params *iparams)); 69 int elf_linux_fixup __P((int **stack_base, struct image_params *iparams)); 70 void linux_prepsyscall __P((struct trapframe *tf, int *args, u_int *code, caddr_t *params)); 71 void linux_sendsig __P((sig_t catcher, int sig, int mask, u_long code)); 72 static void linux_elf_init __P((void *dummy)); 73 74 /* 75 * Linux syscalls return negative errno's, we do positive and map them 76 */ 77 int bsd_to_linux_errno[ELAST] = { 78 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 79 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 80 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 81 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 82 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 83 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 84 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 85 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 86 -6, 87 }; 88 89 int bsd_to_linux_signal[NSIG] = { 90 0, LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, 91 LINUX_SIGILL, LINUX_SIGTRAP, LINUX_SIGABRT, 0, 
92 LINUX_SIGFPE, LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, 93 0, LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, 94 LINUX_SIGURG, LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, 95 LINUX_SIGCHLD, LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, 96 LINUX_SIGXCPU, LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, 97 LINUX_SIGWINCH, 0, LINUX_SIGUSR1, LINUX_SIGUSR2 98 }; 99 100 int linux_to_bsd_signal[LINUX_NSIG] = { 101 0, SIGHUP, SIGINT, SIGQUIT, SIGILL, SIGTRAP, SIGABRT, SIGEMT, 102 SIGFPE, SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, SIGPIPE, SIGALRM, SIGTERM, 103 SIGBUS, SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, SIGTTIN, SIGTTOU, SIGIO, 104 SIGXCPU, SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, SIGURG, SIGURG, 0 105 }; 106 107 int linux_fixup(int **stack_base, struct image_params *imgp) 108 { 109 int *argv, *envp; 110 111 argv = *stack_base; 112 envp = *stack_base + (imgp->argc + 1); 113 (*stack_base)--; 114 **stack_base = (int)envp; 115 (*stack_base)--; 116 **stack_base = (int)argv; 117 (*stack_base)--; 118 **stack_base = (int)imgp->argc; 119 return 0; 120 } 121 122 int elf_linux_fixup(int **stack_base, struct image_params *imgp) 123 { 124 Elf32_Auxargs *args = (Elf32_Auxargs *)imgp->auxargs; 125 int *pos; 126 127 pos = *stack_base + (imgp->argc + imgp->envc + 2); 128 129 if (args->trace) { 130 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 131 } 132 if (args->execfd != -1) { 133 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 134 } 135 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 136 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 137 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 138 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 139 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 140 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 141 AUXARGS_ENTRY(pos, AT_BASE, args->base); 142 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_cred->p_ruid); 143 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_cred->p_svuid); 144 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_cred->p_rgid); 145 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_cred->p_svgid); 146 
AUXARGS_ENTRY(pos, AT_NULL, 0); 147 148 free(imgp->auxargs, M_TEMP); 149 imgp->auxargs = NULL; 150 151 (*stack_base)--; 152 **stack_base = (int)imgp->argc; 153 return 0; 154 } 155 156 extern int _ucodesel, _udatasel; 157 158 /* 159 * Send an interrupt to process. 160 * 161 * Stack is set up to allow sigcode stored 162 * in u. to call routine, followed by kcall 163 * to sigreturn routine below. After sigreturn 164 * resets the signal mask, the stack, and the 165 * frame pointer, it returns to the user 166 * specified pc, psl. 167 */ 168 169 void 170 linux_sendsig(sig_t catcher, int sig, int mask, u_long code) 171 { 172 register struct proc *p = curproc; 173 register int *regs; 174 struct linux_sigframe *fp, frame; 175 struct sigacts *psp = p->p_sigacts; 176 int oonstack; 177 178 regs = p->p_md.md_regs; 179 oonstack = psp->ps_sigstk.ss_flags & SS_ONSTACK; 180 181 #ifdef DEBUG 182 printf("Linux-emul(%d): linux_sendsig(%8x, %d, %d, %ld)\n", 183 p->p_pid, catcher, sig, mask, code); 184 #endif 185 /* 186 * Allocate space for the signal handler context. 187 */ 188 if ((psp->ps_flags & SAS_ALTSTACK) && !oonstack && 189 (psp->ps_sigonstack & sigmask(sig))) { 190 fp = (struct linux_sigframe *)(psp->ps_sigstk.ss_sp + 191 psp->ps_sigstk.ss_size - sizeof(struct linux_sigframe)); 192 psp->ps_sigstk.ss_flags |= SS_ONSTACK; 193 } else { 194 fp = (struct linux_sigframe *)regs[tESP] - 1; 195 } 196 197 /* 198 * grow() will return FALSE if the fp will not fit inside the stack 199 * and the stack can not be grown. useracc will return FALSE 200 * if access is denied. 201 */ 202 if ((grow(p, (int)fp) == FALSE) || 203 (useracc((caddr_t)fp, sizeof (struct linux_sigframe), B_WRITE) == FALSE)) { 204 /* 205 * Process has trashed its stack; give it an illegal 206 * instruction to halt it in its tracks. 
207 */ 208 SIGACTION(p, SIGILL) = SIG_DFL; 209 sig = sigmask(SIGILL); 210 p->p_sigignore &= ~sig; 211 p->p_sigcatch &= ~sig; 212 p->p_sigmask &= ~sig; 213 psignal(p, SIGILL); 214 return; 215 } 216 217 /* 218 * Build the argument list for the signal handler. 219 */ 220 if (p->p_sysent->sv_sigtbl) { 221 if (sig < p->p_sysent->sv_sigsize) 222 sig = p->p_sysent->sv_sigtbl[sig]; 223 else 224 sig = p->p_sysent->sv_sigsize + 1; 225 } 226 227 frame.sf_handler = catcher; 228 frame.sf_sig = sig; 229 230 /* 231 * Build the signal context to be used by sigreturn. 232 */ 233 frame.sf_sc.sc_mask = mask; 234 __asm("movl %%gs,%w0" : "=r" (frame.sf_sc.sc_gs)); 235 __asm("movl %%fs,%w0" : "=r" (frame.sf_sc.sc_fs)); 236 frame.sf_sc.sc_es = regs[tES]; 237 frame.sf_sc.sc_ds = regs[tDS]; 238 frame.sf_sc.sc_edi = regs[tEDI]; 239 frame.sf_sc.sc_esi = regs[tESI]; 240 frame.sf_sc.sc_ebp = regs[tEBP]; 241 frame.sf_sc.sc_ebx = regs[tEBX]; 242 frame.sf_sc.sc_edx = regs[tEDX]; 243 frame.sf_sc.sc_ecx = regs[tECX]; 244 frame.sf_sc.sc_eax = regs[tEAX]; 245 frame.sf_sc.sc_eip = regs[tEIP]; 246 frame.sf_sc.sc_cs = regs[tCS]; 247 frame.sf_sc.sc_eflags = regs[tEFLAGS]; 248 frame.sf_sc.sc_esp_at_signal = regs[tESP]; 249 frame.sf_sc.sc_ss = regs[tSS]; 250 frame.sf_sc.sc_err = regs[tERR]; 251 frame.sf_sc.sc_trapno = code; /* XXX ???? */ 252 253 if (copyout(&frame, fp, sizeof(frame)) != 0) { 254 /* 255 * Process has trashed its stack; give it an illegal 256 * instruction to halt it in its tracks. 257 */ 258 sigexit(p, SIGILL); 259 /* NOTREACHED */ 260 } 261 262 /* 263 * Build context to run handler in. 264 */ 265 regs[tESP] = (int)fp; 266 regs[tEIP] = (int)(((char *)PS_STRINGS) - *(p->p_sysent->sv_szsigcode)); 267 regs[tEFLAGS] &= ~PSL_VM; 268 regs[tCS] = _ucodesel; 269 regs[tDS] = _udatasel; 270 regs[tES] = _udatasel; 271 regs[tSS] = _udatasel; 272 } 273 274 /* 275 * System call to cleanup state after a signal 276 * has been taken. 
Reset signal mask and 277 * stack state from context left by sendsig (above). 278 * Return to previous pc and psl as specified by 279 * context left by sendsig. Check carefully to 280 * make sure that the user has not modified the 281 * psl to gain improper privileges or to cause 282 * a machine fault. 283 */ 284 int 285 linux_sigreturn(p, args, retval) 286 struct proc *p; 287 struct linux_sigreturn_args *args; 288 int *retval; 289 { 290 struct linux_sigcontext *scp, context; 291 register int *regs; 292 int eflags; 293 294 regs = p->p_md.md_regs; 295 296 #ifdef DEBUG 297 printf("Linux-emul(%d): linux_sigreturn(%8x)\n", p->p_pid, args->scp); 298 #endif 299 /* 300 * The trampoline code hands us the context. 301 * It is unsafe to keep track of it ourselves, in the event that a 302 * program jumps out of a signal handler. 303 */ 304 scp = args->scp; 305 if (copyin((caddr_t)scp, &context, sizeof(*scp)) != 0) 306 return (EFAULT); 307 308 /* 309 * Check for security violations. 310 */ 311 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 312 eflags = context.sc_eflags; 313 /* 314 * XXX do allow users to change the privileged flag PSL_RF. The 315 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 316 * sometimes set it there too. tf_eflags is kept in the signal 317 * context during signal handling and there is no other place 318 * to remember it, so the PSL_RF bit may be corrupted by the 319 * signal handler without us knowing. Corruption of the PSL_RF 320 * bit at worst causes one more or one less debugger trap, so 321 * allowing it is fairly harmless. 322 */ 323 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs[tEFLAGS] & ~PSL_RF)) { 324 return(EINVAL); 325 } 326 327 /* 328 * Don't allow users to load a valid privileged %cs. Let the 329 * hardware check for invalid selectors, excess privilege in 330 * other selectors, invalid %eip's and invalid %esp's. 
331 */ 332 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 333 if (!CS_SECURE(context.sc_cs)) { 334 trapsignal(p, SIGBUS, T_PROTFLT); 335 return(EINVAL); 336 } 337 338 p->p_sigacts->ps_sigstk.ss_flags &= ~SS_ONSTACK; 339 p->p_sigmask = context.sc_mask &~ 340 (sigmask(SIGKILL)|sigmask(SIGCONT)|sigmask(SIGSTOP)); 341 /* 342 * Restore signal context. 343 */ 344 /* %fs and %gs were restored by the trampoline. */ 345 regs[tES] = context.sc_es; 346 regs[tDS] = context.sc_ds; 347 regs[tEDI] = context.sc_edi; 348 regs[tESI] = context.sc_esi; 349 regs[tEBP] = context.sc_ebp; 350 regs[tEBX] = context.sc_ebx; 351 regs[tEDX] = context.sc_edx; 352 regs[tECX] = context.sc_ecx; 353 regs[tEAX] = context.sc_eax; 354 regs[tEIP] = context.sc_eip; 355 regs[tCS] = context.sc_cs; 356 regs[tEFLAGS] = eflags; 357 regs[tESP] = context.sc_esp_at_signal; 358 regs[tSS] = context.sc_ss; 359 360 return (EJUSTRETURN); 361 } 362 363 void 364 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 365 { 366 args[0] = tf->tf_ebx; 367 args[1] = tf->tf_ecx; 368 args[2] = tf->tf_edx; 369 args[3] = tf->tf_esi; 370 args[4] = tf->tf_edi; 371 *params = NULL; /* no copyin */ 372 } 373 374 struct sysentvec linux_sysvec = { 375 LINUX_SYS_MAXSYSCALL, 376 linux_sysent, 377 0xff, 378 NSIG, 379 bsd_to_linux_signal, 380 ELAST, 381 bsd_to_linux_errno, 382 linux_fixup, 383 linux_sendsig, 384 linux_sigcode, 385 &linux_szsigcode, 386 linux_prepsyscall, 387 "Linux a.out" 388 }; 389 390 struct sysentvec elf_linux_sysvec = { 391 LINUX_SYS_MAXSYSCALL, 392 linux_sysent, 393 0xff, 394 NSIG, 395 bsd_to_linux_signal, 396 ELAST, 397 bsd_to_linux_errno, 398 elf_linux_fixup, 399 linux_sendsig, 400 linux_sigcode, 401 &linux_szsigcode, 402 linux_prepsyscall, 403 "Linux ELF" 404 }; 405 406 /* 407 * Installed either via SYSINIT() or via LKM stubs. 
408 */ 409 Elf32_Brandinfo linux_brand = { 410 "Linux", 411 "/compat/linux", 412 "/lib/ld-linux.so.1", 413 &elf_linux_sysvec 414 }; 415 416 #ifndef LKM 417 /* 418 * XXX: this is WRONG, it needs to be SI_SUB_EXEC, but this is just at the 419 * "proof of concept" stage and will be fixed shortly 420 */ 421 static void 422 linux_elf_init(dummy) 423 void *dummy; 424 { 425 if (elf_insert_brand_entry(&linux_brand) < 0) 426 printf("cannot insert Linux elf brand handler\n"); 427 else if (bootverbose) 428 printf("Linux-ELF exec handler installed\n"); 429 } 430 431 SYSINIT(linuxelf, SI_SUB_VFS, SI_ORDER_ANY, linux_elf_init, NULL); 432 #endif 433