1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/imgact.h> 37 #include <sys/imgact_aout.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/lock.h> 41 #include <sys/malloc.h> 42 #include <sys/module.h> 43 #include <sys/mutex.h> 44 #include <sys/proc.h> 45 #include <sys/signalvar.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysent.h> 48 #include <sys/sysproto.h> 49 #include <sys/vnode.h> 50 #include <sys/eventhandler.h> 51 52 #include <vm/vm.h> 53 #include <vm/pmap.h> 54 #include <vm/vm_extern.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_object.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_param.h> 59 60 #include <machine/cpu.h> 61 #include <machine/md_var.h> 62 #include <machine/pcb.h> 63 64 #include <i386/linux/linux.h> 65 #include <i386/linux/linux_proto.h> 66 #include <compat/linux/linux_emul.h> 67 #include <compat/linux/linux_mib.h> 68 #include <compat/linux/linux_signal.h> 69 #include <compat/linux/linux_util.h> 70 71 MODULE_VERSION(linux, 1); 72 73 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 74 75 #if BYTE_ORDER == LITTLE_ENDIAN 76 #define SHELLMAGIC 0x2123 /* #! */ 77 #else 78 #define SHELLMAGIC 0x2321 79 #endif 80 81 /* 82 * Allow the sendsig functions to use the ldebug() facility 83 * even though they are not syscalls themselves. Map them 84 * to syscall 0. This is slightly less bogus than using 85 * ldebug(sigreturn). 86 */ 87 #define LINUX_SYS_linux_rt_sendsig 0 88 #define LINUX_SYS_linux_sendsig 0 89 90 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 91 #define __LINUX_NPXCW__ 0x37f 92 93 extern char linux_sigcode[]; 94 extern int linux_szsigcode; 95 96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 97 98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 100 101 static int linux_fixup(register_t **stack_base, 102 struct image_params *iparams); 103 static int elf_linux_fixup(register_t **stack_base, 104 struct image_params *iparams); 105 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 106 caddr_t *params); 107 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 108 static void exec_linux_setregs(struct thread *td, u_long entry, 109 u_long stack, u_long ps_strings); 110 111 extern LIST_HEAD(futex_list, futex) futex_list; 112 extern struct sx futex_sx; 113 114 static eventhandler_tag linux_exit_tag; 115 static eventhandler_tag linux_schedtail_tag; 116 static eventhandler_tag linux_exec_tag; 117 118 /* 119 * Linux syscalls return negative errno's, we do positive and map them 120 * Reference: 121 * FreeBSD: src/sys/sys/errno.h 122 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 123 * linux-2.6.17.8/include/asm-generic/errno.h 124 */ 125 static int bsd_to_linux_errno[ELAST + 1] = { 126 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 127 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 128 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 129 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 130 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 131 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 132 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 133 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 134 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 135 -72, -67, -71 136 }; 137 138 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 139 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 140 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 141 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 142 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 143 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 144 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 145 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 146 0, LINUX_SIGUSR1, LINUX_SIGUSR2 147 }; 148 149 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 150 SIGHUP, SIGINT, SIGQUIT, SIGILL, 151 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 152 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 153 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 154 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 155 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 156 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 157 SIGIO, SIGURG, SIGSYS 158 }; 159 160 #define LINUX_T_UNKNOWN 255 161 static int _bsd_to_linux_trapcode[] = { 162 LINUX_T_UNKNOWN, /* 0 */ 163 6, /* 1 T_PRIVINFLT */ 164 LINUX_T_UNKNOWN, /* 2 */ 165 3, /* 3 T_BPTFLT */ 166 LINUX_T_UNKNOWN, /* 4 */ 167 LINUX_T_UNKNOWN, /* 5 */ 168 16, /* 6 T_ARITHTRAP */ 169 254, /* 7 T_ASTFLT */ 170 LINUX_T_UNKNOWN, /* 8 */ 171 13, /* 9 T_PROTFLT */ 172 1, /* 10 T_TRCTRAP */ 173 LINUX_T_UNKNOWN, /* 11 */ 174 14, /* 12 T_PAGEFLT */ 175 LINUX_T_UNKNOWN, /* 13 */ 176 17, /* 14 T_ALIGNFLT */ 177 LINUX_T_UNKNOWN, /* 15 */ 178 LINUX_T_UNKNOWN, /* 16 */ 179 LINUX_T_UNKNOWN, /* 17 */ 180 0, /* 18 T_DIVIDE */ 181 2, /* 19 T_NMI */ 182 4, /* 20 T_OFLOW */ 183 5, /* 21 T_BOUND */ 184 7, /* 22 T_DNA */ 185 8, /* 23 T_DOUBLEFLT */ 186 9, /* 24 T_FPOPFLT */ 187 10, /* 25 T_TSSFLT */ 188 11, /* 26 T_SEGNPFLT */ 189 12, /* 27 T_STKFLT */ 190 18, /* 28 T_MCHK */ 191 19, /* 29 T_XMMFLT */ 192 15 /* 30 T_RESERVED */ 193 }; 194 #define bsd_to_linux_trapcode(code) \ 195 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 196 _bsd_to_linux_trapcode[(code)]: \ 197 LINUX_T_UNKNOWN) 198 199 /* 200 * If FreeBSD & Linux have a difference of opinion about what a trap 201 * means, deal with it here. 202 * 203 * MPSAFE 204 */ 205 static int 206 translate_traps(int signal, int trap_code) 207 { 208 if (signal != SIGBUS) 209 return signal; 210 switch (trap_code) { 211 case T_PROTFLT: 212 case T_TSSFLT: 213 case T_DOUBLEFLT: 214 case T_PAGEFLT: 215 return SIGSEGV; 216 default: 217 return signal; 218 } 219 } 220 221 static int 222 linux_fixup(register_t **stack_base, struct image_params *imgp) 223 { 224 register_t *argv, *envp; 225 226 argv = *stack_base; 227 envp = *stack_base + (imgp->args->argc + 1); 228 (*stack_base)--; 229 **stack_base = (intptr_t)(void *)envp; 230 (*stack_base)--; 231 **stack_base = (intptr_t)(void *)argv; 232 (*stack_base)--; 233 **stack_base = imgp->args->argc; 234 return 0; 235 } 236 237 static int 238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 239 { 240 Elf32_Auxargs *args; 241 register_t *pos; 242 243 KASSERT(curthread->td_proc == imgp->proc, 244 ("unsafe elf_linux_fixup(), should be curproc")); 245 args = (Elf32_Auxargs *)imgp->auxargs; 246 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 247 248 if (args->execfd != -1) 249 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 250 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 251 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 252 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 253 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 254 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 255 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 256 AUXARGS_ENTRY(pos, AT_BASE, args->base); 257 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 258 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 259 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 260 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 261 AUXARGS_ENTRY(pos, AT_NULL, 0); 262 263 free(imgp->auxargs, M_TEMP); 264 imgp->auxargs = NULL; 265 266 (*stack_base)--; 267 **stack_base = (register_t)imgp->args->argc; 268 return 0; 269 } 270 271 extern int _ucodesel, _udatasel; 272 extern unsigned long linux_sznonrtsigcode; 273 274 static void 275 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 276 { 277 struct thread *td = curthread; 278 struct proc *p = td->td_proc; 279 struct sigacts *psp; 280 struct trapframe *regs; 281 struct l_rt_sigframe *fp, frame; 282 int sig, code; 283 int oonstack; 284 285 sig = ksi->ksi_signo; 286 code = ksi->ksi_code; 287 PROC_LOCK_ASSERT(p, MA_OWNED); 288 psp = p->p_sigacts; 289 mtx_assert(&psp->ps_mtx, MA_OWNED); 290 regs = td->td_frame; 291 oonstack = sigonstack(regs->tf_esp); 292 293 #ifdef DEBUG 294 if (ldebug(rt_sendsig)) 295 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 296 catcher, sig, (void*)mask, code); 297 #endif 298 /* 299 * Allocate space for the signal handler context. 300 */ 301 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 302 SIGISMEMBER(psp->ps_sigonstack, sig)) { 303 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 304 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 305 } else 306 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 307 mtx_unlock(&psp->ps_mtx); 308 309 /* 310 * Build the argument list for the signal handler. 311 */ 312 if (p->p_sysent->sv_sigtbl) 313 if (sig <= p->p_sysent->sv_sigsize) 314 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 315 316 bzero(&frame, sizeof(frame)); 317 318 frame.sf_handler = catcher; 319 frame.sf_sig = sig; 320 frame.sf_siginfo = &fp->sf_si; 321 frame.sf_ucontext = &fp->sf_sc; 322 323 /* Fill in POSIX parts */ 324 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 325 326 /* 327 * Build the signal context to be used by sigreturn. 328 */ 329 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 330 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 331 332 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 333 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 334 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 335 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 336 PROC_UNLOCK(p); 337 338 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 339 340 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 341 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 342 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 343 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 344 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 345 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 346 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 347 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 348 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 349 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 350 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 351 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 352 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 353 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 354 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 355 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 356 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 357 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 358 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 359 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 360 361 #ifdef DEBUG 362 if (ldebug(rt_sendsig)) 363 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 364 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 365 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 366 #endif 367 368 if (copyout(&frame, fp, sizeof(frame)) != 0) { 369 /* 370 * Process has trashed its stack; give it an illegal 371 * instruction to halt it in its tracks. 372 */ 373 #ifdef DEBUG 374 if (ldebug(rt_sendsig)) 375 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 376 fp, oonstack); 377 #endif 378 PROC_LOCK(p); 379 sigexit(td, SIGILL); 380 } 381 382 /* 383 * Build context to run handler in. 384 */ 385 regs->tf_esp = (int)fp; 386 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 387 linux_sznonrtsigcode; 388 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 389 regs->tf_cs = _ucodesel; 390 regs->tf_ds = _udatasel; 391 regs->tf_es = _udatasel; 392 regs->tf_fs = _udatasel; 393 regs->tf_ss = _udatasel; 394 PROC_LOCK(p); 395 mtx_lock(&psp->ps_mtx); 396 } 397 398 399 /* 400 * Send an interrupt to process. 401 * 402 * Stack is set up to allow sigcode stored 403 * in u. to call routine, followed by kcall 404 * to sigreturn routine below. After sigreturn 405 * resets the signal mask, the stack, and the 406 * frame pointer, it returns to the user 407 * specified pc, psl. 408 */ 409 static void 410 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 411 { 412 struct thread *td = curthread; 413 struct proc *p = td->td_proc; 414 struct sigacts *psp; 415 struct trapframe *regs; 416 struct l_sigframe *fp, frame; 417 l_sigset_t lmask; 418 int sig, code; 419 int oonstack, i; 420 421 PROC_LOCK_ASSERT(p, MA_OWNED); 422 psp = p->p_sigacts; 423 sig = ksi->ksi_signo; 424 code = ksi->ksi_code; 425 mtx_assert(&psp->ps_mtx, MA_OWNED); 426 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 427 /* Signal handler installed with SA_SIGINFO. */ 428 linux_rt_sendsig(catcher, ksi, mask); 429 return; 430 } 431 regs = td->td_frame; 432 oonstack = sigonstack(regs->tf_esp); 433 434 #ifdef DEBUG 435 if (ldebug(sendsig)) 436 printf(ARGS(sendsig, "%p, %d, %p, %u"), 437 catcher, sig, (void*)mask, code); 438 #endif 439 440 /* 441 * Allocate space for the signal handler context. 442 */ 443 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 444 SIGISMEMBER(psp->ps_sigonstack, sig)) { 445 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 446 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 447 } else 448 fp = (struct l_sigframe *)regs->tf_esp - 1; 449 mtx_unlock(&psp->ps_mtx); 450 PROC_UNLOCK(p); 451 452 /* 453 * Build the argument list for the signal handler. 454 */ 455 if (p->p_sysent->sv_sigtbl) 456 if (sig <= p->p_sysent->sv_sigsize) 457 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 458 459 bzero(&frame, sizeof(frame)); 460 461 frame.sf_handler = catcher; 462 frame.sf_sig = sig; 463 464 bsd_to_linux_sigset(mask, &lmask); 465 466 /* 467 * Build the signal context to be used by sigreturn. 468 */ 469 frame.sf_sc.sc_mask = lmask.__bits[0]; 470 frame.sf_sc.sc_gs = rgs(); 471 frame.sf_sc.sc_fs = regs->tf_fs; 472 frame.sf_sc.sc_es = regs->tf_es; 473 frame.sf_sc.sc_ds = regs->tf_ds; 474 frame.sf_sc.sc_edi = regs->tf_edi; 475 frame.sf_sc.sc_esi = regs->tf_esi; 476 frame.sf_sc.sc_ebp = regs->tf_ebp; 477 frame.sf_sc.sc_ebx = regs->tf_ebx; 478 frame.sf_sc.sc_edx = regs->tf_edx; 479 frame.sf_sc.sc_ecx = regs->tf_ecx; 480 frame.sf_sc.sc_eax = regs->tf_eax; 481 frame.sf_sc.sc_eip = regs->tf_eip; 482 frame.sf_sc.sc_cs = regs->tf_cs; 483 frame.sf_sc.sc_eflags = regs->tf_eflags; 484 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 485 frame.sf_sc.sc_ss = regs->tf_ss; 486 frame.sf_sc.sc_err = regs->tf_err; 487 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 488 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 489 490 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 491 frame.sf_extramask[i] = lmask.__bits[i+1]; 492 493 if (copyout(&frame, fp, sizeof(frame)) != 0) { 494 /* 495 * Process has trashed its stack; give it an illegal 496 * instruction to halt it in its tracks. 497 */ 498 PROC_LOCK(p); 499 sigexit(td, SIGILL); 500 } 501 502 /* 503 * Build context to run handler in. 504 */ 505 regs->tf_esp = (int)fp; 506 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 507 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 508 regs->tf_cs = _ucodesel; 509 regs->tf_ds = _udatasel; 510 regs->tf_es = _udatasel; 511 regs->tf_fs = _udatasel; 512 regs->tf_ss = _udatasel; 513 PROC_LOCK(p); 514 mtx_lock(&psp->ps_mtx); 515 } 516 517 /* 518 * System call to cleanup state after a signal 519 * has been taken. Reset signal mask and 520 * stack state from context left by sendsig (above). 521 * Return to previous pc and psl as specified by 522 * context left by sendsig. Check carefully to 523 * make sure that the user has not modified the 524 * psl to gain improper privileges or to cause 525 * a machine fault. 526 */ 527 int 528 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 529 { 530 struct proc *p = td->td_proc; 531 struct l_sigframe frame; 532 struct trapframe *regs; 533 l_sigset_t lmask; 534 int eflags, i; 535 ksiginfo_t ksi; 536 537 regs = td->td_frame; 538 539 #ifdef DEBUG 540 if (ldebug(sigreturn)) 541 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 542 #endif 543 /* 544 * The trampoline code hands us the sigframe. 545 * It is unsafe to keep track of it ourselves, in the event that a 546 * program jumps out of a signal handler. 547 */ 548 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 549 return (EFAULT); 550 551 /* 552 * Check for security violations. 553 */ 554 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 555 eflags = frame.sf_sc.sc_eflags; 556 /* 557 * XXX do allow users to change the privileged flag PSL_RF. The 558 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 559 * sometimes set it there too. tf_eflags is kept in the signal 560 * context during signal handling and there is no other place 561 * to remember it, so the PSL_RF bit may be corrupted by the 562 * signal handler without us knowing. Corruption of the PSL_RF 563 * bit at worst causes one more or one less debugger trap, so 564 * allowing it is fairly harmless. 565 */ 566 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 567 return(EINVAL); 568 569 /* 570 * Don't allow users to load a valid privileged %cs. Let the 571 * hardware check for invalid selectors, excess privilege in 572 * other selectors, invalid %eip's and invalid %esp's. 573 */ 574 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 575 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 576 ksiginfo_init_trap(&ksi); 577 ksi.ksi_signo = SIGBUS; 578 ksi.ksi_code = BUS_OBJERR; 579 ksi.ksi_trapno = T_PROTFLT; 580 ksi.ksi_addr = (void *)regs->tf_eip; 581 trapsignal(td, &ksi); 582 return(EINVAL); 583 } 584 585 lmask.__bits[0] = frame.sf_sc.sc_mask; 586 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 587 lmask.__bits[i+1] = frame.sf_extramask[i]; 588 PROC_LOCK(p); 589 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 590 SIG_CANTMASK(td->td_sigmask); 591 signotify(td); 592 PROC_UNLOCK(p); 593 594 /* 595 * Restore signal context. 596 */ 597 /* %gs was restored by the trampoline. */ 598 regs->tf_fs = frame.sf_sc.sc_fs; 599 regs->tf_es = frame.sf_sc.sc_es; 600 regs->tf_ds = frame.sf_sc.sc_ds; 601 regs->tf_edi = frame.sf_sc.sc_edi; 602 regs->tf_esi = frame.sf_sc.sc_esi; 603 regs->tf_ebp = frame.sf_sc.sc_ebp; 604 regs->tf_ebx = frame.sf_sc.sc_ebx; 605 regs->tf_edx = frame.sf_sc.sc_edx; 606 regs->tf_ecx = frame.sf_sc.sc_ecx; 607 regs->tf_eax = frame.sf_sc.sc_eax; 608 regs->tf_eip = frame.sf_sc.sc_eip; 609 regs->tf_cs = frame.sf_sc.sc_cs; 610 regs->tf_eflags = eflags; 611 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 612 regs->tf_ss = frame.sf_sc.sc_ss; 613 614 return (EJUSTRETURN); 615 } 616 617 /* 618 * System call to cleanup state after a signal 619 * has been taken. Reset signal mask and 620 * stack state from context left by rt_sendsig (above). 621 * Return to previous pc and psl as specified by 622 * context left by sendsig. Check carefully to 623 * make sure that the user has not modified the 624 * psl to gain improper privileges or to cause 625 * a machine fault. 626 */ 627 int 628 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 629 { 630 struct proc *p = td->td_proc; 631 struct l_ucontext uc; 632 struct l_sigcontext *context; 633 l_stack_t *lss; 634 stack_t ss; 635 struct trapframe *regs; 636 int eflags; 637 ksiginfo_t ksi; 638 639 regs = td->td_frame; 640 641 #ifdef DEBUG 642 if (ldebug(rt_sigreturn)) 643 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 644 #endif 645 /* 646 * The trampoline code hands us the ucontext. 647 * It is unsafe to keep track of it ourselves, in the event that a 648 * program jumps out of a signal handler. 649 */ 650 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 651 return (EFAULT); 652 653 context = &uc.uc_mcontext; 654 655 /* 656 * Check for security violations. 657 */ 658 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 659 eflags = context->sc_eflags; 660 /* 661 * XXX do allow users to change the privileged flag PSL_RF. The 662 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 663 * sometimes set it there too. tf_eflags is kept in the signal 664 * context during signal handling and there is no other place 665 * to remember it, so the PSL_RF bit may be corrupted by the 666 * signal handler without us knowing. Corruption of the PSL_RF 667 * bit at worst causes one more or one less debugger trap, so 668 * allowing it is fairly harmless. 669 */ 670 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 671 return(EINVAL); 672 673 /* 674 * Don't allow users to load a valid privileged %cs. Let the 675 * hardware check for invalid selectors, excess privilege in 676 * other selectors, invalid %eip's and invalid %esp's. 677 */ 678 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 679 if (!CS_SECURE(context->sc_cs)) { 680 ksiginfo_init_trap(&ksi); 681 ksi.ksi_signo = SIGBUS; 682 ksi.ksi_code = BUS_OBJERR; 683 ksi.ksi_trapno = T_PROTFLT; 684 ksi.ksi_addr = (void *)regs->tf_eip; 685 trapsignal(td, &ksi); 686 return(EINVAL); 687 } 688 689 PROC_LOCK(p); 690 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 691 SIG_CANTMASK(td->td_sigmask); 692 signotify(td); 693 PROC_UNLOCK(p); 694 695 /* 696 * Restore signal context 697 */ 698 /* %gs was restored by the trampoline. */ 699 regs->tf_fs = context->sc_fs; 700 regs->tf_es = context->sc_es; 701 regs->tf_ds = context->sc_ds; 702 regs->tf_edi = context->sc_edi; 703 regs->tf_esi = context->sc_esi; 704 regs->tf_ebp = context->sc_ebp; 705 regs->tf_ebx = context->sc_ebx; 706 regs->tf_edx = context->sc_edx; 707 regs->tf_ecx = context->sc_ecx; 708 regs->tf_eax = context->sc_eax; 709 regs->tf_eip = context->sc_eip; 710 regs->tf_cs = context->sc_cs; 711 regs->tf_eflags = eflags; 712 regs->tf_esp = context->sc_esp_at_signal; 713 regs->tf_ss = context->sc_ss; 714 715 /* 716 * call sigaltstack & ignore results.. 717 */ 718 lss = &uc.uc_stack; 719 ss.ss_sp = lss->ss_sp; 720 ss.ss_size = lss->ss_size; 721 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 722 723 #ifdef DEBUG 724 if (ldebug(rt_sigreturn)) 725 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 726 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 727 #endif 728 (void)kern_sigaltstack(td, &ss, NULL); 729 730 return (EJUSTRETURN); 731 } 732 733 /* 734 * MPSAFE 735 */ 736 static void 737 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 738 { 739 args[0] = tf->tf_ebx; 740 args[1] = tf->tf_ecx; 741 args[2] = tf->tf_edx; 742 args[3] = tf->tf_esi; 743 args[4] = tf->tf_edi; 744 args[5] = tf->tf_ebp; /* Unconfirmed */ 745 *params = NULL; /* no copyin */ 746 } 747 748 /* 749 * If a linux binary is exec'ing something, try this image activator 750 * first. We override standard shell script execution in order to 751 * be able to modify the interpreter path. We only do this if a linux 752 * binary is doing the exec, so we do not create an EXEC module for it. 753 */ 754 static int exec_linux_imgact_try(struct image_params *iparams); 755 756 static int 757 exec_linux_imgact_try(struct image_params *imgp) 758 { 759 const char *head = (const char *)imgp->image_header; 760 char *rpath; 761 int error = -1, len; 762 763 /* 764 * The interpreter for shell scripts run from a linux binary needs 765 * to be located in /compat/linux if possible in order to recursively 766 * maintain linux path emulation. 767 */ 768 if (((const short *)head)[0] == SHELLMAGIC) { 769 /* 770 * Run our normal shell image activator. If it succeeds attempt 771 * to use the alternate path for the interpreter. If an alternate 772 * path is found, use our stringspace to store it. 773 */ 774 if ((error = exec_shell_imgact(imgp)) == 0) { 775 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 776 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); 777 if (rpath != NULL) { 778 len = strlen(rpath) + 1; 779 780 if (len <= MAXSHELLCMDLEN) { 781 memcpy(imgp->interpreter_name, rpath, len); 782 } 783 free(rpath, M_TEMP); 784 } 785 } 786 } 787 return(error); 788 } 789 790 /* 791 * exec_setregs may initialize some registers differently than Linux 792 * does, thus potentially confusing Linux binaries. If necessary, we 793 * override the exec_setregs default(s) here. 794 */ 795 static void 796 exec_linux_setregs(struct thread *td, u_long entry, 797 u_long stack, u_long ps_strings) 798 { 799 static const u_short control = __LINUX_NPXCW__; 800 struct pcb *pcb = td->td_pcb; 801 802 exec_setregs(td, entry, stack, ps_strings); 803 804 /* Linux sets %gs to 0, we default to _udatasel */ 805 pcb->pcb_gs = 0; load_gs(0); 806 807 /* Linux sets the i387 to extended precision. */ 808 fldcw(&control); 809 } 810 811 struct sysentvec linux_sysvec = { 812 .sv_size = LINUX_SYS_MAXSYSCALL, 813 .sv_table = linux_sysent, 814 .sv_mask = 0, 815 .sv_sigsize = LINUX_SIGTBLSZ, 816 .sv_sigtbl = bsd_to_linux_signal, 817 .sv_errsize = ELAST + 1, 818 .sv_errtbl = bsd_to_linux_errno, 819 .sv_transtrap = translate_traps, 820 .sv_fixup = linux_fixup, 821 .sv_sendsig = linux_sendsig, 822 .sv_sigcode = linux_sigcode, 823 .sv_szsigcode = &linux_szsigcode, 824 .sv_prepsyscall = linux_prepsyscall, 825 .sv_name = "Linux a.out", 826 .sv_coredump = NULL, 827 .sv_imgact_try = exec_linux_imgact_try, 828 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 829 .sv_pagesize = PAGE_SIZE, 830 .sv_minuser = VM_MIN_ADDRESS, 831 .sv_maxuser = VM_MAXUSER_ADDRESS, 832 .sv_usrstack = USRSTACK, 833 .sv_psstrings = PS_STRINGS, 834 .sv_stackprot = VM_PROT_ALL, 835 .sv_copyout_strings = exec_copyout_strings, 836 .sv_setregs = exec_linux_setregs, 837 .sv_fixlimit = NULL, 838 .sv_maxssiz = NULL, 839 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 840 }; 841 842 struct sysentvec elf_linux_sysvec = { 843 .sv_size = LINUX_SYS_MAXSYSCALL, 844 .sv_table = linux_sysent, 845 .sv_mask = 0, 846 .sv_sigsize = LINUX_SIGTBLSZ, 847 .sv_sigtbl = bsd_to_linux_signal, 848 .sv_errsize = ELAST + 1, 849 .sv_errtbl = bsd_to_linux_errno, 850 .sv_transtrap = translate_traps, 851 .sv_fixup = elf_linux_fixup, 852 .sv_sendsig = linux_sendsig, 853 .sv_sigcode = linux_sigcode, 854 .sv_szsigcode = &linux_szsigcode, 855 .sv_prepsyscall = linux_prepsyscall, 856 .sv_name = "Linux ELF", 857 .sv_coredump = elf32_coredump, 858 .sv_imgact_try = exec_linux_imgact_try, 859 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 860 .sv_pagesize = PAGE_SIZE, 861 .sv_minuser = VM_MIN_ADDRESS, 862 .sv_maxuser = VM_MAXUSER_ADDRESS, 863 .sv_usrstack = USRSTACK, 864 .sv_psstrings = PS_STRINGS, 865 .sv_stackprot = VM_PROT_ALL, 866 .sv_copyout_strings = exec_copyout_strings, 867 .sv_setregs = exec_linux_setregs, 868 .sv_fixlimit = NULL, 869 .sv_maxssiz = NULL, 870 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 871 }; 872 873 static Elf32_Brandinfo linux_brand = { 874 .brand = ELFOSABI_LINUX, 875 .machine = EM_386, 876 .compat_3_brand = "Linux", 877 .emul_path = "/compat/linux", 878 .interp_path = "/lib/ld-linux.so.1", 879 .sysvec = &elf_linux_sysvec, 880 .interp_newpath = NULL, 881 .flags = BI_CAN_EXEC_DYN, 882 }; 883 884 static Elf32_Brandinfo linux_glibc2brand = { 885 .brand = ELFOSABI_LINUX, 886 .machine = EM_386, 887 .compat_3_brand = "Linux", 888 .emul_path = "/compat/linux", 889 .interp_path = "/lib/ld-linux.so.2", 890 .sysvec = &elf_linux_sysvec, 891 .interp_newpath = NULL, 892 .flags = BI_CAN_EXEC_DYN, 893 }; 894 895 Elf32_Brandinfo *linux_brandlist[] = { 896 &linux_brand, 897 &linux_glibc2brand, 898 NULL 899 }; 900 901 static int 902 linux_elf_modevent(module_t mod, int type, void *data) 903 { 904 Elf32_Brandinfo **brandinfo; 905 int error; 906 struct linux_ioctl_handler **lihp; 907 struct linux_device_handler **ldhp; 908 909 error = 0; 910 911 switch(type) { 912 case MOD_LOAD: 913 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 914 ++brandinfo) 915 if (elf32_insert_brand_entry(*brandinfo) < 0) 916 error = EINVAL; 917 if (error == 0) { 918 SET_FOREACH(lihp, linux_ioctl_handler_set) 919 linux_ioctl_register_handler(*lihp); 920 SET_FOREACH(ldhp, linux_device_handler_set) 921 linux_device_register_handler(*ldhp); 922 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 923 sx_init(&emul_shared_lock, "emuldata->shared lock"); 924 LIST_INIT(&futex_list); 925 sx_init(&futex_sx, "futex protection lock"); 926 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 927 NULL, 1000); 928 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 929 NULL, 1000); 930 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 931 NULL, 1000); 932 if (bootverbose) 933 printf("Linux ELF exec handler installed\n"); 934 } else 935 printf("cannot insert Linux ELF brand handler\n"); 936 break; 937 case MOD_UNLOAD: 938 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 939 ++brandinfo) 940 if (elf32_brand_inuse(*brandinfo)) 941 error = EBUSY; 942 if (error == 0) { 943 for (brandinfo = &linux_brandlist[0]; 944 *brandinfo != NULL; ++brandinfo) 945 if (elf32_remove_brand_entry(*brandinfo) < 0) 946 error = EINVAL; 947 } 948 if (error == 0) { 949 SET_FOREACH(lihp, linux_ioctl_handler_set) 950 linux_ioctl_unregister_handler(*lihp); 951 SET_FOREACH(ldhp, linux_device_handler_set) 952 linux_device_unregister_handler(*ldhp); 953 mtx_destroy(&emul_lock); 954 sx_destroy(&emul_shared_lock); 955 sx_destroy(&futex_sx); 956 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 957 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 958 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 959 if (bootverbose) 960 printf("Linux ELF exec handler removed\n"); 961 } else 962 printf("Could not deinstall ELF interpreter entry\n"); 963 break; 964 default: 965 return EOPNOTSUPP; 966 } 967 return error; 968 } 969 970 static moduledata_t linux_elf_mod = { 971 "linuxelf", 972 linux_elf_modevent, 973 0 974 }; 975 976 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 977