1 /*- 2 * Copyright (c) 1994-1996 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software without specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/exec.h> 35 #include <sys/fcntl.h> 36 #include <sys/imgact.h> 37 #include <sys/imgact_aout.h> 38 #include <sys/imgact_elf.h> 39 #include <sys/kernel.h> 40 #include <sys/lock.h> 41 #include <sys/malloc.h> 42 #include <sys/module.h> 43 #include <sys/mutex.h> 44 #include <sys/proc.h> 45 #include <sys/signalvar.h> 46 #include <sys/syscallsubr.h> 47 #include <sys/sysent.h> 48 #include <sys/sysproto.h> 49 #include <sys/vnode.h> 50 #include <sys/eventhandler.h> 51 52 #include <vm/vm.h> 53 #include <vm/pmap.h> 54 #include <vm/vm_extern.h> 55 #include <vm/vm_map.h> 56 #include <vm/vm_object.h> 57 #include <vm/vm_page.h> 58 #include <vm/vm_param.h> 59 60 #include <machine/cpu.h> 61 #include <machine/md_var.h> 62 #include <machine/pcb.h> 63 64 #include <i386/linux/linux.h> 65 #include <i386/linux/linux_proto.h> 66 #include <compat/linux/linux_emul.h> 67 #include <compat/linux/linux_mib.h> 68 #include <compat/linux/linux_signal.h> 69 #include <compat/linux/linux_util.h> 70 71 MODULE_VERSION(linux, 1); 72 73 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 74 75 #if BYTE_ORDER == LITTLE_ENDIAN 76 #define SHELLMAGIC 0x2123 /* #! */ 77 #else 78 #define SHELLMAGIC 0x2321 79 #endif 80 81 /* 82 * Allow the sendsig functions to use the ldebug() facility 83 * even though they are not syscalls themselves. Map them 84 * to syscall 0. This is slightly less bogus than using 85 * ldebug(sigreturn). 86 */ 87 #define LINUX_SYS_linux_rt_sendsig 0 88 #define LINUX_SYS_linux_sendsig 0 89 90 #define fldcw(addr) __asm("fldcw %0" : : "m" (*(addr))) 91 #define __LINUX_NPXCW__ 0x37f 92 93 extern char linux_sigcode[]; 94 extern int linux_szsigcode; 95 96 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 97 98 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 99 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 100 101 static int linux_fixup(register_t **stack_base, 102 struct image_params *iparams); 103 static int elf_linux_fixup(register_t **stack_base, 104 struct image_params *iparams); 105 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 106 caddr_t *params); 107 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 108 static void exec_linux_setregs(struct thread *td, u_long entry, 109 u_long stack, u_long ps_strings); 110 111 extern LIST_HEAD(futex_list, futex) futex_list; 112 extern struct sx futex_sx; 113 114 static eventhandler_tag linux_exit_tag; 115 static eventhandler_tag linux_schedtail_tag; 116 static eventhandler_tag linux_exec_tag; 117 118 /* 119 * Linux syscalls return negative errno's, we do positive and map them 120 * Reference: 121 * FreeBSD: src/sys/sys/errno.h 122 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 123 * linux-2.6.17.8/include/asm-generic/errno.h 124 */ 125 static int bsd_to_linux_errno[ELAST + 1] = { 126 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 127 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 128 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 129 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 130 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 131 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 132 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 133 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 134 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 135 -72, -67, -71 136 }; 137 138 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 139 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 140 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 141 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 142 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 143 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 144 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 145 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 146 0, LINUX_SIGUSR1, LINUX_SIGUSR2 147 }; 148 149 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 150 SIGHUP, SIGINT, SIGQUIT, SIGILL, 151 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 152 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 153 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 154 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 155 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 156 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 157 SIGIO, SIGURG, SIGSYS 158 }; 159 160 #define LINUX_T_UNKNOWN 255 161 static int _bsd_to_linux_trapcode[] = { 162 LINUX_T_UNKNOWN, /* 0 */ 163 6, /* 1 T_PRIVINFLT */ 164 LINUX_T_UNKNOWN, /* 2 */ 165 3, /* 3 T_BPTFLT */ 166 LINUX_T_UNKNOWN, /* 4 */ 167 LINUX_T_UNKNOWN, /* 5 */ 168 16, /* 6 T_ARITHTRAP */ 169 254, /* 7 T_ASTFLT */ 170 LINUX_T_UNKNOWN, /* 8 */ 171 13, /* 9 T_PROTFLT */ 172 1, /* 10 T_TRCTRAP */ 173 LINUX_T_UNKNOWN, /* 11 */ 174 14, /* 12 T_PAGEFLT */ 175 LINUX_T_UNKNOWN, /* 13 */ 176 17, /* 14 T_ALIGNFLT */ 177 LINUX_T_UNKNOWN, /* 15 */ 178 LINUX_T_UNKNOWN, /* 16 */ 179 LINUX_T_UNKNOWN, /* 17 */ 180 0, /* 18 T_DIVIDE */ 181 2, /* 19 T_NMI */ 182 4, /* 20 T_OFLOW */ 183 5, /* 21 T_BOUND */ 184 7, /* 22 T_DNA */ 185 8, /* 23 T_DOUBLEFLT */ 186 9, /* 24 T_FPOPFLT */ 187 10, /* 25 T_TSSFLT */ 188 11, /* 26 T_SEGNPFLT */ 189 12, /* 27 T_STKFLT */ 190 18, /* 28 T_MCHK */ 191 19, /* 29 T_XMMFLT */ 192 15 /* 30 T_RESERVED */ 193 }; 194 #define bsd_to_linux_trapcode(code) \ 195 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 196 _bsd_to_linux_trapcode[(code)]: \ 197 LINUX_T_UNKNOWN) 198 199 /* 200 * If FreeBSD & Linux have a difference of opinion about what a trap 201 * means, deal with it here. 202 * 203 * MPSAFE 204 */ 205 static int 206 translate_traps(int signal, int trap_code) 207 { 208 if (signal != SIGBUS) 209 return signal; 210 switch (trap_code) { 211 case T_PROTFLT: 212 case T_TSSFLT: 213 case T_DOUBLEFLT: 214 case T_PAGEFLT: 215 return SIGSEGV; 216 default: 217 return signal; 218 } 219 } 220 221 static int 222 linux_fixup(register_t **stack_base, struct image_params *imgp) 223 { 224 register_t *argv, *envp; 225 226 argv = *stack_base; 227 envp = *stack_base + (imgp->args->argc + 1); 228 (*stack_base)--; 229 **stack_base = (intptr_t)(void *)envp; 230 (*stack_base)--; 231 **stack_base = (intptr_t)(void *)argv; 232 (*stack_base)--; 233 **stack_base = imgp->args->argc; 234 return 0; 235 } 236 237 static int 238 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 239 { 240 Elf32_Auxargs *args; 241 register_t *pos; 242 243 KASSERT(curthread->td_proc == imgp->proc, 244 ("unsafe elf_linux_fixup(), should be curproc")); 245 args = (Elf32_Auxargs *)imgp->auxargs; 246 pos = *stack_base + (imgp->args->argc + imgp->args->envc + 2); 247 248 if (args->trace) 249 AUXARGS_ENTRY(pos, AT_DEBUG, 1); 250 if (args->execfd != -1) 251 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 252 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 253 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 254 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 255 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 256 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 257 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 258 AUXARGS_ENTRY(pos, AT_BASE, args->base); 259 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 260 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 261 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 262 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 263 AUXARGS_ENTRY(pos, AT_NULL, 0); 264 265 free(imgp->auxargs, M_TEMP); 266 imgp->auxargs = NULL; 267 268 (*stack_base)--; 269 **stack_base = (register_t)imgp->args->argc; 270 return 0; 271 } 272 273 extern int _ucodesel, _udatasel; 274 extern unsigned long linux_sznonrtsigcode; 275 276 static void 277 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 278 { 279 struct thread *td = curthread; 280 struct proc *p = td->td_proc; 281 struct sigacts *psp; 282 struct trapframe *regs; 283 struct l_rt_sigframe *fp, frame; 284 int sig, code; 285 int oonstack; 286 287 sig = ksi->ksi_signo; 288 code = ksi->ksi_code; 289 PROC_LOCK_ASSERT(p, MA_OWNED); 290 psp = p->p_sigacts; 291 mtx_assert(&psp->ps_mtx, MA_OWNED); 292 regs = td->td_frame; 293 oonstack = sigonstack(regs->tf_esp); 294 295 #ifdef DEBUG 296 if (ldebug(rt_sendsig)) 297 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 298 catcher, sig, (void*)mask, code); 299 #endif 300 /* 301 * Allocate space for the signal handler context. 302 */ 303 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 304 SIGISMEMBER(psp->ps_sigonstack, sig)) { 305 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 306 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 307 } else 308 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 309 mtx_unlock(&psp->ps_mtx); 310 311 /* 312 * Build the argument list for the signal handler. 313 */ 314 if (p->p_sysent->sv_sigtbl) 315 if (sig <= p->p_sysent->sv_sigsize) 316 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 317 318 bzero(&frame, sizeof(frame)); 319 320 frame.sf_handler = catcher; 321 frame.sf_sig = sig; 322 frame.sf_siginfo = &fp->sf_si; 323 frame.sf_ucontext = &fp->sf_sc; 324 325 /* Fill in POSIX parts */ 326 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 327 328 /* 329 * Build the signal context to be used by sigreturn. 330 */ 331 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 332 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 333 334 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 335 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 336 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 337 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 338 PROC_UNLOCK(p); 339 340 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 341 342 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 343 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 344 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 345 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 346 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 347 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 348 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 349 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 350 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 351 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 352 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 353 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 354 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 355 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 356 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 357 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 358 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 359 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 360 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 361 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 362 363 #ifdef DEBUG 364 if (ldebug(rt_sendsig)) 365 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 366 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 367 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 368 #endif 369 370 if (copyout(&frame, fp, sizeof(frame)) != 0) { 371 /* 372 * Process has trashed its stack; give it an illegal 373 * instruction to halt it in its tracks. 374 */ 375 #ifdef DEBUG 376 if (ldebug(rt_sendsig)) 377 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 378 fp, oonstack); 379 #endif 380 PROC_LOCK(p); 381 sigexit(td, SIGILL); 382 } 383 384 /* 385 * Build context to run handler in. 386 */ 387 regs->tf_esp = (int)fp; 388 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 389 linux_sznonrtsigcode; 390 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 391 regs->tf_cs = _ucodesel; 392 regs->tf_ds = _udatasel; 393 regs->tf_es = _udatasel; 394 regs->tf_fs = _udatasel; 395 regs->tf_ss = _udatasel; 396 PROC_LOCK(p); 397 mtx_lock(&psp->ps_mtx); 398 } 399 400 401 /* 402 * Send an interrupt to process. 403 * 404 * Stack is set up to allow sigcode stored 405 * in u. to call routine, followed by kcall 406 * to sigreturn routine below. After sigreturn 407 * resets the signal mask, the stack, and the 408 * frame pointer, it returns to the user 409 * specified pc, psl. 410 */ 411 static void 412 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 413 { 414 struct thread *td = curthread; 415 struct proc *p = td->td_proc; 416 struct sigacts *psp; 417 struct trapframe *regs; 418 struct l_sigframe *fp, frame; 419 l_sigset_t lmask; 420 int sig, code; 421 int oonstack, i; 422 423 PROC_LOCK_ASSERT(p, MA_OWNED); 424 psp = p->p_sigacts; 425 sig = ksi->ksi_signo; 426 code = ksi->ksi_code; 427 mtx_assert(&psp->ps_mtx, MA_OWNED); 428 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 429 /* Signal handler installed with SA_SIGINFO. */ 430 linux_rt_sendsig(catcher, ksi, mask); 431 return; 432 } 433 regs = td->td_frame; 434 oonstack = sigonstack(regs->tf_esp); 435 436 #ifdef DEBUG 437 if (ldebug(sendsig)) 438 printf(ARGS(sendsig, "%p, %d, %p, %u"), 439 catcher, sig, (void*)mask, code); 440 #endif 441 442 /* 443 * Allocate space for the signal handler context. 444 */ 445 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 446 SIGISMEMBER(psp->ps_sigonstack, sig)) { 447 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 448 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 449 } else 450 fp = (struct l_sigframe *)regs->tf_esp - 1; 451 mtx_unlock(&psp->ps_mtx); 452 PROC_UNLOCK(p); 453 454 /* 455 * Build the argument list for the signal handler. 456 */ 457 if (p->p_sysent->sv_sigtbl) 458 if (sig <= p->p_sysent->sv_sigsize) 459 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 460 461 bzero(&frame, sizeof(frame)); 462 463 frame.sf_handler = catcher; 464 frame.sf_sig = sig; 465 466 bsd_to_linux_sigset(mask, &lmask); 467 468 /* 469 * Build the signal context to be used by sigreturn. 470 */ 471 frame.sf_sc.sc_mask = lmask.__bits[0]; 472 frame.sf_sc.sc_gs = rgs(); 473 frame.sf_sc.sc_fs = regs->tf_fs; 474 frame.sf_sc.sc_es = regs->tf_es; 475 frame.sf_sc.sc_ds = regs->tf_ds; 476 frame.sf_sc.sc_edi = regs->tf_edi; 477 frame.sf_sc.sc_esi = regs->tf_esi; 478 frame.sf_sc.sc_ebp = regs->tf_ebp; 479 frame.sf_sc.sc_ebx = regs->tf_ebx; 480 frame.sf_sc.sc_edx = regs->tf_edx; 481 frame.sf_sc.sc_ecx = regs->tf_ecx; 482 frame.sf_sc.sc_eax = regs->tf_eax; 483 frame.sf_sc.sc_eip = regs->tf_eip; 484 frame.sf_sc.sc_cs = regs->tf_cs; 485 frame.sf_sc.sc_eflags = regs->tf_eflags; 486 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 487 frame.sf_sc.sc_ss = regs->tf_ss; 488 frame.sf_sc.sc_err = regs->tf_err; 489 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 490 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 491 492 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 493 frame.sf_extramask[i] = lmask.__bits[i+1]; 494 495 if (copyout(&frame, fp, sizeof(frame)) != 0) { 496 /* 497 * Process has trashed its stack; give it an illegal 498 * instruction to halt it in its tracks. 499 */ 500 PROC_LOCK(p); 501 sigexit(td, SIGILL); 502 } 503 504 /* 505 * Build context to run handler in. 506 */ 507 regs->tf_esp = (int)fp; 508 regs->tf_eip = PS_STRINGS - *(p->p_sysent->sv_szsigcode); 509 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 510 regs->tf_cs = _ucodesel; 511 regs->tf_ds = _udatasel; 512 regs->tf_es = _udatasel; 513 regs->tf_fs = _udatasel; 514 regs->tf_ss = _udatasel; 515 PROC_LOCK(p); 516 mtx_lock(&psp->ps_mtx); 517 } 518 519 /* 520 * System call to cleanup state after a signal 521 * has been taken. Reset signal mask and 522 * stack state from context left by sendsig (above). 523 * Return to previous pc and psl as specified by 524 * context left by sendsig. Check carefully to 525 * make sure that the user has not modified the 526 * psl to gain improper privileges or to cause 527 * a machine fault. 528 */ 529 int 530 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 531 { 532 struct proc *p = td->td_proc; 533 struct l_sigframe frame; 534 struct trapframe *regs; 535 l_sigset_t lmask; 536 int eflags, i; 537 ksiginfo_t ksi; 538 539 regs = td->td_frame; 540 541 #ifdef DEBUG 542 if (ldebug(sigreturn)) 543 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 544 #endif 545 /* 546 * The trampoline code hands us the sigframe. 547 * It is unsafe to keep track of it ourselves, in the event that a 548 * program jumps out of a signal handler. 549 */ 550 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 551 return (EFAULT); 552 553 /* 554 * Check for security violations. 555 */ 556 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 557 eflags = frame.sf_sc.sc_eflags; 558 /* 559 * XXX do allow users to change the privileged flag PSL_RF. The 560 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 561 * sometimes set it there too. tf_eflags is kept in the signal 562 * context during signal handling and there is no other place 563 * to remember it, so the PSL_RF bit may be corrupted by the 564 * signal handler without us knowing. Corruption of the PSL_RF 565 * bit at worst causes one more or one less debugger trap, so 566 * allowing it is fairly harmless. 567 */ 568 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 569 return(EINVAL); 570 571 /* 572 * Don't allow users to load a valid privileged %cs. Let the 573 * hardware check for invalid selectors, excess privilege in 574 * other selectors, invalid %eip's and invalid %esp's. 575 */ 576 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 577 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 578 ksiginfo_init_trap(&ksi); 579 ksi.ksi_signo = SIGBUS; 580 ksi.ksi_code = BUS_OBJERR; 581 ksi.ksi_trapno = T_PROTFLT; 582 ksi.ksi_addr = (void *)regs->tf_eip; 583 trapsignal(td, &ksi); 584 return(EINVAL); 585 } 586 587 lmask.__bits[0] = frame.sf_sc.sc_mask; 588 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 589 lmask.__bits[i+1] = frame.sf_extramask[i]; 590 PROC_LOCK(p); 591 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 592 SIG_CANTMASK(td->td_sigmask); 593 signotify(td); 594 PROC_UNLOCK(p); 595 596 /* 597 * Restore signal context. 598 */ 599 /* %gs was restored by the trampoline. */ 600 regs->tf_fs = frame.sf_sc.sc_fs; 601 regs->tf_es = frame.sf_sc.sc_es; 602 regs->tf_ds = frame.sf_sc.sc_ds; 603 regs->tf_edi = frame.sf_sc.sc_edi; 604 regs->tf_esi = frame.sf_sc.sc_esi; 605 regs->tf_ebp = frame.sf_sc.sc_ebp; 606 regs->tf_ebx = frame.sf_sc.sc_ebx; 607 regs->tf_edx = frame.sf_sc.sc_edx; 608 regs->tf_ecx = frame.sf_sc.sc_ecx; 609 regs->tf_eax = frame.sf_sc.sc_eax; 610 regs->tf_eip = frame.sf_sc.sc_eip; 611 regs->tf_cs = frame.sf_sc.sc_cs; 612 regs->tf_eflags = eflags; 613 regs->tf_esp = frame.sf_sc.sc_esp_at_signal; 614 regs->tf_ss = frame.sf_sc.sc_ss; 615 616 return (EJUSTRETURN); 617 } 618 619 /* 620 * System call to cleanup state after a signal 621 * has been taken. Reset signal mask and 622 * stack state from context left by rt_sendsig (above). 623 * Return to previous pc and psl as specified by 624 * context left by sendsig. Check carefully to 625 * make sure that the user has not modified the 626 * psl to gain improper privileges or to cause 627 * a machine fault. 628 */ 629 int 630 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 631 { 632 struct proc *p = td->td_proc; 633 struct l_ucontext uc; 634 struct l_sigcontext *context; 635 l_stack_t *lss; 636 stack_t ss; 637 struct trapframe *regs; 638 int eflags; 639 ksiginfo_t ksi; 640 641 regs = td->td_frame; 642 643 #ifdef DEBUG 644 if (ldebug(rt_sigreturn)) 645 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 646 #endif 647 /* 648 * The trampoline code hands us the ucontext. 649 * It is unsafe to keep track of it ourselves, in the event that a 650 * program jumps out of a signal handler. 651 */ 652 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 653 return (EFAULT); 654 655 context = &uc.uc_mcontext; 656 657 /* 658 * Check for security violations. 659 */ 660 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 661 eflags = context->sc_eflags; 662 /* 663 * XXX do allow users to change the privileged flag PSL_RF. The 664 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 665 * sometimes set it there too. tf_eflags is kept in the signal 666 * context during signal handling and there is no other place 667 * to remember it, so the PSL_RF bit may be corrupted by the 668 * signal handler without us knowing. Corruption of the PSL_RF 669 * bit at worst causes one more or one less debugger trap, so 670 * allowing it is fairly harmless. 671 */ 672 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_eflags & ~PSL_RF)) 673 return(EINVAL); 674 675 /* 676 * Don't allow users to load a valid privileged %cs. Let the 677 * hardware check for invalid selectors, excess privilege in 678 * other selectors, invalid %eip's and invalid %esp's. 679 */ 680 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 681 if (!CS_SECURE(context->sc_cs)) { 682 ksiginfo_init_trap(&ksi); 683 ksi.ksi_signo = SIGBUS; 684 ksi.ksi_code = BUS_OBJERR; 685 ksi.ksi_trapno = T_PROTFLT; 686 ksi.ksi_addr = (void *)regs->tf_eip; 687 trapsignal(td, &ksi); 688 return(EINVAL); 689 } 690 691 PROC_LOCK(p); 692 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 693 SIG_CANTMASK(td->td_sigmask); 694 signotify(td); 695 PROC_UNLOCK(p); 696 697 /* 698 * Restore signal context 699 */ 700 /* %gs was restored by the trampoline. */ 701 regs->tf_fs = context->sc_fs; 702 regs->tf_es = context->sc_es; 703 regs->tf_ds = context->sc_ds; 704 regs->tf_edi = context->sc_edi; 705 regs->tf_esi = context->sc_esi; 706 regs->tf_ebp = context->sc_ebp; 707 regs->tf_ebx = context->sc_ebx; 708 regs->tf_edx = context->sc_edx; 709 regs->tf_ecx = context->sc_ecx; 710 regs->tf_eax = context->sc_eax; 711 regs->tf_eip = context->sc_eip; 712 regs->tf_cs = context->sc_cs; 713 regs->tf_eflags = eflags; 714 regs->tf_esp = context->sc_esp_at_signal; 715 regs->tf_ss = context->sc_ss; 716 717 /* 718 * call sigaltstack & ignore results.. 719 */ 720 lss = &uc.uc_stack; 721 ss.ss_sp = lss->ss_sp; 722 ss.ss_size = lss->ss_size; 723 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 724 725 #ifdef DEBUG 726 if (ldebug(rt_sigreturn)) 727 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%x, mask: 0x%x"), 728 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 729 #endif 730 (void)kern_sigaltstack(td, &ss, NULL); 731 732 return (EJUSTRETURN); 733 } 734 735 /* 736 * MPSAFE 737 */ 738 static void 739 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 740 { 741 args[0] = tf->tf_ebx; 742 args[1] = tf->tf_ecx; 743 args[2] = tf->tf_edx; 744 args[3] = tf->tf_esi; 745 args[4] = tf->tf_edi; 746 args[5] = tf->tf_ebp; /* Unconfirmed */ 747 *params = NULL; /* no copyin */ 748 } 749 750 /* 751 * If a linux binary is exec'ing something, try this image activator 752 * first. We override standard shell script execution in order to 753 * be able to modify the interpreter path. We only do this if a linux 754 * binary is doing the exec, so we do not create an EXEC module for it. 755 */ 756 static int exec_linux_imgact_try(struct image_params *iparams); 757 758 static int 759 exec_linux_imgact_try(struct image_params *imgp) 760 { 761 const char *head = (const char *)imgp->image_header; 762 char *rpath; 763 int error = -1, len; 764 765 /* 766 * The interpreter for shell scripts run from a linux binary needs 767 * to be located in /compat/linux if possible in order to recursively 768 * maintain linux path emulation. 769 */ 770 if (((const short *)head)[0] == SHELLMAGIC) { 771 /* 772 * Run our normal shell image activator. If it succeeds attempt 773 * to use the alternate path for the interpreter. If an alternate 774 * path is found, use our stringspace to store it. 775 */ 776 if ((error = exec_shell_imgact(imgp)) == 0) { 777 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 778 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, AT_FDCWD); 779 if (rpath != NULL) { 780 len = strlen(rpath) + 1; 781 782 if (len <= MAXSHELLCMDLEN) { 783 memcpy(imgp->interpreter_name, rpath, len); 784 } 785 free(rpath, M_TEMP); 786 } 787 } 788 } 789 return(error); 790 } 791 792 /* 793 * exec_setregs may initialize some registers differently than Linux 794 * does, thus potentially confusing Linux binaries. If necessary, we 795 * override the exec_setregs default(s) here. 796 */ 797 static void 798 exec_linux_setregs(struct thread *td, u_long entry, 799 u_long stack, u_long ps_strings) 800 { 801 static const u_short control = __LINUX_NPXCW__; 802 struct pcb *pcb = td->td_pcb; 803 804 exec_setregs(td, entry, stack, ps_strings); 805 806 /* Linux sets %gs to 0, we default to _udatasel */ 807 pcb->pcb_gs = 0; load_gs(0); 808 809 /* Linux sets the i387 to extended precision. */ 810 fldcw(&control); 811 } 812 813 struct sysentvec linux_sysvec = { 814 .sv_size = LINUX_SYS_MAXSYSCALL, 815 .sv_table = linux_sysent, 816 .sv_mask = 0, 817 .sv_sigsize = LINUX_SIGTBLSZ, 818 .sv_sigtbl = bsd_to_linux_signal, 819 .sv_errsize = ELAST + 1, 820 .sv_errtbl = bsd_to_linux_errno, 821 .sv_transtrap = translate_traps, 822 .sv_fixup = linux_fixup, 823 .sv_sendsig = linux_sendsig, 824 .sv_sigcode = linux_sigcode, 825 .sv_szsigcode = &linux_szsigcode, 826 .sv_prepsyscall = linux_prepsyscall, 827 .sv_name = "Linux a.out", 828 .sv_coredump = NULL, 829 .sv_imgact_try = exec_linux_imgact_try, 830 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 831 .sv_pagesize = PAGE_SIZE, 832 .sv_minuser = VM_MIN_ADDRESS, 833 .sv_maxuser = VM_MAXUSER_ADDRESS, 834 .sv_usrstack = USRSTACK, 835 .sv_psstrings = PS_STRINGS, 836 .sv_stackprot = VM_PROT_ALL, 837 .sv_copyout_strings = exec_copyout_strings, 838 .sv_setregs = exec_linux_setregs, 839 .sv_fixlimit = NULL, 840 .sv_maxssiz = NULL, 841 .sv_flags = SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 842 }; 843 844 struct sysentvec elf_linux_sysvec = { 845 .sv_size = LINUX_SYS_MAXSYSCALL, 846 .sv_table = linux_sysent, 847 .sv_mask = 0, 848 .sv_sigsize = LINUX_SIGTBLSZ, 849 .sv_sigtbl = bsd_to_linux_signal, 850 .sv_errsize = ELAST + 1, 851 .sv_errtbl = bsd_to_linux_errno, 852 .sv_transtrap = translate_traps, 853 .sv_fixup = elf_linux_fixup, 854 .sv_sendsig = linux_sendsig, 855 .sv_sigcode = linux_sigcode, 856 .sv_szsigcode = &linux_szsigcode, 857 .sv_prepsyscall = linux_prepsyscall, 858 .sv_name = "Linux ELF", 859 .sv_coredump = elf32_coredump, 860 .sv_imgact_try = exec_linux_imgact_try, 861 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 862 .sv_pagesize = PAGE_SIZE, 863 .sv_minuser = VM_MIN_ADDRESS, 864 .sv_maxuser = VM_MAXUSER_ADDRESS, 865 .sv_usrstack = USRSTACK, 866 .sv_psstrings = PS_STRINGS, 867 .sv_stackprot = VM_PROT_ALL, 868 .sv_copyout_strings = exec_copyout_strings, 869 .sv_setregs = exec_linux_setregs, 870 .sv_fixlimit = NULL, 871 .sv_maxssiz = NULL, 872 .sv_flags = SV_ABI_LINUX | SV_IA32 | SV_ILP32 873 }; 874 875 static Elf32_Brandinfo linux_brand = { 876 .brand = ELFOSABI_LINUX, 877 .machine = EM_386, 878 .compat_3_brand = "Linux", 879 .emul_path = "/compat/linux", 880 .interp_path = "/lib/ld-linux.so.1", 881 .sysvec = &elf_linux_sysvec, 882 .interp_newpath = NULL, 883 .flags = BI_CAN_EXEC_DYN, 884 }; 885 886 static Elf32_Brandinfo linux_glibc2brand = { 887 .brand = ELFOSABI_LINUX, 888 .machine = EM_386, 889 .compat_3_brand = "Linux", 890 .emul_path = "/compat/linux", 891 .interp_path = "/lib/ld-linux.so.2", 892 .sysvec = &elf_linux_sysvec, 893 .interp_newpath = NULL, 894 .flags = BI_CAN_EXEC_DYN, 895 }; 896 897 Elf32_Brandinfo *linux_brandlist[] = { 898 &linux_brand, 899 &linux_glibc2brand, 900 NULL 901 }; 902 903 static int 904 linux_elf_modevent(module_t mod, int type, void *data) 905 { 906 Elf32_Brandinfo **brandinfo; 907 int error; 908 struct linux_ioctl_handler **lihp; 909 struct linux_device_handler **ldhp; 910 911 error = 0; 912 913 switch(type) { 914 case MOD_LOAD: 915 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 916 ++brandinfo) 917 if (elf32_insert_brand_entry(*brandinfo) < 0) 918 error = EINVAL; 919 if (error == 0) { 920 SET_FOREACH(lihp, linux_ioctl_handler_set) 921 linux_ioctl_register_handler(*lihp); 922 SET_FOREACH(ldhp, linux_device_handler_set) 923 linux_device_register_handler(*ldhp); 924 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 925 sx_init(&emul_shared_lock, "emuldata->shared lock"); 926 LIST_INIT(&futex_list); 927 sx_init(&futex_sx, "futex protection lock"); 928 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, linux_proc_exit, 929 NULL, 1000); 930 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, linux_schedtail, 931 NULL, 1000); 932 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, linux_proc_exec, 933 NULL, 1000); 934 if (bootverbose) 935 printf("Linux ELF exec handler installed\n"); 936 } else 937 printf("cannot insert Linux ELF brand handler\n"); 938 break; 939 case MOD_UNLOAD: 940 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 941 ++brandinfo) 942 if (elf32_brand_inuse(*brandinfo)) 943 error = EBUSY; 944 if (error == 0) { 945 for (brandinfo = &linux_brandlist[0]; 946 *brandinfo != NULL; ++brandinfo) 947 if (elf32_remove_brand_entry(*brandinfo) < 0) 948 error = EINVAL; 949 } 950 if (error == 0) { 951 SET_FOREACH(lihp, linux_ioctl_handler_set) 952 linux_ioctl_unregister_handler(*lihp); 953 SET_FOREACH(ldhp, linux_device_handler_set) 954 linux_device_unregister_handler(*ldhp); 955 mtx_destroy(&emul_lock); 956 sx_destroy(&emul_shared_lock); 957 sx_destroy(&futex_sx); 958 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 959 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 960 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 961 if (bootverbose) 962 printf("Linux ELF exec handler removed\n"); 963 } else 964 printf("Could not deinstall ELF interpreter entry\n"); 965 break; 966 default: 967 return EOPNOTSUPP; 968 } 969 return error; 970 } 971 972 static moduledata_t linux_elf_mod = { 973 "linuxelf", 974 linux_elf_modevent, 975 0 976 }; 977 978 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 979