1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 S�ren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_IA32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_IA32 option!" 
#endif

/* NOTE(review): presumably selects the 32-bit ELF definitions pulled in via <sys/imgact_elf.h> -- confirm. */
#define	__ELF_WORD_SIZE	32

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/exec.h>
#include <sys/imgact.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/signalvar.h>
#include <sys/sysctl.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/specialreg.h>

#include <amd64/linux32/linux.h>
#include <amd64/linux32/linux32_proto.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>

MODULE_VERSION(linux, 1);

MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures");

/*
 * Emit one auxiliary-vector entry: two consecutive 32-bit words, the
 * entry id followed by its value, written through suword32().  'pos' is
 * evaluated twice and is left pointing just past the pair, so it must be
 * a plain lvalue without side effects.
 */
#define AUXARGS_ENTRY_32(pos, id, val)	\
	do {				\
		suword32(pos++, id);	\
		suword32(pos++, val);	\
	} while (0)

/*
 * The two characters "#!" as they appear when the start of an image is
 * read as one 16-bit value; the constant depends on host byte order.
 */
#if BYTE_ORDER == LITTLE_ENDIAN
#define SHELLMAGIC	0x2123 /* #! */
#else
#define SHELLMAGIC	0x2321
#endif

/*
 * Allow the sendsig functions to use the ldebug() facility
 * even though they are not syscalls themselves. Map them
 * to syscall 0. This is slightly less bogus than using
 * ldebug(sigreturn).
102 */ 103 #define LINUX_SYS_linux_rt_sendsig 0 104 #define LINUX_SYS_linux_sendsig 0 105 106 extern char linux_sigcode[]; 107 extern int linux_szsigcode; 108 109 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 110 111 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 112 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 113 114 static int elf_linux_fixup(register_t **stack_base, 115 struct image_params *iparams); 116 static register_t *linux_copyout_strings(struct image_params *imgp); 117 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 118 caddr_t *params); 119 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 120 static void exec_linux_setregs(struct thread *td, u_long entry, 121 u_long stack, u_long ps_strings); 122 static void linux32_fixlimits(struct proc *p); 123 124 /* 125 * Linux syscalls return negative errno's, we do positive and map them 126 * Reference: 127 * FreeBSD: src/sys/sys/errno.h 128 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 129 * linux-2.6.17.8/include/asm-generic/errno.h 130 */ 131 static int bsd_to_linux_errno[ELAST + 1] = { 132 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 133 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 134 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 135 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 136 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 137 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 138 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 139 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 140 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 141 -72, -67, -71 142 }; 143 144 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 145 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 146 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 147 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 148 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 149 LINUX_SIGSTOP, LINUX_SIGTSTP, 
LINUX_SIGCONT, LINUX_SIGCHLD, 150 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 151 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 152 0, LINUX_SIGUSR1, LINUX_SIGUSR2 153 }; 154 155 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 156 SIGHUP, SIGINT, SIGQUIT, SIGILL, 157 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 158 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 159 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 160 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 161 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 162 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 163 SIGIO, SIGURG, SIGSYS 164 }; 165 166 #define LINUX_T_UNKNOWN 255 167 static int _bsd_to_linux_trapcode[] = { 168 LINUX_T_UNKNOWN, /* 0 */ 169 6, /* 1 T_PRIVINFLT */ 170 LINUX_T_UNKNOWN, /* 2 */ 171 3, /* 3 T_BPTFLT */ 172 LINUX_T_UNKNOWN, /* 4 */ 173 LINUX_T_UNKNOWN, /* 5 */ 174 16, /* 6 T_ARITHTRAP */ 175 254, /* 7 T_ASTFLT */ 176 LINUX_T_UNKNOWN, /* 8 */ 177 13, /* 9 T_PROTFLT */ 178 1, /* 10 T_TRCTRAP */ 179 LINUX_T_UNKNOWN, /* 11 */ 180 14, /* 12 T_PAGEFLT */ 181 LINUX_T_UNKNOWN, /* 13 */ 182 17, /* 14 T_ALIGNFLT */ 183 LINUX_T_UNKNOWN, /* 15 */ 184 LINUX_T_UNKNOWN, /* 16 */ 185 LINUX_T_UNKNOWN, /* 17 */ 186 0, /* 18 T_DIVIDE */ 187 2, /* 19 T_NMI */ 188 4, /* 20 T_OFLOW */ 189 5, /* 21 T_BOUND */ 190 7, /* 22 T_DNA */ 191 8, /* 23 T_DOUBLEFLT */ 192 9, /* 24 T_FPOPFLT */ 193 10, /* 25 T_TSSFLT */ 194 11, /* 26 T_SEGNPFLT */ 195 12, /* 27 T_STKFLT */ 196 18, /* 28 T_MCHK */ 197 19, /* 29 T_XMMFLT */ 198 15 /* 30 T_RESERVED */ 199 }; 200 #define bsd_to_linux_trapcode(code) \ 201 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 202 _bsd_to_linux_trapcode[(code)]: \ 203 LINUX_T_UNKNOWN) 204 205 struct linux32_ps_strings { 206 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 207 u_int ps_nargvstr; /* the number of argument strings */ 208 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 209 u_int ps_nenvstr; /* the number of environment strings */ 210 }; 211 212 /* 213 * If FreeBSD & Linux have a difference of opinion about what a trap 214 * means, deal with it here. 215 * 216 * MPSAFE 217 */ 218 static int 219 translate_traps(int signal, int trap_code) 220 { 221 if (signal != SIGBUS) 222 return signal; 223 switch (trap_code) { 224 case T_PROTFLT: 225 case T_TSSFLT: 226 case T_DOUBLEFLT: 227 case T_PAGEFLT: 228 return SIGSEGV; 229 default: 230 return signal; 231 } 232 } 233 234 static int 235 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 236 { 237 Elf32_Auxargs *args; 238 Elf32_Addr *base; 239 Elf32_Addr *pos; 240 241 KASSERT(curthread->td_proc == imgp->proc && 242 (curthread->td_proc->p_flag & P_SA) == 0, 243 ("unsafe elf_linux_fixup(), should be curproc")); 244 base = (Elf32_Addr *)*stack_base; 245 args = (Elf32_Auxargs *)imgp->auxargs; 246 pos = base + (imgp->args->argc + imgp->args->envc + 2); 247 248 if (args->trace) 249 AUXARGS_ENTRY_32(pos, AT_DEBUG, 1); 250 if (args->execfd != -1) 251 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 252 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 253 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 254 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 255 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 256 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 257 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 258 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 259 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 260 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 261 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 262 AUXARGS_ENTRY_32(pos, AT_EGID, 
imgp->proc->p_ucred->cr_svgid); 263 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 264 265 free(imgp->auxargs, M_TEMP); 266 imgp->auxargs = NULL; 267 268 base--; 269 suword32(base, (uint32_t)imgp->args->argc); 270 *stack_base = (register_t *)base; 271 return 0; 272 } 273 274 extern int _ucodesel, _ucode32sel, _udatasel; 275 extern unsigned long linux_sznonrtsigcode; 276 277 static void 278 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 279 { 280 struct thread *td = curthread; 281 struct proc *p = td->td_proc; 282 struct sigacts *psp; 283 struct trapframe *regs; 284 struct l_rt_sigframe *fp, frame; 285 int oonstack; 286 int sig; 287 int code; 288 289 sig = ksi->ksi_signo; 290 code = ksi->ksi_code; 291 PROC_LOCK_ASSERT(p, MA_OWNED); 292 psp = p->p_sigacts; 293 mtx_assert(&psp->ps_mtx, MA_OWNED); 294 regs = td->td_frame; 295 oonstack = sigonstack(regs->tf_rsp); 296 297 #ifdef DEBUG 298 if (ldebug(rt_sendsig)) 299 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 300 catcher, sig, (void*)mask, code); 301 #endif 302 /* 303 * Allocate space for the signal handler context. 304 */ 305 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 306 SIGISMEMBER(psp->ps_sigonstack, sig)) { 307 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 308 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 309 } else 310 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 311 mtx_unlock(&psp->ps_mtx); 312 313 /* 314 * Build the argument list for the signal handler. 315 */ 316 if (p->p_sysent->sv_sigtbl) 317 if (sig <= p->p_sysent->sv_sigsize) 318 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 319 320 bzero(&frame, sizeof(frame)); 321 322 frame.sf_handler = PTROUT(catcher); 323 frame.sf_sig = sig; 324 frame.sf_siginfo = PTROUT(&fp->sf_si); 325 frame.sf_ucontext = PTROUT(&fp->sf_sc); 326 327 /* Fill in POSIX parts */ 328 frame.sf_si.lsi_signo = sig; 329 frame.sf_si.lsi_code = code; 330 frame.sf_si.lsi_addr = PTROUT(ksi->ksi_addr); 331 332 /* 333 * Build the signal context to be used by sigreturn. 
334 */ 335 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 336 frame.sf_sc.uc_link = 0; /* XXX ??? */ 337 338 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 339 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 340 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 341 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 342 PROC_UNLOCK(p); 343 344 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 345 346 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 347 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 348 frame.sf_sc.uc_mcontext.sc_fs = rfs(); 349 __asm __volatile("movl %%es,%0" : 350 "=rm" (frame.sf_sc.uc_mcontext.sc_es)); 351 __asm __volatile("movl %%ds,%0" : 352 "=rm" (frame.sf_sc.uc_mcontext.sc_ds)); 353 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 354 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 355 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 356 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 357 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 358 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 359 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 360 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 361 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 362 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 363 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 364 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 365 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 366 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 367 368 #ifdef DEBUG 369 if (ldebug(rt_sendsig)) 370 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 371 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 372 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 373 #endif 374 375 if (copyout(&frame, fp, sizeof(frame)) != 0) { 376 /* 377 * Process has trashed its stack; give it an illegal 378 * instruction to halt it in its tracks. 
379 */ 380 #ifdef DEBUG 381 if (ldebug(rt_sendsig)) 382 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 383 fp, oonstack); 384 #endif 385 PROC_LOCK(p); 386 sigexit(td, SIGILL); 387 } 388 389 /* 390 * Build context to run handler in. 391 */ 392 regs->tf_rsp = PTROUT(fp); 393 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 394 linux_sznonrtsigcode; 395 regs->tf_rflags &= ~PSL_T; 396 regs->tf_cs = _ucode32sel; 397 regs->tf_ss = _udatasel; 398 load_ds(_udatasel); 399 td->td_pcb->pcb_ds = _udatasel; 400 load_es(_udatasel); 401 td->td_pcb->pcb_es = _udatasel; 402 PROC_LOCK(p); 403 mtx_lock(&psp->ps_mtx); 404 } 405 406 407 /* 408 * Send an interrupt to process. 409 * 410 * Stack is set up to allow sigcode stored 411 * in u. to call routine, followed by kcall 412 * to sigreturn routine below. After sigreturn 413 * resets the signal mask, the stack, and the 414 * frame pointer, it returns to the user 415 * specified pc, psl. 416 */ 417 static void 418 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 419 { 420 struct thread *td = curthread; 421 struct proc *p = td->td_proc; 422 struct sigacts *psp; 423 struct trapframe *regs; 424 struct l_sigframe *fp, frame; 425 l_sigset_t lmask; 426 int oonstack, i; 427 int sig, code; 428 429 sig = ksi->ksi_signo; 430 code = ksi->ksi_code; 431 PROC_LOCK_ASSERT(p, MA_OWNED); 432 psp = p->p_sigacts; 433 mtx_assert(&psp->ps_mtx, MA_OWNED); 434 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 435 /* Signal handler installed with SA_SIGINFO. */ 436 linux_rt_sendsig(catcher, ksi, mask); 437 return; 438 } 439 440 regs = td->td_frame; 441 oonstack = sigonstack(regs->tf_rsp); 442 443 #ifdef DEBUG 444 if (ldebug(sendsig)) 445 printf(ARGS(sendsig, "%p, %d, %p, %u"), 446 catcher, sig, (void*)mask, code); 447 #endif 448 449 /* 450 * Allocate space for the signal handler context. 
451 */ 452 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 453 SIGISMEMBER(psp->ps_sigonstack, sig)) { 454 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 455 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 456 } else 457 fp = (struct l_sigframe *)regs->tf_rsp - 1; 458 mtx_unlock(&psp->ps_mtx); 459 PROC_UNLOCK(p); 460 461 /* 462 * Build the argument list for the signal handler. 463 */ 464 if (p->p_sysent->sv_sigtbl) 465 if (sig <= p->p_sysent->sv_sigsize) 466 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 467 468 bzero(&frame, sizeof(frame)); 469 470 frame.sf_handler = PTROUT(catcher); 471 frame.sf_sig = sig; 472 473 bsd_to_linux_sigset(mask, &lmask); 474 475 /* 476 * Build the signal context to be used by sigreturn. 477 */ 478 frame.sf_sc.sc_mask = lmask.__bits[0]; 479 frame.sf_sc.sc_gs = rgs(); 480 frame.sf_sc.sc_fs = rfs(); 481 __asm __volatile("movl %%es,%0" : "=rm" (frame.sf_sc.sc_es)); 482 __asm __volatile("movl %%ds,%0" : "=rm" (frame.sf_sc.sc_ds)); 483 frame.sf_sc.sc_edi = regs->tf_rdi; 484 frame.sf_sc.sc_esi = regs->tf_rsi; 485 frame.sf_sc.sc_ebp = regs->tf_rbp; 486 frame.sf_sc.sc_ebx = regs->tf_rbx; 487 frame.sf_sc.sc_edx = regs->tf_rdx; 488 frame.sf_sc.sc_ecx = regs->tf_rcx; 489 frame.sf_sc.sc_eax = regs->tf_rax; 490 frame.sf_sc.sc_eip = regs->tf_rip; 491 frame.sf_sc.sc_cs = regs->tf_cs; 492 frame.sf_sc.sc_eflags = regs->tf_rflags; 493 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 494 frame.sf_sc.sc_ss = regs->tf_ss; 495 frame.sf_sc.sc_err = regs->tf_err; 496 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 497 498 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 499 frame.sf_extramask[i] = lmask.__bits[i+1]; 500 501 if (copyout(&frame, fp, sizeof(frame)) != 0) { 502 /* 503 * Process has trashed its stack; give it an illegal 504 * instruction to halt it in its tracks. 505 */ 506 PROC_LOCK(p); 507 sigexit(td, SIGILL); 508 } 509 510 /* 511 * Build context to run handler in. 
512 */ 513 regs->tf_rsp = PTROUT(fp); 514 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 515 regs->tf_rflags &= ~PSL_T; 516 regs->tf_cs = _ucode32sel; 517 regs->tf_ss = _udatasel; 518 load_ds(_udatasel); 519 td->td_pcb->pcb_ds = _udatasel; 520 load_es(_udatasel); 521 td->td_pcb->pcb_es = _udatasel; 522 PROC_LOCK(p); 523 mtx_lock(&psp->ps_mtx); 524 } 525 526 /* 527 * System call to cleanup state after a signal 528 * has been taken. Reset signal mask and 529 * stack state from context left by sendsig (above). 530 * Return to previous pc and psl as specified by 531 * context left by sendsig. Check carefully to 532 * make sure that the user has not modified the 533 * psl to gain improper privileges or to cause 534 * a machine fault. 535 */ 536 int 537 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 538 { 539 struct proc *p = td->td_proc; 540 struct l_sigframe frame; 541 struct trapframe *regs; 542 l_sigset_t lmask; 543 int eflags, i; 544 ksiginfo_t ksi; 545 546 regs = td->td_frame; 547 548 #ifdef DEBUG 549 if (ldebug(sigreturn)) 550 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 551 #endif 552 /* 553 * The trampoline code hands us the sigframe. 554 * It is unsafe to keep track of it ourselves, in the event that a 555 * program jumps out of a signal handler. 556 */ 557 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 558 return (EFAULT); 559 560 /* 561 * Check for security violations. 562 */ 563 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 564 eflags = frame.sf_sc.sc_eflags; 565 /* 566 * XXX do allow users to change the privileged flag PSL_RF. The 567 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 568 * sometimes set it there too. tf_eflags is kept in the signal 569 * context during signal handling and there is no other place 570 * to remember it, so the PSL_RF bit may be corrupted by the 571 * signal handler without us knowing. 
Corruption of the PSL_RF 572 * bit at worst causes one more or one less debugger trap, so 573 * allowing it is fairly harmless. 574 */ 575 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 576 return(EINVAL); 577 578 /* 579 * Don't allow users to load a valid privileged %cs. Let the 580 * hardware check for invalid selectors, excess privilege in 581 * other selectors, invalid %eip's and invalid %esp's. 582 */ 583 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 584 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 585 ksiginfo_init_trap(&ksi); 586 ksi.ksi_signo = SIGBUS; 587 ksi.ksi_code = BUS_OBJERR; 588 ksi.ksi_trapno = T_PROTFLT; 589 ksi.ksi_addr = (void *)regs->tf_rip; 590 trapsignal(td, &ksi); 591 return(EINVAL); 592 } 593 594 lmask.__bits[0] = frame.sf_sc.sc_mask; 595 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 596 lmask.__bits[i+1] = frame.sf_extramask[i]; 597 PROC_LOCK(p); 598 linux_to_bsd_sigset(&lmask, &td->td_sigmask); 599 SIG_CANTMASK(td->td_sigmask); 600 signotify(td); 601 PROC_UNLOCK(p); 602 603 /* 604 * Restore signal context. 605 */ 606 /* Selectors were restored by the trampoline. */ 607 regs->tf_rdi = frame.sf_sc.sc_edi; 608 regs->tf_rsi = frame.sf_sc.sc_esi; 609 regs->tf_rbp = frame.sf_sc.sc_ebp; 610 regs->tf_rbx = frame.sf_sc.sc_ebx; 611 regs->tf_rdx = frame.sf_sc.sc_edx; 612 regs->tf_rcx = frame.sf_sc.sc_ecx; 613 regs->tf_rax = frame.sf_sc.sc_eax; 614 regs->tf_rip = frame.sf_sc.sc_eip; 615 regs->tf_cs = frame.sf_sc.sc_cs; 616 regs->tf_rflags = eflags; 617 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 618 regs->tf_ss = frame.sf_sc.sc_ss; 619 620 return (EJUSTRETURN); 621 } 622 623 /* 624 * System call to cleanup state after a signal 625 * has been taken. Reset signal mask and 626 * stack state from context left by rt_sendsig (above). 627 * Return to previous pc and psl as specified by 628 * context left by sendsig. 
Check carefully to 629 * make sure that the user has not modified the 630 * psl to gain improper privileges or to cause 631 * a machine fault. 632 */ 633 int 634 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 635 { 636 struct proc *p = td->td_proc; 637 struct l_ucontext uc; 638 struct l_sigcontext *context; 639 l_stack_t *lss; 640 stack_t ss; 641 struct trapframe *regs; 642 int eflags; 643 ksiginfo_t ksi; 644 645 regs = td->td_frame; 646 647 #ifdef DEBUG 648 if (ldebug(rt_sigreturn)) 649 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 650 #endif 651 /* 652 * The trampoline code hands us the ucontext. 653 * It is unsafe to keep track of it ourselves, in the event that a 654 * program jumps out of a signal handler. 655 */ 656 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 657 return (EFAULT); 658 659 context = &uc.uc_mcontext; 660 661 /* 662 * Check for security violations. 663 */ 664 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 665 eflags = context->sc_eflags; 666 /* 667 * XXX do allow users to change the privileged flag PSL_RF. The 668 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 669 * sometimes set it there too. tf_eflags is kept in the signal 670 * context during signal handling and there is no other place 671 * to remember it, so the PSL_RF bit may be corrupted by the 672 * signal handler without us knowing. Corruption of the PSL_RF 673 * bit at worst causes one more or one less debugger trap, so 674 * allowing it is fairly harmless. 675 */ 676 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 677 return(EINVAL); 678 679 /* 680 * Don't allow users to load a valid privileged %cs. Let the 681 * hardware check for invalid selectors, excess privilege in 682 * other selectors, invalid %eip's and invalid %esp's. 
683 */ 684 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 685 if (!CS_SECURE(context->sc_cs)) { 686 ksiginfo_init_trap(&ksi); 687 ksi.ksi_signo = SIGBUS; 688 ksi.ksi_code = BUS_OBJERR; 689 ksi.ksi_trapno = T_PROTFLT; 690 ksi.ksi_addr = (void *)regs->tf_rip; 691 trapsignal(td, &ksi); 692 return(EINVAL); 693 } 694 695 PROC_LOCK(p); 696 linux_to_bsd_sigset(&uc.uc_sigmask, &td->td_sigmask); 697 SIG_CANTMASK(td->td_sigmask); 698 signotify(td); 699 PROC_UNLOCK(p); 700 701 /* 702 * Restore signal context 703 */ 704 /* Selectors were restored by the trampoline. */ 705 regs->tf_rdi = context->sc_edi; 706 regs->tf_rsi = context->sc_esi; 707 regs->tf_rbp = context->sc_ebp; 708 regs->tf_rbx = context->sc_ebx; 709 regs->tf_rdx = context->sc_edx; 710 regs->tf_rcx = context->sc_ecx; 711 regs->tf_rax = context->sc_eax; 712 regs->tf_rip = context->sc_eip; 713 regs->tf_cs = context->sc_cs; 714 regs->tf_rflags = eflags; 715 regs->tf_rsp = context->sc_esp_at_signal; 716 regs->tf_ss = context->sc_ss; 717 718 /* 719 * call sigaltstack & ignore results.. 720 */ 721 lss = &uc.uc_stack; 722 ss.ss_sp = PTRIN(lss->ss_sp); 723 ss.ss_size = lss->ss_size; 724 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 725 726 #ifdef DEBUG 727 if (ldebug(rt_sigreturn)) 728 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 729 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 730 #endif 731 (void)kern_sigaltstack(td, &ss, NULL); 732 733 return (EJUSTRETURN); 734 } 735 736 /* 737 * MPSAFE 738 */ 739 static void 740 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 741 { 742 args[0] = tf->tf_rbx; 743 args[1] = tf->tf_rcx; 744 args[2] = tf->tf_rdx; 745 args[3] = tf->tf_rsi; 746 args[4] = tf->tf_rdi; 747 args[5] = tf->tf_rbp; /* Unconfirmed */ 748 *params = NULL; /* no copyin */ 749 } 750 751 /* 752 * If a linux binary is exec'ing something, try this image activator 753 * first. 
We override standard shell script execution in order to 754 * be able to modify the interpreter path. We only do this if a linux 755 * binary is doing the exec, so we do not create an EXEC module for it. 756 */ 757 static int exec_linux_imgact_try(struct image_params *iparams); 758 759 static int 760 exec_linux_imgact_try(struct image_params *imgp) 761 { 762 const char *head = (const char *)imgp->image_header; 763 char *rpath; 764 int error = -1, len; 765 766 /* 767 * The interpreter for shell scripts run from a linux binary needs 768 * to be located in /compat/linux if possible in order to recursively 769 * maintain linux path emulation. 770 */ 771 if (((const short *)head)[0] == SHELLMAGIC) { 772 /* 773 * Run our normal shell image activator. If it succeeds attempt 774 * to use the alternate path for the interpreter. If an alternate 775 * path is found, use our stringspace to store it. 776 */ 777 if ((error = exec_shell_imgact(imgp)) == 0) { 778 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 779 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0); 780 if (rpath != NULL) { 781 len = strlen(rpath) + 1; 782 783 if (len <= MAXSHELLCMDLEN) { 784 memcpy(imgp->interpreter_name, rpath, len); 785 } 786 free(rpath, M_TEMP); 787 } 788 } 789 } 790 return(error); 791 } 792 793 /* 794 * Clear registers on exec 795 * XXX copied from ia32_signal.c. 
796 */ 797 static void 798 exec_linux_setregs(td, entry, stack, ps_strings) 799 struct thread *td; 800 u_long entry; 801 u_long stack; 802 u_long ps_strings; 803 { 804 struct trapframe *regs = td->td_frame; 805 struct pcb *pcb = td->td_pcb; 806 807 wrmsr(MSR_FSBASE, 0); 808 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 809 pcb->pcb_fsbase = 0; 810 pcb->pcb_gsbase = 0; 811 load_ds(_udatasel); 812 load_es(_udatasel); 813 load_fs(_udatasel); 814 load_gs(0); 815 pcb->pcb_ds = _udatasel; 816 pcb->pcb_es = _udatasel; 817 pcb->pcb_fs = _udatasel; 818 pcb->pcb_gs = 0; 819 820 bzero((char *)regs, sizeof(struct trapframe)); 821 regs->tf_rip = entry; 822 regs->tf_rsp = stack; 823 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 824 regs->tf_ss = _udatasel; 825 regs->tf_cs = _ucode32sel; 826 regs->tf_rbx = ps_strings; 827 load_cr0(rcr0() | CR0_MP | CR0_TS); 828 fpstate_drop(td); 829 830 /* Return via doreti so that we can change to a different %cs */ 831 pcb->pcb_flags |= PCB_FULLCTX; 832 td->td_retval[1] = 0; 833 } 834 835 /* 836 * XXX copied from ia32_sysvec.c. 837 */ 838 static register_t * 839 linux_copyout_strings(struct image_params *imgp) 840 { 841 int argc, envc; 842 u_int32_t *vectp; 843 char *stringp, *destp; 844 u_int32_t *stack_base; 845 struct linux32_ps_strings *arginfo; 846 int sigcodesz; 847 848 /* 849 * Calculate string base and vector table pointers. 850 * Also deal with signal trampoline code for this exec type. 851 */ 852 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 853 sigcodesz = *(imgp->proc->p_sysent->sv_szsigcode); 854 destp = (caddr_t)arginfo - sigcodesz - SPARE_USRSPACE - 855 roundup((ARG_MAX - imgp->args->stringspace), sizeof(char *)); 856 857 /* 858 * install sigcode 859 */ 860 if (sigcodesz) 861 copyout(imgp->proc->p_sysent->sv_sigcode, 862 ((caddr_t)arginfo - sigcodesz), szsigcode); 863 864 /* 865 * If we have a valid auxargs ptr, prepare some room 866 * on the stack. 
867 */ 868 if (imgp->auxargs) { 869 /* 870 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 871 * lower compatibility. 872 */ 873 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size 874 : (AT_COUNT * 2); 875 /* 876 * The '+ 2' is for the null pointers at the end of each of 877 * the arg and env vector sets,and imgp->auxarg_size is room 878 * for argument of Runtime loader. 879 */ 880 vectp = (u_int32_t *) (destp - (imgp->args->argc + imgp->args->envc + 2 + 881 imgp->auxarg_size) * sizeof(u_int32_t)); 882 883 } else 884 /* 885 * The '+ 2' is for the null pointers at the end of each of 886 * the arg and env vector sets 887 */ 888 vectp = (u_int32_t *) 889 (destp - (imgp->args->argc + imgp->args->envc + 2) * sizeof(u_int32_t)); 890 891 /* 892 * vectp also becomes our initial stack base 893 */ 894 stack_base = vectp; 895 896 stringp = imgp->args->begin_argv; 897 argc = imgp->args->argc; 898 envc = imgp->args->envc; 899 /* 900 * Copy out strings - arguments and environment. 901 */ 902 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 903 904 /* 905 * Fill in "ps_strings" struct for ps, w, etc. 906 */ 907 suword32(&arginfo->ps_argvstr, (u_int32_t)(intptr_t)vectp); 908 suword32(&arginfo->ps_nargvstr, argc); 909 910 /* 911 * Fill in argument portion of vector table. 912 */ 913 for (; argc > 0; --argc) { 914 suword32(vectp++, (u_int32_t)(intptr_t)destp); 915 while (*stringp++ != 0) 916 destp++; 917 destp++; 918 } 919 920 /* a null vector table pointer separates the argp's from the envp's */ 921 suword32(vectp++, 0); 922 923 suword32(&arginfo->ps_envstr, (u_int32_t)(intptr_t)vectp); 924 suword32(&arginfo->ps_nenvstr, envc); 925 926 /* 927 * Fill in environment portion of vector table. 
928 */ 929 for (; envc > 0; --envc) { 930 suword32(vectp++, (u_int32_t)(intptr_t)destp); 931 while (*stringp++ != 0) 932 destp++; 933 destp++; 934 } 935 936 /* end of vector table is a null pointer */ 937 suword32(vectp, 0); 938 939 return ((register_t *)stack_base); 940 } 941 942 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 943 "32-bit Linux emulation"); 944 945 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 946 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 947 &linux32_maxdsiz, 0, ""); 948 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 949 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 950 &linux32_maxssiz, 0, ""); 951 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 952 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 953 &linux32_maxvmem, 0, ""); 954 955 /* 956 * XXX copied from ia32_sysvec.c. 957 */ 958 static void 959 linux32_fixlimits(struct proc *p) 960 { 961 struct plimit *oldlim, *newlim; 962 963 if (linux32_maxdsiz == 0 && linux32_maxssiz == 0 && 964 linux32_maxvmem == 0) 965 return; 966 newlim = lim_alloc(); 967 PROC_LOCK(p); 968 oldlim = p->p_limit; 969 lim_copy(newlim, oldlim); 970 if (linux32_maxdsiz != 0) { 971 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_cur > linux32_maxdsiz) 972 newlim->pl_rlimit[RLIMIT_DATA].rlim_cur = linux32_maxdsiz; 973 if (newlim->pl_rlimit[RLIMIT_DATA].rlim_max > linux32_maxdsiz) 974 newlim->pl_rlimit[RLIMIT_DATA].rlim_max = linux32_maxdsiz; 975 } 976 if (linux32_maxssiz != 0) { 977 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_cur > linux32_maxssiz) 978 newlim->pl_rlimit[RLIMIT_STACK].rlim_cur = linux32_maxssiz; 979 if (newlim->pl_rlimit[RLIMIT_STACK].rlim_max > linux32_maxssiz) 980 newlim->pl_rlimit[RLIMIT_STACK].rlim_max = linux32_maxssiz; 981 } 982 if (linux32_maxvmem != 0) { 983 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur > linux32_maxvmem) 984 newlim->pl_rlimit[RLIMIT_VMEM].rlim_cur = linux32_maxvmem; 985 if (newlim->pl_rlimit[RLIMIT_VMEM].rlim_max > linux32_maxvmem) 986 
newlim->pl_rlimit[RLIMIT_VMEM].rlim_max = linux32_maxvmem; 987 } 988 p->p_limit = newlim; 989 PROC_UNLOCK(p); 990 lim_free(oldlim); 991 } 992 993 struct sysentvec elf_linux_sysvec = { 994 LINUX_SYS_MAXSYSCALL, 995 linux_sysent, 996 0, 997 LINUX_SIGTBLSZ, 998 bsd_to_linux_signal, 999 ELAST + 1, 1000 bsd_to_linux_errno, 1001 translate_traps, 1002 elf_linux_fixup, 1003 linux_sendsig, 1004 linux_sigcode, 1005 &linux_szsigcode, 1006 linux_prepsyscall, 1007 "Linux ELF32", 1008 elf32_coredump, 1009 exec_linux_imgact_try, 1010 LINUX_MINSIGSTKSZ, 1011 PAGE_SIZE, 1012 VM_MIN_ADDRESS, 1013 LINUX32_USRSTACK, 1014 LINUX32_USRSTACK, 1015 LINUX32_PS_STRINGS, 1016 VM_PROT_ALL, 1017 linux_copyout_strings, 1018 exec_linux_setregs, 1019 linux32_fixlimits 1020 }; 1021 1022 static Elf32_Brandinfo linux_brand = { 1023 ELFOSABI_LINUX, 1024 EM_386, 1025 "Linux", 1026 "/compat/linux", 1027 "/lib/ld-linux.so.1", 1028 &elf_linux_sysvec, 1029 NULL, 1030 BI_CAN_EXEC_DYN, 1031 }; 1032 1033 static Elf32_Brandinfo linux_glibc2brand = { 1034 ELFOSABI_LINUX, 1035 EM_386, 1036 "Linux", 1037 "/compat/linux", 1038 "/lib/ld-linux.so.2", 1039 &elf_linux_sysvec, 1040 NULL, 1041 BI_CAN_EXEC_DYN, 1042 }; 1043 1044 Elf32_Brandinfo *linux_brandlist[] = { 1045 &linux_brand, 1046 &linux_glibc2brand, 1047 NULL 1048 }; 1049 1050 static int 1051 linux_elf_modevent(module_t mod, int type, void *data) 1052 { 1053 Elf32_Brandinfo **brandinfo; 1054 int error; 1055 struct linux_ioctl_handler **lihp; 1056 struct linux_device_handler **ldhp; 1057 1058 error = 0; 1059 1060 switch(type) { 1061 case MOD_LOAD: 1062 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1063 ++brandinfo) 1064 if (elf32_insert_brand_entry(*brandinfo) < 0) 1065 error = EINVAL; 1066 if (error == 0) { 1067 SET_FOREACH(lihp, linux_ioctl_handler_set) 1068 linux_ioctl_register_handler(*lihp); 1069 SET_FOREACH(ldhp, linux_device_handler_set) 1070 linux_device_register_handler(*ldhp); 1071 if (bootverbose) 1072 printf("Linux ELF exec handler 
installed\n"); 1073 } else 1074 printf("cannot insert Linux ELF brand handler\n"); 1075 break; 1076 case MOD_UNLOAD: 1077 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1078 ++brandinfo) 1079 if (elf32_brand_inuse(*brandinfo)) 1080 error = EBUSY; 1081 if (error == 0) { 1082 for (brandinfo = &linux_brandlist[0]; 1083 *brandinfo != NULL; ++brandinfo) 1084 if (elf32_remove_brand_entry(*brandinfo) < 0) 1085 error = EINVAL; 1086 } 1087 if (error == 0) { 1088 SET_FOREACH(lihp, linux_ioctl_handler_set) 1089 linux_ioctl_unregister_handler(*lihp); 1090 SET_FOREACH(ldhp, linux_device_handler_set) 1091 linux_device_unregister_handler(*ldhp); 1092 if (bootverbose) 1093 printf("Linux ELF exec handler removed\n"); 1094 } else 1095 printf("Could not deinstall ELF interpreter entry\n"); 1096 break; 1097 default: 1098 break; 1099 } 1100 return error; 1101 } 1102 1103 static moduledata_t linux_elf_mod = { 1104 "linuxelf", 1105 linux_elf_modevent, 1106 0 1107 }; 1108 1109 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1110