1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 S�ren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_FREEBSD32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 
39 #endif 40 41 #define __ELF_WORD_SIZE 32 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/exec.h> 46 #include <sys/fcntl.h> 47 #include <sys/imgact.h> 48 #include <sys/imgact_elf.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/module.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/sysctl.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysent.h> 60 #include <sys/sysproto.h> 61 #include <sys/vnode.h> 62 #include <sys/eventhandler.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_extern.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_page.h> 70 #include <vm/vm_param.h> 71 72 #include <machine/cpu.h> 73 #include <machine/md_var.h> 74 #include <machine/pcb.h> 75 #include <machine/specialreg.h> 76 77 #include <amd64/linux32/linux.h> 78 #include <amd64/linux32/linux32_proto.h> 79 #include <compat/linux/linux_futex.h> 80 #include <compat/linux/linux_emul.h> 81 #include <compat/linux/linux_mib.h> 82 #include <compat/linux/linux_misc.h> 83 #include <compat/linux/linux_signal.h> 84 #include <compat/linux/linux_util.h> 85 86 MODULE_VERSION(linux, 1); 87 88 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 89 90 #define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96 #if BYTE_ORDER == LITTLE_ENDIAN 97 #define SHELLMAGIC 0x2123 /* #! */ 98 #else 99 #define SHELLMAGIC 0x2321 100 #endif 101 102 /* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 
107 */ 108 #define LINUX_SYS_linux_rt_sendsig 0 109 #define LINUX_SYS_linux_sendsig 0 110 111 const char *linux_platform = "i686"; 112 static int linux_szplatform; 113 extern char linux_sigcode[]; 114 extern int linux_szsigcode; 115 116 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 117 118 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 119 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 120 121 static int elf_linux_fixup(register_t **stack_base, 122 struct image_params *iparams); 123 static register_t *linux_copyout_strings(struct image_params *imgp); 124 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 125 static void exec_linux_setregs(struct thread *td, 126 struct image_params *imgp, u_long stack); 127 static void linux32_fixlimit(struct rlimit *rl, int which); 128 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 129 130 static eventhandler_tag linux_exit_tag; 131 static eventhandler_tag linux_schedtail_tag; 132 static eventhandler_tag linux_exec_tag; 133 134 /* 135 * Linux syscalls return negative errno's, we do positive and map them 136 * Reference: 137 * FreeBSD: src/sys/sys/errno.h 138 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 139 * linux-2.6.17.8/include/asm-generic/errno.h 140 */ 141 static int bsd_to_linux_errno[ELAST + 1] = { 142 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 143 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 144 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 145 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 146 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 147 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 148 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 149 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 150 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 151 -72, -67, -71 152 }; 153 154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 155 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 156 
LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 157 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 158 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 159 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 160 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 161 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 162 0, LINUX_SIGUSR1, LINUX_SIGUSR2 163 }; 164 165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 166 SIGHUP, SIGINT, SIGQUIT, SIGILL, 167 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 168 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 169 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 170 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 171 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 172 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 173 SIGIO, SIGURG, SIGSYS 174 }; 175 176 #define LINUX_T_UNKNOWN 255 177 static int _bsd_to_linux_trapcode[] = { 178 LINUX_T_UNKNOWN, /* 0 */ 179 6, /* 1 T_PRIVINFLT */ 180 LINUX_T_UNKNOWN, /* 2 */ 181 3, /* 3 T_BPTFLT */ 182 LINUX_T_UNKNOWN, /* 4 */ 183 LINUX_T_UNKNOWN, /* 5 */ 184 16, /* 6 T_ARITHTRAP */ 185 254, /* 7 T_ASTFLT */ 186 LINUX_T_UNKNOWN, /* 8 */ 187 13, /* 9 T_PROTFLT */ 188 1, /* 10 T_TRCTRAP */ 189 LINUX_T_UNKNOWN, /* 11 */ 190 14, /* 12 T_PAGEFLT */ 191 LINUX_T_UNKNOWN, /* 13 */ 192 17, /* 14 T_ALIGNFLT */ 193 LINUX_T_UNKNOWN, /* 15 */ 194 LINUX_T_UNKNOWN, /* 16 */ 195 LINUX_T_UNKNOWN, /* 17 */ 196 0, /* 18 T_DIVIDE */ 197 2, /* 19 T_NMI */ 198 4, /* 20 T_OFLOW */ 199 5, /* 21 T_BOUND */ 200 7, /* 22 T_DNA */ 201 8, /* 23 T_DOUBLEFLT */ 202 9, /* 24 T_FPOPFLT */ 203 10, /* 25 T_TSSFLT */ 204 11, /* 26 T_SEGNPFLT */ 205 12, /* 27 T_STKFLT */ 206 18, /* 28 T_MCHK */ 207 19, /* 29 T_XMMFLT */ 208 15 /* 30 T_RESERVED */ 209 }; 210 #define bsd_to_linux_trapcode(code) \ 211 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 212 _bsd_to_linux_trapcode[(code)]: \ 213 LINUX_T_UNKNOWN) 214 215 struct linux32_ps_strings { 216 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 217 u_int ps_nargvstr; /* the number of argument strings */ 218 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 219 u_int ps_nenvstr; /* the number of environment strings */ 220 }; 221 222 /* 223 * If FreeBSD & Linux have a difference of opinion about what a trap 224 * means, deal with it here. 225 * 226 * MPSAFE 227 */ 228 static int 229 translate_traps(int signal, int trap_code) 230 { 231 if (signal != SIGBUS) 232 return signal; 233 switch (trap_code) { 234 case T_PROTFLT: 235 case T_TSSFLT: 236 case T_DOUBLEFLT: 237 case T_PAGEFLT: 238 return SIGSEGV; 239 default: 240 return signal; 241 } 242 } 243 244 static int 245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 246 { 247 Elf32_Auxargs *args; 248 Elf32_Addr *base; 249 Elf32_Addr *pos, *uplatform; 250 struct linux32_ps_strings *arginfo; 251 252 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 253 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode - 254 linux_szplatform); 255 256 KASSERT(curthread->td_proc == imgp->proc, 257 ("unsafe elf_linux_fixup(), should be curproc")); 258 base = (Elf32_Addr *)*stack_base; 259 args = (Elf32_Auxargs *)imgp->auxargs; 260 pos = base + (imgp->args->argc + imgp->args->envc + 2); 261 262 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 263 264 /* 265 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 266 * as it has appeared in the 2.4.0-rc7 first time. 267 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 268 * glibc falls back to the hard-coded CLK_TCK value when aux entry 269 * is not present. 270 * Also see linux_times() implementation. 
271 */ 272 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 273 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 274 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 275 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 276 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 277 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 278 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 279 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 280 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 281 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 282 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 283 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 284 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 285 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 286 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 287 if (args->execfd != -1) 288 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 289 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 290 291 free(imgp->auxargs, M_TEMP); 292 imgp->auxargs = NULL; 293 294 base--; 295 suword32(base, (uint32_t)imgp->args->argc); 296 *stack_base = (register_t *)base; 297 return 0; 298 } 299 300 extern unsigned long linux_sznonrtsigcode; 301 302 static void 303 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 304 { 305 struct thread *td = curthread; 306 struct proc *p = td->td_proc; 307 struct sigacts *psp; 308 struct trapframe *regs; 309 struct l_rt_sigframe *fp, frame; 310 int oonstack; 311 int sig; 312 int code; 313 314 sig = ksi->ksi_signo; 315 code = ksi->ksi_code; 316 PROC_LOCK_ASSERT(p, MA_OWNED); 317 psp = p->p_sigacts; 318 mtx_assert(&psp->ps_mtx, MA_OWNED); 319 regs = td->td_frame; 320 oonstack = sigonstack(regs->tf_rsp); 321 322 #ifdef DEBUG 323 if (ldebug(rt_sendsig)) 324 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 325 catcher, sig, (void*)mask, code); 326 #endif 327 /* 328 * Allocate space for the signal handler context. 
329 */ 330 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 331 SIGISMEMBER(psp->ps_sigonstack, sig)) { 332 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 333 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 334 } else 335 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 336 mtx_unlock(&psp->ps_mtx); 337 338 /* 339 * Build the argument list for the signal handler. 340 */ 341 if (p->p_sysent->sv_sigtbl) 342 if (sig <= p->p_sysent->sv_sigsize) 343 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 344 345 bzero(&frame, sizeof(frame)); 346 347 frame.sf_handler = PTROUT(catcher); 348 frame.sf_sig = sig; 349 frame.sf_siginfo = PTROUT(&fp->sf_si); 350 frame.sf_ucontext = PTROUT(&fp->sf_sc); 351 352 /* Fill in POSIX parts */ 353 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 354 355 /* 356 * Build the signal context to be used by sigreturn. 357 */ 358 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 359 frame.sf_sc.uc_link = 0; /* XXX ??? */ 360 361 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 362 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 363 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 364 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 365 PROC_UNLOCK(p); 366 367 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 368 369 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 370 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 371 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 372 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 373 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 374 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 375 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 376 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 377 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 378 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 379 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 380 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 381 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 382 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 383 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 384 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 385 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 386 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 387 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 388 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 389 390 #ifdef DEBUG 391 if (ldebug(rt_sendsig)) 392 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 393 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 394 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 395 #endif 396 397 if (copyout(&frame, fp, sizeof(frame)) != 0) { 398 /* 399 * Process has trashed its stack; give it an illegal 400 * instruction to halt it in its tracks. 401 */ 402 #ifdef DEBUG 403 if (ldebug(rt_sendsig)) 404 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 405 fp, oonstack); 406 #endif 407 PROC_LOCK(p); 408 sigexit(td, SIGILL); 409 } 410 411 /* 412 * Build context to run handler in. 
413 */ 414 regs->tf_rsp = PTROUT(fp); 415 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 416 linux_sznonrtsigcode; 417 regs->tf_rflags &= ~(PSL_T | PSL_D); 418 regs->tf_cs = _ucode32sel; 419 regs->tf_ss = _udatasel; 420 regs->tf_ds = _udatasel; 421 regs->tf_es = _udatasel; 422 regs->tf_fs = _ufssel; 423 regs->tf_gs = _ugssel; 424 regs->tf_flags = TF_HASSEGS; 425 td->td_pcb->pcb_full_iret = 1; 426 PROC_LOCK(p); 427 mtx_lock(&psp->ps_mtx); 428 } 429 430 431 /* 432 * Send an interrupt to process. 433 * 434 * Stack is set up to allow sigcode stored 435 * in u. to call routine, followed by kcall 436 * to sigreturn routine below. After sigreturn 437 * resets the signal mask, the stack, and the 438 * frame pointer, it returns to the user 439 * specified pc, psl. 440 */ 441 static void 442 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 443 { 444 struct thread *td = curthread; 445 struct proc *p = td->td_proc; 446 struct sigacts *psp; 447 struct trapframe *regs; 448 struct l_sigframe *fp, frame; 449 l_sigset_t lmask; 450 int oonstack, i; 451 int sig, code; 452 453 sig = ksi->ksi_signo; 454 code = ksi->ksi_code; 455 PROC_LOCK_ASSERT(p, MA_OWNED); 456 psp = p->p_sigacts; 457 mtx_assert(&psp->ps_mtx, MA_OWNED); 458 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 459 /* Signal handler installed with SA_SIGINFO. */ 460 linux_rt_sendsig(catcher, ksi, mask); 461 return; 462 } 463 464 regs = td->td_frame; 465 oonstack = sigonstack(regs->tf_rsp); 466 467 #ifdef DEBUG 468 if (ldebug(sendsig)) 469 printf(ARGS(sendsig, "%p, %d, %p, %u"), 470 catcher, sig, (void*)mask, code); 471 #endif 472 473 /* 474 * Allocate space for the signal handler context. 
475 */ 476 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 477 SIGISMEMBER(psp->ps_sigonstack, sig)) { 478 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 479 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 480 } else 481 fp = (struct l_sigframe *)regs->tf_rsp - 1; 482 mtx_unlock(&psp->ps_mtx); 483 PROC_UNLOCK(p); 484 485 /* 486 * Build the argument list for the signal handler. 487 */ 488 if (p->p_sysent->sv_sigtbl) 489 if (sig <= p->p_sysent->sv_sigsize) 490 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 491 492 bzero(&frame, sizeof(frame)); 493 494 frame.sf_handler = PTROUT(catcher); 495 frame.sf_sig = sig; 496 497 bsd_to_linux_sigset(mask, &lmask); 498 499 /* 500 * Build the signal context to be used by sigreturn. 501 */ 502 frame.sf_sc.sc_mask = lmask.__bits[0]; 503 frame.sf_sc.sc_gs = regs->tf_gs; 504 frame.sf_sc.sc_fs = regs->tf_fs; 505 frame.sf_sc.sc_es = regs->tf_es; 506 frame.sf_sc.sc_ds = regs->tf_ds; 507 frame.sf_sc.sc_edi = regs->tf_rdi; 508 frame.sf_sc.sc_esi = regs->tf_rsi; 509 frame.sf_sc.sc_ebp = regs->tf_rbp; 510 frame.sf_sc.sc_ebx = regs->tf_rbx; 511 frame.sf_sc.sc_edx = regs->tf_rdx; 512 frame.sf_sc.sc_ecx = regs->tf_rcx; 513 frame.sf_sc.sc_eax = regs->tf_rax; 514 frame.sf_sc.sc_eip = regs->tf_rip; 515 frame.sf_sc.sc_cs = regs->tf_cs; 516 frame.sf_sc.sc_eflags = regs->tf_rflags; 517 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 518 frame.sf_sc.sc_ss = regs->tf_ss; 519 frame.sf_sc.sc_err = regs->tf_err; 520 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 521 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 522 523 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 524 frame.sf_extramask[i] = lmask.__bits[i+1]; 525 526 if (copyout(&frame, fp, sizeof(frame)) != 0) { 527 /* 528 * Process has trashed its stack; give it an illegal 529 * instruction to halt it in its tracks. 530 */ 531 PROC_LOCK(p); 532 sigexit(td, SIGILL); 533 } 534 535 /* 536 * Build context to run handler in. 
537 */ 538 regs->tf_rsp = PTROUT(fp); 539 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 540 regs->tf_rflags &= ~(PSL_T | PSL_D); 541 regs->tf_cs = _ucode32sel; 542 regs->tf_ss = _udatasel; 543 regs->tf_ds = _udatasel; 544 regs->tf_es = _udatasel; 545 regs->tf_fs = _ufssel; 546 regs->tf_gs = _ugssel; 547 regs->tf_flags = TF_HASSEGS; 548 td->td_pcb->pcb_full_iret = 1; 549 PROC_LOCK(p); 550 mtx_lock(&psp->ps_mtx); 551 } 552 553 /* 554 * System call to cleanup state after a signal 555 * has been taken. Reset signal mask and 556 * stack state from context left by sendsig (above). 557 * Return to previous pc and psl as specified by 558 * context left by sendsig. Check carefully to 559 * make sure that the user has not modified the 560 * psl to gain improper privileges or to cause 561 * a machine fault. 562 */ 563 int 564 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 565 { 566 struct l_sigframe frame; 567 struct trapframe *regs; 568 sigset_t bmask; 569 l_sigset_t lmask; 570 int eflags, i; 571 ksiginfo_t ksi; 572 573 regs = td->td_frame; 574 575 #ifdef DEBUG 576 if (ldebug(sigreturn)) 577 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 578 #endif 579 /* 580 * The trampoline code hands us the sigframe. 581 * It is unsafe to keep track of it ourselves, in the event that a 582 * program jumps out of a signal handler. 583 */ 584 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 585 return (EFAULT); 586 587 /* 588 * Check for security violations. 589 */ 590 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 591 eflags = frame.sf_sc.sc_eflags; 592 /* 593 * XXX do allow users to change the privileged flag PSL_RF. The 594 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 595 * sometimes set it there too. tf_eflags is kept in the signal 596 * context during signal handling and there is no other place 597 * to remember it, so the PSL_RF bit may be corrupted by the 598 * signal handler without us knowing. 
Corruption of the PSL_RF 599 * bit at worst causes one more or one less debugger trap, so 600 * allowing it is fairly harmless. 601 */ 602 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 603 return(EINVAL); 604 605 /* 606 * Don't allow users to load a valid privileged %cs. Let the 607 * hardware check for invalid selectors, excess privilege in 608 * other selectors, invalid %eip's and invalid %esp's. 609 */ 610 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 611 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 612 ksiginfo_init_trap(&ksi); 613 ksi.ksi_signo = SIGBUS; 614 ksi.ksi_code = BUS_OBJERR; 615 ksi.ksi_trapno = T_PROTFLT; 616 ksi.ksi_addr = (void *)regs->tf_rip; 617 trapsignal(td, &ksi); 618 return(EINVAL); 619 } 620 621 lmask.__bits[0] = frame.sf_sc.sc_mask; 622 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 623 lmask.__bits[i+1] = frame.sf_extramask[i]; 624 linux_to_bsd_sigset(&lmask, &bmask); 625 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 626 627 /* 628 * Restore signal context. 629 */ 630 regs->tf_rdi = frame.sf_sc.sc_edi; 631 regs->tf_rsi = frame.sf_sc.sc_esi; 632 regs->tf_rbp = frame.sf_sc.sc_ebp; 633 regs->tf_rbx = frame.sf_sc.sc_ebx; 634 regs->tf_rdx = frame.sf_sc.sc_edx; 635 regs->tf_rcx = frame.sf_sc.sc_ecx; 636 regs->tf_rax = frame.sf_sc.sc_eax; 637 regs->tf_rip = frame.sf_sc.sc_eip; 638 regs->tf_cs = frame.sf_sc.sc_cs; 639 regs->tf_ds = frame.sf_sc.sc_ds; 640 regs->tf_es = frame.sf_sc.sc_es; 641 regs->tf_fs = frame.sf_sc.sc_fs; 642 regs->tf_gs = frame.sf_sc.sc_gs; 643 regs->tf_rflags = eflags; 644 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 645 regs->tf_ss = frame.sf_sc.sc_ss; 646 td->td_pcb->pcb_full_iret = 1; 647 648 return (EJUSTRETURN); 649 } 650 651 /* 652 * System call to cleanup state after a signal 653 * has been taken. Reset signal mask and 654 * stack state from context left by rt_sendsig (above). 655 * Return to previous pc and psl as specified by 656 * context left by sendsig. 
Check carefully to 657 * make sure that the user has not modified the 658 * psl to gain improper privileges or to cause 659 * a machine fault. 660 */ 661 int 662 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 663 { 664 struct l_ucontext uc; 665 struct l_sigcontext *context; 666 sigset_t bmask; 667 l_stack_t *lss; 668 stack_t ss; 669 struct trapframe *regs; 670 int eflags; 671 ksiginfo_t ksi; 672 673 regs = td->td_frame; 674 675 #ifdef DEBUG 676 if (ldebug(rt_sigreturn)) 677 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 678 #endif 679 /* 680 * The trampoline code hands us the ucontext. 681 * It is unsafe to keep track of it ourselves, in the event that a 682 * program jumps out of a signal handler. 683 */ 684 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 685 return (EFAULT); 686 687 context = &uc.uc_mcontext; 688 689 /* 690 * Check for security violations. 691 */ 692 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 693 eflags = context->sc_eflags; 694 /* 695 * XXX do allow users to change the privileged flag PSL_RF. The 696 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 697 * sometimes set it there too. tf_eflags is kept in the signal 698 * context during signal handling and there is no other place 699 * to remember it, so the PSL_RF bit may be corrupted by the 700 * signal handler without us knowing. Corruption of the PSL_RF 701 * bit at worst causes one more or one less debugger trap, so 702 * allowing it is fairly harmless. 703 */ 704 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 705 return(EINVAL); 706 707 /* 708 * Don't allow users to load a valid privileged %cs. Let the 709 * hardware check for invalid selectors, excess privilege in 710 * other selectors, invalid %eip's and invalid %esp's. 
711 */ 712 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 713 if (!CS_SECURE(context->sc_cs)) { 714 ksiginfo_init_trap(&ksi); 715 ksi.ksi_signo = SIGBUS; 716 ksi.ksi_code = BUS_OBJERR; 717 ksi.ksi_trapno = T_PROTFLT; 718 ksi.ksi_addr = (void *)regs->tf_rip; 719 trapsignal(td, &ksi); 720 return(EINVAL); 721 } 722 723 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 724 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 725 726 /* 727 * Restore signal context 728 */ 729 regs->tf_gs = context->sc_gs; 730 regs->tf_fs = context->sc_fs; 731 regs->tf_es = context->sc_es; 732 regs->tf_ds = context->sc_ds; 733 regs->tf_rdi = context->sc_edi; 734 regs->tf_rsi = context->sc_esi; 735 regs->tf_rbp = context->sc_ebp; 736 regs->tf_rbx = context->sc_ebx; 737 regs->tf_rdx = context->sc_edx; 738 regs->tf_rcx = context->sc_ecx; 739 regs->tf_rax = context->sc_eax; 740 regs->tf_rip = context->sc_eip; 741 regs->tf_cs = context->sc_cs; 742 regs->tf_rflags = eflags; 743 regs->tf_rsp = context->sc_esp_at_signal; 744 regs->tf_ss = context->sc_ss; 745 td->td_pcb->pcb_full_iret = 1; 746 747 /* 748 * call sigaltstack & ignore results.. 
749 */ 750 lss = &uc.uc_stack; 751 ss.ss_sp = PTRIN(lss->ss_sp); 752 ss.ss_size = lss->ss_size; 753 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 754 755 #ifdef DEBUG 756 if (ldebug(rt_sigreturn)) 757 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 758 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 759 #endif 760 (void)kern_sigaltstack(td, &ss, NULL); 761 762 return (EJUSTRETURN); 763 } 764 765 static int 766 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 767 { 768 struct proc *p; 769 struct trapframe *frame; 770 771 p = td->td_proc; 772 frame = td->td_frame; 773 774 sa->args[0] = frame->tf_rbx; 775 sa->args[1] = frame->tf_rcx; 776 sa->args[2] = frame->tf_rdx; 777 sa->args[3] = frame->tf_rsi; 778 sa->args[4] = frame->tf_rdi; 779 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 780 sa->code = frame->tf_rax; 781 782 if (sa->code >= p->p_sysent->sv_size) 783 sa->callp = &p->p_sysent->sv_table[0]; 784 else 785 sa->callp = &p->p_sysent->sv_table[sa->code]; 786 sa->narg = sa->callp->sy_narg; 787 788 td->td_retval[0] = 0; 789 td->td_retval[1] = frame->tf_rdx; 790 791 return (0); 792 } 793 794 /* 795 * If a linux binary is exec'ing something, try this image activator 796 * first. We override standard shell script execution in order to 797 * be able to modify the interpreter path. We only do this if a linux 798 * binary is doing the exec, so we do not create an EXEC module for it. 799 */ 800 static int exec_linux_imgact_try(struct image_params *iparams); 801 802 static int 803 exec_linux_imgact_try(struct image_params *imgp) 804 { 805 const char *head = (const char *)imgp->image_header; 806 char *rpath; 807 int error = -1, len; 808 809 /* 810 * The interpreter for shell scripts run from a linux binary needs 811 * to be located in /compat/linux if possible in order to recursively 812 * maintain linux path emulation. 
813 */ 814 if (((const short *)head)[0] == SHELLMAGIC) { 815 /* 816 * Run our normal shell image activator. If it succeeds attempt 817 * to use the alternate path for the interpreter. If an 818 * alternate * path is found, use our stringspace to store it. 819 */ 820 if ((error = exec_shell_imgact(imgp)) == 0) { 821 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 822 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 823 AT_FDCWD); 824 if (rpath != NULL) { 825 len = strlen(rpath) + 1; 826 827 if (len <= MAXSHELLCMDLEN) { 828 memcpy(imgp->interpreter_name, rpath, 829 len); 830 } 831 free(rpath, M_TEMP); 832 } 833 } 834 } 835 return(error); 836 } 837 838 /* 839 * Clear registers on exec 840 * XXX copied from ia32_signal.c. 841 */ 842 static void 843 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 844 { 845 struct trapframe *regs = td->td_frame; 846 struct pcb *pcb = td->td_pcb; 847 848 mtx_lock(&dt_lock); 849 if (td->td_proc->p_md.md_ldt != NULL) 850 user_ldt_free(td); 851 else 852 mtx_unlock(&dt_lock); 853 854 critical_enter(); 855 wrmsr(MSR_FSBASE, 0); 856 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 857 pcb->pcb_fsbase = 0; 858 pcb->pcb_gsbase = 0; 859 critical_exit(); 860 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 861 862 bzero((char *)regs, sizeof(struct trapframe)); 863 regs->tf_rip = imgp->entry_addr; 864 regs->tf_rsp = stack; 865 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 866 regs->tf_gs = _ugssel; 867 regs->tf_fs = _ufssel; 868 regs->tf_es = _udatasel; 869 regs->tf_ds = _udatasel; 870 regs->tf_ss = _udatasel; 871 regs->tf_flags = TF_HASSEGS; 872 regs->tf_cs = _ucode32sel; 873 regs->tf_rbx = imgp->ps_strings; 874 td->td_pcb->pcb_full_iret = 1; 875 load_cr0(rcr0() | CR0_MP | CR0_TS); 876 fpstate_drop(td); 877 878 /* Return via doreti so that we can change to a different %cs */ 879 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT; 880 pcb->pcb_flags &= ~PCB_GS32BIT; 881 td->td_retval[1] = 0; 882 } 883 
/*
 * XXX copied from ia32_sysvec.c.
 *
 * Populate the top of the new image's user stack and return the address
 * that becomes its initial stack base.
 *
 * Working downwards from LINUX32_PS_STRINGS the layout is: the signal
 * trampoline (linux_szsigcode bytes), the platform string
 * (linux_szplatform bytes), SPARE_USRSPACE, the argument/environment
 * strings, and finally the 32-bit argv/envp pointer vectors, with extra
 * room reserved for the auxiliary vector when imgp->auxargs is set.
 * The ps_strings fields are filled in along the way.
 *
 * Note that the results of the copyout() and suword32() calls are not
 * checked.
 */
static register_t *
linux_copyout_strings(struct image_params *imgp)
{
	int argc, envc;
	u_int32_t *vectp;
	char *stringp, *destp;
	u_int32_t *stack_base;
	struct linux32_ps_strings *arginfo;

	/*
	 * Calculate string base and vector table pointers.
	 * Also deal with signal trampoline code for this exec type.
	 */
	arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS;
	destp =	(caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE -
	    linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace),
	    sizeof(char *));

	/*
	 * Install the signal trampoline code.
	 */
	copyout(imgp->proc->p_sysent->sv_sigcode,
	    ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode);

	/*
	 * Install LINUX_PLATFORM
	 */
	copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode -
	    linux_szplatform), linux_szplatform);

	/*
	 * If we have a valid auxargs ptr, prepare some room
	 * on the stack.
	 */
	if (imgp->auxargs) {
		/*
		 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for
		 * lower compatibility.
		 */
		imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size :
		    (LINUX_AT_COUNT * 2);
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets, and imgp->auxarg_size is room
		 * for the arguments of the runtime loader.
		 */
		vectp = (u_int32_t *) (destp - (imgp->args->argc +
		    imgp->args->envc + 2 + imgp->auxarg_size) *
		    sizeof(u_int32_t));

	} else
		/*
		 * The '+ 2' is for the null pointers at the end of each of
		 * the arg and env vector sets
		 */
		vectp = (u_int32_t *)(destp - (imgp->args->argc +
		    imgp->args->envc + 2) * sizeof(u_int32_t));

	/*
	 * vectp also becomes our initial stack base
	 */
	stack_base = vectp;

	stringp = imgp->args->begin_argv;
	argc = imgp->args->argc;
	envc = imgp->args->envc;
	/*
	 * Copy out strings - arguments and environment.
	 */
	copyout(stringp, destp, ARG_MAX - imgp->args->stringspace);

	/*
	 * Fill in "ps_strings" struct for ps, w, etc.
	 */
	suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nargvstr, argc);

	/*
	 * Fill in argument portion of vector table.
	 * stringp walks the kernel copy of the strings while destp tracks
	 * the matching user address, so both advance in lock step.
	 */
	for (; argc > 0; --argc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* a null vector table pointer separates the argp's from the envp's */
	suword32(vectp++, 0);

	suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp);
	suword32(&arginfo->ps_nenvstr, envc);

	/*
	 * Fill in environment portion of vector table.
	 */
	for (; envc > 0; --envc) {
		suword32(vectp++, (uint32_t)(intptr_t)destp);
		while (*stringp++ != 0)
			destp++;
		destp++;
	}

	/* end of vector table is a null pointer */
	suword32(vectp, 0);

	return ((register_t *)stack_base);
}

SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0,
    "32-bit Linux emulation");

/*
 * Run-time adjustable ceilings (compat.linux32.*) that linux32_fixlimit()
 * applies to the data, stack and VM resource limits.  A value of 0
 * disables the corresponding clamp.
 */
static u_long	linux32_maxdsiz = LINUX32_MAXDSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW,
    &linux32_maxdsiz, 0, "");
static u_long	linux32_maxssiz = LINUX32_MAXSSIZ;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW,
    &linux32_maxssiz, 0, "");
static u_long	linux32_maxvmem = LINUX32_MAXVMEM;
SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW,
    &linux32_maxvmem, 0, "");

/*
 * Clamp both the soft and the hard value of *rl to the matching
 * linux32_max* ceiling.  Only RLIMIT_DATA, RLIMIT_STACK and RLIMIT_VMEM
 * are touched; any other 'which' leaves *rl unchanged.
 */
static void
linux32_fixlimit(struct rlimit *rl, int which)
{

	switch (which) {
	case RLIMIT_DATA:
		if (linux32_maxdsiz != 0) {
			if (rl->rlim_cur > linux32_maxdsiz)
				rl->rlim_cur = linux32_maxdsiz;
			if (rl->rlim_max > linux32_maxdsiz)
				rl->rlim_max = linux32_maxdsiz;
		}
		break;
	case RLIMIT_STACK:
		if (linux32_maxssiz != 0) {
			if (rl->rlim_cur > linux32_maxssiz)
				rl->rlim_cur = linux32_maxssiz;
			if (rl->rlim_max > linux32_maxssiz)
				rl->rlim_max = linux32_maxssiz;
		}
		break;
	case RLIMIT_VMEM:
		if (linux32_maxvmem != 0) {
			if (rl->rlim_cur > linux32_maxvmem)
				rl->rlim_cur = linux32_maxvmem;
			if (rl->rlim_max > linux32_maxvmem)
				rl->rlim_max = linux32_maxvmem;
		}
		break;
	}
}

/*
 * System entry vector for 32-bit Linux ELF binaries: ties together the
 * syscall table, the errno/signal translation tables and the exec and
 * signal hooks defined in this file.
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_mask	= 0,
	.sv_sigsize	= LINUX_SIGTBLSZ,
	.sv_sigtbl	= bsd_to_linux_signal,
	.sv_errsize	= ELAST + 1,
	.sv_errtbl	= bsd_to_linux_errno,
	.sv_transtrap	= translate_traps,
	.sv_fixup	= elf_linux_fixup,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= linux_sigcode,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_prepsyscall	= NULL,
	.sv_name	= "Linux ELF32",
	.sv_coredump	= elf32_coredump,
	.sv_imgact_try	= exec_linux_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_pagesize	= PAGE_SIZE,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= LINUX32_USRSTACK,
	.sv_usrstack	= LINUX32_USRSTACK,
	.sv_psstrings	= LINUX32_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= exec_linux_setregs,
	.sv_fixlimit	= linux32_fixlimit,
	.sv_maxssiz	= &linux32_maxssiz,
	.sv_flags	= SV_ABI_LINUX | SV_ILP32 | SV_IA32,
	.sv_set_syscall_retval = cpu_set_syscall_retval,
	.sv_fetch_syscall_args = linux32_fetch_syscall_args,
	.sv_syscallnames = NULL,
};

/* Vendor name and first descriptor word expected in the ABI brand note. */
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

/*
 * Extract the kernel release from an ABI note.
 *
 * The descriptor words start after the note header and the name, padded
 * to a multiple of sizeof(Elf32_Addr).  Word 0 must equal
 * GNULINUX_ABI_DESC, otherwise FALSE is returned and *osrel is left
 * untouched; words 1..3 are then combined into *osrel.
 */
static boolean_t
linux32_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));

	desc = (const Elf32_Word *)p;
	if (desc[0] != GNULINUX_ABI_DESC)
		return (FALSE);

	/*
	 * For linux we encode osrel as follows:
	 * VVVMMMIII (version, major, minor), see linux_mib.c.
	 */
	*osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3];

	return (TRUE);
}

/* Brand note matched against the binary; osrel comes from the hook above. */
static Elf_Brandnote linux32_brandnote = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= linux32_trans_osrel
};

/* Brand entry for binaries using the /lib/ld-linux.so.1 interpreter. */
static Elf32_Brandinfo linux_brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.1",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/* Brand entry for binaries using the /lib/ld-linux.so.2 interpreter. */
static Elf32_Brandinfo linux_glibc2brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= "/compat/linux",
	.interp_path	= "/lib/ld-linux.so.2",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux32_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/* NULL-terminated list of the brands handled by the module event handler. */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	NULL
};

static int
linux_elf_modevent(module_t mod, int type, void *data)
{
	Elf32_Brandinfo **brandinfo;
	int error;
	struct linux_ioctl_handler **lihp;
	struct linux_device_handler **ldhp;

	error = 0;

	switch(type) {
	case MOD_LOAD:
		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
		     ++brandinfo)
			if (elf32_insert_brand_entry(*brandinfo) < 0)
				error = EINVAL;
		if (error == 0) {
			SET_FOREACH(lihp, linux_ioctl_handler_set)
				linux_ioctl_register_handler(*lihp);
			SET_FOREACH(ldhp, linux_device_handler_set)
				linux_device_register_handler(*ldhp);
			mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF);
			sx_init(&emul_shared_lock, "emuldata->shared lock");
LIST_INIT(&futex_list); 1163 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1164 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1165 linux_proc_exit, NULL, 1000); 1166 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, 1167 linux_schedtail, NULL, 1000); 1168 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1169 linux_proc_exec, NULL, 1000); 1170 linux_szplatform = roundup(strlen(linux_platform) + 1, 1171 sizeof(char *)); 1172 linux_osd_jail_register(); 1173 stclohz = (stathz ? stathz : hz); 1174 if (bootverbose) 1175 printf("Linux ELF exec handler installed\n"); 1176 } else 1177 printf("cannot insert Linux ELF brand handler\n"); 1178 break; 1179 case MOD_UNLOAD: 1180 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1181 ++brandinfo) 1182 if (elf32_brand_inuse(*brandinfo)) 1183 error = EBUSY; 1184 if (error == 0) { 1185 for (brandinfo = &linux_brandlist[0]; 1186 *brandinfo != NULL; ++brandinfo) 1187 if (elf32_remove_brand_entry(*brandinfo) < 0) 1188 error = EINVAL; 1189 } 1190 if (error == 0) { 1191 SET_FOREACH(lihp, linux_ioctl_handler_set) 1192 linux_ioctl_unregister_handler(*lihp); 1193 SET_FOREACH(ldhp, linux_device_handler_set) 1194 linux_device_unregister_handler(*ldhp); 1195 mtx_destroy(&emul_lock); 1196 sx_destroy(&emul_shared_lock); 1197 mtx_destroy(&futex_mtx); 1198 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1199 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 1200 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1201 linux_osd_jail_deregister(); 1202 if (bootverbose) 1203 printf("Linux ELF exec handler removed\n"); 1204 } else 1205 printf("Could not deinstall ELF interpreter entry\n"); 1206 break; 1207 default: 1208 return EOPNOTSUPP; 1209 } 1210 return error; 1211 } 1212 1213 static moduledata_t linux_elf_mod = { 1214 "linuxelf", 1215 linux_elf_modevent, 1216 0 1217 }; 1218 1219 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1220