1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_FREEBSD32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 39 #endif 40 41 #define __ELF_WORD_SIZE 32 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/exec.h> 46 #include <sys/fcntl.h> 47 #include <sys/imgact.h> 48 #include <sys/imgact_elf.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/module.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/sysctl.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysent.h> 60 #include <sys/sysproto.h> 61 #include <sys/vnode.h> 62 #include <sys/eventhandler.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_extern.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_page.h> 70 #include <vm/vm_param.h> 71 72 #include <machine/cpu.h> 73 #include <machine/md_var.h> 74 #include <machine/pcb.h> 75 #include <machine/specialreg.h> 76 77 #include <amd64/linux32/linux.h> 78 #include <amd64/linux32/linux32_proto.h> 79 #include <compat/linux/linux_emul.h> 80 #include <compat/linux/linux_futex.h> 81 #include <compat/linux/linux_ioctl.h> 82 #include <compat/linux/linux_mib.h> 83 #include <compat/linux/linux_misc.h> 84 #include <compat/linux/linux_signal.h> 85 #include <compat/linux/linux_util.h> 86 87 MODULE_VERSION(linux, 1); 88 89 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 90 91 #define AUXARGS_ENTRY_32(pos, id, val) \ 92 do { \ 93 suword32(pos++, id); \ 94 suword32(pos++, val); \ 95 } while (0) 96 97 #if BYTE_ORDER == LITTLE_ENDIAN 98 #define SHELLMAGIC 0x2123 /* #! */ 99 #else 100 #define SHELLMAGIC 0x2321 101 #endif 102 103 /* 104 * Allow the sendsig functions to use the ldebug() facility 105 * even though they are not syscalls themselves. Map them 106 * to syscall 0. This is slightly less bogus than using 107 * ldebug(sigreturn). 108 */ 109 #define LINUX_SYS_linux_rt_sendsig 0 110 #define LINUX_SYS_linux_sendsig 0 111 112 const char *linux_platform = "i686"; 113 static int linux_szplatform; 114 extern char linux_sigcode[]; 115 extern int linux_szsigcode; 116 117 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 118 119 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 120 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 121 122 static int elf_linux_fixup(register_t **stack_base, 123 struct image_params *iparams); 124 static register_t *linux_copyout_strings(struct image_params *imgp); 125 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 126 static void exec_linux_setregs(struct thread *td, 127 struct image_params *imgp, u_long stack); 128 static void linux32_fixlimit(struct rlimit *rl, int which); 129 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 130 131 static eventhandler_tag linux_exit_tag; 132 static eventhandler_tag linux_exec_tag; 133 134 /* 135 * Linux syscalls return negative errno's, we do positive and map them 136 * Reference: 137 * FreeBSD: src/sys/sys/errno.h 138 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 139 * linux-2.6.17.8/include/asm-generic/errno.h 140 */ 141 static int bsd_to_linux_errno[ELAST + 1] = { 142 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 143 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 144 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 145 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 146 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 147 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 148 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 149 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 150 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 151 -72, -67, -71 152 }; 153 154 int bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 155 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 156 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 157 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 158 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 159 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 160 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 161 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 162 0, LINUX_SIGUSR1, LINUX_SIGUSR2 163 }; 164 165 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 166 SIGHUP, SIGINT, SIGQUIT, SIGILL, 167 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 168 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 169 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 170 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 171 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 172 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 173 SIGIO, SIGURG, SIGSYS 174 }; 175 176 #define LINUX_T_UNKNOWN 255 177 static int _bsd_to_linux_trapcode[] = { 178 LINUX_T_UNKNOWN, /* 0 */ 179 6, /* 1 T_PRIVINFLT */ 180 LINUX_T_UNKNOWN, /* 2 */ 181 3, /* 3 T_BPTFLT */ 182 LINUX_T_UNKNOWN, /* 4 */ 183 LINUX_T_UNKNOWN, /* 5 */ 184 16, /* 6 T_ARITHTRAP */ 185 254, /* 7 T_ASTFLT */ 186 LINUX_T_UNKNOWN, /* 8 */ 187 13, /* 9 T_PROTFLT */ 188 1, /* 10 T_TRCTRAP */ 189 LINUX_T_UNKNOWN, /* 11 */ 190 14, /* 12 T_PAGEFLT */ 191 LINUX_T_UNKNOWN, /* 13 */ 192 17, /* 14 T_ALIGNFLT */ 193 LINUX_T_UNKNOWN, /* 15 */ 194 LINUX_T_UNKNOWN, /* 16 */ 195 LINUX_T_UNKNOWN, /* 17 */ 196 0, /* 18 T_DIVIDE */ 197 2, /* 19 T_NMI */ 198 4, /* 20 T_OFLOW */ 199 5, /* 21 T_BOUND */ 200 7, /* 22 T_DNA */ 201 8, /* 23 T_DOUBLEFLT */ 202 9, /* 24 T_FPOPFLT */ 203 10, /* 25 T_TSSFLT */ 204 11, /* 26 T_SEGNPFLT */ 205 12, /* 27 T_STKFLT */ 206 18, /* 28 T_MCHK */ 207 19, /* 29 T_XMMFLT */ 208 15 /* 30 T_RESERVED */ 209 }; 210 #define bsd_to_linux_trapcode(code) \ 211 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? \ 212 _bsd_to_linux_trapcode[(code)]: \ 213 LINUX_T_UNKNOWN) 214 215 struct linux32_ps_strings { 216 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 217 u_int ps_nargvstr; /* the number of argument strings */ 218 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 219 u_int ps_nenvstr; /* the number of environment strings */ 220 }; 221 222 /* 223 * If FreeBSD & Linux have a difference of opinion about what a trap 224 * means, deal with it here. 225 * 226 * MPSAFE 227 */ 228 static int 229 translate_traps(int signal, int trap_code) 230 { 231 if (signal != SIGBUS) 232 return signal; 233 switch (trap_code) { 234 case T_PROTFLT: 235 case T_TSSFLT: 236 case T_DOUBLEFLT: 237 case T_PAGEFLT: 238 return SIGSEGV; 239 default: 240 return signal; 241 } 242 } 243 244 static int 245 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 246 { 247 Elf32_Auxargs *args; 248 Elf32_Addr *base; 249 Elf32_Addr *pos, *uplatform; 250 struct linux32_ps_strings *arginfo; 251 252 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 253 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szplatform); 254 255 KASSERT(curthread->td_proc == imgp->proc, 256 ("unsafe elf_linux_fixup(), should be curproc")); 257 base = (Elf32_Addr *)*stack_base; 258 args = (Elf32_Auxargs *)imgp->auxargs; 259 pos = base + (imgp->args->argc + imgp->args->envc + 2); 260 261 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 262 263 /* 264 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 265 * as it has appeared in the 2.4.0-rc7 first time. 266 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 267 * glibc falls back to the hard-coded CLK_TCK value when aux entry 268 * is not present. 269 * Also see linux_times() implementation. 270 */ 271 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 272 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 273 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 274 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 275 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 276 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 277 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 278 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 279 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 280 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 281 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 282 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 283 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 284 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 285 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 286 if (args->execfd != -1) 287 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 288 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 289 290 free(imgp->auxargs, M_TEMP); 291 imgp->auxargs = NULL; 292 293 base--; 294 suword32(base, (uint32_t)imgp->args->argc); 295 *stack_base = (register_t *)base; 296 return 0; 297 } 298 299 extern unsigned long linux_sznonrtsigcode; 300 301 static void 302 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 303 { 304 struct thread *td = curthread; 305 struct proc *p = td->td_proc; 306 struct sigacts *psp; 307 struct trapframe *regs; 308 struct l_rt_sigframe *fp, frame; 309 int oonstack; 310 int sig; 311 int code; 312 313 sig = ksi->ksi_signo; 314 code = ksi->ksi_code; 315 PROC_LOCK_ASSERT(p, MA_OWNED); 316 psp = p->p_sigacts; 317 mtx_assert(&psp->ps_mtx, MA_OWNED); 318 regs = td->td_frame; 319 oonstack = sigonstack(regs->tf_rsp); 320 321 #ifdef DEBUG 322 if (ldebug(rt_sendsig)) 323 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 324 catcher, sig, (void*)mask, code); 325 #endif 326 /* 327 * Allocate space for the signal handler context. 328 */ 329 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 330 SIGISMEMBER(psp->ps_sigonstack, sig)) { 331 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 332 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 333 } else 334 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 335 mtx_unlock(&psp->ps_mtx); 336 337 /* 338 * Build the argument list for the signal handler. 339 */ 340 if (p->p_sysent->sv_sigtbl) 341 if (sig <= p->p_sysent->sv_sigsize) 342 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 343 344 bzero(&frame, sizeof(frame)); 345 346 frame.sf_handler = PTROUT(catcher); 347 frame.sf_sig = sig; 348 frame.sf_siginfo = PTROUT(&fp->sf_si); 349 frame.sf_ucontext = PTROUT(&fp->sf_sc); 350 351 /* Fill in POSIX parts */ 352 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 353 354 /* 355 * Build the signal context to be used by sigreturn. 356 */ 357 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 358 frame.sf_sc.uc_link = 0; /* XXX ??? */ 359 360 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 361 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 362 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 363 ? ((oonstack) ? LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 364 PROC_UNLOCK(p); 365 366 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 367 368 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 369 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 370 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 371 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 372 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 373 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 374 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 375 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 376 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 377 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 378 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 379 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 380 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 381 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 382 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 383 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 384 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 385 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 386 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 387 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 388 389 #ifdef DEBUG 390 if (ldebug(rt_sendsig)) 391 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 392 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 393 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 394 #endif 395 396 if (copyout(&frame, fp, sizeof(frame)) != 0) { 397 /* 398 * Process has trashed its stack; give it an illegal 399 * instruction to halt it in its tracks. 400 */ 401 #ifdef DEBUG 402 if (ldebug(rt_sendsig)) 403 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 404 fp, oonstack); 405 #endif 406 PROC_LOCK(p); 407 sigexit(td, SIGILL); 408 } 409 410 /* 411 * Build context to run handler in. 412 */ 413 regs->tf_rsp = PTROUT(fp); 414 regs->tf_rip = p->p_sysent->sv_sigcode_base + linux_sznonrtsigcode; 415 regs->tf_rflags &= ~(PSL_T | PSL_D); 416 regs->tf_cs = _ucode32sel; 417 regs->tf_ss = _udatasel; 418 regs->tf_ds = _udatasel; 419 regs->tf_es = _udatasel; 420 regs->tf_fs = _ufssel; 421 regs->tf_gs = _ugssel; 422 regs->tf_flags = TF_HASSEGS; 423 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 424 PROC_LOCK(p); 425 mtx_lock(&psp->ps_mtx); 426 } 427 428 429 /* 430 * Send an interrupt to process. 431 * 432 * Stack is set up to allow sigcode stored 433 * in u. to call routine, followed by kcall 434 * to sigreturn routine below. After sigreturn 435 * resets the signal mask, the stack, and the 436 * frame pointer, it returns to the user 437 * specified pc, psl. 438 */ 439 static void 440 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 441 { 442 struct thread *td = curthread; 443 struct proc *p = td->td_proc; 444 struct sigacts *psp; 445 struct trapframe *regs; 446 struct l_sigframe *fp, frame; 447 l_sigset_t lmask; 448 int oonstack, i; 449 int sig, code; 450 451 sig = ksi->ksi_signo; 452 code = ksi->ksi_code; 453 PROC_LOCK_ASSERT(p, MA_OWNED); 454 psp = p->p_sigacts; 455 mtx_assert(&psp->ps_mtx, MA_OWNED); 456 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 457 /* Signal handler installed with SA_SIGINFO. */ 458 linux_rt_sendsig(catcher, ksi, mask); 459 return; 460 } 461 462 regs = td->td_frame; 463 oonstack = sigonstack(regs->tf_rsp); 464 465 #ifdef DEBUG 466 if (ldebug(sendsig)) 467 printf(ARGS(sendsig, "%p, %d, %p, %u"), 468 catcher, sig, (void*)mask, code); 469 #endif 470 471 /* 472 * Allocate space for the signal handler context. 473 */ 474 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 475 SIGISMEMBER(psp->ps_sigonstack, sig)) { 476 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 477 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 478 } else 479 fp = (struct l_sigframe *)regs->tf_rsp - 1; 480 mtx_unlock(&psp->ps_mtx); 481 PROC_UNLOCK(p); 482 483 /* 484 * Build the argument list for the signal handler. 485 */ 486 if (p->p_sysent->sv_sigtbl) 487 if (sig <= p->p_sysent->sv_sigsize) 488 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 489 490 bzero(&frame, sizeof(frame)); 491 492 frame.sf_handler = PTROUT(catcher); 493 frame.sf_sig = sig; 494 495 bsd_to_linux_sigset(mask, &lmask); 496 497 /* 498 * Build the signal context to be used by sigreturn. 499 */ 500 frame.sf_sc.sc_mask = lmask.__bits[0]; 501 frame.sf_sc.sc_gs = regs->tf_gs; 502 frame.sf_sc.sc_fs = regs->tf_fs; 503 frame.sf_sc.sc_es = regs->tf_es; 504 frame.sf_sc.sc_ds = regs->tf_ds; 505 frame.sf_sc.sc_edi = regs->tf_rdi; 506 frame.sf_sc.sc_esi = regs->tf_rsi; 507 frame.sf_sc.sc_ebp = regs->tf_rbp; 508 frame.sf_sc.sc_ebx = regs->tf_rbx; 509 frame.sf_sc.sc_edx = regs->tf_rdx; 510 frame.sf_sc.sc_ecx = regs->tf_rcx; 511 frame.sf_sc.sc_eax = regs->tf_rax; 512 frame.sf_sc.sc_eip = regs->tf_rip; 513 frame.sf_sc.sc_cs = regs->tf_cs; 514 frame.sf_sc.sc_eflags = regs->tf_rflags; 515 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 516 frame.sf_sc.sc_ss = regs->tf_ss; 517 frame.sf_sc.sc_err = regs->tf_err; 518 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 519 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 520 521 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 522 frame.sf_extramask[i] = lmask.__bits[i+1]; 523 524 if (copyout(&frame, fp, sizeof(frame)) != 0) { 525 /* 526 * Process has trashed its stack; give it an illegal 527 * instruction to halt it in its tracks. 528 */ 529 PROC_LOCK(p); 530 sigexit(td, SIGILL); 531 } 532 533 /* 534 * Build context to run handler in. 535 */ 536 regs->tf_rsp = PTROUT(fp); 537 regs->tf_rip = p->p_sysent->sv_sigcode_base; 538 regs->tf_rflags &= ~(PSL_T | PSL_D); 539 regs->tf_cs = _ucode32sel; 540 regs->tf_ss = _udatasel; 541 regs->tf_ds = _udatasel; 542 regs->tf_es = _udatasel; 543 regs->tf_fs = _ufssel; 544 regs->tf_gs = _ugssel; 545 regs->tf_flags = TF_HASSEGS; 546 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 547 PROC_LOCK(p); 548 mtx_lock(&psp->ps_mtx); 549 } 550 551 /* 552 * System call to cleanup state after a signal 553 * has been taken. Reset signal mask and 554 * stack state from context left by sendsig (above). 555 * Return to previous pc and psl as specified by 556 * context left by sendsig. Check carefully to 557 * make sure that the user has not modified the 558 * psl to gain improper privileges or to cause 559 * a machine fault. 560 */ 561 int 562 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 563 { 564 struct l_sigframe frame; 565 struct trapframe *regs; 566 sigset_t bmask; 567 l_sigset_t lmask; 568 int eflags, i; 569 ksiginfo_t ksi; 570 571 regs = td->td_frame; 572 573 #ifdef DEBUG 574 if (ldebug(sigreturn)) 575 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 576 #endif 577 /* 578 * The trampoline code hands us the sigframe. 579 * It is unsafe to keep track of it ourselves, in the event that a 580 * program jumps out of a signal handler. 581 */ 582 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 583 return (EFAULT); 584 585 /* 586 * Check for security violations. 587 */ 588 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 589 eflags = frame.sf_sc.sc_eflags; 590 /* 591 * XXX do allow users to change the privileged flag PSL_RF. The 592 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 593 * sometimes set it there too. tf_eflags is kept in the signal 594 * context during signal handling and there is no other place 595 * to remember it, so the PSL_RF bit may be corrupted by the 596 * signal handler without us knowing. Corruption of the PSL_RF 597 * bit at worst causes one more or one less debugger trap, so 598 * allowing it is fairly harmless. 599 */ 600 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 601 return(EINVAL); 602 603 /* 604 * Don't allow users to load a valid privileged %cs. Let the 605 * hardware check for invalid selectors, excess privilege in 606 * other selectors, invalid %eip's and invalid %esp's. 607 */ 608 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 609 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 610 ksiginfo_init_trap(&ksi); 611 ksi.ksi_signo = SIGBUS; 612 ksi.ksi_code = BUS_OBJERR; 613 ksi.ksi_trapno = T_PROTFLT; 614 ksi.ksi_addr = (void *)regs->tf_rip; 615 trapsignal(td, &ksi); 616 return(EINVAL); 617 } 618 619 lmask.__bits[0] = frame.sf_sc.sc_mask; 620 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 621 lmask.__bits[i+1] = frame.sf_extramask[i]; 622 linux_to_bsd_sigset(&lmask, &bmask); 623 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 624 625 /* 626 * Restore signal context. 627 */ 628 regs->tf_rdi = frame.sf_sc.sc_edi; 629 regs->tf_rsi = frame.sf_sc.sc_esi; 630 regs->tf_rbp = frame.sf_sc.sc_ebp; 631 regs->tf_rbx = frame.sf_sc.sc_ebx; 632 regs->tf_rdx = frame.sf_sc.sc_edx; 633 regs->tf_rcx = frame.sf_sc.sc_ecx; 634 regs->tf_rax = frame.sf_sc.sc_eax; 635 regs->tf_rip = frame.sf_sc.sc_eip; 636 regs->tf_cs = frame.sf_sc.sc_cs; 637 regs->tf_ds = frame.sf_sc.sc_ds; 638 regs->tf_es = frame.sf_sc.sc_es; 639 regs->tf_fs = frame.sf_sc.sc_fs; 640 regs->tf_gs = frame.sf_sc.sc_gs; 641 regs->tf_rflags = eflags; 642 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 643 regs->tf_ss = frame.sf_sc.sc_ss; 644 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 645 646 return (EJUSTRETURN); 647 } 648 649 /* 650 * System call to cleanup state after a signal 651 * has been taken. Reset signal mask and 652 * stack state from context left by rt_sendsig (above). 653 * Return to previous pc and psl as specified by 654 * context left by sendsig. Check carefully to 655 * make sure that the user has not modified the 656 * psl to gain improper privileges or to cause 657 * a machine fault. 658 */ 659 int 660 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 661 { 662 struct l_ucontext uc; 663 struct l_sigcontext *context; 664 sigset_t bmask; 665 l_stack_t *lss; 666 stack_t ss; 667 struct trapframe *regs; 668 int eflags; 669 ksiginfo_t ksi; 670 671 regs = td->td_frame; 672 673 #ifdef DEBUG 674 if (ldebug(rt_sigreturn)) 675 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 676 #endif 677 /* 678 * The trampoline code hands us the ucontext. 679 * It is unsafe to keep track of it ourselves, in the event that a 680 * program jumps out of a signal handler. 681 */ 682 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 683 return (EFAULT); 684 685 context = &uc.uc_mcontext; 686 687 /* 688 * Check for security violations. 689 */ 690 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 691 eflags = context->sc_eflags; 692 /* 693 * XXX do allow users to change the privileged flag PSL_RF. The 694 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 695 * sometimes set it there too. tf_eflags is kept in the signal 696 * context during signal handling and there is no other place 697 * to remember it, so the PSL_RF bit may be corrupted by the 698 * signal handler without us knowing. Corruption of the PSL_RF 699 * bit at worst causes one more or one less debugger trap, so 700 * allowing it is fairly harmless. 701 */ 702 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 703 return(EINVAL); 704 705 /* 706 * Don't allow users to load a valid privileged %cs. Let the 707 * hardware check for invalid selectors, excess privilege in 708 * other selectors, invalid %eip's and invalid %esp's. 709 */ 710 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 711 if (!CS_SECURE(context->sc_cs)) { 712 ksiginfo_init_trap(&ksi); 713 ksi.ksi_signo = SIGBUS; 714 ksi.ksi_code = BUS_OBJERR; 715 ksi.ksi_trapno = T_PROTFLT; 716 ksi.ksi_addr = (void *)regs->tf_rip; 717 trapsignal(td, &ksi); 718 return(EINVAL); 719 } 720 721 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 722 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 723 724 /* 725 * Restore signal context 726 */ 727 regs->tf_gs = context->sc_gs; 728 regs->tf_fs = context->sc_fs; 729 regs->tf_es = context->sc_es; 730 regs->tf_ds = context->sc_ds; 731 regs->tf_rdi = context->sc_edi; 732 regs->tf_rsi = context->sc_esi; 733 regs->tf_rbp = context->sc_ebp; 734 regs->tf_rbx = context->sc_ebx; 735 regs->tf_rdx = context->sc_edx; 736 regs->tf_rcx = context->sc_ecx; 737 regs->tf_rax = context->sc_eax; 738 regs->tf_rip = context->sc_eip; 739 regs->tf_cs = context->sc_cs; 740 regs->tf_rflags = eflags; 741 regs->tf_rsp = context->sc_esp_at_signal; 742 regs->tf_ss = context->sc_ss; 743 set_pcb_flags(td->td_pcb, PCB_FULL_IRET); 744 745 /* 746 * call sigaltstack & ignore results.. 747 */ 748 lss = &uc.uc_stack; 749 ss.ss_sp = PTRIN(lss->ss_sp); 750 ss.ss_size = lss->ss_size; 751 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 752 753 #ifdef DEBUG 754 if (ldebug(rt_sigreturn)) 755 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 756 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 757 #endif 758 (void)kern_sigaltstack(td, &ss, NULL); 759 760 return (EJUSTRETURN); 761 } 762 763 static int 764 linux32_fetch_syscall_args(struct thread *td, struct syscall_args *sa) 765 { 766 struct proc *p; 767 struct trapframe *frame; 768 769 p = td->td_proc; 770 frame = td->td_frame; 771 772 sa->args[0] = frame->tf_rbx; 773 sa->args[1] = frame->tf_rcx; 774 sa->args[2] = frame->tf_rdx; 775 sa->args[3] = frame->tf_rsi; 776 sa->args[4] = frame->tf_rdi; 777 sa->args[5] = frame->tf_rbp; /* Unconfirmed */ 778 sa->code = frame->tf_rax; 779 780 if (sa->code >= p->p_sysent->sv_size) 781 sa->callp = &p->p_sysent->sv_table[0]; 782 else 783 sa->callp = &p->p_sysent->sv_table[sa->code]; 784 sa->narg = sa->callp->sy_narg; 785 786 td->td_retval[0] = 0; 787 td->td_retval[1] = frame->tf_rdx; 788 789 return (0); 790 } 791 792 /* 793 * If a linux binary is exec'ing something, try this image activator 794 * first. We override standard shell script execution in order to 795 * be able to modify the interpreter path. We only do this if a linux 796 * binary is doing the exec, so we do not create an EXEC module for it. 797 */ 798 static int exec_linux_imgact_try(struct image_params *iparams); 799 800 static int 801 exec_linux_imgact_try(struct image_params *imgp) 802 { 803 const char *head = (const char *)imgp->image_header; 804 char *rpath; 805 int error = -1; 806 807 /* 808 * The interpreter for shell scripts run from a linux binary needs 809 * to be located in /compat/linux if possible in order to recursively 810 * maintain linux path emulation. 811 */ 812 if (((const short *)head)[0] == SHELLMAGIC) { 813 /* 814 * Run our normal shell image activator. If it succeeds attempt 815 * to use the alternate path for the interpreter. If an 816 * alternate * path is found, use our stringspace to store it. 817 */ 818 if ((error = exec_shell_imgact(imgp)) == 0) { 819 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 820 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 821 AT_FDCWD); 822 if (rpath != NULL) 823 imgp->args->fname_buf = 824 imgp->interpreter_name = rpath; 825 } 826 } 827 return (error); 828 } 829 830 /* 831 * Clear registers on exec 832 * XXX copied from ia32_signal.c. 833 */ 834 static void 835 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 836 { 837 struct trapframe *regs = td->td_frame; 838 struct pcb *pcb = td->td_pcb; 839 840 mtx_lock(&dt_lock); 841 if (td->td_proc->p_md.md_ldt != NULL) 842 user_ldt_free(td); 843 else 844 mtx_unlock(&dt_lock); 845 846 critical_enter(); 847 wrmsr(MSR_FSBASE, 0); 848 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 849 pcb->pcb_fsbase = 0; 850 pcb->pcb_gsbase = 0; 851 critical_exit(); 852 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 853 854 bzero((char *)regs, sizeof(struct trapframe)); 855 regs->tf_rip = imgp->entry_addr; 856 regs->tf_rsp = stack; 857 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 858 regs->tf_gs = _ugssel; 859 regs->tf_fs = _ufssel; 860 regs->tf_es = _udatasel; 861 regs->tf_ds = _udatasel; 862 regs->tf_ss = _udatasel; 863 regs->tf_flags = TF_HASSEGS; 864 regs->tf_cs = _ucode32sel; 865 regs->tf_rbx = imgp->ps_strings; 866 867 fpstate_drop(td); 868 869 /* Do full restore on return so that we can change to a different %cs */ 870 set_pcb_flags(pcb, PCB_32BIT | PCB_FULL_IRET); 871 clear_pcb_flags(pcb, PCB_GS32BIT); 872 td->td_retval[1] = 0; 873 } 874 875 /* 876 * XXX copied from ia32_sysvec.c. 877 */ 878 static register_t * 879 linux_copyout_strings(struct image_params *imgp) 880 { 881 int argc, envc; 882 u_int32_t *vectp; 883 char *stringp, *destp; 884 u_int32_t *stack_base; 885 struct linux32_ps_strings *arginfo; 886 887 /* 888 * Calculate string base and vector table pointers. 889 * Also deal with signal trampoline code for this exec type. 890 */ 891 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 892 destp = (caddr_t)arginfo - SPARE_USRSPACE - linux_szplatform - 893 roundup((ARG_MAX - imgp->args->stringspace), 894 sizeof(char *)); 895 896 /* 897 * Install LINUX_PLATFORM 898 */ 899 copyout(linux_platform, ((caddr_t)arginfo - linux_szplatform), 900 linux_szplatform); 901 902 /* 903 * If we have a valid auxargs ptr, prepare some room 904 * on the stack. 905 */ 906 if (imgp->auxargs) { 907 /* 908 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 909 * lower compatibility. 910 */ 911 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 912 (LINUX_AT_COUNT * 2); 913 /* 914 * The '+ 2' is for the null pointers at the end of each of 915 * the arg and env vector sets,and imgp->auxarg_size is room 916 * for argument of Runtime loader. 917 */ 918 vectp = (u_int32_t *) (destp - (imgp->args->argc + 919 imgp->args->envc + 2 + imgp->auxarg_size) * 920 sizeof(u_int32_t)); 921 922 } else 923 /* 924 * The '+ 2' is for the null pointers at the end of each of 925 * the arg and env vector sets 926 */ 927 vectp = (u_int32_t *)(destp - (imgp->args->argc + 928 imgp->args->envc + 2) * sizeof(u_int32_t)); 929 930 /* 931 * vectp also becomes our initial stack base 932 */ 933 stack_base = vectp; 934 935 stringp = imgp->args->begin_argv; 936 argc = imgp->args->argc; 937 envc = imgp->args->envc; 938 /* 939 * Copy out strings - arguments and environment. 940 */ 941 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 942 943 /* 944 * Fill in "ps_strings" struct for ps, w, etc. 945 */ 946 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 947 suword32(&arginfo->ps_nargvstr, argc); 948 949 /* 950 * Fill in argument portion of vector table. 951 */ 952 for (; argc > 0; --argc) { 953 suword32(vectp++, (uint32_t)(intptr_t)destp); 954 while (*stringp++ != 0) 955 destp++; 956 destp++; 957 } 958 959 /* a null vector table pointer separates the argp's from the envp's */ 960 suword32(vectp++, 0); 961 962 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 963 suword32(&arginfo->ps_nenvstr, envc); 964 965 /* 966 * Fill in environment portion of vector table. 967 */ 968 for (; envc > 0; --envc) { 969 suword32(vectp++, (uint32_t)(intptr_t)destp); 970 while (*stringp++ != 0) 971 destp++; 972 destp++; 973 } 974 975 /* end of vector table is a null pointer */ 976 suword32(vectp, 0); 977 978 return ((register_t *)stack_base); 979 } 980 981 static SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 982 "32-bit Linux emulation"); 983 984 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 985 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 986 &linux32_maxdsiz, 0, ""); 987 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 988 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 989 &linux32_maxssiz, 0, ""); 990 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 991 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 992 &linux32_maxvmem, 0, ""); 993 994 static void 995 linux32_fixlimit(struct rlimit *rl, int which) 996 { 997 998 switch (which) { 999 case RLIMIT_DATA: 1000 if (linux32_maxdsiz != 0) { 1001 if (rl->rlim_cur > linux32_maxdsiz) 1002 rl->rlim_cur = linux32_maxdsiz; 1003 if (rl->rlim_max > linux32_maxdsiz) 1004 rl->rlim_max = linux32_maxdsiz; 1005 } 1006 break; 1007 case RLIMIT_STACK: 1008 if (linux32_maxssiz != 0) { 1009 if (rl->rlim_cur > linux32_maxssiz) 1010 rl->rlim_cur = linux32_maxssiz; 1011 if (rl->rlim_max > linux32_maxssiz) 1012 rl->rlim_max = linux32_maxssiz; 1013 } 1014 break; 1015 case RLIMIT_VMEM: 1016 if (linux32_maxvmem != 0) { 1017 if (rl->rlim_cur > linux32_maxvmem) 1018 rl->rlim_cur = linux32_maxvmem; 1019 if (rl->rlim_max > linux32_maxvmem) 1020 rl->rlim_max = linux32_maxvmem; 1021 } 1022 break; 1023 } 1024 } 1025 1026 struct sysentvec elf_linux_sysvec = { 1027 .sv_size = LINUX_SYS_MAXSYSCALL, 1028 .sv_table = linux_sysent, 1029 .sv_mask = 0, 1030 .sv_sigsize = LINUX_SIGTBLSZ, 1031 .sv_sigtbl = bsd_to_linux_signal, 1032 .sv_errsize = ELAST + 1, 1033 .sv_errtbl = bsd_to_linux_errno, 1034 .sv_transtrap = translate_traps, 1035 .sv_fixup = elf_linux_fixup, 1036 .sv_sendsig = linux_sendsig, 1037 .sv_sigcode = linux_sigcode, 1038 .sv_szsigcode = &linux_szsigcode, 1039 .sv_prepsyscall = NULL, 1040 .sv_name = "Linux ELF32", 1041 .sv_coredump = elf32_coredump, 1042 .sv_imgact_try = exec_linux_imgact_try, 1043 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1044 .sv_pagesize = PAGE_SIZE, 1045 .sv_minuser = VM_MIN_ADDRESS, 1046 .sv_maxuser = LINUX32_MAXUSER, 1047 .sv_usrstack = LINUX32_USRSTACK, 1048 .sv_psstrings = LINUX32_PS_STRINGS, 1049 .sv_stackprot = VM_PROT_ALL, 1050 .sv_copyout_strings = linux_copyout_strings, 1051 .sv_setregs = exec_linux_setregs, 1052 .sv_fixlimit = linux32_fixlimit, 1053 .sv_maxssiz = &linux32_maxssiz, 1054 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 | SV_SHP, 1055 .sv_set_syscall_retval = cpu_set_syscall_retval, 1056 .sv_fetch_syscall_args = linux32_fetch_syscall_args, 1057 .sv_syscallnames = NULL, 1058 .sv_shared_page_base = LINUX32_SHAREDPAGE, 1059 .sv_shared_page_len = PAGE_SIZE, 1060 .sv_schedtail = linux_schedtail, 1061 }; 1062 INIT_SYSENTVEC(elf_sysvec, &elf_linux_sysvec); 1063 1064 static char GNU_ABI_VENDOR[] = "GNU"; 1065 static int GNULINUX_ABI_DESC = 0; 1066 1067 static boolean_t 1068 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1069 { 1070 const Elf32_Word *desc; 1071 uintptr_t p; 1072 1073 p = (uintptr_t)(note + 1); 1074 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1075 1076 desc = (const Elf32_Word *)p; 1077 if (desc[0] != GNULINUX_ABI_DESC) 1078 return (FALSE); 1079 1080 /* 1081 * For linux we encode osrel as follows (see linux_mib.c): 1082 * VVVMMMIII (version, major, minor), see linux_mib.c. 1083 */ 1084 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1085 1086 return (TRUE); 1087 } 1088 1089 static Elf_Brandnote linux32_brandnote = { 1090 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1091 .hdr.n_descsz = 16, /* XXX at least 16 */ 1092 .hdr.n_type = 1, 1093 .vendor = GNU_ABI_VENDOR, 1094 .flags = BN_TRANSLATE_OSREL, 1095 .trans_osrel = linux32_trans_osrel 1096 }; 1097 1098 static Elf32_Brandinfo linux_brand = { 1099 .brand = ELFOSABI_LINUX, 1100 .machine = EM_386, 1101 .compat_3_brand = "Linux", 1102 .emul_path = "/compat/linux", 1103 .interp_path = "/lib/ld-linux.so.1", 1104 .sysvec = &elf_linux_sysvec, 1105 .interp_newpath = NULL, 1106 .brand_note = &linux32_brandnote, 1107 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1108 }; 1109 1110 static Elf32_Brandinfo linux_glibc2brand = { 1111 .brand = ELFOSABI_LINUX, 1112 .machine = EM_386, 1113 .compat_3_brand = "Linux", 1114 .emul_path = "/compat/linux", 1115 .interp_path = "/lib/ld-linux.so.2", 1116 .sysvec = &elf_linux_sysvec, 1117 .interp_newpath = NULL, 1118 .brand_note = &linux32_brandnote, 1119 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1120 }; 1121 1122 Elf32_Brandinfo *linux_brandlist[] = { 1123 &linux_brand, 1124 &linux_glibc2brand, 1125 NULL 1126 }; 1127 1128 static int 1129 linux_elf_modevent(module_t mod, int type, void *data) 1130 { 1131 Elf32_Brandinfo **brandinfo; 1132 int error; 1133 struct linux_ioctl_handler **lihp; 1134 struct linux_device_handler **ldhp; 1135 1136 error = 0; 1137 1138 switch(type) { 1139 case MOD_LOAD: 1140 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1141 ++brandinfo) 1142 if (elf32_insert_brand_entry(*brandinfo) < 0) 1143 error = EINVAL; 1144 if (error == 0) { 1145 SET_FOREACH(lihp, linux_ioctl_handler_set) 1146 linux_ioctl_register_handler(*lihp); 1147 SET_FOREACH(ldhp, linux_device_handler_set) 1148 linux_device_register_handler(*ldhp); 1149 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 1150 sx_init(&emul_shared_lock, "emuldata->shared lock"); 1151 LIST_INIT(&futex_list); 1152 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1153 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1154 linux_proc_exit, NULL, 1000); 1155 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1156 linux_proc_exec, NULL, 1000); 1157 linux_szplatform = roundup(strlen(linux_platform) + 1, 1158 sizeof(char *)); 1159 linux_osd_jail_register(); 1160 stclohz = (stathz ? stathz : hz); 1161 if (bootverbose) 1162 printf("Linux ELF exec handler installed\n"); 1163 } else 1164 printf("cannot insert Linux ELF brand handler\n"); 1165 break; 1166 case MOD_UNLOAD: 1167 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1168 ++brandinfo) 1169 if (elf32_brand_inuse(*brandinfo)) 1170 error = EBUSY; 1171 if (error == 0) { 1172 for (brandinfo = &linux_brandlist[0]; 1173 *brandinfo != NULL; ++brandinfo) 1174 if (elf32_remove_brand_entry(*brandinfo) < 0) 1175 error = EINVAL; 1176 } 1177 if (error == 0) { 1178 SET_FOREACH(lihp, linux_ioctl_handler_set) 1179 linux_ioctl_unregister_handler(*lihp); 1180 SET_FOREACH(ldhp, linux_device_handler_set) 1181 linux_device_unregister_handler(*ldhp); 1182 mtx_destroy(&emul_lock); 1183 sx_destroy(&emul_shared_lock); 1184 mtx_destroy(&futex_mtx); 1185 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1186 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1187 linux_osd_jail_deregister(); 1188 if (bootverbose) 1189 printf("Linux ELF exec handler removed\n"); 1190 } else 1191 printf("Could not deinstall ELF interpreter entry\n"); 1192 break; 1193 default: 1194 return EOPNOTSUPP; 1195 } 1196 return error; 1197 } 1198 1199 static moduledata_t linux_elf_mod = { 1200 "linuxelf", 1201 linux_elf_modevent, 1202 0 1203 }; 1204 1205 DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1206