1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2003 Peter Wemm 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1998-1999 Andrew Gallatin 6 * Copyright (c) 1994-1996 Søren Schmidt 7 * All rights reserved. 8 * 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer 14 * in this position and unchanged. 15 * 2. Redistributions in binary form must reproduce the above copyright 16 * notice, this list of conditions and the following disclaimer in the 17 * documentation and/or other materials provided with the distribution. 18 * 3. The name of the author may not be used to endorse or promote products 19 * derived from this software without specific prior written permission 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 22 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 23 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 24 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 25 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 26 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 30 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 31 */ 32 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 #include "opt_compat.h" 36 37 #ifndef COMPAT_FREEBSD32 38 #error "Unable to compile Linux-emulator due to missing COMPAT_FREEBSD32 option!" 
39 #endif 40 41 #define __ELF_WORD_SIZE 32 42 43 #include <sys/param.h> 44 #include <sys/systm.h> 45 #include <sys/exec.h> 46 #include <sys/fcntl.h> 47 #include <sys/imgact.h> 48 #include <sys/imgact_elf.h> 49 #include <sys/kernel.h> 50 #include <sys/lock.h> 51 #include <sys/malloc.h> 52 #include <sys/module.h> 53 #include <sys/mutex.h> 54 #include <sys/proc.h> 55 #include <sys/resourcevar.h> 56 #include <sys/signalvar.h> 57 #include <sys/sysctl.h> 58 #include <sys/syscallsubr.h> 59 #include <sys/sysent.h> 60 #include <sys/sysproto.h> 61 #include <sys/vnode.h> 62 #include <sys/eventhandler.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_extern.h> 67 #include <vm/vm_map.h> 68 #include <vm/vm_object.h> 69 #include <vm/vm_page.h> 70 #include <vm/vm_param.h> 71 72 #include <machine/cpu.h> 73 #include <machine/md_var.h> 74 #include <machine/pcb.h> 75 #include <machine/specialreg.h> 76 77 #include <amd64/linux32/linux.h> 78 #include <amd64/linux32/linux32_proto.h> 79 #include <compat/linux/linux_futex.h> 80 #include <compat/linux/linux_emul.h> 81 #include <compat/linux/linux_mib.h> 82 #include <compat/linux/linux_misc.h> 83 #include <compat/linux/linux_signal.h> 84 #include <compat/linux/linux_util.h> 85 86 MODULE_VERSION(linux, 1); 87 88 MALLOC_DEFINE(M_LINUX, "linux", "Linux mode structures"); 89 90 #define AUXARGS_ENTRY_32(pos, id, val) \ 91 do { \ 92 suword32(pos++, id); \ 93 suword32(pos++, val); \ 94 } while (0) 95 96 #if BYTE_ORDER == LITTLE_ENDIAN 97 #define SHELLMAGIC 0x2123 /* #! */ 98 #else 99 #define SHELLMAGIC 0x2321 100 #endif 101 102 /* 103 * Allow the sendsig functions to use the ldebug() facility 104 * even though they are not syscalls themselves. Map them 105 * to syscall 0. This is slightly less bogus than using 106 * ldebug(sigreturn). 
107 */ 108 #define LINUX_SYS_linux_rt_sendsig 0 109 #define LINUX_SYS_linux_sendsig 0 110 111 const char *linux_platform = "i686"; 112 static int linux_szplatform; 113 extern char linux_sigcode[]; 114 extern int linux_szsigcode; 115 116 extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL]; 117 118 SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler); 119 SET_DECLARE(linux_device_handler_set, struct linux_device_handler); 120 121 static int elf_linux_fixup(register_t **stack_base, 122 struct image_params *iparams); 123 static register_t *linux_copyout_strings(struct image_params *imgp); 124 static void linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, 125 caddr_t *params); 126 static void linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask); 127 static void exec_linux_setregs(struct thread *td, 128 struct image_params *imgp, u_long stack); 129 static void linux32_fixlimit(struct rlimit *rl, int which); 130 static boolean_t linux32_trans_osrel(const Elf_Note *note, int32_t *osrel); 131 132 static eventhandler_tag linux_exit_tag; 133 static eventhandler_tag linux_schedtail_tag; 134 static eventhandler_tag linux_exec_tag; 135 136 /* 137 * Linux syscalls return negative errno's, we do positive and map them 138 * Reference: 139 * FreeBSD: src/sys/sys/errno.h 140 * Linux: linux-2.6.17.8/include/asm-generic/errno-base.h 141 * linux-2.6.17.8/include/asm-generic/errno.h 142 */ 143 static int bsd_to_linux_errno[ELAST + 1] = { 144 -0, -1, -2, -3, -4, -5, -6, -7, -8, -9, 145 -10, -35, -12, -13, -14, -15, -16, -17, -18, -19, 146 -20, -21, -22, -23, -24, -25, -26, -27, -28, -29, 147 -30, -31, -32, -33, -34, -11,-115,-114, -88, -89, 148 -90, -91, -92, -93, -94, -95, -96, -97, -98, -99, 149 -100,-101,-102,-103,-104,-105,-106,-107,-108,-109, 150 -110,-111, -40, -36,-112,-113, -39, -11, -87,-122, 151 -116, -66, -6, -6, -6, -6, -6, -37, -38, -9, 152 -6, -6, -43, -42, -75,-125, -84, -95, -16, -74, 153 -72, -67, -71 154 }; 155 156 int 
bsd_to_linux_signal[LINUX_SIGTBLSZ] = { 157 LINUX_SIGHUP, LINUX_SIGINT, LINUX_SIGQUIT, LINUX_SIGILL, 158 LINUX_SIGTRAP, LINUX_SIGABRT, 0, LINUX_SIGFPE, 159 LINUX_SIGKILL, LINUX_SIGBUS, LINUX_SIGSEGV, LINUX_SIGSYS, 160 LINUX_SIGPIPE, LINUX_SIGALRM, LINUX_SIGTERM, LINUX_SIGURG, 161 LINUX_SIGSTOP, LINUX_SIGTSTP, LINUX_SIGCONT, LINUX_SIGCHLD, 162 LINUX_SIGTTIN, LINUX_SIGTTOU, LINUX_SIGIO, LINUX_SIGXCPU, 163 LINUX_SIGXFSZ, LINUX_SIGVTALRM, LINUX_SIGPROF, LINUX_SIGWINCH, 164 0, LINUX_SIGUSR1, LINUX_SIGUSR2 165 }; 166 167 int linux_to_bsd_signal[LINUX_SIGTBLSZ] = { 168 SIGHUP, SIGINT, SIGQUIT, SIGILL, 169 SIGTRAP, SIGABRT, SIGBUS, SIGFPE, 170 SIGKILL, SIGUSR1, SIGSEGV, SIGUSR2, 171 SIGPIPE, SIGALRM, SIGTERM, SIGBUS, 172 SIGCHLD, SIGCONT, SIGSTOP, SIGTSTP, 173 SIGTTIN, SIGTTOU, SIGURG, SIGXCPU, 174 SIGXFSZ, SIGVTALRM, SIGPROF, SIGWINCH, 175 SIGIO, SIGURG, SIGSYS 176 }; 177 178 #define LINUX_T_UNKNOWN 255 179 static int _bsd_to_linux_trapcode[] = { 180 LINUX_T_UNKNOWN, /* 0 */ 181 6, /* 1 T_PRIVINFLT */ 182 LINUX_T_UNKNOWN, /* 2 */ 183 3, /* 3 T_BPTFLT */ 184 LINUX_T_UNKNOWN, /* 4 */ 185 LINUX_T_UNKNOWN, /* 5 */ 186 16, /* 6 T_ARITHTRAP */ 187 254, /* 7 T_ASTFLT */ 188 LINUX_T_UNKNOWN, /* 8 */ 189 13, /* 9 T_PROTFLT */ 190 1, /* 10 T_TRCTRAP */ 191 LINUX_T_UNKNOWN, /* 11 */ 192 14, /* 12 T_PAGEFLT */ 193 LINUX_T_UNKNOWN, /* 13 */ 194 17, /* 14 T_ALIGNFLT */ 195 LINUX_T_UNKNOWN, /* 15 */ 196 LINUX_T_UNKNOWN, /* 16 */ 197 LINUX_T_UNKNOWN, /* 17 */ 198 0, /* 18 T_DIVIDE */ 199 2, /* 19 T_NMI */ 200 4, /* 20 T_OFLOW */ 201 5, /* 21 T_BOUND */ 202 7, /* 22 T_DNA */ 203 8, /* 23 T_DOUBLEFLT */ 204 9, /* 24 T_FPOPFLT */ 205 10, /* 25 T_TSSFLT */ 206 11, /* 26 T_SEGNPFLT */ 207 12, /* 27 T_STKFLT */ 208 18, /* 28 T_MCHK */ 209 19, /* 29 T_XMMFLT */ 210 15 /* 30 T_RESERVED */ 211 }; 212 #define bsd_to_linux_trapcode(code) \ 213 ((code)<sizeof(_bsd_to_linux_trapcode)/sizeof(*_bsd_to_linux_trapcode)? 
\ 214 _bsd_to_linux_trapcode[(code)]: \ 215 LINUX_T_UNKNOWN) 216 217 struct linux32_ps_strings { 218 u_int32_t ps_argvstr; /* first of 0 or more argument strings */ 219 u_int ps_nargvstr; /* the number of argument strings */ 220 u_int32_t ps_envstr; /* first of 0 or more environment strings */ 221 u_int ps_nenvstr; /* the number of environment strings */ 222 }; 223 224 /* 225 * If FreeBSD & Linux have a difference of opinion about what a trap 226 * means, deal with it here. 227 * 228 * MPSAFE 229 */ 230 static int 231 translate_traps(int signal, int trap_code) 232 { 233 if (signal != SIGBUS) 234 return signal; 235 switch (trap_code) { 236 case T_PROTFLT: 237 case T_TSSFLT: 238 case T_DOUBLEFLT: 239 case T_PAGEFLT: 240 return SIGSEGV; 241 default: 242 return signal; 243 } 244 } 245 246 static int 247 elf_linux_fixup(register_t **stack_base, struct image_params *imgp) 248 { 249 Elf32_Auxargs *args; 250 Elf32_Addr *base; 251 Elf32_Addr *pos, *uplatform; 252 struct linux32_ps_strings *arginfo; 253 254 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 255 uplatform = (Elf32_Addr *)((caddr_t)arginfo - linux_szsigcode - 256 linux_szplatform); 257 258 KASSERT(curthread->td_proc == imgp->proc, 259 ("unsafe elf_linux_fixup(), should be curproc")); 260 base = (Elf32_Addr *)*stack_base; 261 args = (Elf32_Auxargs *)imgp->auxargs; 262 pos = base + (imgp->args->argc + imgp->args->envc + 2); 263 264 AUXARGS_ENTRY_32(pos, LINUX_AT_HWCAP, cpu_feature); 265 266 /* 267 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 268 * as it has appeared in the 2.4.0-rc7 first time. 269 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 270 * glibc falls back to the hard-coded CLK_TCK value when aux entry 271 * is not present. 272 * Also see linux_times() implementation. 
273 */ 274 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 275 AUXARGS_ENTRY_32(pos, LINUX_AT_CLKTCK, stclohz); 276 AUXARGS_ENTRY_32(pos, AT_PHDR, args->phdr); 277 AUXARGS_ENTRY_32(pos, AT_PHENT, args->phent); 278 AUXARGS_ENTRY_32(pos, AT_PHNUM, args->phnum); 279 AUXARGS_ENTRY_32(pos, AT_PAGESZ, args->pagesz); 280 AUXARGS_ENTRY_32(pos, AT_FLAGS, args->flags); 281 AUXARGS_ENTRY_32(pos, AT_ENTRY, args->entry); 282 AUXARGS_ENTRY_32(pos, AT_BASE, args->base); 283 AUXARGS_ENTRY_32(pos, LINUX_AT_SECURE, 0); 284 AUXARGS_ENTRY_32(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 285 AUXARGS_ENTRY_32(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 286 AUXARGS_ENTRY_32(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 287 AUXARGS_ENTRY_32(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 288 AUXARGS_ENTRY_32(pos, LINUX_AT_PLATFORM, PTROUT(uplatform)); 289 if (args->execfd != -1) 290 AUXARGS_ENTRY_32(pos, AT_EXECFD, args->execfd); 291 AUXARGS_ENTRY_32(pos, AT_NULL, 0); 292 293 free(imgp->auxargs, M_TEMP); 294 imgp->auxargs = NULL; 295 296 base--; 297 suword32(base, (uint32_t)imgp->args->argc); 298 *stack_base = (register_t *)base; 299 return 0; 300 } 301 302 extern unsigned long linux_sznonrtsigcode; 303 304 static void 305 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 306 { 307 struct thread *td = curthread; 308 struct proc *p = td->td_proc; 309 struct sigacts *psp; 310 struct trapframe *regs; 311 struct l_rt_sigframe *fp, frame; 312 int oonstack; 313 int sig; 314 int code; 315 316 sig = ksi->ksi_signo; 317 code = ksi->ksi_code; 318 PROC_LOCK_ASSERT(p, MA_OWNED); 319 psp = p->p_sigacts; 320 mtx_assert(&psp->ps_mtx, MA_OWNED); 321 regs = td->td_frame; 322 oonstack = sigonstack(regs->tf_rsp); 323 324 #ifdef DEBUG 325 if (ldebug(rt_sendsig)) 326 printf(ARGS(rt_sendsig, "%p, %d, %p, %u"), 327 catcher, sig, (void*)mask, code); 328 #endif 329 /* 330 * Allocate space for the signal handler context. 
331 */ 332 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 333 SIGISMEMBER(psp->ps_sigonstack, sig)) { 334 fp = (struct l_rt_sigframe *)(td->td_sigstk.ss_sp + 335 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 336 } else 337 fp = (struct l_rt_sigframe *)regs->tf_rsp - 1; 338 mtx_unlock(&psp->ps_mtx); 339 340 /* 341 * Build the argument list for the signal handler. 342 */ 343 if (p->p_sysent->sv_sigtbl) 344 if (sig <= p->p_sysent->sv_sigsize) 345 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 346 347 bzero(&frame, sizeof(frame)); 348 349 frame.sf_handler = PTROUT(catcher); 350 frame.sf_sig = sig; 351 frame.sf_siginfo = PTROUT(&fp->sf_si); 352 frame.sf_ucontext = PTROUT(&fp->sf_sc); 353 354 /* Fill in POSIX parts */ 355 ksiginfo_to_lsiginfo(ksi, &frame.sf_si, sig); 356 357 /* 358 * Build the signal context to be used by sigreturn. 359 */ 360 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 361 frame.sf_sc.uc_link = 0; /* XXX ??? */ 362 363 frame.sf_sc.uc_stack.ss_sp = PTROUT(td->td_sigstk.ss_sp); 364 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 365 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 366 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 367 PROC_UNLOCK(p); 368 369 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 370 371 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__bits[0]; 372 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_rdi; 373 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_rsi; 374 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_rbp; 375 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_rbx; 376 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_rdx; 377 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_rcx; 378 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_rax; 379 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_rip; 380 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 381 frame.sf_sc.uc_mcontext.sc_gs = regs->tf_gs; 382 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 383 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 384 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 385 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_rflags; 386 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_rsp; 387 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 388 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 389 frame.sf_sc.uc_mcontext.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 390 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 391 392 #ifdef DEBUG 393 if (ldebug(rt_sendsig)) 394 printf(LMSG("rt_sendsig flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 395 frame.sf_sc.uc_stack.ss_flags, td->td_sigstk.ss_sp, 396 td->td_sigstk.ss_size, frame.sf_sc.uc_mcontext.sc_mask); 397 #endif 398 399 if (copyout(&frame, fp, sizeof(frame)) != 0) { 400 /* 401 * Process has trashed its stack; give it an illegal 402 * instruction to halt it in its tracks. 403 */ 404 #ifdef DEBUG 405 if (ldebug(rt_sendsig)) 406 printf(LMSG("rt_sendsig: bad stack %p, oonstack=%x"), 407 fp, oonstack); 408 #endif 409 PROC_LOCK(p); 410 sigexit(td, SIGILL); 411 } 412 413 /* 414 * Build context to run handler in. 
415 */ 416 regs->tf_rsp = PTROUT(fp); 417 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode) + 418 linux_sznonrtsigcode; 419 regs->tf_rflags &= ~(PSL_T | PSL_D); 420 regs->tf_cs = _ucode32sel; 421 regs->tf_ss = _udatasel; 422 regs->tf_ds = _udatasel; 423 regs->tf_es = _udatasel; 424 regs->tf_fs = _ufssel; 425 regs->tf_gs = _ugssel; 426 regs->tf_flags = TF_HASSEGS; 427 td->td_pcb->pcb_full_iret = 1; 428 PROC_LOCK(p); 429 mtx_lock(&psp->ps_mtx); 430 } 431 432 433 /* 434 * Send an interrupt to process. 435 * 436 * Stack is set up to allow sigcode stored 437 * in u. to call routine, followed by kcall 438 * to sigreturn routine below. After sigreturn 439 * resets the signal mask, the stack, and the 440 * frame pointer, it returns to the user 441 * specified pc, psl. 442 */ 443 static void 444 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 445 { 446 struct thread *td = curthread; 447 struct proc *p = td->td_proc; 448 struct sigacts *psp; 449 struct trapframe *regs; 450 struct l_sigframe *fp, frame; 451 l_sigset_t lmask; 452 int oonstack, i; 453 int sig, code; 454 455 sig = ksi->ksi_signo; 456 code = ksi->ksi_code; 457 PROC_LOCK_ASSERT(p, MA_OWNED); 458 psp = p->p_sigacts; 459 mtx_assert(&psp->ps_mtx, MA_OWNED); 460 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 461 /* Signal handler installed with SA_SIGINFO. */ 462 linux_rt_sendsig(catcher, ksi, mask); 463 return; 464 } 465 466 regs = td->td_frame; 467 oonstack = sigonstack(regs->tf_rsp); 468 469 #ifdef DEBUG 470 if (ldebug(sendsig)) 471 printf(ARGS(sendsig, "%p, %d, %p, %u"), 472 catcher, sig, (void*)mask, code); 473 #endif 474 475 /* 476 * Allocate space for the signal handler context. 
477 */ 478 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 479 SIGISMEMBER(psp->ps_sigonstack, sig)) { 480 fp = (struct l_sigframe *)(td->td_sigstk.ss_sp + 481 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 482 } else 483 fp = (struct l_sigframe *)regs->tf_rsp - 1; 484 mtx_unlock(&psp->ps_mtx); 485 PROC_UNLOCK(p); 486 487 /* 488 * Build the argument list for the signal handler. 489 */ 490 if (p->p_sysent->sv_sigtbl) 491 if (sig <= p->p_sysent->sv_sigsize) 492 sig = p->p_sysent->sv_sigtbl[_SIG_IDX(sig)]; 493 494 bzero(&frame, sizeof(frame)); 495 496 frame.sf_handler = PTROUT(catcher); 497 frame.sf_sig = sig; 498 499 bsd_to_linux_sigset(mask, &lmask); 500 501 /* 502 * Build the signal context to be used by sigreturn. 503 */ 504 frame.sf_sc.sc_mask = lmask.__bits[0]; 505 frame.sf_sc.sc_gs = regs->tf_gs; 506 frame.sf_sc.sc_fs = regs->tf_fs; 507 frame.sf_sc.sc_es = regs->tf_es; 508 frame.sf_sc.sc_ds = regs->tf_ds; 509 frame.sf_sc.sc_edi = regs->tf_rdi; 510 frame.sf_sc.sc_esi = regs->tf_rsi; 511 frame.sf_sc.sc_ebp = regs->tf_rbp; 512 frame.sf_sc.sc_ebx = regs->tf_rbx; 513 frame.sf_sc.sc_edx = regs->tf_rdx; 514 frame.sf_sc.sc_ecx = regs->tf_rcx; 515 frame.sf_sc.sc_eax = regs->tf_rax; 516 frame.sf_sc.sc_eip = regs->tf_rip; 517 frame.sf_sc.sc_cs = regs->tf_cs; 518 frame.sf_sc.sc_eflags = regs->tf_rflags; 519 frame.sf_sc.sc_esp_at_signal = regs->tf_rsp; 520 frame.sf_sc.sc_ss = regs->tf_ss; 521 frame.sf_sc.sc_err = regs->tf_err; 522 frame.sf_sc.sc_cr2 = (u_int32_t)(uintptr_t)ksi->ksi_addr; 523 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(code); 524 525 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 526 frame.sf_extramask[i] = lmask.__bits[i+1]; 527 528 if (copyout(&frame, fp, sizeof(frame)) != 0) { 529 /* 530 * Process has trashed its stack; give it an illegal 531 * instruction to halt it in its tracks. 532 */ 533 PROC_LOCK(p); 534 sigexit(td, SIGILL); 535 } 536 537 /* 538 * Build context to run handler in. 
539 */ 540 regs->tf_rsp = PTROUT(fp); 541 regs->tf_rip = LINUX32_PS_STRINGS - *(p->p_sysent->sv_szsigcode); 542 regs->tf_rflags &= ~(PSL_T | PSL_D); 543 regs->tf_cs = _ucode32sel; 544 regs->tf_ss = _udatasel; 545 regs->tf_ds = _udatasel; 546 regs->tf_es = _udatasel; 547 regs->tf_fs = _ufssel; 548 regs->tf_gs = _ugssel; 549 regs->tf_flags = TF_HASSEGS; 550 td->td_pcb->pcb_full_iret = 1; 551 PROC_LOCK(p); 552 mtx_lock(&psp->ps_mtx); 553 } 554 555 /* 556 * System call to cleanup state after a signal 557 * has been taken. Reset signal mask and 558 * stack state from context left by sendsig (above). 559 * Return to previous pc and psl as specified by 560 * context left by sendsig. Check carefully to 561 * make sure that the user has not modified the 562 * psl to gain improper privileges or to cause 563 * a machine fault. 564 */ 565 int 566 linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args) 567 { 568 struct l_sigframe frame; 569 struct trapframe *regs; 570 sigset_t bmask; 571 l_sigset_t lmask; 572 int eflags, i; 573 ksiginfo_t ksi; 574 575 regs = td->td_frame; 576 577 #ifdef DEBUG 578 if (ldebug(sigreturn)) 579 printf(ARGS(sigreturn, "%p"), (void *)args->sfp); 580 #endif 581 /* 582 * The trampoline code hands us the sigframe. 583 * It is unsafe to keep track of it ourselves, in the event that a 584 * program jumps out of a signal handler. 585 */ 586 if (copyin(args->sfp, &frame, sizeof(frame)) != 0) 587 return (EFAULT); 588 589 /* 590 * Check for security violations. 591 */ 592 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 593 eflags = frame.sf_sc.sc_eflags; 594 /* 595 * XXX do allow users to change the privileged flag PSL_RF. The 596 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 597 * sometimes set it there too. tf_eflags is kept in the signal 598 * context during signal handling and there is no other place 599 * to remember it, so the PSL_RF bit may be corrupted by the 600 * signal handler without us knowing. 
Corruption of the PSL_RF 601 * bit at worst causes one more or one less debugger trap, so 602 * allowing it is fairly harmless. 603 */ 604 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 605 return(EINVAL); 606 607 /* 608 * Don't allow users to load a valid privileged %cs. Let the 609 * hardware check for invalid selectors, excess privilege in 610 * other selectors, invalid %eip's and invalid %esp's. 611 */ 612 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 613 if (!CS_SECURE(frame.sf_sc.sc_cs)) { 614 ksiginfo_init_trap(&ksi); 615 ksi.ksi_signo = SIGBUS; 616 ksi.ksi_code = BUS_OBJERR; 617 ksi.ksi_trapno = T_PROTFLT; 618 ksi.ksi_addr = (void *)regs->tf_rip; 619 trapsignal(td, &ksi); 620 return(EINVAL); 621 } 622 623 lmask.__bits[0] = frame.sf_sc.sc_mask; 624 for (i = 0; i < (LINUX_NSIG_WORDS-1); i++) 625 lmask.__bits[i+1] = frame.sf_extramask[i]; 626 linux_to_bsd_sigset(&lmask, &bmask); 627 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 628 629 /* 630 * Restore signal context. 631 */ 632 regs->tf_rdi = frame.sf_sc.sc_edi; 633 regs->tf_rsi = frame.sf_sc.sc_esi; 634 regs->tf_rbp = frame.sf_sc.sc_ebp; 635 regs->tf_rbx = frame.sf_sc.sc_ebx; 636 regs->tf_rdx = frame.sf_sc.sc_edx; 637 regs->tf_rcx = frame.sf_sc.sc_ecx; 638 regs->tf_rax = frame.sf_sc.sc_eax; 639 regs->tf_rip = frame.sf_sc.sc_eip; 640 regs->tf_cs = frame.sf_sc.sc_cs; 641 regs->tf_ds = frame.sf_sc.sc_ds; 642 regs->tf_es = frame.sf_sc.sc_es; 643 regs->tf_fs = frame.sf_sc.sc_fs; 644 regs->tf_gs = frame.sf_sc.sc_gs; 645 regs->tf_rflags = eflags; 646 regs->tf_rsp = frame.sf_sc.sc_esp_at_signal; 647 regs->tf_ss = frame.sf_sc.sc_ss; 648 td->td_pcb->pcb_full_iret = 1; 649 650 return (EJUSTRETURN); 651 } 652 653 /* 654 * System call to cleanup state after a signal 655 * has been taken. Reset signal mask and 656 * stack state from context left by rt_sendsig (above). 657 * Return to previous pc and psl as specified by 658 * context left by sendsig. 
Check carefully to 659 * make sure that the user has not modified the 660 * psl to gain improper privileges or to cause 661 * a machine fault. 662 */ 663 int 664 linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args) 665 { 666 struct l_ucontext uc; 667 struct l_sigcontext *context; 668 sigset_t bmask; 669 l_stack_t *lss; 670 stack_t ss; 671 struct trapframe *regs; 672 int eflags; 673 ksiginfo_t ksi; 674 675 regs = td->td_frame; 676 677 #ifdef DEBUG 678 if (ldebug(rt_sigreturn)) 679 printf(ARGS(rt_sigreturn, "%p"), (void *)args->ucp); 680 #endif 681 /* 682 * The trampoline code hands us the ucontext. 683 * It is unsafe to keep track of it ourselves, in the event that a 684 * program jumps out of a signal handler. 685 */ 686 if (copyin(args->ucp, &uc, sizeof(uc)) != 0) 687 return (EFAULT); 688 689 context = &uc.uc_mcontext; 690 691 /* 692 * Check for security violations. 693 */ 694 #define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0) 695 eflags = context->sc_eflags; 696 /* 697 * XXX do allow users to change the privileged flag PSL_RF. The 698 * cpu sets PSL_RF in tf_eflags for faults. Debuggers should 699 * sometimes set it there too. tf_eflags is kept in the signal 700 * context during signal handling and there is no other place 701 * to remember it, so the PSL_RF bit may be corrupted by the 702 * signal handler without us knowing. Corruption of the PSL_RF 703 * bit at worst causes one more or one less debugger trap, so 704 * allowing it is fairly harmless. 705 */ 706 if (!EFLAGS_SECURE(eflags & ~PSL_RF, regs->tf_rflags & ~PSL_RF)) 707 return(EINVAL); 708 709 /* 710 * Don't allow users to load a valid privileged %cs. Let the 711 * hardware check for invalid selectors, excess privilege in 712 * other selectors, invalid %eip's and invalid %esp's. 
713 */ 714 #define CS_SECURE(cs) (ISPL(cs) == SEL_UPL) 715 if (!CS_SECURE(context->sc_cs)) { 716 ksiginfo_init_trap(&ksi); 717 ksi.ksi_signo = SIGBUS; 718 ksi.ksi_code = BUS_OBJERR; 719 ksi.ksi_trapno = T_PROTFLT; 720 ksi.ksi_addr = (void *)regs->tf_rip; 721 trapsignal(td, &ksi); 722 return(EINVAL); 723 } 724 725 linux_to_bsd_sigset(&uc.uc_sigmask, &bmask); 726 kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0); 727 728 /* 729 * Restore signal context 730 */ 731 regs->tf_gs = context->sc_gs; 732 regs->tf_fs = context->sc_fs; 733 regs->tf_es = context->sc_es; 734 regs->tf_ds = context->sc_ds; 735 regs->tf_rdi = context->sc_edi; 736 regs->tf_rsi = context->sc_esi; 737 regs->tf_rbp = context->sc_ebp; 738 regs->tf_rbx = context->sc_ebx; 739 regs->tf_rdx = context->sc_edx; 740 regs->tf_rcx = context->sc_ecx; 741 regs->tf_rax = context->sc_eax; 742 regs->tf_rip = context->sc_eip; 743 regs->tf_cs = context->sc_cs; 744 regs->tf_rflags = eflags; 745 regs->tf_rsp = context->sc_esp_at_signal; 746 regs->tf_ss = context->sc_ss; 747 td->td_pcb->pcb_full_iret = 1; 748 749 /* 750 * call sigaltstack & ignore results.. 751 */ 752 lss = &uc.uc_stack; 753 ss.ss_sp = PTRIN(lss->ss_sp); 754 ss.ss_size = lss->ss_size; 755 ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags); 756 757 #ifdef DEBUG 758 if (ldebug(rt_sigreturn)) 759 printf(LMSG("rt_sigret flags: 0x%x, sp: %p, ss: 0x%lx, mask: 0x%x"), 760 ss.ss_flags, ss.ss_sp, ss.ss_size, context->sc_mask); 761 #endif 762 (void)kern_sigaltstack(td, &ss, NULL); 763 764 return (EJUSTRETURN); 765 } 766 767 /* 768 * MPSAFE 769 */ 770 static void 771 linux_prepsyscall(struct trapframe *tf, int *args, u_int *code, caddr_t *params) 772 { 773 args[0] = tf->tf_rbx; 774 args[1] = tf->tf_rcx; 775 args[2] = tf->tf_rdx; 776 args[3] = tf->tf_rsi; 777 args[4] = tf->tf_rdi; 778 args[5] = tf->tf_rbp; /* Unconfirmed */ 779 *params = NULL; /* no copyin */ 780 } 781 782 /* 783 * If a linux binary is exec'ing something, try this image activator 784 * first. 
We override standard shell script execution in order to 785 * be able to modify the interpreter path. We only do this if a linux 786 * binary is doing the exec, so we do not create an EXEC module for it. 787 */ 788 static int exec_linux_imgact_try(struct image_params *iparams); 789 790 static int 791 exec_linux_imgact_try(struct image_params *imgp) 792 { 793 const char *head = (const char *)imgp->image_header; 794 char *rpath; 795 int error = -1, len; 796 797 /* 798 * The interpreter for shell scripts run from a linux binary needs 799 * to be located in /compat/linux if possible in order to recursively 800 * maintain linux path emulation. 801 */ 802 if (((const short *)head)[0] == SHELLMAGIC) { 803 /* 804 * Run our normal shell image activator. If it succeeds attempt 805 * to use the alternate path for the interpreter. If an 806 * alternate * path is found, use our stringspace to store it. 807 */ 808 if ((error = exec_shell_imgact(imgp)) == 0) { 809 linux_emul_convpath(FIRST_THREAD_IN_PROC(imgp->proc), 810 imgp->interpreter_name, UIO_SYSSPACE, &rpath, 0, 811 AT_FDCWD); 812 if (rpath != NULL) { 813 len = strlen(rpath) + 1; 814 815 if (len <= MAXSHELLCMDLEN) { 816 memcpy(imgp->interpreter_name, rpath, 817 len); 818 } 819 free(rpath, M_TEMP); 820 } 821 } 822 } 823 return(error); 824 } 825 826 /* 827 * Clear registers on exec 828 * XXX copied from ia32_signal.c. 
829 */ 830 static void 831 exec_linux_setregs(struct thread *td, struct image_params *imgp, u_long stack) 832 { 833 struct trapframe *regs = td->td_frame; 834 struct pcb *pcb = td->td_pcb; 835 836 mtx_lock(&dt_lock); 837 if (td->td_proc->p_md.md_ldt != NULL) 838 user_ldt_free(td); 839 else 840 mtx_unlock(&dt_lock); 841 842 critical_enter(); 843 wrmsr(MSR_FSBASE, 0); 844 wrmsr(MSR_KGSBASE, 0); /* User value while we're in the kernel */ 845 pcb->pcb_fsbase = 0; 846 pcb->pcb_gsbase = 0; 847 critical_exit(); 848 pcb->pcb_initial_fpucw = __LINUX_NPXCW__; 849 850 bzero((char *)regs, sizeof(struct trapframe)); 851 regs->tf_rip = imgp->entry_addr; 852 regs->tf_rsp = stack; 853 regs->tf_rflags = PSL_USER | (regs->tf_rflags & PSL_T); 854 regs->tf_gs = _ugssel; 855 regs->tf_fs = _ufssel; 856 regs->tf_es = _udatasel; 857 regs->tf_ds = _udatasel; 858 regs->tf_ss = _udatasel; 859 regs->tf_flags = TF_HASSEGS; 860 regs->tf_cs = _ucode32sel; 861 regs->tf_rbx = imgp->ps_strings; 862 td->td_pcb->pcb_full_iret = 1; 863 load_cr0(rcr0() | CR0_MP | CR0_TS); 864 fpstate_drop(td); 865 866 /* Return via doreti so that we can change to a different %cs */ 867 pcb->pcb_flags |= PCB_FULLCTX | PCB_32BIT; 868 pcb->pcb_flags &= ~PCB_GS32BIT; 869 td->td_retval[1] = 0; 870 } 871 872 /* 873 * XXX copied from ia32_sysvec.c. 874 */ 875 static register_t * 876 linux_copyout_strings(struct image_params *imgp) 877 { 878 int argc, envc; 879 u_int32_t *vectp; 880 char *stringp, *destp; 881 u_int32_t *stack_base; 882 struct linux32_ps_strings *arginfo; 883 884 /* 885 * Calculate string base and vector table pointers. 886 * Also deal with signal trampoline code for this exec type. 
887 */ 888 arginfo = (struct linux32_ps_strings *)LINUX32_PS_STRINGS; 889 destp = (caddr_t)arginfo - linux_szsigcode - SPARE_USRSPACE - 890 linux_szplatform - roundup((ARG_MAX - imgp->args->stringspace), 891 sizeof(char *)); 892 893 /* 894 * install sigcode 895 */ 896 copyout(imgp->proc->p_sysent->sv_sigcode, 897 ((caddr_t)arginfo - linux_szsigcode), linux_szsigcode); 898 899 /* 900 * Install LINUX_PLATFORM 901 */ 902 copyout(linux_platform, ((caddr_t)arginfo - linux_szsigcode - 903 linux_szplatform), linux_szplatform); 904 905 /* 906 * If we have a valid auxargs ptr, prepare some room 907 * on the stack. 908 */ 909 if (imgp->auxargs) { 910 /* 911 * 'AT_COUNT*2' is size for the ELF Auxargs data. This is for 912 * lower compatibility. 913 */ 914 imgp->auxarg_size = (imgp->auxarg_size) ? imgp->auxarg_size : 915 (LINUX_AT_COUNT * 2); 916 /* 917 * The '+ 2' is for the null pointers at the end of each of 918 * the arg and env vector sets,and imgp->auxarg_size is room 919 * for argument of Runtime loader. 920 */ 921 vectp = (u_int32_t *) (destp - (imgp->args->argc + 922 imgp->args->envc + 2 + imgp->auxarg_size) * 923 sizeof(u_int32_t)); 924 925 } else 926 /* 927 * The '+ 2' is for the null pointers at the end of each of 928 * the arg and env vector sets 929 */ 930 vectp = (u_int32_t *)(destp - (imgp->args->argc + 931 imgp->args->envc + 2) * sizeof(u_int32_t)); 932 933 /* 934 * vectp also becomes our initial stack base 935 */ 936 stack_base = vectp; 937 938 stringp = imgp->args->begin_argv; 939 argc = imgp->args->argc; 940 envc = imgp->args->envc; 941 /* 942 * Copy out strings - arguments and environment. 943 */ 944 copyout(stringp, destp, ARG_MAX - imgp->args->stringspace); 945 946 /* 947 * Fill in "ps_strings" struct for ps, w, etc. 948 */ 949 suword32(&arginfo->ps_argvstr, (uint32_t)(intptr_t)vectp); 950 suword32(&arginfo->ps_nargvstr, argc); 951 952 /* 953 * Fill in argument portion of vector table. 
954 */ 955 for (; argc > 0; --argc) { 956 suword32(vectp++, (uint32_t)(intptr_t)destp); 957 while (*stringp++ != 0) 958 destp++; 959 destp++; 960 } 961 962 /* a null vector table pointer separates the argp's from the envp's */ 963 suword32(vectp++, 0); 964 965 suword32(&arginfo->ps_envstr, (uint32_t)(intptr_t)vectp); 966 suword32(&arginfo->ps_nenvstr, envc); 967 968 /* 969 * Fill in environment portion of vector table. 970 */ 971 for (; envc > 0; --envc) { 972 suword32(vectp++, (uint32_t)(intptr_t)destp); 973 while (*stringp++ != 0) 974 destp++; 975 destp++; 976 } 977 978 /* end of vector table is a null pointer */ 979 suword32(vectp, 0); 980 981 return ((register_t *)stack_base); 982 } 983 984 SYSCTL_NODE(_compat, OID_AUTO, linux32, CTLFLAG_RW, 0, 985 "32-bit Linux emulation"); 986 987 static u_long linux32_maxdsiz = LINUX32_MAXDSIZ; 988 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxdsiz, CTLFLAG_RW, 989 &linux32_maxdsiz, 0, ""); 990 static u_long linux32_maxssiz = LINUX32_MAXSSIZ; 991 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxssiz, CTLFLAG_RW, 992 &linux32_maxssiz, 0, ""); 993 static u_long linux32_maxvmem = LINUX32_MAXVMEM; 994 SYSCTL_ULONG(_compat_linux32, OID_AUTO, maxvmem, CTLFLAG_RW, 995 &linux32_maxvmem, 0, ""); 996 997 static void 998 linux32_fixlimit(struct rlimit *rl, int which) 999 { 1000 1001 switch (which) { 1002 case RLIMIT_DATA: 1003 if (linux32_maxdsiz != 0) { 1004 if (rl->rlim_cur > linux32_maxdsiz) 1005 rl->rlim_cur = linux32_maxdsiz; 1006 if (rl->rlim_max > linux32_maxdsiz) 1007 rl->rlim_max = linux32_maxdsiz; 1008 } 1009 break; 1010 case RLIMIT_STACK: 1011 if (linux32_maxssiz != 0) { 1012 if (rl->rlim_cur > linux32_maxssiz) 1013 rl->rlim_cur = linux32_maxssiz; 1014 if (rl->rlim_max > linux32_maxssiz) 1015 rl->rlim_max = linux32_maxssiz; 1016 } 1017 break; 1018 case RLIMIT_VMEM: 1019 if (linux32_maxvmem != 0) { 1020 if (rl->rlim_cur > linux32_maxvmem) 1021 rl->rlim_cur = linux32_maxvmem; 1022 if (rl->rlim_max > linux32_maxvmem) 1023 rl->rlim_max = 
linux32_maxvmem; 1024 } 1025 break; 1026 } 1027 } 1028 1029 struct sysentvec elf_linux_sysvec = { 1030 .sv_size = LINUX_SYS_MAXSYSCALL, 1031 .sv_table = linux_sysent, 1032 .sv_mask = 0, 1033 .sv_sigsize = LINUX_SIGTBLSZ, 1034 .sv_sigtbl = bsd_to_linux_signal, 1035 .sv_errsize = ELAST + 1, 1036 .sv_errtbl = bsd_to_linux_errno, 1037 .sv_transtrap = translate_traps, 1038 .sv_fixup = elf_linux_fixup, 1039 .sv_sendsig = linux_sendsig, 1040 .sv_sigcode = linux_sigcode, 1041 .sv_szsigcode = &linux_szsigcode, 1042 .sv_prepsyscall = linux_prepsyscall, 1043 .sv_name = "Linux ELF32", 1044 .sv_coredump = elf32_coredump, 1045 .sv_imgact_try = exec_linux_imgact_try, 1046 .sv_minsigstksz = LINUX_MINSIGSTKSZ, 1047 .sv_pagesize = PAGE_SIZE, 1048 .sv_minuser = VM_MIN_ADDRESS, 1049 .sv_maxuser = LINUX32_USRSTACK, 1050 .sv_usrstack = LINUX32_USRSTACK, 1051 .sv_psstrings = LINUX32_PS_STRINGS, 1052 .sv_stackprot = VM_PROT_ALL, 1053 .sv_copyout_strings = linux_copyout_strings, 1054 .sv_setregs = exec_linux_setregs, 1055 .sv_fixlimit = linux32_fixlimit, 1056 .sv_maxssiz = &linux32_maxssiz, 1057 .sv_flags = SV_ABI_LINUX | SV_ILP32 | SV_IA32 1058 }; 1059 1060 static char GNU_ABI_VENDOR[] = "GNU"; 1061 static int GNULINUX_ABI_DESC = 0; 1062 1063 static boolean_t 1064 linux32_trans_osrel(const Elf_Note *note, int32_t *osrel) 1065 { 1066 const Elf32_Word *desc; 1067 uintptr_t p; 1068 1069 p = (uintptr_t)(note + 1); 1070 p += roundup2(note->n_namesz, sizeof(Elf32_Addr)); 1071 1072 desc = (const Elf32_Word *)p; 1073 if (desc[0] != GNULINUX_ABI_DESC) 1074 return (FALSE); 1075 1076 /* 1077 * For linux we encode osrel as follows (see linux_mib.c): 1078 * VVVMMMIII (version, major, minor), see linux_mib.c. 
1079 */ 1080 *osrel = desc[1] * 1000000 + desc[2] * 1000 + desc[3]; 1081 1082 return (TRUE); 1083 } 1084 1085 static Elf_Brandnote linux32_brandnote = { 1086 .hdr.n_namesz = sizeof(GNU_ABI_VENDOR), 1087 .hdr.n_descsz = 16, /* XXX at least 16 */ 1088 .hdr.n_type = 1, 1089 .vendor = GNU_ABI_VENDOR, 1090 .flags = BN_TRANSLATE_OSREL, 1091 .trans_osrel = linux32_trans_osrel 1092 }; 1093 1094 static Elf32_Brandinfo linux_brand = { 1095 .brand = ELFOSABI_LINUX, 1096 .machine = EM_386, 1097 .compat_3_brand = "Linux", 1098 .emul_path = "/compat/linux", 1099 .interp_path = "/lib/ld-linux.so.1", 1100 .sysvec = &elf_linux_sysvec, 1101 .interp_newpath = NULL, 1102 .brand_note = &linux32_brandnote, 1103 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1104 }; 1105 1106 static Elf32_Brandinfo linux_glibc2brand = { 1107 .brand = ELFOSABI_LINUX, 1108 .machine = EM_386, 1109 .compat_3_brand = "Linux", 1110 .emul_path = "/compat/linux", 1111 .interp_path = "/lib/ld-linux.so.2", 1112 .sysvec = &elf_linux_sysvec, 1113 .interp_newpath = NULL, 1114 .brand_note = &linux32_brandnote, 1115 .flags = BI_CAN_EXEC_DYN | BI_BRAND_NOTE 1116 }; 1117 1118 Elf32_Brandinfo *linux_brandlist[] = { 1119 &linux_brand, 1120 &linux_glibc2brand, 1121 NULL 1122 }; 1123 1124 static int 1125 linux_elf_modevent(module_t mod, int type, void *data) 1126 { 1127 Elf32_Brandinfo **brandinfo; 1128 int error; 1129 struct linux_ioctl_handler **lihp; 1130 struct linux_device_handler **ldhp; 1131 1132 error = 0; 1133 1134 switch(type) { 1135 case MOD_LOAD: 1136 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1137 ++brandinfo) 1138 if (elf32_insert_brand_entry(*brandinfo) < 0) 1139 error = EINVAL; 1140 if (error == 0) { 1141 SET_FOREACH(lihp, linux_ioctl_handler_set) 1142 linux_ioctl_register_handler(*lihp); 1143 SET_FOREACH(ldhp, linux_device_handler_set) 1144 linux_device_register_handler(*ldhp); 1145 mtx_init(&emul_lock, "emuldata lock", NULL, MTX_DEF); 1146 sx_init(&emul_shared_lock, "emuldata->shared lock"); 1147 
LIST_INIT(&futex_list); 1148 mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF); 1149 linux_exit_tag = EVENTHANDLER_REGISTER(process_exit, 1150 linux_proc_exit, NULL, 1000); 1151 linux_schedtail_tag = EVENTHANDLER_REGISTER(schedtail, 1152 linux_schedtail, NULL, 1000); 1153 linux_exec_tag = EVENTHANDLER_REGISTER(process_exec, 1154 linux_proc_exec, NULL, 1000); 1155 linux_szplatform = roundup(strlen(linux_platform) + 1, 1156 sizeof(char *)); 1157 linux_osd_jail_register(); 1158 stclohz = (stathz ? stathz : hz); 1159 if (bootverbose) 1160 printf("Linux ELF exec handler installed\n"); 1161 } else 1162 printf("cannot insert Linux ELF brand handler\n"); 1163 break; 1164 case MOD_UNLOAD: 1165 for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL; 1166 ++brandinfo) 1167 if (elf32_brand_inuse(*brandinfo)) 1168 error = EBUSY; 1169 if (error == 0) { 1170 for (brandinfo = &linux_brandlist[0]; 1171 *brandinfo != NULL; ++brandinfo) 1172 if (elf32_remove_brand_entry(*brandinfo) < 0) 1173 error = EINVAL; 1174 } 1175 if (error == 0) { 1176 SET_FOREACH(lihp, linux_ioctl_handler_set) 1177 linux_ioctl_unregister_handler(*lihp); 1178 SET_FOREACH(ldhp, linux_device_handler_set) 1179 linux_device_unregister_handler(*ldhp); 1180 mtx_destroy(&emul_lock); 1181 sx_destroy(&emul_shared_lock); 1182 mtx_destroy(&futex_mtx); 1183 EVENTHANDLER_DEREGISTER(process_exit, linux_exit_tag); 1184 EVENTHANDLER_DEREGISTER(schedtail, linux_schedtail_tag); 1185 EVENTHANDLER_DEREGISTER(process_exec, linux_exec_tag); 1186 linux_osd_jail_deregister(); 1187 if (bootverbose) 1188 printf("Linux ELF exec handler removed\n"); 1189 } else 1190 printf("Could not deinstall ELF interpreter entry\n"); 1191 break; 1192 default: 1193 return EOPNOTSUPP; 1194 } 1195 return error; 1196 } 1197 1198 static moduledata_t linux_elf_mod = { 1199 "linuxelf", 1200 linux_elf_modevent, 1201 0 1202 }; 1203 1204 DECLARE_MODULE(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY); 1205