1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 1994-1996 Søren Schmidt 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
*/

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/exec.h>
#include <sys/fcntl.h>
#include <sys/imgact.h>
#include <sys/imgact_aout.h>
#include <sys/imgact_elf.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/vnode.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_param.h>

#include <machine/cpu.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/pcb.h>
#include <machine/trap.h>

#include <i386/linux/linux.h>
#include <i386/linux/linux_proto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_ioctl.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_misc.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_vdso.h>

MODULE_VERSION(linux, 1);

/* Address of the ps_strings structure, placed directly below the stack top. */
#define LINUX_PS_STRINGS	(LINUX_USRSTACK - sizeof(struct ps_strings))

/* Size in bytes of the signal trampoline image; set by linux_vdso_install(). */
static int linux_szsigcode;
/* Shared page object and its mapping; set up by linux_vdso_install(). */
static vm_object_t linux_shared_page_obj;
static char *linux_shared_page_mapping;
/* Symbols delimiting the embedded linux_locore.o image. */
extern char _binary_linux_locore_o_start;
extern char _binary_linux_locore_o_end;

extern struct sysent linux_sysent[LINUX_SYS_MAXSYSCALL];

SET_DECLARE(linux_ioctl_handler_set, struct linux_ioctl_handler);

static int	linux_fixup(uintptr_t *stack_base,
		    struct image_params *iparams);
static int	linux_fixup_elf(uintptr_t *stack_base,
		    struct image_params *iparams);
static void	linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask);
static void	linux_exec_setregs(struct thread *td,
		    struct image_params *imgp, uintptr_t stack);
static int	linux_copyout_strings(struct image_params *imgp,
		    uintptr_t *stack_base);
static bool	linux_trans_osrel(const Elf_Note *note, int32_t *osrel);
static void	linux_vdso_install(void *param);
static void	linux_vdso_deinstall(void *param);

/* Value reported for FreeBSD traps that have no entry in the table below. */
#define LINUX_T_UNKNOWN		255
/*
 * Translation table indexed by FreeBSD trap number (the T_* name in each
 * entry's comment); the stored value is the trap number reported to the
 * Linux process in sc_trapno.
 */
static int _bsd_to_linux_trapcode[] = {
	LINUX_T_UNKNOWN,	/* 0 */
	6,			/* 1  T_PRIVINFLT */
	LINUX_T_UNKNOWN,	/* 2 */
	3,			/* 3  T_BPTFLT */
	LINUX_T_UNKNOWN,	/* 4 */
	LINUX_T_UNKNOWN,	/* 5 */
	16,			/* 6  T_ARITHTRAP */
	254,			/* 7  T_ASTFLT */
	LINUX_T_UNKNOWN,	/* 8 */
	13,			/* 9  T_PROTFLT */
	1,			/* 10 T_TRCTRAP */
	LINUX_T_UNKNOWN,	/* 11 */
	14,			/* 12 T_PAGEFLT */
	LINUX_T_UNKNOWN,	/* 13 */
	17,			/* 14 T_ALIGNFLT */
	LINUX_T_UNKNOWN,	/* 15 */
	LINUX_T_UNKNOWN,	/* 16 */
	LINUX_T_UNKNOWN,	/* 17 */
	0,			/* 18 T_DIVIDE */
	2,			/* 19 T_NMI */
	4,			/* 20 T_OFLOW */
	5,			/* 21 T_BOUND */
	7,			/* 22 T_DNA */
	8,			/* 23 T_DOUBLEFLT */
	9,			/* 24 T_FPOPFLT */
	10,			/* 25 T_TSSFLT */
	11,			/* 26 T_SEGNPFLT */
	12,			/* 27 T_STKFLT */
	18,			/* 28 T_MCHK */
	19,			/* 29 T_XMMFLT */
	15			/* 30 T_RESERVED */
};
/*
 * Look up a FreeBSD trap number in the table above; any index at or past
 * the end of the table yields LINUX_T_UNKNOWN.  Note that the argument is
 * evaluated more than once.
 */
#define bsd_to_linux_trapcode(code) \
    ((code)<nitems(_bsd_to_linux_trapcode)? \
     _bsd_to_linux_trapcode[(code)]: \
     LINUX_T_UNKNOWN)

/* Symbols provided by the Linux VDSO image. */
LINUX_VDSO_SYM_CHAR(linux_platform);
LINUX_VDSO_SYM_INTPTR(linux_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_rt_sigcode);
LINUX_VDSO_SYM_INTPTR(linux_vsyscall);

/*
 * If FreeBSD & Linux have a difference of opinion about what a trap
 * means, deal with it here.
150 * 151 * MPSAFE 152 */ 153 static int 154 linux_translate_traps(int signal, int trap_code) 155 { 156 if (signal != SIGBUS) 157 return (signal); 158 switch (trap_code) { 159 case T_PROTFLT: 160 case T_TSSFLT: 161 case T_DOUBLEFLT: 162 case T_PAGEFLT: 163 return (SIGSEGV); 164 default: 165 return (signal); 166 } 167 } 168 169 static int 170 linux_fixup(uintptr_t *stack_base, struct image_params *imgp) 171 { 172 register_t *base, *argv, *envp; 173 174 base = (register_t *)*stack_base; 175 argv = base; 176 envp = base + (imgp->args->argc + 1); 177 base--; 178 suword(base, (intptr_t)envp); 179 base--; 180 suword(base, (intptr_t)argv); 181 base--; 182 suword(base, imgp->args->argc); 183 *stack_base = (uintptr_t)base; 184 return (0); 185 } 186 187 static int 188 linux_copyout_auxargs(struct image_params *imgp, uintptr_t base) 189 { 190 struct proc *p; 191 Elf32_Auxargs *args; 192 Elf32_Auxinfo *argarray, *pos; 193 struct ps_strings *arginfo; 194 int error, issetugid; 195 196 p = imgp->proc; 197 issetugid = imgp->proc->p_flag & P_SUGID ? 1 : 0; 198 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 199 args = (Elf32_Auxargs *)imgp->auxargs; 200 argarray = pos = malloc(LINUX_AT_COUNT * sizeof(*pos), M_TEMP, 201 M_WAITOK | M_ZERO); 202 203 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO_EHDR, 204 imgp->proc->p_sysent->sv_shared_page_base); 205 AUXARGS_ENTRY(pos, LINUX_AT_SYSINFO, linux_vsyscall); 206 AUXARGS_ENTRY(pos, LINUX_AT_HWCAP, cpu_feature); 207 208 /* 209 * Do not export AT_CLKTCK when emulating Linux kernel prior to 2.4.0, 210 * as it has appeared in the 2.4.0-rc7 first time. 211 * Being exported, AT_CLKTCK is returned by sysconf(_SC_CLK_TCK), 212 * glibc falls back to the hard-coded CLK_TCK value when aux entry 213 * is not present. 214 * Also see linux_times() implementation. 
215 */ 216 if (linux_kernver(curthread) >= LINUX_KERNVER_2004000) 217 AUXARGS_ENTRY(pos, LINUX_AT_CLKTCK, stclohz); 218 AUXARGS_ENTRY(pos, AT_PHDR, args->phdr); 219 AUXARGS_ENTRY(pos, AT_PHENT, args->phent); 220 AUXARGS_ENTRY(pos, AT_PHNUM, args->phnum); 221 AUXARGS_ENTRY(pos, AT_PAGESZ, args->pagesz); 222 AUXARGS_ENTRY(pos, AT_FLAGS, args->flags); 223 AUXARGS_ENTRY(pos, AT_ENTRY, args->entry); 224 AUXARGS_ENTRY(pos, AT_BASE, args->base); 225 AUXARGS_ENTRY(pos, LINUX_AT_SECURE, issetugid); 226 AUXARGS_ENTRY(pos, AT_UID, imgp->proc->p_ucred->cr_ruid); 227 AUXARGS_ENTRY(pos, AT_EUID, imgp->proc->p_ucred->cr_svuid); 228 AUXARGS_ENTRY(pos, AT_GID, imgp->proc->p_ucred->cr_rgid); 229 AUXARGS_ENTRY(pos, AT_EGID, imgp->proc->p_ucred->cr_svgid); 230 AUXARGS_ENTRY(pos, LINUX_AT_PLATFORM, PTROUT(linux_platform)); 231 AUXARGS_ENTRY_PTR(pos, LINUX_AT_RANDOM, imgp->canary); 232 if (imgp->execpathp != 0) 233 AUXARGS_ENTRY_PTR(pos, LINUX_AT_EXECFN, imgp->execpathp); 234 if (args->execfd != -1) 235 AUXARGS_ENTRY(pos, AT_EXECFD, args->execfd); 236 AUXARGS_ENTRY(pos, AT_NULL, 0); 237 238 free(imgp->auxargs, M_TEMP); 239 imgp->auxargs = NULL; 240 KASSERT(pos - argarray <= LINUX_AT_COUNT, ("Too many auxargs")); 241 242 error = copyout(argarray, (void *)base, 243 sizeof(*argarray) * LINUX_AT_COUNT); 244 free(argarray, M_TEMP); 245 return (error); 246 } 247 248 static int 249 linux_fixup_elf(uintptr_t *stack_base, struct image_params *imgp) 250 { 251 register_t *base; 252 253 base = (register_t *)*stack_base; 254 base--; 255 if (suword(base, (register_t)imgp->args->argc) == -1) 256 return (EFAULT); 257 *stack_base = (uintptr_t)base; 258 return (0); 259 } 260 261 /* 262 * Copied from kern/kern_exec.c 263 */ 264 static int 265 linux_copyout_strings(struct image_params *imgp, uintptr_t *stack_base) 266 { 267 int argc, envc, error; 268 char **vectp; 269 char *stringp; 270 uintptr_t destp, ustringp; 271 struct ps_strings *arginfo; 272 char canary[LINUX_AT_RANDOM_LEN]; 273 size_t execpath_len; 
274 struct proc *p; 275 276 /* Calculate string base and vector table pointers. */ 277 p = imgp->proc; 278 if (imgp->execpath != NULL && imgp->auxargs != NULL) 279 execpath_len = strlen(imgp->execpath) + 1; 280 else 281 execpath_len = 0; 282 arginfo = (struct ps_strings *)p->p_sysent->sv_psstrings; 283 destp = (uintptr_t)arginfo; 284 285 if (execpath_len != 0) { 286 destp -= execpath_len; 287 destp = rounddown2(destp, sizeof(void *)); 288 imgp->execpathp = (void *)destp; 289 error = copyout(imgp->execpath, imgp->execpathp, execpath_len); 290 if (error != 0) 291 return (error); 292 } 293 294 /* Prepare the canary for SSP. */ 295 arc4rand(canary, sizeof(canary), 0); 296 destp -= roundup(sizeof(canary), sizeof(void *)); 297 imgp->canary = (void *)destp; 298 error = copyout(canary, imgp->canary, sizeof(canary)); 299 if (error != 0) 300 return (error); 301 302 /* Allocate room for the argument and environment strings. */ 303 destp -= ARG_MAX - imgp->args->stringspace; 304 destp = rounddown2(destp, sizeof(void *)); 305 ustringp = destp; 306 307 if (imgp->auxargs) { 308 /* 309 * Allocate room on the stack for the ELF auxargs 310 * array. It has LINUX_AT_COUNT entries. 311 */ 312 destp -= LINUX_AT_COUNT * sizeof(Elf32_Auxinfo); 313 destp = rounddown2(destp, sizeof(void *)); 314 } 315 316 vectp = (char **)destp; 317 318 /* 319 * Allocate room for the argv[] and env vectors including the 320 * terminating NULL pointers. 321 */ 322 vectp -= imgp->args->argc + 1 + imgp->args->envc + 1; 323 324 /* vectp also becomes our initial stack base. */ 325 *stack_base = (uintptr_t)vectp; 326 327 stringp = imgp->args->begin_argv; 328 argc = imgp->args->argc; 329 envc = imgp->args->envc; 330 331 /* Copy out strings - arguments and environment. */ 332 error = copyout(stringp, (void *)ustringp, 333 ARG_MAX - imgp->args->stringspace); 334 if (error != 0) 335 return (error); 336 337 /* Fill in "ps_strings" struct for ps, w, etc. 
*/ 338 if (suword(&arginfo->ps_argvstr, (long)(intptr_t)vectp) != 0 || 339 suword(&arginfo->ps_nargvstr, argc) != 0) 340 return (EFAULT); 341 342 /* Fill in argument portion of vector table. */ 343 for (; argc > 0; --argc) { 344 if (suword(vectp++, ustringp) != 0) 345 return (EFAULT); 346 while (*stringp++ != 0) 347 ustringp++; 348 ustringp++; 349 } 350 351 /* A null vector table pointer separates the argp's from the envp's. */ 352 if (suword(vectp++, 0) != 0) 353 return (EFAULT); 354 355 if (suword(&arginfo->ps_envstr, (long)(intptr_t)vectp) != 0 || 356 suword(&arginfo->ps_nenvstr, envc) != 0) 357 return (EFAULT); 358 359 /* Fill in environment portion of vector table. */ 360 for (; envc > 0; --envc) { 361 if (suword(vectp++, ustringp) != 0) 362 return (EFAULT); 363 while (*stringp++ != 0) 364 ustringp++; 365 ustringp++; 366 } 367 368 /* The end of the vector table is a null pointer. */ 369 if (suword(vectp, 0) != 0) 370 return (EFAULT); 371 372 if (imgp->auxargs) { 373 vectp++; 374 error = imgp->sysent->sv_copyout_auxargs(imgp, 375 (uintptr_t)vectp); 376 if (error != 0) 377 return (error); 378 } 379 380 return (0); 381 } 382 383 static void 384 linux_rt_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 385 { 386 struct thread *td = curthread; 387 struct proc *p = td->td_proc; 388 struct sigacts *psp; 389 struct trapframe *regs; 390 struct l_rt_sigframe *fp, frame; 391 int sig, code; 392 int oonstack; 393 394 sig = ksi->ksi_signo; 395 code = ksi->ksi_code; 396 PROC_LOCK_ASSERT(p, MA_OWNED); 397 psp = p->p_sigacts; 398 mtx_assert(&psp->ps_mtx, MA_OWNED); 399 regs = td->td_frame; 400 oonstack = sigonstack(regs->tf_esp); 401 402 /* Allocate space for the signal handler context. 
*/ 403 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 404 SIGISMEMBER(psp->ps_sigonstack, sig)) { 405 fp = (struct l_rt_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + 406 td->td_sigstk.ss_size - sizeof(struct l_rt_sigframe)); 407 } else 408 fp = (struct l_rt_sigframe *)regs->tf_esp - 1; 409 mtx_unlock(&psp->ps_mtx); 410 411 /* Build the argument list for the signal handler. */ 412 sig = bsd_to_linux_signal(sig); 413 414 bzero(&frame, sizeof(frame)); 415 416 frame.sf_handler = catcher; 417 frame.sf_sig = sig; 418 frame.sf_siginfo = &fp->sf_si; 419 frame.sf_ucontext = &fp->sf_sc; 420 421 /* Fill in POSIX parts. */ 422 siginfo_to_lsiginfo(&ksi->ksi_info, &frame.sf_si, sig); 423 424 /* Build the signal context to be used by sigreturn. */ 425 frame.sf_sc.uc_flags = 0; /* XXX ??? */ 426 frame.sf_sc.uc_link = NULL; /* XXX ??? */ 427 428 frame.sf_sc.uc_stack.ss_sp = td->td_sigstk.ss_sp; 429 frame.sf_sc.uc_stack.ss_size = td->td_sigstk.ss_size; 430 frame.sf_sc.uc_stack.ss_flags = (td->td_pflags & TDP_ALTSTACK) 431 ? ((oonstack) ? 
LINUX_SS_ONSTACK : 0) : LINUX_SS_DISABLE; 432 PROC_UNLOCK(p); 433 434 bsd_to_linux_sigset(mask, &frame.sf_sc.uc_sigmask); 435 436 frame.sf_sc.uc_mcontext.sc_mask = frame.sf_sc.uc_sigmask.__mask; 437 frame.sf_sc.uc_mcontext.sc_gs = rgs(); 438 frame.sf_sc.uc_mcontext.sc_fs = regs->tf_fs; 439 frame.sf_sc.uc_mcontext.sc_es = regs->tf_es; 440 frame.sf_sc.uc_mcontext.sc_ds = regs->tf_ds; 441 frame.sf_sc.uc_mcontext.sc_edi = regs->tf_edi; 442 frame.sf_sc.uc_mcontext.sc_esi = regs->tf_esi; 443 frame.sf_sc.uc_mcontext.sc_ebp = regs->tf_ebp; 444 frame.sf_sc.uc_mcontext.sc_ebx = regs->tf_ebx; 445 frame.sf_sc.uc_mcontext.sc_esp = regs->tf_esp; 446 frame.sf_sc.uc_mcontext.sc_edx = regs->tf_edx; 447 frame.sf_sc.uc_mcontext.sc_ecx = regs->tf_ecx; 448 frame.sf_sc.uc_mcontext.sc_eax = regs->tf_eax; 449 frame.sf_sc.uc_mcontext.sc_eip = regs->tf_eip; 450 frame.sf_sc.uc_mcontext.sc_cs = regs->tf_cs; 451 frame.sf_sc.uc_mcontext.sc_eflags = regs->tf_eflags; 452 frame.sf_sc.uc_mcontext.sc_esp_at_signal = regs->tf_esp; 453 frame.sf_sc.uc_mcontext.sc_ss = regs->tf_ss; 454 frame.sf_sc.uc_mcontext.sc_err = regs->tf_err; 455 frame.sf_sc.uc_mcontext.sc_cr2 = (register_t)ksi->ksi_addr; 456 frame.sf_sc.uc_mcontext.sc_trapno = bsd_to_linux_trapcode(code); 457 458 if (copyout(&frame, fp, sizeof(frame)) != 0) { 459 /* 460 * Process has trashed its stack; give it an illegal 461 * instruction to halt it in its tracks. 462 */ 463 PROC_LOCK(p); 464 sigexit(td, SIGILL); 465 } 466 467 /* Build context to run handler in. */ 468 regs->tf_esp = (int)fp; 469 regs->tf_eip = linux_rt_sigcode; 470 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 471 regs->tf_cs = _ucodesel; 472 regs->tf_ds = _udatasel; 473 regs->tf_es = _udatasel; 474 regs->tf_fs = _udatasel; 475 regs->tf_ss = _udatasel; 476 PROC_LOCK(p); 477 mtx_lock(&psp->ps_mtx); 478 } 479 480 /* 481 * Send an interrupt to process. 482 * 483 * Stack is set up to allow sigcode stored 484 * in u. 
to call routine, followed by kcall 485 * to sigreturn routine below. After sigreturn 486 * resets the signal mask, the stack, and the 487 * frame pointer, it returns to the user 488 * specified pc, psl. 489 */ 490 static void 491 linux_sendsig(sig_t catcher, ksiginfo_t *ksi, sigset_t *mask) 492 { 493 struct thread *td = curthread; 494 struct proc *p = td->td_proc; 495 struct sigacts *psp; 496 struct trapframe *regs; 497 struct l_sigframe *fp, frame; 498 l_sigset_t lmask; 499 int sig, code; 500 int oonstack; 501 502 PROC_LOCK_ASSERT(p, MA_OWNED); 503 psp = p->p_sigacts; 504 sig = ksi->ksi_signo; 505 code = ksi->ksi_code; 506 mtx_assert(&psp->ps_mtx, MA_OWNED); 507 if (SIGISMEMBER(psp->ps_siginfo, sig)) { 508 /* Signal handler installed with SA_SIGINFO. */ 509 linux_rt_sendsig(catcher, ksi, mask); 510 return; 511 } 512 regs = td->td_frame; 513 oonstack = sigonstack(regs->tf_esp); 514 515 /* Allocate space for the signal handler context. */ 516 if ((td->td_pflags & TDP_ALTSTACK) && !oonstack && 517 SIGISMEMBER(psp->ps_sigonstack, sig)) { 518 fp = (struct l_sigframe *)((uintptr_t)td->td_sigstk.ss_sp + 519 td->td_sigstk.ss_size - sizeof(struct l_sigframe)); 520 } else 521 fp = (struct l_sigframe *)regs->tf_esp - 1; 522 mtx_unlock(&psp->ps_mtx); 523 PROC_UNLOCK(p); 524 525 /* Build the argument list for the signal handler. */ 526 sig = bsd_to_linux_signal(sig); 527 528 bzero(&frame, sizeof(frame)); 529 530 frame.sf_handler = catcher; 531 frame.sf_sig = sig; 532 533 bsd_to_linux_sigset(mask, &lmask); 534 535 /* Build the signal context to be used by sigreturn. 
*/ 536 frame.sf_sc.sc_mask = lmask.__mask; 537 frame.sf_sc.sc_gs = rgs(); 538 frame.sf_sc.sc_fs = regs->tf_fs; 539 frame.sf_sc.sc_es = regs->tf_es; 540 frame.sf_sc.sc_ds = regs->tf_ds; 541 frame.sf_sc.sc_edi = regs->tf_edi; 542 frame.sf_sc.sc_esi = regs->tf_esi; 543 frame.sf_sc.sc_ebp = regs->tf_ebp; 544 frame.sf_sc.sc_ebx = regs->tf_ebx; 545 frame.sf_sc.sc_esp = regs->tf_esp; 546 frame.sf_sc.sc_edx = regs->tf_edx; 547 frame.sf_sc.sc_ecx = regs->tf_ecx; 548 frame.sf_sc.sc_eax = regs->tf_eax; 549 frame.sf_sc.sc_eip = regs->tf_eip; 550 frame.sf_sc.sc_cs = regs->tf_cs; 551 frame.sf_sc.sc_eflags = regs->tf_eflags; 552 frame.sf_sc.sc_esp_at_signal = regs->tf_esp; 553 frame.sf_sc.sc_ss = regs->tf_ss; 554 frame.sf_sc.sc_err = regs->tf_err; 555 frame.sf_sc.sc_cr2 = (register_t)ksi->ksi_addr; 556 frame.sf_sc.sc_trapno = bsd_to_linux_trapcode(ksi->ksi_trapno); 557 558 frame.sf_extramask[0] = lmask.__mask; 559 560 if (copyout(&frame, fp, sizeof(frame)) != 0) { 561 /* 562 * Process has trashed its stack; give it an illegal 563 * instruction to halt it in its tracks. 564 */ 565 PROC_LOCK(p); 566 sigexit(td, SIGILL); 567 } 568 569 /* Build context to run handler in. */ 570 regs->tf_esp = (int)fp; 571 regs->tf_eip = linux_sigcode; 572 regs->tf_eflags &= ~(PSL_T | PSL_VM | PSL_D); 573 regs->tf_cs = _ucodesel; 574 regs->tf_ds = _udatasel; 575 regs->tf_es = _udatasel; 576 regs->tf_fs = _udatasel; 577 regs->tf_ss = _udatasel; 578 PROC_LOCK(p); 579 mtx_lock(&psp->ps_mtx); 580 } 581 582 /* 583 * System call to cleanup state after a signal 584 * has been taken. Reset signal mask and 585 * stack state from context left by sendsig (above). 586 * Return to previous pc and psl as specified by 587 * context left by sendsig. Check carefully to 588 * make sure that the user has not modified the 589 * psl to gain improper privileges or to cause 590 * a machine fault. 
*/
int
linux_sigreturn(struct thread *td, struct linux_sigreturn_args *args)
{
	struct l_sigframe frame;
	struct trapframe *regs;
	l_sigset_t lmask;
	sigset_t bmask;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

	/*
	 * The trampoline code hands us the sigframe.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->sfp, &frame, sizeof(frame)) != 0)
		return (EFAULT);

	/* Check for security violations. */
	/* Only the bits in PSL_USERCHANGE may differ from the live eflags. */
#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = frame.sf_sc.sc_eflags;
	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
		return (EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(frame.sf_sc.sc_cs)) {
		/* Post SIGBUS to the thread and fail the system call. */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_eip;
		trapsignal(td, &ksi);
		return (EINVAL);
	}

	/* Reinstate the signal mask saved in the frame. */
	lmask.__mask = frame.sf_sc.sc_mask;
	linux_to_bsd_sigset(&lmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/* Restore signal context. */
	/* %gs was restored by the trampoline. */
	regs->tf_fs = frame.sf_sc.sc_fs;
	regs->tf_es = frame.sf_sc.sc_es;
	regs->tf_ds = frame.sf_sc.sc_ds;
	regs->tf_edi = frame.sf_sc.sc_edi;
	regs->tf_esi = frame.sf_sc.sc_esi;
	regs->tf_ebp = frame.sf_sc.sc_ebp;
	regs->tf_ebx = frame.sf_sc.sc_ebx;
	regs->tf_edx = frame.sf_sc.sc_edx;
	regs->tf_ecx = frame.sf_sc.sc_ecx;
	regs->tf_eax = frame.sf_sc.sc_eax;
	regs->tf_eip = frame.sf_sc.sc_eip;
	regs->tf_cs = frame.sf_sc.sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp = frame.sf_sc.sc_esp_at_signal;
	regs->tf_ss = frame.sf_sc.sc_ss;

	return (EJUSTRETURN);
}

/*
 * System call to cleanup state after a signal
 * has been taken.  Reset signal mask and
 * stack state from context left by rt_sendsig (above).
 * Return to previous pc and psl as specified by
 * context left by sendsig. Check carefully to
 * make sure that the user has not modified the
 * psl to gain improper privileges or to cause
 * a machine fault.
 */
int
linux_rt_sigreturn(struct thread *td, struct linux_rt_sigreturn_args *args)
{
	struct l_ucontext uc;
	struct l_sigcontext *context;
	sigset_t bmask;
	l_stack_t *lss;
	stack_t ss;
	struct trapframe *regs;
	int eflags;
	ksiginfo_t ksi;

	regs = td->td_frame;

	/*
	 * The trampoline code hands us the ucontext.
	 * It is unsafe to keep track of it ourselves, in the event that a
	 * program jumps out of a signal handler.
	 */
	if (copyin(args->ucp, &uc, sizeof(uc)) != 0)
		return (EFAULT);

	context = &uc.uc_mcontext;

	/* Check for security violations. */
	/* Only the bits in PSL_USERCHANGE may differ from the live eflags. */
#define EFLAGS_SECURE(ef, oef) ((((ef) ^ (oef)) & ~PSL_USERCHANGE) == 0)
	eflags = context->sc_eflags;
	if (!EFLAGS_SECURE(eflags, regs->tf_eflags))
		return (EINVAL);

	/*
	 * Don't allow users to load a valid privileged %cs.  Let the
	 * hardware check for invalid selectors, excess privilege in
	 * other selectors, invalid %eip's and invalid %esp's.
	 */
#define CS_SECURE(cs) (ISPL(cs) == SEL_UPL)
	if (!CS_SECURE(context->sc_cs)) {
		/* Post SIGBUS to the thread and fail the system call. */
		ksiginfo_init_trap(&ksi);
		ksi.ksi_signo = SIGBUS;
		ksi.ksi_code = BUS_OBJERR;
		ksi.ksi_trapno = T_PROTFLT;
		ksi.ksi_addr = (void *)regs->tf_eip;
		trapsignal(td, &ksi);
		return (EINVAL);
	}

	/* Reinstate the signal mask saved in the ucontext. */
	linux_to_bsd_sigset(&uc.uc_sigmask, &bmask);
	kern_sigprocmask(td, SIG_SETMASK, &bmask, NULL, 0);

	/* Restore signal context. */
	/* %gs was restored by the trampoline. */
	regs->tf_fs = context->sc_fs;
	regs->tf_es = context->sc_es;
	regs->tf_ds = context->sc_ds;
	regs->tf_edi = context->sc_edi;
	regs->tf_esi = context->sc_esi;
	regs->tf_ebp = context->sc_ebp;
	regs->tf_ebx = context->sc_ebx;
	regs->tf_edx = context->sc_edx;
	regs->tf_ecx = context->sc_ecx;
	regs->tf_eax = context->sc_eax;
	regs->tf_eip = context->sc_eip;
	regs->tf_cs = context->sc_cs;
	regs->tf_eflags = eflags;
	regs->tf_esp = context->sc_esp_at_signal;
	regs->tf_ss = context->sc_ss;

	/* Call sigaltstack & ignore results. */
	lss = &uc.uc_stack;
	ss.ss_sp = lss->ss_sp;
	ss.ss_size = lss->ss_size;
	ss.ss_flags = linux_to_bsd_sigaltstack(lss->ss_flags);

	(void)kern_sigaltstack(td, &ss, NULL);

	return (EJUSTRETURN);
}

/*
 * Decode a Linux system call from the trapframe into td->td_sa: the
 * syscall number comes from %eax and the six arguments from %ebx, %ecx,
 * %edx, %esi, %edi and %ebp.  A number at or beyond sv_size selects the
 * last entry of the syscall table.  Always returns 0.
 */
static int
linux_fetch_syscall_args(struct thread *td)
{
	struct proc *p;
	struct trapframe *frame;
	struct syscall_args *sa;

	p = td->td_proc;
	frame = td->td_frame;
	sa = &td->td_sa;

	sa->code = frame->tf_eax;
	sa->args[0] = frame->tf_ebx;
	sa->args[1] = frame->tf_ecx;
	sa->args[2] = frame->tf_edx;
	sa->args[3] = frame->tf_esi;
	sa->args[4] = frame->tf_edi;
	sa->args[5] = frame->tf_ebp;	/* Unconfirmed */

	if (sa->code >= p->p_sysent->sv_size)
		/* nosys */
		sa->callp = &p->p_sysent->sv_table[p->p_sysent->sv_size - 1];
	else
		sa->callp = &p->p_sysent->sv_table[sa->code];

	td->td_retval[0] = 0;
	td->td_retval[1] = frame->tf_edx;

	return (0);
}

/*
 * Store the system call result in the trapframe.  After the generic
 * cpu_set_syscall_retval() has run, any error other than ERESTART and
 * EJUSTRETURN is rewritten in %eax as its Linux errno translation.
 */
static void
linux_set_syscall_retval(struct thread *td, int error)
{
	struct trapframe *frame = td->td_frame;

	cpu_set_syscall_retval(td, error);

	if (__predict_false(error != 0)) {
		if (error != ERESTART && error != EJUSTRETURN)
			frame->tf_eax = bsd_to_linux_errno(error);
	}
}

/*
 * exec_setregs may initialize some registers differently than Linux
 * does, thus potentially confusing Linux binaries. If necessary, we
 * override the exec_setregs default(s) here.
 */
static void
linux_exec_setregs(struct thread *td, struct image_params *imgp,
    uintptr_t stack)
{
	struct pcb *pcb = td->td_pcb;

	exec_setregs(td, imgp, stack);

	/* Linux sets %gs to 0, we default to _udatasel. */
	pcb->pcb_gs = 0;
	load_gs(0);

	/* Use the Linux default for the initial FPU control word. */
	pcb->pcb_initial_npxcw = __LINUX_NPXCW__;
}

/* System entry vector for Linux a.out images. */
struct sysentvec linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_transtrap	= linux_translate_traps,
	.sv_fixup	= linux_fixup,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= &_binary_linux_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_name	= "Linux a.out",
	.sv_coredump	= NULL,
	.sv_imgact_try	= linux_exec_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= VM_MAXUSER_ADDRESS,
	.sv_usrstack	= LINUX_USRSTACK,
	.sv_psstrings	= PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_strings = exec_copyout_strings,
	.sv_setregs	= linux_exec_setregs,
	.sv_fixlimit	= NULL,
	.sv_maxssiz	= NULL,
	.sv_flags	= SV_ABI_LINUX | SV_AOUT | SV_IA32 | SV_ILP32 |
	    SV_SIG_DISCIGN | SV_SIG_WAITNDQ,
	.sv_set_syscall_retval = linux_set_syscall_retval,
	.sv_fetch_syscall_args = linux_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
	.sv_trap	= NULL,
	.sv_onexec	= linux_on_exec,
	.sv_onexit	= linux_on_exit,
	.sv_ontdexit	= linux_thread_dtor,
	.sv_setid_allowed = &linux_setid_allowed_query,
};
INIT_SYSENTVEC(aout_sysvec, &linux_sysvec);

/*
 * System entry vector for Linux ELF32 images.  Unlike the a.out vector it
 * supplies its own string/auxargs copyout routines and a shared page.
 */
struct sysentvec elf_linux_sysvec = {
	.sv_size	= LINUX_SYS_MAXSYSCALL,
	.sv_table	= linux_sysent,
	.sv_transtrap	= linux_translate_traps,
	.sv_fixup	= linux_fixup_elf,
	.sv_sendsig	= linux_sendsig,
	.sv_sigcode	= &_binary_linux_locore_o_start,
	.sv_szsigcode	= &linux_szsigcode,
	.sv_name	= "Linux ELF32",
	.sv_coredump	= elf32_coredump,
	.sv_elf_core_osabi = ELFOSABI_FREEBSD,
	.sv_elf_core_abi_vendor = FREEBSD_ABI_VENDOR,
	.sv_elf_core_prepare_notes = elf32_prepare_notes,
	.sv_imgact_try	= linux_exec_imgact_try,
	.sv_minsigstksz	= LINUX_MINSIGSTKSZ,
	.sv_minuser	= VM_MIN_ADDRESS,
	.sv_maxuser	= VM_MAXUSER_ADDRESS,
	.sv_usrstack	= LINUX_USRSTACK,
	.sv_psstrings	= LINUX_PS_STRINGS,
	.sv_stackprot	= VM_PROT_ALL,
	.sv_copyout_auxargs = linux_copyout_auxargs,
	.sv_copyout_strings = linux_copyout_strings,
	.sv_setregs	= linux_exec_setregs,
	.sv_fixlimit	= NULL,
	.sv_maxssiz	= NULL,
	.sv_flags	= SV_ABI_LINUX | SV_IA32 | SV_ILP32 | SV_SHP |
	    SV_SIG_DISCIGN | SV_SIG_WAITNDQ,
	.sv_set_syscall_retval = linux_set_syscall_retval,
	.sv_fetch_syscall_args = linux_fetch_syscall_args,
	.sv_syscallnames = NULL,
	.sv_shared_page_base = LINUX_SHAREDPAGE,
	.sv_shared_page_len = PAGE_SIZE,
	.sv_schedtail	= linux_schedtail,
	.sv_thread_detach = linux_thread_detach,
	.sv_trap	= NULL,
	.sv_onexec	= linux_on_exec,
	.sv_onexit	= linux_on_exit,
	.sv_ontdexit	= linux_thread_dtor,
	.sv_setid_allowed = &linux_setid_allowed_query,
};

/*
 * SYSINIT hook: compute the size of the embedded trampoline image, panic
 * if it does not fit in the shared page, run the VDSO fixup and
 * relocation steps, create the shared page, copy the image into it and
 * attach the page object to elf_linux_sysvec.  'param' is unused.
 */
static void
linux_vdso_install(void *param)
{

	linux_szsigcode = (&_binary_linux_locore_o_end -
	    &_binary_linux_locore_o_start);

	if (linux_szsigcode > elf_linux_sysvec.sv_shared_page_len)
		panic("Linux invalid vdso size\n");

	__elfN(linux_vdso_fixup)(&elf_linux_sysvec);

	linux_shared_page_obj = __elfN(linux_shared_page_init)
	    (&linux_shared_page_mapping);

	__elfN(linux_vdso_reloc)(&elf_linux_sysvec);

	bcopy(elf_linux_sysvec.sv_sigcode, linux_shared_page_mapping,
	    linux_szsigcode);
	elf_linux_sysvec.sv_shared_page_obj = linux_shared_page_obj;
}
SYSINIT(elf_linux_vdso_init, SI_SUB_EXEC, SI_ORDER_ANY,
    linux_vdso_install, NULL);

/*
 * SYSUNINIT hook: release the shared page object and mapping created by
 * linux_vdso_install().  'param' is unused.
 */
static void
linux_vdso_deinstall(void *param)
{

	__elfN(linux_shared_page_fini)(linux_shared_page_obj,
	    linux_shared_page_mapping);
}
SYSUNINIT(elf_linux_vdso_uninit, SI_SUB_EXEC, SI_ORDER_FIRST,
    linux_vdso_deinstall, NULL);

/* Vendor name and first descriptor word expected in the ELF brand note. */
static char GNU_ABI_VENDOR[] = "GNU";
static int GNULINUX_ABI_DESC = 0;

/*
 * Brand-note callback: locate the note descriptor (which follows the note
 * header and the name padded to sizeof(Elf32_Addr)) and, if its first
 * word equals GNULINUX_ABI_DESC, encode the next three words into *osrel.
 * Returns false, leaving *osrel untouched, when the first word differs.
 */
static bool
linux_trans_osrel(const Elf_Note *note, int32_t *osrel)
{
	const Elf32_Word *desc;
	uintptr_t p;

	p = (uintptr_t)(note + 1);
	p += roundup2(note->n_namesz, sizeof(Elf32_Addr));

	desc = (const Elf32_Word *)p;
	if (desc[0] != GNULINUX_ABI_DESC)
		return (false);

	/*
	 * For Linux we encode osrel using the Linux convention of
	 * 	(version << 16) | (major << 8) | (minor)
	 * See macro in linux_mib.h
	 */
	*osrel = LINUX_KERNVER(desc[1], desc[2], desc[3]);

	return (true);
}

/* Description of the ELF note used to recognise Linux binaries. */
static Elf_Brandnote linux_brandnote = {
	.hdr.n_namesz	= sizeof(GNU_ABI_VENDOR),
	.hdr.n_descsz	= 16,	/* XXX at least 16 */
	.hdr.n_type	= 1,
	.vendor		= GNU_ABI_VENDOR,
	.flags		= BN_TRANSLATE_OSREL,
	.trans_osrel	= linux_trans_osrel
};

/*
 * ELF brands registered by this module.  The three entries differ only
 * in the interpreter path they match.
 */
static Elf32_Brandinfo linux_brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= linux_emul_path,
	.interp_path	= "/lib/ld-linux.so.1",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

static Elf32_Brandinfo linux_glibc2brand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= linux_emul_path,
	.interp_path	= "/lib/ld-linux.so.2",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

static Elf32_Brandinfo linux_muslbrand = {
	.brand		= ELFOSABI_LINUX,
	.machine	= EM_386,
	.compat_3_brand	= "Linux",
	.emul_path	= linux_emul_path,
	.interp_path	= "/lib/ld-musl-i386.so.1",
	.sysvec		= &elf_linux_sysvec,
	.interp_newpath	= NULL,
	.brand_note	= &linux_brandnote,
	.flags		= BI_CAN_EXEC_DYN | BI_BRAND_NOTE
};

/* NULL-terminated list walked by linux_elf_modevent(). */
Elf32_Brandinfo *linux_brandlist[] = {
	&linux_brand,
	&linux_glibc2brand,
	&linux_muslbrand,
	NULL
};

/*
 * Module event handler.
 *
 * MOD_LOAD: insert every brand in linux_brandlist; if all insertions
 * succeed, register the ioctl handlers, initialise the futex list and
 * mutex, create the shm device, register the jail OSD hooks and set
 * stclohz.  Returns EINVAL if any brand could not be inserted.
 *
 * MOD_UNLOAD: return EBUSY if any brand is still in use; otherwise remove
 * the brands and, if every removal succeeds, undo the MOD_LOAD setup.
 * Returns EINVAL if a brand could not be removed.
 *
 * Any other event type returns EOPNOTSUPP.
 */
static int
linux_elf_modevent(module_t mod, int type, void *data)
{
	Elf32_Brandinfo **brandinfo;
	int error;
	struct linux_ioctl_handler **lihp;

	error = 0;

	switch(type) {
	case MOD_LOAD:
		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
		     ++brandinfo)
			if (elf32_insert_brand_entry(*brandinfo) < 0)
				error = EINVAL;
		if (error == 0) {
			SET_FOREACH(lihp, linux_ioctl_handler_set)
				linux_ioctl_register_handler(*lihp);
			LIST_INIT(&futex_list);
			mtx_init(&futex_mtx, "ftllk", NULL, MTX_DEF);
			linux_dev_shm_create();
			linux_osd_jail_register();
			/* Fall back to hz when stathz is zero. */
			stclohz = (stathz ? stathz : hz);
			if (bootverbose)
				printf("Linux ELF exec handler installed\n");
		} else
			printf("cannot insert Linux ELF brand handler\n");
		break;
	case MOD_UNLOAD:
		for (brandinfo = &linux_brandlist[0]; *brandinfo != NULL;
		     ++brandinfo)
			if (elf32_brand_inuse(*brandinfo))
				error = EBUSY;
		if (error == 0) {
			for (brandinfo = &linux_brandlist[0];
			     *brandinfo != NULL; ++brandinfo)
				if (elf32_remove_brand_entry(*brandinfo) < 0)
					error = EINVAL;
		}
		if (error == 0) {
			SET_FOREACH(lihp, linux_ioctl_handler_set)
				linux_ioctl_unregister_handler(*lihp);
			mtx_destroy(&futex_mtx);
			linux_dev_shm_destroy();
			linux_osd_jail_deregister();
			if (bootverbose)
				printf("Linux ELF exec handler removed\n");
		} else
			printf("Could not deinstall ELF interpreter entry\n");
		break;
	default:
		return (EOPNOTSUPP);
	}
	return (error);
}

static moduledata_t linux_elf_mod = {
	"linuxelf",
	linux_elf_modevent,
	0
};

DECLARE_MODULE_TIED(linuxelf, linux_elf_mod, SI_SUB_EXEC, SI_ORDER_ANY);
FEATURE(linux, "Linux 32bit support");