1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2002 Doug Rabson 4 * Copyright (c) 2000 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_compat.h" 35 36 #include <sys/param.h> 37 #include <sys/kernel.h> 38 #include <sys/systm.h> 39 #include <sys/capsicum.h> 40 #include <sys/file.h> 41 #include <sys/fcntl.h> 42 #include <sys/clock.h> 43 #include <sys/imgact.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysproto.h> 55 #include <sys/unistd.h> 56 #include <sys/wait.h> 57 58 #include <machine/frame.h> 59 #include <machine/pcb.h> 60 #include <machine/psl.h> 61 #include <machine/segments.h> 62 #include <machine/specialreg.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 68 #include <compat/freebsd32/freebsd32_util.h> 69 #include <amd64/linux32/linux.h> 70 #include <amd64/linux32/linux32_proto.h> 71 #include <compat/linux/linux_ipc.h> 72 #include <compat/linux/linux_misc.h> 73 #include <compat/linux/linux_signal.h> 74 #include <compat/linux/linux_util.h> 75 #include <compat/linux/linux_emul.h> 76 77 static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); 78 79 struct l_old_select_argv { 80 l_int nfds; 81 l_uintptr_t readfds; 82 l_uintptr_t writefds; 83 l_uintptr_t exceptfds; 84 l_uintptr_t timeout; 85 } __packed; 86 87 static int linux_mmap_common(struct thread *td, l_uintptr_t addr, 88 l_size_t len, l_int prot, l_int flags, l_int fd, 89 l_loff_t pos); 90 91 static void 92 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 93 { 94 95 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 96 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 97 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 98 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 99 lru->ru_maxrss = ru->ru_maxrss; 100 lru->ru_ixrss = ru->ru_ixrss; 101 lru->ru_idrss = ru->ru_idrss; 102 lru->ru_isrss = ru->ru_isrss; 103 lru->ru_minflt = ru->ru_minflt; 104 lru->ru_majflt = ru->ru_majflt; 105 lru->ru_nswap = ru->ru_nswap; 106 lru->ru_inblock = ru->ru_inblock; 107 lru->ru_oublock = ru->ru_oublock; 108 lru->ru_msgsnd = ru->ru_msgsnd; 109 lru->ru_msgrcv = ru->ru_msgrcv; 110 lru->ru_nsignals = ru->ru_nsignals; 111 lru->ru_nvcsw = ru->ru_nvcsw; 112 lru->ru_nivcsw = ru->ru_nivcsw; 113 } 114 115 int 116 linux_copyout_rusage(struct rusage *ru, void *uaddr) 117 { 118 struct l_rusage lru; 119 120 bsd_to_linux_rusage(ru, &lru); 121 122 return (copyout(&lru, uaddr, sizeof(struct l_rusage))); 123 } 124 125 int 126 linux_execve(struct thread *td, struct linux_execve_args *args) 127 { 128 struct image_args eargs; 129 char *path; 130 int error; 131 132 LCONVPATHEXIST(td, args->path, &path); 133 134 #ifdef DEBUG 135 if (ldebug(execve)) 136 printf(ARGS(execve, "%s"), path); 137 #endif 138 139 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 140 args->argp, args->envp); 141 free(path, M_TEMP); 142 if (error == 0) 143 error = linux_common_execve(td, &eargs); 144 return (error); 145 } 146 147 CTASSERT(sizeof(struct l_iovec32) == 8); 148 149 static int 150 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 151 { 152 struct l_iovec32 iov32; 153 struct iovec *iov; 154 struct uio *uio; 155 uint32_t iovlen; 156 int error, i; 157 158 *uiop = NULL; 159 if (iovcnt > UIO_MAXIOV) 160 return (EINVAL); 161 iovlen = iovcnt * sizeof(struct iovec); 162 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 163 iov = (struct iovec *)(uio + 1); 164 for (i = 0; i < iovcnt; i++) { 165 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 166 if (error) { 167 free(uio, M_IOV); 168 return (error); 169 } 170 iov[i].iov_base = PTRIN(iov32.iov_base); 171 iov[i].iov_len = iov32.iov_len; 172 } 173 uio->uio_iov = iov; 174 uio->uio_iovcnt = iovcnt; 175 uio->uio_segflg = UIO_USERSPACE; 176 uio->uio_offset = -1; 177 uio->uio_resid = 0; 178 for (i = 0; i < iovcnt; i++) { 179 if (iov->iov_len > INT_MAX - uio->uio_resid) { 180 free(uio, M_IOV); 181 return (EINVAL); 182 } 183 uio->uio_resid += iov->iov_len; 184 iov++; 185 } 186 *uiop = uio; 187 return (0); 188 } 189 190 int 191 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 192 int error) 193 { 194 struct l_iovec32 iov32; 195 struct iovec *iov; 196 uint32_t iovlen; 197 int i; 198 199 *iovp = NULL; 200 if (iovcnt > UIO_MAXIOV) 201 return (error); 202 iovlen = iovcnt * sizeof(struct iovec); 203 iov = malloc(iovlen, M_IOV, M_WAITOK); 204 for (i = 0; i < iovcnt; i++) { 205 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 206 if (error) { 207 free(iov, M_IOV); 208 return (error); 209 } 210 iov[i].iov_base = PTRIN(iov32.iov_base); 211 iov[i].iov_len = iov32.iov_len; 212 } 213 *iovp = iov; 214 return(0); 215 216 } 217 218 int 219 linux_readv(struct thread *td, struct linux_readv_args *uap) 220 { 221 struct uio *auio; 222 int error; 223 224 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 225 if (error) 226 return (error); 227 error = kern_readv(td, uap->fd, auio); 228 free(auio, M_IOV); 229 return (error); 230 } 231 232 int 233 linux_writev(struct thread *td, struct linux_writev_args *uap) 234 { 235 struct uio *auio; 236 int error; 237 238 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 239 if (error) 240 return (error); 241 error = kern_writev(td, uap->fd, auio); 242 free(auio, M_IOV); 243 return (error); 244 } 245 246 struct l_ipc_kludge { 247 l_uintptr_t msgp; 248 l_long msgtyp; 249 } __packed; 250 251 int 252 linux_ipc(struct thread *td, struct linux_ipc_args *args) 253 { 254 255 switch (args->what & 0xFFFF) { 256 case LINUX_SEMOP: { 257 struct linux_semop_args a; 258 259 a.semid = args->arg1; 260 a.tsops = args->ptr; 261 a.nsops = args->arg2; 262 return (linux_semop(td, &a)); 263 } 264 case LINUX_SEMGET: { 265 struct linux_semget_args a; 266 267 a.key = args->arg1; 268 a.nsems = args->arg2; 269 a.semflg = args->arg3; 270 return (linux_semget(td, &a)); 271 } 272 case LINUX_SEMCTL: { 273 struct linux_semctl_args a; 274 int error; 275 276 a.semid = args->arg1; 277 a.semnum = args->arg2; 278 a.cmd = args->arg3; 279 error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 280 if (error) 281 return (error); 282 return (linux_semctl(td, &a)); 283 } 284 case LINUX_MSGSND: { 285 struct linux_msgsnd_args a; 286 287 a.msqid = args->arg1; 288 a.msgp = args->ptr; 289 a.msgsz = args->arg2; 290 a.msgflg = args->arg3; 291 return (linux_msgsnd(td, &a)); 292 } 293 case LINUX_MSGRCV: { 294 struct linux_msgrcv_args a; 295 296 a.msqid = args->arg1; 297 a.msgsz = args->arg2; 298 a.msgflg = args->arg3; 299 if ((args->what >> 16) == 0) { 300 struct l_ipc_kludge tmp; 301 int error; 302 303 if (args->ptr == 0) 304 return (EINVAL); 305 error = copyin(args->ptr, &tmp, sizeof(tmp)); 306 if (error) 307 return (error); 308 a.msgp = PTRIN(tmp.msgp); 309 a.msgtyp = tmp.msgtyp; 310 } else { 311 a.msgp = args->ptr; 312 a.msgtyp = args->arg5; 313 } 314 return (linux_msgrcv(td, &a)); 315 } 316 case LINUX_MSGGET: { 317 struct linux_msgget_args a; 318 319 a.key = args->arg1; 320 a.msgflg = args->arg2; 321 return (linux_msgget(td, &a)); 322 } 323 case LINUX_MSGCTL: { 324 struct linux_msgctl_args a; 325 326 a.msqid = args->arg1; 327 a.cmd = args->arg2; 328 a.buf = args->ptr; 329 return (linux_msgctl(td, &a)); 330 } 331 case LINUX_SHMAT: { 332 struct linux_shmat_args a; 333 334 a.shmid = args->arg1; 335 a.shmaddr = args->ptr; 336 a.shmflg = args->arg2; 337 a.raddr = PTRIN((l_uint)args->arg3); 338 return (linux_shmat(td, &a)); 339 } 340 case LINUX_SHMDT: { 341 struct linux_shmdt_args a; 342 343 a.shmaddr = args->ptr; 344 return (linux_shmdt(td, &a)); 345 } 346 case LINUX_SHMGET: { 347 struct linux_shmget_args a; 348 349 a.key = args->arg1; 350 a.size = args->arg2; 351 a.shmflg = args->arg3; 352 return (linux_shmget(td, &a)); 353 } 354 case LINUX_SHMCTL: { 355 struct linux_shmctl_args a; 356 357 a.shmid = args->arg1; 358 a.cmd = args->arg2; 359 a.buf = args->ptr; 360 return (linux_shmctl(td, &a)); 361 } 362 default: 363 break; 364 } 365 366 return (EINVAL); 367 } 368 369 int 370 linux_old_select(struct thread *td, struct linux_old_select_args *args) 371 { 372 struct l_old_select_argv linux_args; 373 struct linux_select_args newsel; 374 int error; 375 376 #ifdef DEBUG 377 if (ldebug(old_select)) 378 printf(ARGS(old_select, "%p"), args->ptr); 379 #endif 380 381 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 382 if (error) 383 return (error); 384 385 newsel.nfds = linux_args.nfds; 386 newsel.readfds = PTRIN(linux_args.readfds); 387 newsel.writefds = PTRIN(linux_args.writefds); 388 newsel.exceptfds = PTRIN(linux_args.exceptfds); 389 newsel.timeout = PTRIN(linux_args.timeout); 390 return (linux_select(td, &newsel)); 391 } 392 393 int 394 linux_set_cloned_tls(struct thread *td, void *desc) 395 { 396 struct user_segment_descriptor sd; 397 struct l_user_desc info; 398 struct pcb *pcb; 399 int error; 400 int a[2]; 401 402 error = copyin(desc, &info, sizeof(struct l_user_desc)); 403 if (error) { 404 printf(LMSG("copyin failed!")); 405 } else { 406 /* We might copy out the entry_number as GUGS32_SEL. */ 407 info.entry_number = GUGS32_SEL; 408 error = copyout(&info, desc, sizeof(struct l_user_desc)); 409 if (error) 410 printf(LMSG("copyout failed!")); 411 412 a[0] = LINUX_LDT_entry_a(&info); 413 a[1] = LINUX_LDT_entry_b(&info); 414 415 memcpy(&sd, &a, sizeof(a)); 416 #ifdef DEBUG 417 if (ldebug(clone)) 418 printf("Segment created in clone with " 419 "CLONE_SETTLS: lobase: %x, hibase: %x, " 420 "lolimit: %x, hilimit: %x, type: %i, " 421 "dpl: %i, p: %i, xx: %i, long: %i, " 422 "def32: %i, gran: %i\n", sd.sd_lobase, 423 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 424 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 425 sd.sd_long, sd.sd_def32, sd.sd_gran); 426 #endif 427 pcb = td->td_pcb; 428 pcb->pcb_gsbase = (register_t)info.base_addr; 429 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 430 set_pcb_flags(pcb, PCB_32BIT); 431 } 432 433 return (error); 434 } 435 436 int 437 linux_set_upcall_kse(struct thread *td, register_t stack) 438 { 439 440 if (stack) 441 td->td_frame->tf_rsp = stack; 442 443 /* 444 * The newly created Linux thread returns 445 * to the user space by the same path that a parent do. 446 */ 447 td->td_frame->tf_rax = 0; 448 return (0); 449 } 450 451 #define STACK_SIZE (2 * 1024 * 1024) 452 #define GUARD_SIZE (4 * PAGE_SIZE) 453 454 int 455 linux_mmap2(struct thread *td, struct linux_mmap2_args *args) 456 { 457 458 #ifdef DEBUG 459 if (ldebug(mmap2)) 460 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 461 args->addr, args->len, args->prot, 462 args->flags, args->fd, args->pgoff); 463 #endif 464 465 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 466 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 467 PAGE_SIZE)); 468 } 469 470 int 471 linux_mmap(struct thread *td, struct linux_mmap_args *args) 472 { 473 int error; 474 struct l_mmap_argv linux_args; 475 476 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 477 if (error) 478 return (error); 479 480 #ifdef DEBUG 481 if (ldebug(mmap)) 482 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 483 linux_args.addr, linux_args.len, linux_args.prot, 484 linux_args.flags, linux_args.fd, linux_args.pgoff); 485 #endif 486 487 return (linux_mmap_common(td, linux_args.addr, linux_args.len, 488 linux_args.prot, linux_args.flags, linux_args.fd, 489 (uint32_t)linux_args.pgoff)); 490 } 491 492 static int 493 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 494 l_int flags, l_int fd, l_loff_t pos) 495 { 496 struct proc *p = td->td_proc; 497 struct mmap_args /* { 498 caddr_t addr; 499 size_t len; 500 int prot; 501 int flags; 502 int fd; 503 long pad; 504 off_t pos; 505 } */ bsd_args; 506 int error; 507 struct file *fp; 508 cap_rights_t rights; 509 510 error = 0; 511 bsd_args.flags = 0; 512 fp = NULL; 513 514 /* 515 * Linux mmap(2): 516 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 517 */ 518 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 519 return (EINVAL); 520 521 if (flags & LINUX_MAP_SHARED) 522 bsd_args.flags |= MAP_SHARED; 523 if (flags & LINUX_MAP_PRIVATE) 524 bsd_args.flags |= MAP_PRIVATE; 525 if (flags & LINUX_MAP_FIXED) 526 bsd_args.flags |= MAP_FIXED; 527 if (flags & LINUX_MAP_ANON) { 528 /* Enforce pos to be on page boundary, then ignore. */ 529 if ((pos & PAGE_MASK) != 0) 530 return (EINVAL); 531 pos = 0; 532 bsd_args.flags |= MAP_ANON; 533 } else 534 bsd_args.flags |= MAP_NOSYNC; 535 if (flags & LINUX_MAP_GROWSDOWN) 536 bsd_args.flags |= MAP_STACK; 537 538 /* 539 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 540 * on Linux/i386. We do this to ensure maximum compatibility. 541 * Linux/ia64 does the same in i386 emulation mode. 542 */ 543 bsd_args.prot = prot; 544 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 545 bsd_args.prot |= PROT_READ | PROT_EXEC; 546 547 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 548 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 549 if (bsd_args.fd != -1) { 550 /* 551 * Linux follows Solaris mmap(2) description: 552 * The file descriptor fildes is opened with 553 * read permission, regardless of the 554 * protection options specified. 555 */ 556 557 error = fget(td, bsd_args.fd, 558 cap_rights_init(&rights, CAP_MMAP), &fp); 559 if (error != 0) 560 return (error); 561 if (fp->f_type != DTYPE_VNODE) { 562 fdrop(fp, td); 563 return (EINVAL); 564 } 565 566 /* Linux mmap() just fails for O_WRONLY files */ 567 if (!(fp->f_flag & FREAD)) { 568 fdrop(fp, td); 569 return (EACCES); 570 } 571 572 fdrop(fp, td); 573 } 574 575 if (flags & LINUX_MAP_GROWSDOWN) { 576 /* 577 * The Linux MAP_GROWSDOWN option does not limit auto 578 * growth of the region. Linux mmap with this option 579 * takes as addr the initial BOS, and as len, the initial 580 * region size. It can then grow down from addr without 581 * limit. However, Linux threads has an implicit internal 582 * limit to stack size of STACK_SIZE. Its just not 583 * enforced explicitly in Linux. But, here we impose 584 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 585 * region, since we can do this with our mmap. 586 * 587 * Our mmap with MAP_STACK takes addr as the maximum 588 * downsize limit on BOS, and as len the max size of 589 * the region. It then maps the top SGROWSIZ bytes, 590 * and auto grows the region down, up to the limit 591 * in addr. 592 * 593 * If we don't use the MAP_STACK option, the effect 594 * of this code is to allocate a stack region of a 595 * fixed size of (STACK_SIZE - GUARD_SIZE). 596 */ 597 598 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 599 /* 600 * Some Linux apps will attempt to mmap 601 * thread stacks near the top of their 602 * address space. If their TOS is greater 603 * than vm_maxsaddr, vm_map_growstack() 604 * will confuse the thread stack with the 605 * process stack and deliver a SEGV if they 606 * attempt to grow the thread stack past their 607 * current stacksize rlimit. To avoid this, 608 * adjust vm_maxsaddr upwards to reflect 609 * the current stacksize rlimit rather 610 * than the maximum possible stacksize. 611 * It would be better to adjust the 612 * mmap'ed region, but some apps do not check 613 * mmap's return value. 614 */ 615 PROC_LOCK(p); 616 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 617 lim_cur_proc(p, RLIMIT_STACK); 618 PROC_UNLOCK(p); 619 } 620 621 /* 622 * This gives us our maximum stack size and a new BOS. 623 * If we're using VM_STACK, then mmap will just map 624 * the top SGROWSIZ bytes, and let the stack grow down 625 * to the limit at BOS. If we're not using VM_STACK 626 * we map the full stack, since we don't have a way 627 * to autogrow it. 628 */ 629 if (len > STACK_SIZE - GUARD_SIZE) { 630 bsd_args.addr = (caddr_t)PTRIN(addr); 631 bsd_args.len = len; 632 } else { 633 bsd_args.addr = (caddr_t)PTRIN(addr) - 634 (STACK_SIZE - GUARD_SIZE - len); 635 bsd_args.len = STACK_SIZE - GUARD_SIZE; 636 } 637 } else { 638 bsd_args.addr = (caddr_t)PTRIN(addr); 639 bsd_args.len = len; 640 } 641 bsd_args.pos = pos; 642 643 #ifdef DEBUG 644 if (ldebug(mmap)) 645 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 646 __func__, 647 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 648 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 649 #endif 650 error = sys_mmap(td, &bsd_args); 651 #ifdef DEBUG 652 if (ldebug(mmap)) 653 printf("-> %s() return: 0x%x (0x%08x)\n", 654 __func__, error, (u_int)td->td_retval[0]); 655 #endif 656 return (error); 657 } 658 659 int 660 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 661 { 662 struct mprotect_args bsd_args; 663 664 bsd_args.addr = uap->addr; 665 bsd_args.len = uap->len; 666 bsd_args.prot = uap->prot; 667 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 668 bsd_args.prot |= PROT_READ | PROT_EXEC; 669 return (sys_mprotect(td, &bsd_args)); 670 } 671 672 int 673 linux_iopl(struct thread *td, struct linux_iopl_args *args) 674 { 675 int error; 676 677 if (args->level < 0 || args->level > 3) 678 return (EINVAL); 679 if ((error = priv_check(td, PRIV_IO)) != 0) 680 return (error); 681 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 682 return (error); 683 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 684 (args->level * (PSL_IOPL / 3)); 685 686 return (0); 687 } 688 689 int 690 linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 691 { 692 l_osigaction_t osa; 693 l_sigaction_t act, oact; 694 int error; 695 696 #ifdef DEBUG 697 if (ldebug(sigaction)) 698 printf(ARGS(sigaction, "%d, %p, %p"), 699 args->sig, (void *)args->nsa, (void *)args->osa); 700 #endif 701 702 if (args->nsa != NULL) { 703 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 704 if (error) 705 return (error); 706 act.lsa_handler = osa.lsa_handler; 707 act.lsa_flags = osa.lsa_flags; 708 act.lsa_restorer = osa.lsa_restorer; 709 LINUX_SIGEMPTYSET(act.lsa_mask); 710 act.lsa_mask.__mask = osa.lsa_mask; 711 } 712 713 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 714 args->osa ? &oact : NULL); 715 716 if (args->osa != NULL && !error) { 717 osa.lsa_handler = oact.lsa_handler; 718 osa.lsa_flags = oact.lsa_flags; 719 osa.lsa_restorer = oact.lsa_restorer; 720 osa.lsa_mask = oact.lsa_mask.__mask; 721 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 722 } 723 724 return (error); 725 } 726 727 /* 728 * Linux has two extra args, restart and oldmask. We don't use these, 729 * but it seems that "restart" is actually a context pointer that 730 * enables the signal to happen with a different register set. 731 */ 732 int 733 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 734 { 735 sigset_t sigmask; 736 l_sigset_t mask; 737 738 #ifdef DEBUG 739 if (ldebug(sigsuspend)) 740 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 741 #endif 742 743 LINUX_SIGEMPTYSET(mask); 744 mask.__mask = args->mask; 745 linux_to_bsd_sigset(&mask, &sigmask); 746 return (kern_sigsuspend(td, sigmask)); 747 } 748 749 int 750 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 751 { 752 l_sigset_t lmask; 753 sigset_t sigmask; 754 int error; 755 756 #ifdef DEBUG 757 if (ldebug(rt_sigsuspend)) 758 printf(ARGS(rt_sigsuspend, "%p, %d"), 759 (void *)uap->newset, uap->sigsetsize); 760 #endif 761 762 if (uap->sigsetsize != sizeof(l_sigset_t)) 763 return (EINVAL); 764 765 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 766 if (error) 767 return (error); 768 769 linux_to_bsd_sigset(&lmask, &sigmask); 770 return (kern_sigsuspend(td, sigmask)); 771 } 772 773 int 774 linux_pause(struct thread *td, struct linux_pause_args *args) 775 { 776 struct proc *p = td->td_proc; 777 sigset_t sigmask; 778 779 #ifdef DEBUG 780 if (ldebug(pause)) 781 printf(ARGS(pause, "")); 782 #endif 783 784 PROC_LOCK(p); 785 sigmask = td->td_sigmask; 786 PROC_UNLOCK(p); 787 return (kern_sigsuspend(td, sigmask)); 788 } 789 790 int 791 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 792 { 793 stack_t ss, oss; 794 l_stack_t lss; 795 int error; 796 797 #ifdef DEBUG 798 if (ldebug(sigaltstack)) 799 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 800 #endif 801 802 if (uap->uss != NULL) { 803 error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 804 if (error) 805 return (error); 806 807 ss.ss_sp = PTRIN(lss.ss_sp); 808 ss.ss_size = lss.ss_size; 809 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 810 } 811 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 812 (uap->uoss != NULL) ? &oss : NULL); 813 if (!error && uap->uoss != NULL) { 814 lss.ss_sp = PTROUT(oss.ss_sp); 815 lss.ss_size = oss.ss_size; 816 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 817 error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 818 } 819 820 return (error); 821 } 822 823 int 824 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 825 { 826 struct ftruncate_args sa; 827 828 #ifdef DEBUG 829 if (ldebug(ftruncate64)) 830 printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 831 (intmax_t)args->length); 832 #endif 833 834 sa.fd = args->fd; 835 sa.length = args->length; 836 return sys_ftruncate(td, &sa); 837 } 838 839 int 840 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 841 { 842 struct timeval atv; 843 l_timeval atv32; 844 struct timezone rtz; 845 int error = 0; 846 847 if (uap->tp) { 848 microtime(&atv); 849 atv32.tv_sec = atv.tv_sec; 850 atv32.tv_usec = atv.tv_usec; 851 error = copyout(&atv32, uap->tp, sizeof(atv32)); 852 } 853 if (error == 0 && uap->tzp != NULL) { 854 rtz.tz_minuteswest = tz_minuteswest; 855 rtz.tz_dsttime = tz_dsttime; 856 error = copyout(&rtz, uap->tzp, sizeof(rtz)); 857 } 858 return (error); 859 } 860 861 int 862 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 863 { 864 l_timeval atv32; 865 struct timeval atv, *tvp; 866 struct timezone atz, *tzp; 867 int error; 868 869 if (uap->tp) { 870 error = copyin(uap->tp, &atv32, sizeof(atv32)); 871 if (error) 872 return (error); 873 atv.tv_sec = atv32.tv_sec; 874 atv.tv_usec = atv32.tv_usec; 875 tvp = &atv; 876 } else 877 tvp = NULL; 878 if (uap->tzp) { 879 error = copyin(uap->tzp, &atz, sizeof(atz)); 880 if (error) 881 return (error); 882 tzp = &atz; 883 } else 884 tzp = NULL; 885 return (kern_settimeofday(td, tvp, tzp)); 886 } 887 888 int 889 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 890 { 891 struct rusage s; 892 int error; 893 894 error = kern_getrusage(td, uap->who, &s); 895 if (error != 0) 896 return (error); 897 if (uap->rusage != NULL) 898 error = linux_copyout_rusage(&s, uap->rusage); 899 return (error); 900 } 901 902 int 903 linux_set_thread_area(struct thread *td, 904 struct linux_set_thread_area_args *args) 905 { 906 struct l_user_desc info; 907 struct user_segment_descriptor sd; 908 struct pcb *pcb; 909 int a[2]; 910 int error; 911 912 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 913 if (error) 914 return (error); 915 916 #ifdef DEBUG 917 if (ldebug(set_thread_area)) 918 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 919 "%i, %i, %i"), info.entry_number, info.base_addr, 920 info.limit, info.seg_32bit, info.contents, 921 info.read_exec_only, info.limit_in_pages, 922 info.seg_not_present, info.useable); 923 #endif 924 925 /* 926 * Semantics of Linux version: every thread in the system has array 927 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 928 * This syscall loads one of the selected TLS decriptors with a value 929 * and also loads GDT descriptors 6, 7 and 8 with the content of 930 * the per-thread descriptors. 931 * 932 * Semantics of FreeBSD version: I think we can ignore that Linux has 933 * three per-thread descriptors and use just the first one. 934 * The tls_array[] is used only in [gs]et_thread_area() syscalls and 935 * for loading the GDT descriptors. We use just one GDT descriptor 936 * for TLS, so we will load just one. 937 * 938 * XXX: This doesn't work when a user space process tries to use more 939 * than one TLS segment. Comment in the Linux source says wine might 940 * do this. 941 */ 942 943 /* 944 * GLIBC reads current %gs and call set_thread_area() with it. 945 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 946 * we use these segments. 947 */ 948 switch (info.entry_number) { 949 case GUGS32_SEL: 950 case GUDATA_SEL: 951 case 6: 952 case -1: 953 info.entry_number = GUGS32_SEL; 954 break; 955 default: 956 return (EINVAL); 957 } 958 959 /* 960 * We have to copy out the GDT entry we use. 961 * 962 * XXX: What if a user space program does not check the return value 963 * and tries to use 6, 7 or 8? 964 */ 965 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 966 if (error) 967 return (error); 968 969 if (LINUX_LDT_empty(&info)) { 970 a[0] = 0; 971 a[1] = 0; 972 } else { 973 a[0] = LINUX_LDT_entry_a(&info); 974 a[1] = LINUX_LDT_entry_b(&info); 975 } 976 977 memcpy(&sd, &a, sizeof(a)); 978 #ifdef DEBUG 979 if (ldebug(set_thread_area)) 980 printf("Segment created in set_thread_area: " 981 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 982 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 983 "def32: %i, gran: %i\n", 984 sd.sd_lobase, 985 sd.sd_hibase, 986 sd.sd_lolimit, 987 sd.sd_hilimit, 988 sd.sd_type, 989 sd.sd_dpl, 990 sd.sd_p, 991 sd.sd_xx, 992 sd.sd_long, 993 sd.sd_def32, 994 sd.sd_gran); 995 #endif 996 997 pcb = td->td_pcb; 998 pcb->pcb_gsbase = (register_t)info.base_addr; 999 set_pcb_flags(pcb, PCB_32BIT); 1000 update_gdt_gsbase(td, info.base_addr); 1001 1002 return (0); 1003 } 1004