1 /*- 2 * Copyright (c) 2004 Tim J. Robbins 3 * Copyright (c) 2002 Doug Rabson 4 * Copyright (c) 2000 Marcel Moolenaar 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer 12 * in this position and unchanged. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 3. The name of the author may not be used to endorse or promote products 17 * derived from this software without specific prior written permission. 18 * 19 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 20 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 21 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 22 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 23 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 24 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 28 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include "opt_compat.h" 35 36 #include <sys/param.h> 37 #include <sys/kernel.h> 38 #include <sys/systm.h> 39 #include <sys/capsicum.h> 40 #include <sys/file.h> 41 #include <sys/fcntl.h> 42 #include <sys/clock.h> 43 #include <sys/imgact.h> 44 #include <sys/limits.h> 45 #include <sys/lock.h> 46 #include <sys/malloc.h> 47 #include <sys/mman.h> 48 #include <sys/mutex.h> 49 #include <sys/priv.h> 50 #include <sys/proc.h> 51 #include <sys/resource.h> 52 #include <sys/resourcevar.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysproto.h> 55 #include <sys/unistd.h> 56 #include <sys/wait.h> 57 58 #include <machine/frame.h> 59 #include <machine/pcb.h> 60 #include <machine/psl.h> 61 #include <machine/segments.h> 62 #include <machine/specialreg.h> 63 64 #include <vm/vm.h> 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 68 #include <compat/freebsd32/freebsd32_util.h> 69 #include <amd64/linux32/linux.h> 70 #include <amd64/linux32/linux32_proto.h> 71 #include <compat/linux/linux_ipc.h> 72 #include <compat/linux/linux_misc.h> 73 #include <compat/linux/linux_signal.h> 74 #include <compat/linux/linux_util.h> 75 #include <compat/linux/linux_emul.h> 76 77 static void bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru); 78 79 struct l_old_select_argv { 80 l_int nfds; 81 l_uintptr_t readfds; 82 l_uintptr_t writefds; 83 l_uintptr_t exceptfds; 84 l_uintptr_t timeout; 85 } __packed; 86 87 static int linux_mmap_common(struct thread *td, l_uintptr_t addr, 88 l_size_t len, l_int prot, l_int flags, l_int fd, 89 l_loff_t pos); 90 91 static void 92 bsd_to_linux_rusage(struct rusage *ru, struct l_rusage *lru) 93 { 94 95 lru->ru_utime.tv_sec = ru->ru_utime.tv_sec; 96 lru->ru_utime.tv_usec = ru->ru_utime.tv_usec; 97 lru->ru_stime.tv_sec = ru->ru_stime.tv_sec; 98 lru->ru_stime.tv_usec = ru->ru_stime.tv_usec; 99 lru->ru_maxrss = ru->ru_maxrss; 100 lru->ru_ixrss = ru->ru_ixrss; 101 lru->ru_idrss = ru->ru_idrss; 102 lru->ru_isrss = ru->ru_isrss; 103 lru->ru_minflt = ru->ru_minflt; 104 lru->ru_majflt = ru->ru_majflt; 105 lru->ru_nswap = ru->ru_nswap; 106 lru->ru_inblock = ru->ru_inblock; 107 lru->ru_oublock = ru->ru_oublock; 108 lru->ru_msgsnd = ru->ru_msgsnd; 109 lru->ru_msgrcv = ru->ru_msgrcv; 110 lru->ru_nsignals = ru->ru_nsignals; 111 lru->ru_nvcsw = ru->ru_nvcsw; 112 lru->ru_nivcsw = ru->ru_nivcsw; 113 } 114 115 int 116 linux_copyout_rusage(struct rusage *ru, void *uaddr) 117 { 118 struct l_rusage lru; 119 120 bsd_to_linux_rusage(ru, &lru); 121 122 return (copyout(&lru, uaddr, sizeof(struct l_rusage))); 123 } 124 125 int 126 linux_execve(struct thread *td, struct linux_execve_args *args) 127 { 128 struct image_args eargs; 129 char *path; 130 int error; 131 132 LCONVPATHEXIST(td, args->path, &path); 133 134 #ifdef DEBUG 135 if (ldebug(execve)) 136 printf(ARGS(execve, "%s"), path); 137 #endif 138 139 error = freebsd32_exec_copyin_args(&eargs, path, UIO_SYSSPACE, 140 args->argp, args->envp); 141 free(path, M_TEMP); 142 if (error == 0) 143 error = linux_common_execve(td, &eargs); 144 return (error); 145 } 146 147 CTASSERT(sizeof(struct l_iovec32) == 8); 148 149 static int 150 linux32_copyinuio(struct l_iovec32 *iovp, l_ulong iovcnt, struct uio **uiop) 151 { 152 struct l_iovec32 iov32; 153 struct iovec *iov; 154 struct uio *uio; 155 uint32_t iovlen; 156 int error, i; 157 158 *uiop = NULL; 159 if (iovcnt > UIO_MAXIOV) 160 return (EINVAL); 161 iovlen = iovcnt * sizeof(struct iovec); 162 uio = malloc(iovlen + sizeof(*uio), M_IOV, M_WAITOK); 163 iov = (struct iovec *)(uio + 1); 164 for (i = 0; i < iovcnt; i++) { 165 error = copyin(&iovp[i], &iov32, sizeof(struct l_iovec32)); 166 if (error) { 167 free(uio, M_IOV); 168 return (error); 169 } 170 iov[i].iov_base = PTRIN(iov32.iov_base); 171 iov[i].iov_len = iov32.iov_len; 172 } 173 uio->uio_iov = iov; 174 uio->uio_iovcnt = iovcnt; 175 uio->uio_segflg = UIO_USERSPACE; 176 uio->uio_offset = -1; 177 uio->uio_resid = 0; 178 for (i = 0; i < iovcnt; i++) { 179 if (iov->iov_len > INT_MAX - uio->uio_resid) { 180 free(uio, M_IOV); 181 return (EINVAL); 182 } 183 uio->uio_resid += iov->iov_len; 184 iov++; 185 } 186 *uiop = uio; 187 return (0); 188 } 189 190 int 191 linux32_copyiniov(struct l_iovec32 *iovp32, l_ulong iovcnt, struct iovec **iovp, 192 int error) 193 { 194 struct l_iovec32 iov32; 195 struct iovec *iov; 196 uint32_t iovlen; 197 int i; 198 199 *iovp = NULL; 200 if (iovcnt > UIO_MAXIOV) 201 return (error); 202 iovlen = iovcnt * sizeof(struct iovec); 203 iov = malloc(iovlen, M_IOV, M_WAITOK); 204 for (i = 0; i < iovcnt; i++) { 205 error = copyin(&iovp32[i], &iov32, sizeof(struct l_iovec32)); 206 if (error) { 207 free(iov, M_IOV); 208 return (error); 209 } 210 iov[i].iov_base = PTRIN(iov32.iov_base); 211 iov[i].iov_len = iov32.iov_len; 212 } 213 *iovp = iov; 214 return(0); 215 216 } 217 218 int 219 linux_readv(struct thread *td, struct linux_readv_args *uap) 220 { 221 struct uio *auio; 222 int error; 223 224 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 225 if (error) 226 return (error); 227 error = kern_readv(td, uap->fd, auio); 228 free(auio, M_IOV); 229 return (error); 230 } 231 232 int 233 linux_writev(struct thread *td, struct linux_writev_args *uap) 234 { 235 struct uio *auio; 236 int error; 237 238 error = linux32_copyinuio(uap->iovp, uap->iovcnt, &auio); 239 if (error) 240 return (error); 241 error = kern_writev(td, uap->fd, auio); 242 free(auio, M_IOV); 243 return (error); 244 } 245 246 struct l_ipc_kludge { 247 l_uintptr_t msgp; 248 l_long msgtyp; 249 } __packed; 250 251 int 252 linux_ipc(struct thread *td, struct linux_ipc_args *args) 253 { 254 255 switch (args->what & 0xFFFF) { 256 case LINUX_SEMOP: { 257 struct linux_semop_args a; 258 259 a.semid = args->arg1; 260 a.tsops = args->ptr; 261 a.nsops = args->arg2; 262 return (linux_semop(td, &a)); 263 } 264 case LINUX_SEMGET: { 265 struct linux_semget_args a; 266 267 a.key = args->arg1; 268 a.nsems = args->arg2; 269 a.semflg = args->arg3; 270 return (linux_semget(td, &a)); 271 } 272 case LINUX_SEMCTL: { 273 struct linux_semctl_args a; 274 int error; 275 276 a.semid = args->arg1; 277 a.semnum = args->arg2; 278 a.cmd = args->arg3; 279 error = copyin(args->ptr, &a.arg, sizeof(a.arg)); 280 if (error) 281 return (error); 282 return (linux_semctl(td, &a)); 283 } 284 case LINUX_MSGSND: { 285 struct linux_msgsnd_args a; 286 287 a.msqid = args->arg1; 288 a.msgp = args->ptr; 289 a.msgsz = args->arg2; 290 a.msgflg = args->arg3; 291 return (linux_msgsnd(td, &a)); 292 } 293 case LINUX_MSGRCV: { 294 struct linux_msgrcv_args a; 295 296 a.msqid = args->arg1; 297 a.msgsz = args->arg2; 298 a.msgflg = args->arg3; 299 if ((args->what >> 16) == 0) { 300 struct l_ipc_kludge tmp; 301 int error; 302 303 if (args->ptr == 0) 304 return (EINVAL); 305 error = copyin(args->ptr, &tmp, sizeof(tmp)); 306 if (error) 307 return (error); 308 a.msgp = PTRIN(tmp.msgp); 309 a.msgtyp = tmp.msgtyp; 310 } else { 311 a.msgp = args->ptr; 312 a.msgtyp = args->arg5; 313 } 314 return (linux_msgrcv(td, &a)); 315 } 316 case LINUX_MSGGET: { 317 struct linux_msgget_args a; 318 319 a.key = args->arg1; 320 a.msgflg = args->arg2; 321 return (linux_msgget(td, &a)); 322 } 323 case LINUX_MSGCTL: { 324 struct linux_msgctl_args a; 325 326 a.msqid = args->arg1; 327 a.cmd = args->arg2; 328 a.buf = args->ptr; 329 return (linux_msgctl(td, &a)); 330 } 331 case LINUX_SHMAT: { 332 struct linux_shmat_args a; 333 334 a.shmid = args->arg1; 335 a.shmaddr = args->ptr; 336 a.shmflg = args->arg2; 337 a.raddr = PTRIN((l_uint)args->arg3); 338 return (linux_shmat(td, &a)); 339 } 340 case LINUX_SHMDT: { 341 struct linux_shmdt_args a; 342 343 a.shmaddr = args->ptr; 344 return (linux_shmdt(td, &a)); 345 } 346 case LINUX_SHMGET: { 347 struct linux_shmget_args a; 348 349 a.key = args->arg1; 350 a.size = args->arg2; 351 a.shmflg = args->arg3; 352 return (linux_shmget(td, &a)); 353 } 354 case LINUX_SHMCTL: { 355 struct linux_shmctl_args a; 356 357 a.shmid = args->arg1; 358 a.cmd = args->arg2; 359 a.buf = args->ptr; 360 return (linux_shmctl(td, &a)); 361 } 362 default: 363 break; 364 } 365 366 return (EINVAL); 367 } 368 369 int 370 linux_old_select(struct thread *td, struct linux_old_select_args *args) 371 { 372 struct l_old_select_argv linux_args; 373 struct linux_select_args newsel; 374 int error; 375 376 #ifdef DEBUG 377 if (ldebug(old_select)) 378 printf(ARGS(old_select, "%p"), args->ptr); 379 #endif 380 381 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 382 if (error) 383 return (error); 384 385 newsel.nfds = linux_args.nfds; 386 newsel.readfds = PTRIN(linux_args.readfds); 387 newsel.writefds = PTRIN(linux_args.writefds); 388 newsel.exceptfds = PTRIN(linux_args.exceptfds); 389 newsel.timeout = PTRIN(linux_args.timeout); 390 return (linux_select(td, &newsel)); 391 } 392 393 int 394 linux_set_cloned_tls(struct thread *td, void *desc) 395 { 396 struct user_segment_descriptor sd; 397 struct l_user_desc info; 398 struct pcb *pcb; 399 int error; 400 int a[2]; 401 402 error = copyin(desc, &info, sizeof(struct l_user_desc)); 403 if (error) { 404 printf(LMSG("copyin failed!")); 405 } else { 406 /* We might copy out the entry_number as GUGS32_SEL. */ 407 info.entry_number = GUGS32_SEL; 408 error = copyout(&info, desc, sizeof(struct l_user_desc)); 409 if (error) 410 printf(LMSG("copyout failed!")); 411 412 a[0] = LINUX_LDT_entry_a(&info); 413 a[1] = LINUX_LDT_entry_b(&info); 414 415 memcpy(&sd, &a, sizeof(a)); 416 #ifdef DEBUG 417 if (ldebug(clone)) 418 printf("Segment created in clone with " 419 "CLONE_SETTLS: lobase: %x, hibase: %x, " 420 "lolimit: %x, hilimit: %x, type: %i, " 421 "dpl: %i, p: %i, xx: %i, long: %i, " 422 "def32: %i, gran: %i\n", sd.sd_lobase, 423 sd.sd_hibase, sd.sd_lolimit, sd.sd_hilimit, 424 sd.sd_type, sd.sd_dpl, sd.sd_p, sd.sd_xx, 425 sd.sd_long, sd.sd_def32, sd.sd_gran); 426 #endif 427 pcb = td->td_pcb; 428 pcb->pcb_gsbase = (register_t)info.base_addr; 429 /* XXXKIB pcb->pcb_gs32sd = sd; */ 430 td->td_frame->tf_gs = GSEL(GUGS32_SEL, SEL_UPL); 431 set_pcb_flags(pcb, PCB_32BIT); 432 } 433 434 return (error); 435 } 436 437 int 438 linux_set_upcall_kse(struct thread *td, register_t stack) 439 { 440 441 if (stack) 442 td->td_frame->tf_rsp = stack; 443 444 /* 445 * The newly created Linux thread returns 446 * to the user space by the same path that a parent do. 447 */ 448 td->td_frame->tf_rax = 0; 449 return (0); 450 } 451 452 #define STACK_SIZE (2 * 1024 * 1024) 453 #define GUARD_SIZE (4 * PAGE_SIZE) 454 455 int 456 linux_mmap2(struct thread *td, struct linux_mmap2_args *args) 457 { 458 459 #ifdef DEBUG 460 if (ldebug(mmap2)) 461 printf(ARGS(mmap2, "0x%08x, %d, %d, 0x%08x, %d, %d"), 462 args->addr, args->len, args->prot, 463 args->flags, args->fd, args->pgoff); 464 #endif 465 466 return (linux_mmap_common(td, PTROUT(args->addr), args->len, args->prot, 467 args->flags, args->fd, (uint64_t)(uint32_t)args->pgoff * 468 PAGE_SIZE)); 469 } 470 471 int 472 linux_mmap(struct thread *td, struct linux_mmap_args *args) 473 { 474 int error; 475 struct l_mmap_argv linux_args; 476 477 error = copyin(args->ptr, &linux_args, sizeof(linux_args)); 478 if (error) 479 return (error); 480 481 #ifdef DEBUG 482 if (ldebug(mmap)) 483 printf(ARGS(mmap, "0x%08x, %d, %d, 0x%08x, %d, %d"), 484 linux_args.addr, linux_args.len, linux_args.prot, 485 linux_args.flags, linux_args.fd, linux_args.pgoff); 486 #endif 487 488 return (linux_mmap_common(td, linux_args.addr, linux_args.len, 489 linux_args.prot, linux_args.flags, linux_args.fd, 490 (uint32_t)linux_args.pgoff)); 491 } 492 493 static int 494 linux_mmap_common(struct thread *td, l_uintptr_t addr, l_size_t len, l_int prot, 495 l_int flags, l_int fd, l_loff_t pos) 496 { 497 struct proc *p = td->td_proc; 498 struct mmap_args /* { 499 caddr_t addr; 500 size_t len; 501 int prot; 502 int flags; 503 int fd; 504 long pad; 505 off_t pos; 506 } */ bsd_args; 507 int error; 508 struct file *fp; 509 cap_rights_t rights; 510 511 error = 0; 512 bsd_args.flags = 0; 513 fp = NULL; 514 515 /* 516 * Linux mmap(2): 517 * You must specify exactly one of MAP_SHARED and MAP_PRIVATE 518 */ 519 if (!((flags & LINUX_MAP_SHARED) ^ (flags & LINUX_MAP_PRIVATE))) 520 return (EINVAL); 521 522 if (flags & LINUX_MAP_SHARED) 523 bsd_args.flags |= MAP_SHARED; 524 if (flags & LINUX_MAP_PRIVATE) 525 bsd_args.flags |= MAP_PRIVATE; 526 if (flags & LINUX_MAP_FIXED) 527 bsd_args.flags |= MAP_FIXED; 528 if (flags & LINUX_MAP_ANON) { 529 /* Enforce pos to be on page boundary, then ignore. */ 530 if ((pos & PAGE_MASK) != 0) 531 return (EINVAL); 532 pos = 0; 533 bsd_args.flags |= MAP_ANON; 534 } else 535 bsd_args.flags |= MAP_NOSYNC; 536 if (flags & LINUX_MAP_GROWSDOWN) 537 bsd_args.flags |= MAP_STACK; 538 539 /* 540 * PROT_READ, PROT_WRITE, or PROT_EXEC implies PROT_READ and PROT_EXEC 541 * on Linux/i386. We do this to ensure maximum compatibility. 542 * Linux/ia64 does the same in i386 emulation mode. 543 */ 544 bsd_args.prot = prot; 545 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 546 bsd_args.prot |= PROT_READ | PROT_EXEC; 547 548 /* Linux does not check file descriptor when MAP_ANONYMOUS is set. */ 549 bsd_args.fd = (bsd_args.flags & MAP_ANON) ? -1 : fd; 550 if (bsd_args.fd != -1) { 551 /* 552 * Linux follows Solaris mmap(2) description: 553 * The file descriptor fildes is opened with 554 * read permission, regardless of the 555 * protection options specified. 556 */ 557 558 error = fget(td, bsd_args.fd, 559 cap_rights_init(&rights, CAP_MMAP), &fp); 560 if (error != 0) 561 return (error); 562 if (fp->f_type != DTYPE_VNODE) { 563 fdrop(fp, td); 564 return (EINVAL); 565 } 566 567 /* Linux mmap() just fails for O_WRONLY files */ 568 if (!(fp->f_flag & FREAD)) { 569 fdrop(fp, td); 570 return (EACCES); 571 } 572 573 fdrop(fp, td); 574 } 575 576 if (flags & LINUX_MAP_GROWSDOWN) { 577 /* 578 * The Linux MAP_GROWSDOWN option does not limit auto 579 * growth of the region. Linux mmap with this option 580 * takes as addr the inital BOS, and as len, the initial 581 * region size. It can then grow down from addr without 582 * limit. However, Linux threads has an implicit internal 583 * limit to stack size of STACK_SIZE. Its just not 584 * enforced explicitly in Linux. But, here we impose 585 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 586 * region, since we can do this with our mmap. 587 * 588 * Our mmap with MAP_STACK takes addr as the maximum 589 * downsize limit on BOS, and as len the max size of 590 * the region. It then maps the top SGROWSIZ bytes, 591 * and auto grows the region down, up to the limit 592 * in addr. 593 * 594 * If we don't use the MAP_STACK option, the effect 595 * of this code is to allocate a stack region of a 596 * fixed size of (STACK_SIZE - GUARD_SIZE). 597 */ 598 599 if ((caddr_t)PTRIN(addr) + len > p->p_vmspace->vm_maxsaddr) { 600 /* 601 * Some Linux apps will attempt to mmap 602 * thread stacks near the top of their 603 * address space. If their TOS is greater 604 * than vm_maxsaddr, vm_map_growstack() 605 * will confuse the thread stack with the 606 * process stack and deliver a SEGV if they 607 * attempt to grow the thread stack past their 608 * current stacksize rlimit. To avoid this, 609 * adjust vm_maxsaddr upwards to reflect 610 * the current stacksize rlimit rather 611 * than the maximum possible stacksize. 612 * It would be better to adjust the 613 * mmap'ed region, but some apps do not check 614 * mmap's return value. 615 */ 616 PROC_LOCK(p); 617 p->p_vmspace->vm_maxsaddr = (char *)LINUX32_USRSTACK - 618 lim_cur_proc(p, RLIMIT_STACK); 619 PROC_UNLOCK(p); 620 } 621 622 /* 623 * This gives us our maximum stack size and a new BOS. 624 * If we're using VM_STACK, then mmap will just map 625 * the top SGROWSIZ bytes, and let the stack grow down 626 * to the limit at BOS. If we're not using VM_STACK 627 * we map the full stack, since we don't have a way 628 * to autogrow it. 629 */ 630 if (len > STACK_SIZE - GUARD_SIZE) { 631 bsd_args.addr = (caddr_t)PTRIN(addr); 632 bsd_args.len = len; 633 } else { 634 bsd_args.addr = (caddr_t)PTRIN(addr) - 635 (STACK_SIZE - GUARD_SIZE - len); 636 bsd_args.len = STACK_SIZE - GUARD_SIZE; 637 } 638 } else { 639 bsd_args.addr = (caddr_t)PTRIN(addr); 640 bsd_args.len = len; 641 } 642 bsd_args.pos = pos; 643 644 #ifdef DEBUG 645 if (ldebug(mmap)) 646 printf("-> %s(%p, %d, %d, 0x%08x, %d, 0x%x)\n", 647 __func__, 648 (void *)bsd_args.addr, (int)bsd_args.len, bsd_args.prot, 649 bsd_args.flags, bsd_args.fd, (int)bsd_args.pos); 650 #endif 651 error = sys_mmap(td, &bsd_args); 652 #ifdef DEBUG 653 if (ldebug(mmap)) 654 printf("-> %s() return: 0x%x (0x%08x)\n", 655 __func__, error, (u_int)td->td_retval[0]); 656 #endif 657 return (error); 658 } 659 660 int 661 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 662 { 663 struct mprotect_args bsd_args; 664 665 bsd_args.addr = uap->addr; 666 bsd_args.len = uap->len; 667 bsd_args.prot = uap->prot; 668 if (bsd_args.prot & (PROT_READ | PROT_WRITE | PROT_EXEC)) 669 bsd_args.prot |= PROT_READ | PROT_EXEC; 670 return (sys_mprotect(td, &bsd_args)); 671 } 672 673 int 674 linux_iopl(struct thread *td, struct linux_iopl_args *args) 675 { 676 int error; 677 678 if (args->level < 0 || args->level > 3) 679 return (EINVAL); 680 if ((error = priv_check(td, PRIV_IO)) != 0) 681 return (error); 682 if ((error = securelevel_gt(td->td_ucred, 0)) != 0) 683 return (error); 684 td->td_frame->tf_rflags = (td->td_frame->tf_rflags & ~PSL_IOPL) | 685 (args->level * (PSL_IOPL / 3)); 686 687 return (0); 688 } 689 690 int 691 linux_sigaction(struct thread *td, struct linux_sigaction_args *args) 692 { 693 l_osigaction_t osa; 694 l_sigaction_t act, oact; 695 int error; 696 697 #ifdef DEBUG 698 if (ldebug(sigaction)) 699 printf(ARGS(sigaction, "%d, %p, %p"), 700 args->sig, (void *)args->nsa, (void *)args->osa); 701 #endif 702 703 if (args->nsa != NULL) { 704 error = copyin(args->nsa, &osa, sizeof(l_osigaction_t)); 705 if (error) 706 return (error); 707 act.lsa_handler = osa.lsa_handler; 708 act.lsa_flags = osa.lsa_flags; 709 act.lsa_restorer = osa.lsa_restorer; 710 LINUX_SIGEMPTYSET(act.lsa_mask); 711 act.lsa_mask.__mask = osa.lsa_mask; 712 } 713 714 error = linux_do_sigaction(td, args->sig, args->nsa ? &act : NULL, 715 args->osa ? &oact : NULL); 716 717 if (args->osa != NULL && !error) { 718 osa.lsa_handler = oact.lsa_handler; 719 osa.lsa_flags = oact.lsa_flags; 720 osa.lsa_restorer = oact.lsa_restorer; 721 osa.lsa_mask = oact.lsa_mask.__mask; 722 error = copyout(&osa, args->osa, sizeof(l_osigaction_t)); 723 } 724 725 return (error); 726 } 727 728 /* 729 * Linux has two extra args, restart and oldmask. We don't use these, 730 * but it seems that "restart" is actually a context pointer that 731 * enables the signal to happen with a different register set. 732 */ 733 int 734 linux_sigsuspend(struct thread *td, struct linux_sigsuspend_args *args) 735 { 736 sigset_t sigmask; 737 l_sigset_t mask; 738 739 #ifdef DEBUG 740 if (ldebug(sigsuspend)) 741 printf(ARGS(sigsuspend, "%08lx"), (unsigned long)args->mask); 742 #endif 743 744 LINUX_SIGEMPTYSET(mask); 745 mask.__mask = args->mask; 746 linux_to_bsd_sigset(&mask, &sigmask); 747 return (kern_sigsuspend(td, sigmask)); 748 } 749 750 int 751 linux_rt_sigsuspend(struct thread *td, struct linux_rt_sigsuspend_args *uap) 752 { 753 l_sigset_t lmask; 754 sigset_t sigmask; 755 int error; 756 757 #ifdef DEBUG 758 if (ldebug(rt_sigsuspend)) 759 printf(ARGS(rt_sigsuspend, "%p, %d"), 760 (void *)uap->newset, uap->sigsetsize); 761 #endif 762 763 if (uap->sigsetsize != sizeof(l_sigset_t)) 764 return (EINVAL); 765 766 error = copyin(uap->newset, &lmask, sizeof(l_sigset_t)); 767 if (error) 768 return (error); 769 770 linux_to_bsd_sigset(&lmask, &sigmask); 771 return (kern_sigsuspend(td, sigmask)); 772 } 773 774 int 775 linux_pause(struct thread *td, struct linux_pause_args *args) 776 { 777 struct proc *p = td->td_proc; 778 sigset_t sigmask; 779 780 #ifdef DEBUG 781 if (ldebug(pause)) 782 printf(ARGS(pause, "")); 783 #endif 784 785 PROC_LOCK(p); 786 sigmask = td->td_sigmask; 787 PROC_UNLOCK(p); 788 return (kern_sigsuspend(td, sigmask)); 789 } 790 791 int 792 linux_sigaltstack(struct thread *td, struct linux_sigaltstack_args *uap) 793 { 794 stack_t ss, oss; 795 l_stack_t lss; 796 int error; 797 798 #ifdef DEBUG 799 if (ldebug(sigaltstack)) 800 printf(ARGS(sigaltstack, "%p, %p"), uap->uss, uap->uoss); 801 #endif 802 803 if (uap->uss != NULL) { 804 error = copyin(uap->uss, &lss, sizeof(l_stack_t)); 805 if (error) 806 return (error); 807 808 ss.ss_sp = PTRIN(lss.ss_sp); 809 ss.ss_size = lss.ss_size; 810 ss.ss_flags = linux_to_bsd_sigaltstack(lss.ss_flags); 811 } 812 error = kern_sigaltstack(td, (uap->uss != NULL) ? &ss : NULL, 813 (uap->uoss != NULL) ? &oss : NULL); 814 if (!error && uap->uoss != NULL) { 815 lss.ss_sp = PTROUT(oss.ss_sp); 816 lss.ss_size = oss.ss_size; 817 lss.ss_flags = bsd_to_linux_sigaltstack(oss.ss_flags); 818 error = copyout(&lss, uap->uoss, sizeof(l_stack_t)); 819 } 820 821 return (error); 822 } 823 824 int 825 linux_ftruncate64(struct thread *td, struct linux_ftruncate64_args *args) 826 { 827 struct ftruncate_args sa; 828 829 #ifdef DEBUG 830 if (ldebug(ftruncate64)) 831 printf(ARGS(ftruncate64, "%u, %jd"), args->fd, 832 (intmax_t)args->length); 833 #endif 834 835 sa.fd = args->fd; 836 sa.length = args->length; 837 return sys_ftruncate(td, &sa); 838 } 839 840 int 841 linux_gettimeofday(struct thread *td, struct linux_gettimeofday_args *uap) 842 { 843 struct timeval atv; 844 l_timeval atv32; 845 struct timezone rtz; 846 int error = 0; 847 848 if (uap->tp) { 849 microtime(&atv); 850 atv32.tv_sec = atv.tv_sec; 851 atv32.tv_usec = atv.tv_usec; 852 error = copyout(&atv32, uap->tp, sizeof(atv32)); 853 } 854 if (error == 0 && uap->tzp != NULL) { 855 rtz.tz_minuteswest = tz_minuteswest; 856 rtz.tz_dsttime = tz_dsttime; 857 error = copyout(&rtz, uap->tzp, sizeof(rtz)); 858 } 859 return (error); 860 } 861 862 int 863 linux_settimeofday(struct thread *td, struct linux_settimeofday_args *uap) 864 { 865 l_timeval atv32; 866 struct timeval atv, *tvp; 867 struct timezone atz, *tzp; 868 int error; 869 870 if (uap->tp) { 871 error = copyin(uap->tp, &atv32, sizeof(atv32)); 872 if (error) 873 return (error); 874 atv.tv_sec = atv32.tv_sec; 875 atv.tv_usec = atv32.tv_usec; 876 tvp = &atv; 877 } else 878 tvp = NULL; 879 if (uap->tzp) { 880 error = copyin(uap->tzp, &atz, sizeof(atz)); 881 if (error) 882 return (error); 883 tzp = &atz; 884 } else 885 tzp = NULL; 886 return (kern_settimeofday(td, tvp, tzp)); 887 } 888 889 int 890 linux_getrusage(struct thread *td, struct linux_getrusage_args *uap) 891 { 892 struct rusage s; 893 int error; 894 895 error = kern_getrusage(td, uap->who, &s); 896 if (error != 0) 897 return (error); 898 if (uap->rusage != NULL) 899 error = linux_copyout_rusage(&s, uap->rusage); 900 return (error); 901 } 902 903 int 904 linux_set_thread_area(struct thread *td, 905 struct linux_set_thread_area_args *args) 906 { 907 struct l_user_desc info; 908 struct user_segment_descriptor sd; 909 struct pcb *pcb; 910 int a[2]; 911 int error; 912 913 error = copyin(args->desc, &info, sizeof(struct l_user_desc)); 914 if (error) 915 return (error); 916 917 #ifdef DEBUG 918 if (ldebug(set_thread_area)) 919 printf(ARGS(set_thread_area, "%i, %x, %x, %i, %i, %i, " 920 "%i, %i, %i"), info.entry_number, info.base_addr, 921 info.limit, info.seg_32bit, info.contents, 922 info.read_exec_only, info.limit_in_pages, 923 info.seg_not_present, info.useable); 924 #endif 925 926 /* 927 * Semantics of Linux version: every thread in the system has array 928 * of three TLS descriptors. 1st is GLIBC TLS, 2nd is WINE, 3rd unknown. 929 * This syscall loads one of the selected TLS decriptors with a value 930 * and also loads GDT descriptors 6, 7 and 8 with the content of 931 * the per-thread descriptors. 932 * 933 * Semantics of FreeBSD version: I think we can ignore that Linux has 934 * three per-thread descriptors and use just the first one. 935 * The tls_array[] is used only in [gs]et_thread_area() syscalls and 936 * for loading the GDT descriptors. We use just one GDT descriptor 937 * for TLS, so we will load just one. 938 * 939 * XXX: This doesn't work when a user space process tries to use more 940 * than one TLS segment. Comment in the Linux source says wine might 941 * do this. 942 */ 943 944 /* 945 * GLIBC reads current %gs and call set_thread_area() with it. 946 * We should let GUDATA_SEL and GUGS32_SEL proceed as well because 947 * we use these segments. 948 */ 949 switch (info.entry_number) { 950 case GUGS32_SEL: 951 case GUDATA_SEL: 952 case 6: 953 case -1: 954 info.entry_number = GUGS32_SEL; 955 break; 956 default: 957 return (EINVAL); 958 } 959 960 /* 961 * We have to copy out the GDT entry we use. 962 * 963 * XXX: What if a user space program does not check the return value 964 * and tries to use 6, 7 or 8? 965 */ 966 error = copyout(&info, args->desc, sizeof(struct l_user_desc)); 967 if (error) 968 return (error); 969 970 if (LINUX_LDT_empty(&info)) { 971 a[0] = 0; 972 a[1] = 0; 973 } else { 974 a[0] = LINUX_LDT_entry_a(&info); 975 a[1] = LINUX_LDT_entry_b(&info); 976 } 977 978 memcpy(&sd, &a, sizeof(a)); 979 #ifdef DEBUG 980 if (ldebug(set_thread_area)) 981 printf("Segment created in set_thread_area: " 982 "lobase: %x, hibase: %x, lolimit: %x, hilimit: %x, " 983 "type: %i, dpl: %i, p: %i, xx: %i, long: %i, " 984 "def32: %i, gran: %i\n", 985 sd.sd_lobase, 986 sd.sd_hibase, 987 sd.sd_lolimit, 988 sd.sd_hilimit, 989 sd.sd_type, 990 sd.sd_dpl, 991 sd.sd_p, 992 sd.sd_xx, 993 sd.sd_long, 994 sd.sd_def32, 995 sd.sd_gran); 996 #endif 997 998 pcb = td->td_pcb; 999 pcb->pcb_gsbase = (register_t)info.base_addr; 1000 set_pcb_flags(pcb, PCB_32BIT); 1001 update_gdt_gsbase(td, info.base_addr); 1002 1003 return (0); 1004 } 1005