1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $Id: linux_misc.c,v 1.51 1999/01/06 23:05:38 julian Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysproto.h> 34 #include <sys/kernel.h> 35 #include <sys/mman.h> 36 #include <sys/proc.h> 37 #include <sys/fcntl.h> 38 #include <sys/imgact_aout.h> 39 #include <sys/mount.h> 40 #include <sys/namei.h> 41 #include <sys/resourcevar.h> 42 #include <sys/stat.h> 43 #include <sys/sysctl.h> 44 #include <sys/unistd.h> 45 #include <sys/vnode.h> 46 #include <sys/wait.h> 47 #include <sys/time.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 #include <vm/vm_kern.h> 52 #include <vm/vm_prot.h> 53 #include <vm/vm_map.h> 54 #include <vm/vm_extern.h> 55 56 #include <machine/frame.h> 57 #include <machine/psl.h> 58 59 #include <i386/linux/linux.h> 60 #include <i386/linux/linux_proto.h> 61 #include <i386/linux/linux_util.h> 62 63 int 64 linux_alarm(struct proc *p, struct linux_alarm_args *args) 65 { 66 struct itimerval it, old_it; 67 struct timeval tv; 68 int s; 69 70 #ifdef DEBUG 71 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 72 #endif 73 if (args->secs > 100000000) 74 return EINVAL; 75 it.it_value.tv_sec = (long)args->secs; 76 it.it_value.tv_usec = 0; 77 it.it_interval.tv_sec = 0; 78 it.it_interval.tv_usec = 0; 79 s = splsoftclock(); 80 old_it = p->p_realtimer; 81 getmicrouptime(&tv); 82 if (timevalisset(&old_it.it_value)) 83 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 84 if (it.it_value.tv_sec != 0) { 85 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 86 timevaladd(&it.it_value, &tv); 87 } 88 p->p_realtimer = it; 89 splx(s); 90 if (timevalcmp(&old_it.it_value, &tv, >)) { 91 timevalsub(&old_it.it_value, &tv); 92 if (old_it.it_value.tv_usec != 0) 93 old_it.it_value.tv_sec++; 94 p->p_retval[0] = old_it.it_value.tv_sec; 95 } 96 return 0; 97 } 98 99 int 100 linux_brk(struct proc *p, struct linux_brk_args *args) 101 { 102 #if 0 103 struct vmspace *vm = p->p_vmspace; 104 vm_offset_t new, old; 105 int error; 106 107 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 108 return EINVAL; 109 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 110 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 111 return ENOMEM; 112 113 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 114 new = round_page((vm_offset_t)args->dsend); 115 p->p_retval[0] = old; 116 if ((new-old) > 0) { 117 if (swap_pager_full) 118 return ENOMEM; 119 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 120 VM_PROT_ALL, VM_PROT_ALL, 0); 121 if (error) 122 return error; 123 vm->vm_dsize += btoc((new-old)); 124 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 125 } 126 return 0; 127 #else 128 struct vmspace *vm = p->p_vmspace; 129 vm_offset_t new, old; 130 struct obreak_args /* { 131 char * nsize; 132 } */ tmp; 133 134 #ifdef DEBUG 135 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 136 #endif 137 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 138 new = (vm_offset_t)args->dsend; 139 tmp.nsize = (char *) new; 140 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 141 p->p_retval[0] = (int)new; 142 else 143 p->p_retval[0] = (int)old; 144 145 return 0; 146 #endif 147 } 148 149 int 150 linux_uselib(struct proc *p, struct linux_uselib_args *args) 151 { 152 struct nameidata ni; 153 struct vnode *vp; 154 struct exec *a_out; 155 struct vattr attr; 156 vm_offset_t vmaddr; 157 unsigned long file_offset; 158 vm_offset_t buffer; 159 unsigned long bss_size; 160 int error; 161 caddr_t sg; 162 int locked; 163 164 sg = stackgap_init(); 165 CHECKALTEXIST(p, &sg, args->library); 166 167 #ifdef DEBUG 168 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library); 169 #endif 170 171 a_out = NULL; 172 locked = 0; 173 vp = NULL; 174 175 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p); 176 if (error = namei(&ni)) 177 goto cleanup; 178 179 vp = ni.ni_vp; 180 if (vp == NULL) { 181 error = ENOEXEC; /* ?? */ 182 goto cleanup; 183 } 184 185 /* 186 * From here on down, we have a locked vnode that must be unlocked. 187 */ 188 locked++; 189 190 /* 191 * Writable? 192 */ 193 if (vp->v_writecount) { 194 error = ETXTBSY; 195 goto cleanup; 196 } 197 198 /* 199 * Executable? 200 */ 201 if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p)) 202 goto cleanup; 203 204 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 205 ((attr.va_mode & 0111) == 0) || 206 (attr.va_type != VREG)) { 207 error = ENOEXEC; 208 goto cleanup; 209 } 210 211 /* 212 * Sensible size? 213 */ 214 if (attr.va_size == 0) { 215 error = ENOEXEC; 216 goto cleanup; 217 } 218 219 /* 220 * Can we access it? 221 */ 222 if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) 223 goto cleanup; 224 225 if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) 226 goto cleanup; 227 228 /* 229 * Lock no longer needed 230 */ 231 VOP_UNLOCK(vp, 0, p); 232 locked = 0; 233 234 /* 235 * Pull in executable header into kernel_map 236 */ 237 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 238 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 239 if (error) 240 goto cleanup; 241 242 /* 243 * Is it a Linux binary ? 244 */ 245 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 246 error = ENOEXEC; 247 goto cleanup; 248 } 249 250 /* While we are here, we should REALLY do some more checks */ 251 252 /* 253 * Set file/virtual offset based on a.out variant. 254 */ 255 switch ((int)(a_out->a_magic & 0xffff)) { 256 case 0413: /* ZMAGIC */ 257 file_offset = 1024; 258 break; 259 case 0314: /* QMAGIC */ 260 file_offset = 0; 261 break; 262 default: 263 error = ENOEXEC; 264 goto cleanup; 265 } 266 267 bss_size = round_page(a_out->a_bss); 268 269 /* 270 * Check various fields in header for validity/bounds. 271 */ 272 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 273 error = ENOEXEC; 274 goto cleanup; 275 } 276 277 /* text + data can't exceed file size */ 278 if (a_out->a_data + a_out->a_text > attr.va_size) { 279 error = EFAULT; 280 goto cleanup; 281 } 282 283 /* 284 * text/data/bss must not exceed limits 285 * XXX: this is not complete. it should check current usage PLUS 286 * the resources needed by this library. 287 */ 288 if (a_out->a_text > MAXTSIZ || 289 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 290 error = ENOMEM; 291 goto cleanup; 292 } 293 294 /* 295 * prevent more writers 296 */ 297 vp->v_flag |= VTEXT; 298 299 /* 300 * Check if file_offset page aligned,. 301 * Currently we cannot handle misalinged file offsets, 302 * and so we read in the entire image (what a waste). 303 */ 304 if (file_offset & PAGE_MASK) { 305 #ifdef DEBUG 306 printf("uselib: Non page aligned binary %lu\n", file_offset); 307 #endif 308 /* 309 * Map text+data read/write/execute 310 */ 311 312 /* a_entry is the load address and is page aligned */ 313 vmaddr = trunc_page(a_out->a_entry); 314 315 /* get anon user mapping, read+write+execute */ 316 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 317 a_out->a_text + a_out->a_data, FALSE, 318 VM_PROT_ALL, VM_PROT_ALL, 0); 319 if (error) 320 goto cleanup; 321 322 /* map file into kernel_map */ 323 error = vm_mmap(kernel_map, &buffer, 324 round_page(a_out->a_text + a_out->a_data + file_offset), 325 VM_PROT_READ, VM_PROT_READ, 0, 326 (caddr_t)vp, trunc_page(file_offset)); 327 if (error) 328 goto cleanup; 329 330 /* copy from kernel VM space to user space */ 331 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 332 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 333 334 /* release temporary kernel space */ 335 vm_map_remove(kernel_map, buffer, 336 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 337 338 if (error) 339 goto cleanup; 340 } 341 else { 342 #ifdef DEBUG 343 printf("uselib: Page aligned binary %lu\n", file_offset); 344 #endif 345 /* 346 * for QMAGIC, a_entry is 20 bytes beyond the load address 347 * to skip the executable header 348 */ 349 vmaddr = trunc_page(a_out->a_entry); 350 351 /* 352 * Map it all into the process's space as a single copy-on-write 353 * "data" segment. 354 */ 355 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 356 a_out->a_text + a_out->a_data, 357 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 358 (caddr_t)vp, file_offset); 359 if (error) 360 goto cleanup; 361 } 362 #ifdef DEBUG 363 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 364 #endif 365 if (bss_size != 0) { 366 /* 367 * Calculate BSS start address 368 */ 369 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 370 371 /* 372 * allocate some 'anon' space 373 */ 374 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 375 bss_size, FALSE, 376 VM_PROT_ALL, VM_PROT_ALL, 0); 377 if (error) 378 goto cleanup; 379 } 380 381 cleanup: 382 /* 383 * Unlock vnode if needed 384 */ 385 if (locked) 386 VOP_UNLOCK(vp, 0, p); 387 388 /* 389 * Release the kernel mapping. 390 */ 391 if (a_out) 392 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 393 394 return error; 395 } 396 397 /* XXX move */ 398 struct linux_select_argv { 399 int nfds; 400 fd_set *readfds; 401 fd_set *writefds; 402 fd_set *exceptfds; 403 struct timeval *timeout; 404 }; 405 406 int 407 linux_select(struct proc *p, struct linux_select_args *args) 408 { 409 struct linux_select_argv linux_args; 410 struct linux_newselect_args newsel; 411 int error; 412 413 #ifdef SELECT_DEBUG 414 printf("Linux-emul(%d): select(%x)\n", 415 p->p_pid, args->ptr); 416 #endif 417 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 418 sizeof(linux_args)))) 419 return error; 420 421 newsel.nfds = linux_args.nfds; 422 newsel.readfds = linux_args.readfds; 423 newsel.writefds = linux_args.writefds; 424 newsel.exceptfds = linux_args.exceptfds; 425 newsel.timeout = linux_args.timeout; 426 427 return linux_newselect(p, &newsel); 428 } 429 430 int 431 linux_newselect(struct proc *p, struct linux_newselect_args *args) 432 { 433 struct select_args bsa; 434 struct timeval tv0, tv1, utv, *tvp; 435 caddr_t sg; 436 int error; 437 438 #ifdef DEBUG 439 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 440 (long)p->p_pid, args->nfds, (void *)args->readfds, 441 (void *)args->writefds, (void *)args->exceptfds, 442 (void *)args->timeout); 443 #endif 444 error = 0; 445 bsa.nd = args->nfds; 446 bsa.in = args->readfds; 447 bsa.ou = args->writefds; 448 bsa.ex = args->exceptfds; 449 bsa.tv = args->timeout; 450 451 /* 452 * Store current time for computation of the amount of 453 * time left. 454 */ 455 if (args->timeout) { 456 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 457 goto select_out; 458 #ifdef DEBUG 459 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 460 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 461 #endif 462 if (itimerfix(&utv)) { 463 /* 464 * The timeval was invalid. Convert it to something 465 * valid that will act as it does under Linux. 466 */ 467 sg = stackgap_init(); 468 tvp = stackgap_alloc(&sg, sizeof(utv)); 469 utv.tv_sec += utv.tv_usec / 1000000; 470 utv.tv_usec %= 1000000; 471 if (utv.tv_usec < 0) { 472 utv.tv_sec -= 1; 473 utv.tv_usec += 1000000; 474 } 475 if (utv.tv_sec < 0) 476 timevalclear(&utv); 477 if ((error = copyout(&utv, tvp, sizeof(utv)))) 478 goto select_out; 479 bsa.tv = tvp; 480 } 481 microtime(&tv0); 482 } 483 484 error = select(p, &bsa); 485 #ifdef DEBUG 486 printf("Linux-emul(%d): real select returns %d\n", 487 p->p_pid, error); 488 #endif 489 490 if (error) { 491 /* 492 * See fs/select.c in the Linux kernel. Without this, 493 * Maelstrom doesn't work. 494 */ 495 if (error == ERESTART) 496 error = EINTR; 497 goto select_out; 498 } 499 500 if (args->timeout) { 501 if (p->p_retval[0]) { 502 /* 503 * Compute how much time was left of the timeout, 504 * by subtracting the current time and the time 505 * before we started the call, and subtracting 506 * that result from the user-supplied value. 507 */ 508 microtime(&tv1); 509 timevalsub(&tv1, &tv0); 510 timevalsub(&utv, &tv1); 511 if (utv.tv_sec < 0) 512 timevalclear(&utv); 513 } else 514 timevalclear(&utv); 515 #ifdef DEBUG 516 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 517 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 518 #endif 519 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 520 goto select_out; 521 } 522 523 select_out: 524 #ifdef DEBUG 525 printf("Linux-emul(%d): newselect_out -> %d\n", 526 p->p_pid, error); 527 #endif 528 return error; 529 } 530 531 int 532 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 533 { 534 struct proc *curproc; 535 536 #ifdef DEBUG 537 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid); 538 #endif 539 if (args->pid != p->p_pid) { 540 if (!(curproc = pfind(args->pid))) 541 return ESRCH; 542 } 543 else 544 curproc = p; 545 p->p_retval[0] = curproc->p_pgid; 546 return 0; 547 } 548 549 int 550 linux_fork(struct proc *p, struct linux_fork_args *args) 551 { 552 int error; 553 554 #ifdef DEBUG 555 printf("Linux-emul(%d): fork()\n", p->p_pid); 556 #endif 557 if (error = fork(p, (struct fork_args *)args)) 558 return error; 559 if (p->p_retval[1] == 1) 560 p->p_retval[0] = 0; 561 return 0; 562 } 563 564 #define CLONE_VM 0x100 565 #define CLONE_FS 0x200 566 #define CLONE_FILES 0x400 567 #define CLONE_SIGHAND 0x800 568 #define CLONE_PID 0x1000 569 570 int 571 linux_clone(struct proc *p, struct linux_clone_args *args) 572 { 573 int error, ff = RFPROC; 574 struct proc *p2; 575 int exit_signal; 576 vm_offset_t start; 577 struct rfork_args rf_args; 578 579 #ifdef SMP 580 printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid); 581 return (EOPNOTSUPP); 582 #endif 583 #ifdef DEBUG 584 if (args->flags & CLONE_PID) 585 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid); 586 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid, 587 (unsigned int)args->flags, (unsigned int)args->stack); 588 #endif 589 590 if (!args->stack) 591 return (EINVAL); 592 exit_signal = args->flags & 0x000000ff; 593 if (exit_signal >= LINUX_NSIG) 594 return EINVAL; 595 exit_signal = linux_to_bsd_signal[exit_signal]; 596 597 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 598 ff |= RFTHREAD; 599 600 if (args->flags & CLONE_VM) 601 ff |= RFMEM; 602 if (args->flags & CLONE_SIGHAND) 603 ff |= RFSIGSHARE; 604 if (!(args->flags & CLONE_FILES)) 605 ff |= RFFDG; 606 607 error = 0; 608 start = 0; 609 610 rf_args.flags = ff; 611 if (error = rfork(p, &rf_args)) 612 return error; 613 614 p2 = pfind(p->p_retval[0]); 615 if (p2 == 0) 616 return ESRCH; 617 618 p2->p_sigparent = exit_signal; 619 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 620 621 #ifdef DEBUG 622 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid); 623 #endif 624 return 0; 625 } 626 627 /* XXX move */ 628 struct linux_mmap_argv { 629 linux_caddr_t addr; 630 int len; 631 int prot; 632 int flags; 633 int fd; 634 int pos; 635 }; 636 637 #define STACK_SIZE (2 * 1024 * 1024) 638 #define GUARD_SIZE (4 * PAGE_SIZE) 639 int 640 linux_mmap(struct proc *p, struct linux_mmap_args *args) 641 { 642 struct mmap_args /* { 643 caddr_t addr; 644 size_t len; 645 int prot; 646 int flags; 647 int fd; 648 long pad; 649 off_t pos; 650 } */ bsd_args; 651 int error; 652 struct linux_mmap_argv linux_args; 653 654 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 655 sizeof(linux_args)))) 656 return error; 657 #ifdef DEBUG 658 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 659 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 660 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 661 #endif 662 bsd_args.flags = 0; 663 if (linux_args.flags & LINUX_MAP_SHARED) 664 bsd_args.flags |= MAP_SHARED; 665 if (linux_args.flags & LINUX_MAP_PRIVATE) 666 bsd_args.flags |= MAP_PRIVATE; 667 if (linux_args.flags & LINUX_MAP_FIXED) 668 bsd_args.flags |= MAP_FIXED; 669 if (linux_args.flags & LINUX_MAP_ANON) 670 bsd_args.flags |= MAP_ANON; 671 672 #ifndef VM_STACK 673 /* Linux Threads will map into the proc stack space, unless 674 * we prevent it. This causes problems if we're not using 675 * our VM_STACK options. 676 */ 677 if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ)) 678 return (EINVAL); 679 #endif 680 681 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 682 683 #ifdef VM_STACK 684 bsd_args.flags |= MAP_STACK; 685 #endif 686 687 /* The linux MAP_GROWSDOWN option does not limit auto 688 * growth of the region. Linux mmap with this option 689 * takes as addr the inital BOS, and as len, the initial 690 * region size. It can then grow down from addr without 691 * limit. However, linux threads has an implicit internal 692 * limit to stack size of STACK_SIZE. Its just not 693 * enforced explicitly in linux. But, here we impose 694 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 695 * region, since we can do this with our mmap. 696 * 697 * Our mmap with MAP_STACK takes addr as the maximum 698 * downsize limit on BOS, and as len the max size of 699 * the region. It them maps the top SGROWSIZ bytes, 700 * and autgrows the region down, up to the limit 701 * in addr. 702 * 703 * If we don't use the MAP_STACK option, the effect 704 * of this code is to allocate a stack region of a 705 * fixed size of (STACK_SIZE - GUARD_SIZE). 706 */ 707 708 /* This gives us TOS */ 709 bsd_args.addr = linux_args.addr + linux_args.len; 710 711 /* This gives us our maximum stack size */ 712 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 713 bsd_args.len = linux_args.len; 714 else 715 bsd_args.len = STACK_SIZE - GUARD_SIZE; 716 717 /* This gives us a new BOS. If we're using VM_STACK, then 718 * mmap will just map the top SGROWSIZ bytes, and let 719 * the stack grow down to the limit at BOS. If we're 720 * not using VM_STACK we map the full stack, since we 721 * don't have a way to autogrow it. 722 */ 723 bsd_args.addr -= bsd_args.len; 724 725 } else { 726 bsd_args.addr = linux_args.addr; 727 bsd_args.len = linux_args.len; 728 } 729 730 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 731 bsd_args.fd = linux_args.fd; 732 bsd_args.pos = linux_args.pos; 733 bsd_args.pad = 0; 734 return mmap(p, &bsd_args); 735 } 736 737 int 738 linux_mremap(struct proc *p, struct linux_mremap_args *args) 739 { 740 struct munmap_args /* { 741 void *addr; 742 size_t len; 743 } */ bsd_args; 744 int error = 0; 745 746 #ifdef DEBUG 747 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 748 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 749 args->flags); 750 #endif 751 args->new_len = round_page(args->new_len); 752 args->old_len = round_page(args->old_len); 753 754 if (args->new_len > args->old_len) { 755 p->p_retval[0] = 0; 756 return ENOMEM; 757 } 758 759 if (args->new_len < args->old_len) { 760 bsd_args.addr = args->addr + args->new_len; 761 bsd_args.len = args->old_len - args->new_len; 762 error = munmap(p, &bsd_args); 763 } 764 765 p->p_retval[0] = error ? 0 : (int)args->addr; 766 return error; 767 } 768 769 int 770 linux_msync(struct proc *p, struct linux_msync_args *args) 771 { 772 struct msync_args bsd_args; 773 774 bsd_args.addr = args->addr; 775 bsd_args.len = args->len; 776 bsd_args.flags = 0; /* XXX ignore */ 777 778 return msync(p, &bsd_args); 779 } 780 781 int 782 linux_pipe(struct proc *p, struct linux_pipe_args *args) 783 { 784 int error; 785 int reg_edx; 786 787 #ifdef DEBUG 788 printf("Linux-emul(%d): pipe(*)\n", p->p_pid); 789 #endif 790 reg_edx = p->p_retval[1]; 791 if (error = pipe(p, 0)) { 792 p->p_retval[1] = reg_edx; 793 return error; 794 } 795 796 if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) { 797 p->p_retval[1] = reg_edx; 798 return error; 799 } 800 801 p->p_retval[1] = reg_edx; 802 p->p_retval[0] = 0; 803 return 0; 804 } 805 806 int 807 linux_time(struct proc *p, struct linux_time_args *args) 808 { 809 struct timeval tv; 810 linux_time_t tm; 811 int error; 812 813 #ifdef DEBUG 814 printf("Linux-emul(%d): time(*)\n", p->p_pid); 815 #endif 816 microtime(&tv); 817 tm = tv.tv_sec; 818 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 819 return error; 820 p->p_retval[0] = tm; 821 return 0; 822 } 823 824 struct linux_times_argv { 825 long tms_utime; 826 long tms_stime; 827 long tms_cutime; 828 long tms_cstime; 829 }; 830 831 #define CLK_TCK 100 /* Linux uses 100 */ 832 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 833 834 int 835 linux_times(struct proc *p, struct linux_times_args *args) 836 { 837 struct timeval tv; 838 struct linux_times_argv tms; 839 struct rusage ru; 840 int error; 841 842 #ifdef DEBUG 843 printf("Linux-emul(%d): times(*)\n", p->p_pid); 844 #endif 845 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 846 847 tms.tms_utime = CONVTCK(ru.ru_utime); 848 tms.tms_stime = CONVTCK(ru.ru_stime); 849 850 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 851 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 852 853 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 854 sizeof(struct linux_times_argv)))) 855 return error; 856 857 microuptime(&tv); 858 p->p_retval[0] = (int)CONVTCK(tv); 859 return 0; 860 } 861 862 /* XXX move */ 863 struct linux_newuname_t { 864 char sysname[65]; 865 char nodename[65]; 866 char release[65]; 867 char version[65]; 868 char machine[65]; 869 char domainname[65]; 870 }; 871 872 int 873 linux_newuname(struct proc *p, struct linux_newuname_args *args) 874 { 875 struct linux_newuname_t linux_newuname; 876 877 #ifdef DEBUG 878 printf("Linux-emul(%d): newuname(*)\n", p->p_pid); 879 #endif 880 bzero(&linux_newuname, sizeof(struct linux_newuname_t)); 881 strncpy(linux_newuname.sysname, ostype, 882 sizeof(linux_newuname.sysname) - 1); 883 strncpy(linux_newuname.nodename, hostname, 884 sizeof(linux_newuname.nodename) - 1); 885 strncpy(linux_newuname.release, osrelease, 886 sizeof(linux_newuname.release) - 1); 887 strncpy(linux_newuname.version, version, 888 sizeof(linux_newuname.version) - 1); 889 strncpy(linux_newuname.machine, machine, 890 sizeof(linux_newuname.machine) - 1); 891 strncpy(linux_newuname.domainname, domainname, 892 sizeof(linux_newuname.domainname) - 1); 893 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf, 894 sizeof(struct linux_newuname_t))); 895 } 896 897 struct linux_utimbuf { 898 linux_time_t l_actime; 899 linux_time_t l_modtime; 900 }; 901 902 int 903 linux_utime(struct proc *p, struct linux_utime_args *args) 904 { 905 struct utimes_args /* { 906 char *path; 907 struct timeval *tptr; 908 } */ bsdutimes; 909 struct timeval tv[2], *tvp; 910 struct linux_utimbuf lut; 911 int error; 912 caddr_t sg; 913 914 sg = stackgap_init(); 915 CHECKALTEXIST(p, &sg, args->fname); 916 917 #ifdef DEBUG 918 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname); 919 #endif 920 if (args->times) { 921 if ((error = copyin(args->times, &lut, sizeof lut))) 922 return error; 923 tv[0].tv_sec = lut.l_actime; 924 tv[0].tv_usec = 0; 925 tv[1].tv_sec = lut.l_modtime; 926 tv[1].tv_usec = 0; 927 /* so that utimes can copyin */ 928 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 929 if ((error = copyout(tv, tvp, sizeof(tv)))) 930 return error; 931 bsdutimes.tptr = tvp; 932 } else 933 bsdutimes.tptr = NULL; 934 935 bsdutimes.path = args->fname; 936 return utimes(p, &bsdutimes); 937 } 938 939 int 940 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 941 { 942 struct wait_args /* { 943 int pid; 944 int *status; 945 int options; 946 struct rusage *rusage; 947 } */ tmp; 948 int error, tmpstat; 949 950 #ifdef DEBUG 951 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 952 (long)p->p_pid, args->pid, (void *)args->status, args->options); 953 #endif 954 tmp.pid = args->pid; 955 tmp.status = args->status; 956 /* This filters out the linux option _WCLONE. I don't 957 * think we need it, but I could be wrong. If we need 958 * it, we need to fix wait4, since it will give us an 959 * error return of EINVAL if we pass in _WCLONE, and 960 * of course, it won't do anything with it. 961 */ 962 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 963 tmp.rusage = NULL; 964 965 if (error = wait4(p, &tmp)) 966 return error; 967 968 if (args->status) { 969 if (error = copyin(args->status, &tmpstat, sizeof(int))) 970 return error; 971 if (WIFSIGNALED(tmpstat)) 972 tmpstat = (tmpstat & 0xffffff80) | 973 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 974 else if (WIFSTOPPED(tmpstat)) 975 tmpstat = (tmpstat & 0xffff00ff) | 976 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 977 return copyout(&tmpstat, args->status, sizeof(int)); 978 } else 979 return 0; 980 } 981 982 int 983 linux_wait4(struct proc *p, struct linux_wait4_args *args) 984 { 985 struct wait_args /* { 986 int pid; 987 int *status; 988 int options; 989 struct rusage *rusage; 990 } */ tmp; 991 int error, tmpstat; 992 993 #ifdef DEBUG 994 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 995 (long)p->p_pid, args->pid, (void *)args->status, args->options, 996 (void *)args->rusage); 997 #endif 998 tmp.pid = args->pid; 999 tmp.status = args->status; 1000 /* This filters out the linux option _WCLONE. I don't 1001 * think we need it, but I could be wrong. If we need 1002 * it, we need to fix wait4, since it will give us an 1003 * error return of EINVAL if we pass in _WCLONE, and 1004 * of course, it won't do anything with it. 1005 */ 1006 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1007 tmp.rusage = args->rusage; 1008 1009 if (error = wait4(p, &tmp)) 1010 return error; 1011 1012 p->p_siglist &= ~sigmask(SIGCHLD); 1013 1014 if (args->status) { 1015 if (error = copyin(args->status, &tmpstat, sizeof(int))) 1016 return error; 1017 if (WIFSIGNALED(tmpstat)) 1018 tmpstat = (tmpstat & 0xffffff80) | 1019 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1020 else if (WIFSTOPPED(tmpstat)) 1021 tmpstat = (tmpstat & 0xffff00ff) | 1022 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1023 return copyout(&tmpstat, args->status, sizeof(int)); 1024 } else 1025 return 0; 1026 } 1027 1028 int 1029 linux_mknod(struct proc *p, struct linux_mknod_args *args) 1030 { 1031 caddr_t sg; 1032 struct mknod_args bsd_mknod; 1033 struct mkfifo_args bsd_mkfifo; 1034 1035 sg = stackgap_init(); 1036 1037 CHECKALTCREAT(p, &sg, args->path); 1038 1039 #ifdef DEBUG 1040 printf("Linux-emul(%d): mknod(%s, %d, %d)\n", 1041 p->p_pid, args->path, args->mode, args->dev); 1042 #endif 1043 1044 if (args->mode & S_IFIFO) { 1045 bsd_mkfifo.path = args->path; 1046 bsd_mkfifo.mode = args->mode; 1047 return mkfifo(p, &bsd_mkfifo); 1048 } else { 1049 bsd_mknod.path = args->path; 1050 bsd_mknod.mode = args->mode; 1051 bsd_mknod.dev = args->dev; 1052 return mknod(p, &bsd_mknod); 1053 } 1054 } 1055 1056 /* 1057 * UGH! This is just about the dumbest idea I've ever heard!! 1058 */ 1059 int 1060 linux_personality(struct proc *p, struct linux_personality_args *args) 1061 { 1062 #ifdef DEBUG 1063 printf("Linux-emul(%d): personality(%d)\n", 1064 p->p_pid, args->per); 1065 #endif 1066 if (args->per != 0) 1067 return EINVAL; 1068 1069 /* Yes Jim, it's still a Linux... */ 1070 p->p_retval[0] = 0; 1071 return 0; 1072 } 1073 1074 /* 1075 * Wrappers for get/setitimer for debugging.. 1076 */ 1077 int 1078 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1079 { 1080 struct setitimer_args bsa; 1081 struct itimerval foo; 1082 int error; 1083 1084 #ifdef DEBUG 1085 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1086 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1087 #endif 1088 bsa.which = args->which; 1089 bsa.itv = args->itv; 1090 bsa.oitv = args->oitv; 1091 if (args->itv) { 1092 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1093 sizeof(foo)))) 1094 return error; 1095 #ifdef DEBUG 1096 printf("setitimer: value: sec: %ld, usec: %ld\n", 1097 foo.it_value.tv_sec, foo.it_value.tv_usec); 1098 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1099 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1100 #endif 1101 } 1102 return setitimer(p, &bsa); 1103 } 1104 1105 int 1106 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1107 { 1108 struct getitimer_args bsa; 1109 #ifdef DEBUG 1110 printf("Linux-emul(%ld): getitimer(%p)\n", 1111 (long)p->p_pid, (void *)args->itv); 1112 #endif 1113 bsa.which = args->which; 1114 bsa.itv = args->itv; 1115 return getitimer(p, &bsa); 1116 } 1117 1118 int 1119 linux_iopl(struct proc *p, struct linux_iopl_args *args) 1120 { 1121 int error; 1122 1123 error = suser(p->p_ucred, &p->p_acflag); 1124 if (error != 0) 1125 return error; 1126 if (securelevel > 0) 1127 return EPERM; 1128 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1129 return 0; 1130 } 1131 1132 int 1133 linux_nice(struct proc *p, struct linux_nice_args *args) 1134 { 1135 struct setpriority_args bsd_args; 1136 1137 bsd_args.which = PRIO_PROCESS; 1138 bsd_args.who = 0; /* current process */ 1139 bsd_args.prio = args->inc; 1140 return setpriority(p, &bsd_args); 1141 } 1142 1143 int 1144 linux_setgroups(p, uap) 1145 struct proc *p; 1146 struct linux_setgroups_args *uap; 1147 { 1148 struct pcred *pc = p->p_cred; 1149 linux_gid_t linux_gidset[NGROUPS]; 1150 gid_t *bsd_gidset; 1151 int ngrp, error; 1152 1153 if ((error = suser(pc->pc_ucred, &p->p_acflag))) 1154 return error; 1155 1156 if (uap->gidsetsize > NGROUPS) 1157 return EINVAL; 1158 1159 ngrp = uap->gidsetsize; 1160 pc->pc_ucred = crcopy(pc->pc_ucred); 1161 if (ngrp >= 1) { 1162 if ((error = copyin((caddr_t)uap->gidset, 1163 (caddr_t)linux_gidset, 1164 ngrp * sizeof(linux_gid_t)))) 1165 return error; 1166 1167 pc->pc_ucred->cr_ngroups = ngrp; 1168 1169 bsd_gidset = pc->pc_ucred->cr_groups; 1170 ngrp--; 1171 while (ngrp >= 0) { 1172 bsd_gidset[ngrp] = linux_gidset[ngrp]; 1173 ngrp--; 1174 } 1175 } 1176 else 1177 pc->pc_ucred->cr_ngroups = 1; 1178 1179 setsugid(p); 1180 return 0; 1181 } 1182 1183 int 1184 linux_getgroups(p, uap) 1185 struct proc *p; 1186 struct linux_getgroups_args *uap; 1187 { 1188 struct pcred *pc = p->p_cred; 1189 linux_gid_t linux_gidset[NGROUPS]; 1190 gid_t *bsd_gidset; 1191 int ngrp, error; 1192 1193 if ((ngrp = uap->gidsetsize) == 0) { 1194 p->p_retval[0] = pc->pc_ucred->cr_ngroups; 1195 return 0; 1196 } 1197 1198 if (ngrp < pc->pc_ucred->cr_ngroups) 1199 return EINVAL; 1200 1201 ngrp = 0; 1202 bsd_gidset = pc->pc_ucred->cr_groups; 1203 while (ngrp < pc->pc_ucred->cr_ngroups) { 1204 linux_gidset[ngrp] = bsd_gidset[ngrp]; 1205 ngrp++; 1206 } 1207 1208 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1209 ngrp * sizeof(linux_gid_t)))) 1210 return error; 1211 1212 p->p_retval[0] = ngrp; 1213 return (0); 1214 } 1215