1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $Id: linux_misc.c,v 1.52 1999/01/26 02:38:10 julian Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysproto.h> 34 #include <sys/kernel.h> 35 #include <sys/mman.h> 36 #include <sys/proc.h> 37 #include <sys/fcntl.h> 38 #include <sys/imgact_aout.h> 39 #include <sys/mount.h> 40 #include <sys/namei.h> 41 #include <sys/resourcevar.h> 42 #include <sys/stat.h> 43 #include <sys/sysctl.h> 44 #include <sys/unistd.h> 45 #include <sys/vnode.h> 46 #include <sys/wait.h> 47 #include <sys/time.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 #include <vm/vm_kern.h> 52 #include <vm/vm_prot.h> 53 #include <vm/vm_map.h> 54 #include <vm/vm_extern.h> 55 56 #include <machine/frame.h> 57 #include <machine/psl.h> 58 59 #include <i386/linux/linux.h> 60 #include <i386/linux/linux_proto.h> 61 #include <i386/linux/linux_util.h> 62 63 int 64 linux_alarm(struct proc *p, struct linux_alarm_args *args) 65 { 66 struct itimerval it, old_it; 67 struct timeval tv; 68 int s; 69 70 #ifdef DEBUG 71 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 72 #endif 73 if (args->secs > 100000000) 74 return EINVAL; 75 it.it_value.tv_sec = (long)args->secs; 76 it.it_value.tv_usec = 0; 77 it.it_interval.tv_sec = 0; 78 it.it_interval.tv_usec = 0; 79 s = splsoftclock(); 80 old_it = p->p_realtimer; 81 getmicrouptime(&tv); 82 if (timevalisset(&old_it.it_value)) 83 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 84 if (it.it_value.tv_sec != 0) { 85 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 86 timevaladd(&it.it_value, &tv); 87 } 88 p->p_realtimer = it; 89 splx(s); 90 if (timevalcmp(&old_it.it_value, &tv, >)) { 91 timevalsub(&old_it.it_value, &tv); 92 if (old_it.it_value.tv_usec != 0) 93 old_it.it_value.tv_sec++; 94 p->p_retval[0] = old_it.it_value.tv_sec; 95 } 96 return 0; 97 } 98 99 int 100 linux_brk(struct proc *p, struct linux_brk_args *args) 101 { 102 #if 0 103 struct vmspace *vm = p->p_vmspace; 104 vm_offset_t new, old; 105 int error; 106 107 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 108 return EINVAL; 109 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 110 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 111 return ENOMEM; 112 113 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 114 new = round_page((vm_offset_t)args->dsend); 115 p->p_retval[0] = old; 116 if ((new-old) > 0) { 117 if (swap_pager_full) 118 return ENOMEM; 119 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 120 VM_PROT_ALL, VM_PROT_ALL, 0); 121 if (error) 122 return error; 123 vm->vm_dsize += btoc((new-old)); 124 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 125 } 126 return 0; 127 #else 128 struct vmspace *vm = p->p_vmspace; 129 vm_offset_t new, old; 130 struct obreak_args /* { 131 char * nsize; 132 } */ tmp; 133 134 #ifdef DEBUG 135 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 136 #endif 137 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 138 new = (vm_offset_t)args->dsend; 139 tmp.nsize = (char *) new; 140 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 141 p->p_retval[0] = (int)new; 142 else 143 p->p_retval[0] = (int)old; 144 145 return 0; 146 #endif 147 } 148 149 int 150 linux_uselib(struct proc *p, struct linux_uselib_args *args) 151 { 152 struct nameidata ni; 153 struct vnode *vp; 154 struct exec *a_out; 155 struct vattr attr; 156 vm_offset_t vmaddr; 157 unsigned long file_offset; 158 vm_offset_t buffer; 159 unsigned long bss_size; 160 int error; 161 caddr_t sg; 162 int locked; 163 164 sg = stackgap_init(); 165 CHECKALTEXIST(p, &sg, args->library); 166 167 #ifdef DEBUG 168 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library); 169 #endif 170 171 a_out = NULL; 172 locked = 0; 173 vp = NULL; 174 175 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p); 176 if (error = namei(&ni)) 177 goto cleanup; 178 179 vp = ni.ni_vp; 180 if (vp == NULL) { 181 error = ENOEXEC; /* ?? */ 182 goto cleanup; 183 } 184 185 /* 186 * From here on down, we have a locked vnode that must be unlocked. 187 */ 188 locked++; 189 190 /* 191 * Writable? 192 */ 193 if (vp->v_writecount) { 194 error = ETXTBSY; 195 goto cleanup; 196 } 197 198 /* 199 * Executable? 200 */ 201 if (error = VOP_GETATTR(vp, &attr, p->p_ucred, p)) 202 goto cleanup; 203 204 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 205 ((attr.va_mode & 0111) == 0) || 206 (attr.va_type != VREG)) { 207 error = ENOEXEC; 208 goto cleanup; 209 } 210 211 /* 212 * Sensible size? 213 */ 214 if (attr.va_size == 0) { 215 error = ENOEXEC; 216 goto cleanup; 217 } 218 219 /* 220 * Can we access it? 221 */ 222 if (error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p)) 223 goto cleanup; 224 225 if (error = VOP_OPEN(vp, FREAD, p->p_ucred, p)) 226 goto cleanup; 227 228 /* 229 * Lock no longer needed 230 */ 231 VOP_UNLOCK(vp, 0, p); 232 locked = 0; 233 234 /* 235 * Pull in executable header into kernel_map 236 */ 237 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 238 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 239 if (error) 240 goto cleanup; 241 242 /* 243 * Is it a Linux binary ? 244 */ 245 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 246 error = ENOEXEC; 247 goto cleanup; 248 } 249 250 /* While we are here, we should REALLY do some more checks */ 251 252 /* 253 * Set file/virtual offset based on a.out variant. 254 */ 255 switch ((int)(a_out->a_magic & 0xffff)) { 256 case 0413: /* ZMAGIC */ 257 file_offset = 1024; 258 break; 259 case 0314: /* QMAGIC */ 260 file_offset = 0; 261 break; 262 default: 263 error = ENOEXEC; 264 goto cleanup; 265 } 266 267 bss_size = round_page(a_out->a_bss); 268 269 /* 270 * Check various fields in header for validity/bounds. 271 */ 272 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 273 error = ENOEXEC; 274 goto cleanup; 275 } 276 277 /* text + data can't exceed file size */ 278 if (a_out->a_data + a_out->a_text > attr.va_size) { 279 error = EFAULT; 280 goto cleanup; 281 } 282 283 /* 284 * text/data/bss must not exceed limits 285 * XXX: this is not complete. it should check current usage PLUS 286 * the resources needed by this library. 287 */ 288 if (a_out->a_text > MAXTSIZ || 289 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 290 error = ENOMEM; 291 goto cleanup; 292 } 293 294 /* 295 * prevent more writers 296 */ 297 vp->v_flag |= VTEXT; 298 299 /* 300 * Check if file_offset page aligned,. 301 * Currently we cannot handle misalinged file offsets, 302 * and so we read in the entire image (what a waste). 303 */ 304 if (file_offset & PAGE_MASK) { 305 #ifdef DEBUG 306 printf("uselib: Non page aligned binary %lu\n", file_offset); 307 #endif 308 /* 309 * Map text+data read/write/execute 310 */ 311 312 /* a_entry is the load address and is page aligned */ 313 vmaddr = trunc_page(a_out->a_entry); 314 315 /* get anon user mapping, read+write+execute */ 316 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 317 a_out->a_text + a_out->a_data, FALSE, 318 VM_PROT_ALL, VM_PROT_ALL, 0); 319 if (error) 320 goto cleanup; 321 322 /* map file into kernel_map */ 323 error = vm_mmap(kernel_map, &buffer, 324 round_page(a_out->a_text + a_out->a_data + file_offset), 325 VM_PROT_READ, VM_PROT_READ, 0, 326 (caddr_t)vp, trunc_page(file_offset)); 327 if (error) 328 goto cleanup; 329 330 /* copy from kernel VM space to user space */ 331 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 332 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 333 334 /* release temporary kernel space */ 335 vm_map_remove(kernel_map, buffer, 336 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 337 338 if (error) 339 goto cleanup; 340 } 341 else { 342 #ifdef DEBUG 343 printf("uselib: Page aligned binary %lu\n", file_offset); 344 #endif 345 /* 346 * for QMAGIC, a_entry is 20 bytes beyond the load address 347 * to skip the executable header 348 */ 349 vmaddr = trunc_page(a_out->a_entry); 350 351 /* 352 * Map it all into the process's space as a single copy-on-write 353 * "data" segment. 354 */ 355 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 356 a_out->a_text + a_out->a_data, 357 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 358 (caddr_t)vp, file_offset); 359 if (error) 360 goto cleanup; 361 } 362 #ifdef DEBUG 363 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 364 #endif 365 if (bss_size != 0) { 366 /* 367 * Calculate BSS start address 368 */ 369 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 370 371 /* 372 * allocate some 'anon' space 373 */ 374 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 375 bss_size, FALSE, 376 VM_PROT_ALL, VM_PROT_ALL, 0); 377 if (error) 378 goto cleanup; 379 } 380 381 cleanup: 382 /* 383 * Unlock vnode if needed 384 */ 385 if (locked) 386 VOP_UNLOCK(vp, 0, p); 387 388 /* 389 * Release the kernel mapping. 390 */ 391 if (a_out) 392 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 393 394 return error; 395 } 396 397 /* XXX move */ 398 struct linux_select_argv { 399 int nfds; 400 fd_set *readfds; 401 fd_set *writefds; 402 fd_set *exceptfds; 403 struct timeval *timeout; 404 }; 405 406 int 407 linux_select(struct proc *p, struct linux_select_args *args) 408 { 409 struct linux_select_argv linux_args; 410 struct linux_newselect_args newsel; 411 int error; 412 413 #ifdef SELECT_DEBUG 414 printf("Linux-emul(%d): select(%x)\n", 415 p->p_pid, args->ptr); 416 #endif 417 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 418 sizeof(linux_args)))) 419 return error; 420 421 newsel.nfds = linux_args.nfds; 422 newsel.readfds = linux_args.readfds; 423 newsel.writefds = linux_args.writefds; 424 newsel.exceptfds = linux_args.exceptfds; 425 newsel.timeout = linux_args.timeout; 426 427 return linux_newselect(p, &newsel); 428 } 429 430 int 431 linux_newselect(struct proc *p, struct linux_newselect_args *args) 432 { 433 struct select_args bsa; 434 struct timeval tv0, tv1, utv, *tvp; 435 caddr_t sg; 436 int error; 437 438 #ifdef DEBUG 439 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 440 (long)p->p_pid, args->nfds, (void *)args->readfds, 441 (void *)args->writefds, (void *)args->exceptfds, 442 (void *)args->timeout); 443 #endif 444 error = 0; 445 bsa.nd = args->nfds; 446 bsa.in = args->readfds; 447 bsa.ou = args->writefds; 448 bsa.ex = args->exceptfds; 449 bsa.tv = args->timeout; 450 451 /* 452 * Store current time for computation of the amount of 453 * time left. 454 */ 455 if (args->timeout) { 456 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 457 goto select_out; 458 #ifdef DEBUG 459 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 460 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 461 #endif 462 if (itimerfix(&utv)) { 463 /* 464 * The timeval was invalid. Convert it to something 465 * valid that will act as it does under Linux. 466 */ 467 sg = stackgap_init(); 468 tvp = stackgap_alloc(&sg, sizeof(utv)); 469 utv.tv_sec += utv.tv_usec / 1000000; 470 utv.tv_usec %= 1000000; 471 if (utv.tv_usec < 0) { 472 utv.tv_sec -= 1; 473 utv.tv_usec += 1000000; 474 } 475 if (utv.tv_sec < 0) 476 timevalclear(&utv); 477 if ((error = copyout(&utv, tvp, sizeof(utv)))) 478 goto select_out; 479 bsa.tv = tvp; 480 } 481 microtime(&tv0); 482 } 483 484 error = select(p, &bsa); 485 #ifdef DEBUG 486 printf("Linux-emul(%d): real select returns %d\n", 487 p->p_pid, error); 488 #endif 489 490 if (error) { 491 /* 492 * See fs/select.c in the Linux kernel. Without this, 493 * Maelstrom doesn't work. 494 */ 495 if (error == ERESTART) 496 error = EINTR; 497 goto select_out; 498 } 499 500 if (args->timeout) { 501 if (p->p_retval[0]) { 502 /* 503 * Compute how much time was left of the timeout, 504 * by subtracting the current time and the time 505 * before we started the call, and subtracting 506 * that result from the user-supplied value. 507 */ 508 microtime(&tv1); 509 timevalsub(&tv1, &tv0); 510 timevalsub(&utv, &tv1); 511 if (utv.tv_sec < 0) 512 timevalclear(&utv); 513 } else 514 timevalclear(&utv); 515 #ifdef DEBUG 516 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 517 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 518 #endif 519 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 520 goto select_out; 521 } 522 523 select_out: 524 #ifdef DEBUG 525 printf("Linux-emul(%d): newselect_out -> %d\n", 526 p->p_pid, error); 527 #endif 528 return error; 529 } 530 531 int 532 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 533 { 534 struct proc *curproc; 535 536 #ifdef DEBUG 537 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid); 538 #endif 539 if (args->pid != p->p_pid) { 540 if (!(curproc = pfind(args->pid))) 541 return ESRCH; 542 } 543 else 544 curproc = p; 545 p->p_retval[0] = curproc->p_pgid; 546 return 0; 547 } 548 549 int 550 linux_fork(struct proc *p, struct linux_fork_args *args) 551 { 552 int error; 553 554 #ifdef DEBUG 555 printf("Linux-emul(%d): fork()\n", p->p_pid); 556 #endif 557 if ((error = fork(p, (struct fork_args *)args)) != 0) 558 return error; 559 if (p->p_retval[1] == 1) 560 p->p_retval[0] = 0; 561 return 0; 562 } 563 564 #define CLONE_VM 0x100 565 #define CLONE_FS 0x200 566 #define CLONE_FILES 0x400 567 #define CLONE_SIGHAND 0x800 568 #define CLONE_PID 0x1000 569 570 int 571 linux_clone(struct proc *p, struct linux_clone_args *args) 572 { 573 int error, ff = RFPROC; 574 struct proc *p2; 575 int exit_signal; 576 vm_offset_t start; 577 struct rfork_args rf_args; 578 579 #ifdef SMP 580 printf("linux_clone(%d): does not work with SMP yet\n", p->p_pid); 581 return (EOPNOTSUPP); 582 #endif 583 #ifdef DEBUG 584 if (args->flags & CLONE_PID) 585 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid); 586 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid, 587 (unsigned int)args->flags, (unsigned int)args->stack); 588 #endif 589 590 if (!args->stack) 591 return (EINVAL); 592 593 exit_signal = args->flags & 0x000000ff; 594 if (exit_signal >= LINUX_NSIG) 595 return EINVAL; 596 exit_signal = linux_to_bsd_signal[exit_signal]; 597 598 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 599 ff |= RFTHREAD; 600 601 if (args->flags & CLONE_VM) 602 ff |= RFMEM; 603 if (args->flags & CLONE_SIGHAND) 604 ff |= RFSIGSHARE; 605 if (!(args->flags & CLONE_FILES)) 606 ff |= RFFDG; 607 608 error = 0; 609 start = 0; 610 611 rf_args.flags = ff; 612 if ((error = rfork(p, &rf_args)) != 0) 613 return error; 614 615 p2 = pfind(p->p_retval[0]); 616 if (p2 == 0) 617 return ESRCH; 618 619 p2->p_sigparent = exit_signal; 620 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 621 622 #ifdef DEBUG 623 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid); 624 #endif 625 return 0; 626 } 627 628 /* XXX move */ 629 struct linux_mmap_argv { 630 linux_caddr_t addr; 631 int len; 632 int prot; 633 int flags; 634 int fd; 635 int pos; 636 }; 637 638 #define STACK_SIZE (2 * 1024 * 1024) 639 #define GUARD_SIZE (4 * PAGE_SIZE) 640 int 641 linux_mmap(struct proc *p, struct linux_mmap_args *args) 642 { 643 struct mmap_args /* { 644 caddr_t addr; 645 size_t len; 646 int prot; 647 int flags; 648 int fd; 649 long pad; 650 off_t pos; 651 } */ bsd_args; 652 int error; 653 struct linux_mmap_argv linux_args; 654 655 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 656 sizeof(linux_args)))) 657 return error; 658 #ifdef DEBUG 659 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 660 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 661 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 662 #endif 663 bsd_args.flags = 0; 664 if (linux_args.flags & LINUX_MAP_SHARED) 665 bsd_args.flags |= MAP_SHARED; 666 if (linux_args.flags & LINUX_MAP_PRIVATE) 667 bsd_args.flags |= MAP_PRIVATE; 668 if (linux_args.flags & LINUX_MAP_FIXED) 669 bsd_args.flags |= MAP_FIXED; 670 if (linux_args.flags & LINUX_MAP_ANON) 671 bsd_args.flags |= MAP_ANON; 672 673 #ifndef VM_STACK 674 /* Linux Threads will map into the proc stack space, unless 675 * we prevent it. This causes problems if we're not using 676 * our VM_STACK options. 677 */ 678 if ((unsigned int)linux_args.addr + linux_args.len > (USRSTACK - MAXSSIZ)) 679 return (EINVAL); 680 #endif 681 682 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 683 684 #ifdef VM_STACK 685 bsd_args.flags |= MAP_STACK; 686 #endif 687 688 /* The linux MAP_GROWSDOWN option does not limit auto 689 * growth of the region. Linux mmap with this option 690 * takes as addr the inital BOS, and as len, the initial 691 * region size. It can then grow down from addr without 692 * limit. However, linux threads has an implicit internal 693 * limit to stack size of STACK_SIZE. Its just not 694 * enforced explicitly in linux. But, here we impose 695 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 696 * region, since we can do this with our mmap. 697 * 698 * Our mmap with MAP_STACK takes addr as the maximum 699 * downsize limit on BOS, and as len the max size of 700 * the region. It them maps the top SGROWSIZ bytes, 701 * and autgrows the region down, up to the limit 702 * in addr. 703 * 704 * If we don't use the MAP_STACK option, the effect 705 * of this code is to allocate a stack region of a 706 * fixed size of (STACK_SIZE - GUARD_SIZE). 707 */ 708 709 /* This gives us TOS */ 710 bsd_args.addr = linux_args.addr + linux_args.len; 711 712 /* This gives us our maximum stack size */ 713 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 714 bsd_args.len = linux_args.len; 715 else 716 bsd_args.len = STACK_SIZE - GUARD_SIZE; 717 718 /* This gives us a new BOS. If we're using VM_STACK, then 719 * mmap will just map the top SGROWSIZ bytes, and let 720 * the stack grow down to the limit at BOS. If we're 721 * not using VM_STACK we map the full stack, since we 722 * don't have a way to autogrow it. 723 */ 724 bsd_args.addr -= bsd_args.len; 725 726 } else { 727 bsd_args.addr = linux_args.addr; 728 bsd_args.len = linux_args.len; 729 } 730 731 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 732 bsd_args.fd = linux_args.fd; 733 bsd_args.pos = linux_args.pos; 734 bsd_args.pad = 0; 735 return mmap(p, &bsd_args); 736 } 737 738 int 739 linux_mremap(struct proc *p, struct linux_mremap_args *args) 740 { 741 struct munmap_args /* { 742 void *addr; 743 size_t len; 744 } */ bsd_args; 745 int error = 0; 746 747 #ifdef DEBUG 748 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 749 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 750 args->flags); 751 #endif 752 args->new_len = round_page(args->new_len); 753 args->old_len = round_page(args->old_len); 754 755 if (args->new_len > args->old_len) { 756 p->p_retval[0] = 0; 757 return ENOMEM; 758 } 759 760 if (args->new_len < args->old_len) { 761 bsd_args.addr = args->addr + args->new_len; 762 bsd_args.len = args->old_len - args->new_len; 763 error = munmap(p, &bsd_args); 764 } 765 766 p->p_retval[0] = error ? 0 : (int)args->addr; 767 return error; 768 } 769 770 int 771 linux_msync(struct proc *p, struct linux_msync_args *args) 772 { 773 struct msync_args bsd_args; 774 775 bsd_args.addr = args->addr; 776 bsd_args.len = args->len; 777 bsd_args.flags = 0; /* XXX ignore */ 778 779 return msync(p, &bsd_args); 780 } 781 782 int 783 linux_pipe(struct proc *p, struct linux_pipe_args *args) 784 { 785 int error; 786 int reg_edx; 787 788 #ifdef DEBUG 789 printf("Linux-emul(%d): pipe(*)\n", p->p_pid); 790 #endif 791 reg_edx = p->p_retval[1]; 792 if (error = pipe(p, 0)) { 793 p->p_retval[1] = reg_edx; 794 return error; 795 } 796 797 if (error = copyout(p->p_retval, args->pipefds, 2*sizeof(int))) { 798 p->p_retval[1] = reg_edx; 799 return error; 800 } 801 802 p->p_retval[1] = reg_edx; 803 p->p_retval[0] = 0; 804 return 0; 805 } 806 807 int 808 linux_time(struct proc *p, struct linux_time_args *args) 809 { 810 struct timeval tv; 811 linux_time_t tm; 812 int error; 813 814 #ifdef DEBUG 815 printf("Linux-emul(%d): time(*)\n", p->p_pid); 816 #endif 817 microtime(&tv); 818 tm = tv.tv_sec; 819 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 820 return error; 821 p->p_retval[0] = tm; 822 return 0; 823 } 824 825 struct linux_times_argv { 826 long tms_utime; 827 long tms_stime; 828 long tms_cutime; 829 long tms_cstime; 830 }; 831 832 #define CLK_TCK 100 /* Linux uses 100 */ 833 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 834 835 int 836 linux_times(struct proc *p, struct linux_times_args *args) 837 { 838 struct timeval tv; 839 struct linux_times_argv tms; 840 struct rusage ru; 841 int error; 842 843 #ifdef DEBUG 844 printf("Linux-emul(%d): times(*)\n", p->p_pid); 845 #endif 846 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 847 848 tms.tms_utime = CONVTCK(ru.ru_utime); 849 tms.tms_stime = CONVTCK(ru.ru_stime); 850 851 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 852 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 853 854 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 855 sizeof(struct linux_times_argv)))) 856 return error; 857 858 microuptime(&tv); 859 p->p_retval[0] = (int)CONVTCK(tv); 860 return 0; 861 } 862 863 /* XXX move */ 864 struct linux_newuname_t { 865 char sysname[65]; 866 char nodename[65]; 867 char release[65]; 868 char version[65]; 869 char machine[65]; 870 char domainname[65]; 871 }; 872 873 int 874 linux_newuname(struct proc *p, struct linux_newuname_args *args) 875 { 876 struct linux_newuname_t linux_newuname; 877 878 #ifdef DEBUG 879 printf("Linux-emul(%d): newuname(*)\n", p->p_pid); 880 #endif 881 bzero(&linux_newuname, sizeof(struct linux_newuname_t)); 882 strncpy(linux_newuname.sysname, ostype, 883 sizeof(linux_newuname.sysname) - 1); 884 strncpy(linux_newuname.nodename, hostname, 885 sizeof(linux_newuname.nodename) - 1); 886 strncpy(linux_newuname.release, osrelease, 887 sizeof(linux_newuname.release) - 1); 888 strncpy(linux_newuname.version, version, 889 sizeof(linux_newuname.version) - 1); 890 strncpy(linux_newuname.machine, machine, 891 sizeof(linux_newuname.machine) - 1); 892 strncpy(linux_newuname.domainname, domainname, 893 sizeof(linux_newuname.domainname) - 1); 894 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf, 895 sizeof(struct linux_newuname_t))); 896 } 897 898 struct linux_utimbuf { 899 linux_time_t l_actime; 900 linux_time_t l_modtime; 901 }; 902 903 int 904 linux_utime(struct proc *p, struct linux_utime_args *args) 905 { 906 struct utimes_args /* { 907 char *path; 908 struct timeval *tptr; 909 } */ bsdutimes; 910 struct timeval tv[2], *tvp; 911 struct linux_utimbuf lut; 912 int error; 913 caddr_t sg; 914 915 sg = stackgap_init(); 916 CHECKALTEXIST(p, &sg, args->fname); 917 918 #ifdef DEBUG 919 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname); 920 #endif 921 if (args->times) { 922 if ((error = copyin(args->times, &lut, sizeof lut))) 923 return error; 924 tv[0].tv_sec = lut.l_actime; 925 tv[0].tv_usec = 0; 926 tv[1].tv_sec = lut.l_modtime; 927 tv[1].tv_usec = 0; 928 /* so that utimes can copyin */ 929 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 930 if ((error = copyout(tv, tvp, sizeof(tv)))) 931 return error; 932 bsdutimes.tptr = tvp; 933 } else 934 bsdutimes.tptr = NULL; 935 936 bsdutimes.path = args->fname; 937 return utimes(p, &bsdutimes); 938 } 939 940 #define __WCLONE 0x80000000 941 942 int 943 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 944 { 945 struct wait_args /* { 946 int pid; 947 int *status; 948 int options; 949 struct rusage *rusage; 950 } */ tmp; 951 int error, tmpstat; 952 953 #ifdef DEBUG 954 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 955 (long)p->p_pid, args->pid, (void *)args->status, args->options); 956 #endif 957 tmp.pid = args->pid; 958 tmp.status = args->status; 959 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 960 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 961 if (args->options & __WCLONE) 962 tmp.options |= WLINUXCLONE; 963 tmp.rusage = NULL; 964 965 if ((error = wait4(p, &tmp)) != 0) 966 return error; 967 968 if (args->status) { 969 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 970 return error; 971 if (WIFSIGNALED(tmpstat)) 972 tmpstat = (tmpstat & 0xffffff80) | 973 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 974 else if (WIFSTOPPED(tmpstat)) 975 tmpstat = (tmpstat & 0xffff00ff) | 976 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 977 return copyout(&tmpstat, args->status, sizeof(int)); 978 } else 979 return 0; 980 } 981 982 int 983 linux_wait4(struct proc *p, struct linux_wait4_args *args) 984 { 985 struct wait_args /* { 986 int pid; 987 int *status; 988 int options; 989 struct rusage *rusage; 990 } */ tmp; 991 int error, tmpstat; 992 993 #ifdef DEBUG 994 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 995 (long)p->p_pid, args->pid, (void *)args->status, args->options, 996 (void *)args->rusage); 997 #endif 998 tmp.pid = args->pid; 999 tmp.status = args->status; 1000 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 1001 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 1002 if (args->options & __WCLONE) 1003 tmp.options |= WLINUXCLONE; 1004 tmp.rusage = args->rusage; 1005 1006 if ((error = wait4(p, &tmp)) != 0) 1007 return error; 1008 1009 p->p_siglist &= ~sigmask(SIGCHLD); 1010 1011 if (args->status) { 1012 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 1013 return error; 1014 if (WIFSIGNALED(tmpstat)) 1015 tmpstat = (tmpstat & 0xffffff80) | 1016 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1017 else if (WIFSTOPPED(tmpstat)) 1018 tmpstat = (tmpstat & 0xffff00ff) | 1019 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1020 return copyout(&tmpstat, args->status, sizeof(int)); 1021 } else 1022 return 0; 1023 } 1024 1025 int 1026 linux_mknod(struct proc *p, struct linux_mknod_args *args) 1027 { 1028 caddr_t sg; 1029 struct mknod_args bsd_mknod; 1030 struct mkfifo_args bsd_mkfifo; 1031 1032 sg = stackgap_init(); 1033 1034 CHECKALTCREAT(p, &sg, args->path); 1035 1036 #ifdef DEBUG 1037 printf("Linux-emul(%d): mknod(%s, %d, %d)\n", 1038 p->p_pid, args->path, args->mode, args->dev); 1039 #endif 1040 1041 if (args->mode & S_IFIFO) { 1042 bsd_mkfifo.path = args->path; 1043 bsd_mkfifo.mode = args->mode; 1044 return mkfifo(p, &bsd_mkfifo); 1045 } else { 1046 bsd_mknod.path = args->path; 1047 bsd_mknod.mode = args->mode; 1048 bsd_mknod.dev = args->dev; 1049 return mknod(p, &bsd_mknod); 1050 } 1051 } 1052 1053 /* 1054 * UGH! This is just about the dumbest idea I've ever heard!! 1055 */ 1056 int 1057 linux_personality(struct proc *p, struct linux_personality_args *args) 1058 { 1059 #ifdef DEBUG 1060 printf("Linux-emul(%d): personality(%d)\n", 1061 p->p_pid, args->per); 1062 #endif 1063 if (args->per != 0) 1064 return EINVAL; 1065 1066 /* Yes Jim, it's still a Linux... */ 1067 p->p_retval[0] = 0; 1068 return 0; 1069 } 1070 1071 /* 1072 * Wrappers for get/setitimer for debugging.. 1073 */ 1074 int 1075 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1076 { 1077 struct setitimer_args bsa; 1078 struct itimerval foo; 1079 int error; 1080 1081 #ifdef DEBUG 1082 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1083 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1084 #endif 1085 bsa.which = args->which; 1086 bsa.itv = args->itv; 1087 bsa.oitv = args->oitv; 1088 if (args->itv) { 1089 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1090 sizeof(foo)))) 1091 return error; 1092 #ifdef DEBUG 1093 printf("setitimer: value: sec: %ld, usec: %ld\n", 1094 foo.it_value.tv_sec, foo.it_value.tv_usec); 1095 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1096 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1097 #endif 1098 } 1099 return setitimer(p, &bsa); 1100 } 1101 1102 int 1103 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1104 { 1105 struct getitimer_args bsa; 1106 #ifdef DEBUG 1107 printf("Linux-emul(%ld): getitimer(%p)\n", 1108 (long)p->p_pid, (void *)args->itv); 1109 #endif 1110 bsa.which = args->which; 1111 bsa.itv = args->itv; 1112 return getitimer(p, &bsa); 1113 } 1114 1115 int 1116 linux_iopl(struct proc *p, struct linux_iopl_args *args) 1117 { 1118 int error; 1119 1120 error = suser(p->p_ucred, &p->p_acflag); 1121 if (error != 0) 1122 return error; 1123 if (securelevel > 0) 1124 return EPERM; 1125 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1126 return 0; 1127 } 1128 1129 int 1130 linux_nice(struct proc *p, struct linux_nice_args *args) 1131 { 1132 struct setpriority_args bsd_args; 1133 1134 bsd_args.which = PRIO_PROCESS; 1135 bsd_args.who = 0; /* current process */ 1136 bsd_args.prio = args->inc; 1137 return setpriority(p, &bsd_args); 1138 } 1139 1140 int 1141 linux_setgroups(p, uap) 1142 struct proc *p; 1143 struct linux_setgroups_args *uap; 1144 { 1145 struct pcred *pc = p->p_cred; 1146 linux_gid_t linux_gidset[NGROUPS]; 1147 gid_t *bsd_gidset; 1148 int ngrp, error; 1149 1150 if ((error = suser(pc->pc_ucred, &p->p_acflag))) 1151 return error; 1152 1153 if (uap->gidsetsize > NGROUPS) 1154 return EINVAL; 1155 1156 ngrp = uap->gidsetsize; 1157 pc->pc_ucred = crcopy(pc->pc_ucred); 1158 if (ngrp >= 1) { 1159 if ((error = copyin((caddr_t)uap->gidset, 1160 (caddr_t)linux_gidset, 1161 ngrp * sizeof(linux_gid_t)))) 1162 return error; 1163 1164 pc->pc_ucred->cr_ngroups = ngrp; 1165 1166 bsd_gidset = pc->pc_ucred->cr_groups; 1167 ngrp--; 1168 while (ngrp >= 0) { 1169 bsd_gidset[ngrp] = linux_gidset[ngrp]; 1170 ngrp--; 1171 } 1172 } 1173 else 1174 pc->pc_ucred->cr_ngroups = 1; 1175 1176 setsugid(p); 1177 return 0; 1178 } 1179 1180 int 1181 linux_getgroups(p, uap) 1182 struct proc *p; 1183 struct linux_getgroups_args *uap; 1184 { 1185 struct pcred *pc = p->p_cred; 1186 linux_gid_t linux_gidset[NGROUPS]; 1187 gid_t *bsd_gidset; 1188 int ngrp, error; 1189 1190 if ((ngrp = uap->gidsetsize) == 0) { 1191 p->p_retval[0] = pc->pc_ucred->cr_ngroups; 1192 return 0; 1193 } 1194 1195 if (ngrp < pc->pc_ucred->cr_ngroups) 1196 return EINVAL; 1197 1198 ngrp = 0; 1199 bsd_gidset = pc->pc_ucred->cr_groups; 1200 while (ngrp < pc->pc_ucred->cr_ngroups) { 1201 linux_gidset[ngrp] = bsd_gidset[ngrp]; 1202 ngrp++; 1203 } 1204 1205 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1206 ngrp * sizeof(linux_gid_t)))) 1207 return error; 1208 1209 p->p_retval[0] = ngrp; 1210 return (0); 1211 } 1212