1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $Id: linux_misc.c,v 1.58 1999/05/06 18:44:25 peter Exp $ 29 */ 30 31 #include <sys/param.h> 32 #include <sys/systm.h> 33 #include <sys/sysproto.h> 34 #include <sys/kernel.h> 35 #include <sys/mman.h> 36 #include <sys/proc.h> 37 #include <sys/fcntl.h> 38 #include <sys/imgact_aout.h> 39 #include <sys/mount.h> 40 #include <sys/namei.h> 41 #include <sys/resourcevar.h> 42 #include <sys/stat.h> 43 #include <sys/sysctl.h> 44 #include <sys/unistd.h> 45 #include <sys/vnode.h> 46 #include <sys/wait.h> 47 #include <sys/time.h> 48 49 #include <vm/vm.h> 50 #include <vm/pmap.h> 51 #include <vm/vm_kern.h> 52 #include <vm/vm_prot.h> 53 #include <vm/vm_map.h> 54 #include <vm/vm_extern.h> 55 56 #include <machine/frame.h> 57 #include <machine/psl.h> 58 59 #include <i386/linux/linux.h> 60 #include <i386/linux/linux_proto.h> 61 #include <i386/linux/linux_util.h> 62 63 int 64 linux_alarm(struct proc *p, struct linux_alarm_args *args) 65 { 66 struct itimerval it, old_it; 67 struct timeval tv; 68 int s; 69 70 #ifdef DEBUG 71 printf("Linux-emul(%ld): alarm(%u)\n", (long)p->p_pid, args->secs); 72 #endif 73 if (args->secs > 100000000) 74 return EINVAL; 75 it.it_value.tv_sec = (long)args->secs; 76 it.it_value.tv_usec = 0; 77 it.it_interval.tv_sec = 0; 78 it.it_interval.tv_usec = 0; 79 s = splsoftclock(); 80 old_it = p->p_realtimer; 81 getmicrouptime(&tv); 82 if (timevalisset(&old_it.it_value)) 83 untimeout(realitexpire, (caddr_t)p, p->p_ithandle); 84 if (it.it_value.tv_sec != 0) { 85 p->p_ithandle = timeout(realitexpire, (caddr_t)p, tvtohz(&it.it_value)); 86 timevaladd(&it.it_value, &tv); 87 } 88 p->p_realtimer = it; 89 splx(s); 90 if (timevalcmp(&old_it.it_value, &tv, >)) { 91 timevalsub(&old_it.it_value, &tv); 92 if (old_it.it_value.tv_usec != 0) 93 old_it.it_value.tv_sec++; 94 p->p_retval[0] = old_it.it_value.tv_sec; 95 } 96 return 0; 97 } 98 99 int 100 linux_brk(struct proc *p, struct linux_brk_args *args) 101 { 102 #if 0 103 struct vmspace *vm = p->p_vmspace; 104 vm_offset_t new, old; 105 int error; 106 107 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 108 return EINVAL; 109 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 110 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 111 return ENOMEM; 112 113 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 114 new = round_page((vm_offset_t)args->dsend); 115 p->p_retval[0] = old; 116 if ((new-old) > 0) { 117 if (swap_pager_full) 118 return ENOMEM; 119 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 120 VM_PROT_ALL, VM_PROT_ALL, 0); 121 if (error) 122 return error; 123 vm->vm_dsize += btoc((new-old)); 124 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 125 } 126 return 0; 127 #else 128 struct vmspace *vm = p->p_vmspace; 129 vm_offset_t new, old; 130 struct obreak_args /* { 131 char * nsize; 132 } */ tmp; 133 134 #ifdef DEBUG 135 printf("Linux-emul(%ld): brk(%p)\n", (long)p->p_pid, (void *)args->dsend); 136 #endif 137 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 138 new = (vm_offset_t)args->dsend; 139 tmp.nsize = (char *) new; 140 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 141 p->p_retval[0] = (int)new; 142 else 143 p->p_retval[0] = (int)old; 144 145 return 0; 146 #endif 147 } 148 149 int 150 linux_uselib(struct proc *p, struct linux_uselib_args *args) 151 { 152 struct nameidata ni; 153 struct vnode *vp; 154 struct exec *a_out; 155 struct vattr attr; 156 vm_offset_t vmaddr; 157 unsigned long file_offset; 158 vm_offset_t buffer; 159 unsigned long bss_size; 160 int error; 161 caddr_t sg; 162 int locked; 163 164 sg = stackgap_init(); 165 CHECKALTEXIST(p, &sg, args->library); 166 167 #ifdef DEBUG 168 printf("Linux-emul(%d): uselib(%s)\n", p->p_pid, args->library); 169 #endif 170 171 a_out = NULL; 172 locked = 0; 173 vp = NULL; 174 175 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, args->library, p); 176 error = namei(&ni); 177 if (error) 178 goto cleanup; 179 180 vp = ni.ni_vp; 181 if (vp == NULL) { 182 error = ENOEXEC; /* ?? */ 183 goto cleanup; 184 } 185 186 /* 187 * From here on down, we have a locked vnode that must be unlocked. 188 */ 189 locked++; 190 191 /* 192 * Writable? 193 */ 194 if (vp->v_writecount) { 195 error = ETXTBSY; 196 goto cleanup; 197 } 198 199 /* 200 * Executable? 201 */ 202 error = VOP_GETATTR(vp, &attr, p->p_ucred, p); 203 if (error) 204 goto cleanup; 205 206 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 207 ((attr.va_mode & 0111) == 0) || 208 (attr.va_type != VREG)) { 209 error = ENOEXEC; 210 goto cleanup; 211 } 212 213 /* 214 * Sensible size? 215 */ 216 if (attr.va_size == 0) { 217 error = ENOEXEC; 218 goto cleanup; 219 } 220 221 /* 222 * Can we access it? 223 */ 224 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); 225 if (error) 226 goto cleanup; 227 228 error = VOP_OPEN(vp, FREAD, p->p_ucred, p); 229 if (error) 230 goto cleanup; 231 232 /* 233 * Lock no longer needed 234 */ 235 VOP_UNLOCK(vp, 0, p); 236 locked = 0; 237 238 /* 239 * Pull in executable header into kernel_map 240 */ 241 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 242 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 243 if (error) 244 goto cleanup; 245 246 /* 247 * Is it a Linux binary ? 248 */ 249 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 250 error = ENOEXEC; 251 goto cleanup; 252 } 253 254 /* While we are here, we should REALLY do some more checks */ 255 256 /* 257 * Set file/virtual offset based on a.out variant. 258 */ 259 switch ((int)(a_out->a_magic & 0xffff)) { 260 case 0413: /* ZMAGIC */ 261 file_offset = 1024; 262 break; 263 case 0314: /* QMAGIC */ 264 file_offset = 0; 265 break; 266 default: 267 error = ENOEXEC; 268 goto cleanup; 269 } 270 271 bss_size = round_page(a_out->a_bss); 272 273 /* 274 * Check various fields in header for validity/bounds. 275 */ 276 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 277 error = ENOEXEC; 278 goto cleanup; 279 } 280 281 /* text + data can't exceed file size */ 282 if (a_out->a_data + a_out->a_text > attr.va_size) { 283 error = EFAULT; 284 goto cleanup; 285 } 286 287 /* 288 * text/data/bss must not exceed limits 289 * XXX: this is not complete. it should check current usage PLUS 290 * the resources needed by this library. 291 */ 292 if (a_out->a_text > MAXTSIZ || 293 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 294 error = ENOMEM; 295 goto cleanup; 296 } 297 298 /* 299 * prevent more writers 300 */ 301 vp->v_flag |= VTEXT; 302 303 /* 304 * Check if file_offset page aligned,. 305 * Currently we cannot handle misalinged file offsets, 306 * and so we read in the entire image (what a waste). 307 */ 308 if (file_offset & PAGE_MASK) { 309 #ifdef DEBUG 310 printf("uselib: Non page aligned binary %lu\n", file_offset); 311 #endif 312 /* 313 * Map text+data read/write/execute 314 */ 315 316 /* a_entry is the load address and is page aligned */ 317 vmaddr = trunc_page(a_out->a_entry); 318 319 /* get anon user mapping, read+write+execute */ 320 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 321 a_out->a_text + a_out->a_data, FALSE, 322 VM_PROT_ALL, VM_PROT_ALL, 0); 323 if (error) 324 goto cleanup; 325 326 /* map file into kernel_map */ 327 error = vm_mmap(kernel_map, &buffer, 328 round_page(a_out->a_text + a_out->a_data + file_offset), 329 VM_PROT_READ, VM_PROT_READ, 0, 330 (caddr_t)vp, trunc_page(file_offset)); 331 if (error) 332 goto cleanup; 333 334 /* copy from kernel VM space to user space */ 335 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 336 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 337 338 /* release temporary kernel space */ 339 vm_map_remove(kernel_map, buffer, 340 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 341 342 if (error) 343 goto cleanup; 344 } 345 else { 346 #ifdef DEBUG 347 printf("uselib: Page aligned binary %lu\n", file_offset); 348 #endif 349 /* 350 * for QMAGIC, a_entry is 20 bytes beyond the load address 351 * to skip the executable header 352 */ 353 vmaddr = trunc_page(a_out->a_entry); 354 355 /* 356 * Map it all into the process's space as a single copy-on-write 357 * "data" segment. 358 */ 359 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 360 a_out->a_text + a_out->a_data, 361 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 362 (caddr_t)vp, file_offset); 363 if (error) 364 goto cleanup; 365 } 366 #ifdef DEBUG 367 printf("mem=%08x = %08x %08x\n", vmaddr, ((int*)vmaddr)[0], ((int*)vmaddr)[1]); 368 #endif 369 if (bss_size != 0) { 370 /* 371 * Calculate BSS start address 372 */ 373 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 374 375 /* 376 * allocate some 'anon' space 377 */ 378 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 379 bss_size, FALSE, 380 VM_PROT_ALL, VM_PROT_ALL, 0); 381 if (error) 382 goto cleanup; 383 } 384 385 cleanup: 386 /* 387 * Unlock vnode if needed 388 */ 389 if (locked) 390 VOP_UNLOCK(vp, 0, p); 391 392 /* 393 * Release the kernel mapping. 394 */ 395 if (a_out) 396 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 397 398 return error; 399 } 400 401 /* XXX move */ 402 struct linux_select_argv { 403 int nfds; 404 fd_set *readfds; 405 fd_set *writefds; 406 fd_set *exceptfds; 407 struct timeval *timeout; 408 }; 409 410 int 411 linux_select(struct proc *p, struct linux_select_args *args) 412 { 413 struct linux_select_argv linux_args; 414 struct linux_newselect_args newsel; 415 int error; 416 417 #ifdef SELECT_DEBUG 418 printf("Linux-emul(%d): select(%x)\n", 419 p->p_pid, args->ptr); 420 #endif 421 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 422 sizeof(linux_args)))) 423 return error; 424 425 newsel.nfds = linux_args.nfds; 426 newsel.readfds = linux_args.readfds; 427 newsel.writefds = linux_args.writefds; 428 newsel.exceptfds = linux_args.exceptfds; 429 newsel.timeout = linux_args.timeout; 430 431 return linux_newselect(p, &newsel); 432 } 433 434 int 435 linux_newselect(struct proc *p, struct linux_newselect_args *args) 436 { 437 struct select_args bsa; 438 struct timeval tv0, tv1, utv, *tvp; 439 caddr_t sg; 440 int error; 441 442 #ifdef DEBUG 443 printf("Linux-emul(%ld): newselect(%d, %p, %p, %p, %p)\n", 444 (long)p->p_pid, args->nfds, (void *)args->readfds, 445 (void *)args->writefds, (void *)args->exceptfds, 446 (void *)args->timeout); 447 #endif 448 error = 0; 449 bsa.nd = args->nfds; 450 bsa.in = args->readfds; 451 bsa.ou = args->writefds; 452 bsa.ex = args->exceptfds; 453 bsa.tv = args->timeout; 454 455 /* 456 * Store current time for computation of the amount of 457 * time left. 458 */ 459 if (args->timeout) { 460 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 461 goto select_out; 462 #ifdef DEBUG 463 printf("Linux-emul(%ld): incoming timeout (%ld/%ld)\n", 464 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 465 #endif 466 if (itimerfix(&utv)) { 467 /* 468 * The timeval was invalid. Convert it to something 469 * valid that will act as it does under Linux. 470 */ 471 sg = stackgap_init(); 472 tvp = stackgap_alloc(&sg, sizeof(utv)); 473 utv.tv_sec += utv.tv_usec / 1000000; 474 utv.tv_usec %= 1000000; 475 if (utv.tv_usec < 0) { 476 utv.tv_sec -= 1; 477 utv.tv_usec += 1000000; 478 } 479 if (utv.tv_sec < 0) 480 timevalclear(&utv); 481 if ((error = copyout(&utv, tvp, sizeof(utv)))) 482 goto select_out; 483 bsa.tv = tvp; 484 } 485 microtime(&tv0); 486 } 487 488 error = select(p, &bsa); 489 #ifdef DEBUG 490 printf("Linux-emul(%d): real select returns %d\n", 491 p->p_pid, error); 492 #endif 493 494 if (error) { 495 /* 496 * See fs/select.c in the Linux kernel. Without this, 497 * Maelstrom doesn't work. 498 */ 499 if (error == ERESTART) 500 error = EINTR; 501 goto select_out; 502 } 503 504 if (args->timeout) { 505 if (p->p_retval[0]) { 506 /* 507 * Compute how much time was left of the timeout, 508 * by subtracting the current time and the time 509 * before we started the call, and subtracting 510 * that result from the user-supplied value. 511 */ 512 microtime(&tv1); 513 timevalsub(&tv1, &tv0); 514 timevalsub(&utv, &tv1); 515 if (utv.tv_sec < 0) 516 timevalclear(&utv); 517 } else 518 timevalclear(&utv); 519 #ifdef DEBUG 520 printf("Linux-emul(%ld): outgoing timeout (%ld/%ld)\n", 521 (long)p->p_pid, utv.tv_sec, utv.tv_usec); 522 #endif 523 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 524 goto select_out; 525 } 526 527 select_out: 528 #ifdef DEBUG 529 printf("Linux-emul(%d): newselect_out -> %d\n", 530 p->p_pid, error); 531 #endif 532 return error; 533 } 534 535 int 536 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 537 { 538 struct proc *curp; 539 540 #ifdef DEBUG 541 printf("Linux-emul(%d): getpgid(%d)\n", p->p_pid, args->pid); 542 #endif 543 if (args->pid != p->p_pid) { 544 if (!(curp = pfind(args->pid))) 545 return ESRCH; 546 } 547 else 548 curp = p; 549 p->p_retval[0] = curp->p_pgid; 550 return 0; 551 } 552 553 int 554 linux_fork(struct proc *p, struct linux_fork_args *args) 555 { 556 int error; 557 558 #ifdef DEBUG 559 printf("Linux-emul(%d): fork()\n", p->p_pid); 560 #endif 561 if ((error = fork(p, (struct fork_args *)args)) != 0) 562 return error; 563 if (p->p_retval[1] == 1) 564 p->p_retval[0] = 0; 565 return 0; 566 } 567 568 #define CLONE_VM 0x100 569 #define CLONE_FS 0x200 570 #define CLONE_FILES 0x400 571 #define CLONE_SIGHAND 0x800 572 #define CLONE_PID 0x1000 573 574 int 575 linux_clone(struct proc *p, struct linux_clone_args *args) 576 { 577 int error, ff = RFPROC; 578 struct proc *p2; 579 int exit_signal; 580 vm_offset_t start; 581 struct rfork_args rf_args; 582 583 #ifdef DEBUG 584 if (args->flags & CLONE_PID) 585 printf("linux_clone(%d): CLONE_PID not yet supported\n", p->p_pid); 586 printf ("linux_clone(%d): invoked with flags %x and stack %x\n", p->p_pid, 587 (unsigned int)args->flags, (unsigned int)args->stack); 588 #endif 589 590 if (!args->stack) 591 return (EINVAL); 592 593 exit_signal = args->flags & 0x000000ff; 594 if (exit_signal >= LINUX_NSIG) 595 return EINVAL; 596 exit_signal = linux_to_bsd_signal[exit_signal]; 597 598 /* RFTHREAD probably not necessary here, but it shouldn't hurt either */ 599 ff |= RFTHREAD; 600 601 if (args->flags & CLONE_VM) 602 ff |= RFMEM; 603 if (args->flags & CLONE_SIGHAND) 604 ff |= RFSIGSHARE; 605 if (!(args->flags & CLONE_FILES)) 606 ff |= RFFDG; 607 608 error = 0; 609 start = 0; 610 611 rf_args.flags = ff; 612 if ((error = rfork(p, &rf_args)) != 0) 613 return error; 614 615 p2 = pfind(p->p_retval[0]); 616 if (p2 == 0) 617 return ESRCH; 618 619 p2->p_sigparent = exit_signal; 620 p2->p_md.md_regs->tf_esp = (unsigned int)args->stack; 621 622 #ifdef DEBUG 623 printf ("linux_clone(%d): successful rfork to %d\n", p->p_pid, p2->p_pid); 624 #endif 625 return 0; 626 } 627 628 /* XXX move */ 629 struct linux_mmap_argv { 630 linux_caddr_t addr; 631 int len; 632 int prot; 633 int flags; 634 int fd; 635 int pos; 636 }; 637 638 #define STACK_SIZE (2 * 1024 * 1024) 639 #define GUARD_SIZE (4 * PAGE_SIZE) 640 int 641 linux_mmap(struct proc *p, struct linux_mmap_args *args) 642 { 643 struct mmap_args /* { 644 caddr_t addr; 645 size_t len; 646 int prot; 647 int flags; 648 int fd; 649 long pad; 650 off_t pos; 651 } */ bsd_args; 652 int error; 653 struct linux_mmap_argv linux_args; 654 655 if ((error = copyin((caddr_t)args->ptr, (caddr_t)&linux_args, 656 sizeof(linux_args)))) 657 return error; 658 #ifdef DEBUG 659 printf("Linux-emul(%ld): mmap(%p, %d, %d, %08x, %d, %d)\n", 660 (long)p->p_pid, (void *)linux_args.addr, linux_args.len, 661 linux_args.prot, linux_args.flags, linux_args.fd, linux_args.pos); 662 #endif 663 bsd_args.flags = 0; 664 if (linux_args.flags & LINUX_MAP_SHARED) 665 bsd_args.flags |= MAP_SHARED; 666 if (linux_args.flags & LINUX_MAP_PRIVATE) 667 bsd_args.flags |= MAP_PRIVATE; 668 if (linux_args.flags & LINUX_MAP_FIXED) 669 bsd_args.flags |= MAP_FIXED; 670 if (linux_args.flags & LINUX_MAP_ANON) 671 bsd_args.flags |= MAP_ANON; 672 if (linux_args.flags & LINUX_MAP_GROWSDOWN) { 673 bsd_args.flags |= MAP_STACK; 674 675 /* The linux MAP_GROWSDOWN option does not limit auto 676 * growth of the region. Linux mmap with this option 677 * takes as addr the inital BOS, and as len, the initial 678 * region size. It can then grow down from addr without 679 * limit. However, linux threads has an implicit internal 680 * limit to stack size of STACK_SIZE. Its just not 681 * enforced explicitly in linux. But, here we impose 682 * a limit of (STACK_SIZE - GUARD_SIZE) on the stack 683 * region, since we can do this with our mmap. 684 * 685 * Our mmap with MAP_STACK takes addr as the maximum 686 * downsize limit on BOS, and as len the max size of 687 * the region. It them maps the top SGROWSIZ bytes, 688 * and autgrows the region down, up to the limit 689 * in addr. 690 * 691 * If we don't use the MAP_STACK option, the effect 692 * of this code is to allocate a stack region of a 693 * fixed size of (STACK_SIZE - GUARD_SIZE). 694 */ 695 696 /* This gives us TOS */ 697 bsd_args.addr = linux_args.addr + linux_args.len; 698 699 /* This gives us our maximum stack size */ 700 if (linux_args.len > STACK_SIZE - GUARD_SIZE) 701 bsd_args.len = linux_args.len; 702 else 703 bsd_args.len = STACK_SIZE - GUARD_SIZE; 704 705 /* This gives us a new BOS. If we're using VM_STACK, then 706 * mmap will just map the top SGROWSIZ bytes, and let 707 * the stack grow down to the limit at BOS. If we're 708 * not using VM_STACK we map the full stack, since we 709 * don't have a way to autogrow it. 710 */ 711 bsd_args.addr -= bsd_args.len; 712 713 } else { 714 bsd_args.addr = linux_args.addr; 715 bsd_args.len = linux_args.len; 716 } 717 718 bsd_args.prot = linux_args.prot | PROT_READ; /* always required */ 719 bsd_args.fd = linux_args.fd; 720 bsd_args.pos = linux_args.pos; 721 bsd_args.pad = 0; 722 return mmap(p, &bsd_args); 723 } 724 725 int 726 linux_mremap(struct proc *p, struct linux_mremap_args *args) 727 { 728 struct munmap_args /* { 729 void *addr; 730 size_t len; 731 } */ bsd_args; 732 int error = 0; 733 734 #ifdef DEBUG 735 printf("Linux-emul(%ld): mremap(%p, %08x, %08x, %08x)\n", 736 (long)p->p_pid, (void *)args->addr, args->old_len, args->new_len, 737 args->flags); 738 #endif 739 args->new_len = round_page(args->new_len); 740 args->old_len = round_page(args->old_len); 741 742 if (args->new_len > args->old_len) { 743 p->p_retval[0] = 0; 744 return ENOMEM; 745 } 746 747 if (args->new_len < args->old_len) { 748 bsd_args.addr = args->addr + args->new_len; 749 bsd_args.len = args->old_len - args->new_len; 750 error = munmap(p, &bsd_args); 751 } 752 753 p->p_retval[0] = error ? 0 : (int)args->addr; 754 return error; 755 } 756 757 int 758 linux_msync(struct proc *p, struct linux_msync_args *args) 759 { 760 struct msync_args bsd_args; 761 762 bsd_args.addr = args->addr; 763 bsd_args.len = args->len; 764 bsd_args.flags = 0; /* XXX ignore */ 765 766 return msync(p, &bsd_args); 767 } 768 769 int 770 linux_pipe(struct proc *p, struct linux_pipe_args *args) 771 { 772 int error; 773 int reg_edx; 774 775 #ifdef DEBUG 776 printf("Linux-emul(%d): pipe(*)\n", p->p_pid); 777 #endif 778 reg_edx = p->p_retval[1]; 779 error = pipe(p, 0); 780 if (error) { 781 p->p_retval[1] = reg_edx; 782 return error; 783 } 784 785 error = copyout(p->p_retval, args->pipefds, 2*sizeof(int)); 786 if (error) { 787 p->p_retval[1] = reg_edx; 788 return error; 789 } 790 791 p->p_retval[1] = reg_edx; 792 p->p_retval[0] = 0; 793 return 0; 794 } 795 796 int 797 linux_time(struct proc *p, struct linux_time_args *args) 798 { 799 struct timeval tv; 800 linux_time_t tm; 801 int error; 802 803 #ifdef DEBUG 804 printf("Linux-emul(%d): time(*)\n", p->p_pid); 805 #endif 806 microtime(&tv); 807 tm = tv.tv_sec; 808 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 809 return error; 810 p->p_retval[0] = tm; 811 return 0; 812 } 813 814 struct linux_times_argv { 815 long tms_utime; 816 long tms_stime; 817 long tms_cutime; 818 long tms_cstime; 819 }; 820 821 #define CLK_TCK 100 /* Linux uses 100 */ 822 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 823 824 int 825 linux_times(struct proc *p, struct linux_times_args *args) 826 { 827 struct timeval tv; 828 struct linux_times_argv tms; 829 struct rusage ru; 830 int error; 831 832 #ifdef DEBUG 833 printf("Linux-emul(%d): times(*)\n", p->p_pid); 834 #endif 835 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 836 837 tms.tms_utime = CONVTCK(ru.ru_utime); 838 tms.tms_stime = CONVTCK(ru.ru_stime); 839 840 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 841 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 842 843 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 844 sizeof(struct linux_times_argv)))) 845 return error; 846 847 microuptime(&tv); 848 p->p_retval[0] = (int)CONVTCK(tv); 849 return 0; 850 } 851 852 /* XXX move */ 853 struct linux_newuname_t { 854 char sysname[65]; 855 char nodename[65]; 856 char release[65]; 857 char version[65]; 858 char machine[65]; 859 char domainname[65]; 860 }; 861 862 int 863 linux_newuname(struct proc *p, struct linux_newuname_args *args) 864 { 865 struct linux_newuname_t linux_newuname; 866 867 #ifdef DEBUG 868 printf("Linux-emul(%d): newuname(*)\n", p->p_pid); 869 #endif 870 bzero(&linux_newuname, sizeof(struct linux_newuname_t)); 871 strncpy(linux_newuname.sysname, "Linux", 872 sizeof(linux_newuname.sysname) - 1); 873 strncpy(linux_newuname.nodename, hostname, 874 sizeof(linux_newuname.nodename) - 1); 875 strncpy(linux_newuname.release, "2.0.36", 876 sizeof(linux_newuname.release) - 1); 877 strncpy(linux_newuname.version, version, 878 sizeof(linux_newuname.version) - 1); 879 strncpy(linux_newuname.machine, machine, 880 sizeof(linux_newuname.machine) - 1); 881 strncpy(linux_newuname.domainname, domainname, 882 sizeof(linux_newuname.domainname) - 1); 883 return (copyout((caddr_t)&linux_newuname, (caddr_t)args->buf, 884 sizeof(struct linux_newuname_t))); 885 } 886 887 struct linux_utimbuf { 888 linux_time_t l_actime; 889 linux_time_t l_modtime; 890 }; 891 892 int 893 linux_utime(struct proc *p, struct linux_utime_args *args) 894 { 895 struct utimes_args /* { 896 char *path; 897 struct timeval *tptr; 898 } */ bsdutimes; 899 struct timeval tv[2], *tvp; 900 struct linux_utimbuf lut; 901 int error; 902 caddr_t sg; 903 904 sg = stackgap_init(); 905 CHECKALTEXIST(p, &sg, args->fname); 906 907 #ifdef DEBUG 908 printf("Linux-emul(%d): utime(%s, *)\n", p->p_pid, args->fname); 909 #endif 910 if (args->times) { 911 if ((error = copyin(args->times, &lut, sizeof lut))) 912 return error; 913 tv[0].tv_sec = lut.l_actime; 914 tv[0].tv_usec = 0; 915 tv[1].tv_sec = lut.l_modtime; 916 tv[1].tv_usec = 0; 917 /* so that utimes can copyin */ 918 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 919 if ((error = copyout(tv, tvp, sizeof(tv)))) 920 return error; 921 bsdutimes.tptr = tvp; 922 } else 923 bsdutimes.tptr = NULL; 924 925 bsdutimes.path = args->fname; 926 return utimes(p, &bsdutimes); 927 } 928 929 #define __WCLONE 0x80000000 930 931 int 932 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 933 { 934 struct wait_args /* { 935 int pid; 936 int *status; 937 int options; 938 struct rusage *rusage; 939 } */ tmp; 940 int error, tmpstat; 941 942 #ifdef DEBUG 943 printf("Linux-emul(%ld): waitpid(%d, %p, %d)\n", 944 (long)p->p_pid, args->pid, (void *)args->status, args->options); 945 #endif 946 tmp.pid = args->pid; 947 tmp.status = args->status; 948 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 949 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 950 if (args->options & __WCLONE) 951 tmp.options |= WLINUXCLONE; 952 tmp.rusage = NULL; 953 954 if ((error = wait4(p, &tmp)) != 0) 955 return error; 956 957 if (args->status) { 958 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 959 return error; 960 if (WIFSIGNALED(tmpstat)) 961 tmpstat = (tmpstat & 0xffffff80) | 962 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 963 else if (WIFSTOPPED(tmpstat)) 964 tmpstat = (tmpstat & 0xffff00ff) | 965 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 966 return copyout(&tmpstat, args->status, sizeof(int)); 967 } else 968 return 0; 969 } 970 971 int 972 linux_wait4(struct proc *p, struct linux_wait4_args *args) 973 { 974 struct wait_args /* { 975 int pid; 976 int *status; 977 int options; 978 struct rusage *rusage; 979 } */ tmp; 980 int error, tmpstat; 981 982 #ifdef DEBUG 983 printf("Linux-emul(%ld): wait4(%d, %p, %d, %p)\n", 984 (long)p->p_pid, args->pid, (void *)args->status, args->options, 985 (void *)args->rusage); 986 #endif 987 tmp.pid = args->pid; 988 tmp.status = args->status; 989 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 990 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 991 if (args->options & __WCLONE) 992 tmp.options |= WLINUXCLONE; 993 tmp.rusage = args->rusage; 994 995 if ((error = wait4(p, &tmp)) != 0) 996 return error; 997 998 p->p_siglist &= ~sigmask(SIGCHLD); 999 1000 if (args->status) { 1001 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 1002 return error; 1003 if (WIFSIGNALED(tmpstat)) 1004 tmpstat = (tmpstat & 0xffffff80) | 1005 bsd_to_linux_signal[WTERMSIG(tmpstat)]; 1006 else if (WIFSTOPPED(tmpstat)) 1007 tmpstat = (tmpstat & 0xffff00ff) | 1008 (bsd_to_linux_signal[WSTOPSIG(tmpstat)]<<8); 1009 return copyout(&tmpstat, args->status, sizeof(int)); 1010 } else 1011 return 0; 1012 } 1013 1014 int 1015 linux_mknod(struct proc *p, struct linux_mknod_args *args) 1016 { 1017 caddr_t sg; 1018 struct mknod_args bsd_mknod; 1019 struct mkfifo_args bsd_mkfifo; 1020 1021 sg = stackgap_init(); 1022 1023 CHECKALTCREAT(p, &sg, args->path); 1024 1025 #ifdef DEBUG 1026 printf("Linux-emul(%d): mknod(%s, %d, %d)\n", 1027 p->p_pid, args->path, args->mode, args->dev); 1028 #endif 1029 1030 if (args->mode & S_IFIFO) { 1031 bsd_mkfifo.path = args->path; 1032 bsd_mkfifo.mode = args->mode; 1033 return mkfifo(p, &bsd_mkfifo); 1034 } else { 1035 bsd_mknod.path = args->path; 1036 bsd_mknod.mode = args->mode; 1037 bsd_mknod.dev = args->dev; 1038 return mknod(p, &bsd_mknod); 1039 } 1040 } 1041 1042 /* 1043 * UGH! This is just about the dumbest idea I've ever heard!! 1044 */ 1045 int 1046 linux_personality(struct proc *p, struct linux_personality_args *args) 1047 { 1048 #ifdef DEBUG 1049 printf("Linux-emul(%d): personality(%d)\n", 1050 p->p_pid, args->per); 1051 #endif 1052 if (args->per != 0) 1053 return EINVAL; 1054 1055 /* Yes Jim, it's still a Linux... */ 1056 p->p_retval[0] = 0; 1057 return 0; 1058 } 1059 1060 /* 1061 * Wrappers for get/setitimer for debugging.. 1062 */ 1063 int 1064 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 1065 { 1066 struct setitimer_args bsa; 1067 struct itimerval foo; 1068 int error; 1069 1070 #ifdef DEBUG 1071 printf("Linux-emul(%ld): setitimer(%p, %p)\n", 1072 (long)p->p_pid, (void *)args->itv, (void *)args->oitv); 1073 #endif 1074 bsa.which = args->which; 1075 bsa.itv = args->itv; 1076 bsa.oitv = args->oitv; 1077 if (args->itv) { 1078 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 1079 sizeof(foo)))) 1080 return error; 1081 #ifdef DEBUG 1082 printf("setitimer: value: sec: %ld, usec: %ld\n", 1083 foo.it_value.tv_sec, foo.it_value.tv_usec); 1084 printf("setitimer: interval: sec: %ld, usec: %ld\n", 1085 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1086 #endif 1087 } 1088 return setitimer(p, &bsa); 1089 } 1090 1091 int 1092 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1093 { 1094 struct getitimer_args bsa; 1095 #ifdef DEBUG 1096 printf("Linux-emul(%ld): getitimer(%p)\n", 1097 (long)p->p_pid, (void *)args->itv); 1098 #endif 1099 bsa.which = args->which; 1100 bsa.itv = args->itv; 1101 return getitimer(p, &bsa); 1102 } 1103 1104 int 1105 linux_iopl(struct proc *p, struct linux_iopl_args *args) 1106 { 1107 int error; 1108 1109 error = suser(p); 1110 if (error != 0) 1111 return error; 1112 if (securelevel > 0) 1113 return EPERM; 1114 p->p_md.md_regs->tf_eflags |= PSL_IOPL; 1115 return 0; 1116 } 1117 1118 int 1119 linux_nice(struct proc *p, struct linux_nice_args *args) 1120 { 1121 struct setpriority_args bsd_args; 1122 1123 bsd_args.which = PRIO_PROCESS; 1124 bsd_args.who = 0; /* current process */ 1125 bsd_args.prio = args->inc; 1126 return setpriority(p, &bsd_args); 1127 } 1128 1129 int 1130 linux_setgroups(p, uap) 1131 struct proc *p; 1132 struct linux_setgroups_args *uap; 1133 { 1134 struct pcred *pc = p->p_cred; 1135 linux_gid_t linux_gidset[NGROUPS]; 1136 gid_t *bsd_gidset; 1137 int ngrp, error; 1138 1139 if ((error = suser(p))) 1140 return error; 1141 1142 if (uap->gidsetsize > NGROUPS) 1143 return EINVAL; 1144 1145 ngrp = uap->gidsetsize; 1146 pc->pc_ucred = crcopy(pc->pc_ucred); 1147 if (ngrp >= 1) { 1148 if ((error = copyin((caddr_t)uap->gidset, 1149 (caddr_t)linux_gidset, 1150 ngrp * sizeof(linux_gid_t)))) 1151 return error; 1152 1153 pc->pc_ucred->cr_ngroups = ngrp; 1154 1155 bsd_gidset = pc->pc_ucred->cr_groups; 1156 ngrp--; 1157 while (ngrp >= 0) { 1158 bsd_gidset[ngrp] = linux_gidset[ngrp]; 1159 ngrp--; 1160 } 1161 } 1162 else 1163 pc->pc_ucred->cr_ngroups = 1; 1164 1165 setsugid(p); 1166 return 0; 1167 } 1168 1169 int 1170 linux_getgroups(p, uap) 1171 struct proc *p; 1172 struct linux_getgroups_args *uap; 1173 { 1174 struct pcred *pc = p->p_cred; 1175 linux_gid_t linux_gidset[NGROUPS]; 1176 gid_t *bsd_gidset; 1177 int ngrp, error; 1178 1179 if ((ngrp = uap->gidsetsize) == 0) { 1180 p->p_retval[0] = pc->pc_ucred->cr_ngroups; 1181 return 0; 1182 } 1183 1184 if (ngrp < pc->pc_ucred->cr_ngroups) 1185 return EINVAL; 1186 1187 ngrp = 0; 1188 bsd_gidset = pc->pc_ucred->cr_groups; 1189 while (ngrp < pc->pc_ucred->cr_ngroups) { 1190 linux_gidset[ngrp] = bsd_gidset[ngrp]; 1191 ngrp++; 1192 } 1193 1194 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1195 ngrp * sizeof(linux_gid_t)))) 1196 return error; 1197 1198 p->p_retval[0] = ngrp; 1199 return (0); 1200 } 1201