1 /*- 2 * Copyright (c) 1994-1995 S�ren Schmidt 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer 10 * in this position and unchanged. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 3. The name of the author may not be used to endorse or promote products 15 * derived from this software withough specific prior written permission 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include "opt_compat.h" 32 33 #include <sys/param.h> 34 #include <sys/systm.h> 35 #include <sys/fcntl.h> 36 #include <sys/imgact_aout.h> 37 #include <sys/kernel.h> 38 #include <sys/lock.h> 39 #include <sys/mman.h> 40 #include <sys/mount.h> 41 #include <sys/mutex.h> 42 #include <sys/namei.h> 43 #include <sys/proc.h> 44 #include <sys/blist.h> 45 #include <sys/reboot.h> 46 #include <sys/resourcevar.h> 47 #include <sys/signalvar.h> 48 #include <sys/stat.h> 49 #include <sys/sysctl.h> 50 #include <sys/sysproto.h> 51 #include <sys/time.h> 52 #include <sys/unistd.h> 53 #include <sys/vmmeter.h> 54 #include <sys/vnode.h> 55 #include <sys/wait.h> 56 57 #include <vm/vm.h> 58 #include <vm/pmap.h> 59 #include <vm/vm_kern.h> 60 #include <vm/vm_map.h> 61 #include <vm/vm_extern.h> 62 #include <vm/vm_object.h> 63 #include <vm/vm_zone.h> 64 #include <vm/swap_pager.h> 65 66 #include <machine/frame.h> 67 #include <machine/limits.h> 68 #include <machine/psl.h> 69 #include <machine/sysarch.h> 70 #ifdef __i386__ 71 #include <machine/segments.h> 72 #endif 73 74 #include <posix4/sched.h> 75 76 #include <machine/../linux/linux.h> 77 #include <machine/../linux/linux_proto.h> 78 #include <compat/linux/linux_mib.h> 79 #include <compat/linux/linux_util.h> 80 81 #ifdef __alpha__ 82 #define BSD_TO_LINUX_SIGNAL(sig) (sig) 83 #else 84 #define BSD_TO_LINUX_SIGNAL(sig) \ 85 (((sig) <= LINUX_SIGTBLSZ) ? bsd_to_linux_signal[_SIG_IDX(sig)] : sig) 86 #endif 87 88 struct linux_rlimit { 89 unsigned long rlim_cur; 90 unsigned long rlim_max; 91 }; 92 93 #ifndef __alpha__ 94 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = 95 { RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 96 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 97 RLIMIT_MEMLOCK, -1 98 }; 99 #endif /*!__alpha__*/ 100 101 struct linux_sysinfo { 102 long uptime; /* Seconds since boot */ 103 unsigned long loads[3]; /* 1, 5, and 15 minute load averages */ 104 unsigned long totalram; /* Total usable main memory size */ 105 unsigned long freeram; /* Available memory size */ 106 unsigned long sharedram; /* Amount of shared memory */ 107 unsigned long bufferram; /* Memory used by buffers */ 108 unsigned long totalswap; /* Total swap space size */ 109 unsigned long freeswap; /* swap space still available */ 110 unsigned short procs; /* Number of current processes */ 111 char _f[22]; /* Pads structure to 64 bytes */ 112 }; 113 114 #ifndef __alpha__ 115 int 116 linux_sysinfo(struct proc *p, struct linux_sysinfo_args *args) 117 { 118 struct linux_sysinfo sysinfo; 119 vm_object_t object; 120 int i; 121 struct timespec ts; 122 123 /* Uptime is copied out of print_uptime() procedure in kern_shutdown.c */ 124 getnanouptime(&ts); 125 i = 0; 126 if (ts.tv_sec >= 86400) { 127 ts.tv_sec %= 86400; 128 i = 1; 129 } 130 if (i || ts.tv_sec >= 3600) { 131 ts.tv_sec %= 3600; 132 i = 1; 133 } 134 if (i || ts.tv_sec >= 60) { 135 ts.tv_sec %= 60; 136 i = 1; 137 } 138 sysinfo.uptime=ts.tv_sec; 139 140 /* Use the information from the mib to get our load averages */ 141 for (i = 0; i < 3; i++) 142 sysinfo.loads[i] = averunnable.ldavg[i]; 143 144 sysinfo.totalram = physmem * PAGE_SIZE; 145 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 146 147 sysinfo.sharedram = 0; 148 for (object = TAILQ_FIRST(&vm_object_list); object != NULL; 149 object = TAILQ_NEXT(object, object_list)) 150 if (object->shadow_count > 1) 151 sysinfo.sharedram += object->resident_page_count; 152 153 sysinfo.sharedram *= PAGE_SIZE; 154 155 sysinfo.bufferram = 0; 156 157 if (swapblist == NULL) { 158 sysinfo.totalswap= 0; 159 sysinfo.freeswap = 0; 160 } else { 161 sysinfo.totalswap = swapblist->bl_blocks * 1024; 162 sysinfo.freeswap = swapblist->bl_root->u.bmu_avail * PAGE_SIZE; 163 } 164 165 sysinfo.procs = 20; /* Hack */ 166 167 return copyout((caddr_t)&sysinfo, (caddr_t)args->info, 168 sizeof(struct linux_sysinfo)); 169 } 170 #endif /*!__alpha__*/ 171 172 #ifndef __alpha__ 173 int 174 linux_alarm(struct proc *p, struct linux_alarm_args *args) 175 { 176 struct itimerval it, old_it; 177 struct timeval tv; 178 int s; 179 180 #ifdef DEBUG 181 if (ldebug(alarm)) 182 printf(ARGS(alarm, "%u"), args->secs); 183 #endif 184 if (args->secs > 100000000) 185 return EINVAL; 186 it.it_value.tv_sec = (long)args->secs; 187 it.it_value.tv_usec = 0; 188 it.it_interval.tv_sec = 0; 189 it.it_interval.tv_usec = 0; 190 s = splsoftclock(); 191 old_it = p->p_realtimer; 192 getmicrouptime(&tv); 193 if (timevalisset(&old_it.it_value)) 194 callout_stop(&p->p_itcallout); 195 if (it.it_value.tv_sec != 0) { 196 callout_reset(&p->p_itcallout, tvtohz(&it.it_value), realitexpire, p); 197 timevaladd(&it.it_value, &tv); 198 } 199 p->p_realtimer = it; 200 splx(s); 201 if (timevalcmp(&old_it.it_value, &tv, >)) { 202 timevalsub(&old_it.it_value, &tv); 203 if (old_it.it_value.tv_usec != 0) 204 old_it.it_value.tv_sec++; 205 p->p_retval[0] = old_it.it_value.tv_sec; 206 } 207 return 0; 208 } 209 #endif /*!__alpha__*/ 210 211 int 212 linux_brk(struct proc *p, struct linux_brk_args *args) 213 { 214 #if 0 215 struct vmspace *vm = p->p_vmspace; 216 vm_offset_t new, old; 217 int error; 218 219 if ((vm_offset_t)args->dsend < (vm_offset_t)vm->vm_daddr) 220 return EINVAL; 221 if (((caddr_t)args->dsend - (caddr_t)vm->vm_daddr) 222 > p->p_rlimit[RLIMIT_DATA].rlim_cur) 223 return ENOMEM; 224 225 old = round_page((vm_offset_t)vm->vm_daddr) + ctob(vm->vm_dsize); 226 new = round_page((vm_offset_t)args->dsend); 227 p->p_retval[0] = old; 228 if ((new-old) > 0) { 229 if (swap_pager_full) 230 return ENOMEM; 231 error = vm_map_find(&vm->vm_map, NULL, 0, &old, (new-old), FALSE, 232 VM_PROT_ALL, VM_PROT_ALL, 0); 233 if (error) 234 return error; 235 vm->vm_dsize += btoc((new-old)); 236 p->p_retval[0] = (int)(vm->vm_daddr + ctob(vm->vm_dsize)); 237 } 238 return 0; 239 #else 240 struct vmspace *vm = p->p_vmspace; 241 vm_offset_t new, old; 242 struct obreak_args /* { 243 char * nsize; 244 } */ tmp; 245 246 #ifdef DEBUG 247 if (ldebug(brk)) 248 printf(ARGS(brk, "%p"), (void *)args->dsend); 249 #endif 250 old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize); 251 new = (vm_offset_t)args->dsend; 252 tmp.nsize = (char *) new; 253 if (((caddr_t)new > vm->vm_daddr) && !obreak(p, &tmp)) 254 p->p_retval[0] = (long)new; 255 else 256 p->p_retval[0] = (long)old; 257 258 return 0; 259 #endif 260 } 261 262 int 263 linux_uselib(struct proc *p, struct linux_uselib_args *args) 264 { 265 struct nameidata ni; 266 struct vnode *vp; 267 struct exec *a_out; 268 struct vattr attr; 269 vm_offset_t vmaddr; 270 unsigned long file_offset; 271 vm_offset_t buffer; 272 unsigned long bss_size; 273 int error; 274 caddr_t sg; 275 int locked; 276 277 sg = stackgap_init(); 278 CHECKALTEXIST(p, &sg, args->library); 279 280 #ifdef DEBUG 281 if (ldebug(uselib)) 282 printf(ARGS(uselib, "%s"), args->library); 283 #endif 284 285 a_out = NULL; 286 locked = 0; 287 vp = NULL; 288 289 NDINIT(&ni, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, args->library, p); 290 error = namei(&ni); 291 if (error) 292 goto cleanup; 293 294 vp = ni.ni_vp; 295 /* 296 * XXX This looks like a bogus check - a LOCKLEAF namei should not succeed 297 * without returning a vnode. 298 */ 299 if (vp == NULL) { 300 error = ENOEXEC; /* ?? */ 301 goto cleanup; 302 } 303 NDFREE(&ni, NDF_ONLY_PNBUF); 304 305 /* 306 * From here on down, we have a locked vnode that must be unlocked. 307 */ 308 locked++; 309 310 /* 311 * Writable? 312 */ 313 if (vp->v_writecount) { 314 error = ETXTBSY; 315 goto cleanup; 316 } 317 318 /* 319 * Executable? 320 */ 321 error = VOP_GETATTR(vp, &attr, p->p_ucred, p); 322 if (error) 323 goto cleanup; 324 325 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 326 ((attr.va_mode & 0111) == 0) || 327 (attr.va_type != VREG)) { 328 error = ENOEXEC; 329 goto cleanup; 330 } 331 332 /* 333 * Sensible size? 334 */ 335 if (attr.va_size == 0) { 336 error = ENOEXEC; 337 goto cleanup; 338 } 339 340 /* 341 * Can we access it? 342 */ 343 error = VOP_ACCESS(vp, VEXEC, p->p_ucred, p); 344 if (error) 345 goto cleanup; 346 347 error = VOP_OPEN(vp, FREAD, p->p_ucred, p); 348 if (error) 349 goto cleanup; 350 351 /* 352 * Lock no longer needed 353 */ 354 VOP_UNLOCK(vp, 0, p); 355 locked = 0; 356 357 /* 358 * Pull in executable header into kernel_map 359 */ 360 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 361 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 362 if (error) 363 goto cleanup; 364 365 /* 366 * Is it a Linux binary ? 367 */ 368 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 369 error = ENOEXEC; 370 goto cleanup; 371 } 372 373 /* While we are here, we should REALLY do some more checks */ 374 375 /* 376 * Set file/virtual offset based on a.out variant. 377 */ 378 switch ((int)(a_out->a_magic & 0xffff)) { 379 case 0413: /* ZMAGIC */ 380 file_offset = 1024; 381 break; 382 case 0314: /* QMAGIC */ 383 file_offset = 0; 384 break; 385 default: 386 error = ENOEXEC; 387 goto cleanup; 388 } 389 390 bss_size = round_page(a_out->a_bss); 391 392 /* 393 * Check various fields in header for validity/bounds. 394 */ 395 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 396 error = ENOEXEC; 397 goto cleanup; 398 } 399 400 /* text + data can't exceed file size */ 401 if (a_out->a_data + a_out->a_text > attr.va_size) { 402 error = EFAULT; 403 goto cleanup; 404 } 405 406 /* To protect p->p_rlimit in the if condition. */ 407 mtx_assert(&Giant, MA_OWNED); 408 409 /* 410 * text/data/bss must not exceed limits 411 * XXX: this is not complete. it should check current usage PLUS 412 * the resources needed by this library. 413 */ 414 if (a_out->a_text > MAXTSIZ || 415 a_out->a_data + bss_size > p->p_rlimit[RLIMIT_DATA].rlim_cur) { 416 error = ENOMEM; 417 goto cleanup; 418 } 419 420 /* 421 * prevent more writers 422 */ 423 vp->v_flag |= VTEXT; 424 425 /* 426 * Check if file_offset page aligned,. 427 * Currently we cannot handle misalinged file offsets, 428 * and so we read in the entire image (what a waste). 429 */ 430 if (file_offset & PAGE_MASK) { 431 #ifdef DEBUG 432 printf("uselib: Non page aligned binary %lu\n", file_offset); 433 #endif 434 /* 435 * Map text+data read/write/execute 436 */ 437 438 /* a_entry is the load address and is page aligned */ 439 vmaddr = trunc_page(a_out->a_entry); 440 441 /* get anon user mapping, read+write+execute */ 442 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 443 a_out->a_text + a_out->a_data, FALSE, 444 VM_PROT_ALL, VM_PROT_ALL, 0); 445 if (error) 446 goto cleanup; 447 448 /* map file into kernel_map */ 449 error = vm_mmap(kernel_map, &buffer, 450 round_page(a_out->a_text + a_out->a_data + file_offset), 451 VM_PROT_READ, VM_PROT_READ, 0, 452 (caddr_t)vp, trunc_page(file_offset)); 453 if (error) 454 goto cleanup; 455 456 /* copy from kernel VM space to user space */ 457 error = copyout((caddr_t)(void *)(uintptr_t)(buffer + file_offset), 458 (caddr_t)vmaddr, a_out->a_text + a_out->a_data); 459 460 /* release temporary kernel space */ 461 vm_map_remove(kernel_map, buffer, 462 buffer + round_page(a_out->a_text + a_out->a_data + file_offset)); 463 464 if (error) 465 goto cleanup; 466 } 467 else { 468 #ifdef DEBUG 469 printf("uselib: Page aligned binary %lu\n", file_offset); 470 #endif 471 /* 472 * for QMAGIC, a_entry is 20 bytes beyond the load address 473 * to skip the executable header 474 */ 475 vmaddr = trunc_page(a_out->a_entry); 476 477 /* 478 * Map it all into the process's space as a single copy-on-write 479 * "data" segment. 480 */ 481 error = vm_mmap(&p->p_vmspace->vm_map, &vmaddr, 482 a_out->a_text + a_out->a_data, 483 VM_PROT_ALL, VM_PROT_ALL, MAP_PRIVATE | MAP_FIXED, 484 (caddr_t)vp, file_offset); 485 if (error) 486 goto cleanup; 487 } 488 #ifdef DEBUG 489 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0], ((long*)vmaddr)[1]); 490 #endif 491 if (bss_size != 0) { 492 /* 493 * Calculate BSS start address 494 */ 495 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + a_out->a_data; 496 497 /* 498 * allocate some 'anon' space 499 */ 500 error = vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &vmaddr, 501 bss_size, FALSE, 502 VM_PROT_ALL, VM_PROT_ALL, 0); 503 if (error) 504 goto cleanup; 505 } 506 507 cleanup: 508 /* 509 * Unlock vnode if needed 510 */ 511 if (locked) 512 VOP_UNLOCK(vp, 0, p); 513 514 /* 515 * Release the kernel mapping. 516 */ 517 if (a_out) 518 vm_map_remove(kernel_map, (vm_offset_t)a_out, (vm_offset_t)a_out + PAGE_SIZE); 519 520 return error; 521 } 522 523 int 524 linux_newselect(struct proc *p, struct linux_newselect_args *args) 525 { 526 struct select_args bsa; 527 struct timeval tv0, tv1, utv, *tvp; 528 caddr_t sg; 529 int error; 530 531 #ifdef DEBUG 532 if (ldebug(newselect)) 533 printf(ARGS(newselect, "%d, %p, %p, %p, %p"), 534 args->nfds, (void *)args->readfds, 535 (void *)args->writefds, (void *)args->exceptfds, 536 (void *)args->timeout); 537 #endif 538 error = 0; 539 bsa.nd = args->nfds; 540 bsa.in = args->readfds; 541 bsa.ou = args->writefds; 542 bsa.ex = args->exceptfds; 543 bsa.tv = args->timeout; 544 545 /* 546 * Store current time for computation of the amount of 547 * time left. 548 */ 549 if (args->timeout) { 550 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 551 goto select_out; 552 #ifdef DEBUG 553 if (ldebug(newselect)) 554 printf(LMSG("incoming timeout (%ld/%ld)"), 555 utv.tv_sec, utv.tv_usec); 556 #endif 557 if (itimerfix(&utv)) { 558 /* 559 * The timeval was invalid. Convert it to something 560 * valid that will act as it does under Linux. 561 */ 562 sg = stackgap_init(); 563 tvp = stackgap_alloc(&sg, sizeof(utv)); 564 utv.tv_sec += utv.tv_usec / 1000000; 565 utv.tv_usec %= 1000000; 566 if (utv.tv_usec < 0) { 567 utv.tv_sec -= 1; 568 utv.tv_usec += 1000000; 569 } 570 if (utv.tv_sec < 0) 571 timevalclear(&utv); 572 if ((error = copyout(&utv, tvp, sizeof(utv)))) 573 goto select_out; 574 bsa.tv = tvp; 575 } 576 microtime(&tv0); 577 } 578 579 error = select(p, &bsa); 580 #ifdef DEBUG 581 if (ldebug(newselect)) 582 printf(LMSG("real select returns %d"), error); 583 #endif 584 585 if (error) { 586 /* 587 * See fs/select.c in the Linux kernel. Without this, 588 * Maelstrom doesn't work. 589 */ 590 if (error == ERESTART) 591 error = EINTR; 592 goto select_out; 593 } 594 595 if (args->timeout) { 596 if (p->p_retval[0]) { 597 /* 598 * Compute how much time was left of the timeout, 599 * by subtracting the current time and the time 600 * before we started the call, and subtracting 601 * that result from the user-supplied value. 602 */ 603 microtime(&tv1); 604 timevalsub(&tv1, &tv0); 605 timevalsub(&utv, &tv1); 606 if (utv.tv_sec < 0) 607 timevalclear(&utv); 608 } else 609 timevalclear(&utv); 610 #ifdef DEBUG 611 if (ldebug(newselect)) 612 printf(LMSG("outgoing timeout (%ld/%ld)"), 613 utv.tv_sec, utv.tv_usec); 614 #endif 615 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 616 goto select_out; 617 } 618 619 select_out: 620 #ifdef DEBUG 621 if (ldebug(newselect)) 622 printf(LMSG("newselect_out -> %d"), error); 623 #endif 624 return error; 625 } 626 627 int 628 linux_getpgid(struct proc *p, struct linux_getpgid_args *args) 629 { 630 struct proc *curp; 631 632 #ifdef DEBUG 633 if (ldebug(getpgid)) 634 printf(ARGS(getpgid, "%d"), args->pid); 635 #endif 636 if (args->pid != p->p_pid) { 637 if (!(curp = pfind(args->pid))) 638 return ESRCH; 639 p->p_retval[0] = curp->p_pgid; 640 PROC_UNLOCK(curp); 641 } 642 else 643 p->p_retval[0] = p->p_pgid; 644 return 0; 645 } 646 647 int 648 linux_mremap(struct proc *p, struct linux_mremap_args *args) 649 { 650 struct munmap_args /* { 651 void *addr; 652 size_t len; 653 } */ bsd_args; 654 int error = 0; 655 656 #ifdef DEBUG 657 if (ldebug(mremap)) 658 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 659 (void *)args->addr, 660 (unsigned long)args->old_len, 661 (unsigned long)args->new_len, 662 (unsigned long)args->flags); 663 #endif 664 args->new_len = round_page(args->new_len); 665 args->old_len = round_page(args->old_len); 666 667 if (args->new_len > args->old_len) { 668 p->p_retval[0] = 0; 669 return ENOMEM; 670 } 671 672 if (args->new_len < args->old_len) { 673 bsd_args.addr = args->addr + args->new_len; 674 bsd_args.len = args->old_len - args->new_len; 675 error = munmap(p, &bsd_args); 676 } 677 678 p->p_retval[0] = error ? 0 : (u_long)args->addr; 679 return error; 680 } 681 682 int 683 linux_msync(struct proc *p, struct linux_msync_args *args) 684 { 685 struct msync_args bsd_args; 686 687 bsd_args.addr = args->addr; 688 bsd_args.len = args->len; 689 bsd_args.flags = 0; /* XXX ignore */ 690 691 return msync(p, &bsd_args); 692 } 693 694 #ifndef __alpha__ 695 int 696 linux_time(struct proc *p, struct linux_time_args *args) 697 { 698 struct timeval tv; 699 linux_time_t tm; 700 int error; 701 702 #ifdef DEBUG 703 if (ldebug(time)) 704 printf(ARGS(time, "*")); 705 #endif 706 microtime(&tv); 707 tm = tv.tv_sec; 708 if (args->tm && (error = copyout(&tm, args->tm, sizeof(linux_time_t)))) 709 return error; 710 p->p_retval[0] = tm; 711 return 0; 712 } 713 #endif /*!__alpha__*/ 714 715 struct linux_times_argv { 716 long tms_utime; 717 long tms_stime; 718 long tms_cutime; 719 long tms_cstime; 720 }; 721 722 #ifdef __alpha__ 723 #define CLK_TCK 1024 /* Linux uses 1024 on alpha */ 724 #else 725 #define CLK_TCK 100 /* Linux uses 100 */ 726 #endif 727 728 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 729 730 int 731 linux_times(struct proc *p, struct linux_times_args *args) 732 { 733 struct timeval tv; 734 struct linux_times_argv tms; 735 struct rusage ru; 736 int error; 737 738 #ifdef DEBUG 739 if (ldebug(times)) 740 printf(ARGS(times, "*")); 741 #endif 742 mtx_lock_spin(&sched_lock); 743 calcru(p, &ru.ru_utime, &ru.ru_stime, NULL); 744 mtx_unlock_spin(&sched_lock); 745 746 tms.tms_utime = CONVTCK(ru.ru_utime); 747 tms.tms_stime = CONVTCK(ru.ru_stime); 748 749 tms.tms_cutime = CONVTCK(p->p_stats->p_cru.ru_utime); 750 tms.tms_cstime = CONVTCK(p->p_stats->p_cru.ru_stime); 751 752 if ((error = copyout((caddr_t)&tms, (caddr_t)args->buf, 753 sizeof(struct linux_times_argv)))) 754 return error; 755 756 microuptime(&tv); 757 p->p_retval[0] = (int)CONVTCK(tv); 758 return 0; 759 } 760 761 int 762 linux_newuname(struct proc *p, struct linux_newuname_args *args) 763 { 764 struct linux_new_utsname utsname; 765 char *osrelease, *osname; 766 767 #ifdef DEBUG 768 if (ldebug(newuname)) 769 printf(ARGS(newuname, "*")); 770 #endif 771 772 osname = linux_get_osname(p); 773 osrelease = linux_get_osrelease(p); 774 775 bzero(&utsname, sizeof(struct linux_new_utsname)); 776 strncpy(utsname.sysname, osname, LINUX_MAX_UTSNAME-1); 777 strncpy(utsname.nodename, hostname, LINUX_MAX_UTSNAME-1); 778 strncpy(utsname.release, osrelease, LINUX_MAX_UTSNAME-1); 779 strncpy(utsname.version, version, LINUX_MAX_UTSNAME-1); 780 strncpy(utsname.machine, machine, LINUX_MAX_UTSNAME-1); 781 strncpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME-1); 782 783 return (copyout((caddr_t)&utsname, (caddr_t)args->buf, 784 sizeof(struct linux_new_utsname))); 785 } 786 787 struct linux_utimbuf { 788 linux_time_t l_actime; 789 linux_time_t l_modtime; 790 }; 791 792 int 793 linux_utime(struct proc *p, struct linux_utime_args *args) 794 { 795 struct utimes_args /* { 796 char *path; 797 struct timeval *tptr; 798 } */ bsdutimes; 799 struct timeval tv[2], *tvp; 800 struct linux_utimbuf lut; 801 int error; 802 caddr_t sg; 803 804 sg = stackgap_init(); 805 CHECKALTEXIST(p, &sg, args->fname); 806 807 #ifdef DEBUG 808 if (ldebug(utime)) 809 printf(ARGS(utime, "%s, *"), args->fname); 810 #endif 811 if (args->times) { 812 if ((error = copyin(args->times, &lut, sizeof lut))) 813 return error; 814 tv[0].tv_sec = lut.l_actime; 815 tv[0].tv_usec = 0; 816 tv[1].tv_sec = lut.l_modtime; 817 tv[1].tv_usec = 0; 818 /* so that utimes can copyin */ 819 tvp = (struct timeval *)stackgap_alloc(&sg, sizeof(tv)); 820 if (tvp == NULL) 821 return (ENAMETOOLONG); 822 if ((error = copyout(tv, tvp, sizeof(tv)))) 823 return error; 824 bsdutimes.tptr = tvp; 825 } else 826 bsdutimes.tptr = NULL; 827 828 bsdutimes.path = args->fname; 829 return utimes(p, &bsdutimes); 830 } 831 832 #define __WCLONE 0x80000000 833 834 #ifndef __alpha__ 835 int 836 linux_waitpid(struct proc *p, struct linux_waitpid_args *args) 837 { 838 struct wait_args /* { 839 int pid; 840 int *status; 841 int options; 842 struct rusage *rusage; 843 } */ tmp; 844 int error, tmpstat; 845 846 #ifdef DEBUG 847 if (ldebug(waitpid)) 848 printf(ARGS(waitpid, "%d, %p, %d"), 849 args->pid, (void *)args->status, args->options); 850 #endif 851 tmp.pid = args->pid; 852 tmp.status = args->status; 853 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 854 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 855 if (args->options & __WCLONE) 856 tmp.options |= WLINUXCLONE; 857 tmp.rusage = NULL; 858 859 if ((error = wait4(p, &tmp)) != 0) 860 return error; 861 862 if (args->status) { 863 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 864 return error; 865 tmpstat &= 0xffff; 866 if (WIFSIGNALED(tmpstat)) 867 tmpstat = (tmpstat & 0xffffff80) | 868 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 869 else if (WIFSTOPPED(tmpstat)) 870 tmpstat = (tmpstat & 0xffff00ff) | 871 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 872 return copyout(&tmpstat, args->status, sizeof(int)); 873 } else 874 return 0; 875 } 876 #endif /*!__alpha__*/ 877 878 int 879 linux_wait4(struct proc *p, struct linux_wait4_args *args) 880 { 881 struct wait_args /* { 882 int pid; 883 int *status; 884 int options; 885 struct rusage *rusage; 886 } */ tmp; 887 int error, tmpstat; 888 889 #ifdef DEBUG 890 if (ldebug(wait4)) 891 printf(ARGS(wait4, "%d, %p, %d, %p"), 892 args->pid, (void *)args->status, args->options, 893 (void *)args->rusage); 894 #endif 895 tmp.pid = args->pid; 896 tmp.status = args->status; 897 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 898 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 899 if (args->options & __WCLONE) 900 tmp.options |= WLINUXCLONE; 901 tmp.rusage = args->rusage; 902 903 if ((error = wait4(p, &tmp)) != 0) 904 return error; 905 906 SIGDELSET(p->p_siglist, SIGCHLD); 907 908 if (args->status) { 909 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 910 return error; 911 tmpstat &= 0xffff; 912 if (WIFSIGNALED(tmpstat)) 913 tmpstat = (tmpstat & 0xffffff80) | 914 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 915 else if (WIFSTOPPED(tmpstat)) 916 tmpstat = (tmpstat & 0xffff00ff) | 917 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 918 return copyout(&tmpstat, args->status, sizeof(int)); 919 } else 920 return 0; 921 } 922 923 int 924 linux_mknod(struct proc *p, struct linux_mknod_args *args) 925 { 926 caddr_t sg; 927 struct mknod_args bsd_mknod; 928 struct mkfifo_args bsd_mkfifo; 929 930 sg = stackgap_init(); 931 932 CHECKALTCREAT(p, &sg, args->path); 933 934 #ifdef DEBUG 935 if (ldebug(mknod)) 936 printf(ARGS(mknod, "%s, %d, %d"), 937 args->path, args->mode, args->dev); 938 #endif 939 940 if (args->mode & S_IFIFO) { 941 bsd_mkfifo.path = args->path; 942 bsd_mkfifo.mode = args->mode; 943 return mkfifo(p, &bsd_mkfifo); 944 } else { 945 bsd_mknod.path = args->path; 946 bsd_mknod.mode = args->mode; 947 bsd_mknod.dev = args->dev; 948 return mknod(p, &bsd_mknod); 949 } 950 } 951 952 /* 953 * UGH! This is just about the dumbest idea I've ever heard!! 954 */ 955 int 956 linux_personality(struct proc *p, struct linux_personality_args *args) 957 { 958 #ifdef DEBUG 959 if (ldebug(personality)) 960 printf(ARGS(personality, "%d"), args->per); 961 #endif 962 #ifndef __alpha__ 963 if (args->per != 0) 964 return EINVAL; 965 #endif 966 967 /* Yes Jim, it's still a Linux... */ 968 p->p_retval[0] = 0; 969 return 0; 970 } 971 972 /* 973 * Wrappers for get/setitimer for debugging.. 974 */ 975 int 976 linux_setitimer(struct proc *p, struct linux_setitimer_args *args) 977 { 978 struct setitimer_args bsa; 979 struct itimerval foo; 980 int error; 981 982 #ifdef DEBUG 983 if (ldebug(setitimer)) 984 printf(ARGS(setitimer, "%p, %p"), 985 (void *)args->itv, (void *)args->oitv); 986 #endif 987 bsa.which = args->which; 988 bsa.itv = args->itv; 989 bsa.oitv = args->oitv; 990 if (args->itv) { 991 if ((error = copyin((caddr_t)args->itv, (caddr_t)&foo, 992 sizeof(foo)))) 993 return error; 994 #ifdef DEBUG 995 if (ldebug(setitimer)) { 996 printf("setitimer: value: sec: %ld, usec: %ld\n", 997 foo.it_value.tv_sec, foo.it_value.tv_usec); 998 printf("setitimer: interval: sec: %ld, usec: %ld\n", 999 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 1000 } 1001 #endif 1002 } 1003 return setitimer(p, &bsa); 1004 } 1005 1006 int 1007 linux_getitimer(struct proc *p, struct linux_getitimer_args *args) 1008 { 1009 struct getitimer_args bsa; 1010 #ifdef DEBUG 1011 if (ldebug(getitimer)) 1012 printf(ARGS(getitimer, "%p"), (void *)args->itv); 1013 #endif 1014 bsa.which = args->which; 1015 bsa.itv = args->itv; 1016 return getitimer(p, &bsa); 1017 } 1018 1019 #ifndef __alpha__ 1020 int 1021 linux_nice(struct proc *p, struct linux_nice_args *args) 1022 { 1023 struct setpriority_args bsd_args; 1024 1025 bsd_args.which = PRIO_PROCESS; 1026 bsd_args.who = 0; /* current process */ 1027 bsd_args.prio = args->inc; 1028 return setpriority(p, &bsd_args); 1029 } 1030 #endif /*!__alpha__*/ 1031 1032 int 1033 linux_setgroups(p, uap) 1034 struct proc *p; 1035 struct linux_setgroups_args *uap; 1036 { 1037 struct ucred *newcred, *oldcred; 1038 linux_gid_t linux_gidset[NGROUPS]; 1039 gid_t *bsd_gidset; 1040 int ngrp, error; 1041 1042 ngrp = uap->gidsetsize; 1043 oldcred = p->p_ucred; 1044 1045 /* 1046 * cr_groups[0] holds egid. Setting the whole set from 1047 * the supplied set will cause egid to be changed too. 1048 * Keep cr_groups[0] unchanged to prevent that. 1049 */ 1050 1051 if ((error = suser_xxx(oldcred, NULL, PRISON_ROOT)) != 0) 1052 return (error); 1053 1054 if (ngrp >= NGROUPS) 1055 return (EINVAL); 1056 1057 newcred = crdup(oldcred); 1058 if (ngrp > 0) { 1059 error = copyin((caddr_t)uap->gidset, (caddr_t)linux_gidset, 1060 ngrp * sizeof(linux_gid_t)); 1061 if (error) 1062 return (error); 1063 1064 newcred->cr_ngroups = ngrp + 1; 1065 1066 bsd_gidset = newcred->cr_groups; 1067 ngrp--; 1068 while (ngrp >= 0) { 1069 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1070 ngrp--; 1071 } 1072 } 1073 else 1074 newcred->cr_ngroups = 1; 1075 1076 setsugid(p); 1077 p->p_ucred = newcred; 1078 crfree(oldcred); 1079 return (0); 1080 } 1081 1082 int 1083 linux_getgroups(p, uap) 1084 struct proc *p; 1085 struct linux_getgroups_args *uap; 1086 { 1087 struct ucred *cred; 1088 linux_gid_t linux_gidset[NGROUPS]; 1089 gid_t *bsd_gidset; 1090 int bsd_gidsetsz, ngrp, error; 1091 1092 cred = p->p_ucred; 1093 bsd_gidset = cred->cr_groups; 1094 bsd_gidsetsz = cred->cr_ngroups - 1; 1095 1096 /* 1097 * cr_groups[0] holds egid. Returning the whole set 1098 * here will cause a duplicate. Exclude cr_groups[0] 1099 * to prevent that. 1100 */ 1101 1102 if ((ngrp = uap->gidsetsize) == 0) { 1103 p->p_retval[0] = bsd_gidsetsz; 1104 return (0); 1105 } 1106 1107 if (ngrp < bsd_gidsetsz) 1108 return (EINVAL); 1109 1110 ngrp = 0; 1111 while (ngrp < bsd_gidsetsz) { 1112 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1113 ngrp++; 1114 } 1115 1116 if ((error = copyout((caddr_t)linux_gidset, (caddr_t)uap->gidset, 1117 ngrp * sizeof(linux_gid_t)))) 1118 return (error); 1119 1120 p->p_retval[0] = ngrp; 1121 return (0); 1122 } 1123 1124 #ifndef __alpha__ 1125 int 1126 linux_setrlimit(p, uap) 1127 struct proc *p; 1128 struct linux_setrlimit_args *uap; 1129 { 1130 struct __setrlimit_args bsd; 1131 struct linux_rlimit rlim; 1132 int error; 1133 caddr_t sg = stackgap_init(); 1134 1135 #ifdef DEBUG 1136 if (ldebug(setrlimit)) 1137 printf(ARGS(setrlimit, "%d, %p"), 1138 uap->resource, (void *)uap->rlim); 1139 #endif 1140 1141 if (uap->resource >= LINUX_RLIM_NLIMITS) 1142 return (EINVAL); 1143 1144 bsd.which = linux_to_bsd_resource[uap->resource]; 1145 if (bsd.which == -1) 1146 return (EINVAL); 1147 1148 error = copyin(uap->rlim, &rlim, sizeof(rlim)); 1149 if (error) 1150 return (error); 1151 1152 bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit)); 1153 bsd.rlp->rlim_cur = (rlim_t)rlim.rlim_cur; 1154 bsd.rlp->rlim_max = (rlim_t)rlim.rlim_max; 1155 return (setrlimit(p, &bsd)); 1156 } 1157 1158 int 1159 linux_getrlimit(p, uap) 1160 struct proc *p; 1161 struct linux_getrlimit_args *uap; 1162 { 1163 struct __getrlimit_args bsd; 1164 struct linux_rlimit rlim; 1165 int error; 1166 caddr_t sg = stackgap_init(); 1167 1168 #ifdef DEBUG 1169 if (ldebug(getrlimit)) 1170 printf(ARGS(getrlimit, "%d, %p"), 1171 uap->resource, (void *)uap->rlim); 1172 #endif 1173 1174 if (uap->resource >= LINUX_RLIM_NLIMITS) 1175 return (EINVAL); 1176 1177 bsd.which = linux_to_bsd_resource[uap->resource]; 1178 if (bsd.which == -1) 1179 return (EINVAL); 1180 1181 bsd.rlp = stackgap_alloc(&sg, sizeof(struct rlimit)); 1182 error = getrlimit(p, &bsd); 1183 if (error) 1184 return (error); 1185 1186 rlim.rlim_cur = (unsigned long)bsd.rlp->rlim_cur; 1187 if (rlim.rlim_cur == ULONG_MAX) 1188 rlim.rlim_cur = LONG_MAX; 1189 rlim.rlim_max = (unsigned long)bsd.rlp->rlim_max; 1190 if (rlim.rlim_max == ULONG_MAX) 1191 rlim.rlim_max = LONG_MAX; 1192 return (copyout(&rlim, uap->rlim, sizeof(rlim))); 1193 } 1194 #endif /*!__alpha__*/ 1195 1196 int 1197 linux_sched_setscheduler(p, uap) 1198 struct proc *p; 1199 struct linux_sched_setscheduler_args *uap; 1200 { 1201 struct sched_setscheduler_args bsd; 1202 1203 #ifdef DEBUG 1204 if (ldebug(sched_setscheduler)) 1205 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1206 uap->pid, uap->policy, (const void *)uap->param); 1207 #endif 1208 1209 switch (uap->policy) { 1210 case LINUX_SCHED_OTHER: 1211 bsd.policy = SCHED_OTHER; 1212 break; 1213 case LINUX_SCHED_FIFO: 1214 bsd.policy = SCHED_FIFO; 1215 break; 1216 case LINUX_SCHED_RR: 1217 bsd.policy = SCHED_RR; 1218 break; 1219 default: 1220 return EINVAL; 1221 } 1222 1223 bsd.pid = uap->pid; 1224 bsd.param = uap->param; 1225 return sched_setscheduler(p, &bsd); 1226 } 1227 1228 int 1229 linux_sched_getscheduler(p, uap) 1230 struct proc *p; 1231 struct linux_sched_getscheduler_args *uap; 1232 { 1233 struct sched_getscheduler_args bsd; 1234 int error; 1235 1236 #ifdef DEBUG 1237 if (ldebug(sched_getscheduler)) 1238 printf(ARGS(sched_getscheduler, "%d"), uap->pid); 1239 #endif 1240 1241 bsd.pid = uap->pid; 1242 error = sched_getscheduler(p, &bsd); 1243 1244 switch (p->p_retval[0]) { 1245 case SCHED_OTHER: 1246 p->p_retval[0] = LINUX_SCHED_OTHER; 1247 break; 1248 case SCHED_FIFO: 1249 p->p_retval[0] = LINUX_SCHED_FIFO; 1250 break; 1251 case SCHED_RR: 1252 p->p_retval[0] = LINUX_SCHED_RR; 1253 break; 1254 } 1255 1256 return error; 1257 } 1258 1259 int 1260 linux_sched_get_priority_max(p, uap) 1261 struct proc *p; 1262 struct linux_sched_get_priority_max_args *uap; 1263 { 1264 struct sched_get_priority_max_args bsd; 1265 1266 #ifdef DEBUG 1267 if (ldebug(sched_get_priority_max)) 1268 printf(ARGS(sched_get_priority_max, "%d"), uap->policy); 1269 #endif 1270 1271 switch (uap->policy) { 1272 case LINUX_SCHED_OTHER: 1273 bsd.policy = SCHED_OTHER; 1274 break; 1275 case LINUX_SCHED_FIFO: 1276 bsd.policy = SCHED_FIFO; 1277 break; 1278 case LINUX_SCHED_RR: 1279 bsd.policy = SCHED_RR; 1280 break; 1281 default: 1282 return EINVAL; 1283 } 1284 return sched_get_priority_max(p, &bsd); 1285 } 1286 1287 int 1288 linux_sched_get_priority_min(p, uap) 1289 struct proc *p; 1290 struct linux_sched_get_priority_min_args *uap; 1291 { 1292 struct sched_get_priority_min_args bsd; 1293 1294 #ifdef DEBUG 1295 if (ldebug(sched_get_priority_min)) 1296 printf(ARGS(sched_get_priority_min, "%d"), uap->policy); 1297 #endif 1298 1299 switch (uap->policy) { 1300 case LINUX_SCHED_OTHER: 1301 bsd.policy = SCHED_OTHER; 1302 break; 1303 case LINUX_SCHED_FIFO: 1304 bsd.policy = SCHED_FIFO; 1305 break; 1306 case LINUX_SCHED_RR: 1307 bsd.policy = SCHED_RR; 1308 break; 1309 default: 1310 return EINVAL; 1311 } 1312 return sched_get_priority_min(p, &bsd); 1313 } 1314 1315 #define REBOOT_CAD_ON 0x89abcdef 1316 #define REBOOT_CAD_OFF 0 1317 #define REBOOT_HALT 0xcdef0123 1318 1319 int 1320 linux_reboot(struct proc *p, struct linux_reboot_args *args) 1321 { 1322 struct reboot_args bsd_args; 1323 1324 #ifdef DEBUG 1325 if (ldebug(reboot)) 1326 printf(ARGS(reboot, "0x%x"), args->opt); 1327 #endif 1328 if (args->opt == REBOOT_CAD_ON || args->opt == REBOOT_CAD_OFF) 1329 return (0); 1330 bsd_args.opt = args->opt == REBOOT_HALT ? RB_HALT : 0; 1331 return (reboot(p, &bsd_args)); 1332 } 1333