/*-
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include "opt_mac.h" 33 34 #include <sys/param.h> 35 #include <sys/blist.h> 36 #include <sys/fcntl.h> 37 #include <sys/imgact_aout.h> 38 #include <sys/jail.h> 39 #include <sys/kernel.h> 40 #include <sys/limits.h> 41 #include <sys/lock.h> 42 #include <sys/mac.h> 43 #include <sys/malloc.h> 44 #include <sys/mman.h> 45 #include <sys/mount.h> 46 #include <sys/mutex.h> 47 #include <sys/namei.h> 48 #include <sys/proc.h> 49 #include <sys/reboot.h> 50 #include <sys/resourcevar.h> 51 #include <sys/signalvar.h> 52 #include <sys/stat.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysproto.h> 56 #include <sys/systm.h> 57 #include <sys/time.h> 58 #include <sys/vmmeter.h> 59 #include <sys/vnode.h> 60 #include <sys/wait.h> 61 62 #include <vm/vm.h> 63 #include <vm/pmap.h> 64 #include <vm/vm_kern.h> 65 #include <vm/vm_map.h> 66 #include <vm/vm_extern.h> 67 #include <vm/vm_object.h> 68 #include <vm/swap_pager.h> 69 70 #include <posix4/sched.h> 71 72 #include <machine/../linux/linux.h> 73 #include <machine/../linux/linux_proto.h> 74 75 #include <compat/linux/linux_mib.h> 76 #include <compat/linux/linux_util.h> 77 78 #ifdef __i386__ 79 #include <machine/cputypes.h> 80 #endif 81 82 #ifdef __alpha__ 83 #define BSD_TO_LINUX_SIGNAL(sig) (sig) 84 #else 85 #define BSD_TO_LINUX_SIGNAL(sig) \ 86 (((sig) <= LINUX_SIGTBLSZ) ? 
bsd_to_linux_signal[_SIG_IDX(sig)] : sig) 87 #endif 88 89 #ifndef __alpha__ 90 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 91 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 92 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 93 RLIMIT_MEMLOCK, -1 94 }; 95 #endif /*!__alpha__*/ 96 97 struct l_sysinfo { 98 l_long uptime; /* Seconds since boot */ 99 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 100 #define LINUX_SYSINFO_LOADS_SCALE 65536 101 l_ulong totalram; /* Total usable main memory size */ 102 l_ulong freeram; /* Available memory size */ 103 l_ulong sharedram; /* Amount of shared memory */ 104 l_ulong bufferram; /* Memory used by buffers */ 105 l_ulong totalswap; /* Total swap space size */ 106 l_ulong freeswap; /* swap space still available */ 107 l_ushort procs; /* Number of current processes */ 108 l_ulong totalbig; 109 l_ulong freebig; 110 l_uint mem_unit; 111 char _f[6]; /* Pads structure to 64 bytes */ 112 }; 113 #ifndef __alpha__ 114 int 115 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 116 { 117 struct l_sysinfo sysinfo; 118 vm_object_t object; 119 int i, j; 120 struct timespec ts; 121 122 /* Uptime is copied out of print_uptime() in kern_shutdown.c */ 123 getnanouptime(&ts); 124 i = 0; 125 if (ts.tv_sec >= 86400) { 126 ts.tv_sec %= 86400; 127 i = 1; 128 } 129 if (i || ts.tv_sec >= 3600) { 130 ts.tv_sec %= 3600; 131 i = 1; 132 } 133 if (i || ts.tv_sec >= 60) { 134 ts.tv_sec %= 60; 135 i = 1; 136 } 137 sysinfo.uptime=ts.tv_sec; 138 139 /* Use the information from the mib to get our load averages */ 140 for (i = 0; i < 3; i++) 141 sysinfo.loads[i] = averunnable.ldavg[i] * 142 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 143 144 sysinfo.totalram = physmem * PAGE_SIZE; 145 sysinfo.freeram = sysinfo.totalram - cnt.v_wire_count * PAGE_SIZE; 146 147 sysinfo.sharedram = 0; 148 mtx_lock(&vm_object_list_mtx); 149 TAILQ_FOREACH(object, &vm_object_list, object_list) 150 if (object->shadow_count > 1) 
151 sysinfo.sharedram += object->resident_page_count; 152 mtx_unlock(&vm_object_list_mtx); 153 154 sysinfo.sharedram *= PAGE_SIZE; 155 sysinfo.bufferram = 0; 156 157 swap_pager_status(&i, &j); 158 sysinfo.totalswap= i * PAGE_SIZE; 159 sysinfo.freeswap = (i - j) * PAGE_SIZE; 160 161 sysinfo.procs = nprocs; 162 163 /* The following are only present in newer Linux kernels. */ 164 sysinfo.totalbig = 0; 165 sysinfo.freebig = 0; 166 sysinfo.mem_unit = 1; 167 168 return copyout(&sysinfo, args->info, sizeof(sysinfo)); 169 } 170 #endif /*!__alpha__*/ 171 172 #ifndef __alpha__ 173 int 174 linux_alarm(struct thread *td, struct linux_alarm_args *args) 175 { 176 struct itimerval it, old_it; 177 struct timeval tv; 178 struct proc *p; 179 180 #ifdef DEBUG 181 if (ldebug(alarm)) 182 printf(ARGS(alarm, "%u"), args->secs); 183 #endif 184 185 if (args->secs > 100000000) 186 return EINVAL; 187 188 it.it_value.tv_sec = (long)args->secs; 189 it.it_value.tv_usec = 0; 190 it.it_interval.tv_sec = 0; 191 it.it_interval.tv_usec = 0; 192 p = td->td_proc; 193 PROC_LOCK(p); 194 old_it = p->p_realtimer; 195 getmicrouptime(&tv); 196 if (timevalisset(&old_it.it_value)) 197 callout_stop(&p->p_itcallout); 198 if (it.it_value.tv_sec != 0) { 199 callout_reset(&p->p_itcallout, tvtohz(&it.it_value), 200 realitexpire, p); 201 timevaladd(&it.it_value, &tv); 202 } 203 p->p_realtimer = it; 204 PROC_UNLOCK(p); 205 if (timevalcmp(&old_it.it_value, &tv, >)) { 206 timevalsub(&old_it.it_value, &tv); 207 if (old_it.it_value.tv_usec != 0) 208 old_it.it_value.tv_sec++; 209 td->td_retval[0] = old_it.it_value.tv_sec; 210 } 211 return 0; 212 } 213 #endif /*!__alpha__*/ 214 215 int 216 linux_brk(struct thread *td, struct linux_brk_args *args) 217 { 218 struct vmspace *vm = td->td_proc->p_vmspace; 219 vm_offset_t new, old; 220 struct obreak_args /* { 221 char * nsize; 222 } */ tmp; 223 224 #ifdef DEBUG 225 if (ldebug(brk)) 226 printf(ARGS(brk, "%p"), (void *)args->dsend); 227 #endif 228 old = (vm_offset_t)vm->vm_daddr + 
ctob(vm->vm_dsize); 229 new = (vm_offset_t)args->dsend; 230 tmp.nsize = (char *) new; 231 if (((caddr_t)new > vm->vm_daddr) && !obreak(td, &tmp)) 232 td->td_retval[0] = (long)new; 233 else 234 td->td_retval[0] = (long)old; 235 236 return 0; 237 } 238 239 int 240 linux_uselib(struct thread *td, struct linux_uselib_args *args) 241 { 242 struct nameidata ni; 243 struct vnode *vp; 244 struct exec *a_out; 245 struct vattr attr; 246 vm_offset_t vmaddr; 247 unsigned long file_offset; 248 vm_offset_t buffer; 249 unsigned long bss_size; 250 char *library; 251 int error; 252 int locked; 253 254 LCONVPATHEXIST(td, args->library, &library); 255 256 #ifdef DEBUG 257 if (ldebug(uselib)) 258 printf(ARGS(uselib, "%s"), library); 259 #endif 260 261 a_out = NULL; 262 locked = 0; 263 vp = NULL; 264 265 /* 266 * XXX: This code should make use of vn_open(), rather than doing 267 * all this stuff itself. 268 */ 269 NDINIT(&ni, LOOKUP, FOLLOW|LOCKLEAF, UIO_SYSSPACE, library, td); 270 error = namei(&ni); 271 LFREEPATH(library); 272 if (error) 273 goto cleanup; 274 275 vp = ni.ni_vp; 276 /* 277 * XXX - This looks like a bogus check. A LOCKLEAF namei should not 278 * succeed without returning a vnode. 279 */ 280 if (vp == NULL) { 281 error = ENOEXEC; /* ?? */ 282 goto cleanup; 283 } 284 NDFREE(&ni, NDF_ONLY_PNBUF); 285 286 /* 287 * From here on down, we have a locked vnode that must be unlocked. 288 */ 289 locked++; 290 291 /* Writable? */ 292 if (vp->v_writecount) { 293 error = ETXTBSY; 294 goto cleanup; 295 } 296 297 /* Executable? */ 298 error = VOP_GETATTR(vp, &attr, td->td_ucred, td); 299 if (error) 300 goto cleanup; 301 302 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 303 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 304 error = ENOEXEC; 305 goto cleanup; 306 } 307 308 /* Sensible size? */ 309 if (attr.va_size == 0) { 310 error = ENOEXEC; 311 goto cleanup; 312 } 313 314 /* Can we access it? 
*/ 315 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 316 if (error) 317 goto cleanup; 318 319 /* 320 * XXX: This should use vn_open() so that it is properly authorized, 321 * and to reduce code redundancy all over the place here. 322 */ 323 #ifdef MAC 324 error = mac_check_vnode_open(td->td_ucred, vp, FREAD); 325 if (error) 326 goto cleanup; 327 #endif 328 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, -1); 329 if (error) 330 goto cleanup; 331 332 /* Pull in executable header into kernel_map */ 333 error = vm_mmap(kernel_map, (vm_offset_t *)&a_out, PAGE_SIZE, 334 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 0); 335 /* 336 * Lock no longer needed 337 */ 338 locked = 0; 339 VOP_UNLOCK(vp, 0, td); 340 341 if (error) 342 goto cleanup; 343 344 /* Is it a Linux binary ? */ 345 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 346 error = ENOEXEC; 347 goto cleanup; 348 } 349 350 /* 351 * While we are here, we should REALLY do some more checks 352 */ 353 354 /* Set file/virtual offset based on a.out variant. */ 355 switch ((int)(a_out->a_magic & 0xffff)) { 356 case 0413: /* ZMAGIC */ 357 file_offset = 1024; 358 break; 359 case 0314: /* QMAGIC */ 360 file_offset = 0; 361 break; 362 default: 363 error = ENOEXEC; 364 goto cleanup; 365 } 366 367 bss_size = round_page(a_out->a_bss); 368 369 /* Check various fields in header for validity/bounds. */ 370 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 371 error = ENOEXEC; 372 goto cleanup; 373 } 374 375 /* text + data can't exceed file size */ 376 if (a_out->a_data + a_out->a_text > attr.va_size) { 377 error = EFAULT; 378 goto cleanup; 379 } 380 381 /* To protect td->td_proc->p_rlimit in the if condition. */ 382 mtx_assert(&Giant, MA_OWNED); 383 384 /* 385 * text/data/bss must not exceed limits 386 * XXX - this is not complete. it should check current usage PLUS 387 * the resources needed by this library. 
388 */ 389 if (a_out->a_text > maxtsiz || 390 a_out->a_data + bss_size > 391 td->td_proc->p_rlimit[RLIMIT_DATA].rlim_cur) { 392 error = ENOMEM; 393 goto cleanup; 394 } 395 396 mp_fixme("Unlocked vflags access."); 397 /* prevent more writers */ 398 vp->v_vflag |= VV_TEXT; 399 400 /* 401 * Check if file_offset page aligned. Currently we cannot handle 402 * misalinged file offsets, and so we read in the entire image 403 * (what a waste). 404 */ 405 if (file_offset & PAGE_MASK) { 406 #ifdef DEBUG 407 printf("uselib: Non page aligned binary %lu\n", file_offset); 408 #endif 409 /* Map text+data read/write/execute */ 410 411 /* a_entry is the load address and is page aligned */ 412 vmaddr = trunc_page(a_out->a_entry); 413 414 /* get anon user mapping, read+write+execute */ 415 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 416 &vmaddr, a_out->a_text + a_out->a_data, FALSE, VM_PROT_ALL, 417 VM_PROT_ALL, 0); 418 if (error) 419 goto cleanup; 420 421 /* map file into kernel_map */ 422 error = vm_mmap(kernel_map, &buffer, 423 round_page(a_out->a_text + a_out->a_data + file_offset), 424 VM_PROT_READ, VM_PROT_READ, 0, (caddr_t)vp, 425 trunc_page(file_offset)); 426 if (error) 427 goto cleanup; 428 429 /* copy from kernel VM space to user space */ 430 error = copyout((void *)(uintptr_t)(buffer + file_offset), 431 (void *)vmaddr, a_out->a_text + a_out->a_data); 432 433 /* release temporary kernel space */ 434 vm_map_remove(kernel_map, buffer, buffer + 435 round_page(a_out->a_text + a_out->a_data + file_offset)); 436 437 if (error) 438 goto cleanup; 439 } else { 440 #ifdef DEBUG 441 printf("uselib: Page aligned binary %lu\n", file_offset); 442 #endif 443 /* 444 * for QMAGIC, a_entry is 20 bytes beyond the load address 445 * to skip the executable header 446 */ 447 vmaddr = trunc_page(a_out->a_entry); 448 449 /* 450 * Map it all into the process's space as a single 451 * copy-on-write "data" segment. 
452 */ 453 error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr, 454 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 455 MAP_PRIVATE | MAP_FIXED, (caddr_t)vp, file_offset); 456 if (error) 457 goto cleanup; 458 } 459 #ifdef DEBUG 460 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long*)vmaddr)[0], 461 ((long*)vmaddr)[1]); 462 #endif 463 if (bss_size != 0) { 464 /* Calculate BSS start address */ 465 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 466 a_out->a_data; 467 468 /* allocate some 'anon' space */ 469 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 470 &vmaddr, bss_size, FALSE, VM_PROT_ALL, VM_PROT_ALL, 0); 471 if (error) 472 goto cleanup; 473 } 474 475 cleanup: 476 /* Unlock vnode if needed */ 477 if (locked) 478 VOP_UNLOCK(vp, 0, td); 479 480 /* Release the kernel mapping. */ 481 if (a_out) 482 vm_map_remove(kernel_map, (vm_offset_t)a_out, 483 (vm_offset_t)a_out + PAGE_SIZE); 484 485 return error; 486 } 487 488 int 489 linux_select(struct thread *td, struct linux_select_args *args) 490 { 491 struct timeval tv0, tv1, utv, *tvp; 492 int error; 493 494 #ifdef DEBUG 495 if (ldebug(select)) 496 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 497 (void *)args->readfds, (void *)args->writefds, 498 (void *)args->exceptfds, (void *)args->timeout); 499 #endif 500 501 /* 502 * Store current time for computation of the amount of 503 * time left. 504 */ 505 if (args->timeout) { 506 if ((error = copyin(args->timeout, &utv, sizeof(utv)))) 507 goto select_out; 508 #ifdef DEBUG 509 if (ldebug(select)) 510 printf(LMSG("incoming timeout (%ld/%ld)"), 511 utv.tv_sec, utv.tv_usec); 512 #endif 513 514 if (itimerfix(&utv)) { 515 /* 516 * The timeval was invalid. Convert it to something 517 * valid that will act as it does under Linux. 
518 */ 519 utv.tv_sec += utv.tv_usec / 1000000; 520 utv.tv_usec %= 1000000; 521 if (utv.tv_usec < 0) { 522 utv.tv_sec -= 1; 523 utv.tv_usec += 1000000; 524 } 525 if (utv.tv_sec < 0) 526 timevalclear(&utv); 527 } 528 microtime(&tv0); 529 tvp = &utv; 530 } else 531 tvp = NULL; 532 533 error = kern_select(td, args->nfds, args->readfds, args->writefds, 534 args->exceptfds, tvp); 535 536 #ifdef DEBUG 537 if (ldebug(select)) 538 printf(LMSG("real select returns %d"), error); 539 #endif 540 if (error) { 541 /* 542 * See fs/select.c in the Linux kernel. Without this, 543 * Maelstrom doesn't work. 544 */ 545 if (error == ERESTART) 546 error = EINTR; 547 goto select_out; 548 } 549 550 if (args->timeout) { 551 if (td->td_retval[0]) { 552 /* 553 * Compute how much time was left of the timeout, 554 * by subtracting the current time and the time 555 * before we started the call, and subtracting 556 * that result from the user-supplied value. 557 */ 558 microtime(&tv1); 559 timevalsub(&tv1, &tv0); 560 timevalsub(&utv, &tv1); 561 if (utv.tv_sec < 0) 562 timevalclear(&utv); 563 } else 564 timevalclear(&utv); 565 #ifdef DEBUG 566 if (ldebug(select)) 567 printf(LMSG("outgoing timeout (%ld/%ld)"), 568 utv.tv_sec, utv.tv_usec); 569 #endif 570 if ((error = copyout(&utv, args->timeout, sizeof(utv)))) 571 goto select_out; 572 } 573 574 select_out: 575 #ifdef DEBUG 576 if (ldebug(select)) 577 printf(LMSG("select_out -> %d"), error); 578 #endif 579 return error; 580 } 581 582 int 583 linux_mremap(struct thread *td, struct linux_mremap_args *args) 584 { 585 struct munmap_args /* { 586 void *addr; 587 size_t len; 588 } */ bsd_args; 589 int error = 0; 590 591 #ifdef DEBUG 592 if (ldebug(mremap)) 593 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 594 (void *)args->addr, 595 (unsigned long)args->old_len, 596 (unsigned long)args->new_len, 597 (unsigned long)args->flags); 598 #endif 599 args->new_len = round_page(args->new_len); 600 args->old_len = round_page(args->old_len); 601 602 if 
(args->new_len > args->old_len) { 603 td->td_retval[0] = 0; 604 return ENOMEM; 605 } 606 607 if (args->new_len < args->old_len) { 608 bsd_args.addr = (caddr_t)(args->addr + args->new_len); 609 bsd_args.len = args->old_len - args->new_len; 610 error = munmap(td, &bsd_args); 611 } 612 613 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 614 return error; 615 } 616 617 #define LINUX_MS_ASYNC 0x0001 618 #define LINUX_MS_INVALIDATE 0x0002 619 #define LINUX_MS_SYNC 0x0004 620 621 int 622 linux_msync(struct thread *td, struct linux_msync_args *args) 623 { 624 struct msync_args bsd_args; 625 626 bsd_args.addr = (caddr_t)args->addr; 627 bsd_args.len = args->len; 628 bsd_args.flags = args->fl & ~LINUX_MS_SYNC; 629 630 return msync(td, &bsd_args); 631 } 632 633 #ifndef __alpha__ 634 int 635 linux_time(struct thread *td, struct linux_time_args *args) 636 { 637 struct timeval tv; 638 l_time_t tm; 639 int error; 640 641 #ifdef DEBUG 642 if (ldebug(time)) 643 printf(ARGS(time, "*")); 644 #endif 645 646 microtime(&tv); 647 tm = tv.tv_sec; 648 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 649 return error; 650 td->td_retval[0] = tm; 651 return 0; 652 } 653 #endif /*!__alpha__*/ 654 655 struct l_times_argv { 656 l_long tms_utime; 657 l_long tms_stime; 658 l_long tms_cutime; 659 l_long tms_cstime; 660 }; 661 662 #ifdef __alpha__ 663 #define CLK_TCK 1024 /* Linux uses 1024 on alpha */ 664 #else 665 #define CLK_TCK 100 /* Linux uses 100 */ 666 #endif 667 668 #define CONVTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 669 670 int 671 linux_times(struct thread *td, struct linux_times_args *args) 672 { 673 struct timeval tv; 674 struct l_times_argv tms; 675 struct rusage ru; 676 int error; 677 678 #ifdef DEBUG 679 if (ldebug(times)) 680 printf(ARGS(times, "*")); 681 #endif 682 683 mtx_lock_spin(&sched_lock); 684 calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL); 685 mtx_unlock_spin(&sched_lock); 686 687 tms.tms_utime = CONVTCK(ru.ru_utime); 688 
tms.tms_stime = CONVTCK(ru.ru_stime); 689 690 tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime); 691 tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime); 692 693 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 694 return error; 695 696 microuptime(&tv); 697 td->td_retval[0] = (int)CONVTCK(tv); 698 return 0; 699 } 700 701 int 702 linux_newuname(struct thread *td, struct linux_newuname_args *args) 703 { 704 struct l_new_utsname utsname; 705 char osname[LINUX_MAX_UTSNAME]; 706 char osrelease[LINUX_MAX_UTSNAME]; 707 char *p; 708 709 #ifdef DEBUG 710 if (ldebug(newuname)) 711 printf(ARGS(newuname, "*")); 712 #endif 713 714 linux_get_osname(td, osname); 715 linux_get_osrelease(td, osrelease); 716 717 bzero(&utsname, sizeof(utsname)); 718 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 719 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 720 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 721 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 722 for (p = utsname.version; *p != '\0'; ++p) 723 if (*p == '\n') { 724 *p = '\0'; 725 break; 726 } 727 #ifdef __i386__ 728 { 729 const char *class; 730 switch (cpu_class) { 731 case CPUCLASS_686: 732 class = "i686"; 733 break; 734 case CPUCLASS_586: 735 class = "i586"; 736 break; 737 case CPUCLASS_486: 738 class = "i486"; 739 break; 740 default: 741 class = "i386"; 742 } 743 strlcpy(utsname.machine, class, LINUX_MAX_UTSNAME); 744 } 745 #else 746 strlcpy(utsname.machine, machine, LINUX_MAX_UTSNAME); 747 #endif 748 strlcpy(utsname.domainname, domainname, LINUX_MAX_UTSNAME); 749 750 return (copyout(&utsname, args->buf, sizeof(utsname))); 751 } 752 753 #if defined(__i386__) 754 struct l_utimbuf { 755 l_time_t l_actime; 756 l_time_t l_modtime; 757 }; 758 759 int 760 linux_utime(struct thread *td, struct linux_utime_args *args) 761 { 762 struct timeval tv[2], *tvp; 763 struct l_utimbuf lut; 764 char *fname; 765 int error; 766 767 LCONVPATHEXIST(td, args->fname, &fname); 768 
769 #ifdef DEBUG 770 if (ldebug(utime)) 771 printf(ARGS(utime, "%s, *"), fname); 772 #endif 773 774 if (args->times) { 775 if ((error = copyin(args->times, &lut, sizeof lut))) { 776 LFREEPATH(fname); 777 return error; 778 } 779 tv[0].tv_sec = lut.l_actime; 780 tv[0].tv_usec = 0; 781 tv[1].tv_sec = lut.l_modtime; 782 tv[1].tv_usec = 0; 783 tvp = tv; 784 } else 785 tvp = NULL; 786 787 error = kern_utimes(td, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 788 LFREEPATH(fname); 789 return (error); 790 } 791 #endif /* __i386__ */ 792 793 #define __WCLONE 0x80000000 794 795 #ifndef __alpha__ 796 int 797 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 798 { 799 struct wait_args /* { 800 int pid; 801 int *status; 802 int options; 803 struct rusage *rusage; 804 } */ tmp; 805 int error, tmpstat; 806 807 #ifdef DEBUG 808 if (ldebug(waitpid)) 809 printf(ARGS(waitpid, "%d, %p, %d"), 810 args->pid, (void *)args->status, args->options); 811 #endif 812 813 tmp.pid = args->pid; 814 tmp.status = args->status; 815 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 816 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 817 if (args->options & __WCLONE) 818 tmp.options |= WLINUXCLONE; 819 tmp.rusage = NULL; 820 821 if ((error = wait4(td, &tmp)) != 0) 822 return error; 823 824 if (args->status) { 825 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 826 return error; 827 tmpstat &= 0xffff; 828 if (WIFSIGNALED(tmpstat)) 829 tmpstat = (tmpstat & 0xffffff80) | 830 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 831 else if (WIFSTOPPED(tmpstat)) 832 tmpstat = (tmpstat & 0xffff00ff) | 833 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 834 return copyout(&tmpstat, args->status, sizeof(int)); 835 } 836 837 return 0; 838 } 839 #endif /*!__alpha__*/ 840 841 int 842 linux_wait4(struct thread *td, struct linux_wait4_args *args) 843 { 844 struct wait_args /* { 845 int pid; 846 int *status; 847 int options; 848 struct rusage *rusage; 849 } */ tmp; 850 int error, 
tmpstat; 851 struct proc *p; 852 853 #ifdef DEBUG 854 if (ldebug(wait4)) 855 printf(ARGS(wait4, "%d, %p, %d, %p"), 856 args->pid, (void *)args->status, args->options, 857 (void *)args->rusage); 858 #endif 859 860 tmp.pid = args->pid; 861 tmp.status = args->status; 862 tmp.options = (args->options & (WNOHANG | WUNTRACED)); 863 /* WLINUXCLONE should be equal to __WCLONE, but we make sure */ 864 if (args->options & __WCLONE) 865 tmp.options |= WLINUXCLONE; 866 tmp.rusage = (struct rusage *)args->rusage; 867 868 if ((error = wait4(td, &tmp)) != 0) 869 return error; 870 871 p = td->td_proc; 872 PROC_LOCK(p); 873 SIGDELSET(p->p_siglist, SIGCHLD); 874 PROC_UNLOCK(p); 875 876 if (args->status) { 877 if ((error = copyin(args->status, &tmpstat, sizeof(int))) != 0) 878 return error; 879 tmpstat &= 0xffff; 880 if (WIFSIGNALED(tmpstat)) 881 tmpstat = (tmpstat & 0xffffff80) | 882 BSD_TO_LINUX_SIGNAL(WTERMSIG(tmpstat)); 883 else if (WIFSTOPPED(tmpstat)) 884 tmpstat = (tmpstat & 0xffff00ff) | 885 (BSD_TO_LINUX_SIGNAL(WSTOPSIG(tmpstat)) << 8); 886 return copyout(&tmpstat, args->status, sizeof(int)); 887 } 888 889 return 0; 890 } 891 892 int 893 linux_mknod(struct thread *td, struct linux_mknod_args *args) 894 { 895 char *path; 896 int error; 897 898 LCONVPATHCREAT(td, args->path, &path); 899 900 #ifdef DEBUG 901 if (ldebug(mknod)) 902 printf(ARGS(mknod, "%s, %d, %d"), path, args->mode, args->dev); 903 #endif 904 905 if (args->mode & S_IFIFO) 906 error = kern_mkfifo(td, path, UIO_SYSSPACE, args->mode); 907 else 908 error = kern_mknod(td, path, UIO_SYSSPACE, args->mode, 909 args->dev); 910 LFREEPATH(path); 911 return (error); 912 } 913 914 /* 915 * UGH! This is just about the dumbest idea I've ever heard!! 
916 */ 917 int 918 linux_personality(struct thread *td, struct linux_personality_args *args) 919 { 920 #ifdef DEBUG 921 if (ldebug(personality)) 922 printf(ARGS(personality, "%lu"), (unsigned long)args->per); 923 #endif 924 #ifndef __alpha__ 925 if (args->per != 0) 926 return EINVAL; 927 #endif 928 929 /* Yes Jim, it's still a Linux... */ 930 td->td_retval[0] = 0; 931 return 0; 932 } 933 934 /* 935 * Wrappers for get/setitimer for debugging.. 936 */ 937 int 938 linux_setitimer(struct thread *td, struct linux_setitimer_args *args) 939 { 940 struct setitimer_args bsa; 941 struct itimerval foo; 942 int error; 943 944 #ifdef DEBUG 945 if (ldebug(setitimer)) 946 printf(ARGS(setitimer, "%p, %p"), 947 (void *)args->itv, (void *)args->oitv); 948 #endif 949 bsa.which = args->which; 950 bsa.itv = (struct itimerval *)args->itv; 951 bsa.oitv = (struct itimerval *)args->oitv; 952 if (args->itv) { 953 if ((error = copyin(args->itv, &foo, sizeof(foo)))) 954 return error; 955 #ifdef DEBUG 956 if (ldebug(setitimer)) { 957 printf("setitimer: value: sec: %ld, usec: %ld\n", 958 foo.it_value.tv_sec, foo.it_value.tv_usec); 959 printf("setitimer: interval: sec: %ld, usec: %ld\n", 960 foo.it_interval.tv_sec, foo.it_interval.tv_usec); 961 } 962 #endif 963 } 964 return setitimer(td, &bsa); 965 } 966 967 int 968 linux_getitimer(struct thread *td, struct linux_getitimer_args *args) 969 { 970 struct getitimer_args bsa; 971 #ifdef DEBUG 972 if (ldebug(getitimer)) 973 printf(ARGS(getitimer, "%p"), (void *)args->itv); 974 #endif 975 bsa.which = args->which; 976 bsa.itv = (struct itimerval *)args->itv; 977 return getitimer(td, &bsa); 978 } 979 980 #ifndef __alpha__ 981 int 982 linux_nice(struct thread *td, struct linux_nice_args *args) 983 { 984 struct setpriority_args bsd_args; 985 986 bsd_args.which = PRIO_PROCESS; 987 bsd_args.who = 0; /* current process */ 988 bsd_args.prio = args->inc; 989 return setpriority(td, &bsd_args); 990 } 991 #endif /*!__alpha__*/ 992 993 int 994 
linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 995 { 996 struct ucred *newcred, *oldcred; 997 l_gid_t linux_gidset[NGROUPS]; 998 gid_t *bsd_gidset; 999 int ngrp, error; 1000 struct proc *p; 1001 1002 ngrp = args->gidsetsize; 1003 if (ngrp < 0 || ngrp >= NGROUPS) 1004 return (EINVAL); 1005 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1006 if (error) 1007 return (error); 1008 newcred = crget(); 1009 p = td->td_proc; 1010 PROC_LOCK(p); 1011 oldcred = p->p_ucred; 1012 1013 /* 1014 * cr_groups[0] holds egid. Setting the whole set from 1015 * the supplied set will cause egid to be changed too. 1016 * Keep cr_groups[0] unchanged to prevent that. 1017 */ 1018 1019 if ((error = suser_cred(oldcred, PRISON_ROOT)) != 0) { 1020 PROC_UNLOCK(p); 1021 crfree(newcred); 1022 return (error); 1023 } 1024 1025 crcopy(newcred, oldcred); 1026 if (ngrp > 0) { 1027 newcred->cr_ngroups = ngrp + 1; 1028 1029 bsd_gidset = newcred->cr_groups; 1030 ngrp--; 1031 while (ngrp >= 0) { 1032 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1033 ngrp--; 1034 } 1035 } 1036 else 1037 newcred->cr_ngroups = 1; 1038 1039 setsugid(p); 1040 p->p_ucred = newcred; 1041 PROC_UNLOCK(p); 1042 crfree(oldcred); 1043 return (0); 1044 } 1045 1046 int 1047 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1048 { 1049 struct ucred *cred; 1050 l_gid_t linux_gidset[NGROUPS]; 1051 gid_t *bsd_gidset; 1052 int bsd_gidsetsz, ngrp, error; 1053 1054 cred = td->td_ucred; 1055 bsd_gidset = cred->cr_groups; 1056 bsd_gidsetsz = cred->cr_ngroups - 1; 1057 1058 /* 1059 * cr_groups[0] holds egid. Returning the whole set 1060 * here will cause a duplicate. Exclude cr_groups[0] 1061 * to prevent that. 
1062 */ 1063 1064 if ((ngrp = args->gidsetsize) == 0) { 1065 td->td_retval[0] = bsd_gidsetsz; 1066 return (0); 1067 } 1068 1069 if (ngrp < bsd_gidsetsz) 1070 return (EINVAL); 1071 1072 ngrp = 0; 1073 while (ngrp < bsd_gidsetsz) { 1074 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1075 ngrp++; 1076 } 1077 1078 if ((error = copyout(linux_gidset, args->grouplist, 1079 ngrp * sizeof(l_gid_t)))) 1080 return (error); 1081 1082 td->td_retval[0] = ngrp; 1083 return (0); 1084 } 1085 1086 #ifndef __alpha__ 1087 int 1088 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1089 { 1090 struct rlimit bsd_rlim; 1091 struct l_rlimit rlim; 1092 u_int which; 1093 int error; 1094 1095 #ifdef DEBUG 1096 if (ldebug(setrlimit)) 1097 printf(ARGS(setrlimit, "%d, %p"), 1098 args->resource, (void *)args->rlim); 1099 #endif 1100 1101 if (args->resource >= LINUX_RLIM_NLIMITS) 1102 return (EINVAL); 1103 1104 which = linux_to_bsd_resource[args->resource]; 1105 if (which == -1) 1106 return (EINVAL); 1107 1108 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1109 if (error) 1110 return (error); 1111 1112 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1113 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1114 return (dosetrlimit(td, which, &bsd_rlim)); 1115 } 1116 1117 int 1118 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1119 { 1120 struct l_rlimit rlim; 1121 struct proc *p = td->td_proc; 1122 struct rlimit *bsd_rlp; 1123 u_int which; 1124 1125 #ifdef DEBUG 1126 if (ldebug(old_getrlimit)) 1127 printf(ARGS(old_getrlimit, "%d, %p"), 1128 args->resource, (void *)args->rlim); 1129 #endif 1130 1131 if (args->resource >= LINUX_RLIM_NLIMITS) 1132 return (EINVAL); 1133 1134 which = linux_to_bsd_resource[args->resource]; 1135 if (which == -1) 1136 return (EINVAL); 1137 bsd_rlp = &p->p_rlimit[which]; 1138 1139 rlim.rlim_cur = (unsigned long)bsd_rlp->rlim_cur; 1140 if (rlim.rlim_cur == ULONG_MAX) 1141 rlim.rlim_cur = LONG_MAX; 1142 rlim.rlim_max = (unsigned 
long)bsd_rlp->rlim_max; 1143 if (rlim.rlim_max == ULONG_MAX) 1144 rlim.rlim_max = LONG_MAX; 1145 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1146 } 1147 1148 int 1149 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1150 { 1151 struct l_rlimit rlim; 1152 struct proc *p = td->td_proc; 1153 struct rlimit *bsd_rlp; 1154 u_int which; 1155 1156 #ifdef DEBUG 1157 if (ldebug(getrlimit)) 1158 printf(ARGS(getrlimit, "%d, %p"), 1159 args->resource, (void *)args->rlim); 1160 #endif 1161 1162 if (args->resource >= LINUX_RLIM_NLIMITS) 1163 return (EINVAL); 1164 1165 which = linux_to_bsd_resource[args->resource]; 1166 if (which == -1) 1167 return (EINVAL); 1168 bsd_rlp = &p->p_rlimit[which]; 1169 1170 rlim.rlim_cur = (l_ulong)bsd_rlp->rlim_cur; 1171 rlim.rlim_max = (l_ulong)bsd_rlp->rlim_max; 1172 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1173 } 1174 #endif /*!__alpha__*/ 1175 1176 int 1177 linux_sched_setscheduler(struct thread *td, 1178 struct linux_sched_setscheduler_args *args) 1179 { 1180 struct sched_setscheduler_args bsd; 1181 1182 #ifdef DEBUG 1183 if (ldebug(sched_setscheduler)) 1184 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1185 args->pid, args->policy, (const void *)args->param); 1186 #endif 1187 1188 switch (args->policy) { 1189 case LINUX_SCHED_OTHER: 1190 bsd.policy = SCHED_OTHER; 1191 break; 1192 case LINUX_SCHED_FIFO: 1193 bsd.policy = SCHED_FIFO; 1194 break; 1195 case LINUX_SCHED_RR: 1196 bsd.policy = SCHED_RR; 1197 break; 1198 default: 1199 return EINVAL; 1200 } 1201 1202 bsd.pid = args->pid; 1203 bsd.param = (struct sched_param *)args->param; 1204 return sched_setscheduler(td, &bsd); 1205 } 1206 1207 int 1208 linux_sched_getscheduler(struct thread *td, 1209 struct linux_sched_getscheduler_args *args) 1210 { 1211 struct sched_getscheduler_args bsd; 1212 int error; 1213 1214 #ifdef DEBUG 1215 if (ldebug(sched_getscheduler)) 1216 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1217 #endif 1218 1219 bsd.pid = args->pid; 
1220 error = sched_getscheduler(td, &bsd); 1221 1222 switch (td->td_retval[0]) { 1223 case SCHED_OTHER: 1224 td->td_retval[0] = LINUX_SCHED_OTHER; 1225 break; 1226 case SCHED_FIFO: 1227 td->td_retval[0] = LINUX_SCHED_FIFO; 1228 break; 1229 case SCHED_RR: 1230 td->td_retval[0] = LINUX_SCHED_RR; 1231 break; 1232 } 1233 1234 return error; 1235 } 1236 1237 int 1238 linux_sched_get_priority_max(struct thread *td, 1239 struct linux_sched_get_priority_max_args *args) 1240 { 1241 struct sched_get_priority_max_args bsd; 1242 1243 #ifdef DEBUG 1244 if (ldebug(sched_get_priority_max)) 1245 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1246 #endif 1247 1248 switch (args->policy) { 1249 case LINUX_SCHED_OTHER: 1250 bsd.policy = SCHED_OTHER; 1251 break; 1252 case LINUX_SCHED_FIFO: 1253 bsd.policy = SCHED_FIFO; 1254 break; 1255 case LINUX_SCHED_RR: 1256 bsd.policy = SCHED_RR; 1257 break; 1258 default: 1259 return EINVAL; 1260 } 1261 return sched_get_priority_max(td, &bsd); 1262 } 1263 1264 int 1265 linux_sched_get_priority_min(struct thread *td, 1266 struct linux_sched_get_priority_min_args *args) 1267 { 1268 struct sched_get_priority_min_args bsd; 1269 1270 #ifdef DEBUG 1271 if (ldebug(sched_get_priority_min)) 1272 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1273 #endif 1274 1275 switch (args->policy) { 1276 case LINUX_SCHED_OTHER: 1277 bsd.policy = SCHED_OTHER; 1278 break; 1279 case LINUX_SCHED_FIFO: 1280 bsd.policy = SCHED_FIFO; 1281 break; 1282 case LINUX_SCHED_RR: 1283 bsd.policy = SCHED_RR; 1284 break; 1285 default: 1286 return EINVAL; 1287 } 1288 return sched_get_priority_min(td, &bsd); 1289 } 1290 1291 #define REBOOT_CAD_ON 0x89abcdef 1292 #define REBOOT_CAD_OFF 0 1293 #define REBOOT_HALT 0xcdef0123 1294 1295 int 1296 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1297 { 1298 struct reboot_args bsd_args; 1299 1300 #ifdef DEBUG 1301 if (ldebug(reboot)) 1302 printf(ARGS(reboot, "0x%x"), args->cmd); 1303 #endif 1304 if (args->cmd == 
REBOOT_CAD_ON || args->cmd == REBOOT_CAD_OFF) 1305 return (0); 1306 bsd_args.opt = (args->cmd == REBOOT_HALT) ? RB_HALT : 0; 1307 return (reboot(td, &bsd_args)); 1308 } 1309 1310 #ifndef __alpha__ 1311 1312 /* 1313 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1314 * td->td_retval[1] when COMPAT_43 or COMPAT_SUNOS is defined. This 1315 * globbers registers that are assumed to be preserved. The following 1316 * lightweight syscalls fixes this. See also linux_getgid16() and 1317 * linux_getuid16() in linux_uid16.c. 1318 * 1319 * linux_getpid() - MP SAFE 1320 * linux_getgid() - MP SAFE 1321 * linux_getuid() - MP SAFE 1322 */ 1323 1324 int 1325 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1326 { 1327 1328 td->td_retval[0] = td->td_proc->p_pid; 1329 return (0); 1330 } 1331 1332 int 1333 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1334 { 1335 1336 td->td_retval[0] = td->td_ucred->cr_rgid; 1337 return (0); 1338 } 1339 1340 int 1341 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1342 { 1343 1344 td->td_retval[0] = td->td_ucred->cr_ruid; 1345 return (0); 1346 } 1347 1348 #endif /*!__alpha__*/ 1349 1350 int 1351 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1352 { 1353 struct getsid_args bsd; 1354 bsd.pid = args->pid; 1355 return getsid(td, &bsd); 1356 } 1357