1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/mutex.h> 51 #include <sys/namei.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/procctl.h> 55 #include <sys/reboot.h> 56 #include <sys/racct.h> 57 #include <sys/random.h> 58 #include <sys/resourcevar.h> 59 #include <sys/sched.h> 60 #include <sys/sdt.h> 61 #include <sys/signalvar.h> 62 #include <sys/stat.h> 63 #include <sys/syscallsubr.h> 64 #include <sys/sysctl.h> 65 #include <sys/sysproto.h> 66 #include <sys/systm.h> 67 #include <sys/time.h> 68 #include <sys/vmmeter.h> 69 #include <sys/vnode.h> 70 #include <sys/wait.h> 71 #include <sys/cpuset.h> 72 #include <sys/uio.h> 73 74 #include <security/mac/mac_framework.h> 75 76 #include <vm/vm.h> 77 #include <vm/pmap.h> 78 #include <vm/vm_kern.h> 79 #include <vm/vm_map.h> 80 #include <vm/vm_extern.h> 81 #include <vm/vm_object.h> 82 #include <vm/swap_pager.h> 83 84 #ifdef COMPAT_LINUX32 85 #include <machine/../linux32/linux.h> 86 #include <machine/../linux32/linux32_proto.h> 87 #else 88 #include <machine/../linux/linux.h> 89 #include <machine/../linux/linux_proto.h> 90 #endif 91 92 #include <compat/linux/linux_dtrace.h> 93 #include <compat/linux/linux_file.h> 94 #include <compat/linux/linux_mib.h> 95 #include <compat/linux/linux_signal.h> 96 #include <compat/linux/linux_timer.h> 97 #include <compat/linux/linux_util.h> 98 #include <compat/linux/linux_sysproto.h> 99 #include <compat/linux/linux_emul.h> 100 #include <compat/linux/linux_misc.h> 101 102 /** 103 * Special DTrace provider for the linuxulator. 104 * 105 * In this file we define the provider for the entire linuxulator. All 106 * modules (= files of the linuxulator) use it. 107 * 108 * We define a different name depending on the emulated bitsize, see 109 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 110 * native bitsize = linuxulator 111 * amd64, 32bit emulation = linuxulator32 112 */ 113 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 114 115 int stclohz; /* Statistics clock frequency */ 116 117 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 118 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 119 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 120 RLIMIT_MEMLOCK, RLIMIT_AS 121 }; 122 123 struct l_sysinfo { 124 l_long uptime; /* Seconds since boot */ 125 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 126 #define LINUX_SYSINFO_LOADS_SCALE 65536 127 l_ulong totalram; /* Total usable main memory size */ 128 l_ulong freeram; /* Available memory size */ 129 l_ulong sharedram; /* Amount of shared memory */ 130 l_ulong bufferram; /* Memory used by buffers */ 131 l_ulong totalswap; /* Total swap space size */ 132 l_ulong freeswap; /* swap space still available */ 133 l_ushort procs; /* Number of current processes */ 134 l_ushort pads; 135 l_ulong totalbig; 136 l_ulong freebig; 137 l_uint mem_unit; 138 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 139 }; 140 141 struct l_pselect6arg { 142 l_uintptr_t ss; 143 l_size_t ss_len; 144 }; 145 146 static int linux_utimensat_nsec_valid(l_long); 147 148 149 int 150 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 151 { 152 struct l_sysinfo sysinfo; 153 vm_object_t object; 154 int i, j; 155 struct timespec ts; 156 157 bzero(&sysinfo, sizeof(sysinfo)); 158 getnanouptime(&ts); 159 if (ts.tv_nsec != 0) 160 ts.tv_sec++; 161 sysinfo.uptime = ts.tv_sec; 162 163 /* Use the information from the mib to get our load averages */ 164 for (i = 0; i < 3; i++) 165 sysinfo.loads[i] = averunnable.ldavg[i] * 166 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 167 168 sysinfo.totalram = physmem * PAGE_SIZE; 169 sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE; 170 171 sysinfo.sharedram = 0; 172 mtx_lock(&vm_object_list_mtx); 173 TAILQ_FOREACH(object, &vm_object_list, object_list) 174 if (object->shadow_count > 1) 175 sysinfo.sharedram += object->resident_page_count; 176 mtx_unlock(&vm_object_list_mtx); 177 178 sysinfo.sharedram *= PAGE_SIZE; 179 sysinfo.bufferram = 0; 180 181 swap_pager_status(&i, &j); 182 sysinfo.totalswap = i * PAGE_SIZE; 183 sysinfo.freeswap = (i - j) * PAGE_SIZE; 184 185 sysinfo.procs = nprocs; 186 187 /* The following are only present in newer Linux kernels. */ 188 sysinfo.totalbig = 0; 189 sysinfo.freebig = 0; 190 sysinfo.mem_unit = 1; 191 192 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 193 } 194 195 #ifdef LINUX_LEGACY_SYSCALLS 196 int 197 linux_alarm(struct thread *td, struct linux_alarm_args *args) 198 { 199 struct itimerval it, old_it; 200 u_int secs; 201 int error; 202 203 #ifdef DEBUG 204 if (ldebug(alarm)) 205 printf(ARGS(alarm, "%u"), args->secs); 206 #endif 207 secs = args->secs; 208 /* 209 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 210 * to match kern_setitimer()'s limit to avoid error from it. 211 * 212 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 213 * platforms. 214 */ 215 if (secs > INT32_MAX / 2) 216 secs = INT32_MAX / 2; 217 218 it.it_value.tv_sec = secs; 219 it.it_value.tv_usec = 0; 220 timevalclear(&it.it_interval); 221 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 222 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 223 224 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 225 old_it.it_value.tv_usec >= 500000) 226 old_it.it_value.tv_sec++; 227 td->td_retval[0] = old_it.it_value.tv_sec; 228 return (0); 229 } 230 #endif 231 232 int 233 linux_brk(struct thread *td, struct linux_brk_args *args) 234 { 235 struct vmspace *vm = td->td_proc->p_vmspace; 236 uintptr_t new, old; 237 238 #ifdef DEBUG 239 if (ldebug(brk)) 240 printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend); 241 #endif 242 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 243 new = (uintptr_t)args->dsend; 244 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 245 td->td_retval[0] = (register_t)new; 246 else 247 td->td_retval[0] = (register_t)old; 248 249 return (0); 250 } 251 252 #if defined(__i386__) 253 /* XXX: what about amd64/linux32? */ 254 255 int 256 linux_uselib(struct thread *td, struct linux_uselib_args *args) 257 { 258 struct nameidata ni; 259 struct vnode *vp; 260 struct exec *a_out; 261 vm_map_t map; 262 vm_map_entry_t entry; 263 struct vattr attr; 264 vm_offset_t vmaddr; 265 unsigned long file_offset; 266 unsigned long bss_size; 267 char *library; 268 ssize_t aresid; 269 int error; 270 bool locked, opened, textset; 271 272 LCONVPATHEXIST(td, args->library, &library); 273 274 #ifdef DEBUG 275 if (ldebug(uselib)) 276 printf(ARGS(uselib, "%s"), library); 277 #endif 278 279 a_out = NULL; 280 vp = NULL; 281 locked = false; 282 textset = false; 283 opened = false; 284 285 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 286 UIO_SYSSPACE, library, td); 287 error = namei(&ni); 288 LFREEPATH(library); 289 if (error) 290 goto cleanup; 291 292 vp = ni.ni_vp; 293 NDFREE(&ni, NDF_ONLY_PNBUF); 294 295 /* 296 * From here on down, we have a locked vnode that must be unlocked. 297 * XXX: The code below largely duplicates exec_check_permissions(). 298 */ 299 locked = true; 300 301 /* Executable? */ 302 error = VOP_GETATTR(vp, &attr, td->td_ucred); 303 if (error) 304 goto cleanup; 305 306 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 307 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 308 /* EACCESS is what exec(2) returns. */ 309 error = ENOEXEC; 310 goto cleanup; 311 } 312 313 /* Sensible size? */ 314 if (attr.va_size == 0) { 315 error = ENOEXEC; 316 goto cleanup; 317 } 318 319 /* Can we access it? */ 320 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 321 if (error) 322 goto cleanup; 323 324 /* 325 * XXX: This should use vn_open() so that it is properly authorized, 326 * and to reduce code redundancy all over the place here. 327 * XXX: Not really, it duplicates far more of exec_check_permissions() 328 * than vn_open(). 329 */ 330 #ifdef MAC 331 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 332 if (error) 333 goto cleanup; 334 #endif 335 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 336 if (error) 337 goto cleanup; 338 opened = true; 339 340 /* Pull in executable header into exec_map */ 341 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 342 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 343 if (error) 344 goto cleanup; 345 346 /* Is it a Linux binary ? */ 347 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 348 error = ENOEXEC; 349 goto cleanup; 350 } 351 352 /* 353 * While we are here, we should REALLY do some more checks 354 */ 355 356 /* Set file/virtual offset based on a.out variant. */ 357 switch ((int)(a_out->a_magic & 0xffff)) { 358 case 0413: /* ZMAGIC */ 359 file_offset = 1024; 360 break; 361 case 0314: /* QMAGIC */ 362 file_offset = 0; 363 break; 364 default: 365 error = ENOEXEC; 366 goto cleanup; 367 } 368 369 bss_size = round_page(a_out->a_bss); 370 371 /* Check various fields in header for validity/bounds. */ 372 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 373 error = ENOEXEC; 374 goto cleanup; 375 } 376 377 /* text + data can't exceed file size */ 378 if (a_out->a_data + a_out->a_text > attr.va_size) { 379 error = EFAULT; 380 goto cleanup; 381 } 382 383 /* 384 * text/data/bss must not exceed limits 385 * XXX - this is not complete. it should check current usage PLUS 386 * the resources needed by this library. 387 */ 388 PROC_LOCK(td->td_proc); 389 if (a_out->a_text > maxtsiz || 390 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 391 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 392 bss_size) != 0) { 393 PROC_UNLOCK(td->td_proc); 394 error = ENOMEM; 395 goto cleanup; 396 } 397 PROC_UNLOCK(td->td_proc); 398 399 /* 400 * Prevent more writers. 401 */ 402 error = VOP_SET_TEXT(vp); 403 if (error != 0) 404 goto cleanup; 405 textset = true; 406 407 /* 408 * Lock no longer needed 409 */ 410 locked = false; 411 VOP_UNLOCK(vp, 0); 412 413 /* 414 * Check if file_offset page aligned. Currently we cannot handle 415 * misalinged file offsets, and so we read in the entire image 416 * (what a waste). 417 */ 418 if (file_offset & PAGE_MASK) { 419 #ifdef DEBUG 420 printf("uselib: Non page aligned binary %lu\n", file_offset); 421 #endif 422 /* Map text+data read/write/execute */ 423 424 /* a_entry is the load address and is page aligned */ 425 vmaddr = trunc_page(a_out->a_entry); 426 427 /* get anon user mapping, read+write+execute */ 428 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 429 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 430 VM_PROT_ALL, VM_PROT_ALL, 0); 431 if (error) 432 goto cleanup; 433 434 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 435 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 436 td->td_ucred, NOCRED, &aresid, td); 437 if (error != 0) 438 goto cleanup; 439 if (aresid != 0) { 440 error = ENOEXEC; 441 goto cleanup; 442 } 443 } else { 444 #ifdef DEBUG 445 printf("uselib: Page aligned binary %lu\n", file_offset); 446 #endif 447 /* 448 * for QMAGIC, a_entry is 20 bytes beyond the load address 449 * to skip the executable header 450 */ 451 vmaddr = trunc_page(a_out->a_entry); 452 453 /* 454 * Map it all into the process's space as a single 455 * copy-on-write "data" segment. 456 */ 457 map = &td->td_proc->p_vmspace->vm_map; 458 error = vm_mmap(map, &vmaddr, 459 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 460 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 461 if (error) 462 goto cleanup; 463 vm_map_lock(map); 464 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 465 vm_map_unlock(map); 466 error = EDOOFUS; 467 goto cleanup; 468 } 469 entry->eflags |= MAP_ENTRY_VN_EXEC; 470 vm_map_unlock(map); 471 textset = false; 472 } 473 #ifdef DEBUG 474 printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0], 475 ((long *)vmaddr)[1]); 476 #endif 477 if (bss_size != 0) { 478 /* Calculate BSS start address */ 479 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 480 a_out->a_data; 481 482 /* allocate some 'anon' space */ 483 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 484 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 485 VM_PROT_ALL, 0); 486 if (error) 487 goto cleanup; 488 } 489 490 cleanup: 491 if (opened) { 492 if (locked) 493 VOP_UNLOCK(vp, 0); 494 locked = false; 495 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 496 } 497 if (textset) 498 VOP_UNSET_TEXT_CHECKED(vp); 499 if (locked) 500 VOP_UNLOCK(vp, 0); 501 502 /* Release the temporary mapping. */ 503 if (a_out) 504 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 505 506 return (error); 507 } 508 509 #endif /* __i386__ */ 510 511 #ifdef LINUX_LEGACY_SYSCALLS 512 int 513 linux_select(struct thread *td, struct linux_select_args *args) 514 { 515 l_timeval ltv; 516 struct timeval tv0, tv1, utv, *tvp; 517 int error; 518 519 #ifdef DEBUG 520 if (ldebug(select)) 521 printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds, 522 (void *)args->readfds, (void *)args->writefds, 523 (void *)args->exceptfds, (void *)args->timeout); 524 #endif 525 526 /* 527 * Store current time for computation of the amount of 528 * time left. 529 */ 530 if (args->timeout) { 531 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 532 goto select_out; 533 utv.tv_sec = ltv.tv_sec; 534 utv.tv_usec = ltv.tv_usec; 535 #ifdef DEBUG 536 if (ldebug(select)) 537 printf(LMSG("incoming timeout (%jd/%ld)"), 538 (intmax_t)utv.tv_sec, utv.tv_usec); 539 #endif 540 541 if (itimerfix(&utv)) { 542 /* 543 * The timeval was invalid. Convert it to something 544 * valid that will act as it does under Linux. 545 */ 546 utv.tv_sec += utv.tv_usec / 1000000; 547 utv.tv_usec %= 1000000; 548 if (utv.tv_usec < 0) { 549 utv.tv_sec -= 1; 550 utv.tv_usec += 1000000; 551 } 552 if (utv.tv_sec < 0) 553 timevalclear(&utv); 554 } 555 microtime(&tv0); 556 tvp = &utv; 557 } else 558 tvp = NULL; 559 560 error = kern_select(td, args->nfds, args->readfds, args->writefds, 561 args->exceptfds, tvp, LINUX_NFDBITS); 562 563 #ifdef DEBUG 564 if (ldebug(select)) 565 printf(LMSG("real select returns %d"), error); 566 #endif 567 if (error) 568 goto select_out; 569 570 if (args->timeout) { 571 if (td->td_retval[0]) { 572 /* 573 * Compute how much time was left of the timeout, 574 * by subtracting the current time and the time 575 * before we started the call, and subtracting 576 * that result from the user-supplied value. 577 */ 578 microtime(&tv1); 579 timevalsub(&tv1, &tv0); 580 timevalsub(&utv, &tv1); 581 if (utv.tv_sec < 0) 582 timevalclear(&utv); 583 } else 584 timevalclear(&utv); 585 #ifdef DEBUG 586 if (ldebug(select)) 587 printf(LMSG("outgoing timeout (%jd/%ld)"), 588 (intmax_t)utv.tv_sec, utv.tv_usec); 589 #endif 590 ltv.tv_sec = utv.tv_sec; 591 ltv.tv_usec = utv.tv_usec; 592 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 593 goto select_out; 594 } 595 596 select_out: 597 #ifdef DEBUG 598 if (ldebug(select)) 599 printf(LMSG("select_out -> %d"), error); 600 #endif 601 return (error); 602 } 603 #endif 604 605 int 606 linux_mremap(struct thread *td, struct linux_mremap_args *args) 607 { 608 uintptr_t addr; 609 size_t len; 610 int error = 0; 611 612 #ifdef DEBUG 613 if (ldebug(mremap)) 614 printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"), 615 (void *)(uintptr_t)args->addr, 616 (unsigned long)args->old_len, 617 (unsigned long)args->new_len, 618 (unsigned long)args->flags); 619 #endif 620 621 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 622 td->td_retval[0] = 0; 623 return (EINVAL); 624 } 625 626 /* 627 * Check for the page alignment. 628 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 629 */ 630 if (args->addr & PAGE_MASK) { 631 td->td_retval[0] = 0; 632 return (EINVAL); 633 } 634 635 args->new_len = round_page(args->new_len); 636 args->old_len = round_page(args->old_len); 637 638 if (args->new_len > args->old_len) { 639 td->td_retval[0] = 0; 640 return (ENOMEM); 641 } 642 643 if (args->new_len < args->old_len) { 644 addr = args->addr + args->new_len; 645 len = args->old_len - args->new_len; 646 error = kern_munmap(td, addr, len); 647 } 648 649 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 650 return (error); 651 } 652 653 #define LINUX_MS_ASYNC 0x0001 654 #define LINUX_MS_INVALIDATE 0x0002 655 #define LINUX_MS_SYNC 0x0004 656 657 int 658 linux_msync(struct thread *td, struct linux_msync_args *args) 659 { 660 661 return (kern_msync(td, args->addr, args->len, 662 args->fl & ~LINUX_MS_SYNC)); 663 } 664 665 #ifdef LINUX_LEGACY_SYSCALLS 666 int 667 linux_time(struct thread *td, struct linux_time_args *args) 668 { 669 struct timeval tv; 670 l_time_t tm; 671 int error; 672 673 #ifdef DEBUG 674 if (ldebug(time)) 675 printf(ARGS(time, "*")); 676 #endif 677 678 microtime(&tv); 679 tm = tv.tv_sec; 680 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 681 return (error); 682 td->td_retval[0] = tm; 683 return (0); 684 } 685 #endif 686 687 struct l_times_argv { 688 l_clock_t tms_utime; 689 l_clock_t tms_stime; 690 l_clock_t tms_cutime; 691 l_clock_t tms_cstime; 692 }; 693 694 695 /* 696 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 697 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 698 * auxiliary vector entry. 699 */ 700 #define CLK_TCK 100 701 702 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 703 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 704 705 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 706 CONVNTCK(r) : CONVOTCK(r)) 707 708 int 709 linux_times(struct thread *td, struct linux_times_args *args) 710 { 711 struct timeval tv, utime, stime, cutime, cstime; 712 struct l_times_argv tms; 713 struct proc *p; 714 int error; 715 716 #ifdef DEBUG 717 if (ldebug(times)) 718 printf(ARGS(times, "*")); 719 #endif 720 721 if (args->buf != NULL) { 722 p = td->td_proc; 723 PROC_LOCK(p); 724 PROC_STATLOCK(p); 725 calcru(p, &utime, &stime); 726 PROC_STATUNLOCK(p); 727 calccru(p, &cutime, &cstime); 728 PROC_UNLOCK(p); 729 730 tms.tms_utime = CONVTCK(utime); 731 tms.tms_stime = CONVTCK(stime); 732 733 tms.tms_cutime = CONVTCK(cutime); 734 tms.tms_cstime = CONVTCK(cstime); 735 736 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 737 return (error); 738 } 739 740 microuptime(&tv); 741 td->td_retval[0] = (int)CONVTCK(tv); 742 return (0); 743 } 744 745 int 746 linux_newuname(struct thread *td, struct linux_newuname_args *args) 747 { 748 struct l_new_utsname utsname; 749 char osname[LINUX_MAX_UTSNAME]; 750 char osrelease[LINUX_MAX_UTSNAME]; 751 char *p; 752 753 #ifdef DEBUG 754 if (ldebug(newuname)) 755 printf(ARGS(newuname, "*")); 756 #endif 757 758 linux_get_osname(td, osname); 759 linux_get_osrelease(td, osrelease); 760 761 bzero(&utsname, sizeof(utsname)); 762 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 763 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 764 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 765 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 766 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 767 for (p = utsname.version; *p != '\0'; ++p) 768 if (*p == '\n') { 769 *p = '\0'; 770 break; 771 } 772 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 773 774 return (copyout(&utsname, args->buf, sizeof(utsname))); 775 } 776 777 struct l_utimbuf { 778 l_time_t l_actime; 779 l_time_t l_modtime; 780 }; 781 782 #ifdef LINUX_LEGACY_SYSCALLS 783 int 784 linux_utime(struct thread *td, struct linux_utime_args *args) 785 { 786 struct timeval tv[2], *tvp; 787 struct l_utimbuf lut; 788 char *fname; 789 int error; 790 791 LCONVPATHEXIST(td, args->fname, &fname); 792 793 #ifdef DEBUG 794 if (ldebug(utime)) 795 printf(ARGS(utime, "%s, *"), fname); 796 #endif 797 798 if (args->times) { 799 if ((error = copyin(args->times, &lut, sizeof lut))) { 800 LFREEPATH(fname); 801 return (error); 802 } 803 tv[0].tv_sec = lut.l_actime; 804 tv[0].tv_usec = 0; 805 tv[1].tv_sec = lut.l_modtime; 806 tv[1].tv_usec = 0; 807 tvp = tv; 808 } else 809 tvp = NULL; 810 811 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 812 UIO_SYSSPACE); 813 LFREEPATH(fname); 814 return (error); 815 } 816 #endif 817 818 #ifdef LINUX_LEGACY_SYSCALLS 819 int 820 linux_utimes(struct thread *td, struct linux_utimes_args *args) 821 { 822 l_timeval ltv[2]; 823 struct timeval tv[2], *tvp = NULL; 824 char *fname; 825 int error; 826 827 LCONVPATHEXIST(td, args->fname, &fname); 828 829 #ifdef DEBUG 830 if (ldebug(utimes)) 831 printf(ARGS(utimes, "%s, *"), fname); 832 #endif 833 834 if (args->tptr != NULL) { 835 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 836 LFREEPATH(fname); 837 return (error); 838 } 839 tv[0].tv_sec = ltv[0].tv_sec; 840 tv[0].tv_usec = ltv[0].tv_usec; 841 tv[1].tv_sec = ltv[1].tv_sec; 842 tv[1].tv_usec = ltv[1].tv_usec; 843 tvp = tv; 844 } 845 846 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 847 tvp, UIO_SYSSPACE); 848 LFREEPATH(fname); 849 return (error); 850 } 851 #endif 852 853 static int 854 linux_utimensat_nsec_valid(l_long nsec) 855 { 856 857 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 858 return (0); 859 if (nsec >= 0 && nsec <= 999999999) 860 return (0); 861 return (1); 862 } 863 864 int 865 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 866 { 867 struct l_timespec l_times[2]; 868 struct timespec times[2], *timesp = NULL; 869 char *path = NULL; 870 int error, dfd, flags = 0; 871 872 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 873 874 #ifdef DEBUG 875 if (ldebug(utimensat)) 876 printf(ARGS(utimensat, "%d, *"), dfd); 877 #endif 878 879 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 880 return (EINVAL); 881 882 if (args->times != NULL) { 883 error = copyin(args->times, l_times, sizeof(l_times)); 884 if (error != 0) 885 return (error); 886 887 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 888 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 889 return (EINVAL); 890 891 times[0].tv_sec = l_times[0].tv_sec; 892 switch (l_times[0].tv_nsec) 893 { 894 case LINUX_UTIME_OMIT: 895 times[0].tv_nsec = UTIME_OMIT; 896 break; 897 case LINUX_UTIME_NOW: 898 times[0].tv_nsec = UTIME_NOW; 899 break; 900 default: 901 times[0].tv_nsec = l_times[0].tv_nsec; 902 } 903 904 times[1].tv_sec = l_times[1].tv_sec; 905 switch (l_times[1].tv_nsec) 906 { 907 case LINUX_UTIME_OMIT: 908 times[1].tv_nsec = UTIME_OMIT; 909 break; 910 case LINUX_UTIME_NOW: 911 times[1].tv_nsec = UTIME_NOW; 912 break; 913 default: 914 times[1].tv_nsec = l_times[1].tv_nsec; 915 break; 916 } 917 timesp = times; 918 919 /* This breaks POSIX, but is what the Linux kernel does 920 * _on purpose_ (documented in the man page for utimensat(2)), 921 * so we must follow that behaviour. */ 922 if (times[0].tv_nsec == UTIME_OMIT && 923 times[1].tv_nsec == UTIME_OMIT) 924 return (0); 925 } 926 927 if (args->pathname != NULL) 928 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 929 else if (args->flags != 0) 930 return (EINVAL); 931 932 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 933 flags |= AT_SYMLINK_NOFOLLOW; 934 935 if (path == NULL) 936 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 937 else { 938 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 939 UIO_SYSSPACE, flags); 940 LFREEPATH(path); 941 } 942 943 return (error); 944 } 945 946 #ifdef LINUX_LEGACY_SYSCALLS 947 int 948 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 949 { 950 l_timeval ltv[2]; 951 struct timeval tv[2], *tvp = NULL; 952 char *fname; 953 int error, dfd; 954 955 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 956 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 957 958 #ifdef DEBUG 959 if (ldebug(futimesat)) 960 printf(ARGS(futimesat, "%s, *"), fname); 961 #endif 962 963 if (args->utimes != NULL) { 964 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 965 LFREEPATH(fname); 966 return (error); 967 } 968 tv[0].tv_sec = ltv[0].tv_sec; 969 tv[0].tv_usec = ltv[0].tv_usec; 970 tv[1].tv_sec = ltv[1].tv_sec; 971 tv[1].tv_usec = ltv[1].tv_usec; 972 tvp = tv; 973 } 974 975 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 976 LFREEPATH(fname); 977 return (error); 978 } 979 #endif 980 981 int 982 linux_common_wait(struct thread *td, int pid, int *status, 983 int options, struct rusage *ru) 984 { 985 int error, tmpstat; 986 987 error = kern_wait(td, pid, &tmpstat, options, ru); 988 if (error) 989 return (error); 990 991 if (status) { 992 tmpstat &= 0xffff; 993 if (WIFSIGNALED(tmpstat)) 994 tmpstat = (tmpstat & 0xffffff80) | 995 bsd_to_linux_signal(WTERMSIG(tmpstat)); 996 else if (WIFSTOPPED(tmpstat)) 997 tmpstat = (tmpstat & 0xffff00ff) | 998 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 999 else if (WIFCONTINUED(tmpstat)) 1000 tmpstat = 0xffff; 1001 error = copyout(&tmpstat, status, sizeof(int)); 1002 } 1003 1004 return (error); 1005 } 1006 1007 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1008 int 1009 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 1010 { 1011 struct linux_wait4_args wait4_args; 1012 1013 #ifdef DEBUG 1014 if (ldebug(waitpid)) 1015 printf(ARGS(waitpid, "%d, %p, %d"), 1016 args->pid, (void *)args->status, args->options); 1017 #endif 1018 1019 wait4_args.pid = args->pid; 1020 wait4_args.status = args->status; 1021 wait4_args.options = args->options; 1022 wait4_args.rusage = NULL; 1023 1024 return (linux_wait4(td, &wait4_args)); 1025 } 1026 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1027 1028 int 1029 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1030 { 1031 int error, options; 1032 struct rusage ru, *rup; 1033 1034 #ifdef DEBUG 1035 if (ldebug(wait4)) 1036 printf(ARGS(wait4, "%d, %p, %d, %p"), 1037 args->pid, (void *)args->status, args->options, 1038 (void *)args->rusage); 1039 #endif 1040 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1041 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1042 return (EINVAL); 1043 1044 options = WEXITED; 1045 linux_to_bsd_waitopts(args->options, &options); 1046 1047 if (args->rusage != NULL) 1048 rup = &ru; 1049 else 1050 rup = NULL; 1051 error = linux_common_wait(td, args->pid, args->status, options, rup); 1052 if (error != 0) 1053 return (error); 1054 if (args->rusage != NULL) 1055 error = linux_copyout_rusage(&ru, args->rusage); 1056 return (error); 1057 } 1058 1059 int 1060 linux_waitid(struct thread *td, struct linux_waitid_args *args) 1061 { 1062 int status, options, sig; 1063 struct __wrusage wru; 1064 siginfo_t siginfo; 1065 l_siginfo_t lsi; 1066 idtype_t idtype; 1067 struct proc *p; 1068 int error; 1069 1070 options = 0; 1071 linux_to_bsd_waitopts(args->options, &options); 1072 1073 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1074 return (EINVAL); 1075 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1076 return (EINVAL); 1077 1078 switch (args->idtype) { 1079 case LINUX_P_ALL: 1080 idtype = P_ALL; 1081 break; 1082 case LINUX_P_PID: 1083 if (args->id <= 0) 1084 return (EINVAL); 1085 idtype = P_PID; 1086 break; 1087 case LINUX_P_PGID: 1088 if (args->id <= 0) 1089 return (EINVAL); 1090 idtype = P_PGID; 1091 break; 1092 default: 1093 return (EINVAL); 1094 } 1095 1096 error = kern_wait6(td, idtype, args->id, &status, options, 1097 &wru, &siginfo); 1098 if (error != 0) 1099 return (error); 1100 if (args->rusage != NULL) { 1101 error = linux_copyout_rusage(&wru.wru_children, 1102 args->rusage); 1103 if (error != 0) 1104 return (error); 1105 } 1106 if (args->info != NULL) { 1107 p = td->td_proc; 1108 bzero(&lsi, sizeof(lsi)); 1109 if (td->td_retval[0] != 0) { 1110 sig = bsd_to_linux_signal(siginfo.si_signo); 1111 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1112 } 1113 error = copyout(&lsi, args->info, sizeof(lsi)); 1114 } 1115 td->td_retval[0] = 0; 1116 1117 return (error); 1118 } 1119 1120 #ifdef LINUX_LEGACY_SYSCALLS 1121 int 1122 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1123 { 1124 char *path; 1125 int error; 1126 1127 LCONVPATHCREAT(td, args->path, &path); 1128 1129 #ifdef DEBUG 1130 if (ldebug(mknod)) 1131 printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode, 1132 (uintmax_t)args->dev); 1133 #endif 1134 1135 switch (args->mode & S_IFMT) { 1136 case S_IFIFO: 1137 case S_IFSOCK: 1138 error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE, 1139 args->mode); 1140 break; 1141 1142 case S_IFCHR: 1143 case S_IFBLK: 1144 error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE, 1145 args->mode, args->dev); 1146 break; 1147 1148 case S_IFDIR: 1149 error = EPERM; 1150 break; 1151 1152 case 0: 1153 args->mode |= S_IFREG; 1154 /* FALLTHROUGH */ 1155 case S_IFREG: 1156 error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE, 1157 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1158 if (error == 0) 1159 kern_close(td, td->td_retval[0]); 1160 break; 1161 1162 default: 1163 error = EINVAL; 1164 break; 1165 } 1166 LFREEPATH(path); 1167 return (error); 1168 } 1169 #endif 1170 1171 int 1172 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1173 { 1174 char *path; 1175 int error, dfd; 1176 1177 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1178 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1179 1180 #ifdef DEBUG 1181 if (ldebug(mknodat)) 1182 printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev); 1183 #endif 1184 1185 switch (args->mode & S_IFMT) { 1186 case S_IFIFO: 1187 case S_IFSOCK: 1188 error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode); 1189 break; 1190 1191 case S_IFCHR: 1192 case S_IFBLK: 1193 error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode, 1194 args->dev); 1195 break; 1196 1197 case S_IFDIR: 1198 error = EPERM; 1199 break; 1200 1201 case 0: 1202 args->mode |= S_IFREG; 1203 /* FALLTHROUGH */ 1204 case S_IFREG: 1205 error = kern_openat(td, dfd, path, UIO_SYSSPACE, 1206 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1207 if (error == 0) 1208 kern_close(td, td->td_retval[0]); 1209 break; 1210 1211 default: 1212 error = EINVAL; 1213 break; 1214 } 1215 LFREEPATH(path); 1216 return (error); 1217 } 1218 1219 /* 1220 * UGH! This is just about the dumbest idea I've ever heard!! 1221 */ 1222 int 1223 linux_personality(struct thread *td, struct linux_personality_args *args) 1224 { 1225 struct linux_pemuldata *pem; 1226 struct proc *p = td->td_proc; 1227 uint32_t old; 1228 1229 #ifdef DEBUG 1230 if (ldebug(personality)) 1231 printf(ARGS(personality, "%u"), args->per); 1232 #endif 1233 1234 PROC_LOCK(p); 1235 pem = pem_find(p); 1236 old = pem->persona; 1237 if (args->per != 0xffffffff) 1238 pem->persona = args->per; 1239 PROC_UNLOCK(p); 1240 1241 td->td_retval[0] = old; 1242 return (0); 1243 } 1244 1245 struct l_itimerval { 1246 l_timeval it_interval; 1247 l_timeval it_value; 1248 }; 1249 1250 #define B2L_ITIMERVAL(bip, lip) \ 1251 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1252 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1253 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1254 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1255 1256 int 1257 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1258 { 1259 int error; 1260 struct l_itimerval ls; 1261 struct itimerval aitv, oitv; 1262 1263 #ifdef DEBUG 1264 if (ldebug(setitimer)) 1265 printf(ARGS(setitimer, "%p, %p"), 1266 (void *)uap->itv, (void *)uap->oitv); 1267 #endif 1268 1269 if (uap->itv == NULL) { 1270 uap->itv = uap->oitv; 1271 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1272 } 1273 1274 error = copyin(uap->itv, &ls, sizeof(ls)); 1275 if (error != 0) 1276 return (error); 1277 B2L_ITIMERVAL(&aitv, &ls); 1278 #ifdef DEBUG 1279 if (ldebug(setitimer)) { 1280 printf("setitimer: value: sec: %jd, usec: %ld\n", 1281 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1282 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1283 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1284 } 1285 #endif 1286 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1287 if (error != 0 || uap->oitv == NULL) 1288 return (error); 1289 B2L_ITIMERVAL(&ls, &oitv); 1290 1291 return (copyout(&ls, uap->oitv, sizeof(ls))); 1292 } 1293 1294 int 1295 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1296 { 1297 int error; 1298 struct l_itimerval ls; 1299 struct itimerval aitv; 1300 1301 #ifdef DEBUG 1302 if (ldebug(getitimer)) 1303 printf(ARGS(getitimer, "%p"), (void *)uap->itv); 1304 #endif 1305 error = kern_getitimer(td, uap->which, &aitv); 1306 if (error != 0) 1307 return (error); 1308 B2L_ITIMERVAL(&ls, &aitv); 1309 return (copyout(&ls, uap->itv, sizeof(ls))); 1310 } 1311 1312 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1313 int 1314 linux_nice(struct thread *td, struct linux_nice_args *args) 1315 { 1316 struct setpriority_args bsd_args; 1317 1318 bsd_args.which = PRIO_PROCESS; 1319 bsd_args.who = 0; /* current process */ 1320 bsd_args.prio = args->inc; 1321 return (sys_setpriority(td, &bsd_args)); 1322 } 1323 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1324 1325 int 1326 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1327 { 1328 struct ucred *newcred, *oldcred; 1329 l_gid_t *linux_gidset; 1330 gid_t *bsd_gidset; 1331 int ngrp, error; 1332 struct proc *p; 1333 1334 ngrp = args->gidsetsize; 1335 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1336 return (EINVAL); 1337 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1338 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1339 if (error) 1340 goto out; 1341 newcred = crget(); 1342 crextend(newcred, ngrp + 1); 1343 p = td->td_proc; 1344 PROC_LOCK(p); 1345 oldcred = p->p_ucred; 1346 crcopy(newcred, oldcred); 1347 1348 /* 1349 * cr_groups[0] holds egid. Setting the whole set from 1350 * the supplied set will cause egid to be changed too. 1351 * Keep cr_groups[0] unchanged to prevent that. 1352 */ 1353 1354 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1355 PROC_UNLOCK(p); 1356 crfree(newcred); 1357 goto out; 1358 } 1359 1360 if (ngrp > 0) { 1361 newcred->cr_ngroups = ngrp + 1; 1362 1363 bsd_gidset = newcred->cr_groups; 1364 ngrp--; 1365 while (ngrp >= 0) { 1366 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1367 ngrp--; 1368 } 1369 } else 1370 newcred->cr_ngroups = 1; 1371 1372 setsugid(p); 1373 proc_set_cred(p, newcred); 1374 PROC_UNLOCK(p); 1375 crfree(oldcred); 1376 error = 0; 1377 out: 1378 free(linux_gidset, M_LINUX); 1379 return (error); 1380 } 1381 1382 int 1383 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1384 { 1385 struct ucred *cred; 1386 l_gid_t *linux_gidset; 1387 gid_t *bsd_gidset; 1388 int bsd_gidsetsz, ngrp, error; 1389 1390 cred = td->td_ucred; 1391 bsd_gidset = cred->cr_groups; 1392 bsd_gidsetsz = cred->cr_ngroups - 1; 1393 1394 /* 1395 * cr_groups[0] holds egid. Returning the whole set 1396 * here will cause a duplicate. Exclude cr_groups[0] 1397 * to prevent that. 1398 */ 1399 1400 if ((ngrp = args->gidsetsize) == 0) { 1401 td->td_retval[0] = bsd_gidsetsz; 1402 return (0); 1403 } 1404 1405 if (ngrp < bsd_gidsetsz) 1406 return (EINVAL); 1407 1408 ngrp = 0; 1409 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1410 M_LINUX, M_WAITOK); 1411 while (ngrp < bsd_gidsetsz) { 1412 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1413 ngrp++; 1414 } 1415 1416 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1417 free(linux_gidset, M_LINUX); 1418 if (error) 1419 return (error); 1420 1421 td->td_retval[0] = ngrp; 1422 return (0); 1423 } 1424 1425 int 1426 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1427 { 1428 struct rlimit bsd_rlim; 1429 struct l_rlimit rlim; 1430 u_int which; 1431 int error; 1432 1433 #ifdef DEBUG 1434 if (ldebug(setrlimit)) 1435 printf(ARGS(setrlimit, "%d, %p"), 1436 args->resource, (void *)args->rlim); 1437 #endif 1438 1439 if (args->resource >= LINUX_RLIM_NLIMITS) 1440 return (EINVAL); 1441 1442 which = linux_to_bsd_resource[args->resource]; 1443 if (which == -1) 1444 return (EINVAL); 1445 1446 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1447 if (error) 1448 return (error); 1449 1450 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1451 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1452 return (kern_setrlimit(td, which, &bsd_rlim)); 1453 } 1454 1455 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1456 int 1457 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1458 { 1459 struct l_rlimit rlim; 1460 struct rlimit bsd_rlim; 1461 u_int which; 1462 1463 #ifdef DEBUG 1464 if (ldebug(old_getrlimit)) 1465 printf(ARGS(old_getrlimit, "%d, %p"), 1466 args->resource, (void *)args->rlim); 1467 #endif 1468 1469 if (args->resource >= LINUX_RLIM_NLIMITS) 1470 return (EINVAL); 1471 1472 which = linux_to_bsd_resource[args->resource]; 1473 if (which == -1) 1474 return (EINVAL); 1475 1476 lim_rlimit(td, which, &bsd_rlim); 1477 1478 #ifdef COMPAT_LINUX32 1479 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1480 if (rlim.rlim_cur == UINT_MAX) 1481 rlim.rlim_cur = INT_MAX; 1482 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1483 if (rlim.rlim_max == UINT_MAX) 1484 rlim.rlim_max = INT_MAX; 1485 #else 1486 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1487 if (rlim.rlim_cur == ULONG_MAX) 1488 rlim.rlim_cur = LONG_MAX; 1489 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1490 if (rlim.rlim_max == ULONG_MAX) 1491 rlim.rlim_max = LONG_MAX; 1492 #endif 1493 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1494 } 1495 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1496 1497 int 1498 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1499 { 1500 struct l_rlimit rlim; 1501 struct rlimit bsd_rlim; 1502 u_int which; 1503 1504 #ifdef DEBUG 1505 if (ldebug(getrlimit)) 1506 printf(ARGS(getrlimit, "%d, %p"), 1507 args->resource, (void *)args->rlim); 1508 #endif 1509 1510 if (args->resource >= LINUX_RLIM_NLIMITS) 1511 return (EINVAL); 1512 1513 which = linux_to_bsd_resource[args->resource]; 1514 if (which == -1) 1515 return (EINVAL); 1516 1517 lim_rlimit(td, which, &bsd_rlim); 1518 1519 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1520 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1521 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1522 } 1523 1524 int 1525 linux_sched_setscheduler(struct thread *td, 1526 struct linux_sched_setscheduler_args *args) 1527 { 1528 struct sched_param sched_param; 1529 struct thread *tdt; 1530 int error, policy; 1531 1532 #ifdef DEBUG 1533 if (ldebug(sched_setscheduler)) 1534 printf(ARGS(sched_setscheduler, "%d, %d, %p"), 1535 args->pid, args->policy, (const void *)args->param); 1536 #endif 1537 1538 switch (args->policy) { 1539 case LINUX_SCHED_OTHER: 1540 policy = SCHED_OTHER; 1541 break; 1542 case LINUX_SCHED_FIFO: 1543 policy = SCHED_FIFO; 1544 break; 1545 case LINUX_SCHED_RR: 1546 policy = SCHED_RR; 1547 break; 1548 default: 1549 return (EINVAL); 1550 } 1551 1552 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1553 if (error) 1554 return (error); 1555 1556 tdt = linux_tdfind(td, args->pid, -1); 1557 if (tdt == NULL) 1558 return (ESRCH); 1559 1560 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1561 PROC_UNLOCK(tdt->td_proc); 1562 return (error); 1563 } 1564 1565 int 1566 linux_sched_getscheduler(struct thread *td, 1567 struct linux_sched_getscheduler_args *args) 1568 { 1569 struct thread *tdt; 1570 int error, policy; 1571 1572 #ifdef DEBUG 1573 if (ldebug(sched_getscheduler)) 1574 printf(ARGS(sched_getscheduler, "%d"), args->pid); 1575 #endif 1576 1577 tdt = linux_tdfind(td, args->pid, -1); 1578 if (tdt == NULL) 1579 return (ESRCH); 1580 1581 error = kern_sched_getscheduler(td, tdt, &policy); 1582 PROC_UNLOCK(tdt->td_proc); 1583 1584 switch (policy) { 1585 case SCHED_OTHER: 1586 td->td_retval[0] = LINUX_SCHED_OTHER; 1587 break; 1588 case SCHED_FIFO: 1589 td->td_retval[0] = LINUX_SCHED_FIFO; 1590 break; 1591 case SCHED_RR: 1592 td->td_retval[0] = LINUX_SCHED_RR; 1593 break; 1594 } 1595 return (error); 1596 } 1597 1598 int 1599 linux_sched_get_priority_max(struct thread *td, 1600 struct linux_sched_get_priority_max_args *args) 1601 { 1602 struct sched_get_priority_max_args bsd; 1603 1604 #ifdef DEBUG 1605 if (ldebug(sched_get_priority_max)) 1606 printf(ARGS(sched_get_priority_max, "%d"), args->policy); 1607 #endif 1608 1609 switch (args->policy) { 1610 case LINUX_SCHED_OTHER: 1611 bsd.policy = SCHED_OTHER; 1612 break; 1613 case LINUX_SCHED_FIFO: 1614 bsd.policy = SCHED_FIFO; 1615 break; 1616 case LINUX_SCHED_RR: 1617 bsd.policy = SCHED_RR; 1618 break; 1619 default: 1620 return (EINVAL); 1621 } 1622 return (sys_sched_get_priority_max(td, &bsd)); 1623 } 1624 1625 int 1626 linux_sched_get_priority_min(struct thread *td, 1627 struct linux_sched_get_priority_min_args *args) 1628 { 1629 struct sched_get_priority_min_args bsd; 1630 1631 #ifdef DEBUG 1632 if (ldebug(sched_get_priority_min)) 1633 printf(ARGS(sched_get_priority_min, "%d"), args->policy); 1634 #endif 1635 1636 switch (args->policy) { 1637 case LINUX_SCHED_OTHER: 1638 bsd.policy = SCHED_OTHER; 1639 break; 1640 case LINUX_SCHED_FIFO: 1641 bsd.policy = SCHED_FIFO; 1642 break; 1643 case LINUX_SCHED_RR: 1644 bsd.policy = SCHED_RR; 1645 break; 1646 default: 1647 return (EINVAL); 1648 } 1649 return (sys_sched_get_priority_min(td, &bsd)); 1650 } 1651 1652 #define REBOOT_CAD_ON 0x89abcdef 1653 #define REBOOT_CAD_OFF 0 1654 #define REBOOT_HALT 0xcdef0123 1655 #define REBOOT_RESTART 0x01234567 1656 #define REBOOT_RESTART2 0xA1B2C3D4 1657 #define REBOOT_POWEROFF 0x4321FEDC 1658 #define REBOOT_MAGIC1 0xfee1dead 1659 #define REBOOT_MAGIC2 0x28121969 1660 #define REBOOT_MAGIC2A 0x05121996 1661 #define REBOOT_MAGIC2B 0x16041998 1662 1663 int 1664 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1665 { 1666 struct reboot_args bsd_args; 1667 1668 #ifdef DEBUG 1669 if (ldebug(reboot)) 1670 printf(ARGS(reboot, "0x%x"), args->cmd); 1671 #endif 1672 1673 if (args->magic1 != REBOOT_MAGIC1) 1674 return (EINVAL); 1675 1676 switch (args->magic2) { 1677 case REBOOT_MAGIC2: 1678 case REBOOT_MAGIC2A: 1679 case REBOOT_MAGIC2B: 1680 break; 1681 default: 1682 return (EINVAL); 1683 } 1684 1685 switch (args->cmd) { 1686 case REBOOT_CAD_ON: 1687 case REBOOT_CAD_OFF: 1688 return (priv_check(td, PRIV_REBOOT)); 1689 case REBOOT_HALT: 1690 bsd_args.opt = RB_HALT; 1691 break; 1692 case REBOOT_RESTART: 1693 case REBOOT_RESTART2: 1694 bsd_args.opt = 0; 1695 break; 1696 case REBOOT_POWEROFF: 1697 bsd_args.opt = RB_POWEROFF; 1698 break; 1699 default: 1700 return (EINVAL); 1701 } 1702 return (sys_reboot(td, &bsd_args)); 1703 } 1704 1705 1706 /* 1707 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify 1708 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that 1709 * are assumed to be preserved. The following lightweight syscalls fixes 1710 * this. See also linux_getgid16() and linux_getuid16() in linux_uid16.c 1711 * 1712 * linux_getpid() - MP SAFE 1713 * linux_getgid() - MP SAFE 1714 * linux_getuid() - MP SAFE 1715 */ 1716 1717 int 1718 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1719 { 1720 1721 #ifdef DEBUG 1722 if (ldebug(getpid)) 1723 printf(ARGS(getpid, "")); 1724 #endif 1725 td->td_retval[0] = td->td_proc->p_pid; 1726 1727 return (0); 1728 } 1729 1730 int 1731 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1732 { 1733 struct linux_emuldata *em; 1734 1735 #ifdef DEBUG 1736 if (ldebug(gettid)) 1737 printf(ARGS(gettid, "")); 1738 #endif 1739 1740 em = em_find(td); 1741 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1742 1743 td->td_retval[0] = em->em_tid; 1744 1745 return (0); 1746 } 1747 1748 1749 int 1750 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1751 { 1752 1753 #ifdef DEBUG 1754 if (ldebug(getppid)) 1755 printf(ARGS(getppid, "")); 1756 #endif 1757 1758 td->td_retval[0] = kern_getppid(td); 1759 return (0); 1760 } 1761 1762 int 1763 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1764 { 1765 1766 #ifdef DEBUG 1767 if (ldebug(getgid)) 1768 printf(ARGS(getgid, "")); 1769 #endif 1770 1771 td->td_retval[0] = td->td_ucred->cr_rgid; 1772 return (0); 1773 } 1774 1775 int 1776 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1777 { 1778 1779 #ifdef DEBUG 1780 if (ldebug(getuid)) 1781 printf(ARGS(getuid, "")); 1782 #endif 1783 1784 td->td_retval[0] = td->td_ucred->cr_ruid; 1785 return (0); 1786 } 1787 1788 1789 int 1790 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1791 { 1792 struct getsid_args bsd; 1793 1794 #ifdef DEBUG 1795 if (ldebug(getsid)) 1796 printf(ARGS(getsid, "%i"), args->pid); 1797 #endif 1798 1799 bsd.pid = args->pid; 1800 return (sys_getsid(td, &bsd)); 1801 } 1802 1803 int 1804 linux_nosys(struct thread *td, struct nosys_args *ignore) 1805 { 1806 1807 return (ENOSYS); 1808 } 1809 1810 int 1811 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1812 { 1813 struct getpriority_args bsd_args; 1814 int error; 1815 1816 #ifdef DEBUG 1817 if (ldebug(getpriority)) 1818 printf(ARGS(getpriority, "%i, %i"), args->which, args->who); 1819 #endif 1820 1821 bsd_args.which = args->which; 1822 bsd_args.who = args->who; 1823 error = sys_getpriority(td, &bsd_args); 1824 td->td_retval[0] = 20 - td->td_retval[0]; 1825 return (error); 1826 } 1827 1828 int 1829 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1830 { 1831 int name[2]; 1832 1833 #ifdef DEBUG 1834 if (ldebug(sethostname)) 1835 printf(ARGS(sethostname, "*, %i"), args->len); 1836 #endif 1837 1838 name[0] = CTL_KERN; 1839 name[1] = KERN_HOSTNAME; 1840 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1841 args->len, 0, 0)); 1842 } 1843 1844 int 1845 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1846 { 1847 int name[2]; 1848 1849 #ifdef DEBUG 1850 if (ldebug(setdomainname)) 1851 printf(ARGS(setdomainname, "*, %i"), args->len); 1852 #endif 1853 1854 name[0] = CTL_KERN; 1855 name[1] = KERN_NISDOMAINNAME; 1856 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1857 args->len, 0, 0)); 1858 } 1859 1860 int 1861 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1862 { 1863 1864 #ifdef DEBUG 1865 if (ldebug(exit_group)) 1866 printf(ARGS(exit_group, "%i"), args->error_code); 1867 #endif 1868 1869 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1870 args->error_code); 1871 1872 /* 1873 * XXX: we should send a signal to the parent if 1874 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1875 * as it doesnt occur often. 1876 */ 1877 exit1(td, args->error_code, 0); 1878 /* NOTREACHED */ 1879 } 1880 1881 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1882 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1883 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1884 1885 struct l_user_cap_header { 1886 l_int version; 1887 l_int pid; 1888 }; 1889 1890 struct l_user_cap_data { 1891 l_int effective; 1892 l_int permitted; 1893 l_int inheritable; 1894 }; 1895 1896 int 1897 linux_capget(struct thread *td, struct linux_capget_args *uap) 1898 { 1899 struct l_user_cap_header luch; 1900 struct l_user_cap_data lucd[2]; 1901 int error, u32s; 1902 1903 if (uap->hdrp == NULL) 1904 return (EFAULT); 1905 1906 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1907 if (error != 0) 1908 return (error); 1909 1910 switch (luch.version) { 1911 case _LINUX_CAPABILITY_VERSION_1: 1912 u32s = 1; 1913 break; 1914 case _LINUX_CAPABILITY_VERSION_2: 1915 case _LINUX_CAPABILITY_VERSION_3: 1916 u32s = 2; 1917 break; 1918 default: 1919 #ifdef DEBUG 1920 if (ldebug(capget)) 1921 printf(LMSG("invalid capget capability version 0x%x"), 1922 luch.version); 1923 #endif 1924 luch.version = _LINUX_CAPABILITY_VERSION_1; 1925 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1926 if (error) 1927 return (error); 1928 return (EINVAL); 1929 } 1930 1931 if (luch.pid) 1932 return (EPERM); 1933 1934 if (uap->datap) { 1935 /* 1936 * The current implementation doesn't support setting 1937 * a capability (it's essentially a stub) so indicate 1938 * that no capabilities are currently set or available 1939 * to request. 1940 */ 1941 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1942 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1943 } 1944 1945 return (error); 1946 } 1947 1948 int 1949 linux_capset(struct thread *td, struct linux_capset_args *uap) 1950 { 1951 struct l_user_cap_header luch; 1952 struct l_user_cap_data lucd[2]; 1953 int error, i, u32s; 1954 1955 if (uap->hdrp == NULL || uap->datap == NULL) 1956 return (EFAULT); 1957 1958 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1959 if (error != 0) 1960 return (error); 1961 1962 switch (luch.version) { 1963 case _LINUX_CAPABILITY_VERSION_1: 1964 u32s = 1; 1965 break; 1966 case _LINUX_CAPABILITY_VERSION_2: 1967 case _LINUX_CAPABILITY_VERSION_3: 1968 u32s = 2; 1969 break; 1970 default: 1971 #ifdef DEBUG 1972 if (ldebug(capset)) 1973 printf(LMSG("invalid capset capability version 0x%x"), 1974 luch.version); 1975 #endif 1976 luch.version = _LINUX_CAPABILITY_VERSION_1; 1977 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1978 if (error) 1979 return (error); 1980 return (EINVAL); 1981 } 1982 1983 if (luch.pid) 1984 return (EPERM); 1985 1986 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1987 if (error != 0) 1988 return (error); 1989 1990 /* We currently don't support setting any capabilities. */ 1991 for (i = 0; i < u32s; i++) { 1992 if (lucd[i].effective || lucd[i].permitted || 1993 lucd[i].inheritable) { 1994 linux_msg(td, 1995 "capset[%d] effective=0x%x, permitted=0x%x, " 1996 "inheritable=0x%x is not implemented", i, 1997 (int)lucd[i].effective, (int)lucd[i].permitted, 1998 (int)lucd[i].inheritable); 1999 return (EPERM); 2000 } 2001 } 2002 2003 return (0); 2004 } 2005 2006 int 2007 linux_prctl(struct thread *td, struct linux_prctl_args *args) 2008 { 2009 int error = 0, max_size; 2010 struct proc *p = td->td_proc; 2011 char comm[LINUX_MAX_COMM_LEN]; 2012 int pdeath_signal; 2013 2014 #ifdef DEBUG 2015 if (ldebug(prctl)) 2016 printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option, 2017 (uintmax_t)args->arg2, (uintmax_t)args->arg3, 2018 (uintmax_t)args->arg4, (uintmax_t)args->arg5); 2019 #endif 2020 2021 switch (args->option) { 2022 case LINUX_PR_SET_PDEATHSIG: 2023 if (!LINUX_SIG_VALID(args->arg2)) 2024 return (EINVAL); 2025 pdeath_signal = linux_to_bsd_signal(args->arg2); 2026 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 2027 &pdeath_signal)); 2028 case LINUX_PR_GET_PDEATHSIG: 2029 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 2030 &pdeath_signal); 2031 if (error != 0) 2032 return (error); 2033 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 2034 return (copyout(&pdeath_signal, 2035 (void *)(register_t)args->arg2, 2036 sizeof(pdeath_signal))); 2037 break; 2038 case LINUX_PR_GET_KEEPCAPS: 2039 /* 2040 * Indicate that we always clear the effective and 2041 * permitted capability sets when the user id becomes 2042 * non-zero (actually the capability sets are simply 2043 * always zero in the current implementation). 2044 */ 2045 td->td_retval[0] = 0; 2046 break; 2047 case LINUX_PR_SET_KEEPCAPS: 2048 /* 2049 * Ignore requests to keep the effective and permitted 2050 * capability sets when the user id becomes non-zero. 2051 */ 2052 break; 2053 case LINUX_PR_SET_NAME: 2054 /* 2055 * To be on the safe side we need to make sure to not 2056 * overflow the size a Linux program expects. We already 2057 * do this here in the copyin, so that we don't need to 2058 * check on copyout. 2059 */ 2060 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2061 error = copyinstr((void *)(register_t)args->arg2, comm, 2062 max_size, NULL); 2063 2064 /* Linux silently truncates the name if it is too long. */ 2065 if (error == ENAMETOOLONG) { 2066 /* 2067 * XXX: copyinstr() isn't documented to populate the 2068 * array completely, so do a copyin() to be on the 2069 * safe side. This should be changed in case 2070 * copyinstr() is changed to guarantee this. 2071 */ 2072 error = copyin((void *)(register_t)args->arg2, comm, 2073 max_size - 1); 2074 comm[max_size - 1] = '\0'; 2075 } 2076 if (error) 2077 return (error); 2078 2079 PROC_LOCK(p); 2080 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2081 PROC_UNLOCK(p); 2082 break; 2083 case LINUX_PR_GET_NAME: 2084 PROC_LOCK(p); 2085 strlcpy(comm, p->p_comm, sizeof(comm)); 2086 PROC_UNLOCK(p); 2087 error = copyout(comm, (void *)(register_t)args->arg2, 2088 strlen(comm) + 1); 2089 break; 2090 default: 2091 error = EINVAL; 2092 break; 2093 } 2094 2095 return (error); 2096 } 2097 2098 int 2099 linux_sched_setparam(struct thread *td, 2100 struct linux_sched_setparam_args *uap) 2101 { 2102 struct sched_param sched_param; 2103 struct thread *tdt; 2104 int error; 2105 2106 #ifdef DEBUG 2107 if (ldebug(sched_setparam)) 2108 printf(ARGS(sched_setparam, "%d, *"), uap->pid); 2109 #endif 2110 2111 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2112 if (error) 2113 return (error); 2114 2115 tdt = linux_tdfind(td, uap->pid, -1); 2116 if (tdt == NULL) 2117 return (ESRCH); 2118 2119 error = kern_sched_setparam(td, tdt, &sched_param); 2120 PROC_UNLOCK(tdt->td_proc); 2121 return (error); 2122 } 2123 2124 int 2125 linux_sched_getparam(struct thread *td, 2126 struct linux_sched_getparam_args *uap) 2127 { 2128 struct sched_param sched_param; 2129 struct thread *tdt; 2130 int error; 2131 2132 #ifdef DEBUG 2133 if (ldebug(sched_getparam)) 2134 printf(ARGS(sched_getparam, "%d, *"), uap->pid); 2135 #endif 2136 2137 tdt = linux_tdfind(td, uap->pid, -1); 2138 if (tdt == NULL) 2139 return (ESRCH); 2140 2141 error = kern_sched_getparam(td, tdt, &sched_param); 2142 PROC_UNLOCK(tdt->td_proc); 2143 if (error == 0) 2144 error = copyout(&sched_param, uap->param, 2145 sizeof(sched_param)); 2146 return (error); 2147 } 2148 2149 /* 2150 * Get affinity of a process. 2151 */ 2152 int 2153 linux_sched_getaffinity(struct thread *td, 2154 struct linux_sched_getaffinity_args *args) 2155 { 2156 int error; 2157 struct thread *tdt; 2158 2159 #ifdef DEBUG 2160 if (ldebug(sched_getaffinity)) 2161 printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid, 2162 args->len); 2163 #endif 2164 if (args->len < sizeof(cpuset_t)) 2165 return (EINVAL); 2166 2167 tdt = linux_tdfind(td, args->pid, -1); 2168 if (tdt == NULL) 2169 return (ESRCH); 2170 2171 PROC_UNLOCK(tdt->td_proc); 2172 2173 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2174 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr); 2175 if (error == 0) 2176 td->td_retval[0] = sizeof(cpuset_t); 2177 2178 return (error); 2179 } 2180 2181 /* 2182 * Set affinity of a process. 2183 */ 2184 int 2185 linux_sched_setaffinity(struct thread *td, 2186 struct linux_sched_setaffinity_args *args) 2187 { 2188 struct thread *tdt; 2189 2190 #ifdef DEBUG 2191 if (ldebug(sched_setaffinity)) 2192 printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid, 2193 args->len); 2194 #endif 2195 if (args->len < sizeof(cpuset_t)) 2196 return (EINVAL); 2197 2198 tdt = linux_tdfind(td, args->pid, -1); 2199 if (tdt == NULL) 2200 return (ESRCH); 2201 2202 PROC_UNLOCK(tdt->td_proc); 2203 2204 return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2205 tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr)); 2206 } 2207 2208 struct linux_rlimit64 { 2209 uint64_t rlim_cur; 2210 uint64_t rlim_max; 2211 }; 2212 2213 int 2214 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2215 { 2216 struct rlimit rlim, nrlim; 2217 struct linux_rlimit64 lrlim; 2218 struct proc *p; 2219 u_int which; 2220 int flags; 2221 int error; 2222 2223 #ifdef DEBUG 2224 if (ldebug(prlimit64)) 2225 printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid, 2226 args->resource, (void *)args->new, (void *)args->old); 2227 #endif 2228 2229 if (args->resource >= LINUX_RLIM_NLIMITS) 2230 return (EINVAL); 2231 2232 which = linux_to_bsd_resource[args->resource]; 2233 if (which == -1) 2234 return (EINVAL); 2235 2236 if (args->new != NULL) { 2237 /* 2238 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2239 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2240 * as INFINITY so we do not need a conversion even. 2241 */ 2242 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2243 if (error != 0) 2244 return (error); 2245 } 2246 2247 flags = PGET_HOLD | PGET_NOTWEXIT; 2248 if (args->new != NULL) 2249 flags |= PGET_CANDEBUG; 2250 else 2251 flags |= PGET_CANSEE; 2252 error = pget(args->pid, flags, &p); 2253 if (error != 0) 2254 return (error); 2255 2256 if (args->old != NULL) { 2257 PROC_LOCK(p); 2258 lim_rlimit_proc(p, which, &rlim); 2259 PROC_UNLOCK(p); 2260 if (rlim.rlim_cur == RLIM_INFINITY) 2261 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2262 else 2263 lrlim.rlim_cur = rlim.rlim_cur; 2264 if (rlim.rlim_max == RLIM_INFINITY) 2265 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2266 else 2267 lrlim.rlim_max = rlim.rlim_max; 2268 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2269 if (error != 0) 2270 goto out; 2271 } 2272 2273 if (args->new != NULL) 2274 error = kern_proc_setrlimit(td, p, which, &nrlim); 2275 2276 out: 2277 PRELE(p); 2278 return (error); 2279 } 2280 2281 int 2282 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2283 { 2284 struct timeval utv, tv0, tv1, *tvp; 2285 struct l_pselect6arg lpse6; 2286 struct l_timespec lts; 2287 struct timespec uts; 2288 l_sigset_t l_ss; 2289 sigset_t *ssp; 2290 sigset_t ss; 2291 int error; 2292 2293 ssp = NULL; 2294 if (args->sig != NULL) { 2295 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2296 if (error != 0) 2297 return (error); 2298 if (lpse6.ss_len != sizeof(l_ss)) 2299 return (EINVAL); 2300 if (lpse6.ss != 0) { 2301 error = copyin(PTRIN(lpse6.ss), &l_ss, 2302 sizeof(l_ss)); 2303 if (error != 0) 2304 return (error); 2305 linux_to_bsd_sigset(&l_ss, &ss); 2306 ssp = &ss; 2307 } 2308 } 2309 2310 /* 2311 * Currently glibc changes nanosecond number to microsecond. 2312 * This mean losing precision but for now it is hardly seen. 2313 */ 2314 if (args->tsp != NULL) { 2315 error = copyin(args->tsp, <s, sizeof(lts)); 2316 if (error != 0) 2317 return (error); 2318 error = linux_to_native_timespec(&uts, <s); 2319 if (error != 0) 2320 return (error); 2321 2322 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2323 if (itimerfix(&utv)) 2324 return (EINVAL); 2325 2326 microtime(&tv0); 2327 tvp = &utv; 2328 } else 2329 tvp = NULL; 2330 2331 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2332 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2333 2334 if (error == 0 && args->tsp != NULL) { 2335 if (td->td_retval[0] != 0) { 2336 /* 2337 * Compute how much time was left of the timeout, 2338 * by subtracting the current time and the time 2339 * before we started the call, and subtracting 2340 * that result from the user-supplied value. 2341 */ 2342 2343 microtime(&tv1); 2344 timevalsub(&tv1, &tv0); 2345 timevalsub(&utv, &tv1); 2346 if (utv.tv_sec < 0) 2347 timevalclear(&utv); 2348 } else 2349 timevalclear(&utv); 2350 2351 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2352 2353 error = native_to_linux_timespec(<s, &uts); 2354 if (error == 0) 2355 error = copyout(<s, args->tsp, sizeof(lts)); 2356 } 2357 2358 return (error); 2359 } 2360 2361 int 2362 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2363 { 2364 struct timespec ts0, ts1; 2365 struct l_timespec lts; 2366 struct timespec uts, *tsp; 2367 l_sigset_t l_ss; 2368 sigset_t *ssp; 2369 sigset_t ss; 2370 int error; 2371 2372 if (args->sset != NULL) { 2373 if (args->ssize != sizeof(l_ss)) 2374 return (EINVAL); 2375 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2376 if (error) 2377 return (error); 2378 linux_to_bsd_sigset(&l_ss, &ss); 2379 ssp = &ss; 2380 } else 2381 ssp = NULL; 2382 if (args->tsp != NULL) { 2383 error = copyin(args->tsp, <s, sizeof(lts)); 2384 if (error) 2385 return (error); 2386 error = linux_to_native_timespec(&uts, <s); 2387 if (error != 0) 2388 return (error); 2389 2390 nanotime(&ts0); 2391 tsp = &uts; 2392 } else 2393 tsp = NULL; 2394 2395 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2396 2397 if (error == 0 && args->tsp != NULL) { 2398 if (td->td_retval[0]) { 2399 nanotime(&ts1); 2400 timespecsub(&ts1, &ts0, &ts1); 2401 timespecsub(&uts, &ts1, &uts); 2402 if (uts.tv_sec < 0) 2403 timespecclear(&uts); 2404 } else 2405 timespecclear(&uts); 2406 2407 error = native_to_linux_timespec(<s, &uts); 2408 if (error == 0) 2409 error = copyout(<s, args->tsp, sizeof(lts)); 2410 } 2411 2412 return (error); 2413 } 2414 2415 #if defined(DEBUG) || defined(KTR) 2416 /* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */ 2417 2418 #ifdef COMPAT_LINUX32 2419 #define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL 2420 #else 2421 #define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL 2422 #endif 2423 2424 u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))]; 2425 2426 static int 2427 linux_debug(int syscall, int toggle, int global) 2428 { 2429 2430 if (global) { 2431 char c = toggle ? 0 : 0xff; 2432 2433 memset(linux_debug_map, c, sizeof(linux_debug_map)); 2434 return (0); 2435 } 2436 if (syscall < 0 || syscall >= L_MAXSYSCALL) 2437 return (EINVAL); 2438 if (toggle) 2439 clrbit(linux_debug_map, syscall); 2440 else 2441 setbit(linux_debug_map, syscall); 2442 return (0); 2443 } 2444 #undef L_MAXSYSCALL 2445 2446 /* 2447 * Usage: sysctl linux.debug=<syscall_nr>.<0/1> 2448 * 2449 * E.g.: sysctl linux.debug=21.0 2450 * 2451 * As a special case, syscall "all" will apply to all syscalls globally. 2452 */ 2453 #define LINUX_MAX_DEBUGSTR 16 2454 int 2455 linux_sysctl_debug(SYSCTL_HANDLER_ARGS) 2456 { 2457 char value[LINUX_MAX_DEBUGSTR], *p; 2458 int error, sysc, toggle; 2459 int global = 0; 2460 2461 value[0] = '\0'; 2462 error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req); 2463 if (error || req->newptr == NULL) 2464 return (error); 2465 for (p = value; *p != '\0' && *p != '.'; p++); 2466 if (*p == '\0') 2467 return (EINVAL); 2468 *p++ = '\0'; 2469 sysc = strtol(value, NULL, 0); 2470 toggle = strtol(p, NULL, 0); 2471 if (strcmp(value, "all") == 0) 2472 global = 1; 2473 error = linux_debug(sysc, toggle, global); 2474 return (error); 2475 } 2476 2477 #endif /* DEBUG || KTR */ 2478 2479 int 2480 linux_sched_rr_get_interval(struct thread *td, 2481 struct linux_sched_rr_get_interval_args *uap) 2482 { 2483 struct timespec ts; 2484 struct l_timespec lts; 2485 struct thread *tdt; 2486 int error; 2487 2488 /* 2489 * According to man in case the invalid pid specified 2490 * EINVAL should be returned. 2491 */ 2492 if (uap->pid < 0) 2493 return (EINVAL); 2494 2495 tdt = linux_tdfind(td, uap->pid, -1); 2496 if (tdt == NULL) 2497 return (ESRCH); 2498 2499 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2500 PROC_UNLOCK(tdt->td_proc); 2501 if (error != 0) 2502 return (error); 2503 error = native_to_linux_timespec(<s, &ts); 2504 if (error != 0) 2505 return (error); 2506 return (copyout(<s, uap->interval, sizeof(lts))); 2507 } 2508 2509 /* 2510 * In case when the Linux thread is the initial thread in 2511 * the thread group thread id is equal to the process id. 2512 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2513 */ 2514 struct thread * 2515 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2516 { 2517 struct linux_emuldata *em; 2518 struct thread *tdt; 2519 struct proc *p; 2520 2521 tdt = NULL; 2522 if (tid == 0 || tid == td->td_tid) { 2523 tdt = td; 2524 PROC_LOCK(tdt->td_proc); 2525 } else if (tid > PID_MAX) 2526 tdt = tdfind(tid, pid); 2527 else { 2528 /* 2529 * Initial thread where the tid equal to the pid. 2530 */ 2531 p = pfind(tid); 2532 if (p != NULL) { 2533 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2534 /* 2535 * p is not a Linuxulator process. 2536 */ 2537 PROC_UNLOCK(p); 2538 return (NULL); 2539 } 2540 FOREACH_THREAD_IN_PROC(p, tdt) { 2541 em = em_find(tdt); 2542 if (tid == em->em_tid) 2543 return (tdt); 2544 } 2545 PROC_UNLOCK(p); 2546 } 2547 return (NULL); 2548 } 2549 2550 return (tdt); 2551 } 2552 2553 void 2554 linux_to_bsd_waitopts(int options, int *bsdopts) 2555 { 2556 2557 if (options & LINUX_WNOHANG) 2558 *bsdopts |= WNOHANG; 2559 if (options & LINUX_WUNTRACED) 2560 *bsdopts |= WUNTRACED; 2561 if (options & LINUX_WEXITED) 2562 *bsdopts |= WEXITED; 2563 if (options & LINUX_WCONTINUED) 2564 *bsdopts |= WCONTINUED; 2565 if (options & LINUX_WNOWAIT) 2566 *bsdopts |= WNOWAIT; 2567 2568 if (options & __WCLONE) 2569 *bsdopts |= WLINUXCLONE; 2570 } 2571 2572 int 2573 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2574 { 2575 struct uio uio; 2576 struct iovec iov; 2577 int error; 2578 2579 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2580 return (EINVAL); 2581 if (args->count > INT_MAX) 2582 args->count = INT_MAX; 2583 2584 iov.iov_base = args->buf; 2585 iov.iov_len = args->count; 2586 2587 uio.uio_iov = &iov; 2588 uio.uio_iovcnt = 1; 2589 uio.uio_resid = iov.iov_len; 2590 uio.uio_segflg = UIO_USERSPACE; 2591 uio.uio_rw = UIO_READ; 2592 uio.uio_td = td; 2593 2594 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2595 if (error == 0) 2596 td->td_retval[0] = args->count - uio.uio_resid; 2597 return (error); 2598 } 2599 2600 int 2601 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2602 { 2603 2604 /* Needs to be page-aligned */ 2605 if (args->start & PAGE_MASK) 2606 return (EINVAL); 2607 return (kern_mincore(td, args->start, args->len, args->vec)); 2608 } 2609