1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/mutex.h> 51 #include <sys/namei.h> 52 #include <sys/priv.h> 53 #include <sys/proc.h> 54 #include <sys/reboot.h> 55 #include <sys/racct.h> 56 #include <sys/random.h> 57 #include <sys/resourcevar.h> 58 #include <sys/sched.h> 59 #include <sys/sdt.h> 60 #include <sys/signalvar.h> 61 #include <sys/stat.h> 62 #include <sys/syscallsubr.h> 63 #include <sys/sysctl.h> 64 #include <sys/sysproto.h> 65 #include <sys/systm.h> 66 #include <sys/time.h> 67 #include <sys/vmmeter.h> 68 #include <sys/vnode.h> 69 #include <sys/wait.h> 70 #include <sys/cpuset.h> 71 #include <sys/uio.h> 72 73 #include <security/mac/mac_framework.h> 74 75 #include <vm/vm.h> 76 #include <vm/pmap.h> 77 #include <vm/vm_kern.h> 78 #include <vm/vm_map.h> 79 #include <vm/vm_extern.h> 80 #include <vm/vm_object.h> 81 #include <vm/swap_pager.h> 82 83 #ifdef COMPAT_LINUX32 84 #include <machine/../linux32/linux.h> 85 #include <machine/../linux32/linux32_proto.h> 86 #else 87 #include <machine/../linux/linux.h> 88 #include <machine/../linux/linux_proto.h> 89 #endif 90 91 #include <compat/linux/linux_dtrace.h> 92 #include <compat/linux/linux_file.h> 93 #include <compat/linux/linux_mib.h> 94 #include <compat/linux/linux_signal.h> 95 #include <compat/linux/linux_timer.h> 96 #include <compat/linux/linux_util.h> 97 #include <compat/linux/linux_sysproto.h> 98 #include <compat/linux/linux_emul.h> 99 #include <compat/linux/linux_misc.h> 100 101 /** 102 * Special DTrace provider for the linuxulator. 103 * 104 * In this file we define the provider for the entire linuxulator. 
All
 * modules (= files of the linuxulator) use it.
 *
 * We define a different name depending on the emulated bitsize, see
 * ../../<ARCH>/linux{,32}/linux.h, e.g.:
 *	native bitsize		= linuxulator
 *	amd64, 32bit emulation	= linuxulator32
 */
LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE);

int stclohz;				/* Statistics clock frequency */

/*
 * Map Linux rlimit resource numbers (array index) to the corresponding
 * FreeBSD RLIMIT_* constants.
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

/* Userland-visible layout of the buffer filled in by Linux sysinfo(2). */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalbig;
	l_ulong		freebig;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

/*
 * Sixth argument of Linux pselect6(2): pointer to, and size of, a
 * Linux sigset_t.  NOTE(review): presumably consumed by the pselect6
 * implementation elsewhere in the linuxulator — not used in this view.
 */
struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_nsec_valid(l_long);


/*
 * Linux sysinfo(2): fill in a struct l_sysinfo with uptime, load
 * averages, memory/swap totals and the process count, then copy it
 * out to userspace.  Returns 0 or an error from copyout(9).
 */
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	vm_object_t object;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	/* Round the uptime up to whole seconds. */
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = sysinfo.totalram - vm_wire_count() * PAGE_SIZE;

	/*
	 * Approximate "shared" memory as the sum of resident pages of
	 * all VM objects that are mapped more than once.
	 */
	sysinfo.sharedram = 0;
	mtx_lock(&vm_object_list_mtx);
	TAILQ_FOREACH(object, &vm_object_list, object_list)
		if (object->shadow_count > 1)
			sysinfo.sharedram += object->resident_page_count;
	mtx_unlock(&vm_object_list_mtx);

	sysinfo.sharedram *= PAGE_SIZE;
	sysinfo.bufferram = 0;

	/* swap_pager_status() yields total (i) and used (j) swap pages. */
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/* The following are only present in newer Linux kernels. */
	sysinfo.totalbig = 0;
	sysinfo.freebig = 0;
	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux alarm(2): arm (or, for secs == 0, cancel) the real-time
 * interval timer and return the number of seconds that remained on
 * any previously scheduled alarm, rounded to the nearest second.
 * Always succeeds, as on Linux.
 */
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

#ifdef DEBUG
	if (ldebug(alarm))
		printf(ARGS(alarm, "%u"), args->secs);
#endif
	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit to avoid error from it.
	 *
	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
	 * platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	/* Round the remaining time to the nearest whole second. */
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

/*
 * Linux brk(2): attempt to move the data-segment break to the
 * requested end address via sys_break().  On failure the current
 * break is returned instead of an error, matching Linux semantics.
 */
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	vm_offset_t new, old;
	struct break_args /* {
		char * nsize;
	} */ tmp;

#ifdef DEBUG
	if (ldebug(brk))
		printf(ARGS(brk, "%p"), (void *)(uintptr_t)args->dsend);
#endif
	old = (vm_offset_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (vm_offset_t)args->dsend;
	tmp.nsize = (char *)new;
	/* Only addresses above the data segment start are candidates. */
	if (((caddr_t)new > vm->vm_daddr) && !sys_break(td, &tmp))
		td->td_retval[0] = (long)new;
	else
		td->td_retval[0] = (long)old;

	return (0);
}

#if defined(__i386__)
/* XXX: what about amd64/linux32?
*/

/*
 * Linux uselib(2): map an a.out-format shared library into the
 * calling process, performing exec-style permission checks on the
 * vnode first.  Only supported on i386.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error, locked, writecount;

	LCONVPATHEXIST(td, args->library, &library);

#ifdef DEBUG
	if (ldebug(uselib))
		printf(ARGS(uselib, "%s"), library);
#endif

	a_out = NULL;
	locked = 0;
	vp = NULL;

	NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
	    UIO_SYSSPACE, library, td);
	error = namei(&ni);
	LFREEPATH(library);
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = 1;

	/* Writable? */
	error = VOP_GET_WRITECOUNT(vp, &writecount);
	if (error != 0)
		goto cleanup;
	if (writecount != 0) {
		error = ETXTBSY;
		goto cleanup;
	}

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCESS is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:			/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:			/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 * XXX: Note that if any of the VM operations fail below we don't
	 * clear this flag.
	 */
	VOP_SET_TEXT(vp);

	/*
	 * Lock no longer needed
	 */
	locked = 0;
	VOP_UNLOCK(vp, 0);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misalinged file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
#ifdef DEBUG
		printf("uselib: Non page aligned binary %lu\n", file_offset);
#endif
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		/* Copy the whole image in, since we cannot mmap it. */
		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		/* A short read means a truncated image. */
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
#ifdef DEBUG
		printf("uselib: Page aligned binary %lu\n", file_offset);
#endif
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		error = vm_mmap(&td->td_proc->p_vmspace->vm_map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
	}
#ifdef DEBUG
	printf("mem=%08lx = %08lx %08lx\n", (long)vmaddr, ((long *)vmaddr)[0],
	    ((long *)vmaddr)[1]);
#endif
	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unlock vnode if needed */
	if (locked)
		VOP_UNLOCK(vp, 0);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux select(2): forward to kern_select() and, on return, write the
 * remaining time back into the user-supplied timeout, as Linux does.
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

#ifdef DEBUG
	if (ldebug(select))
		printf(ARGS(select, "%d, %p, %p, %p, %p"), args->nfds,
		    (void *)args->readfds, (void *)args->writefds,
		    (void *)args->exceptfds, (void *)args->timeout);
#endif

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;
#ifdef DEBUG
		if (ldebug(select))
			printf(LMSG("incoming timeout (%jd/%ld)"),
			    (intmax_t)utv.tv_sec, utv.tv_usec);
#endif

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid. Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);

#ifdef DEBUG
	if (ldebug(select))
		printf(LMSG("real select returns %d"), error);
#endif
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
#ifdef DEBUG
		if (ldebug(select))
			printf(LMSG("outgoing timeout (%jd/%ld)"),
			    (intmax_t)utv.tv_sec, utv.tv_usec);
#endif
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
#ifdef DEBUG
	if (ldebug(select))
		printf(LMSG("select_out -> %d"), error);
#endif
	return (error);
}
#endif

/*
 * Linux mremap(2): only in-place shrinking is supported; growing a
 * mapping returns ENOMEM.  A shrink unmaps the trailing pages with
 * kern_munmap().  On success the (unchanged) address is returned.
 */
int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

#ifdef DEBUG
	if (ldebug(mremap))
		printf(ARGS(mremap, "%p, %08lx, %08lx, %08lx"),
		    (void *)(uintptr_t)args->addr,
		    (unsigned long)args->old_len,
		    (unsigned long)args->new_len,
		    (unsigned long)args->flags);
#endif

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	/* Growing is not supported; see above. */
	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define LINUX_MS_ASYNC       0x0001
#define LINUX_MS_INVALIDATE  0x0002
#define LINUX_MS_SYNC        0x0004

/*
 * Linux msync(2).  LINUX_MS_SYNC is masked off because the Linux flag
 * value collides with a different native MS_* bit; the remaining bits
 * are passed to kern_msync() unchanged.
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux time(2): return the current time in seconds since the Epoch,
 * optionally also storing it through the user pointer args->tm.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

#ifdef DEBUG
	if (ldebug(time))
		printf(ARGS(time, "*"));
#endif

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

/* Userland-visible layout of the buffer filled in by Linux times(2). */
struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};


/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define CLK_TCK 100

/* Convert a timeval to clock ticks, old (CLK_TCK) and new (stclohz) way. */
#define CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

/* Pick the conversion matching the emulated kernel version. */
#define CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ? \
	CONVNTCK(r) : CONVOTCK(r))

/*
 * Linux times(2): report user/system CPU time of the process and of
 * its reaped children, in clock ticks; the return value is the system
 * uptime in ticks.
 */
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

#ifdef DEBUG
	if (ldebug(times))
		printf(ARGS(times, "*"));
#endif

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

/*
 * Linux uname(2): fill in a Linux struct new_utsname from the
 * emulated OS name/release, the credential's host/domain names and
 * the native version string, and copy it out.
 */
int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

#ifdef DEBUG
	if (ldebug(newuname))
		printf(ARGS(newuname, "*"));
#endif

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	/* Truncate the version string at its first newline. */
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

/* Userland-visible layout of the utimbuf argument of Linux utime(2). */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utime(2): set access/modification times of a file from a
 * struct utimbuf (seconds resolution), or to the current time when
 * args->times is NULL.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utime))
		printf(ARGS(utime, "%s, *"), fname);
#endif

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
	    UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utimes(2): like utime(2) but takes an array of two timevals
 * (microsecond resolution).
 */
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	LCONVPATHEXIST(td, args->fname, &fname);

#ifdef DEBUG
	if (ldebug(utimes))
		printf(ARGS(utimes, "%s, *"), fname);
#endif

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
	    tvp, UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

/*
 * Validate the nanosecond field of a Linux timespec for utimensat(2):
 * it must be one of the special UTIME_* values or lie in
 * [0, 999999999].  Returns 0 when valid, 1 when invalid.
 */
static int
linux_utimensat_nsec_valid(l_long nsec)
{

	if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW)
		return (0);
	if (nsec >= 0 && nsec <= 999999999)
		return (0);
	return (1);
}

/*
 * Linux utimensat(2): translate the Linux timespec pair (including the
 * UTIME_NOW/UTIME_OMIT markers) and flags, then dispatch to
 * kern_futimens() (NULL pathname) or kern_utimensat().
 */
int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp = NULL;
	char *path = NULL;
	int error, dfd, flags = 0;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

#ifdef DEBUG
	if (ldebug(utimensat))
		printf(ARGS(utimensat, "%d, *"), dfd);
#endif

	if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW)
		return (EINVAL);

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 ||
		    linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0)
			return (EINVAL);

		times[0].tv_sec = l_times[0].tv_sec;
		switch (l_times[0].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[0].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[0].tv_nsec = UTIME_NOW;
			break;
		default:
			times[0].tv_nsec = l_times[0].tv_nsec;
		}

		times[1].tv_sec = l_times[1].tv_sec;
		switch (l_times[1].tv_nsec)
		{
		case LINUX_UTIME_OMIT:
			times[1].tv_nsec = UTIME_OMIT;
			break;
		case LINUX_UTIME_NOW:
			times[1].tv_nsec = UTIME_NOW;
			break;
		default:
			times[1].tv_nsec = l_times[1].tv_nsec;
			break;
		}
		timesp = times;

		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (times[0].tv_nsec == UTIME_OMIT &&
		    times[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (args->pathname != NULL)
		LCONVPATHEXIST_AT(td, args->pathname, &path, dfd);
	else if (args->flags != 0)
		return (EINVAL);

	if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux futimesat(2): utimes(2) relative to a directory descriptor
 * (LINUX_AT_FDCWD maps to the native AT_FDCWD).
 */
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);

#ifdef DEBUG
	if (ldebug(futimesat))
		printf(ARGS(futimesat, "%s, *"), fname);
#endif

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
			LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
	LFREEPATH(fname);
	return (error);
}
#endif

/*
 * Common helper for the wait family: call kern_wait() and rewrite the
 * BSD status word into Linux encoding (translated signal numbers)
 * before copying it out to the optional status pointer.
 */
int
linux_common_wait(struct thread *td, int pid, int *status,
    int options, struct rusage *ru)
{
	int error, tmpstat;

	error = kern_wait(td, pid, &tmpstat, options, ru);
	if (error)
		return (error);

	if (status) {
		tmpstat &= 0xffff;
		if (WIFSIGNALED(tmpstat))
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		else if (WIFSTOPPED(tmpstat))
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
		else if
		    (WIFCONTINUED(tmpstat))
			tmpstat = 0xffff;	/* Linux encoding of "continued" */
		error = copyout(&tmpstat, status, sizeof(int));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* Linux waitpid(2): thin wrapper around wait4(2) with no rusage. */
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

#ifdef DEBUG
	if (ldebug(waitpid))
		printf(ARGS(waitpid, "%d, %p, %d"),
		    args->pid, (void *)args->status, args->options);
#endif

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Linux wait4(2): validate and translate the wait options, wait via
 * linux_common_wait() and optionally copy out the child's rusage.
 */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct rusage ru, *rup;

#ifdef DEBUG
	if (ldebug(wait4))
		printf(ARGS(wait4, "%d, %p, %d, %p"),
		    args->pid, (void *)args->status, args->options,
		    (void *)args->rusage);
#endif
	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		rup = &ru;
	else
		rup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, rup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&ru, args->rusage);
	return (error);
}

/*
 * Linux waitid(2): translate options and idtype, wait via
 * kern_wait6() and copy out the optional rusage and siginfo.
 */
int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	/* At least one state to wait for must be requested. */
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		/* td_retval[0] == 0 means no child changed state. */
		if (td->td_retval[0] == 0)
			bzero(&lsi, sizeof(lsi));
		else {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	/* waitid(2) itself returns 0; the pid goes into the siginfo. */
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux mknod(2): dispatch on the file type embedded in the mode —
 * fifo/socket, character/block device, or (the default) a regular
 * file, which is created via open+close.  Directories are rejected.
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;

	LCONVPATHCREAT(td, args->path, &path);

#ifdef DEBUG
	if (ldebug(mknod))
		printf(ARGS(mknod, "%s, %d, %ju"), path, args->mode,
		    (uintmax_t)args->dev);
#endif

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}
#endif

/*
 * Linux mknodat(2): same dispatch as linux_mknod() but relative to a
 * directory descriptor (LINUX_AT_FDCWD maps to the native AT_FDCWD).
 */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;
	LCONVPATHCREAT_AT(td, args->filename, &path, dfd);

#ifdef DEBUG
	if (ldebug(mknodat))
		printf(ARGS(mknodat, "%s, %d, %d"), path, args->mode, args->dev);
#endif

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, UIO_SYSSPACE, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, UIO_SYSSPACE, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, UIO_SYSSPACE,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	LFREEPATH(path);
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
1210 */ 1211 int 1212 linux_personality(struct thread *td, struct linux_personality_args *args) 1213 { 1214 struct linux_pemuldata *pem; 1215 struct proc *p = td->td_proc; 1216 uint32_t old; 1217 1218 #ifdef DEBUG 1219 if (ldebug(personality)) 1220 printf(ARGS(personality, "%u"), args->per); 1221 #endif 1222 1223 PROC_LOCK(p); 1224 pem = pem_find(p); 1225 old = pem->persona; 1226 if (args->per != 0xffffffff) 1227 pem->persona = args->per; 1228 PROC_UNLOCK(p); 1229 1230 td->td_retval[0] = old; 1231 return (0); 1232 } 1233 1234 struct l_itimerval { 1235 l_timeval it_interval; 1236 l_timeval it_value; 1237 }; 1238 1239 #define B2L_ITIMERVAL(bip, lip) \ 1240 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1241 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1242 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1243 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1244 1245 int 1246 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1247 { 1248 int error; 1249 struct l_itimerval ls; 1250 struct itimerval aitv, oitv; 1251 1252 #ifdef DEBUG 1253 if (ldebug(setitimer)) 1254 printf(ARGS(setitimer, "%p, %p"), 1255 (void *)uap->itv, (void *)uap->oitv); 1256 #endif 1257 1258 if (uap->itv == NULL) { 1259 uap->itv = uap->oitv; 1260 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1261 } 1262 1263 error = copyin(uap->itv, &ls, sizeof(ls)); 1264 if (error != 0) 1265 return (error); 1266 B2L_ITIMERVAL(&aitv, &ls); 1267 #ifdef DEBUG 1268 if (ldebug(setitimer)) { 1269 printf("setitimer: value: sec: %jd, usec: %ld\n", 1270 (intmax_t)aitv.it_value.tv_sec, aitv.it_value.tv_usec); 1271 printf("setitimer: interval: sec: %jd, usec: %ld\n", 1272 (intmax_t)aitv.it_interval.tv_sec, aitv.it_interval.tv_usec); 1273 } 1274 #endif 1275 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1276 if (error != 0 || uap->oitv == NULL) 1277 return (error); 1278 B2L_ITIMERVAL(&ls, &oitv); 1279 1280 return (copyout(&ls, uap->oitv, sizeof(ls))); 
}

/*
 * getitimer(2): fetch the current value of an interval timer and
 * convert it to the Linux layout.
 */
int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
        int error;
        struct l_itimerval ls;
        struct itimerval aitv;

#ifdef DEBUG
        if (ldebug(getitimer))
                printf(ARGS(getitimer, "%p"), (void *)uap->itv);
#endif
        error = kern_getitimer(td, uap->which, &aitv);
        if (error != 0)
                return (error);
        B2L_ITIMERVAL(&ls, &aitv);
        return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * nice(2): adjust the priority of the current process by args->inc.
 */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{
        struct setpriority_args bsd_args;

        bsd_args.which = PRIO_PROCESS;
        bsd_args.who = 0;               /* current process */
        bsd_args.prio = args->inc;
        return (sys_setpriority(td, &bsd_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * setgroups(2): replace the supplementary group set of the process.
 */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
        struct ucred *newcred, *oldcred;
        l_gid_t *linux_gidset;
        gid_t *bsd_gidset;
        int ngrp, error;
        struct proc *p;

        ngrp = args->gidsetsize;
        if (ngrp < 0 || ngrp >= ngroups_max + 1)
                return (EINVAL);
        linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
        error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
        if (error)
                goto out;
        newcred = crget();
        crextend(newcred, ngrp + 1);
        p = td->td_proc;
        PROC_LOCK(p);
        oldcred = p->p_ucred;
        crcopy(newcred, oldcred);

        /*
         * cr_groups[0] holds egid. Setting the whole set from
         * the supplied set will cause egid to be changed too.
         * Keep cr_groups[0] unchanged to prevent that.
1341 */ 1342 1343 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS, 0)) != 0) { 1344 PROC_UNLOCK(p); 1345 crfree(newcred); 1346 goto out; 1347 } 1348 1349 if (ngrp > 0) { 1350 newcred->cr_ngroups = ngrp + 1; 1351 1352 bsd_gidset = newcred->cr_groups; 1353 ngrp--; 1354 while (ngrp >= 0) { 1355 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1356 ngrp--; 1357 } 1358 } else 1359 newcred->cr_ngroups = 1; 1360 1361 setsugid(p); 1362 proc_set_cred(p, newcred); 1363 PROC_UNLOCK(p); 1364 crfree(oldcred); 1365 error = 0; 1366 out: 1367 free(linux_gidset, M_LINUX); 1368 return (error); 1369 } 1370 1371 int 1372 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1373 { 1374 struct ucred *cred; 1375 l_gid_t *linux_gidset; 1376 gid_t *bsd_gidset; 1377 int bsd_gidsetsz, ngrp, error; 1378 1379 cred = td->td_ucred; 1380 bsd_gidset = cred->cr_groups; 1381 bsd_gidsetsz = cred->cr_ngroups - 1; 1382 1383 /* 1384 * cr_groups[0] holds egid. Returning the whole set 1385 * here will cause a duplicate. Exclude cr_groups[0] 1386 * to prevent that. 
1387 */ 1388 1389 if ((ngrp = args->gidsetsize) == 0) { 1390 td->td_retval[0] = bsd_gidsetsz; 1391 return (0); 1392 } 1393 1394 if (ngrp < bsd_gidsetsz) 1395 return (EINVAL); 1396 1397 ngrp = 0; 1398 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1399 M_LINUX, M_WAITOK); 1400 while (ngrp < bsd_gidsetsz) { 1401 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1402 ngrp++; 1403 } 1404 1405 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1406 free(linux_gidset, M_LINUX); 1407 if (error) 1408 return (error); 1409 1410 td->td_retval[0] = ngrp; 1411 return (0); 1412 } 1413 1414 int 1415 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1416 { 1417 struct rlimit bsd_rlim; 1418 struct l_rlimit rlim; 1419 u_int which; 1420 int error; 1421 1422 #ifdef DEBUG 1423 if (ldebug(setrlimit)) 1424 printf(ARGS(setrlimit, "%d, %p"), 1425 args->resource, (void *)args->rlim); 1426 #endif 1427 1428 if (args->resource >= LINUX_RLIM_NLIMITS) 1429 return (EINVAL); 1430 1431 which = linux_to_bsd_resource[args->resource]; 1432 if (which == -1) 1433 return (EINVAL); 1434 1435 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1436 if (error) 1437 return (error); 1438 1439 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1440 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1441 return (kern_setrlimit(td, which, &bsd_rlim)); 1442 } 1443 1444 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1445 int 1446 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1447 { 1448 struct l_rlimit rlim; 1449 struct rlimit bsd_rlim; 1450 u_int which; 1451 1452 #ifdef DEBUG 1453 if (ldebug(old_getrlimit)) 1454 printf(ARGS(old_getrlimit, "%d, %p"), 1455 args->resource, (void *)args->rlim); 1456 #endif 1457 1458 if (args->resource >= LINUX_RLIM_NLIMITS) 1459 return (EINVAL); 1460 1461 which = linux_to_bsd_resource[args->resource]; 1462 if (which == -1) 1463 return (EINVAL); 1464 1465 lim_rlimit(td, which, &bsd_rlim); 1466 1467 
#ifdef COMPAT_LINUX32
        /* Clamp saturated 32-bit limits to INT_MAX. */
        rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
        if (rlim.rlim_cur == UINT_MAX)
                rlim.rlim_cur = INT_MAX;
        rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
        if (rlim.rlim_max == UINT_MAX)
                rlim.rlim_max = INT_MAX;
#else
        /* Clamp saturated long-sized limits to LONG_MAX. */
        rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
        if (rlim.rlim_cur == ULONG_MAX)
                rlim.rlim_cur = LONG_MAX;
        rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
        if (rlim.rlim_max == ULONG_MAX)
                rlim.rlim_max = LONG_MAX;
#endif
        return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * getrlimit(2): return a resource limit in the Linux layout.
 */
int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
        struct l_rlimit rlim;
        struct rlimit bsd_rlim;
        u_int which;

#ifdef DEBUG
        if (ldebug(getrlimit))
                printf(ARGS(getrlimit, "%d, %p"),
                    args->resource, (void *)args->rlim);
#endif

        if (args->resource >= LINUX_RLIM_NLIMITS)
                return (EINVAL);

        which = linux_to_bsd_resource[args->resource];
        if (which == -1)
                return (EINVAL);

        lim_rlimit(td, which, &bsd_rlim);

        rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
        rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
        return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

/*
 * sched_setscheduler(2): set scheduling policy and parameters of the
 * thread identified by a Linux tid/pid.
 */
int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
        struct sched_param sched_param;
        struct thread *tdt;
        int error, policy;

#ifdef DEBUG
        if (ldebug(sched_setscheduler))
                printf(ARGS(sched_setscheduler, "%d, %d, %p"),
                    args->pid, args->policy, (const void *)args->param);
#endif

        switch (args->policy) {
        case LINUX_SCHED_OTHER:
                policy = SCHED_OTHER;
                break;
        case LINUX_SCHED_FIFO:
                policy = SCHED_FIFO;
                break;
        case LINUX_SCHED_RR:
                policy = SCHED_RR;
                break;
        default:
                return
                    (EINVAL);
        }

        error = copyin(args->param, &sched_param, sizeof(sched_param));
        if (error)
                return (error);

        /* linux_tdfind() returns with the containing proc locked. */
        tdt = linux_tdfind(td, args->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
        PROC_UNLOCK(tdt->td_proc);
        return (error);
}

/*
 * sched_getscheduler(2): report a thread's scheduling policy,
 * translated back to the Linux policy constants.
 */
int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
        struct thread *tdt;
        int error, policy;

#ifdef DEBUG
        if (ldebug(sched_getscheduler))
                printf(ARGS(sched_getscheduler, "%d"), args->pid);
#endif

        tdt = linux_tdfind(td, args->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        error = kern_sched_getscheduler(td, tdt, &policy);
        PROC_UNLOCK(tdt->td_proc);

        switch (policy) {
        case SCHED_OTHER:
                td->td_retval[0] = LINUX_SCHED_OTHER;
                break;
        case SCHED_FIFO:
                td->td_retval[0] = LINUX_SCHED_FIFO;
                break;
        case SCHED_RR:
                td->td_retval[0] = LINUX_SCHED_RR;
                break;
        }
        return (error);
}

/*
 * sched_get_priority_max(2): maximum priority for a Linux policy.
 */
int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
        struct sched_get_priority_max_args bsd;

#ifdef DEBUG
        if (ldebug(sched_get_priority_max))
                printf(ARGS(sched_get_priority_max, "%d"), args->policy);
#endif

        switch (args->policy) {
        case LINUX_SCHED_OTHER:
                bsd.policy = SCHED_OTHER;
                break;
        case LINUX_SCHED_FIFO:
                bsd.policy = SCHED_FIFO;
                break;
        case LINUX_SCHED_RR:
                bsd.policy = SCHED_RR;
                break;
        default:
                return (EINVAL);
        }
        return (sys_sched_get_priority_max(td, &bsd));
}

/*
 * sched_get_priority_min(2): minimum priority for a Linux policy.
 */
int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
        struct sched_get_priority_min_args bsd;

#ifdef DEBUG
        if
            (ldebug(sched_get_priority_min))
                printf(ARGS(sched_get_priority_min, "%d"), args->policy);
#endif

        switch (args->policy) {
        case LINUX_SCHED_OTHER:
                bsd.policy = SCHED_OTHER;
                break;
        case LINUX_SCHED_FIFO:
                bsd.policy = SCHED_FIFO;
                break;
        case LINUX_SCHED_RR:
                bsd.policy = SCHED_RR;
                break;
        default:
                return (EINVAL);
        }
        return (sys_sched_get_priority_min(td, &bsd));
}

/* Linux reboot(2) magic numbers and command codes. */
#define REBOOT_CAD_ON   0x89abcdef
#define REBOOT_CAD_OFF  0
#define REBOOT_HALT     0xcdef0123
#define REBOOT_RESTART  0x01234567
#define REBOOT_RESTART2 0xA1B2C3D4
#define REBOOT_POWEROFF 0x4321FEDC
#define REBOOT_MAGIC1   0xfee1dead
#define REBOOT_MAGIC2   0x28121969
#define REBOOT_MAGIC2A  0x05121996
#define REBOOT_MAGIC2B  0x16041998

/*
 * reboot(2): validate the Linux magic numbers and translate the
 * command code into native reboot(2) flags.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
        struct reboot_args bsd_args;

#ifdef DEBUG
        if (ldebug(reboot))
                printf(ARGS(reboot, "0x%x"), args->cmd);
#endif

        if (args->magic1 != REBOOT_MAGIC1)
                return (EINVAL);

        switch (args->magic2) {
        case REBOOT_MAGIC2:
        case REBOOT_MAGIC2A:
        case REBOOT_MAGIC2B:
                break;
        default:
                return (EINVAL);
        }

        switch (args->cmd) {
        case REBOOT_CAD_ON:
        case REBOOT_CAD_OFF:
                /* Ctrl-Alt-Del toggle: only the privilege check is done. */
                return (priv_check(td, PRIV_REBOOT));
        case REBOOT_HALT:
                bsd_args.opt = RB_HALT;
                break;
        case REBOOT_RESTART:
        case REBOOT_RESTART2:
                bsd_args.opt = 0;
                break;
        case REBOOT_POWEROFF:
                bsd_args.opt = RB_POWEROFF;
                break;
        default:
                return (EINVAL);
        }
        return (sys_reboot(td, &bsd_args));
}


/*
 * The FreeBSD native getpid(2), getgid(2) and getuid(2) also modify
 * td->td_retval[1] when COMPAT_43 is defined. This clobbers registers that
 * are assumed to be preserved. The following lightweight syscalls fixes
 * this.
 * See also linux_getgid16() and linux_getuid16() in linux_uid16.c
 *
 * linux_getpid() - MP SAFE
 * linux_getgid() - MP SAFE
 * linux_getuid() - MP SAFE
 */

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

#ifdef DEBUG
        if (ldebug(getpid))
                printf(ARGS(getpid, ""));
#endif
        td->td_retval[0] = td->td_proc->p_pid;

        return (0);
}

/*
 * gettid(2): return the Linux thread id stored in the per-thread
 * emulator data.
 */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
        struct linux_emuldata *em;

#ifdef DEBUG
        if (ldebug(gettid))
                printf(ARGS(gettid, ""));
#endif

        em = em_find(td);
        KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

        td->td_retval[0] = em->em_tid;

        return (0);
}


int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

#ifdef DEBUG
        if (ldebug(getppid))
                printf(ARGS(getppid, ""));
#endif

        td->td_retval[0] = kern_getppid(td);
        return (0);
}

/*
 * getgid(2): return the real group id.
 */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

#ifdef DEBUG
        if (ldebug(getgid))
                printf(ARGS(getgid, ""));
#endif

        td->td_retval[0] = td->td_ucred->cr_rgid;
        return (0);
}

/*
 * getuid(2): return the real user id.
 */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

#ifdef DEBUG
        if (ldebug(getuid))
                printf(ARGS(getuid, ""));
#endif

        td->td_retval[0] = td->td_ucred->cr_ruid;
        return (0);
}


/*
 * getsid(2): thin wrapper around the native system call.
 */
int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{
        struct getsid_args bsd;

#ifdef DEBUG
        if (ldebug(getsid))
                printf(ARGS(getsid, "%i"), args->pid);
#endif

        bsd.pid = args->pid;
        return (sys_getsid(td, &bsd));
}

/* Placeholder for unimplemented system calls. */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

        return (ENOSYS);
}
/*
 * getpriority(2): Linux encodes the returned priority as 20 - nice,
 * hence the adjustment of td_retval[0] below.
 */
int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
        struct getpriority_args bsd_args;
        int error;

#ifdef DEBUG
        if (ldebug(getpriority))
                printf(ARGS(getpriority, "%i, %i"), args->which, args->who);
#endif

        bsd_args.which = args->which;
        bsd_args.who = args->who;
        error = sys_getpriority(td, &bsd_args);
        td->td_retval[0] = 20 - td->td_retval[0];
        return (error);
}

/*
 * sethostname(2): implemented via the kern.hostname sysctl.
 */
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
        int name[2];

#ifdef DEBUG
        if (ldebug(sethostname))
                printf(ARGS(sethostname, "*, %i"), args->len);
#endif

        name[0] = CTL_KERN;
        name[1] = KERN_HOSTNAME;
        return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
            args->len, 0, 0));
}

/*
 * setdomainname(2): implemented via the kern.nisdomainname sysctl.
 */
int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
        int name[2];

#ifdef DEBUG
        if (ldebug(setdomainname))
                printf(ARGS(setdomainname, "*, %i"), args->len);
#endif

        name[0] = CTL_KERN;
        name[1] = KERN_NISDOMAINNAME;
        return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
            args->len, 0, 0));
}

/*
 * exit_group(2): terminate the whole process (all threads).
 */
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

#ifdef DEBUG
        if (ldebug(exit_group))
                printf(ARGS(exit_group, "%i"), args->error_code);
#endif

        LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
            args->error_code);

        /*
         * XXX: we should send a signal to the parent if
         * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
         * as it doesnt occur often.
         */
        exit1(td, args->error_code, 0);
        /* NOTREACHED */
}

/* Linux capability API version magics (see capget(2)/capset(2)). */
#define _LINUX_CAPABILITY_VERSION_1  0x19980330
#define _LINUX_CAPABILITY_VERSION_2  0x20071026
#define _LINUX_CAPABILITY_VERSION_3  0x20080522

struct l_user_cap_header {
        l_int   version;
        l_int   pid;
};

struct l_user_cap_data {
        l_int   effective;
        l_int   permitted;
        l_int   inheritable;
};

/*
 * capget(2): stub implementation that always reports an empty
 * capability set.  Unknown header versions are answered by writing
 * back the version we do support, then failing with EINVAL.
 */
int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
        struct l_user_cap_header luch;
        struct l_user_cap_data lucd[2];
        int error, u32s;

        if (uap->hdrp == NULL)
                return (EFAULT);

        error = copyin(uap->hdrp, &luch, sizeof(luch));
        if (error != 0)
                return (error);

        switch (luch.version) {
        case _LINUX_CAPABILITY_VERSION_1:
                u32s = 1;
                break;
        case _LINUX_CAPABILITY_VERSION_2:
        case _LINUX_CAPABILITY_VERSION_3:
                u32s = 2;
                break;
        default:
#ifdef DEBUG
                if (ldebug(capget))
                        printf(LMSG("invalid capget capability version 0x%x"),
                            luch.version);
#endif
                luch.version = _LINUX_CAPABILITY_VERSION_1;
                error = copyout(&luch, uap->hdrp, sizeof(luch));
                if (error)
                        return (error);
                return (EINVAL);
        }

        if (luch.pid)
                return (EPERM);

        if (uap->datap) {
                /*
                 * The current implementation doesn't support setting
                 * a capability (it's essentially a stub) so indicate
                 * that no capabilities are currently set or available
                 * to request.
                 */
                memset(&lucd, 0, u32s * sizeof(lucd[0]));
                error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
        }

        return (error);
}

/*
 * capset(2): stub implementation; only an all-zero capability set is
 * accepted, anything non-zero fails with EPERM.
 */
int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
        struct l_user_cap_header luch;
        struct l_user_cap_data lucd[2];
        int error, i, u32s;

        if (uap->hdrp == NULL || uap->datap == NULL)
                return (EFAULT);

        error = copyin(uap->hdrp, &luch, sizeof(luch));
        if (error != 0)
                return (error);

        switch (luch.version) {
        case _LINUX_CAPABILITY_VERSION_1:
                u32s = 1;
                break;
        case _LINUX_CAPABILITY_VERSION_2:
        case _LINUX_CAPABILITY_VERSION_3:
                u32s = 2;
                break;
        default:
#ifdef DEBUG
                if (ldebug(capset))
                        printf(LMSG("invalid capset capability version 0x%x"),
                            luch.version);
#endif
                luch.version = _LINUX_CAPABILITY_VERSION_1;
                error = copyout(&luch, uap->hdrp, sizeof(luch));
                if (error)
                        return (error);
                return (EINVAL);
        }

        if (luch.pid)
                return (EPERM);

        error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
        if (error != 0)
                return (error);

        /* We currently don't support setting any capabilities.
         */
        for (i = 0; i < u32s; i++) {
                if (lucd[i].effective || lucd[i].permitted ||
                    lucd[i].inheritable) {
                        linux_msg(td,
                            "capset[%d] effective=0x%x, permitted=0x%x, "
                            "inheritable=0x%x is not implemented", i,
                            (int)lucd[i].effective, (int)lucd[i].permitted,
                            (int)lucd[i].inheritable);
                        return (EPERM);
                }
        }

        return (0);
}

/*
 * prctl(2): assorted process control operations; unsupported options
 * fail with EINVAL.
 */
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
        int error = 0, max_size;
        struct proc *p = td->td_proc;
        char comm[LINUX_MAX_COMM_LEN];
        struct linux_emuldata *em;
        int pdeath_signal;

#ifdef DEBUG
        if (ldebug(prctl))
                printf(ARGS(prctl, "%d, %ju, %ju, %ju, %ju"), args->option,
                    (uintmax_t)args->arg2, (uintmax_t)args->arg3,
                    (uintmax_t)args->arg4, (uintmax_t)args->arg5);
#endif

        switch (args->option) {
        case LINUX_PR_SET_PDEATHSIG:
                if (!LINUX_SIG_VALID(args->arg2))
                        return (EINVAL);
                em = em_find(td);
                KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
                em->pdeath_signal = args->arg2;
                break;
        case LINUX_PR_GET_PDEATHSIG:
                em = em_find(td);
                KASSERT(em != NULL, ("prctl: emuldata not found.\n"));
                pdeath_signal = em->pdeath_signal;
                error = copyout(&pdeath_signal,
                    (void *)(register_t)args->arg2,
                    sizeof(pdeath_signal));
                break;
        case LINUX_PR_GET_KEEPCAPS:
                /*
                 * Indicate that we always clear the effective and
                 * permitted capability sets when the user id becomes
                 * non-zero (actually the capability sets are simply
                 * always zero in the current implementation).
                 */
                td->td_retval[0] = 0;
                break;
        case LINUX_PR_SET_KEEPCAPS:
                /*
                 * Ignore requests to keep the effective and permitted
                 * capability sets when the user id becomes non-zero.
                 */
                break;
        case LINUX_PR_SET_NAME:
                /*
                 * To be on the safe side we need to make sure to not
                 * overflow the size a Linux program expects. We already
                 * do this here in the copyin, so that we don't need to
                 * check on copyout.
                 */
                max_size = MIN(sizeof(comm), sizeof(p->p_comm));
                error = copyinstr((void *)(register_t)args->arg2, comm,
                    max_size, NULL);

                /* Linux silently truncates the name if it is too long. */
                if (error == ENAMETOOLONG) {
                        /*
                         * XXX: copyinstr() isn't documented to populate the
                         * array completely, so do a copyin() to be on the
                         * safe side. This should be changed in case
                         * copyinstr() is changed to guarantee this.
                         */
                        error = copyin((void *)(register_t)args->arg2, comm,
                            max_size - 1);
                        comm[max_size - 1] = '\0';
                }
                if (error)
                        return (error);

                PROC_LOCK(p);
                strlcpy(p->p_comm, comm, sizeof(p->p_comm));
                PROC_UNLOCK(p);
                break;
        case LINUX_PR_GET_NAME:
                PROC_LOCK(p);
                strlcpy(comm, p->p_comm, sizeof(comm));
                PROC_UNLOCK(p);
                error = copyout(comm, (void *)(register_t)args->arg2,
                    strlen(comm) + 1);
                break;
        default:
                error = EINVAL;
                break;
        }

        return (error);
}

/*
 * sched_setparam(2): set scheduling parameters of a thread.
 */
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
        struct sched_param sched_param;
        struct thread *tdt;
        int error;

#ifdef DEBUG
        if (ldebug(sched_setparam))
                printf(ARGS(sched_setparam, "%d, *"), uap->pid);
#endif

        error = copyin(uap->param, &sched_param, sizeof(sched_param));
        if (error)
                return (error);

        tdt = linux_tdfind(td, uap->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        error = kern_sched_setparam(td, tdt, &sched_param);
        PROC_UNLOCK(tdt->td_proc);
        return (error);
}

/*
 * sched_getparam(2): fetch scheduling parameters of a thread.
 */
int
linux_sched_getparam(struct thread *td,
    struct
    linux_sched_getparam_args *uap)
{
        struct sched_param sched_param;
        struct thread *tdt;
        int error;

#ifdef DEBUG
        if (ldebug(sched_getparam))
                printf(ARGS(sched_getparam, "%d, *"), uap->pid);
#endif

        tdt = linux_tdfind(td, uap->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        error = kern_sched_getparam(td, tdt, &sched_param);
        PROC_UNLOCK(tdt->td_proc);
        if (error == 0)
                error = copyout(&sched_param, uap->param,
                    sizeof(sched_param));
        return (error);
}

/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
        int error;
        struct thread *tdt;

#ifdef DEBUG
        if (ldebug(sched_getaffinity))
                printf(ARGS(sched_getaffinity, "%d, %d, *"), args->pid,
                    args->len);
#endif
        /* The user buffer must be able to hold a full native cpuset. */
        if (args->len < sizeof(cpuset_t))
                return (EINVAL);

        tdt = linux_tdfind(td, args->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        PROC_UNLOCK(tdt->td_proc);

        error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
            tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
        if (error == 0)
                td->td_retval[0] = sizeof(cpuset_t);

        return (error);
}

/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
        struct thread *tdt;

#ifdef DEBUG
        if (ldebug(sched_setaffinity))
                printf(ARGS(sched_setaffinity, "%d, %d, *"), args->pid,
                    args->len);
#endif
        /* The user mask must cover a full native cpuset. */
        if (args->len < sizeof(cpuset_t))
                return (EINVAL);

        tdt = linux_tdfind(td, args->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        PROC_UNLOCK(tdt->td_proc);

        return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
            tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
}

/* Linux rlimit64 layout used by prlimit64(2). */
struct linux_rlimit64 {
        uint64_t        rlim_cur;
        uint64_t        rlim_max;
};

/*
 * prlimit64(2): get and/or set a resource limit of the process
 * identified by args->pid.
 */
int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
        struct rlimit rlim, nrlim;
        struct linux_rlimit64 lrlim;
        struct proc *p;
        u_int which;
        int flags;
        int error;

#ifdef DEBUG
        if (ldebug(prlimit64))
                printf(ARGS(prlimit64, "%d, %d, %p, %p"), args->pid,
                    args->resource, (void *)args->new, (void *)args->old);
#endif

        if (args->resource >= LINUX_RLIM_NLIMITS)
                return (EINVAL);

        which = linux_to_bsd_resource[args->resource];
        if (which == -1)
                return (EINVAL);

        if (args->new != NULL) {
                /*
                 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
                 * rlim is unsigned 64-bit. FreeBSD treats negative limits
                 * as INFINITY so we do not need a conversion even.
                 */
                error = copyin(args->new, &nrlim, sizeof(nrlim));
                if (error != 0)
                        return (error);
        }

        flags = PGET_HOLD | PGET_NOTWEXIT;
        /* Setting a limit needs debug rights; reading only visibility. */
        if (args->new != NULL)
                flags |= PGET_CANDEBUG;
        else
                flags |= PGET_CANSEE;
        error = pget(args->pid, flags, &p);
        if (error != 0)
                return (error);

        if (args->old != NULL) {
                PROC_LOCK(p);
                lim_rlimit_proc(p, which, &rlim);
                PROC_UNLOCK(p);
                if (rlim.rlim_cur == RLIM_INFINITY)
                        lrlim.rlim_cur = LINUX_RLIM_INFINITY;
                else
                        lrlim.rlim_cur = rlim.rlim_cur;
                if (rlim.rlim_max == RLIM_INFINITY)
                        lrlim.rlim_max = LINUX_RLIM_INFINITY;
                else
                        lrlim.rlim_max = rlim.rlim_max;
                error = copyout(&lrlim, args->old, sizeof(lrlim));
                if (error != 0)
                        goto out;
        }

        if (args->new != NULL)
                error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
        PRELE(p);
        return (error);
}

/*
 * pselect6(2): select with a struct timespec timeout and an optional
 * temporary signal mask.
 */
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
        struct timeval utv, tv0, tv1, *tvp;
        struct l_pselect6arg lpse6;
        struct l_timespec lts;
        struct timespec uts;
        l_sigset_t l_ss;
        sigset_t *ssp;
        sigset_t ss;
        int error;

        ssp = NULL;
        if (args->sig != NULL) {
                error = copyin(args->sig, &lpse6, sizeof(lpse6));
                if (error != 0)
                        return (error);
                if (lpse6.ss_len != sizeof(l_ss))
                        return (EINVAL);
                if (lpse6.ss != 0) {
                        error = copyin(PTRIN(lpse6.ss), &l_ss,
                            sizeof(l_ss));
                        if (error != 0)
                                return (error);
                        linux_to_bsd_sigset(&l_ss, &ss);
                        ssp = &ss;
                }
        }

        /*
         * Currently glibc changes nanosecond number to microsecond.
         * This mean losing precision but for now it is hardly seen.
         */
        if (args->tsp != NULL) {
                error = copyin(args->tsp, &lts, sizeof(lts));
                if (error != 0)
                        return (error);
                error = linux_to_native_timespec(&uts, &lts);
                if (error != 0)
                        return (error);

                TIMESPEC_TO_TIMEVAL(&utv, &uts);
                if (itimerfix(&utv))
                        return (EINVAL);

                /* Remember the start time so we can report time left. */
                microtime(&tv0);
                tvp = &utv;
        } else
                tvp = NULL;

        error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
            args->exceptfds, tvp, ssp, LINUX_NFDBITS);

        if (error == 0 && args->tsp != NULL) {
                if (td->td_retval[0] != 0) {
                        /*
                         * Compute how much time was left of the timeout,
                         * by subtracting the current time and the time
                         * before we started the call, and subtracting
                         * that result from the user-supplied value.
                         */

                        microtime(&tv1);
                        timevalsub(&tv1, &tv0);
                        timevalsub(&utv, &tv1);
                        if (utv.tv_sec < 0)
                                timevalclear(&utv);
                } else
                        timevalclear(&utv);

                TIMEVAL_TO_TIMESPEC(&utv, &uts);

                error = native_to_linux_timespec(&lts, &uts);
                if (error == 0)
                        error = copyout(&lts, args->tsp, sizeof(lts));
        }

        return (error);
}

/*
 * ppoll(2): poll with a struct timespec timeout and an optional
 * temporary signal mask.
 */
int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
        struct timespec ts0, ts1;
        struct l_timespec lts;
        struct timespec uts, *tsp;
        l_sigset_t l_ss;
        sigset_t *ssp;
        sigset_t ss;
        int error;

        if (args->sset != NULL) {
                if (args->ssize != sizeof(l_ss))
                        return (EINVAL);
                error = copyin(args->sset, &l_ss, sizeof(l_ss));
                if (error)
                        return (error);
                linux_to_bsd_sigset(&l_ss, &ss);
                ssp = &ss;
        } else
                ssp = NULL;
        if (args->tsp != NULL) {
                error = copyin(args->tsp, &lts, sizeof(lts));
                if (error)
                        return (error);
                error = linux_to_native_timespec(&uts, &lts);
                if (error != 0)
                        return (error);

                /* Remember the start time so we can report time left. */
                nanotime(&ts0);
                tsp = &uts;
        } else
                tsp = NULL;

        error = kern_poll(td, args->fds, args->nfds, tsp, ssp);

        if (error == 0 && args->tsp != NULL) {
                if (td->td_retval[0]) {
                        /* Report the unslept time back to the caller. */
                        nanotime(&ts1);
                        timespecsub(&ts1, &ts0);
                        timespecsub(&uts, &ts1);
                        if (uts.tv_sec < 0)
                                timespecclear(&uts);
                } else
                        timespecclear(&uts);

                error = native_to_linux_timespec(&lts, &uts);
                if (error == 0)
                        error = copyout(&lts, args->tsp, sizeof(lts));
        }

        return (error);
}

#if defined(DEBUG) || defined(KTR)
/* XXX: can be removed when every ldebug(...) and KTR stuff are removed. */

#ifdef COMPAT_LINUX32
#define L_MAXSYSCALL LINUX32_SYS_MAXSYSCALL
#else
#define L_MAXSYSCALL LINUX_SYS_MAXSYSCALL
#endif

/* Per-syscall debug bitmap, toggled via the linux.debug sysctl below. */
u_char linux_debug_map[howmany(L_MAXSYSCALL, sizeof(u_char))];

static int
linux_debug(int syscall, int toggle, int global)
{

        if (global) {
                /* Apply the toggle to every syscall at once. */
                char c = toggle ? 0 : 0xff;

                memset(linux_debug_map, c, sizeof(linux_debug_map));
                return (0);
        }
        if (syscall < 0 || syscall >= L_MAXSYSCALL)
                return (EINVAL);
        if (toggle)
                clrbit(linux_debug_map, syscall);
        else
                setbit(linux_debug_map, syscall);
        return (0);
}
#undef L_MAXSYSCALL

/*
 * Usage: sysctl linux.debug=<syscall_nr>.<0/1>
 *
 * E.g.: sysctl linux.debug=21.0
 *
 * As a special case, syscall "all" will apply to all syscalls globally.
 */
#define LINUX_MAX_DEBUGSTR      16
int
linux_sysctl_debug(SYSCTL_HANDLER_ARGS)
{
        char value[LINUX_MAX_DEBUGSTR], *p;
        int error, sysc, toggle;
        int global = 0;

        value[0] = '\0';
        error = sysctl_handle_string(oidp, value, LINUX_MAX_DEBUGSTR, req);
        if (error || req->newptr == NULL)
                return (error);
        /* Split the "<syscall>.<toggle>" string at the first dot. */
        for (p = value; *p != '\0' && *p != '.'; p++);
        if (*p == '\0')
                return (EINVAL);
        *p++ = '\0';
        sysc = strtol(value, NULL, 0);
        toggle = strtol(p, NULL, 0);
        if (strcmp(value, "all") == 0)
                global = 1;
        error = linux_debug(sysc, toggle, global);
        return (error);
}

#endif /* DEBUG || KTR */

/*
 * sched_rr_get_interval(2): report the round-robin time quantum for
 * the given thread in Linux timespec format.
 */
int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
        struct timespec ts;
        struct l_timespec lts;
        struct thread *tdt;
        int error;

        /*
         * According to man in case the invalid pid specified
         * EINVAL should be returned.
         */
        if (uap->pid < 0)
                return (EINVAL);

        tdt = linux_tdfind(td, uap->pid, -1);
        if (tdt == NULL)
                return (ESRCH);

        error = kern_sched_rr_get_interval_td(td, tdt, &ts);
        PROC_UNLOCK(tdt->td_proc);
        if (error != 0)
                return (error);
        error = native_to_linux_timespec(&lts, &ts);
        if (error != 0)
                return (error);
        return (copyout(&lts, uap->interval, sizeof(lts)));
}

/*
 * In case when the Linux thread is the initial thread in
 * the thread group thread id is equal to the process id.
 * Glibc depends on this magic (assert in pthread_getattr_np.c).
 */
/*
 * NOTE(review): on success the returned thread's process appears to be
 * left locked (PROC_LOCK taken here; callers above do PROC_UNLOCK) --
 * confirm against tdfind(9) before relying on this in new callers.
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
        struct linux_emuldata *em;
        struct thread *tdt;
        struct proc *p;

        tdt = NULL;
        if (tid == 0 || tid == td->td_tid) {
                tdt = td;
                PROC_LOCK(tdt->td_proc);
        } else if (tid > PID_MAX)
                tdt = tdfind(tid, pid);
        else {
                /*
                 * Initial thread where the tid equal to the pid.
                 */
                p = pfind(tid);
                if (p != NULL) {
                        if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
                                /*
                                 * p is not a Linuxulator process.
                                 */
                                PROC_UNLOCK(p);
                                return (NULL);
                        }
                        FOREACH_THREAD_IN_PROC(p, tdt) {
                                em = em_find(tdt);
                                if (tid == em->em_tid)
                                        return (tdt);
                        }
                        PROC_UNLOCK(p);
                }
                return (NULL);
        }

        return (tdt);
}

/*
 * Translate Linux wait*(2) option flags to the native equivalents.
 */
void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

        if (options & LINUX_WNOHANG)
                *bsdopts |= WNOHANG;
        if (options & LINUX_WUNTRACED)
                *bsdopts |= WUNTRACED;
        if (options & LINUX_WEXITED)
                *bsdopts |= WEXITED;
        if (options & LINUX_WCONTINUED)
                *bsdopts |= WCONTINUED;
        if (options & LINUX_WNOWAIT)
                *bsdopts |= WNOWAIT;

        if (options & __WCLONE)
                *bsdopts |= WLINUXCLONE;
}

/*
 * getrandom(2): fill the user buffer from the kernel random source;
 * the byte count is capped at INT_MAX.
 */
int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
        struct uio uio;
        struct iovec iov;
        int error;

        if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
                return (EINVAL);
        if (args->count > INT_MAX)
                args->count = INT_MAX;

        iov.iov_base = args->buf;
        iov.iov_len = args->count;

        uio.uio_iov = &iov;
        uio.uio_iovcnt = 1;
        uio.uio_resid = iov.iov_len;
        uio.uio_segflg = UIO_USERSPACE;
        uio.uio_rw = UIO_READ;
        uio.uio_td = td;

        error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
        if (error == 0)
                td->td_retval[0] = args->count - uio.uio_resid;
        return
            (error);
}

/*
 * mincore(2): page-residency information; the start address must be
 * page-aligned.
 */
int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

        /* Needs to be page-aligned */
        if (args->start & PAGE_MASK)
                return (EINVAL);
        return (kern_mincore(td, args->start, args->len, args->vec));
}