1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/msgbuf.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/priv.h> 54 #include <sys/proc.h> 55 #include <sys/procctl.h> 56 #include <sys/reboot.h> 57 #include <sys/racct.h> 58 #include <sys/random.h> 59 #include <sys/resourcevar.h> 60 #include <sys/sched.h> 61 #include <sys/sdt.h> 62 #include <sys/signalvar.h> 63 #include <sys/stat.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/sysctl.h> 66 #include <sys/sysproto.h> 67 #include <sys/systm.h> 68 #include <sys/time.h> 69 #include <sys/vmmeter.h> 70 #include <sys/vnode.h> 71 #include <sys/wait.h> 72 #include <sys/cpuset.h> 73 #include <sys/uio.h> 74 75 #include <security/mac/mac_framework.h> 76 77 #include <vm/vm.h> 78 #include <vm/pmap.h> 79 #include <vm/vm_kern.h> 80 #include <vm/vm_map.h> 81 #include <vm/vm_extern.h> 82 #include <vm/swap_pager.h> 83 84 #ifdef COMPAT_LINUX32 85 #include <machine/../linux32/linux.h> 86 #include <machine/../linux32/linux32_proto.h> 87 #else 88 #include <machine/../linux/linux.h> 89 #include <machine/../linux/linux_proto.h> 90 #endif 91 92 #include <compat/linux/linux_dtrace.h> 93 #include <compat/linux/linux_file.h> 94 #include <compat/linux/linux_mib.h> 95 #include <compat/linux/linux_signal.h> 96 #include <compat/linux/linux_timer.h> 97 #include <compat/linux/linux_util.h> 98 #include <compat/linux/linux_sysproto.h> 99 #include <compat/linux/linux_emul.h> 100 #include <compat/linux/linux_misc.h> 101 102 /** 103 * Special DTrace provider for the linuxulator. 
104 * 105 * In this file we define the provider for the entire linuxulator. All 106 * modules (= files of the linuxulator) use it. 107 * 108 * We define a different name depending on the emulated bitsize, see 109 * ../../<ARCH>/linux{,32}/linux.h, e.g.: 110 * native bitsize = linuxulator 111 * amd64, 32bit emulation = linuxulator32 112 */ 113 LIN_SDT_PROVIDER_DEFINE(LINUX_DTRACE); 114 115 int stclohz; /* Statistics clock frequency */ 116 117 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 118 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 119 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 120 RLIMIT_MEMLOCK, RLIMIT_AS 121 }; 122 123 struct l_sysinfo { 124 l_long uptime; /* Seconds since boot */ 125 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 126 #define LINUX_SYSINFO_LOADS_SCALE 65536 127 l_ulong totalram; /* Total usable main memory size */ 128 l_ulong freeram; /* Available memory size */ 129 l_ulong sharedram; /* Amount of shared memory */ 130 l_ulong bufferram; /* Memory used by buffers */ 131 l_ulong totalswap; /* Total swap space size */ 132 l_ulong freeswap; /* swap space still available */ 133 l_ushort procs; /* Number of current processes */ 134 l_ushort pads; 135 l_ulong totalhigh; 136 l_ulong freehigh; 137 l_uint mem_unit; 138 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 139 }; 140 141 struct l_pselect6arg { 142 l_uintptr_t ss; 143 l_size_t ss_len; 144 }; 145 146 static int linux_utimensat_nsec_valid(l_long); 147 148 int 149 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 150 { 151 struct l_sysinfo sysinfo; 152 int i, j; 153 struct timespec ts; 154 155 bzero(&sysinfo, sizeof(sysinfo)); 156 getnanouptime(&ts); 157 if (ts.tv_nsec != 0) 158 ts.tv_sec++; 159 sysinfo.uptime = ts.tv_sec; 160 161 /* Use the information from the mib to get our load averages */ 162 for (i = 0; i < 3; i++) 163 sysinfo.loads[i] = averunnable.ldavg[i] * 164 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 165 
166 sysinfo.totalram = physmem * PAGE_SIZE; 167 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 168 169 /* 170 * sharedram counts pages allocated to named, swap-backed objects such 171 * as shared memory segments and tmpfs files. There is no cheap way to 172 * compute this, so just leave the field unpopulated. Linux itself only 173 * started setting this field in the 3.x timeframe. 174 */ 175 sysinfo.sharedram = 0; 176 sysinfo.bufferram = 0; 177 178 swap_pager_status(&i, &j); 179 sysinfo.totalswap = i * PAGE_SIZE; 180 sysinfo.freeswap = (i - j) * PAGE_SIZE; 181 182 sysinfo.procs = nprocs; 183 184 /* 185 * Platforms supported by the emulation layer do not have a notion of 186 * high memory. 187 */ 188 sysinfo.totalhigh = 0; 189 sysinfo.freehigh = 0; 190 191 sysinfo.mem_unit = 1; 192 193 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 194 } 195 196 #ifdef LINUX_LEGACY_SYSCALLS 197 int 198 linux_alarm(struct thread *td, struct linux_alarm_args *args) 199 { 200 struct itimerval it, old_it; 201 u_int secs; 202 int error; 203 204 secs = args->secs; 205 /* 206 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 207 * to match kern_setitimer()'s limit to avoid error from it. 208 * 209 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 210 * platforms. 
211 */ 212 if (secs > INT32_MAX / 2) 213 secs = INT32_MAX / 2; 214 215 it.it_value.tv_sec = secs; 216 it.it_value.tv_usec = 0; 217 timevalclear(&it.it_interval); 218 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 219 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 220 221 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 222 old_it.it_value.tv_usec >= 500000) 223 old_it.it_value.tv_sec++; 224 td->td_retval[0] = old_it.it_value.tv_sec; 225 return (0); 226 } 227 #endif 228 229 int 230 linux_brk(struct thread *td, struct linux_brk_args *args) 231 { 232 struct vmspace *vm = td->td_proc->p_vmspace; 233 uintptr_t new, old; 234 235 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 236 new = (uintptr_t)args->dsend; 237 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 238 td->td_retval[0] = (register_t)new; 239 else 240 td->td_retval[0] = (register_t)old; 241 242 return (0); 243 } 244 245 #if defined(__i386__) 246 /* XXX: what about amd64/linux32? */ 247 248 int 249 linux_uselib(struct thread *td, struct linux_uselib_args *args) 250 { 251 struct nameidata ni; 252 struct vnode *vp; 253 struct exec *a_out; 254 vm_map_t map; 255 vm_map_entry_t entry; 256 struct vattr attr; 257 vm_offset_t vmaddr; 258 unsigned long file_offset; 259 unsigned long bss_size; 260 char *library; 261 ssize_t aresid; 262 int error; 263 bool locked, opened, textset; 264 265 a_out = NULL; 266 vp = NULL; 267 locked = false; 268 textset = false; 269 opened = false; 270 271 if (!LUSECONVPATH(td)) { 272 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 273 UIO_USERSPACE, args->library, td); 274 error = namei(&ni); 275 } else { 276 LCONVPATHEXIST(td, args->library, &library); 277 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 278 UIO_SYSSPACE, library, td); 279 error = namei(&ni); 280 LFREEPATH(library); 281 } 282 if (error) 283 goto cleanup; 284 285 vp = ni.ni_vp; 286 NDFREE(&ni, NDF_ONLY_PNBUF); 287 288 /* 289 * From here on down, 
we have a locked vnode that must be unlocked. 290 * XXX: The code below largely duplicates exec_check_permissions(). 291 */ 292 locked = true; 293 294 /* Executable? */ 295 error = VOP_GETATTR(vp, &attr, td->td_ucred); 296 if (error) 297 goto cleanup; 298 299 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 300 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 301 /* EACCESS is what exec(2) returns. */ 302 error = ENOEXEC; 303 goto cleanup; 304 } 305 306 /* Sensible size? */ 307 if (attr.va_size == 0) { 308 error = ENOEXEC; 309 goto cleanup; 310 } 311 312 /* Can we access it? */ 313 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 314 if (error) 315 goto cleanup; 316 317 /* 318 * XXX: This should use vn_open() so that it is properly authorized, 319 * and to reduce code redundancy all over the place here. 320 * XXX: Not really, it duplicates far more of exec_check_permissions() 321 * than vn_open(). 322 */ 323 #ifdef MAC 324 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 325 if (error) 326 goto cleanup; 327 #endif 328 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 329 if (error) 330 goto cleanup; 331 opened = true; 332 333 /* Pull in executable header into exec_map */ 334 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 335 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 336 if (error) 337 goto cleanup; 338 339 /* Is it a Linux binary ? */ 340 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 341 error = ENOEXEC; 342 goto cleanup; 343 } 344 345 /* 346 * While we are here, we should REALLY do some more checks 347 */ 348 349 /* Set file/virtual offset based on a.out variant. */ 350 switch ((int)(a_out->a_magic & 0xffff)) { 351 case 0413: /* ZMAGIC */ 352 file_offset = 1024; 353 break; 354 case 0314: /* QMAGIC */ 355 file_offset = 0; 356 break; 357 default: 358 error = ENOEXEC; 359 goto cleanup; 360 } 361 362 bss_size = round_page(a_out->a_bss); 363 364 /* Check various fields in header for validity/bounds. 
*/ 365 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 366 error = ENOEXEC; 367 goto cleanup; 368 } 369 370 /* text + data can't exceed file size */ 371 if (a_out->a_data + a_out->a_text > attr.va_size) { 372 error = EFAULT; 373 goto cleanup; 374 } 375 376 /* 377 * text/data/bss must not exceed limits 378 * XXX - this is not complete. it should check current usage PLUS 379 * the resources needed by this library. 380 */ 381 PROC_LOCK(td->td_proc); 382 if (a_out->a_text > maxtsiz || 383 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 384 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 385 bss_size) != 0) { 386 PROC_UNLOCK(td->td_proc); 387 error = ENOMEM; 388 goto cleanup; 389 } 390 PROC_UNLOCK(td->td_proc); 391 392 /* 393 * Prevent more writers. 394 */ 395 error = VOP_SET_TEXT(vp); 396 if (error != 0) 397 goto cleanup; 398 textset = true; 399 400 /* 401 * Lock no longer needed 402 */ 403 locked = false; 404 VOP_UNLOCK(vp); 405 406 /* 407 * Check if file_offset page aligned. Currently we cannot handle 408 * misalinged file offsets, and so we read in the entire image 409 * (what a waste). 
410 */ 411 if (file_offset & PAGE_MASK) { 412 /* Map text+data read/write/execute */ 413 414 /* a_entry is the load address and is page aligned */ 415 vmaddr = trunc_page(a_out->a_entry); 416 417 /* get anon user mapping, read+write+execute */ 418 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 419 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 420 VM_PROT_ALL, VM_PROT_ALL, 0); 421 if (error) 422 goto cleanup; 423 424 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 425 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 426 td->td_ucred, NOCRED, &aresid, td); 427 if (error != 0) 428 goto cleanup; 429 if (aresid != 0) { 430 error = ENOEXEC; 431 goto cleanup; 432 } 433 } else { 434 /* 435 * for QMAGIC, a_entry is 20 bytes beyond the load address 436 * to skip the executable header 437 */ 438 vmaddr = trunc_page(a_out->a_entry); 439 440 /* 441 * Map it all into the process's space as a single 442 * copy-on-write "data" segment. 443 */ 444 map = &td->td_proc->p_vmspace->vm_map; 445 error = vm_mmap(map, &vmaddr, 446 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 447 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 448 if (error) 449 goto cleanup; 450 vm_map_lock(map); 451 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 452 vm_map_unlock(map); 453 error = EDOOFUS; 454 goto cleanup; 455 } 456 entry->eflags |= MAP_ENTRY_VN_EXEC; 457 vm_map_unlock(map); 458 textset = false; 459 } 460 461 if (bss_size != 0) { 462 /* Calculate BSS start address */ 463 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 464 a_out->a_data; 465 466 /* allocate some 'anon' space */ 467 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 468 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 469 VM_PROT_ALL, 0); 470 if (error) 471 goto cleanup; 472 } 473 474 cleanup: 475 if (opened) { 476 if (locked) 477 VOP_UNLOCK(vp); 478 locked = false; 479 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 480 } 481 if (textset) { 482 if (!locked) { 483 locked 
= true; 484 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 485 } 486 VOP_UNSET_TEXT_CHECKED(vp); 487 } 488 if (locked) 489 VOP_UNLOCK(vp); 490 491 /* Release the temporary mapping. */ 492 if (a_out) 493 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 494 495 return (error); 496 } 497 498 #endif /* __i386__ */ 499 500 #ifdef LINUX_LEGACY_SYSCALLS 501 int 502 linux_select(struct thread *td, struct linux_select_args *args) 503 { 504 l_timeval ltv; 505 struct timeval tv0, tv1, utv, *tvp; 506 int error; 507 508 /* 509 * Store current time for computation of the amount of 510 * time left. 511 */ 512 if (args->timeout) { 513 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 514 goto select_out; 515 utv.tv_sec = ltv.tv_sec; 516 utv.tv_usec = ltv.tv_usec; 517 518 if (itimerfix(&utv)) { 519 /* 520 * The timeval was invalid. Convert it to something 521 * valid that will act as it does under Linux. 522 */ 523 utv.tv_sec += utv.tv_usec / 1000000; 524 utv.tv_usec %= 1000000; 525 if (utv.tv_usec < 0) { 526 utv.tv_sec -= 1; 527 utv.tv_usec += 1000000; 528 } 529 if (utv.tv_sec < 0) 530 timevalclear(&utv); 531 } 532 microtime(&tv0); 533 tvp = &utv; 534 } else 535 tvp = NULL; 536 537 error = kern_select(td, args->nfds, args->readfds, args->writefds, 538 args->exceptfds, tvp, LINUX_NFDBITS); 539 if (error) 540 goto select_out; 541 542 if (args->timeout) { 543 if (td->td_retval[0]) { 544 /* 545 * Compute how much time was left of the timeout, 546 * by subtracting the current time and the time 547 * before we started the call, and subtracting 548 * that result from the user-supplied value. 
549 */ 550 microtime(&tv1); 551 timevalsub(&tv1, &tv0); 552 timevalsub(&utv, &tv1); 553 if (utv.tv_sec < 0) 554 timevalclear(&utv); 555 } else 556 timevalclear(&utv); 557 ltv.tv_sec = utv.tv_sec; 558 ltv.tv_usec = utv.tv_usec; 559 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 560 goto select_out; 561 } 562 563 select_out: 564 return (error); 565 } 566 #endif 567 568 int 569 linux_mremap(struct thread *td, struct linux_mremap_args *args) 570 { 571 uintptr_t addr; 572 size_t len; 573 int error = 0; 574 575 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 576 td->td_retval[0] = 0; 577 return (EINVAL); 578 } 579 580 /* 581 * Check for the page alignment. 582 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 583 */ 584 if (args->addr & PAGE_MASK) { 585 td->td_retval[0] = 0; 586 return (EINVAL); 587 } 588 589 args->new_len = round_page(args->new_len); 590 args->old_len = round_page(args->old_len); 591 592 if (args->new_len > args->old_len) { 593 td->td_retval[0] = 0; 594 return (ENOMEM); 595 } 596 597 if (args->new_len < args->old_len) { 598 addr = args->addr + args->new_len; 599 len = args->old_len - args->new_len; 600 error = kern_munmap(td, addr, len); 601 } 602 603 td->td_retval[0] = error ? 
0 : (uintptr_t)args->addr; 604 return (error); 605 } 606 607 #define LINUX_MS_ASYNC 0x0001 608 #define LINUX_MS_INVALIDATE 0x0002 609 #define LINUX_MS_SYNC 0x0004 610 611 int 612 linux_msync(struct thread *td, struct linux_msync_args *args) 613 { 614 615 return (kern_msync(td, args->addr, args->len, 616 args->fl & ~LINUX_MS_SYNC)); 617 } 618 619 #ifdef LINUX_LEGACY_SYSCALLS 620 int 621 linux_time(struct thread *td, struct linux_time_args *args) 622 { 623 struct timeval tv; 624 l_time_t tm; 625 int error; 626 627 microtime(&tv); 628 tm = tv.tv_sec; 629 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 630 return (error); 631 td->td_retval[0] = tm; 632 return (0); 633 } 634 #endif 635 636 struct l_times_argv { 637 l_clock_t tms_utime; 638 l_clock_t tms_stime; 639 l_clock_t tms_cutime; 640 l_clock_t tms_cstime; 641 }; 642 643 /* 644 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 645 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 646 * auxiliary vector entry. 647 */ 648 #define CLK_TCK 100 649 650 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 651 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 652 653 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? 
\ 654 CONVNTCK(r) : CONVOTCK(r)) 655 656 int 657 linux_times(struct thread *td, struct linux_times_args *args) 658 { 659 struct timeval tv, utime, stime, cutime, cstime; 660 struct l_times_argv tms; 661 struct proc *p; 662 int error; 663 664 if (args->buf != NULL) { 665 p = td->td_proc; 666 PROC_LOCK(p); 667 PROC_STATLOCK(p); 668 calcru(p, &utime, &stime); 669 PROC_STATUNLOCK(p); 670 calccru(p, &cutime, &cstime); 671 PROC_UNLOCK(p); 672 673 tms.tms_utime = CONVTCK(utime); 674 tms.tms_stime = CONVTCK(stime); 675 676 tms.tms_cutime = CONVTCK(cutime); 677 tms.tms_cstime = CONVTCK(cstime); 678 679 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 680 return (error); 681 } 682 683 microuptime(&tv); 684 td->td_retval[0] = (int)CONVTCK(tv); 685 return (0); 686 } 687 688 int 689 linux_newuname(struct thread *td, struct linux_newuname_args *args) 690 { 691 struct l_new_utsname utsname; 692 char osname[LINUX_MAX_UTSNAME]; 693 char osrelease[LINUX_MAX_UTSNAME]; 694 char *p; 695 696 linux_get_osname(td, osname); 697 linux_get_osrelease(td, osrelease); 698 699 bzero(&utsname, sizeof(utsname)); 700 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 701 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 702 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 703 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 704 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 705 for (p = utsname.version; *p != '\0'; ++p) 706 if (*p == '\n') { 707 *p = '\0'; 708 break; 709 } 710 #if defined(__amd64__) 711 /* 712 * On amd64, Linux uname(2) needs to return "x86_64" 713 * for both 64-bit and 32-bit applications. On 32-bit, 714 * the string returned by getauxval(AT_PLATFORM) needs 715 * to remain "i686", though. 
716 */ 717 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 718 #else 719 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 720 #endif 721 722 return (copyout(&utsname, args->buf, sizeof(utsname))); 723 } 724 725 struct l_utimbuf { 726 l_time_t l_actime; 727 l_time_t l_modtime; 728 }; 729 730 #ifdef LINUX_LEGACY_SYSCALLS 731 int 732 linux_utime(struct thread *td, struct linux_utime_args *args) 733 { 734 struct timeval tv[2], *tvp; 735 struct l_utimbuf lut; 736 char *fname; 737 int error; 738 bool convpath; 739 740 convpath = LUSECONVPATH(td); 741 if (convpath) 742 LCONVPATHEXIST(td, args->fname, &fname); 743 744 if (args->times) { 745 if ((error = copyin(args->times, &lut, sizeof lut))) { 746 if (convpath) 747 LFREEPATH(fname); 748 return (error); 749 } 750 tv[0].tv_sec = lut.l_actime; 751 tv[0].tv_usec = 0; 752 tv[1].tv_sec = lut.l_modtime; 753 tv[1].tv_usec = 0; 754 tvp = tv; 755 } else 756 tvp = NULL; 757 758 if (!convpath) { 759 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 760 tvp, UIO_SYSSPACE); 761 } else { 762 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 763 UIO_SYSSPACE); 764 LFREEPATH(fname); 765 } 766 return (error); 767 } 768 #endif 769 770 #ifdef LINUX_LEGACY_SYSCALLS 771 int 772 linux_utimes(struct thread *td, struct linux_utimes_args *args) 773 { 774 l_timeval ltv[2]; 775 struct timeval tv[2], *tvp = NULL; 776 char *fname; 777 int error; 778 bool convpath; 779 780 convpath = LUSECONVPATH(td); 781 if (convpath) 782 LCONVPATHEXIST(td, args->fname, &fname); 783 784 if (args->tptr != NULL) { 785 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 786 LFREEPATH(fname); 787 return (error); 788 } 789 tv[0].tv_sec = ltv[0].tv_sec; 790 tv[0].tv_usec = ltv[0].tv_usec; 791 tv[1].tv_sec = ltv[1].tv_sec; 792 tv[1].tv_usec = ltv[1].tv_usec; 793 tvp = tv; 794 } 795 796 if (!convpath) { 797 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 798 tvp, UIO_SYSSPACE); 799 } else { 800 error = 
kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 801 tvp, UIO_SYSSPACE); 802 LFREEPATH(fname); 803 } 804 return (error); 805 } 806 #endif 807 808 static int 809 linux_utimensat_nsec_valid(l_long nsec) 810 { 811 812 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 813 return (0); 814 if (nsec >= 0 && nsec <= 999999999) 815 return (0); 816 return (1); 817 } 818 819 int 820 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 821 { 822 struct l_timespec l_times[2]; 823 struct timespec times[2], *timesp = NULL; 824 char *path = NULL; 825 int error, dfd, flags = 0; 826 827 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 828 829 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 830 return (EINVAL); 831 832 if (args->times != NULL) { 833 error = copyin(args->times, l_times, sizeof(l_times)); 834 if (error != 0) 835 return (error); 836 837 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 838 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 839 return (EINVAL); 840 841 times[0].tv_sec = l_times[0].tv_sec; 842 switch (l_times[0].tv_nsec) 843 { 844 case LINUX_UTIME_OMIT: 845 times[0].tv_nsec = UTIME_OMIT; 846 break; 847 case LINUX_UTIME_NOW: 848 times[0].tv_nsec = UTIME_NOW; 849 break; 850 default: 851 times[0].tv_nsec = l_times[0].tv_nsec; 852 } 853 854 times[1].tv_sec = l_times[1].tv_sec; 855 switch (l_times[1].tv_nsec) 856 { 857 case LINUX_UTIME_OMIT: 858 times[1].tv_nsec = UTIME_OMIT; 859 break; 860 case LINUX_UTIME_NOW: 861 times[1].tv_nsec = UTIME_NOW; 862 break; 863 default: 864 times[1].tv_nsec = l_times[1].tv_nsec; 865 break; 866 } 867 timesp = times; 868 869 /* This breaks POSIX, but is what the Linux kernel does 870 * _on purpose_ (documented in the man page for utimensat(2)), 871 * so we must follow that behaviour. 
*/ 872 if (times[0].tv_nsec == UTIME_OMIT && 873 times[1].tv_nsec == UTIME_OMIT) 874 return (0); 875 } 876 877 if (!LUSECONVPATH(td)) { 878 if (args->pathname != NULL) { 879 return (kern_utimensat(td, dfd, args->pathname, 880 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 881 } 882 } 883 884 if (args->pathname != NULL) 885 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 886 else if (args->flags != 0) 887 return (EINVAL); 888 889 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 890 flags |= AT_SYMLINK_NOFOLLOW; 891 892 if (path == NULL) 893 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 894 else { 895 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 896 UIO_SYSSPACE, flags); 897 LFREEPATH(path); 898 } 899 900 return (error); 901 } 902 903 #ifdef LINUX_LEGACY_SYSCALLS 904 int 905 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 906 { 907 l_timeval ltv[2]; 908 struct timeval tv[2], *tvp = NULL; 909 char *fname; 910 int error, dfd; 911 bool convpath; 912 913 convpath = LUSECONVPATH(td); 914 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
AT_FDCWD : args->dfd; 915 if (convpath) 916 LCONVPATHEXIST_AT(td, args->filename, &fname, dfd); 917 918 if (args->utimes != NULL) { 919 if ((error = copyin(args->utimes, ltv, sizeof ltv))) { 920 if (convpath) 921 LFREEPATH(fname); 922 return (error); 923 } 924 tv[0].tv_sec = ltv[0].tv_sec; 925 tv[0].tv_usec = ltv[0].tv_usec; 926 tv[1].tv_sec = ltv[1].tv_sec; 927 tv[1].tv_usec = ltv[1].tv_usec; 928 tvp = tv; 929 } 930 931 if (!convpath) { 932 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 933 tvp, UIO_SYSSPACE); 934 } else { 935 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE); 936 LFREEPATH(fname); 937 } 938 return (error); 939 } 940 #endif 941 942 static int 943 linux_common_wait(struct thread *td, int pid, int *statusp, 944 int options, struct __wrusage *wrup) 945 { 946 siginfo_t siginfo; 947 idtype_t idtype; 948 id_t id; 949 int error, status, tmpstat; 950 951 if (pid == WAIT_ANY) { 952 idtype = P_ALL; 953 id = 0; 954 } else if (pid < 0) { 955 idtype = P_PGID; 956 id = (id_t)-pid; 957 } else { 958 idtype = P_PID; 959 id = (id_t)pid; 960 } 961 962 /* 963 * For backward compatibility we implicitly add flags WEXITED 964 * and WTRAPPED here. 
965 */ 966 options |= WEXITED | WTRAPPED; 967 error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo); 968 if (error) 969 return (error); 970 971 if (statusp) { 972 tmpstat = status & 0xffff; 973 if (WIFSIGNALED(tmpstat)) { 974 tmpstat = (tmpstat & 0xffffff80) | 975 bsd_to_linux_signal(WTERMSIG(tmpstat)); 976 } else if (WIFSTOPPED(tmpstat)) { 977 tmpstat = (tmpstat & 0xffff00ff) | 978 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 979 #if defined(__amd64__) && !defined(COMPAT_LINUX32) 980 if (WSTOPSIG(status) == SIGTRAP) { 981 tmpstat = linux_ptrace_status(td, 982 siginfo.si_pid, tmpstat); 983 } 984 #endif 985 } else if (WIFCONTINUED(tmpstat)) { 986 tmpstat = 0xffff; 987 } 988 error = copyout(&tmpstat, statusp, sizeof(int)); 989 } 990 991 return (error); 992 } 993 994 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 995 int 996 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 997 { 998 struct linux_wait4_args wait4_args; 999 1000 wait4_args.pid = args->pid; 1001 wait4_args.status = args->status; 1002 wait4_args.options = args->options; 1003 wait4_args.rusage = NULL; 1004 1005 return (linux_wait4(td, &wait4_args)); 1006 } 1007 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1008 1009 int 1010 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1011 { 1012 int error, options; 1013 struct __wrusage wru, *wrup; 1014 1015 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1016 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1017 return (EINVAL); 1018 1019 options = WEXITED; 1020 linux_to_bsd_waitopts(args->options, &options); 1021 1022 if (args->rusage != NULL) 1023 wrup = &wru; 1024 else 1025 wrup = NULL; 1026 error = linux_common_wait(td, args->pid, args->status, options, wrup); 1027 if (error != 0) 1028 return (error); 1029 if (args->rusage != NULL) 1030 error = linux_copyout_rusage(&wru.wru_self, args->rusage); 1031 return (error); 1032 } 1033 1034 int 1035 linux_waitid(struct thread 
*td, struct linux_waitid_args *args) 1036 { 1037 int status, options, sig; 1038 struct __wrusage wru; 1039 siginfo_t siginfo; 1040 l_siginfo_t lsi; 1041 idtype_t idtype; 1042 struct proc *p; 1043 int error; 1044 1045 options = 0; 1046 linux_to_bsd_waitopts(args->options, &options); 1047 1048 if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED)) 1049 return (EINVAL); 1050 if (!(options & (WEXITED | WUNTRACED | WCONTINUED))) 1051 return (EINVAL); 1052 1053 switch (args->idtype) { 1054 case LINUX_P_ALL: 1055 idtype = P_ALL; 1056 break; 1057 case LINUX_P_PID: 1058 if (args->id <= 0) 1059 return (EINVAL); 1060 idtype = P_PID; 1061 break; 1062 case LINUX_P_PGID: 1063 if (args->id <= 0) 1064 return (EINVAL); 1065 idtype = P_PGID; 1066 break; 1067 default: 1068 return (EINVAL); 1069 } 1070 1071 error = kern_wait6(td, idtype, args->id, &status, options, 1072 &wru, &siginfo); 1073 if (error != 0) 1074 return (error); 1075 if (args->rusage != NULL) { 1076 error = linux_copyout_rusage(&wru.wru_children, 1077 args->rusage); 1078 if (error != 0) 1079 return (error); 1080 } 1081 if (args->info != NULL) { 1082 p = td->td_proc; 1083 bzero(&lsi, sizeof(lsi)); 1084 if (td->td_retval[0] != 0) { 1085 sig = bsd_to_linux_signal(siginfo.si_signo); 1086 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1087 } 1088 error = copyout(&lsi, args->info, sizeof(lsi)); 1089 } 1090 td->td_retval[0] = 0; 1091 1092 return (error); 1093 } 1094 1095 #ifdef LINUX_LEGACY_SYSCALLS 1096 int 1097 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1098 { 1099 char *path; 1100 int error; 1101 enum uio_seg seg; 1102 bool convpath; 1103 1104 convpath = LUSECONVPATH(td); 1105 if (!convpath) { 1106 path = args->path; 1107 seg = UIO_USERSPACE; 1108 } else { 1109 LCONVPATHCREAT(td, args->path, &path); 1110 seg = UIO_SYSSPACE; 1111 } 1112 1113 switch (args->mode & S_IFMT) { 1114 case S_IFIFO: 1115 case S_IFSOCK: 1116 error = kern_mkfifoat(td, AT_FDCWD, path, seg, 1117 args->mode); 1118 break; 
1119 1120 case S_IFCHR: 1121 case S_IFBLK: 1122 error = kern_mknodat(td, AT_FDCWD, path, seg, 1123 args->mode, args->dev); 1124 break; 1125 1126 case S_IFDIR: 1127 error = EPERM; 1128 break; 1129 1130 case 0: 1131 args->mode |= S_IFREG; 1132 /* FALLTHROUGH */ 1133 case S_IFREG: 1134 error = kern_openat(td, AT_FDCWD, path, seg, 1135 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1136 if (error == 0) 1137 kern_close(td, td->td_retval[0]); 1138 break; 1139 1140 default: 1141 error = EINVAL; 1142 break; 1143 } 1144 if (convpath) 1145 LFREEPATH(path); 1146 return (error); 1147 } 1148 #endif 1149 1150 int 1151 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1152 { 1153 char *path; 1154 int error, dfd; 1155 enum uio_seg seg; 1156 bool convpath; 1157 1158 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1159 1160 convpath = LUSECONVPATH(td); 1161 if (!convpath) { 1162 path = __DECONST(char *, args->filename); 1163 seg = UIO_USERSPACE; 1164 } else { 1165 LCONVPATHCREAT_AT(td, args->filename, &path, dfd); 1166 seg = UIO_SYSSPACE; 1167 } 1168 1169 switch (args->mode & S_IFMT) { 1170 case S_IFIFO: 1171 case S_IFSOCK: 1172 error = kern_mkfifoat(td, dfd, path, seg, args->mode); 1173 break; 1174 1175 case S_IFCHR: 1176 case S_IFBLK: 1177 error = kern_mknodat(td, dfd, path, seg, args->mode, 1178 args->dev); 1179 break; 1180 1181 case S_IFDIR: 1182 error = EPERM; 1183 break; 1184 1185 case 0: 1186 args->mode |= S_IFREG; 1187 /* FALLTHROUGH */ 1188 case S_IFREG: 1189 error = kern_openat(td, dfd, path, seg, 1190 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1191 if (error == 0) 1192 kern_close(td, td->td_retval[0]); 1193 break; 1194 1195 default: 1196 error = EINVAL; 1197 break; 1198 } 1199 if (convpath) 1200 LFREEPATH(path); 1201 return (error); 1202 } 1203 1204 /* 1205 * UGH! This is just about the dumbest idea I've ever heard!! 
1206 */ 1207 int 1208 linux_personality(struct thread *td, struct linux_personality_args *args) 1209 { 1210 struct linux_pemuldata *pem; 1211 struct proc *p = td->td_proc; 1212 uint32_t old; 1213 1214 PROC_LOCK(p); 1215 pem = pem_find(p); 1216 old = pem->persona; 1217 if (args->per != 0xffffffff) 1218 pem->persona = args->per; 1219 PROC_UNLOCK(p); 1220 1221 td->td_retval[0] = old; 1222 return (0); 1223 } 1224 1225 struct l_itimerval { 1226 l_timeval it_interval; 1227 l_timeval it_value; 1228 }; 1229 1230 #define B2L_ITIMERVAL(bip, lip) \ 1231 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1232 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1233 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1234 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1235 1236 int 1237 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1238 { 1239 int error; 1240 struct l_itimerval ls; 1241 struct itimerval aitv, oitv; 1242 1243 if (uap->itv == NULL) { 1244 uap->itv = uap->oitv; 1245 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1246 } 1247 1248 error = copyin(uap->itv, &ls, sizeof(ls)); 1249 if (error != 0) 1250 return (error); 1251 B2L_ITIMERVAL(&aitv, &ls); 1252 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1253 if (error != 0 || uap->oitv == NULL) 1254 return (error); 1255 B2L_ITIMERVAL(&ls, &oitv); 1256 1257 return (copyout(&ls, uap->oitv, sizeof(ls))); 1258 } 1259 1260 int 1261 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1262 { 1263 int error; 1264 struct l_itimerval ls; 1265 struct itimerval aitv; 1266 1267 error = kern_getitimer(td, uap->which, &aitv); 1268 if (error != 0) 1269 return (error); 1270 B2L_ITIMERVAL(&ls, &aitv); 1271 return (copyout(&ls, uap->itv, sizeof(ls))); 1272 } 1273 1274 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1275 int 1276 linux_nice(struct thread *td, struct linux_nice_args *args) 1277 { 1278 1279 return (kern_setpriority(td, 
PRIO_PROCESS, 0, args->inc)); 1280 } 1281 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1282 1283 int 1284 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1285 { 1286 struct ucred *newcred, *oldcred; 1287 l_gid_t *linux_gidset; 1288 gid_t *bsd_gidset; 1289 int ngrp, error; 1290 struct proc *p; 1291 1292 ngrp = args->gidsetsize; 1293 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1294 return (EINVAL); 1295 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1296 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1297 if (error) 1298 goto out; 1299 newcred = crget(); 1300 crextend(newcred, ngrp + 1); 1301 p = td->td_proc; 1302 PROC_LOCK(p); 1303 oldcred = p->p_ucred; 1304 crcopy(newcred, oldcred); 1305 1306 /* 1307 * cr_groups[0] holds egid. Setting the whole set from 1308 * the supplied set will cause egid to be changed too. 1309 * Keep cr_groups[0] unchanged to prevent that. 1310 */ 1311 1312 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1313 PROC_UNLOCK(p); 1314 crfree(newcred); 1315 goto out; 1316 } 1317 1318 if (ngrp > 0) { 1319 newcred->cr_ngroups = ngrp + 1; 1320 1321 bsd_gidset = newcred->cr_groups; 1322 ngrp--; 1323 while (ngrp >= 0) { 1324 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1325 ngrp--; 1326 } 1327 } else 1328 newcred->cr_ngroups = 1; 1329 1330 setsugid(p); 1331 proc_set_cred(p, newcred); 1332 PROC_UNLOCK(p); 1333 crfree(oldcred); 1334 error = 0; 1335 out: 1336 free(linux_gidset, M_LINUX); 1337 return (error); 1338 } 1339 1340 int 1341 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1342 { 1343 struct ucred *cred; 1344 l_gid_t *linux_gidset; 1345 gid_t *bsd_gidset; 1346 int bsd_gidsetsz, ngrp, error; 1347 1348 cred = td->td_ucred; 1349 bsd_gidset = cred->cr_groups; 1350 bsd_gidsetsz = cred->cr_ngroups - 1; 1351 1352 /* 1353 * cr_groups[0] holds egid. Returning the whole set 1354 * here will cause a duplicate. 
Exclude cr_groups[0] 1355 * to prevent that. 1356 */ 1357 1358 if ((ngrp = args->gidsetsize) == 0) { 1359 td->td_retval[0] = bsd_gidsetsz; 1360 return (0); 1361 } 1362 1363 if (ngrp < bsd_gidsetsz) 1364 return (EINVAL); 1365 1366 ngrp = 0; 1367 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1368 M_LINUX, M_WAITOK); 1369 while (ngrp < bsd_gidsetsz) { 1370 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1371 ngrp++; 1372 } 1373 1374 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1375 free(linux_gidset, M_LINUX); 1376 if (error) 1377 return (error); 1378 1379 td->td_retval[0] = ngrp; 1380 return (0); 1381 } 1382 1383 static bool 1384 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1385 { 1386 1387 if (linux_dummy_rlimits == 0) 1388 return (false); 1389 1390 switch (resource) { 1391 case LINUX_RLIMIT_LOCKS: 1392 case LINUX_RLIMIT_SIGPENDING: 1393 case LINUX_RLIMIT_MSGQUEUE: 1394 case LINUX_RLIMIT_RTTIME: 1395 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1396 rlim->rlim_max = LINUX_RLIM_INFINITY; 1397 return (true); 1398 case LINUX_RLIMIT_NICE: 1399 case LINUX_RLIMIT_RTPRIO: 1400 rlim->rlim_cur = 0; 1401 rlim->rlim_max = 0; 1402 return (true); 1403 default: 1404 return (false); 1405 } 1406 } 1407 1408 int 1409 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1410 { 1411 struct rlimit bsd_rlim; 1412 struct l_rlimit rlim; 1413 u_int which; 1414 int error; 1415 1416 if (args->resource >= LINUX_RLIM_NLIMITS) 1417 return (EINVAL); 1418 1419 which = linux_to_bsd_resource[args->resource]; 1420 if (which == -1) 1421 return (EINVAL); 1422 1423 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1424 if (error) 1425 return (error); 1426 1427 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1428 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1429 return (kern_setrlimit(td, which, &bsd_rlim)); 1430 } 1431 1432 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1433 int 1434 linux_old_getrlimit(struct thread *td, 
struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	/* Limits answered with fixed values bypass the native lookup. */
	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	/*
	 * Truncate to the narrower type, then report an all-ones result
	 * as the largest signed value of that width instead.
	 */
#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Copy the limit for Linux resource args->resource out to args->rlim.
 * Fixed-value limits from linux_get_dummy_limit() are answered first;
 * otherwise the resource number is mapped through linux_to_bsd_resource[]
 * and the native limit is cast to l_ulong.  Unknown or unmapped resource
 * numbers yield EINVAL.
 */
int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	/* Translate the Linux policy constant; reject anything else. */
	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	/*
	 * With linux_map_sched_prio set, the priority supplied by the
	 * caller is validated against the Linux ranges and rewritten
	 * before being passed on.
	 */
	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
1545 */ 1546 sched_param.sched_priority = 1547 (sched_param.sched_priority - 1) * 1548 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1549 (LINUX_MAX_RT_PRIO - 1); 1550 break; 1551 } 1552 } 1553 1554 tdt = linux_tdfind(td, args->pid, -1); 1555 if (tdt == NULL) 1556 return (ESRCH); 1557 1558 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1559 PROC_UNLOCK(tdt->td_proc); 1560 return (error); 1561 } 1562 1563 int 1564 linux_sched_getscheduler(struct thread *td, 1565 struct linux_sched_getscheduler_args *args) 1566 { 1567 struct thread *tdt; 1568 int error, policy; 1569 1570 tdt = linux_tdfind(td, args->pid, -1); 1571 if (tdt == NULL) 1572 return (ESRCH); 1573 1574 error = kern_sched_getscheduler(td, tdt, &policy); 1575 PROC_UNLOCK(tdt->td_proc); 1576 1577 switch (policy) { 1578 case SCHED_OTHER: 1579 td->td_retval[0] = LINUX_SCHED_OTHER; 1580 break; 1581 case SCHED_FIFO: 1582 td->td_retval[0] = LINUX_SCHED_FIFO; 1583 break; 1584 case SCHED_RR: 1585 td->td_retval[0] = LINUX_SCHED_RR; 1586 break; 1587 } 1588 return (error); 1589 } 1590 1591 int 1592 linux_sched_get_priority_max(struct thread *td, 1593 struct linux_sched_get_priority_max_args *args) 1594 { 1595 struct sched_get_priority_max_args bsd; 1596 1597 if (linux_map_sched_prio) { 1598 switch (args->policy) { 1599 case LINUX_SCHED_OTHER: 1600 td->td_retval[0] = 0; 1601 return (0); 1602 case LINUX_SCHED_FIFO: 1603 case LINUX_SCHED_RR: 1604 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1605 return (0); 1606 default: 1607 return (EINVAL); 1608 } 1609 } 1610 1611 switch (args->policy) { 1612 case LINUX_SCHED_OTHER: 1613 bsd.policy = SCHED_OTHER; 1614 break; 1615 case LINUX_SCHED_FIFO: 1616 bsd.policy = SCHED_FIFO; 1617 break; 1618 case LINUX_SCHED_RR: 1619 bsd.policy = SCHED_RR; 1620 break; 1621 default: 1622 return (EINVAL); 1623 } 1624 return (sys_sched_get_priority_max(td, &bsd)); 1625 } 1626 1627 int 1628 linux_sched_get_priority_min(struct thread *td, 1629 struct linux_sched_get_priority_min_args *args) 1630 { 
struct sched_get_priority_min_args bsd;

	/* Mapped mode: report the Linux ranges without asking the kernel. */
	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

/* Command values (args->cmd) and magic numbers checked by linux_reboot(). */
#define	REBOOT_CAD_ON		0x89abcdef
#define	REBOOT_CAD_OFF		0
#define	REBOOT_HALT		0xcdef0123
#define	REBOOT_RESTART		0x01234567
#define	REBOOT_RESTART2		0xA1B2C3D4
#define	REBOOT_POWEROFF		0x4321FEDC
#define	REBOOT_MAGIC1		0xfee1dead
#define	REBOOT_MAGIC2		0x28121969
#define	REBOOT_MAGIC2A		0x05121996
#define	REBOOT_MAGIC2B		0x16041998

/*
 * Validate the two magic numbers, then translate args->cmd into native
 * reboot options and call sys_reboot().  The CAD_ON/CAD_OFF commands
 * perform only the PRIV_REBOOT privilege check and return its result.
 * Bad magic numbers or an unknown command yield EINVAL.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

/* Return the process id of the calling thread's process. */
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

return (0);
}

/* Return the Linux thread id stored in the caller's emulation data. */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

/* Return the value of kern_getppid() for the calling thread. */
int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

/* Return the real group id from the calling thread's credential. */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

/* Return the real user id from the calling thread's credential. */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

/* Thin wrapper around kern_getsid() for args->pid. */
int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

/* Handler for unimplemented system calls: always fails with ENOSYS. */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

/*
 * Fetch the priority for args->which / args->who and return it in
 * td_retval[0] as (20 - native value).
 *
 * NOTE(review): the conversion is applied even when kern_getpriority()
 * fails; presumably harmless because td_retval is ignored on error --
 * confirm.
 */
int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

/*
 * Pass args->hostname / args->len to userland_sysctl() for the
 * { CTL_KERN, KERN_HOSTNAME } MIB and return its result.
 */
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

/*
 * Pass args->name / args->len to userland_sysctl() for the
 * { CTL_KERN, KERN_NISDOMAINNAME } MIB and return its result.
 */
int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

/* Terminate via exit1() with args->error_code as the exit value. */
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

/* Header version values recognised by linux_capget()/linux_capset(). */
#define	_LINUX_CAPABILITY_VERSION_1	0x19980330
#define	_LINUX_CAPABILITY_VERSION_2	0x20071026
#define	_LINUX_CAPABILITY_VERSION_3	0x20080522

/* User-supplied header: API version and target pid. */
struct l_user_cap_header {
	l_int version;
	l_int pid;
};

/* One element of the user-visible capability data array. */
struct l_user_cap_data {
	l_int effective;
	l_int permitted;
	l_int inheritable;
};

/*
 * Report the capability sets, which this implementation always returns
 * as all zeroes.
 *
 * The header at uap->hdrp selects how many data elements are written to
 * uap->datap: one for version 1, two for versions 2 and 3.  For any
 * other version the header is rewritten with version 1 and EINVAL is
 * returned.  A non-zero pid in the header yields EPERM; a NULL header
 * pointer yields EFAULT.
 */
int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		/* Tell the caller which version to retry with. */
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
*/
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

/*
 * Accept a capability-set request only if it sets nothing.
 *
 * The header is validated like in linux_capget().  The data elements
 * are then read from uap->datap; if any effective, permitted or
 * inheritable field is non-zero the request is logged through
 * linux_msg() and EPERM is returned.  A NULL header or data pointer
 * yields EFAULT.
 */
int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		/* Tell the caller which version to retry with. */
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}

/*
 * Dispatch on args->option and emulate the supported prctl operations:
 * parent-death signal get/set, dumpable get/set, keep-capabilities
 * get/set and process name get/set.  The seccomp, no-new-privs and
 * ptracer options, and any unrecognised option, fail with EINVAL.
 */
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		/* arg2 carries the user address that receives the signal. */
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control tracability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
*/
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		/* A trace state of -1 is reported as "not dumpable" (0). */
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects. We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side. This should be changed in case
			 * copyinstr() is changed to guarantee this.
*/
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		/* Snapshot the name under the lock, copy out afterwards. */
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		linux_msg(td, "unsupported prctl PR_SET_NO_NEW_PRIVS");
		error = EINVAL;
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * Set the scheduling parameters of thread uap->pid from the structure
 * at uap->param.  With linux_map_sched_prio set, the supplied priority
 * is validated against the Linux range for the thread's current policy
 * and rewritten before being applied.  Returns ESRCH when the thread
 * cannot be found.
 */
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	/* From here on every exit goes through `out' to drop the lock. */
	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

/*
 * Copy the scheduling parameters of thread uap->pid out to uap->param.
 * With linux_map_sched_prio set, the native priority is converted to
 * the Linux range for the thread's policy first (always 0 for
 * SCHED_OTHER).  Returns ESRCH when the thread cannot be found.
 */
int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	/* Every path below unlocks the process exactly once. */
	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
*/
/*
 * The mask is written to args->user_mask_ptr as a cpuset_t and
 * sizeof(cpuset_t) is returned in td_retval[0].  A user buffer length
 * (args->len) smaller than sizeof(cpuset_t) yields EINVAL; an unknown
 * thread yields ESRCH.
 *
 * NOTE(review): tdt->td_tid is read after the process lock is dropped --
 * confirm the thread cannot go away in between.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	int error;
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
	if (error == 0)
		td->td_retval[0] = sizeof(cpuset_t);

	return (error);
}

/*
 * Set affinity of a process.
 *
 * The mask is taken from args->user_mask_ptr as a cpuset_t.  A user
 * buffer length (args->len) smaller than sizeof(cpuset_t) yields EINVAL;
 * an unknown thread yields ESRCH.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
}

/* User-visible limit pair exchanged by linux_prlimit64(). */
struct linux_rlimit64 {
	uint64_t rlim_cur;
	uint64_t rlim_max;
};

/*
 * Get and/or set a resource limit of process args->pid (0 means the
 * calling process).
 *
 * If args->old is non-NULL the current limit is copied out to it, with
 * RLIM_INFINITY translated to LINUX_RLIM_INFINITY.  If args->new is
 * non-NULL the limit read from it is then applied.  A pure query is
 * first offered to linux_get_dummy_limit().  Unknown or unmapped
 * resource numbers yield EINVAL.
 */
int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error;

	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux
		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
		 * as INFINITY so we do not need a conversion even.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	/* Changing a limit asks pget() for a stricter check than reading. */
	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	/* The hold taken above is released at `out'. */
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

 out:
	PRELE(p);
	return (error);
}

/*
 * select() variant taking a timespec timeout (args->tsp) and an optional
 * signal mask described by the structure at args->sig.  On success with
 * a timeout supplied, the remaining time is written back to args->tsp.
 */
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	struct l_timespec lts;
	struct timespec uts;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (args->sig != NULL) {
		error = copyin(args->sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		if (lpse6.ss_len != sizeof(l_ss))
			return (EINVAL);
		if (lpse6.ss != 0) {
			error = copyin(PTRIN(lpse6.ss), &l_ss,
			    sizeof(l_ss));
			if (error != 0)
				return (error);
			linux_to_bsd_sigset(&l_ss, &ss);
			ssp = &ss;
		}
	}

	/*
	 * Currently glibc changes nanosecond number to microsecond.
2335 * This mean losing precision but for now it is hardly seen. 2336 */ 2337 if (args->tsp != NULL) { 2338 error = copyin(args->tsp, <s, sizeof(lts)); 2339 if (error != 0) 2340 return (error); 2341 error = linux_to_native_timespec(&uts, <s); 2342 if (error != 0) 2343 return (error); 2344 2345 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2346 if (itimerfix(&utv)) 2347 return (EINVAL); 2348 2349 microtime(&tv0); 2350 tvp = &utv; 2351 } else 2352 tvp = NULL; 2353 2354 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2355 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2356 2357 if (error == 0 && args->tsp != NULL) { 2358 if (td->td_retval[0] != 0) { 2359 /* 2360 * Compute how much time was left of the timeout, 2361 * by subtracting the current time and the time 2362 * before we started the call, and subtracting 2363 * that result from the user-supplied value. 2364 */ 2365 2366 microtime(&tv1); 2367 timevalsub(&tv1, &tv0); 2368 timevalsub(&utv, &tv1); 2369 if (utv.tv_sec < 0) 2370 timevalclear(&utv); 2371 } else 2372 timevalclear(&utv); 2373 2374 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2375 2376 error = native_to_linux_timespec(<s, &uts); 2377 if (error == 0) 2378 error = copyout(<s, args->tsp, sizeof(lts)); 2379 } 2380 2381 return (error); 2382 } 2383 2384 int 2385 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2386 { 2387 struct timespec ts0, ts1; 2388 struct l_timespec lts; 2389 struct timespec uts, *tsp; 2390 l_sigset_t l_ss; 2391 sigset_t *ssp; 2392 sigset_t ss; 2393 int error; 2394 2395 if (args->sset != NULL) { 2396 if (args->ssize != sizeof(l_ss)) 2397 return (EINVAL); 2398 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2399 if (error) 2400 return (error); 2401 linux_to_bsd_sigset(&l_ss, &ss); 2402 ssp = &ss; 2403 } else 2404 ssp = NULL; 2405 if (args->tsp != NULL) { 2406 error = copyin(args->tsp, <s, sizeof(lts)); 2407 if (error) 2408 return (error); 2409 error = linux_to_native_timespec(&uts, <s); 2410 if (error != 0) 2411 return (error); 2412 
2413 nanotime(&ts0); 2414 tsp = &uts; 2415 } else 2416 tsp = NULL; 2417 2418 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2419 2420 if (error == 0 && args->tsp != NULL) { 2421 if (td->td_retval[0]) { 2422 nanotime(&ts1); 2423 timespecsub(&ts1, &ts0, &ts1); 2424 timespecsub(&uts, &ts1, &uts); 2425 if (uts.tv_sec < 0) 2426 timespecclear(&uts); 2427 } else 2428 timespecclear(&uts); 2429 2430 error = native_to_linux_timespec(<s, &uts); 2431 if (error == 0) 2432 error = copyout(<s, args->tsp, sizeof(lts)); 2433 } 2434 2435 return (error); 2436 } 2437 2438 int 2439 linux_sched_rr_get_interval(struct thread *td, 2440 struct linux_sched_rr_get_interval_args *uap) 2441 { 2442 struct timespec ts; 2443 struct l_timespec lts; 2444 struct thread *tdt; 2445 int error; 2446 2447 /* 2448 * According to man in case the invalid pid specified 2449 * EINVAL should be returned. 2450 */ 2451 if (uap->pid < 0) 2452 return (EINVAL); 2453 2454 tdt = linux_tdfind(td, uap->pid, -1); 2455 if (tdt == NULL) 2456 return (ESRCH); 2457 2458 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2459 PROC_UNLOCK(tdt->td_proc); 2460 if (error != 0) 2461 return (error); 2462 error = native_to_linux_timespec(<s, &ts); 2463 if (error != 0) 2464 return (error); 2465 return (copyout(<s, uap->interval, sizeof(lts))); 2466 } 2467 2468 /* 2469 * In case when the Linux thread is the initial thread in 2470 * the thread group thread id is equal to the process id. 2471 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2472 */ 2473 struct thread * 2474 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2475 { 2476 struct linux_emuldata *em; 2477 struct thread *tdt; 2478 struct proc *p; 2479 2480 tdt = NULL; 2481 if (tid == 0 || tid == td->td_tid) { 2482 tdt = td; 2483 PROC_LOCK(tdt->td_proc); 2484 } else if (tid > PID_MAX) 2485 tdt = tdfind(tid, pid); 2486 else { 2487 /* 2488 * Initial thread where the tid equal to the pid. 
2489 */ 2490 p = pfind(tid); 2491 if (p != NULL) { 2492 if (SV_PROC_ABI(p) != SV_ABI_LINUX) { 2493 /* 2494 * p is not a Linuxulator process. 2495 */ 2496 PROC_UNLOCK(p); 2497 return (NULL); 2498 } 2499 FOREACH_THREAD_IN_PROC(p, tdt) { 2500 em = em_find(tdt); 2501 if (tid == em->em_tid) 2502 return (tdt); 2503 } 2504 PROC_UNLOCK(p); 2505 } 2506 return (NULL); 2507 } 2508 2509 return (tdt); 2510 } 2511 2512 void 2513 linux_to_bsd_waitopts(int options, int *bsdopts) 2514 { 2515 2516 if (options & LINUX_WNOHANG) 2517 *bsdopts |= WNOHANG; 2518 if (options & LINUX_WUNTRACED) 2519 *bsdopts |= WUNTRACED; 2520 if (options & LINUX_WEXITED) 2521 *bsdopts |= WEXITED; 2522 if (options & LINUX_WCONTINUED) 2523 *bsdopts |= WCONTINUED; 2524 if (options & LINUX_WNOWAIT) 2525 *bsdopts |= WNOWAIT; 2526 2527 if (options & __WCLONE) 2528 *bsdopts |= WLINUXCLONE; 2529 } 2530 2531 int 2532 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2533 { 2534 struct uio uio; 2535 struct iovec iov; 2536 int error; 2537 2538 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2539 return (EINVAL); 2540 if (args->count > INT_MAX) 2541 args->count = INT_MAX; 2542 2543 iov.iov_base = args->buf; 2544 iov.iov_len = args->count; 2545 2546 uio.uio_iov = &iov; 2547 uio.uio_iovcnt = 1; 2548 uio.uio_resid = iov.iov_len; 2549 uio.uio_segflg = UIO_USERSPACE; 2550 uio.uio_rw = UIO_READ; 2551 uio.uio_td = td; 2552 2553 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2554 if (error == 0) 2555 td->td_retval[0] = args->count - uio.uio_resid; 2556 return (error); 2557 } 2558 2559 int 2560 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2561 { 2562 2563 /* Needs to be page-aligned */ 2564 if (args->start & PAGE_MASK) 2565 return (EINVAL); 2566 return (kern_mincore(td, args->start, args->len, args->vec)); 2567 } 2568 2569 #define SYSLOG_TAG "<6>" 2570 2571 int 2572 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2573 { 2574 char buf[128], 
*src, *dst; 2575 u_int seq; 2576 int buflen, error; 2577 2578 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2579 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2580 return (EINVAL); 2581 } 2582 2583 if (args->len < 6) { 2584 td->td_retval[0] = 0; 2585 return (0); 2586 } 2587 2588 error = priv_check(td, PRIV_MSGBUF); 2589 if (error) 2590 return (error); 2591 2592 mtx_lock(&msgbuf_lock); 2593 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2594 mtx_unlock(&msgbuf_lock); 2595 2596 dst = args->buf; 2597 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2598 /* The -1 is to skip the trailing '\0'. */ 2599 dst += sizeof(SYSLOG_TAG) - 1; 2600 2601 while (error == 0) { 2602 mtx_lock(&msgbuf_lock); 2603 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2604 mtx_unlock(&msgbuf_lock); 2605 2606 if (buflen == 0) 2607 break; 2608 2609 for (src = buf; src < buf + buflen && error == 0; src++) { 2610 if (*src == '\0') 2611 continue; 2612 2613 if (dst >= args->buf + args->len) 2614 goto out; 2615 2616 error = copyout(src, dst, 1); 2617 dst++; 2618 2619 if (*src == '\n' && *(src + 1) != '<' && 2620 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2621 error = copyout(&SYSLOG_TAG, 2622 dst, sizeof(SYSLOG_TAG)); 2623 dst += sizeof(SYSLOG_TAG) - 1; 2624 } 2625 } 2626 } 2627 out: 2628 td->td_retval[0] = dst - args->buf; 2629 return (error); 2630 } 2631 2632 int 2633 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2634 { 2635 int cpu, error, node; 2636 2637 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2638 error = 0; 2639 node = cpuid_to_pcpu[cpu]->pc_domain; 2640 2641 if (args->cpu != NULL) 2642 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2643 if (args->node != NULL) 2644 error = copyout(&node, args->node, sizeof(l_int)); 2645 return (error); 2646 } 2647