1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/msgbuf.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/poll.h> 54 #include <sys/priv.h> 55 #include <sys/proc.h> 56 #include <sys/procctl.h> 57 #include <sys/reboot.h> 58 #include <sys/racct.h> 59 #include <sys/random.h> 60 #include <sys/resourcevar.h> 61 #include <sys/sched.h> 62 #include <sys/sdt.h> 63 #include <sys/signalvar.h> 64 #include <sys/smp.h> 65 #include <sys/stat.h> 66 #include <sys/syscallsubr.h> 67 #include <sys/sysctl.h> 68 #include <sys/sysproto.h> 69 #include <sys/systm.h> 70 #include <sys/time.h> 71 #include <sys/vmmeter.h> 72 #include <sys/vnode.h> 73 #include <sys/wait.h> 74 #include <sys/cpuset.h> 75 #include <sys/uio.h> 76 77 #include <security/mac/mac_framework.h> 78 79 #include <vm/vm.h> 80 #include <vm/pmap.h> 81 #include <vm/vm_kern.h> 82 #include <vm/vm_map.h> 83 #include <vm/vm_extern.h> 84 #include <vm/swap_pager.h> 85 86 #ifdef COMPAT_LINUX32 87 #include <machine/../linux32/linux.h> 88 #include <machine/../linux32/linux32_proto.h> 89 #else 90 #include <machine/../linux/linux.h> 91 #include <machine/../linux/linux_proto.h> 92 #endif 93 94 #include <compat/linux/linux_common.h> 95 #include <compat/linux/linux_dtrace.h> 96 #include <compat/linux/linux_file.h> 97 #include <compat/linux/linux_mib.h> 98 #include <compat/linux/linux_signal.h> 99 #include <compat/linux/linux_timer.h> 100 #include <compat/linux/linux_util.h> 101 #include <compat/linux/linux_sysproto.h> 102 #include <compat/linux/linux_emul.h> 103 #include <compat/linux/linux_misc.h> 104 105 int stclohz; /* Statistics clock frequency */ 106 107 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 108 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 109 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 110 RLIMIT_MEMLOCK, RLIMIT_AS 111 }; 112 113 struct l_sysinfo { 114 l_long uptime; /* Seconds since boot */ 115 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 116 #define LINUX_SYSINFO_LOADS_SCALE 65536 117 l_ulong totalram; /* Total usable main memory size */ 118 l_ulong freeram; /* Available memory size */ 119 l_ulong sharedram; /* Amount of shared memory */ 120 l_ulong bufferram; /* Memory used by buffers */ 121 l_ulong totalswap; /* Total swap space size */ 122 l_ulong freeswap; /* swap space still available */ 123 l_ushort procs; /* Number of current processes */ 124 l_ushort pads; 125 l_ulong totalhigh; 126 l_ulong freehigh; 127 l_uint mem_unit; 128 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 129 }; 130 131 struct l_pselect6arg { 132 l_uintptr_t ss; 133 l_size_t ss_len; 134 }; 135 136 static int linux_utimensat_lts_to_ts(struct l_timespec *, 137 struct timespec *); 138 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 139 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 140 struct timespec *); 141 #endif 142 static int linux_common_utimensat(struct thread *, int, 143 const char *, struct timespec *, int); 144 static int linux_common_pselect6(struct thread *, l_int, 145 l_fd_set *, l_fd_set *, l_fd_set *, 146 struct timespec *, l_uintptr_t *); 147 static int linux_common_ppoll(struct thread *, struct pollfd *, 148 uint32_t, struct timespec *, l_sigset_t *, 149 l_size_t); 150 static int linux_pollin(struct thread *, struct pollfd *, 151 struct pollfd *, u_int); 152 static int linux_pollout(struct thread *, struct pollfd *, 153 struct pollfd *, u_int); 154 155 int 156 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 157 { 158 struct l_sysinfo sysinfo; 159 int i, j; 160 struct timespec ts; 161 162 bzero(&sysinfo, sizeof(sysinfo)); 163 getnanouptime(&ts); 164 if (ts.tv_nsec != 0) 165 ts.tv_sec++; 166 sysinfo.uptime = ts.tv_sec; 167 168 /* Use the information from the mib to get our load averages */ 169 for (i = 0; i < 3; i++) 170 sysinfo.loads[i] = averunnable.ldavg[i] * 171 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 172 173 sysinfo.totalram = physmem * PAGE_SIZE; 174 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 175 176 /* 177 * sharedram counts pages allocated to named, swap-backed objects such 178 * as shared memory segments and tmpfs files. There is no cheap way to 179 * compute this, so just leave the field unpopulated. Linux itself only 180 * started setting this field in the 3.x timeframe. 181 */ 182 sysinfo.sharedram = 0; 183 sysinfo.bufferram = 0; 184 185 swap_pager_status(&i, &j); 186 sysinfo.totalswap = i * PAGE_SIZE; 187 sysinfo.freeswap = (i - j) * PAGE_SIZE; 188 189 sysinfo.procs = nprocs; 190 191 /* 192 * Platforms supported by the emulation layer do not have a notion of 193 * high memory. 194 */ 195 sysinfo.totalhigh = 0; 196 sysinfo.freehigh = 0; 197 198 sysinfo.mem_unit = 1; 199 200 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 201 } 202 203 #ifdef LINUX_LEGACY_SYSCALLS 204 int 205 linux_alarm(struct thread *td, struct linux_alarm_args *args) 206 { 207 struct itimerval it, old_it; 208 u_int secs; 209 int error __diagused; 210 211 secs = args->secs; 212 /* 213 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 214 * to match kern_setitimer()'s limit to avoid error from it. 215 * 216 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 217 * platforms. 218 */ 219 if (secs > INT32_MAX / 2) 220 secs = INT32_MAX / 2; 221 222 it.it_value.tv_sec = secs; 223 it.it_value.tv_usec = 0; 224 timevalclear(&it.it_interval); 225 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 226 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 227 228 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 229 old_it.it_value.tv_usec >= 500000) 230 old_it.it_value.tv_sec++; 231 td->td_retval[0] = old_it.it_value.tv_sec; 232 return (0); 233 } 234 #endif 235 236 int 237 linux_brk(struct thread *td, struct linux_brk_args *args) 238 { 239 struct vmspace *vm = td->td_proc->p_vmspace; 240 uintptr_t new, old; 241 242 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 243 new = (uintptr_t)args->dsend; 244 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 245 td->td_retval[0] = (register_t)new; 246 else 247 td->td_retval[0] = (register_t)old; 248 249 return (0); 250 } 251 252 #if defined(__i386__) 253 /* XXX: what about amd64/linux32? */ 254 255 int 256 linux_uselib(struct thread *td, struct linux_uselib_args *args) 257 { 258 struct nameidata ni; 259 struct vnode *vp; 260 struct exec *a_out; 261 vm_map_t map; 262 vm_map_entry_t entry; 263 struct vattr attr; 264 vm_offset_t vmaddr; 265 unsigned long file_offset; 266 unsigned long bss_size; 267 char *library; 268 ssize_t aresid; 269 int error; 270 bool locked, opened, textset; 271 272 a_out = NULL; 273 vp = NULL; 274 locked = false; 275 textset = false; 276 opened = false; 277 278 if (!LUSECONVPATH(td)) { 279 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 280 UIO_USERSPACE, args->library); 281 error = namei(&ni); 282 } else { 283 LCONVPATHEXIST(args->library, &library); 284 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 285 UIO_SYSSPACE, library); 286 error = namei(&ni); 287 LFREEPATH(library); 288 } 289 if (error) 290 goto cleanup; 291 292 vp = ni.ni_vp; 293 NDFREE_PNBUF(&ni); 294 295 /* 296 * From here on down, we have a locked vnode that must be unlocked. 297 * XXX: The code below largely duplicates exec_check_permissions(). 298 */ 299 locked = true; 300 301 /* Executable? */ 302 error = VOP_GETATTR(vp, &attr, td->td_ucred); 303 if (error) 304 goto cleanup; 305 306 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 307 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 308 /* EACCESS is what exec(2) returns. */ 309 error = ENOEXEC; 310 goto cleanup; 311 } 312 313 /* Sensible size? */ 314 if (attr.va_size == 0) { 315 error = ENOEXEC; 316 goto cleanup; 317 } 318 319 /* Can we access it? */ 320 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 321 if (error) 322 goto cleanup; 323 324 /* 325 * XXX: This should use vn_open() so that it is properly authorized, 326 * and to reduce code redundancy all over the place here. 327 * XXX: Not really, it duplicates far more of exec_check_permissions() 328 * than vn_open(). 329 */ 330 #ifdef MAC 331 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 332 if (error) 333 goto cleanup; 334 #endif 335 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 336 if (error) 337 goto cleanup; 338 opened = true; 339 340 /* Pull in executable header into exec_map */ 341 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 342 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 343 if (error) 344 goto cleanup; 345 346 /* Is it a Linux binary ? */ 347 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 348 error = ENOEXEC; 349 goto cleanup; 350 } 351 352 /* 353 * While we are here, we should REALLY do some more checks 354 */ 355 356 /* Set file/virtual offset based on a.out variant. */ 357 switch ((int)(a_out->a_magic & 0xffff)) { 358 case 0413: /* ZMAGIC */ 359 file_offset = 1024; 360 break; 361 case 0314: /* QMAGIC */ 362 file_offset = 0; 363 break; 364 default: 365 error = ENOEXEC; 366 goto cleanup; 367 } 368 369 bss_size = round_page(a_out->a_bss); 370 371 /* Check various fields in header for validity/bounds. */ 372 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 373 error = ENOEXEC; 374 goto cleanup; 375 } 376 377 /* text + data can't exceed file size */ 378 if (a_out->a_data + a_out->a_text > attr.va_size) { 379 error = EFAULT; 380 goto cleanup; 381 } 382 383 /* 384 * text/data/bss must not exceed limits 385 * XXX - this is not complete. it should check current usage PLUS 386 * the resources needed by this library. 387 */ 388 PROC_LOCK(td->td_proc); 389 if (a_out->a_text > maxtsiz || 390 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 391 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 392 bss_size) != 0) { 393 PROC_UNLOCK(td->td_proc); 394 error = ENOMEM; 395 goto cleanup; 396 } 397 PROC_UNLOCK(td->td_proc); 398 399 /* 400 * Prevent more writers. 401 */ 402 error = VOP_SET_TEXT(vp); 403 if (error != 0) 404 goto cleanup; 405 textset = true; 406 407 /* 408 * Lock no longer needed 409 */ 410 locked = false; 411 VOP_UNLOCK(vp); 412 413 /* 414 * Check if file_offset page aligned. Currently we cannot handle 415 * misalinged file offsets, and so we read in the entire image 416 * (what a waste). 417 */ 418 if (file_offset & PAGE_MASK) { 419 /* Map text+data read/write/execute */ 420 421 /* a_entry is the load address and is page aligned */ 422 vmaddr = trunc_page(a_out->a_entry); 423 424 /* get anon user mapping, read+write+execute */ 425 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 426 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 427 VM_PROT_ALL, VM_PROT_ALL, 0); 428 if (error) 429 goto cleanup; 430 431 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 432 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 433 td->td_ucred, NOCRED, &aresid, td); 434 if (error != 0) 435 goto cleanup; 436 if (aresid != 0) { 437 error = ENOEXEC; 438 goto cleanup; 439 } 440 } else { 441 /* 442 * for QMAGIC, a_entry is 20 bytes beyond the load address 443 * to skip the executable header 444 */ 445 vmaddr = trunc_page(a_out->a_entry); 446 447 /* 448 * Map it all into the process's space as a single 449 * copy-on-write "data" segment. 450 */ 451 map = &td->td_proc->p_vmspace->vm_map; 452 error = vm_mmap(map, &vmaddr, 453 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 454 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 455 if (error) 456 goto cleanup; 457 vm_map_lock(map); 458 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 459 vm_map_unlock(map); 460 error = EDOOFUS; 461 goto cleanup; 462 } 463 entry->eflags |= MAP_ENTRY_VN_EXEC; 464 vm_map_unlock(map); 465 textset = false; 466 } 467 468 if (bss_size != 0) { 469 /* Calculate BSS start address */ 470 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 471 a_out->a_data; 472 473 /* allocate some 'anon' space */ 474 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 475 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 476 VM_PROT_ALL, 0); 477 if (error) 478 goto cleanup; 479 } 480 481 cleanup: 482 if (opened) { 483 if (locked) 484 VOP_UNLOCK(vp); 485 locked = false; 486 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 487 } 488 if (textset) { 489 if (!locked) { 490 locked = true; 491 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 492 } 493 VOP_UNSET_TEXT_CHECKED(vp); 494 } 495 if (locked) 496 VOP_UNLOCK(vp); 497 498 /* Release the temporary mapping. */ 499 if (a_out) 500 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 501 502 return (error); 503 } 504 505 #endif /* __i386__ */ 506 507 #ifdef LINUX_LEGACY_SYSCALLS 508 int 509 linux_select(struct thread *td, struct linux_select_args *args) 510 { 511 l_timeval ltv; 512 struct timeval tv0, tv1, utv, *tvp; 513 int error; 514 515 /* 516 * Store current time for computation of the amount of 517 * time left. 518 */ 519 if (args->timeout) { 520 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 521 goto select_out; 522 utv.tv_sec = ltv.tv_sec; 523 utv.tv_usec = ltv.tv_usec; 524 525 if (itimerfix(&utv)) { 526 /* 527 * The timeval was invalid. Convert it to something 528 * valid that will act as it does under Linux. 529 */ 530 utv.tv_sec += utv.tv_usec / 1000000; 531 utv.tv_usec %= 1000000; 532 if (utv.tv_usec < 0) { 533 utv.tv_sec -= 1; 534 utv.tv_usec += 1000000; 535 } 536 if (utv.tv_sec < 0) 537 timevalclear(&utv); 538 } 539 microtime(&tv0); 540 tvp = &utv; 541 } else 542 tvp = NULL; 543 544 error = kern_select(td, args->nfds, args->readfds, args->writefds, 545 args->exceptfds, tvp, LINUX_NFDBITS); 546 if (error) 547 goto select_out; 548 549 if (args->timeout) { 550 if (td->td_retval[0]) { 551 /* 552 * Compute how much time was left of the timeout, 553 * by subtracting the current time and the time 554 * before we started the call, and subtracting 555 * that result from the user-supplied value. 556 */ 557 microtime(&tv1); 558 timevalsub(&tv1, &tv0); 559 timevalsub(&utv, &tv1); 560 if (utv.tv_sec < 0) 561 timevalclear(&utv); 562 } else 563 timevalclear(&utv); 564 ltv.tv_sec = utv.tv_sec; 565 ltv.tv_usec = utv.tv_usec; 566 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 567 goto select_out; 568 } 569 570 select_out: 571 return (error); 572 } 573 #endif 574 575 int 576 linux_mremap(struct thread *td, struct linux_mremap_args *args) 577 { 578 uintptr_t addr; 579 size_t len; 580 int error = 0; 581 582 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 583 td->td_retval[0] = 0; 584 return (EINVAL); 585 } 586 587 /* 588 * Check for the page alignment. 589 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 590 */ 591 if (args->addr & PAGE_MASK) { 592 td->td_retval[0] = 0; 593 return (EINVAL); 594 } 595 596 args->new_len = round_page(args->new_len); 597 args->old_len = round_page(args->old_len); 598 599 if (args->new_len > args->old_len) { 600 td->td_retval[0] = 0; 601 return (ENOMEM); 602 } 603 604 if (args->new_len < args->old_len) { 605 addr = args->addr + args->new_len; 606 len = args->old_len - args->new_len; 607 error = kern_munmap(td, addr, len); 608 } 609 610 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 611 return (error); 612 } 613 614 #define LINUX_MS_ASYNC 0x0001 615 #define LINUX_MS_INVALIDATE 0x0002 616 #define LINUX_MS_SYNC 0x0004 617 618 int 619 linux_msync(struct thread *td, struct linux_msync_args *args) 620 { 621 622 return (kern_msync(td, args->addr, args->len, 623 args->fl & ~LINUX_MS_SYNC)); 624 } 625 626 #ifdef LINUX_LEGACY_SYSCALLS 627 int 628 linux_time(struct thread *td, struct linux_time_args *args) 629 { 630 struct timeval tv; 631 l_time_t tm; 632 int error; 633 634 microtime(&tv); 635 tm = tv.tv_sec; 636 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 637 return (error); 638 td->td_retval[0] = tm; 639 return (0); 640 } 641 #endif 642 643 struct l_times_argv { 644 l_clock_t tms_utime; 645 l_clock_t tms_stime; 646 l_clock_t tms_cutime; 647 l_clock_t tms_cstime; 648 }; 649 650 /* 651 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 652 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 653 * auxiliary vector entry. 654 */ 655 #define CLK_TCK 100 656 657 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 658 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 659 660 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 661 CONVNTCK(r) : CONVOTCK(r)) 662 663 int 664 linux_times(struct thread *td, struct linux_times_args *args) 665 { 666 struct timeval tv, utime, stime, cutime, cstime; 667 struct l_times_argv tms; 668 struct proc *p; 669 int error; 670 671 if (args->buf != NULL) { 672 p = td->td_proc; 673 PROC_LOCK(p); 674 PROC_STATLOCK(p); 675 calcru(p, &utime, &stime); 676 PROC_STATUNLOCK(p); 677 calccru(p, &cutime, &cstime); 678 PROC_UNLOCK(p); 679 680 tms.tms_utime = CONVTCK(utime); 681 tms.tms_stime = CONVTCK(stime); 682 683 tms.tms_cutime = CONVTCK(cutime); 684 tms.tms_cstime = CONVTCK(cstime); 685 686 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 687 return (error); 688 } 689 690 microuptime(&tv); 691 td->td_retval[0] = (int)CONVTCK(tv); 692 return (0); 693 } 694 695 int 696 linux_newuname(struct thread *td, struct linux_newuname_args *args) 697 { 698 struct l_new_utsname utsname; 699 char osname[LINUX_MAX_UTSNAME]; 700 char osrelease[LINUX_MAX_UTSNAME]; 701 char *p; 702 703 linux_get_osname(td, osname); 704 linux_get_osrelease(td, osrelease); 705 706 bzero(&utsname, sizeof(utsname)); 707 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 708 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 709 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 710 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 711 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 712 for (p = utsname.version; *p != '\0'; ++p) 713 if (*p == '\n') { 714 *p = '\0'; 715 break; 716 } 717 #if defined(__amd64__) 718 /* 719 * On amd64, Linux uname(2) needs to return "x86_64" 720 * for both 64-bit and 32-bit applications. On 32-bit, 721 * the string returned by getauxval(AT_PLATFORM) needs 722 * to remain "i686", though. 723 */ 724 #if defined(COMPAT_LINUX32) 725 if (linux32_emulate_i386) 726 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 727 else 728 #endif 729 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 730 #elif defined(__aarch64__) 731 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 732 #elif defined(__i386__) 733 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 734 #endif 735 736 return (copyout(&utsname, args->buf, sizeof(utsname))); 737 } 738 739 struct l_utimbuf { 740 l_time_t l_actime; 741 l_time_t l_modtime; 742 }; 743 744 #ifdef LINUX_LEGACY_SYSCALLS 745 int 746 linux_utime(struct thread *td, struct linux_utime_args *args) 747 { 748 struct timeval tv[2], *tvp; 749 struct l_utimbuf lut; 750 char *fname; 751 int error; 752 753 if (args->times) { 754 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 755 return (error); 756 tv[0].tv_sec = lut.l_actime; 757 tv[0].tv_usec = 0; 758 tv[1].tv_sec = lut.l_modtime; 759 tv[1].tv_usec = 0; 760 tvp = tv; 761 } else 762 tvp = NULL; 763 764 if (!LUSECONVPATH(td)) { 765 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 766 tvp, UIO_SYSSPACE); 767 } else { 768 LCONVPATHEXIST(args->fname, &fname); 769 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 770 UIO_SYSSPACE); 771 LFREEPATH(fname); 772 } 773 return (error); 774 } 775 #endif 776 777 #ifdef LINUX_LEGACY_SYSCALLS 778 int 779 linux_utimes(struct thread *td, struct linux_utimes_args *args) 780 { 781 l_timeval ltv[2]; 782 struct timeval tv[2], *tvp = NULL; 783 char *fname; 784 int error; 785 786 if (args->tptr != NULL) { 787 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 788 return (error); 789 tv[0].tv_sec = ltv[0].tv_sec; 790 tv[0].tv_usec = ltv[0].tv_usec; 791 tv[1].tv_sec = ltv[1].tv_sec; 792 tv[1].tv_usec = ltv[1].tv_usec; 793 tvp = tv; 794 } 795 796 if (!LUSECONVPATH(td)) { 797 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 798 tvp, UIO_SYSSPACE); 799 } else { 800 LCONVPATHEXIST(args->fname, &fname); 801 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 802 tvp, UIO_SYSSPACE); 803 LFREEPATH(fname); 804 } 805 return (error); 806 } 807 #endif 808 809 static int 810 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 811 { 812 813 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 814 l_times->tv_nsec != LINUX_UTIME_NOW && 815 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 816 return (EINVAL); 817 818 times->tv_sec = l_times->tv_sec; 819 switch (l_times->tv_nsec) 820 { 821 case LINUX_UTIME_OMIT: 822 times->tv_nsec = UTIME_OMIT; 823 break; 824 case LINUX_UTIME_NOW: 825 times->tv_nsec = UTIME_NOW; 826 break; 827 default: 828 times->tv_nsec = l_times->tv_nsec; 829 } 830 831 return (0); 832 } 833 834 static int 835 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 836 struct timespec *timesp, int lflags) 837 { 838 char *path = NULL; 839 int error, dfd, flags = 0; 840 841 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 842 843 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 844 return (EINVAL); 845 846 if (timesp != NULL) { 847 /* This breaks POSIX, but is what the Linux kernel does 848 * _on purpose_ (documented in the man page for utimensat(2)), 849 * so we must follow that behaviour. */ 850 if (timesp[0].tv_nsec == UTIME_OMIT && 851 timesp[1].tv_nsec == UTIME_OMIT) 852 return (0); 853 } 854 855 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 856 flags |= AT_SYMLINK_NOFOLLOW; 857 if (lflags & LINUX_AT_EMPTY_PATH) 858 flags |= AT_EMPTY_PATH; 859 860 if (!LUSECONVPATH(td)) { 861 if (pathname != NULL) { 862 return (kern_utimensat(td, dfd, pathname, 863 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 864 } 865 } 866 867 if (pathname != NULL) 868 LCONVPATHEXIST_AT(pathname, &path, dfd); 869 else if (lflags != 0) 870 return (EINVAL); 871 872 if (path == NULL) 873 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 874 else { 875 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 876 UIO_SYSSPACE, flags); 877 LFREEPATH(path); 878 } 879 880 return (error); 881 } 882 883 int 884 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 885 { 886 struct l_timespec l_times[2]; 887 struct timespec times[2], *timesp; 888 int error; 889 890 if (args->times != NULL) { 891 error = copyin(args->times, l_times, sizeof(l_times)); 892 if (error != 0) 893 return (error); 894 895 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 896 if (error != 0) 897 return (error); 898 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 899 if (error != 0) 900 return (error); 901 timesp = times; 902 } else 903 timesp = NULL; 904 905 return (linux_common_utimensat(td, args->dfd, args->pathname, 906 timesp, args->flags)); 907 } 908 909 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 910 static int 911 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 912 { 913 914 /* Zero out the padding in compat mode. */ 915 l_times->tv_nsec &= 0xFFFFFFFFUL; 916 917 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 918 l_times->tv_nsec != LINUX_UTIME_NOW && 919 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 920 return (EINVAL); 921 922 times->tv_sec = l_times->tv_sec; 923 switch (l_times->tv_nsec) 924 { 925 case LINUX_UTIME_OMIT: 926 times->tv_nsec = UTIME_OMIT; 927 break; 928 case LINUX_UTIME_NOW: 929 times->tv_nsec = UTIME_NOW; 930 break; 931 default: 932 times->tv_nsec = l_times->tv_nsec; 933 } 934 935 return (0); 936 } 937 938 int 939 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 940 { 941 struct l_timespec64 l_times[2]; 942 struct timespec times[2], *timesp; 943 int error; 944 945 if (args->times64 != NULL) { 946 error = copyin(args->times64, l_times, sizeof(l_times)); 947 if (error != 0) 948 return (error); 949 950 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 951 if (error != 0) 952 return (error); 953 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 954 if (error != 0) 955 return (error); 956 timesp = times; 957 } else 958 timesp = NULL; 959 960 return (linux_common_utimensat(td, args->dfd, args->pathname, 961 timesp, args->flags)); 962 } 963 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 964 965 #ifdef LINUX_LEGACY_SYSCALLS 966 int 967 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 968 { 969 l_timeval ltv[2]; 970 struct timeval tv[2], *tvp = NULL; 971 char *fname; 972 int error, dfd; 973 974 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 975 976 if (args->utimes != NULL) { 977 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 978 return (error); 979 tv[0].tv_sec = ltv[0].tv_sec; 980 tv[0].tv_usec = ltv[0].tv_usec; 981 tv[1].tv_sec = ltv[1].tv_sec; 982 tv[1].tv_usec = ltv[1].tv_usec; 983 tvp = tv; 984 } 985 986 if (!LUSECONVPATH(td)) { 987 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 988 tvp, UIO_SYSSPACE); 989 } else { 990 LCONVPATHEXIST_AT(args->filename, &fname, dfd); 991 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, 992 tvp, UIO_SYSSPACE); 993 LFREEPATH(fname); 994 } 995 return (error); 996 } 997 #endif 998 999 static int 1000 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 1001 int options, void *rup, l_siginfo_t *infop) 1002 { 1003 l_siginfo_t lsi; 1004 siginfo_t siginfo; 1005 struct __wrusage wru; 1006 int error, status, tmpstat, sig; 1007 1008 error = kern_wait6(td, idtype, id, &status, options, 1009 rup != NULL ? &wru : NULL, &siginfo); 1010 1011 if (error == 0 && statusp) { 1012 tmpstat = status & 0xffff; 1013 if (WIFSIGNALED(tmpstat)) { 1014 tmpstat = (tmpstat & 0xffffff80) | 1015 bsd_to_linux_signal(WTERMSIG(tmpstat)); 1016 } else if (WIFSTOPPED(tmpstat)) { 1017 tmpstat = (tmpstat & 0xffff00ff) | 1018 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 1019 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 1020 if (WSTOPSIG(status) == SIGTRAP) { 1021 tmpstat = linux_ptrace_status(td, 1022 siginfo.si_pid, tmpstat); 1023 } 1024 #endif 1025 } else if (WIFCONTINUED(tmpstat)) { 1026 tmpstat = 0xffff; 1027 } 1028 error = copyout(&tmpstat, statusp, sizeof(int)); 1029 } 1030 if (error == 0 && rup != NULL) 1031 error = linux_copyout_rusage(&wru.wru_self, rup); 1032 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 1033 sig = bsd_to_linux_signal(siginfo.si_signo); 1034 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1035 error = copyout(&lsi, infop, sizeof(lsi)); 1036 } 1037 1038 return (error); 1039 } 1040 1041 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1042 int 1043 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 1044 { 1045 struct linux_wait4_args wait4_args; 1046 1047 wait4_args.pid = args->pid; 1048 wait4_args.status = args->status; 1049 wait4_args.options = args->options; 1050 wait4_args.rusage = NULL; 1051 1052 return (linux_wait4(td, &wait4_args)); 1053 } 1054 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1055 1056 int 1057 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1058 { 1059 struct proc *p; 1060 int options, id, idtype; 1061 1062 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1063 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1064 return (EINVAL); 1065 1066 /* -INT_MIN is not defined. */ 1067 if (args->pid == INT_MIN) 1068 return (ESRCH); 1069 1070 options = 0; 1071 linux_to_bsd_waitopts(args->options, &options); 1072 1073 /* 1074 * For backward compatibility we implicitly add flags WEXITED 1075 * and WTRAPPED here. 1076 */ 1077 options |= WEXITED | WTRAPPED; 1078 1079 if (args->pid == WAIT_ANY) { 1080 idtype = P_ALL; 1081 id = 0; 1082 } else if (args->pid < 0) { 1083 idtype = P_PGID; 1084 id = (id_t)-args->pid; 1085 } else if (args->pid == 0) { 1086 idtype = P_PGID; 1087 p = td->td_proc; 1088 PROC_LOCK(p); 1089 id = p->p_pgid; 1090 PROC_UNLOCK(p); 1091 } else { 1092 idtype = P_PID; 1093 id = (id_t)args->pid; 1094 } 1095 1096 return (linux_common_wait(td, idtype, id, args->status, options, 1097 args->rusage, NULL)); 1098 } 1099 1100 int 1101 linux_waitid(struct thread *td, struct linux_waitid_args *args) 1102 { 1103 idtype_t idtype; 1104 int error, options; 1105 struct proc *p; 1106 pid_t id; 1107 1108 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 1109 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1110 return (EINVAL); 1111 1112 options = 0; 1113 linux_to_bsd_waitopts(args->options, &options); 1114 1115 id = args->id; 1116 switch (args->idtype) { 1117 case LINUX_P_ALL: 1118 idtype = P_ALL; 1119 break; 1120 case LINUX_P_PID: 1121 if (args->id <= 0) 1122 return (EINVAL); 1123 idtype = P_PID; 1124 break; 1125 case LINUX_P_PGID: 1126 if (linux_use54(td) && args->id == 0) { 1127 p = td->td_proc; 1128 PROC_LOCK(p); 1129 id = p->p_pgid; 1130 PROC_UNLOCK(p); 1131 } else if (args->id <= 0) 1132 return (EINVAL); 1133 idtype = P_PGID; 1134 break; 1135 case LINUX_P_PIDFD: 1136 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 1137 return (ENOSYS); 1138 default: 1139 return (EINVAL); 1140 } 1141 1142 error = linux_common_wait(td, idtype, id, NULL, options, 1143 args->rusage, args->info); 1144 td->td_retval[0] = 0; 1145 1146 return (error); 1147 } 1148 1149 #ifdef LINUX_LEGACY_SYSCALLS 1150 int 1151 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1152 { 1153 char *path; 1154 int error; 1155 enum uio_seg seg; 1156 bool convpath; 1157 1158 convpath = LUSECONVPATH(td); 1159 if (!convpath) { 1160 path = args->path; 1161 seg = UIO_USERSPACE; 1162 } else { 1163 LCONVPATHCREAT(args->path, &path); 1164 seg = UIO_SYSSPACE; 1165 } 1166 1167 switch (args->mode & S_IFMT) { 1168 case S_IFIFO: 1169 case S_IFSOCK: 1170 error = kern_mkfifoat(td, AT_FDCWD, path, seg, 1171 args->mode); 1172 break; 1173 1174 case S_IFCHR: 1175 case S_IFBLK: 1176 error = kern_mknodat(td, AT_FDCWD, path, seg, 1177 args->mode, args->dev); 1178 break; 1179 1180 case S_IFDIR: 1181 error = EPERM; 1182 break; 1183 1184 case 0: 1185 args->mode |= S_IFREG; 1186 /* FALLTHROUGH */ 1187 case S_IFREG: 1188 error = kern_openat(td, AT_FDCWD, path, seg, 1189 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1190 if (error == 0) 1191 kern_close(td, td->td_retval[0]); 1192 break; 1193 1194 default: 1195 error = EINVAL; 1196 break; 1197 } 1198 if (convpath) 1199 LFREEPATH(path); 1200 return (error); 1201 } 1202 #endif 1203 1204 int 1205 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1206 { 1207 char *path; 1208 int error, dfd; 1209 enum uio_seg seg; 1210 bool convpath; 1211 1212 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1213 1214 convpath = LUSECONVPATH(td); 1215 if (!convpath) { 1216 path = __DECONST(char *, args->filename); 1217 seg = UIO_USERSPACE; 1218 } else { 1219 LCONVPATHCREAT_AT(args->filename, &path, dfd); 1220 seg = UIO_SYSSPACE; 1221 } 1222 1223 switch (args->mode & S_IFMT) { 1224 case S_IFIFO: 1225 case S_IFSOCK: 1226 error = kern_mkfifoat(td, dfd, path, seg, args->mode); 1227 break; 1228 1229 case S_IFCHR: 1230 case S_IFBLK: 1231 error = kern_mknodat(td, dfd, path, seg, args->mode, 1232 args->dev); 1233 break; 1234 1235 case S_IFDIR: 1236 error = EPERM; 1237 break; 1238 1239 case 0: 1240 args->mode |= S_IFREG; 1241 /* FALLTHROUGH */ 1242 case S_IFREG: 1243 error = kern_openat(td, dfd, path, seg, 1244 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1245 if (error == 0) 1246 kern_close(td, td->td_retval[0]); 1247 break; 1248 1249 default: 1250 error = EINVAL; 1251 break; 1252 } 1253 if (convpath) 1254 LFREEPATH(path); 1255 return (error); 1256 } 1257 1258 /* 1259 * UGH! This is just about the dumbest idea I've ever heard!! 1260 */ 1261 int 1262 linux_personality(struct thread *td, struct linux_personality_args *args) 1263 { 1264 struct linux_pemuldata *pem; 1265 struct proc *p = td->td_proc; 1266 uint32_t old; 1267 1268 PROC_LOCK(p); 1269 pem = pem_find(p); 1270 old = pem->persona; 1271 if (args->per != 0xffffffff) 1272 pem->persona = args->per; 1273 PROC_UNLOCK(p); 1274 1275 td->td_retval[0] = old; 1276 return (0); 1277 } 1278 1279 struct l_itimerval { 1280 l_timeval it_interval; 1281 l_timeval it_value; 1282 }; 1283 1284 #define B2L_ITIMERVAL(bip, lip) \ 1285 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1286 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1287 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1288 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1289 1290 int 1291 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1292 { 1293 int error; 1294 struct l_itimerval ls; 1295 struct itimerval aitv, oitv; 1296 1297 if (uap->itv == NULL) { 1298 uap->itv = uap->oitv; 1299 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1300 } 1301 1302 error = copyin(uap->itv, &ls, sizeof(ls)); 1303 if (error != 0) 1304 return (error); 1305 B2L_ITIMERVAL(&aitv, &ls); 1306 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1307 if (error != 0 || uap->oitv == NULL) 1308 return (error); 1309 B2L_ITIMERVAL(&ls, &oitv); 1310 1311 return (copyout(&ls, uap->oitv, sizeof(ls))); 1312 } 1313 1314 int 1315 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1316 { 1317 int error; 1318 struct l_itimerval ls; 1319 struct itimerval aitv; 1320 1321 error = kern_getitimer(td, uap->which, &aitv); 1322 if (error != 0) 1323 return (error); 1324 B2L_ITIMERVAL(&ls, &aitv); 1325 return (copyout(&ls, uap->itv, sizeof(ls))); 1326 } 1327 1328 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1329 int 1330 linux_nice(struct thread *td, struct linux_nice_args *args) 1331 { 1332 1333 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1334 } 1335 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1336 1337 int 1338 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1339 { 1340 struct ucred *newcred, *oldcred; 1341 l_gid_t *linux_gidset; 1342 gid_t *bsd_gidset; 1343 int ngrp, error; 1344 struct proc *p; 1345 1346 ngrp = args->gidsetsize; 1347 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1348 return (EINVAL); 1349 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1350 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1351 if (error) 1352 goto out; 1353 newcred = crget(); 1354 crextend(newcred, ngrp + 1); 1355 p = td->td_proc; 1356 PROC_LOCK(p); 1357 oldcred = p->p_ucred; 1358 crcopy(newcred, oldcred); 1359 1360 /* 1361 * cr_groups[0] holds egid. Setting the whole set from 1362 * the supplied set will cause egid to be changed too. 1363 * Keep cr_groups[0] unchanged to prevent that. 1364 */ 1365 1366 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1367 PROC_UNLOCK(p); 1368 crfree(newcred); 1369 goto out; 1370 } 1371 1372 if (ngrp > 0) { 1373 newcred->cr_ngroups = ngrp + 1; 1374 1375 bsd_gidset = newcred->cr_groups; 1376 ngrp--; 1377 while (ngrp >= 0) { 1378 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1379 ngrp--; 1380 } 1381 } else 1382 newcred->cr_ngroups = 1; 1383 1384 setsugid(p); 1385 proc_set_cred(p, newcred); 1386 PROC_UNLOCK(p); 1387 crfree(oldcred); 1388 error = 0; 1389 out: 1390 free(linux_gidset, M_LINUX); 1391 return (error); 1392 } 1393 1394 int 1395 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1396 { 1397 struct ucred *cred; 1398 l_gid_t *linux_gidset; 1399 gid_t *bsd_gidset; 1400 int bsd_gidsetsz, ngrp, error; 1401 1402 cred = td->td_ucred; 1403 bsd_gidset = cred->cr_groups; 1404 bsd_gidsetsz = cred->cr_ngroups - 1; 1405 1406 /* 1407 * cr_groups[0] holds egid. Returning the whole set 1408 * here will cause a duplicate. Exclude cr_groups[0] 1409 * to prevent that. 1410 */ 1411 1412 if ((ngrp = args->gidsetsize) == 0) { 1413 td->td_retval[0] = bsd_gidsetsz; 1414 return (0); 1415 } 1416 1417 if (ngrp < bsd_gidsetsz) 1418 return (EINVAL); 1419 1420 ngrp = 0; 1421 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1422 M_LINUX, M_WAITOK); 1423 while (ngrp < bsd_gidsetsz) { 1424 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1425 ngrp++; 1426 } 1427 1428 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1429 free(linux_gidset, M_LINUX); 1430 if (error) 1431 return (error); 1432 1433 td->td_retval[0] = ngrp; 1434 return (0); 1435 } 1436 1437 static bool 1438 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1439 { 1440 1441 if (linux_dummy_rlimits == 0) 1442 return (false); 1443 1444 switch (resource) { 1445 case LINUX_RLIMIT_LOCKS: 1446 case LINUX_RLIMIT_SIGPENDING: 1447 case LINUX_RLIMIT_MSGQUEUE: 1448 case LINUX_RLIMIT_RTTIME: 1449 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1450 rlim->rlim_max = LINUX_RLIM_INFINITY; 1451 return (true); 1452 case LINUX_RLIMIT_NICE: 1453 case LINUX_RLIMIT_RTPRIO: 1454 rlim->rlim_cur = 0; 1455 rlim->rlim_max = 0; 1456 return (true); 1457 default: 1458 return (false); 1459 } 1460 } 1461 1462 int 1463 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1464 { 1465 struct rlimit bsd_rlim; 1466 struct l_rlimit rlim; 1467 u_int which; 1468 int error; 1469 1470 if (args->resource >= LINUX_RLIM_NLIMITS) 1471 return (EINVAL); 1472 1473 which = linux_to_bsd_resource[args->resource]; 1474 if (which == -1) 1475 return (EINVAL); 1476 1477 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1478 if (error) 1479 return (error); 1480 1481 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1482 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1483 return (kern_setrlimit(td, which, &bsd_rlim)); 1484 } 1485 1486 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1487 int 1488 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1489 { 1490 struct l_rlimit rlim; 1491 struct rlimit bsd_rlim; 1492 u_int which; 1493 1494 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1495 rlim.rlim_cur = bsd_rlim.rlim_cur; 1496 rlim.rlim_max = bsd_rlim.rlim_max; 1497 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1498 } 1499 1500 if (args->resource >= LINUX_RLIM_NLIMITS) 1501 return (EINVAL); 1502 1503 which = linux_to_bsd_resource[args->resource]; 1504 if (which == -1) 1505 return (EINVAL); 1506 1507 lim_rlimit(td, which, &bsd_rlim); 1508 1509 #ifdef COMPAT_LINUX32 1510 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1511 if (rlim.rlim_cur == UINT_MAX) 1512 rlim.rlim_cur = INT_MAX; 1513 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1514 if (rlim.rlim_max == UINT_MAX) 1515 rlim.rlim_max = INT_MAX; 1516 #else 1517 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1518 if (rlim.rlim_cur == ULONG_MAX) 1519 rlim.rlim_cur = LONG_MAX; 1520 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1521 if (rlim.rlim_max == ULONG_MAX) 1522 rlim.rlim_max = LONG_MAX; 1523 #endif 1524 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1525 } 1526 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1527 1528 int 1529 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1530 { 1531 struct l_rlimit rlim; 1532 struct rlimit bsd_rlim; 1533 u_int which; 1534 1535 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1536 rlim.rlim_cur = bsd_rlim.rlim_cur; 1537 rlim.rlim_max = bsd_rlim.rlim_max; 1538 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1539 } 1540 1541 if (args->resource >= LINUX_RLIM_NLIMITS) 1542 return (EINVAL); 1543 1544 which = linux_to_bsd_resource[args->resource]; 1545 if (which == -1) 1546 return (EINVAL); 1547 1548 lim_rlimit(td, which, &bsd_rlim); 1549 1550 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1551 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1552 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1553 } 1554 1555 int 1556 linux_sched_setscheduler(struct thread *td, 1557 struct linux_sched_setscheduler_args *args) 1558 { 1559 struct sched_param sched_param; 1560 struct thread *tdt; 1561 int error, policy; 1562 1563 switch (args->policy) { 1564 case LINUX_SCHED_OTHER: 1565 policy = SCHED_OTHER; 1566 break; 1567 case LINUX_SCHED_FIFO: 1568 policy = SCHED_FIFO; 1569 break; 1570 case LINUX_SCHED_RR: 1571 policy = SCHED_RR; 1572 break; 1573 default: 1574 return (EINVAL); 1575 } 1576 1577 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1578 if (error) 1579 return (error); 1580 1581 if (linux_map_sched_prio) { 1582 switch (policy) { 1583 case SCHED_OTHER: 1584 if (sched_param.sched_priority != 0) 1585 return (EINVAL); 1586 1587 sched_param.sched_priority = 1588 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1589 break; 1590 case SCHED_FIFO: 1591 case SCHED_RR: 1592 if (sched_param.sched_priority < 1 || 1593 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1594 return (EINVAL); 1595 1596 /* 1597 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1598 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1599 */ 1600 sched_param.sched_priority = 1601 (sched_param.sched_priority - 1) * 1602 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1603 (LINUX_MAX_RT_PRIO - 1); 1604 break; 1605 } 1606 } 1607 1608 tdt = linux_tdfind(td, args->pid, -1); 1609 if (tdt == NULL) 1610 return (ESRCH); 1611 1612 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1613 PROC_UNLOCK(tdt->td_proc); 1614 return (error); 1615 } 1616 1617 int 1618 linux_sched_getscheduler(struct thread *td, 1619 struct linux_sched_getscheduler_args *args) 1620 { 1621 struct thread *tdt; 1622 int error, policy; 1623 1624 tdt = linux_tdfind(td, args->pid, -1); 1625 if (tdt == NULL) 1626 return (ESRCH); 1627 1628 error = kern_sched_getscheduler(td, tdt, &policy); 1629 PROC_UNLOCK(tdt->td_proc); 1630 1631 switch (policy) { 1632 case SCHED_OTHER: 1633 td->td_retval[0] = LINUX_SCHED_OTHER; 1634 break; 1635 case SCHED_FIFO: 1636 td->td_retval[0] = LINUX_SCHED_FIFO; 1637 break; 1638 case SCHED_RR: 1639 td->td_retval[0] = LINUX_SCHED_RR; 1640 break; 1641 } 1642 return (error); 1643 } 1644 1645 int 1646 linux_sched_get_priority_max(struct thread *td, 1647 struct linux_sched_get_priority_max_args *args) 1648 { 1649 struct sched_get_priority_max_args bsd; 1650 1651 if (linux_map_sched_prio) { 1652 switch (args->policy) { 1653 case LINUX_SCHED_OTHER: 1654 td->td_retval[0] = 0; 1655 return (0); 1656 case LINUX_SCHED_FIFO: 1657 case LINUX_SCHED_RR: 1658 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1659 return (0); 1660 default: 1661 return (EINVAL); 1662 } 1663 } 1664 1665 switch (args->policy) { 1666 case LINUX_SCHED_OTHER: 1667 bsd.policy = SCHED_OTHER; 1668 break; 1669 case LINUX_SCHED_FIFO: 1670 bsd.policy = SCHED_FIFO; 1671 break; 1672 case LINUX_SCHED_RR: 1673 bsd.policy = SCHED_RR; 1674 break; 1675 default: 1676 return (EINVAL); 1677 } 1678 return (sys_sched_get_priority_max(td, &bsd)); 1679 } 1680 1681 int 1682 linux_sched_get_priority_min(struct thread *td, 1683 struct linux_sched_get_priority_min_args *args) 1684 { 1685 struct sched_get_priority_min_args bsd; 1686 1687 if (linux_map_sched_prio) { 1688 switch (args->policy) { 1689 case LINUX_SCHED_OTHER: 1690 td->td_retval[0] = 0; 1691 return (0); 1692 case LINUX_SCHED_FIFO: 1693 case LINUX_SCHED_RR: 1694 td->td_retval[0] = 1; 1695 return (0); 1696 default: 1697 return (EINVAL); 1698 } 1699 } 1700 1701 switch (args->policy) { 1702 case LINUX_SCHED_OTHER: 1703 bsd.policy = SCHED_OTHER; 1704 break; 1705 case LINUX_SCHED_FIFO: 1706 bsd.policy = SCHED_FIFO; 1707 break; 1708 case LINUX_SCHED_RR: 1709 bsd.policy = SCHED_RR; 1710 break; 1711 default: 1712 return (EINVAL); 1713 } 1714 return (sys_sched_get_priority_min(td, &bsd)); 1715 } 1716 1717 #define REBOOT_CAD_ON 0x89abcdef 1718 #define REBOOT_CAD_OFF 0 1719 #define REBOOT_HALT 0xcdef0123 1720 #define REBOOT_RESTART 0x01234567 1721 #define REBOOT_RESTART2 0xA1B2C3D4 1722 #define REBOOT_POWEROFF 0x4321FEDC 1723 #define REBOOT_MAGIC1 0xfee1dead 1724 #define REBOOT_MAGIC2 0x28121969 1725 #define REBOOT_MAGIC2A 0x05121996 1726 #define REBOOT_MAGIC2B 0x16041998 1727 1728 int 1729 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1730 { 1731 struct reboot_args bsd_args; 1732 1733 if (args->magic1 != REBOOT_MAGIC1) 1734 return (EINVAL); 1735 1736 switch (args->magic2) { 1737 case REBOOT_MAGIC2: 1738 case REBOOT_MAGIC2A: 1739 case REBOOT_MAGIC2B: 1740 break; 1741 default: 1742 return (EINVAL); 1743 } 1744 1745 switch (args->cmd) { 1746 case REBOOT_CAD_ON: 1747 case REBOOT_CAD_OFF: 1748 return (priv_check(td, PRIV_REBOOT)); 1749 case REBOOT_HALT: 1750 bsd_args.opt = RB_HALT; 1751 break; 1752 case REBOOT_RESTART: 1753 case REBOOT_RESTART2: 1754 bsd_args.opt = 0; 1755 break; 1756 case REBOOT_POWEROFF: 1757 bsd_args.opt = RB_POWEROFF; 1758 break; 1759 default: 1760 return (EINVAL); 1761 } 1762 return (sys_reboot(td, &bsd_args)); 1763 } 1764 1765 int 1766 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1767 { 1768 1769 td->td_retval[0] = td->td_proc->p_pid; 1770 1771 return (0); 1772 } 1773 1774 int 1775 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1776 { 1777 struct linux_emuldata *em; 1778 1779 em = em_find(td); 1780 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1781 1782 td->td_retval[0] = em->em_tid; 1783 1784 return (0); 1785 } 1786 1787 int 1788 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1789 { 1790 1791 td->td_retval[0] = kern_getppid(td); 1792 return (0); 1793 } 1794 1795 int 1796 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1797 { 1798 1799 td->td_retval[0] = td->td_ucred->cr_rgid; 1800 return (0); 1801 } 1802 1803 int 1804 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1805 { 1806 1807 td->td_retval[0] = td->td_ucred->cr_ruid; 1808 return (0); 1809 } 1810 1811 int 1812 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1813 { 1814 1815 return (kern_getsid(td, args->pid)); 1816 } 1817 1818 int 1819 linux_nosys(struct thread *td, struct nosys_args *ignore) 1820 { 1821 1822 return (ENOSYS); 1823 } 1824 1825 int 1826 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1827 { 1828 int error; 1829 1830 error = kern_getpriority(td, args->which, args->who); 1831 td->td_retval[0] = 20 - td->td_retval[0]; 1832 return (error); 1833 } 1834 1835 int 1836 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1837 { 1838 int name[2]; 1839 1840 name[0] = CTL_KERN; 1841 name[1] = KERN_HOSTNAME; 1842 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1843 args->len, 0, 0)); 1844 } 1845 1846 int 1847 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1848 { 1849 int name[2]; 1850 1851 name[0] = CTL_KERN; 1852 name[1] = KERN_NISDOMAINNAME; 1853 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1854 args->len, 0, 0)); 1855 } 1856 1857 int 1858 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1859 { 1860 1861 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1862 args->error_code); 1863 1864 /* 1865 * XXX: we should send a signal to the parent if 1866 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1867 * as it doesnt occur often. 1868 */ 1869 exit1(td, args->error_code, 0); 1870 /* NOTREACHED */ 1871 } 1872 1873 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1874 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1875 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1876 1877 struct l_user_cap_header { 1878 l_int version; 1879 l_int pid; 1880 }; 1881 1882 struct l_user_cap_data { 1883 l_int effective; 1884 l_int permitted; 1885 l_int inheritable; 1886 }; 1887 1888 int 1889 linux_capget(struct thread *td, struct linux_capget_args *uap) 1890 { 1891 struct l_user_cap_header luch; 1892 struct l_user_cap_data lucd[2]; 1893 int error, u32s; 1894 1895 if (uap->hdrp == NULL) 1896 return (EFAULT); 1897 1898 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1899 if (error != 0) 1900 return (error); 1901 1902 switch (luch.version) { 1903 case _LINUX_CAPABILITY_VERSION_1: 1904 u32s = 1; 1905 break; 1906 case _LINUX_CAPABILITY_VERSION_2: 1907 case _LINUX_CAPABILITY_VERSION_3: 1908 u32s = 2; 1909 break; 1910 default: 1911 luch.version = _LINUX_CAPABILITY_VERSION_1; 1912 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1913 if (error) 1914 return (error); 1915 return (EINVAL); 1916 } 1917 1918 if (luch.pid) 1919 return (EPERM); 1920 1921 if (uap->datap) { 1922 /* 1923 * The current implementation doesn't support setting 1924 * a capability (it's essentially a stub) so indicate 1925 * that no capabilities are currently set or available 1926 * to request. 1927 */ 1928 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1929 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1930 } 1931 1932 return (error); 1933 } 1934 1935 int 1936 linux_capset(struct thread *td, struct linux_capset_args *uap) 1937 { 1938 struct l_user_cap_header luch; 1939 struct l_user_cap_data lucd[2]; 1940 int error, i, u32s; 1941 1942 if (uap->hdrp == NULL || uap->datap == NULL) 1943 return (EFAULT); 1944 1945 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1946 if (error != 0) 1947 return (error); 1948 1949 switch (luch.version) { 1950 case _LINUX_CAPABILITY_VERSION_1: 1951 u32s = 1; 1952 break; 1953 case _LINUX_CAPABILITY_VERSION_2: 1954 case _LINUX_CAPABILITY_VERSION_3: 1955 u32s = 2; 1956 break; 1957 default: 1958 luch.version = _LINUX_CAPABILITY_VERSION_1; 1959 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1960 if (error) 1961 return (error); 1962 return (EINVAL); 1963 } 1964 1965 if (luch.pid) 1966 return (EPERM); 1967 1968 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1969 if (error != 0) 1970 return (error); 1971 1972 /* We currently don't support setting any capabilities. */ 1973 for (i = 0; i < u32s; i++) { 1974 if (lucd[i].effective || lucd[i].permitted || 1975 lucd[i].inheritable) { 1976 linux_msg(td, 1977 "capset[%d] effective=0x%x, permitted=0x%x, " 1978 "inheritable=0x%x is not implemented", i, 1979 (int)lucd[i].effective, (int)lucd[i].permitted, 1980 (int)lucd[i].inheritable); 1981 return (EPERM); 1982 } 1983 } 1984 1985 return (0); 1986 } 1987 1988 int 1989 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1990 { 1991 int error = 0, max_size, arg; 1992 struct proc *p = td->td_proc; 1993 char comm[LINUX_MAX_COMM_LEN]; 1994 int pdeath_signal, trace_state; 1995 1996 switch (args->option) { 1997 case LINUX_PR_SET_PDEATHSIG: 1998 if (!LINUX_SIG_VALID(args->arg2)) 1999 return (EINVAL); 2000 pdeath_signal = linux_to_bsd_signal(args->arg2); 2001 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 2002 &pdeath_signal)); 2003 case LINUX_PR_GET_PDEATHSIG: 2004 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 2005 &pdeath_signal); 2006 if (error != 0) 2007 return (error); 2008 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 2009 return (copyout(&pdeath_signal, 2010 (void *)(register_t)args->arg2, 2011 sizeof(pdeath_signal))); 2012 /* 2013 * In Linux, this flag controls if set[gu]id processes can coredump. 2014 * There are additional semantics imposed on processes that cannot 2015 * coredump: 2016 * - Such processes can not be ptraced. 2017 * - There are some semantics around ownership of process-related files 2018 * in the /proc namespace. 2019 * 2020 * In FreeBSD, we can (and by default, do) disable setuid coredump 2021 * system-wide with 'sugid_coredump.' We control tracability on a 2022 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 2023 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 2024 * procctl is roughly analogous to Linux's DUMPABLE. 2025 * 2026 * So, proxy these knobs to the corresponding PROC_TRACE setting. 2027 */ 2028 case LINUX_PR_GET_DUMPABLE: 2029 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 2030 &trace_state); 2031 if (error != 0) 2032 return (error); 2033 td->td_retval[0] = (trace_state != -1); 2034 return (0); 2035 case LINUX_PR_SET_DUMPABLE: 2036 /* 2037 * It is only valid for userspace to set one of these two 2038 * flags, and only one at a time. 2039 */ 2040 switch (args->arg2) { 2041 case LINUX_SUID_DUMP_DISABLE: 2042 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 2043 break; 2044 case LINUX_SUID_DUMP_USER: 2045 trace_state = PROC_TRACE_CTL_ENABLE; 2046 break; 2047 default: 2048 return (EINVAL); 2049 } 2050 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 2051 &trace_state)); 2052 case LINUX_PR_GET_KEEPCAPS: 2053 /* 2054 * Indicate that we always clear the effective and 2055 * permitted capability sets when the user id becomes 2056 * non-zero (actually the capability sets are simply 2057 * always zero in the current implementation). 2058 */ 2059 td->td_retval[0] = 0; 2060 break; 2061 case LINUX_PR_SET_KEEPCAPS: 2062 /* 2063 * Ignore requests to keep the effective and permitted 2064 * capability sets when the user id becomes non-zero. 2065 */ 2066 break; 2067 case LINUX_PR_SET_NAME: 2068 /* 2069 * To be on the safe side we need to make sure to not 2070 * overflow the size a Linux program expects. We already 2071 * do this here in the copyin, so that we don't need to 2072 * check on copyout. 2073 */ 2074 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2075 error = copyinstr((void *)(register_t)args->arg2, comm, 2076 max_size, NULL); 2077 2078 /* Linux silently truncates the name if it is too long. */ 2079 if (error == ENAMETOOLONG) { 2080 /* 2081 * XXX: copyinstr() isn't documented to populate the 2082 * array completely, so do a copyin() to be on the 2083 * safe side. This should be changed in case 2084 * copyinstr() is changed to guarantee this. 2085 */ 2086 error = copyin((void *)(register_t)args->arg2, comm, 2087 max_size - 1); 2088 comm[max_size - 1] = '\0'; 2089 } 2090 if (error) 2091 return (error); 2092 2093 PROC_LOCK(p); 2094 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2095 PROC_UNLOCK(p); 2096 break; 2097 case LINUX_PR_GET_NAME: 2098 PROC_LOCK(p); 2099 strlcpy(comm, p->p_comm, sizeof(comm)); 2100 PROC_UNLOCK(p); 2101 error = copyout(comm, (void *)(register_t)args->arg2, 2102 strlen(comm) + 1); 2103 break; 2104 case LINUX_PR_GET_SECCOMP: 2105 case LINUX_PR_SET_SECCOMP: 2106 /* 2107 * Same as returned by Linux without CONFIG_SECCOMP enabled. 2108 */ 2109 error = EINVAL; 2110 break; 2111 case LINUX_PR_CAPBSET_READ: 2112 #if 0 2113 /* 2114 * This makes too much noise with Ubuntu Focal. 2115 */ 2116 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 2117 (int)args->arg2); 2118 #endif 2119 error = EINVAL; 2120 break; 2121 case LINUX_PR_SET_NO_NEW_PRIVS: 2122 arg = args->arg2 == 1 ? 2123 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 2124 error = kern_procctl(td, P_PID, p->p_pid, 2125 PROC_NO_NEW_PRIVS_CTL, &arg); 2126 break; 2127 case LINUX_PR_SET_PTRACER: 2128 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 2129 error = EINVAL; 2130 break; 2131 default: 2132 linux_msg(td, "unsupported prctl option %d", args->option); 2133 error = EINVAL; 2134 break; 2135 } 2136 2137 return (error); 2138 } 2139 2140 int 2141 linux_sched_setparam(struct thread *td, 2142 struct linux_sched_setparam_args *uap) 2143 { 2144 struct sched_param sched_param; 2145 struct thread *tdt; 2146 int error, policy; 2147 2148 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2149 if (error) 2150 return (error); 2151 2152 tdt = linux_tdfind(td, uap->pid, -1); 2153 if (tdt == NULL) 2154 return (ESRCH); 2155 2156 if (linux_map_sched_prio) { 2157 error = kern_sched_getscheduler(td, tdt, &policy); 2158 if (error) 2159 goto out; 2160 2161 switch (policy) { 2162 case SCHED_OTHER: 2163 if (sched_param.sched_priority != 0) { 2164 error = EINVAL; 2165 goto out; 2166 } 2167 sched_param.sched_priority = 2168 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 2169 break; 2170 case SCHED_FIFO: 2171 case SCHED_RR: 2172 if (sched_param.sched_priority < 1 || 2173 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 2174 error = EINVAL; 2175 goto out; 2176 } 2177 /* 2178 * Map [1, LINUX_MAX_RT_PRIO - 1] to 2179 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 2180 */ 2181 sched_param.sched_priority = 2182 (sched_param.sched_priority - 1) * 2183 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 2184 (LINUX_MAX_RT_PRIO - 1); 2185 break; 2186 } 2187 } 2188 2189 error = kern_sched_setparam(td, tdt, &sched_param); 2190 out: PROC_UNLOCK(tdt->td_proc); 2191 return (error); 2192 } 2193 2194 int 2195 linux_sched_getparam(struct thread *td, 2196 struct linux_sched_getparam_args *uap) 2197 { 2198 struct sched_param sched_param; 2199 struct thread *tdt; 2200 int error, policy; 2201 2202 tdt = linux_tdfind(td, uap->pid, -1); 2203 if (tdt == NULL) 2204 return (ESRCH); 2205 2206 error = kern_sched_getparam(td, tdt, &sched_param); 2207 if (error) { 2208 PROC_UNLOCK(tdt->td_proc); 2209 return (error); 2210 } 2211 2212 if (linux_map_sched_prio) { 2213 error = kern_sched_getscheduler(td, tdt, &policy); 2214 PROC_UNLOCK(tdt->td_proc); 2215 if (error) 2216 return (error); 2217 2218 switch (policy) { 2219 case SCHED_OTHER: 2220 sched_param.sched_priority = 0; 2221 break; 2222 case SCHED_FIFO: 2223 case SCHED_RR: 2224 /* 2225 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 2226 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 2227 */ 2228 sched_param.sched_priority = 2229 (sched_param.sched_priority * 2230 (LINUX_MAX_RT_PRIO - 1) + 2231 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 2232 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 2233 break; 2234 } 2235 } else 2236 PROC_UNLOCK(tdt->td_proc); 2237 2238 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 2239 return (error); 2240 } 2241 2242 /* 2243 * Get affinity of a process. 2244 */ 2245 int 2246 linux_sched_getaffinity(struct thread *td, 2247 struct linux_sched_getaffinity_args *args) 2248 { 2249 struct thread *tdt; 2250 int error; 2251 id_t tid; 2252 2253 tdt = linux_tdfind(td, args->pid, -1); 2254 if (tdt == NULL) 2255 return (ESRCH); 2256 tid = tdt->td_tid; 2257 PROC_UNLOCK(tdt->td_proc); 2258 2259 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2260 tid, args->len, (cpuset_t *)args->user_mask_ptr); 2261 if (error == ERANGE) 2262 error = EINVAL; 2263 if (error == 0) 2264 td->td_retval[0] = min(args->len, sizeof(cpuset_t)); 2265 2266 return (error); 2267 } 2268 2269 /* 2270 * Set affinity of a process. 2271 */ 2272 int 2273 linux_sched_setaffinity(struct thread *td, 2274 struct linux_sched_setaffinity_args *args) 2275 { 2276 struct thread *tdt; 2277 cpuset_t *mask; 2278 int cpu, error; 2279 size_t len; 2280 id_t tid; 2281 2282 tdt = linux_tdfind(td, args->pid, -1); 2283 if (tdt == NULL) 2284 return (ESRCH); 2285 tid = tdt->td_tid; 2286 PROC_UNLOCK(tdt->td_proc); 2287 2288 len = min(args->len, sizeof(cpuset_t)); 2289 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);; 2290 error = copyin(args->user_mask_ptr, mask, len); 2291 if (error != 0) 2292 goto out; 2293 /* Linux ignore high bits */ 2294 CPU_FOREACH_ISSET(cpu, mask) 2295 if (cpu > mp_maxid) 2296 CPU_CLR(cpu, mask); 2297 2298 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2299 tid, mask); 2300 if (error == EDEADLK) 2301 error = EINVAL; 2302 out: 2303 free(mask, M_TEMP); 2304 return (error); 2305 } 2306 2307 struct linux_rlimit64 { 2308 uint64_t rlim_cur; 2309 uint64_t rlim_max; 2310 }; 2311 2312 int 2313 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2314 { 2315 struct rlimit rlim, nrlim; 2316 struct linux_rlimit64 lrlim; 2317 struct proc *p; 2318 u_int which; 2319 int flags; 2320 int error; 2321 2322 if (args->new == NULL && args->old != NULL) { 2323 if (linux_get_dummy_limit(args->resource, &rlim)) { 2324 lrlim.rlim_cur = rlim.rlim_cur; 2325 lrlim.rlim_max = rlim.rlim_max; 2326 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2327 } 2328 } 2329 2330 if (args->resource >= LINUX_RLIM_NLIMITS) 2331 return (EINVAL); 2332 2333 which = linux_to_bsd_resource[args->resource]; 2334 if (which == -1) 2335 return (EINVAL); 2336 2337 if (args->new != NULL) { 2338 /* 2339 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2340 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2341 * as INFINITY so we do not need a conversion even. 2342 */ 2343 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2344 if (error != 0) 2345 return (error); 2346 } 2347 2348 flags = PGET_HOLD | PGET_NOTWEXIT; 2349 if (args->new != NULL) 2350 flags |= PGET_CANDEBUG; 2351 else 2352 flags |= PGET_CANSEE; 2353 if (args->pid == 0) { 2354 p = td->td_proc; 2355 PHOLD(p); 2356 } else { 2357 error = pget(args->pid, flags, &p); 2358 if (error != 0) 2359 return (error); 2360 } 2361 if (args->old != NULL) { 2362 PROC_LOCK(p); 2363 lim_rlimit_proc(p, which, &rlim); 2364 PROC_UNLOCK(p); 2365 if (rlim.rlim_cur == RLIM_INFINITY) 2366 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2367 else 2368 lrlim.rlim_cur = rlim.rlim_cur; 2369 if (rlim.rlim_max == RLIM_INFINITY) 2370 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2371 else 2372 lrlim.rlim_max = rlim.rlim_max; 2373 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2374 if (error != 0) 2375 goto out; 2376 } 2377 2378 if (args->new != NULL) 2379 error = kern_proc_setrlimit(td, p, which, &nrlim); 2380 2381 out: 2382 PRELE(p); 2383 return (error); 2384 } 2385 2386 int 2387 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2388 { 2389 struct timespec ts, *tsp; 2390 int error; 2391 2392 if (args->tsp != NULL) { 2393 error = linux_get_timespec(&ts, args->tsp); 2394 if (error != 0) 2395 return (error); 2396 tsp = &ts; 2397 } else 2398 tsp = NULL; 2399 2400 error = linux_common_pselect6(td, args->nfds, args->readfds, 2401 args->writefds, args->exceptfds, tsp, args->sig); 2402 2403 if (args->tsp != NULL) 2404 linux_put_timespec(&ts, args->tsp); 2405 return (error); 2406 } 2407 2408 static int 2409 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2410 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2411 l_uintptr_t *sig) 2412 { 2413 struct timeval utv, tv0, tv1, *tvp; 2414 struct l_pselect6arg lpse6; 2415 l_sigset_t l_ss; 2416 sigset_t *ssp; 2417 sigset_t ss; 2418 int error; 2419 2420 ssp = NULL; 2421 if (sig != NULL) { 2422 error = copyin(sig, &lpse6, sizeof(lpse6)); 2423 if (error != 0) 2424 return (error); 2425 if (lpse6.ss_len != sizeof(l_ss)) 2426 return (EINVAL); 2427 if (lpse6.ss != 0) { 2428 error = copyin(PTRIN(lpse6.ss), &l_ss, 2429 sizeof(l_ss)); 2430 if (error != 0) 2431 return (error); 2432 linux_to_bsd_sigset(&l_ss, &ss); 2433 ssp = &ss; 2434 } 2435 } else 2436 ssp = NULL; 2437 2438 /* 2439 * Currently glibc changes nanosecond number to microsecond. 2440 * This mean losing precision but for now it is hardly seen. 2441 */ 2442 if (tsp != NULL) { 2443 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2444 if (itimerfix(&utv)) 2445 return (EINVAL); 2446 2447 microtime(&tv0); 2448 tvp = &utv; 2449 } else 2450 tvp = NULL; 2451 2452 error = kern_pselect(td, nfds, readfds, writefds, 2453 exceptfds, tvp, ssp, LINUX_NFDBITS); 2454 2455 if (tsp != NULL) { 2456 /* 2457 * Compute how much time was left of the timeout, 2458 * by subtracting the current time and the time 2459 * before we started the call, and subtracting 2460 * that result from the user-supplied value. 2461 */ 2462 microtime(&tv1); 2463 timevalsub(&tv1, &tv0); 2464 timevalsub(&utv, &tv1); 2465 if (utv.tv_sec < 0) 2466 timevalclear(&utv); 2467 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2468 } 2469 return (error); 2470 } 2471 2472 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2473 int 2474 linux_pselect6_time64(struct thread *td, 2475 struct linux_pselect6_time64_args *args) 2476 { 2477 struct timespec ts, *tsp; 2478 int error; 2479 2480 if (args->tsp != NULL) { 2481 error = linux_get_timespec64(&ts, args->tsp); 2482 if (error != 0) 2483 return (error); 2484 tsp = &ts; 2485 } else 2486 tsp = NULL; 2487 2488 error = linux_common_pselect6(td, args->nfds, args->readfds, 2489 args->writefds, args->exceptfds, tsp, args->sig); 2490 2491 if (args->tsp != NULL) 2492 linux_put_timespec64(&ts, args->tsp); 2493 return (error); 2494 } 2495 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2496 2497 int 2498 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2499 { 2500 struct timespec uts, *tsp; 2501 int error; 2502 2503 if (args->tsp != NULL) { 2504 error = linux_get_timespec(&uts, args->tsp); 2505 if (error != 0) 2506 return (error); 2507 tsp = &uts; 2508 } else 2509 tsp = NULL; 2510 2511 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2512 args->sset, args->ssize); 2513 if (error == 0 && args->tsp != NULL) 2514 error = linux_put_timespec(&uts, args->tsp); 2515 return (error); 2516 } 2517 2518 static int 2519 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2520 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2521 { 2522 struct timespec ts0, ts1; 2523 struct pollfd stackfds[32]; 2524 struct pollfd *kfds; 2525 l_sigset_t l_ss; 2526 sigset_t *ssp; 2527 sigset_t ss; 2528 int error; 2529 2530 if (kern_poll_maxfds(nfds)) 2531 return (EINVAL); 2532 if (sset != NULL) { 2533 if (ssize != sizeof(l_ss)) 2534 return (EINVAL); 2535 error = copyin(sset, &l_ss, sizeof(l_ss)); 2536 if (error) 2537 return (error); 2538 linux_to_bsd_sigset(&l_ss, &ss); 2539 ssp = &ss; 2540 } else 2541 ssp = NULL; 2542 if (tsp != NULL) 2543 nanotime(&ts0); 2544 2545 if (nfds > nitems(stackfds)) 2546 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2547 else 2548 kfds = stackfds; 2549 error = linux_pollin(td, kfds, fds, nfds); 2550 if (error != 0) 2551 goto out; 2552 2553 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2554 if (error == 0) 2555 error = linux_pollout(td, kfds, fds, nfds); 2556 2557 if (error == 0 && tsp != NULL) { 2558 if (td->td_retval[0]) { 2559 nanotime(&ts1); 2560 timespecsub(&ts1, &ts0, &ts1); 2561 timespecsub(tsp, &ts1, tsp); 2562 if (tsp->tv_sec < 0) 2563 timespecclear(tsp); 2564 } else 2565 timespecclear(tsp); 2566 } 2567 2568 out: 2569 if (nfds > nitems(stackfds)) 2570 free(kfds, M_TEMP); 2571 return (error); 2572 } 2573 2574 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2575 int 2576 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2577 { 2578 struct timespec uts, *tsp; 2579 int error; 2580 2581 if (args->tsp != NULL) { 2582 error = linux_get_timespec64(&uts, args->tsp); 2583 if (error != 0) 2584 return (error); 2585 tsp = &uts; 2586 } else 2587 tsp = NULL; 2588 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2589 args->sset, args->ssize); 2590 if (error == 0 && args->tsp != NULL) 2591 error = linux_put_timespec64(&uts, args->tsp); 2592 return (error); 2593 } 2594 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2595 2596 static int 2597 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2598 { 2599 int error; 2600 u_int i; 2601 2602 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2603 if (error != 0) 2604 return (error); 2605 2606 for (i = 0; i < nfd; i++) { 2607 if (fds->events != 0) 2608 linux_to_bsd_poll_events(td, fds->fd, 2609 fds->events, &fds->events); 2610 fds++; 2611 } 2612 return (0); 2613 } 2614 2615 static int 2616 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2617 { 2618 int error = 0; 2619 u_int i, n = 0; 2620 2621 for (i = 0; i < nfd; i++) { 2622 if (fds->revents != 0) { 2623 bsd_to_linux_poll_events(fds->revents, 2624 &fds->revents); 2625 n++; 2626 } 2627 error = copyout(&fds->revents, &ufds->revents, 2628 sizeof(ufds->revents)); 2629 if (error) 2630 return (error); 2631 fds++; 2632 ufds++; 2633 } 2634 td->td_retval[0] = n; 2635 return (0); 2636 } 2637 2638 static int 2639 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2640 struct timespec *ts) 2641 { 2642 struct thread *tdt; 2643 int error; 2644 2645 /* 2646 * According to man in case the invalid pid specified 2647 * EINVAL should be returned. 2648 */ 2649 if (pid < 0) 2650 return (EINVAL); 2651 2652 tdt = linux_tdfind(td, pid, -1); 2653 if (tdt == NULL) 2654 return (ESRCH); 2655 2656 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2657 PROC_UNLOCK(tdt->td_proc); 2658 return (error); 2659 } 2660 2661 int 2662 linux_sched_rr_get_interval(struct thread *td, 2663 struct linux_sched_rr_get_interval_args *uap) 2664 { 2665 struct timespec ts; 2666 int error; 2667 2668 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2669 if (error != 0) 2670 return (error); 2671 return (linux_put_timespec(&ts, uap->interval)); 2672 } 2673 2674 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2675 int 2676 linux_sched_rr_get_interval_time64(struct thread *td, 2677 struct linux_sched_rr_get_interval_time64_args *uap) 2678 { 2679 struct timespec ts; 2680 int error; 2681 2682 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2683 if (error != 0) 2684 return (error); 2685 return (linux_put_timespec64(&ts, uap->interval)); 2686 } 2687 #endif 2688 2689 /* 2690 * In case when the Linux thread is the initial thread in 2691 * the thread group thread id is equal to the process id. 2692 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2693 */ 2694 struct thread * 2695 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2696 { 2697 struct linux_emuldata *em; 2698 struct thread *tdt; 2699 struct proc *p; 2700 2701 tdt = NULL; 2702 if (tid == 0 || tid == td->td_tid) { 2703 if (pid != -1 && td->td_proc->p_pid != pid) 2704 return (NULL); 2705 PROC_LOCK(td->td_proc); 2706 return (td); 2707 } else if (tid > PID_MAX) 2708 return (tdfind(tid, pid)); 2709 2710 /* 2711 * Initial thread where the tid equal to the pid. 2712 */ 2713 p = pfind(tid); 2714 if (p != NULL) { 2715 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2716 (pid != -1 && tid != pid)) { 2717 /* 2718 * p is not a Linuxulator process. 2719 */ 2720 PROC_UNLOCK(p); 2721 return (NULL); 2722 } 2723 FOREACH_THREAD_IN_PROC(p, tdt) { 2724 em = em_find(tdt); 2725 if (tid == em->em_tid) 2726 return (tdt); 2727 } 2728 PROC_UNLOCK(p); 2729 } 2730 return (NULL); 2731 } 2732 2733 void 2734 linux_to_bsd_waitopts(int options, int *bsdopts) 2735 { 2736 2737 if (options & LINUX_WNOHANG) 2738 *bsdopts |= WNOHANG; 2739 if (options & LINUX_WUNTRACED) 2740 *bsdopts |= WUNTRACED; 2741 if (options & LINUX_WEXITED) 2742 *bsdopts |= WEXITED; 2743 if (options & LINUX_WCONTINUED) 2744 *bsdopts |= WCONTINUED; 2745 if (options & LINUX_WNOWAIT) 2746 *bsdopts |= WNOWAIT; 2747 2748 if (options & __WCLONE) 2749 *bsdopts |= WLINUXCLONE; 2750 } 2751 2752 int 2753 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2754 { 2755 struct uio uio; 2756 struct iovec iov; 2757 int error; 2758 2759 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2760 return (EINVAL); 2761 if (args->count > INT_MAX) 2762 args->count = INT_MAX; 2763 2764 iov.iov_base = args->buf; 2765 iov.iov_len = args->count; 2766 2767 uio.uio_iov = &iov; 2768 uio.uio_iovcnt = 1; 2769 uio.uio_resid = iov.iov_len; 2770 uio.uio_segflg = UIO_USERSPACE; 2771 uio.uio_rw = UIO_READ; 2772 uio.uio_td = td; 2773 2774 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2775 if (error == 0) 2776 td->td_retval[0] = args->count - uio.uio_resid; 2777 return (error); 2778 } 2779 2780 int 2781 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2782 { 2783 2784 /* Needs to be page-aligned */ 2785 if (args->start & PAGE_MASK) 2786 return (EINVAL); 2787 return (kern_mincore(td, args->start, args->len, args->vec)); 2788 } 2789 2790 #define SYSLOG_TAG "<6>" 2791 2792 int 2793 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2794 { 2795 char buf[128], *src, *dst; 2796 u_int seq; 2797 int buflen, error; 2798 2799 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2800 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2801 return (EINVAL); 2802 } 2803 2804 if (args->len < 6) { 2805 td->td_retval[0] = 0; 2806 return (0); 2807 } 2808 2809 error = priv_check(td, PRIV_MSGBUF); 2810 if (error) 2811 return (error); 2812 2813 mtx_lock(&msgbuf_lock); 2814 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2815 mtx_unlock(&msgbuf_lock); 2816 2817 dst = args->buf; 2818 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2819 /* The -1 is to skip the trailing '\0'. */ 2820 dst += sizeof(SYSLOG_TAG) - 1; 2821 2822 while (error == 0) { 2823 mtx_lock(&msgbuf_lock); 2824 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2825 mtx_unlock(&msgbuf_lock); 2826 2827 if (buflen == 0) 2828 break; 2829 2830 for (src = buf; src < buf + buflen && error == 0; src++) { 2831 if (*src == '\0') 2832 continue; 2833 2834 if (dst >= args->buf + args->len) 2835 goto out; 2836 2837 error = copyout(src, dst, 1); 2838 dst++; 2839 2840 if (*src == '\n' && *(src + 1) != '<' && 2841 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2842 error = copyout(&SYSLOG_TAG, 2843 dst, sizeof(SYSLOG_TAG)); 2844 dst += sizeof(SYSLOG_TAG) - 1; 2845 } 2846 } 2847 } 2848 out: 2849 td->td_retval[0] = dst - args->buf; 2850 return (error); 2851 } 2852 2853 int 2854 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2855 { 2856 int cpu, error, node; 2857 2858 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2859 error = 0; 2860 node = cpuid_to_pcpu[cpu]->pc_domain; 2861 2862 if (args->cpu != NULL) 2863 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2864 if (args->node != NULL) 2865 error = copyout(&node, args->node, sizeof(l_int)); 2866 return (error); 2867 } 2868 2869 #if defined(__i386__) || defined(__amd64__) 2870 int 2871 linux_poll(struct thread *td, struct linux_poll_args *args) 2872 { 2873 struct timespec ts, *tsp; 2874 2875 if (args->timeout != INFTIM) { 2876 if (args->timeout < 0) 2877 return (EINVAL); 2878 ts.tv_sec = args->timeout / 1000; 2879 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2880 tsp = &ts; 2881 } else 2882 tsp = NULL; 2883 2884 return (linux_common_ppoll(td, args->fds, args->nfds, 2885 tsp, NULL, 0)); 2886 } 2887 #endif /* __i386__ || __amd64__ */ 2888 2889 int 2890 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2891 { 2892 2893 switch (args->op) { 2894 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2895 return (EOPNOTSUPP); 2896 default: 2897 /* 2898 * Ignore unknown operations, just like Linux kernel built 2899 * without CONFIG_SECCOMP. 2900 */ 2901 return (EINVAL); 2902 } 2903 } 2904