1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/imgact.h> 45 #include <sys/kernel.h> 46 #include <sys/limits.h> 47 #include <sys/lock.h> 48 #include <sys/malloc.h> 49 #include <sys/mman.h> 50 #include <sys/mount.h> 51 #include <sys/msgbuf.h> 52 #include <sys/mutex.h> 53 #include <sys/namei.h> 54 #include <sys/poll.h> 55 #include <sys/priv.h> 56 #include <sys/proc.h> 57 #include <sys/procctl.h> 58 #include <sys/reboot.h> 59 #include <sys/racct.h> 60 #include <sys/random.h> 61 #include <sys/resourcevar.h> 62 #include <sys/sched.h> 63 #include <sys/sdt.h> 64 #include <sys/signalvar.h> 65 #include <sys/smp.h> 66 #include <sys/stat.h> 67 #include <sys/syscallsubr.h> 68 #include <sys/sysctl.h> 69 #include <sys/sysproto.h> 70 #include <sys/systm.h> 71 #include <sys/time.h> 72 #include <sys/vmmeter.h> 73 #include <sys/vnode.h> 74 #include <sys/wait.h> 75 #include <sys/cpuset.h> 76 #include <sys/uio.h> 77 78 #include <security/audit/audit.h> 79 #include <security/mac/mac_framework.h> 80 81 #include <vm/vm.h> 82 #include <vm/pmap.h> 83 #include <vm/vm_kern.h> 84 #include <vm/vm_map.h> 85 #include <vm/vm_extern.h> 86 #include <vm/swap_pager.h> 87 88 #ifdef COMPAT_LINUX32 89 #include <machine/../linux32/linux.h> 90 #include <machine/../linux32/linux32_proto.h> 91 #else 92 #include <machine/../linux/linux.h> 93 #include <machine/../linux/linux_proto.h> 94 #endif 95 96 #include <compat/linux/linux_common.h> 97 #include <compat/linux/linux_dtrace.h> 98 #include <compat/linux/linux_file.h> 99 #include <compat/linux/linux_mib.h> 100 #include <compat/linux/linux_signal.h> 101 #include <compat/linux/linux_timer.h> 102 #include <compat/linux/linux_util.h> 103 #include <compat/linux/linux_sysproto.h> 104 #include <compat/linux/linux_emul.h> 105 #include <compat/linux/linux_misc.h> 106 107 int stclohz; /* Statistics clock frequency */ 108 109 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 110 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 111 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 112 RLIMIT_MEMLOCK, RLIMIT_AS 113 }; 114 115 struct l_sysinfo { 116 l_long uptime; /* Seconds since boot */ 117 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 118 #define LINUX_SYSINFO_LOADS_SCALE 65536 119 l_ulong totalram; /* Total usable main memory size */ 120 l_ulong freeram; /* Available memory size */ 121 l_ulong sharedram; /* Amount of shared memory */ 122 l_ulong bufferram; /* Memory used by buffers */ 123 l_ulong totalswap; /* Total swap space size */ 124 l_ulong freeswap; /* swap space still available */ 125 l_ushort procs; /* Number of current processes */ 126 l_ushort pads; 127 l_ulong totalhigh; 128 l_ulong freehigh; 129 l_uint mem_unit; 130 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 131 }; 132 133 struct l_pselect6arg { 134 l_uintptr_t ss; 135 l_size_t ss_len; 136 }; 137 138 static int linux_utimensat_lts_to_ts(struct l_timespec *, 139 struct timespec *); 140 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 141 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 142 struct timespec *); 143 #endif 144 static int linux_common_utimensat(struct thread *, int, 145 const char *, struct timespec *, int); 146 static int linux_common_pselect6(struct thread *, l_int, 147 l_fd_set *, l_fd_set *, l_fd_set *, 148 struct timespec *, l_uintptr_t *); 149 static int linux_common_ppoll(struct thread *, struct pollfd *, 150 uint32_t, struct timespec *, l_sigset_t *, 151 l_size_t); 152 static int linux_pollin(struct thread *, struct pollfd *, 153 struct pollfd *, u_int); 154 static int linux_pollout(struct thread *, struct pollfd *, 155 struct pollfd *, u_int); 156 157 int 158 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 159 { 160 struct l_sysinfo sysinfo; 161 int i, j; 162 struct timespec ts; 163 164 bzero(&sysinfo, sizeof(sysinfo)); 165 getnanouptime(&ts); 166 if (ts.tv_nsec != 0) 167 ts.tv_sec++; 168 sysinfo.uptime = ts.tv_sec; 169 170 /* Use the information from the mib to get our load averages */ 171 for (i = 0; i < 3; i++) 172 sysinfo.loads[i] = averunnable.ldavg[i] * 173 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 174 175 sysinfo.totalram = physmem * PAGE_SIZE; 176 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 177 178 /* 179 * sharedram counts pages allocated to named, swap-backed objects such 180 * as shared memory segments and tmpfs files. There is no cheap way to 181 * compute this, so just leave the field unpopulated. Linux itself only 182 * started setting this field in the 3.x timeframe. 183 */ 184 sysinfo.sharedram = 0; 185 sysinfo.bufferram = 0; 186 187 swap_pager_status(&i, &j); 188 sysinfo.totalswap = i * PAGE_SIZE; 189 sysinfo.freeswap = (i - j) * PAGE_SIZE; 190 191 sysinfo.procs = nprocs; 192 193 /* 194 * Platforms supported by the emulation layer do not have a notion of 195 * high memory. 196 */ 197 sysinfo.totalhigh = 0; 198 sysinfo.freehigh = 0; 199 200 sysinfo.mem_unit = 1; 201 202 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 203 } 204 205 #ifdef LINUX_LEGACY_SYSCALLS 206 int 207 linux_alarm(struct thread *td, struct linux_alarm_args *args) 208 { 209 struct itimerval it, old_it; 210 u_int secs; 211 int error __diagused; 212 213 secs = args->secs; 214 /* 215 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 216 * to match kern_setitimer()'s limit to avoid error from it. 217 * 218 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 219 * platforms. 220 */ 221 if (secs > INT32_MAX / 2) 222 secs = INT32_MAX / 2; 223 224 it.it_value.tv_sec = secs; 225 it.it_value.tv_usec = 0; 226 timevalclear(&it.it_interval); 227 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 228 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 229 230 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 231 old_it.it_value.tv_usec >= 500000) 232 old_it.it_value.tv_sec++; 233 td->td_retval[0] = old_it.it_value.tv_sec; 234 return (0); 235 } 236 #endif 237 238 int 239 linux_brk(struct thread *td, struct linux_brk_args *args) 240 { 241 struct vmspace *vm = td->td_proc->p_vmspace; 242 uintptr_t new, old; 243 244 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 245 new = (uintptr_t)args->dsend; 246 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 247 td->td_retval[0] = (register_t)new; 248 else 249 td->td_retval[0] = (register_t)old; 250 251 return (0); 252 } 253 254 #if defined(__i386__) 255 /* XXX: what about amd64/linux32? */ 256 257 int 258 linux_uselib(struct thread *td, struct linux_uselib_args *args) 259 { 260 struct nameidata ni; 261 struct vnode *vp; 262 struct exec *a_out; 263 vm_map_t map; 264 vm_map_entry_t entry; 265 struct vattr attr; 266 vm_offset_t vmaddr; 267 unsigned long file_offset; 268 unsigned long bss_size; 269 char *library; 270 ssize_t aresid; 271 int error; 272 bool locked, opened, textset; 273 274 a_out = NULL; 275 vp = NULL; 276 locked = false; 277 textset = false; 278 opened = false; 279 280 if (!LUSECONVPATH(td)) { 281 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 282 UIO_USERSPACE, args->library); 283 error = namei(&ni); 284 } else { 285 LCONVPATHEXIST(args->library, &library); 286 NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1, 287 UIO_SYSSPACE, library); 288 error = namei(&ni); 289 LFREEPATH(library); 290 } 291 if (error) 292 goto cleanup; 293 294 vp = ni.ni_vp; 295 NDFREE_PNBUF(&ni); 296 297 /* 298 * From here on down, we have a locked vnode that must be unlocked. 299 * XXX: The code below largely duplicates exec_check_permissions(). 300 */ 301 locked = true; 302 303 /* Executable? */ 304 error = VOP_GETATTR(vp, &attr, td->td_ucred); 305 if (error) 306 goto cleanup; 307 308 if ((vp->v_mount->mnt_flag & MNT_NOEXEC) || 309 ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) { 310 /* EACCESS is what exec(2) returns. */ 311 error = ENOEXEC; 312 goto cleanup; 313 } 314 315 /* Sensible size? */ 316 if (attr.va_size == 0) { 317 error = ENOEXEC; 318 goto cleanup; 319 } 320 321 /* Can we access it? */ 322 error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); 323 if (error) 324 goto cleanup; 325 326 /* 327 * XXX: This should use vn_open() so that it is properly authorized, 328 * and to reduce code redundancy all over the place here. 329 * XXX: Not really, it duplicates far more of exec_check_permissions() 330 * than vn_open(). 331 */ 332 #ifdef MAC 333 error = mac_vnode_check_open(td->td_ucred, vp, VREAD); 334 if (error) 335 goto cleanup; 336 #endif 337 error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL); 338 if (error) 339 goto cleanup; 340 opened = true; 341 342 /* Pull in executable header into exec_map */ 343 error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE, 344 VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0); 345 if (error) 346 goto cleanup; 347 348 /* Is it a Linux binary ? */ 349 if (((a_out->a_magic >> 16) & 0xff) != 0x64) { 350 error = ENOEXEC; 351 goto cleanup; 352 } 353 354 /* 355 * While we are here, we should REALLY do some more checks 356 */ 357 358 /* Set file/virtual offset based on a.out variant. */ 359 switch ((int)(a_out->a_magic & 0xffff)) { 360 case 0413: /* ZMAGIC */ 361 file_offset = 1024; 362 break; 363 case 0314: /* QMAGIC */ 364 file_offset = 0; 365 break; 366 default: 367 error = ENOEXEC; 368 goto cleanup; 369 } 370 371 bss_size = round_page(a_out->a_bss); 372 373 /* Check various fields in header for validity/bounds. */ 374 if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) { 375 error = ENOEXEC; 376 goto cleanup; 377 } 378 379 /* text + data can't exceed file size */ 380 if (a_out->a_data + a_out->a_text > attr.va_size) { 381 error = EFAULT; 382 goto cleanup; 383 } 384 385 /* 386 * text/data/bss must not exceed limits 387 * XXX - this is not complete. it should check current usage PLUS 388 * the resources needed by this library. 389 */ 390 PROC_LOCK(td->td_proc); 391 if (a_out->a_text > maxtsiz || 392 a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) || 393 racct_set(td->td_proc, RACCT_DATA, a_out->a_data + 394 bss_size) != 0) { 395 PROC_UNLOCK(td->td_proc); 396 error = ENOMEM; 397 goto cleanup; 398 } 399 PROC_UNLOCK(td->td_proc); 400 401 /* 402 * Prevent more writers. 403 */ 404 error = VOP_SET_TEXT(vp); 405 if (error != 0) 406 goto cleanup; 407 textset = true; 408 409 /* 410 * Lock no longer needed 411 */ 412 locked = false; 413 VOP_UNLOCK(vp); 414 415 /* 416 * Check if file_offset page aligned. Currently we cannot handle 417 * misalinged file offsets, and so we read in the entire image 418 * (what a waste). 419 */ 420 if (file_offset & PAGE_MASK) { 421 /* Map text+data read/write/execute */ 422 423 /* a_entry is the load address and is page aligned */ 424 vmaddr = trunc_page(a_out->a_entry); 425 426 /* get anon user mapping, read+write+execute */ 427 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 428 &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE, 429 VM_PROT_ALL, VM_PROT_ALL, 0); 430 if (error) 431 goto cleanup; 432 433 error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset, 434 a_out->a_text + a_out->a_data, UIO_USERSPACE, 0, 435 td->td_ucred, NOCRED, &aresid, td); 436 if (error != 0) 437 goto cleanup; 438 if (aresid != 0) { 439 error = ENOEXEC; 440 goto cleanup; 441 } 442 } else { 443 /* 444 * for QMAGIC, a_entry is 20 bytes beyond the load address 445 * to skip the executable header 446 */ 447 vmaddr = trunc_page(a_out->a_entry); 448 449 /* 450 * Map it all into the process's space as a single 451 * copy-on-write "data" segment. 452 */ 453 map = &td->td_proc->p_vmspace->vm_map; 454 error = vm_mmap(map, &vmaddr, 455 a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL, 456 MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset); 457 if (error) 458 goto cleanup; 459 vm_map_lock(map); 460 if (!vm_map_lookup_entry(map, vmaddr, &entry)) { 461 vm_map_unlock(map); 462 error = EDOOFUS; 463 goto cleanup; 464 } 465 entry->eflags |= MAP_ENTRY_VN_EXEC; 466 vm_map_unlock(map); 467 textset = false; 468 } 469 470 if (bss_size != 0) { 471 /* Calculate BSS start address */ 472 vmaddr = trunc_page(a_out->a_entry) + a_out->a_text + 473 a_out->a_data; 474 475 /* allocate some 'anon' space */ 476 error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0, 477 &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL, 478 VM_PROT_ALL, 0); 479 if (error) 480 goto cleanup; 481 } 482 483 cleanup: 484 if (opened) { 485 if (locked) 486 VOP_UNLOCK(vp); 487 locked = false; 488 VOP_CLOSE(vp, FREAD, td->td_ucred, td); 489 } 490 if (textset) { 491 if (!locked) { 492 locked = true; 493 VOP_LOCK(vp, LK_SHARED | LK_RETRY); 494 } 495 VOP_UNSET_TEXT_CHECKED(vp); 496 } 497 if (locked) 498 VOP_UNLOCK(vp); 499 500 /* Release the temporary mapping. */ 501 if (a_out) 502 kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE); 503 504 return (error); 505 } 506 507 #endif /* __i386__ */ 508 509 #ifdef LINUX_LEGACY_SYSCALLS 510 int 511 linux_select(struct thread *td, struct linux_select_args *args) 512 { 513 l_timeval ltv; 514 struct timeval tv0, tv1, utv, *tvp; 515 int error; 516 517 /* 518 * Store current time for computation of the amount of 519 * time left. 520 */ 521 if (args->timeout) { 522 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 523 goto select_out; 524 utv.tv_sec = ltv.tv_sec; 525 utv.tv_usec = ltv.tv_usec; 526 527 if (itimerfix(&utv)) { 528 /* 529 * The timeval was invalid. Convert it to something 530 * valid that will act as it does under Linux. 531 */ 532 utv.tv_sec += utv.tv_usec / 1000000; 533 utv.tv_usec %= 1000000; 534 if (utv.tv_usec < 0) { 535 utv.tv_sec -= 1; 536 utv.tv_usec += 1000000; 537 } 538 if (utv.tv_sec < 0) 539 timevalclear(&utv); 540 } 541 microtime(&tv0); 542 tvp = &utv; 543 } else 544 tvp = NULL; 545 546 error = kern_select(td, args->nfds, args->readfds, args->writefds, 547 args->exceptfds, tvp, LINUX_NFDBITS); 548 if (error) 549 goto select_out; 550 551 if (args->timeout) { 552 if (td->td_retval[0]) { 553 /* 554 * Compute how much time was left of the timeout, 555 * by subtracting the current time and the time 556 * before we started the call, and subtracting 557 * that result from the user-supplied value. 558 */ 559 microtime(&tv1); 560 timevalsub(&tv1, &tv0); 561 timevalsub(&utv, &tv1); 562 if (utv.tv_sec < 0) 563 timevalclear(&utv); 564 } else 565 timevalclear(&utv); 566 ltv.tv_sec = utv.tv_sec; 567 ltv.tv_usec = utv.tv_usec; 568 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 569 goto select_out; 570 } 571 572 select_out: 573 return (error); 574 } 575 #endif 576 577 int 578 linux_mremap(struct thread *td, struct linux_mremap_args *args) 579 { 580 uintptr_t addr; 581 size_t len; 582 int error = 0; 583 584 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 585 td->td_retval[0] = 0; 586 return (EINVAL); 587 } 588 589 /* 590 * Check for the page alignment. 591 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 592 */ 593 if (args->addr & PAGE_MASK) { 594 td->td_retval[0] = 0; 595 return (EINVAL); 596 } 597 598 args->new_len = round_page(args->new_len); 599 args->old_len = round_page(args->old_len); 600 601 if (args->new_len > args->old_len) { 602 td->td_retval[0] = 0; 603 return (ENOMEM); 604 } 605 606 if (args->new_len < args->old_len) { 607 addr = args->addr + args->new_len; 608 len = args->old_len - args->new_len; 609 error = kern_munmap(td, addr, len); 610 } 611 612 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 613 return (error); 614 } 615 616 #define LINUX_MS_ASYNC 0x0001 617 #define LINUX_MS_INVALIDATE 0x0002 618 #define LINUX_MS_SYNC 0x0004 619 620 int 621 linux_msync(struct thread *td, struct linux_msync_args *args) 622 { 623 624 return (kern_msync(td, args->addr, args->len, 625 args->fl & ~LINUX_MS_SYNC)); 626 } 627 628 #ifdef LINUX_LEGACY_SYSCALLS 629 int 630 linux_time(struct thread *td, struct linux_time_args *args) 631 { 632 struct timeval tv; 633 l_time_t tm; 634 int error; 635 636 microtime(&tv); 637 tm = tv.tv_sec; 638 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 639 return (error); 640 td->td_retval[0] = tm; 641 return (0); 642 } 643 #endif 644 645 struct l_times_argv { 646 l_clock_t tms_utime; 647 l_clock_t tms_stime; 648 l_clock_t tms_cutime; 649 l_clock_t tms_cstime; 650 }; 651 652 /* 653 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 654 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 655 * auxiliary vector entry. 656 */ 657 #define CLK_TCK 100 658 659 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 660 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 661 662 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER_2004000 ? \ 663 CONVNTCK(r) : CONVOTCK(r)) 664 665 int 666 linux_times(struct thread *td, struct linux_times_args *args) 667 { 668 struct timeval tv, utime, stime, cutime, cstime; 669 struct l_times_argv tms; 670 struct proc *p; 671 int error; 672 673 if (args->buf != NULL) { 674 p = td->td_proc; 675 PROC_LOCK(p); 676 PROC_STATLOCK(p); 677 calcru(p, &utime, &stime); 678 PROC_STATUNLOCK(p); 679 calccru(p, &cutime, &cstime); 680 PROC_UNLOCK(p); 681 682 tms.tms_utime = CONVTCK(utime); 683 tms.tms_stime = CONVTCK(stime); 684 685 tms.tms_cutime = CONVTCK(cutime); 686 tms.tms_cstime = CONVTCK(cstime); 687 688 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 689 return (error); 690 } 691 692 microuptime(&tv); 693 td->td_retval[0] = (int)CONVTCK(tv); 694 return (0); 695 } 696 697 int 698 linux_newuname(struct thread *td, struct linux_newuname_args *args) 699 { 700 struct l_new_utsname utsname; 701 char osname[LINUX_MAX_UTSNAME]; 702 char osrelease[LINUX_MAX_UTSNAME]; 703 char *p; 704 705 linux_get_osname(td, osname); 706 linux_get_osrelease(td, osrelease); 707 708 bzero(&utsname, sizeof(utsname)); 709 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 710 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 711 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 712 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 713 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 714 for (p = utsname.version; *p != '\0'; ++p) 715 if (*p == '\n') { 716 *p = '\0'; 717 break; 718 } 719 #if defined(__amd64__) 720 /* 721 * On amd64, Linux uname(2) needs to return "x86_64" 722 * for both 64-bit and 32-bit applications. On 32-bit, 723 * the string returned by getauxval(AT_PLATFORM) needs 724 * to remain "i686", though. 725 */ 726 #if defined(COMPAT_LINUX32) 727 if (linux32_emulate_i386) 728 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 729 else 730 #endif 731 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 732 #elif defined(__aarch64__) 733 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 734 #elif defined(__i386__) 735 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 736 #endif 737 738 return (copyout(&utsname, args->buf, sizeof(utsname))); 739 } 740 741 struct l_utimbuf { 742 l_time_t l_actime; 743 l_time_t l_modtime; 744 }; 745 746 #ifdef LINUX_LEGACY_SYSCALLS 747 int 748 linux_utime(struct thread *td, struct linux_utime_args *args) 749 { 750 struct timeval tv[2], *tvp; 751 struct l_utimbuf lut; 752 char *fname; 753 int error; 754 755 if (args->times) { 756 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 757 return (error); 758 tv[0].tv_sec = lut.l_actime; 759 tv[0].tv_usec = 0; 760 tv[1].tv_sec = lut.l_modtime; 761 tv[1].tv_usec = 0; 762 tvp = tv; 763 } else 764 tvp = NULL; 765 766 if (!LUSECONVPATH(td)) { 767 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 768 tvp, UIO_SYSSPACE); 769 } else { 770 LCONVPATHEXIST(args->fname, &fname); 771 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 772 UIO_SYSSPACE); 773 LFREEPATH(fname); 774 } 775 return (error); 776 } 777 #endif 778 779 #ifdef LINUX_LEGACY_SYSCALLS 780 int 781 linux_utimes(struct thread *td, struct linux_utimes_args *args) 782 { 783 l_timeval ltv[2]; 784 struct timeval tv[2], *tvp = NULL; 785 char *fname; 786 int error; 787 788 if (args->tptr != NULL) { 789 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 790 return (error); 791 tv[0].tv_sec = ltv[0].tv_sec; 792 tv[0].tv_usec = ltv[0].tv_usec; 793 tv[1].tv_sec = ltv[1].tv_sec; 794 tv[1].tv_usec = ltv[1].tv_usec; 795 tvp = tv; 796 } 797 798 if (!LUSECONVPATH(td)) { 799 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 800 tvp, UIO_SYSSPACE); 801 } else { 802 LCONVPATHEXIST(args->fname, &fname); 803 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 804 tvp, UIO_SYSSPACE); 805 LFREEPATH(fname); 806 } 807 return (error); 808 } 809 #endif 810 811 static int 812 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 813 { 814 815 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 816 l_times->tv_nsec != LINUX_UTIME_NOW && 817 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 818 return (EINVAL); 819 820 times->tv_sec = l_times->tv_sec; 821 switch (l_times->tv_nsec) 822 { 823 case LINUX_UTIME_OMIT: 824 times->tv_nsec = UTIME_OMIT; 825 break; 826 case LINUX_UTIME_NOW: 827 times->tv_nsec = UTIME_NOW; 828 break; 829 default: 830 times->tv_nsec = l_times->tv_nsec; 831 } 832 833 return (0); 834 } 835 836 static int 837 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 838 struct timespec *timesp, int lflags) 839 { 840 char *path = NULL; 841 int error, dfd, flags = 0; 842 843 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 844 845 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 846 return (EINVAL); 847 848 if (timesp != NULL) { 849 /* This breaks POSIX, but is what the Linux kernel does 850 * _on purpose_ (documented in the man page for utimensat(2)), 851 * so we must follow that behaviour. */ 852 if (timesp[0].tv_nsec == UTIME_OMIT && 853 timesp[1].tv_nsec == UTIME_OMIT) 854 return (0); 855 } 856 857 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 858 flags |= AT_SYMLINK_NOFOLLOW; 859 if (lflags & LINUX_AT_EMPTY_PATH) 860 flags |= AT_EMPTY_PATH; 861 862 if (!LUSECONVPATH(td)) { 863 if (pathname != NULL) { 864 return (kern_utimensat(td, dfd, pathname, 865 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 866 } 867 } 868 869 if (pathname != NULL) 870 LCONVPATHEXIST_AT(pathname, &path, dfd); 871 else if (lflags != 0) 872 return (EINVAL); 873 874 if (path == NULL) 875 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 876 else { 877 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 878 UIO_SYSSPACE, flags); 879 LFREEPATH(path); 880 } 881 882 return (error); 883 } 884 885 int 886 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 887 { 888 struct l_timespec l_times[2]; 889 struct timespec times[2], *timesp; 890 int error; 891 892 if (args->times != NULL) { 893 error = copyin(args->times, l_times, sizeof(l_times)); 894 if (error != 0) 895 return (error); 896 897 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 898 if (error != 0) 899 return (error); 900 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 901 if (error != 0) 902 return (error); 903 timesp = times; 904 } else 905 timesp = NULL; 906 907 return (linux_common_utimensat(td, args->dfd, args->pathname, 908 timesp, args->flags)); 909 } 910 911 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 912 static int 913 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 914 { 915 916 /* Zero out the padding in compat mode. */ 917 l_times->tv_nsec &= 0xFFFFFFFFUL; 918 919 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 920 l_times->tv_nsec != LINUX_UTIME_NOW && 921 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 922 return (EINVAL); 923 924 times->tv_sec = l_times->tv_sec; 925 switch (l_times->tv_nsec) 926 { 927 case LINUX_UTIME_OMIT: 928 times->tv_nsec = UTIME_OMIT; 929 break; 930 case LINUX_UTIME_NOW: 931 times->tv_nsec = UTIME_NOW; 932 break; 933 default: 934 times->tv_nsec = l_times->tv_nsec; 935 } 936 937 return (0); 938 } 939 940 int 941 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 942 { 943 struct l_timespec64 l_times[2]; 944 struct timespec times[2], *timesp; 945 int error; 946 947 if (args->times64 != NULL) { 948 error = copyin(args->times64, l_times, sizeof(l_times)); 949 if (error != 0) 950 return (error); 951 952 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 953 if (error != 0) 954 return (error); 955 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 956 if (error != 0) 957 return (error); 958 timesp = times; 959 } else 960 timesp = NULL; 961 962 return (linux_common_utimensat(td, args->dfd, args->pathname, 963 timesp, args->flags)); 964 } 965 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 966 967 #ifdef LINUX_LEGACY_SYSCALLS 968 int 969 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 970 { 971 l_timeval ltv[2]; 972 struct timeval tv[2], *tvp = NULL; 973 char *fname; 974 int error, dfd; 975 976 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 977 978 if (args->utimes != NULL) { 979 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 980 return (error); 981 tv[0].tv_sec = ltv[0].tv_sec; 982 tv[0].tv_usec = ltv[0].tv_usec; 983 tv[1].tv_sec = ltv[1].tv_sec; 984 tv[1].tv_usec = ltv[1].tv_usec; 985 tvp = tv; 986 } 987 988 if (!LUSECONVPATH(td)) { 989 error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 990 tvp, UIO_SYSSPACE); 991 } else { 992 LCONVPATHEXIST_AT(args->filename, &fname, dfd); 993 error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, 994 tvp, UIO_SYSSPACE); 995 LFREEPATH(fname); 996 } 997 return (error); 998 } 999 #endif 1000 1001 static int 1002 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 1003 int options, void *rup, l_siginfo_t *infop) 1004 { 1005 l_siginfo_t lsi; 1006 siginfo_t siginfo; 1007 struct __wrusage wru; 1008 int error, status, tmpstat, sig; 1009 1010 error = kern_wait6(td, idtype, id, &status, options, 1011 rup != NULL ? &wru : NULL, &siginfo); 1012 1013 if (error == 0 && statusp) { 1014 tmpstat = status & 0xffff; 1015 if (WIFSIGNALED(tmpstat)) { 1016 tmpstat = (tmpstat & 0xffffff80) | 1017 bsd_to_linux_signal(WTERMSIG(tmpstat)); 1018 } else if (WIFSTOPPED(tmpstat)) { 1019 tmpstat = (tmpstat & 0xffff00ff) | 1020 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 1021 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 1022 if (WSTOPSIG(status) == SIGTRAP) { 1023 tmpstat = linux_ptrace_status(td, 1024 siginfo.si_pid, tmpstat); 1025 } 1026 #endif 1027 } else if (WIFCONTINUED(tmpstat)) { 1028 tmpstat = 0xffff; 1029 } 1030 error = copyout(&tmpstat, statusp, sizeof(int)); 1031 } 1032 if (error == 0 && rup != NULL) 1033 error = linux_copyout_rusage(&wru.wru_self, rup); 1034 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 1035 sig = bsd_to_linux_signal(siginfo.si_signo); 1036 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 1037 error = copyout(&lsi, infop, sizeof(lsi)); 1038 } 1039 1040 return (error); 1041 } 1042 1043 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1044 int 1045 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 1046 { 1047 struct linux_wait4_args wait4_args; 1048 1049 wait4_args.pid = args->pid; 1050 wait4_args.status = args->status; 1051 wait4_args.options = args->options; 1052 wait4_args.rusage = NULL; 1053 1054 return (linux_wait4(td, &wait4_args)); 1055 } 1056 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1057 1058 int 1059 linux_wait4(struct thread *td, struct linux_wait4_args *args) 1060 { 1061 struct proc *p; 1062 int options, id, idtype; 1063 1064 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 1065 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1066 return (EINVAL); 1067 1068 /* -INT_MIN is not defined. */ 1069 if (args->pid == INT_MIN) 1070 return (ESRCH); 1071 1072 options = 0; 1073 linux_to_bsd_waitopts(args->options, &options); 1074 1075 /* 1076 * For backward compatibility we implicitly add flags WEXITED 1077 * and WTRAPPED here. 1078 */ 1079 options |= WEXITED | WTRAPPED; 1080 1081 if (args->pid == WAIT_ANY) { 1082 idtype = P_ALL; 1083 id = 0; 1084 } else if (args->pid < 0) { 1085 idtype = P_PGID; 1086 id = (id_t)-args->pid; 1087 } else if (args->pid == 0) { 1088 idtype = P_PGID; 1089 p = td->td_proc; 1090 PROC_LOCK(p); 1091 id = p->p_pgid; 1092 PROC_UNLOCK(p); 1093 } else { 1094 idtype = P_PID; 1095 id = (id_t)args->pid; 1096 } 1097 1098 return (linux_common_wait(td, idtype, id, args->status, options, 1099 args->rusage, NULL)); 1100 } 1101 1102 int 1103 linux_waitid(struct thread *td, struct linux_waitid_args *args) 1104 { 1105 idtype_t idtype; 1106 int error, options; 1107 struct proc *p; 1108 pid_t id; 1109 1110 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 1111 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 1112 return (EINVAL); 1113 1114 options = 0; 1115 linux_to_bsd_waitopts(args->options, &options); 1116 1117 id = args->id; 1118 switch (args->idtype) { 1119 case LINUX_P_ALL: 1120 idtype = P_ALL; 1121 break; 1122 case LINUX_P_PID: 1123 if (args->id <= 0) 1124 return (EINVAL); 1125 idtype = P_PID; 1126 break; 1127 case LINUX_P_PGID: 1128 if (linux_use54(td) && args->id == 0) { 1129 p = td->td_proc; 1130 PROC_LOCK(p); 1131 id = p->p_pgid; 1132 PROC_UNLOCK(p); 1133 } else if (args->id <= 0) 1134 return (EINVAL); 1135 idtype = P_PGID; 1136 break; 1137 case LINUX_P_PIDFD: 1138 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 1139 return (ENOSYS); 1140 default: 1141 return (EINVAL); 1142 } 1143 1144 error = linux_common_wait(td, idtype, id, NULL, options, 1145 args->rusage, args->info); 1146 td->td_retval[0] = 0; 1147 1148 return (error); 1149 } 1150 1151 #ifdef LINUX_LEGACY_SYSCALLS 1152 int 1153 linux_mknod(struct thread *td, struct linux_mknod_args *args) 1154 { 1155 char *path; 1156 int error; 1157 enum uio_seg seg; 1158 bool convpath; 1159 1160 convpath = LUSECONVPATH(td); 1161 if (!convpath) { 1162 path = args->path; 1163 seg = UIO_USERSPACE; 1164 } else { 1165 LCONVPATHCREAT(args->path, &path); 1166 seg = UIO_SYSSPACE; 1167 } 1168 1169 switch (args->mode & S_IFMT) { 1170 case S_IFIFO: 1171 case S_IFSOCK: 1172 error = kern_mkfifoat(td, AT_FDCWD, path, seg, 1173 args->mode); 1174 break; 1175 1176 case S_IFCHR: 1177 case S_IFBLK: 1178 error = kern_mknodat(td, AT_FDCWD, path, seg, 1179 args->mode, args->dev); 1180 break; 1181 1182 case S_IFDIR: 1183 error = EPERM; 1184 break; 1185 1186 case 0: 1187 args->mode |= S_IFREG; 1188 /* FALLTHROUGH */ 1189 case S_IFREG: 1190 error = kern_openat(td, AT_FDCWD, path, seg, 1191 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1192 if (error == 0) 1193 kern_close(td, td->td_retval[0]); 1194 break; 1195 1196 default: 1197 error = EINVAL; 1198 break; 1199 } 1200 if (convpath) 1201 LFREEPATH(path); 1202 return (error); 1203 } 1204 #endif 1205 1206 int 1207 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 1208 { 1209 char *path; 1210 int error, dfd; 1211 enum uio_seg seg; 1212 bool convpath; 1213 1214 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 1215 1216 convpath = LUSECONVPATH(td); 1217 if (!convpath) { 1218 path = __DECONST(char *, args->filename); 1219 seg = UIO_USERSPACE; 1220 } else { 1221 LCONVPATHCREAT_AT(args->filename, &path, dfd); 1222 seg = UIO_SYSSPACE; 1223 } 1224 1225 switch (args->mode & S_IFMT) { 1226 case S_IFIFO: 1227 case S_IFSOCK: 1228 error = kern_mkfifoat(td, dfd, path, seg, args->mode); 1229 break; 1230 1231 case S_IFCHR: 1232 case S_IFBLK: 1233 error = kern_mknodat(td, dfd, path, seg, args->mode, 1234 args->dev); 1235 break; 1236 1237 case S_IFDIR: 1238 error = EPERM; 1239 break; 1240 1241 case 0: 1242 args->mode |= S_IFREG; 1243 /* FALLTHROUGH */ 1244 case S_IFREG: 1245 error = kern_openat(td, dfd, path, seg, 1246 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 1247 if (error == 0) 1248 kern_close(td, td->td_retval[0]); 1249 break; 1250 1251 default: 1252 error = EINVAL; 1253 break; 1254 } 1255 if (convpath) 1256 LFREEPATH(path); 1257 return (error); 1258 } 1259 1260 /* 1261 * UGH! This is just about the dumbest idea I've ever heard!! 1262 */ 1263 int 1264 linux_personality(struct thread *td, struct linux_personality_args *args) 1265 { 1266 struct linux_pemuldata *pem; 1267 struct proc *p = td->td_proc; 1268 uint32_t old; 1269 1270 PROC_LOCK(p); 1271 pem = pem_find(p); 1272 old = pem->persona; 1273 if (args->per != 0xffffffff) 1274 pem->persona = args->per; 1275 PROC_UNLOCK(p); 1276 1277 td->td_retval[0] = old; 1278 return (0); 1279 } 1280 1281 struct l_itimerval { 1282 l_timeval it_interval; 1283 l_timeval it_value; 1284 }; 1285 1286 #define B2L_ITIMERVAL(bip, lip) \ 1287 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1288 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1289 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1290 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1291 1292 int 1293 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1294 { 1295 int error; 1296 struct l_itimerval ls; 1297 struct itimerval aitv, oitv; 1298 1299 if (uap->itv == NULL) { 1300 uap->itv = uap->oitv; 1301 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1302 } 1303 1304 error = copyin(uap->itv, &ls, sizeof(ls)); 1305 if (error != 0) 1306 return (error); 1307 B2L_ITIMERVAL(&aitv, &ls); 1308 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1309 if (error != 0 || uap->oitv == NULL) 1310 return (error); 1311 B2L_ITIMERVAL(&ls, &oitv); 1312 1313 return (copyout(&ls, uap->oitv, sizeof(ls))); 1314 } 1315 1316 int 1317 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1318 { 1319 int error; 1320 struct l_itimerval ls; 1321 struct itimerval aitv; 1322 1323 error = kern_getitimer(td, uap->which, &aitv); 1324 if (error != 0) 1325 return (error); 1326 B2L_ITIMERVAL(&ls, &aitv); 1327 return (copyout(&ls, uap->itv, sizeof(ls))); 1328 } 1329 1330 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1331 int 1332 linux_nice(struct thread *td, struct linux_nice_args *args) 1333 { 1334 1335 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1336 } 1337 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1338 1339 int 1340 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1341 { 1342 struct ucred *newcred, *oldcred; 1343 l_gid_t *linux_gidset; 1344 gid_t *bsd_gidset; 1345 int ngrp, error; 1346 struct proc *p; 1347 1348 ngrp = args->gidsetsize; 1349 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1350 return (EINVAL); 1351 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1352 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1353 if (error) 1354 goto out; 1355 newcred = crget(); 1356 crextend(newcred, ngrp + 1); 1357 p = td->td_proc; 1358 PROC_LOCK(p); 1359 oldcred = p->p_ucred; 1360 crcopy(newcred, oldcred); 1361 1362 /* 1363 * cr_groups[0] holds egid. Setting the whole set from 1364 * the supplied set will cause egid to be changed too. 1365 * Keep cr_groups[0] unchanged to prevent that. 1366 */ 1367 1368 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1369 PROC_UNLOCK(p); 1370 crfree(newcred); 1371 goto out; 1372 } 1373 1374 if (ngrp > 0) { 1375 newcred->cr_ngroups = ngrp + 1; 1376 1377 bsd_gidset = newcred->cr_groups; 1378 ngrp--; 1379 while (ngrp >= 0) { 1380 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1381 ngrp--; 1382 } 1383 } else 1384 newcred->cr_ngroups = 1; 1385 1386 setsugid(p); 1387 proc_set_cred(p, newcred); 1388 PROC_UNLOCK(p); 1389 crfree(oldcred); 1390 error = 0; 1391 out: 1392 free(linux_gidset, M_LINUX); 1393 return (error); 1394 } 1395 1396 int 1397 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1398 { 1399 struct ucred *cred; 1400 l_gid_t *linux_gidset; 1401 gid_t *bsd_gidset; 1402 int bsd_gidsetsz, ngrp, error; 1403 1404 cred = td->td_ucred; 1405 bsd_gidset = cred->cr_groups; 1406 bsd_gidsetsz = cred->cr_ngroups - 1; 1407 1408 /* 1409 * cr_groups[0] holds egid. Returning the whole set 1410 * here will cause a duplicate. Exclude cr_groups[0] 1411 * to prevent that. 1412 */ 1413 1414 if ((ngrp = args->gidsetsize) == 0) { 1415 td->td_retval[0] = bsd_gidsetsz; 1416 return (0); 1417 } 1418 1419 if (ngrp < bsd_gidsetsz) 1420 return (EINVAL); 1421 1422 ngrp = 0; 1423 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1424 M_LINUX, M_WAITOK); 1425 while (ngrp < bsd_gidsetsz) { 1426 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1427 ngrp++; 1428 } 1429 1430 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1431 free(linux_gidset, M_LINUX); 1432 if (error) 1433 return (error); 1434 1435 td->td_retval[0] = ngrp; 1436 return (0); 1437 } 1438 1439 static bool 1440 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1441 { 1442 1443 if (linux_dummy_rlimits == 0) 1444 return (false); 1445 1446 switch (resource) { 1447 case LINUX_RLIMIT_LOCKS: 1448 case LINUX_RLIMIT_SIGPENDING: 1449 case LINUX_RLIMIT_MSGQUEUE: 1450 case LINUX_RLIMIT_RTTIME: 1451 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1452 rlim->rlim_max = LINUX_RLIM_INFINITY; 1453 return (true); 1454 case LINUX_RLIMIT_NICE: 1455 case LINUX_RLIMIT_RTPRIO: 1456 rlim->rlim_cur = 0; 1457 rlim->rlim_max = 0; 1458 return (true); 1459 default: 1460 return (false); 1461 } 1462 } 1463 1464 int 1465 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1466 { 1467 struct rlimit bsd_rlim; 1468 struct l_rlimit rlim; 1469 u_int which; 1470 int error; 1471 1472 if (args->resource >= LINUX_RLIM_NLIMITS) 1473 return (EINVAL); 1474 1475 which = linux_to_bsd_resource[args->resource]; 1476 if (which == -1) 1477 return (EINVAL); 1478 1479 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1480 if (error) 1481 return (error); 1482 1483 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1484 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1485 return (kern_setrlimit(td, which, &bsd_rlim)); 1486 } 1487 1488 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1489 int 1490 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1491 { 1492 struct l_rlimit rlim; 1493 struct rlimit bsd_rlim; 1494 u_int which; 1495 1496 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1497 rlim.rlim_cur = bsd_rlim.rlim_cur; 1498 rlim.rlim_max = bsd_rlim.rlim_max; 1499 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1500 } 1501 1502 if (args->resource >= LINUX_RLIM_NLIMITS) 1503 return (EINVAL); 1504 1505 which = linux_to_bsd_resource[args->resource]; 1506 if (which == -1) 1507 return (EINVAL); 1508 1509 lim_rlimit(td, which, &bsd_rlim); 1510 1511 #ifdef COMPAT_LINUX32 1512 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1513 if (rlim.rlim_cur == UINT_MAX) 1514 rlim.rlim_cur = INT_MAX; 1515 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1516 if (rlim.rlim_max == UINT_MAX) 1517 rlim.rlim_max = INT_MAX; 1518 #else 1519 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1520 if (rlim.rlim_cur == ULONG_MAX) 1521 rlim.rlim_cur = LONG_MAX; 1522 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1523 if (rlim.rlim_max == ULONG_MAX) 1524 rlim.rlim_max = LONG_MAX; 1525 #endif 1526 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1527 } 1528 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1529 1530 int 1531 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1532 { 1533 struct l_rlimit rlim; 1534 struct rlimit bsd_rlim; 1535 u_int which; 1536 1537 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1538 rlim.rlim_cur = bsd_rlim.rlim_cur; 1539 rlim.rlim_max = bsd_rlim.rlim_max; 1540 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1541 } 1542 1543 if (args->resource >= LINUX_RLIM_NLIMITS) 1544 return (EINVAL); 1545 1546 which = linux_to_bsd_resource[args->resource]; 1547 if (which == -1) 1548 return (EINVAL); 1549 1550 lim_rlimit(td, which, &bsd_rlim); 1551 1552 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1553 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1554 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1555 } 1556 1557 int 1558 linux_sched_setscheduler(struct thread *td, 1559 struct linux_sched_setscheduler_args *args) 1560 { 1561 struct sched_param sched_param; 1562 struct thread *tdt; 1563 int error, policy; 1564 1565 switch (args->policy) { 1566 case LINUX_SCHED_OTHER: 1567 policy = SCHED_OTHER; 1568 break; 1569 case LINUX_SCHED_FIFO: 1570 policy = SCHED_FIFO; 1571 break; 1572 case LINUX_SCHED_RR: 1573 policy = SCHED_RR; 1574 break; 1575 default: 1576 return (EINVAL); 1577 } 1578 1579 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1580 if (error) 1581 return (error); 1582 1583 if (linux_map_sched_prio) { 1584 switch (policy) { 1585 case SCHED_OTHER: 1586 if (sched_param.sched_priority != 0) 1587 return (EINVAL); 1588 1589 sched_param.sched_priority = 1590 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1591 break; 1592 case SCHED_FIFO: 1593 case SCHED_RR: 1594 if (sched_param.sched_priority < 1 || 1595 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1596 return (EINVAL); 1597 1598 /* 1599 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1600 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1601 */ 1602 sched_param.sched_priority = 1603 (sched_param.sched_priority - 1) * 1604 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1605 (LINUX_MAX_RT_PRIO - 1); 1606 break; 1607 } 1608 } 1609 1610 tdt = linux_tdfind(td, args->pid, -1); 1611 if (tdt == NULL) 1612 return (ESRCH); 1613 1614 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1615 PROC_UNLOCK(tdt->td_proc); 1616 return (error); 1617 } 1618 1619 int 1620 linux_sched_getscheduler(struct thread *td, 1621 struct linux_sched_getscheduler_args *args) 1622 { 1623 struct thread *tdt; 1624 int error, policy; 1625 1626 tdt = linux_tdfind(td, args->pid, -1); 1627 if (tdt == NULL) 1628 return (ESRCH); 1629 1630 error = kern_sched_getscheduler(td, tdt, &policy); 1631 PROC_UNLOCK(tdt->td_proc); 1632 1633 switch (policy) { 1634 case SCHED_OTHER: 1635 td->td_retval[0] = LINUX_SCHED_OTHER; 1636 break; 1637 case SCHED_FIFO: 1638 td->td_retval[0] = LINUX_SCHED_FIFO; 1639 break; 1640 case SCHED_RR: 1641 td->td_retval[0] = LINUX_SCHED_RR; 1642 break; 1643 } 1644 return (error); 1645 } 1646 1647 int 1648 linux_sched_get_priority_max(struct thread *td, 1649 struct linux_sched_get_priority_max_args *args) 1650 { 1651 struct sched_get_priority_max_args bsd; 1652 1653 if (linux_map_sched_prio) { 1654 switch (args->policy) { 1655 case LINUX_SCHED_OTHER: 1656 td->td_retval[0] = 0; 1657 return (0); 1658 case LINUX_SCHED_FIFO: 1659 case LINUX_SCHED_RR: 1660 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1661 return (0); 1662 default: 1663 return (EINVAL); 1664 } 1665 } 1666 1667 switch (args->policy) { 1668 case LINUX_SCHED_OTHER: 1669 bsd.policy = SCHED_OTHER; 1670 break; 1671 case LINUX_SCHED_FIFO: 1672 bsd.policy = SCHED_FIFO; 1673 break; 1674 case LINUX_SCHED_RR: 1675 bsd.policy = SCHED_RR; 1676 break; 1677 default: 1678 return (EINVAL); 1679 } 1680 return (sys_sched_get_priority_max(td, &bsd)); 1681 } 1682 1683 int 1684 linux_sched_get_priority_min(struct thread *td, 1685 struct linux_sched_get_priority_min_args *args) 1686 { 1687 struct sched_get_priority_min_args bsd; 1688 1689 if (linux_map_sched_prio) { 1690 switch (args->policy) { 1691 case LINUX_SCHED_OTHER: 1692 td->td_retval[0] = 0; 1693 return (0); 1694 case LINUX_SCHED_FIFO: 1695 case LINUX_SCHED_RR: 1696 td->td_retval[0] = 1; 1697 return (0); 1698 default: 1699 return (EINVAL); 1700 } 1701 } 1702 1703 switch (args->policy) { 1704 case LINUX_SCHED_OTHER: 1705 bsd.policy = SCHED_OTHER; 1706 break; 1707 case LINUX_SCHED_FIFO: 1708 bsd.policy = SCHED_FIFO; 1709 break; 1710 case LINUX_SCHED_RR: 1711 bsd.policy = SCHED_RR; 1712 break; 1713 default: 1714 return (EINVAL); 1715 } 1716 return (sys_sched_get_priority_min(td, &bsd)); 1717 } 1718 1719 #define REBOOT_CAD_ON 0x89abcdef 1720 #define REBOOT_CAD_OFF 0 1721 #define REBOOT_HALT 0xcdef0123 1722 #define REBOOT_RESTART 0x01234567 1723 #define REBOOT_RESTART2 0xA1B2C3D4 1724 #define REBOOT_POWEROFF 0x4321FEDC 1725 #define REBOOT_MAGIC1 0xfee1dead 1726 #define REBOOT_MAGIC2 0x28121969 1727 #define REBOOT_MAGIC2A 0x05121996 1728 #define REBOOT_MAGIC2B 0x16041998 1729 1730 int 1731 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1732 { 1733 struct reboot_args bsd_args; 1734 1735 if (args->magic1 != REBOOT_MAGIC1) 1736 return (EINVAL); 1737 1738 switch (args->magic2) { 1739 case REBOOT_MAGIC2: 1740 case REBOOT_MAGIC2A: 1741 case REBOOT_MAGIC2B: 1742 break; 1743 default: 1744 return (EINVAL); 1745 } 1746 1747 switch (args->cmd) { 1748 case REBOOT_CAD_ON: 1749 case REBOOT_CAD_OFF: 1750 return (priv_check(td, PRIV_REBOOT)); 1751 case REBOOT_HALT: 1752 bsd_args.opt = RB_HALT; 1753 break; 1754 case REBOOT_RESTART: 1755 case REBOOT_RESTART2: 1756 bsd_args.opt = 0; 1757 break; 1758 case REBOOT_POWEROFF: 1759 bsd_args.opt = RB_POWEROFF; 1760 break; 1761 default: 1762 return (EINVAL); 1763 } 1764 return (sys_reboot(td, &bsd_args)); 1765 } 1766 1767 int 1768 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1769 { 1770 1771 td->td_retval[0] = td->td_proc->p_pid; 1772 1773 return (0); 1774 } 1775 1776 int 1777 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1778 { 1779 struct linux_emuldata *em; 1780 1781 em = em_find(td); 1782 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1783 1784 td->td_retval[0] = em->em_tid; 1785 1786 return (0); 1787 } 1788 1789 int 1790 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1791 { 1792 1793 td->td_retval[0] = kern_getppid(td); 1794 return (0); 1795 } 1796 1797 int 1798 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1799 { 1800 1801 td->td_retval[0] = td->td_ucred->cr_rgid; 1802 return (0); 1803 } 1804 1805 int 1806 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1807 { 1808 1809 td->td_retval[0] = td->td_ucred->cr_ruid; 1810 return (0); 1811 } 1812 1813 int 1814 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1815 { 1816 1817 return (kern_getsid(td, args->pid)); 1818 } 1819 1820 int 1821 linux_nosys(struct thread *td, struct nosys_args *ignore) 1822 { 1823 1824 return (ENOSYS); 1825 } 1826 1827 int 1828 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1829 { 1830 int error; 1831 1832 error = kern_getpriority(td, args->which, args->who); 1833 td->td_retval[0] = 20 - td->td_retval[0]; 1834 return (error); 1835 } 1836 1837 int 1838 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1839 { 1840 int name[2]; 1841 1842 name[0] = CTL_KERN; 1843 name[1] = KERN_HOSTNAME; 1844 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1845 args->len, 0, 0)); 1846 } 1847 1848 int 1849 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1850 { 1851 int name[2]; 1852 1853 name[0] = CTL_KERN; 1854 name[1] = KERN_NISDOMAINNAME; 1855 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1856 args->len, 0, 0)); 1857 } 1858 1859 int 1860 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1861 { 1862 1863 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1864 args->error_code); 1865 1866 /* 1867 * XXX: we should send a signal to the parent if 1868 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1869 * as it doesnt occur often. 1870 */ 1871 exit1(td, args->error_code, 0); 1872 /* NOTREACHED */ 1873 } 1874 1875 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1876 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1877 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1878 1879 struct l_user_cap_header { 1880 l_int version; 1881 l_int pid; 1882 }; 1883 1884 struct l_user_cap_data { 1885 l_int effective; 1886 l_int permitted; 1887 l_int inheritable; 1888 }; 1889 1890 int 1891 linux_capget(struct thread *td, struct linux_capget_args *uap) 1892 { 1893 struct l_user_cap_header luch; 1894 struct l_user_cap_data lucd[2]; 1895 int error, u32s; 1896 1897 if (uap->hdrp == NULL) 1898 return (EFAULT); 1899 1900 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1901 if (error != 0) 1902 return (error); 1903 1904 switch (luch.version) { 1905 case _LINUX_CAPABILITY_VERSION_1: 1906 u32s = 1; 1907 break; 1908 case _LINUX_CAPABILITY_VERSION_2: 1909 case _LINUX_CAPABILITY_VERSION_3: 1910 u32s = 2; 1911 break; 1912 default: 1913 luch.version = _LINUX_CAPABILITY_VERSION_1; 1914 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1915 if (error) 1916 return (error); 1917 return (EINVAL); 1918 } 1919 1920 if (luch.pid) 1921 return (EPERM); 1922 1923 if (uap->datap) { 1924 /* 1925 * The current implementation doesn't support setting 1926 * a capability (it's essentially a stub) so indicate 1927 * that no capabilities are currently set or available 1928 * to request. 1929 */ 1930 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1931 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1932 } 1933 1934 return (error); 1935 } 1936 1937 int 1938 linux_capset(struct thread *td, struct linux_capset_args *uap) 1939 { 1940 struct l_user_cap_header luch; 1941 struct l_user_cap_data lucd[2]; 1942 int error, i, u32s; 1943 1944 if (uap->hdrp == NULL || uap->datap == NULL) 1945 return (EFAULT); 1946 1947 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1948 if (error != 0) 1949 return (error); 1950 1951 switch (luch.version) { 1952 case _LINUX_CAPABILITY_VERSION_1: 1953 u32s = 1; 1954 break; 1955 case _LINUX_CAPABILITY_VERSION_2: 1956 case _LINUX_CAPABILITY_VERSION_3: 1957 u32s = 2; 1958 break; 1959 default: 1960 luch.version = _LINUX_CAPABILITY_VERSION_1; 1961 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1962 if (error) 1963 return (error); 1964 return (EINVAL); 1965 } 1966 1967 if (luch.pid) 1968 return (EPERM); 1969 1970 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1971 if (error != 0) 1972 return (error); 1973 1974 /* We currently don't support setting any capabilities. */ 1975 for (i = 0; i < u32s; i++) { 1976 if (lucd[i].effective || lucd[i].permitted || 1977 lucd[i].inheritable) { 1978 linux_msg(td, 1979 "capset[%d] effective=0x%x, permitted=0x%x, " 1980 "inheritable=0x%x is not implemented", i, 1981 (int)lucd[i].effective, (int)lucd[i].permitted, 1982 (int)lucd[i].inheritable); 1983 return (EPERM); 1984 } 1985 } 1986 1987 return (0); 1988 } 1989 1990 int 1991 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1992 { 1993 int error = 0, max_size, arg; 1994 struct proc *p = td->td_proc; 1995 char comm[LINUX_MAX_COMM_LEN]; 1996 int pdeath_signal, trace_state; 1997 1998 switch (args->option) { 1999 case LINUX_PR_SET_PDEATHSIG: 2000 if (!LINUX_SIG_VALID(args->arg2)) 2001 return (EINVAL); 2002 pdeath_signal = linux_to_bsd_signal(args->arg2); 2003 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 2004 &pdeath_signal)); 2005 case LINUX_PR_GET_PDEATHSIG: 2006 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 2007 &pdeath_signal); 2008 if (error != 0) 2009 return (error); 2010 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 2011 return (copyout(&pdeath_signal, 2012 (void *)(register_t)args->arg2, 2013 sizeof(pdeath_signal))); 2014 /* 2015 * In Linux, this flag controls if set[gu]id processes can coredump. 2016 * There are additional semantics imposed on processes that cannot 2017 * coredump: 2018 * - Such processes can not be ptraced. 2019 * - There are some semantics around ownership of process-related files 2020 * in the /proc namespace. 2021 * 2022 * In FreeBSD, we can (and by default, do) disable setuid coredump 2023 * system-wide with 'sugid_coredump.' We control tracability on a 2024 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 2025 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 2026 * procctl is roughly analogous to Linux's DUMPABLE. 2027 * 2028 * So, proxy these knobs to the corresponding PROC_TRACE setting. 2029 */ 2030 case LINUX_PR_GET_DUMPABLE: 2031 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 2032 &trace_state); 2033 if (error != 0) 2034 return (error); 2035 td->td_retval[0] = (trace_state != -1); 2036 return (0); 2037 case LINUX_PR_SET_DUMPABLE: 2038 /* 2039 * It is only valid for userspace to set one of these two 2040 * flags, and only one at a time. 2041 */ 2042 switch (args->arg2) { 2043 case LINUX_SUID_DUMP_DISABLE: 2044 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 2045 break; 2046 case LINUX_SUID_DUMP_USER: 2047 trace_state = PROC_TRACE_CTL_ENABLE; 2048 break; 2049 default: 2050 return (EINVAL); 2051 } 2052 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 2053 &trace_state)); 2054 case LINUX_PR_GET_KEEPCAPS: 2055 /* 2056 * Indicate that we always clear the effective and 2057 * permitted capability sets when the user id becomes 2058 * non-zero (actually the capability sets are simply 2059 * always zero in the current implementation). 2060 */ 2061 td->td_retval[0] = 0; 2062 break; 2063 case LINUX_PR_SET_KEEPCAPS: 2064 /* 2065 * Ignore requests to keep the effective and permitted 2066 * capability sets when the user id becomes non-zero. 2067 */ 2068 break; 2069 case LINUX_PR_SET_NAME: 2070 /* 2071 * To be on the safe side we need to make sure to not 2072 * overflow the size a Linux program expects. We already 2073 * do this here in the copyin, so that we don't need to 2074 * check on copyout. 2075 */ 2076 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2077 error = copyinstr((void *)(register_t)args->arg2, comm, 2078 max_size, NULL); 2079 2080 /* Linux silently truncates the name if it is too long. */ 2081 if (error == ENAMETOOLONG) { 2082 /* 2083 * XXX: copyinstr() isn't documented to populate the 2084 * array completely, so do a copyin() to be on the 2085 * safe side. This should be changed in case 2086 * copyinstr() is changed to guarantee this. 2087 */ 2088 error = copyin((void *)(register_t)args->arg2, comm, 2089 max_size - 1); 2090 comm[max_size - 1] = '\0'; 2091 } 2092 if (error) 2093 return (error); 2094 2095 PROC_LOCK(p); 2096 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2097 PROC_UNLOCK(p); 2098 break; 2099 case LINUX_PR_GET_NAME: 2100 PROC_LOCK(p); 2101 strlcpy(comm, p->p_comm, sizeof(comm)); 2102 PROC_UNLOCK(p); 2103 error = copyout(comm, (void *)(register_t)args->arg2, 2104 strlen(comm) + 1); 2105 break; 2106 case LINUX_PR_GET_SECCOMP: 2107 case LINUX_PR_SET_SECCOMP: 2108 /* 2109 * Same as returned by Linux without CONFIG_SECCOMP enabled. 2110 */ 2111 error = EINVAL; 2112 break; 2113 case LINUX_PR_CAPBSET_READ: 2114 #if 0 2115 /* 2116 * This makes too much noise with Ubuntu Focal. 2117 */ 2118 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 2119 (int)args->arg2); 2120 #endif 2121 error = EINVAL; 2122 break; 2123 case LINUX_PR_SET_NO_NEW_PRIVS: 2124 arg = args->arg2 == 1 ? 2125 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 2126 error = kern_procctl(td, P_PID, p->p_pid, 2127 PROC_NO_NEW_PRIVS_CTL, &arg); 2128 break; 2129 case LINUX_PR_SET_PTRACER: 2130 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 2131 error = EINVAL; 2132 break; 2133 default: 2134 linux_msg(td, "unsupported prctl option %d", args->option); 2135 error = EINVAL; 2136 break; 2137 } 2138 2139 return (error); 2140 } 2141 2142 int 2143 linux_sched_setparam(struct thread *td, 2144 struct linux_sched_setparam_args *uap) 2145 { 2146 struct sched_param sched_param; 2147 struct thread *tdt; 2148 int error, policy; 2149 2150 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2151 if (error) 2152 return (error); 2153 2154 tdt = linux_tdfind(td, uap->pid, -1); 2155 if (tdt == NULL) 2156 return (ESRCH); 2157 2158 if (linux_map_sched_prio) { 2159 error = kern_sched_getscheduler(td, tdt, &policy); 2160 if (error) 2161 goto out; 2162 2163 switch (policy) { 2164 case SCHED_OTHER: 2165 if (sched_param.sched_priority != 0) { 2166 error = EINVAL; 2167 goto out; 2168 } 2169 sched_param.sched_priority = 2170 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 2171 break; 2172 case SCHED_FIFO: 2173 case SCHED_RR: 2174 if (sched_param.sched_priority < 1 || 2175 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 2176 error = EINVAL; 2177 goto out; 2178 } 2179 /* 2180 * Map [1, LINUX_MAX_RT_PRIO - 1] to 2181 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 2182 */ 2183 sched_param.sched_priority = 2184 (sched_param.sched_priority - 1) * 2185 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 2186 (LINUX_MAX_RT_PRIO - 1); 2187 break; 2188 } 2189 } 2190 2191 error = kern_sched_setparam(td, tdt, &sched_param); 2192 out: PROC_UNLOCK(tdt->td_proc); 2193 return (error); 2194 } 2195 2196 int 2197 linux_sched_getparam(struct thread *td, 2198 struct linux_sched_getparam_args *uap) 2199 { 2200 struct sched_param sched_param; 2201 struct thread *tdt; 2202 int error, policy; 2203 2204 tdt = linux_tdfind(td, uap->pid, -1); 2205 if (tdt == NULL) 2206 return (ESRCH); 2207 2208 error = kern_sched_getparam(td, tdt, &sched_param); 2209 if (error) { 2210 PROC_UNLOCK(tdt->td_proc); 2211 return (error); 2212 } 2213 2214 if (linux_map_sched_prio) { 2215 error = kern_sched_getscheduler(td, tdt, &policy); 2216 PROC_UNLOCK(tdt->td_proc); 2217 if (error) 2218 return (error); 2219 2220 switch (policy) { 2221 case SCHED_OTHER: 2222 sched_param.sched_priority = 0; 2223 break; 2224 case SCHED_FIFO: 2225 case SCHED_RR: 2226 /* 2227 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 2228 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 2229 */ 2230 sched_param.sched_priority = 2231 (sched_param.sched_priority * 2232 (LINUX_MAX_RT_PRIO - 1) + 2233 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 2234 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 2235 break; 2236 } 2237 } else 2238 PROC_UNLOCK(tdt->td_proc); 2239 2240 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 2241 return (error); 2242 } 2243 2244 static const struct cpuset_copy_cb copy_set = { 2245 .cpuset_copyin = copyin, 2246 .cpuset_copyout = copyout 2247 }; 2248 2249 /* 2250 * Get affinity of a process. 2251 */ 2252 int 2253 linux_sched_getaffinity(struct thread *td, 2254 struct linux_sched_getaffinity_args *args) 2255 { 2256 struct thread *tdt; 2257 int error; 2258 id_t tid; 2259 2260 tdt = linux_tdfind(td, args->pid, -1); 2261 if (tdt == NULL) 2262 return (ESRCH); 2263 tid = tdt->td_tid; 2264 PROC_UNLOCK(tdt->td_proc); 2265 2266 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2267 tid, args->len, (cpuset_t *)args->user_mask_ptr, ©_set); 2268 if (error == ERANGE) 2269 error = EINVAL; 2270 if (error == 0) 2271 td->td_retval[0] = min(args->len, sizeof(cpuset_t)); 2272 2273 return (error); 2274 } 2275 2276 /* 2277 * Set affinity of a process. 2278 */ 2279 int 2280 linux_sched_setaffinity(struct thread *td, 2281 struct linux_sched_setaffinity_args *args) 2282 { 2283 struct thread *tdt; 2284 cpuset_t *mask; 2285 int cpu, error; 2286 size_t len; 2287 id_t tid; 2288 2289 tdt = linux_tdfind(td, args->pid, -1); 2290 if (tdt == NULL) 2291 return (ESRCH); 2292 tid = tdt->td_tid; 2293 PROC_UNLOCK(tdt->td_proc); 2294 2295 len = min(args->len, sizeof(cpuset_t)); 2296 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);; 2297 error = copyin(args->user_mask_ptr, mask, len); 2298 if (error != 0) 2299 goto out; 2300 /* Linux ignore high bits */ 2301 CPU_FOREACH_ISSET(cpu, mask) 2302 if (cpu > mp_maxid) 2303 CPU_CLR(cpu, mask); 2304 2305 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2306 tid, mask); 2307 if (error == EDEADLK) 2308 error = EINVAL; 2309 out: 2310 free(mask, M_TEMP); 2311 return (error); 2312 } 2313 2314 struct linux_rlimit64 { 2315 uint64_t rlim_cur; 2316 uint64_t rlim_max; 2317 }; 2318 2319 int 2320 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2321 { 2322 struct rlimit rlim, nrlim; 2323 struct linux_rlimit64 lrlim; 2324 struct proc *p; 2325 u_int which; 2326 int flags; 2327 int error; 2328 2329 if (args->new == NULL && args->old != NULL) { 2330 if (linux_get_dummy_limit(args->resource, &rlim)) { 2331 lrlim.rlim_cur = rlim.rlim_cur; 2332 lrlim.rlim_max = rlim.rlim_max; 2333 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2334 } 2335 } 2336 2337 if (args->resource >= LINUX_RLIM_NLIMITS) 2338 return (EINVAL); 2339 2340 which = linux_to_bsd_resource[args->resource]; 2341 if (which == -1) 2342 return (EINVAL); 2343 2344 if (args->new != NULL) { 2345 /* 2346 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2347 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2348 * as INFINITY so we do not need a conversion even. 2349 */ 2350 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2351 if (error != 0) 2352 return (error); 2353 } 2354 2355 flags = PGET_HOLD | PGET_NOTWEXIT; 2356 if (args->new != NULL) 2357 flags |= PGET_CANDEBUG; 2358 else 2359 flags |= PGET_CANSEE; 2360 if (args->pid == 0) { 2361 p = td->td_proc; 2362 PHOLD(p); 2363 } else { 2364 error = pget(args->pid, flags, &p); 2365 if (error != 0) 2366 return (error); 2367 } 2368 if (args->old != NULL) { 2369 PROC_LOCK(p); 2370 lim_rlimit_proc(p, which, &rlim); 2371 PROC_UNLOCK(p); 2372 if (rlim.rlim_cur == RLIM_INFINITY) 2373 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2374 else 2375 lrlim.rlim_cur = rlim.rlim_cur; 2376 if (rlim.rlim_max == RLIM_INFINITY) 2377 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2378 else 2379 lrlim.rlim_max = rlim.rlim_max; 2380 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2381 if (error != 0) 2382 goto out; 2383 } 2384 2385 if (args->new != NULL) 2386 error = kern_proc_setrlimit(td, p, which, &nrlim); 2387 2388 out: 2389 PRELE(p); 2390 return (error); 2391 } 2392 2393 int 2394 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2395 { 2396 struct timespec ts, *tsp; 2397 int error; 2398 2399 if (args->tsp != NULL) { 2400 error = linux_get_timespec(&ts, args->tsp); 2401 if (error != 0) 2402 return (error); 2403 tsp = &ts; 2404 } else 2405 tsp = NULL; 2406 2407 error = linux_common_pselect6(td, args->nfds, args->readfds, 2408 args->writefds, args->exceptfds, tsp, args->sig); 2409 2410 if (args->tsp != NULL) 2411 linux_put_timespec(&ts, args->tsp); 2412 return (error); 2413 } 2414 2415 static int 2416 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2417 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2418 l_uintptr_t *sig) 2419 { 2420 struct timeval utv, tv0, tv1, *tvp; 2421 struct l_pselect6arg lpse6; 2422 l_sigset_t l_ss; 2423 sigset_t *ssp; 2424 sigset_t ss; 2425 int error; 2426 2427 ssp = NULL; 2428 if (sig != NULL) { 2429 error = copyin(sig, &lpse6, sizeof(lpse6)); 2430 if (error != 0) 2431 return (error); 2432 if (lpse6.ss_len != sizeof(l_ss)) 2433 return (EINVAL); 2434 if (lpse6.ss != 0) { 2435 error = copyin(PTRIN(lpse6.ss), &l_ss, 2436 sizeof(l_ss)); 2437 if (error != 0) 2438 return (error); 2439 linux_to_bsd_sigset(&l_ss, &ss); 2440 ssp = &ss; 2441 } 2442 } else 2443 ssp = NULL; 2444 2445 /* 2446 * Currently glibc changes nanosecond number to microsecond. 2447 * This mean losing precision but for now it is hardly seen. 2448 */ 2449 if (tsp != NULL) { 2450 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2451 if (itimerfix(&utv)) 2452 return (EINVAL); 2453 2454 microtime(&tv0); 2455 tvp = &utv; 2456 } else 2457 tvp = NULL; 2458 2459 error = kern_pselect(td, nfds, readfds, writefds, 2460 exceptfds, tvp, ssp, LINUX_NFDBITS); 2461 2462 if (tsp != NULL) { 2463 /* 2464 * Compute how much time was left of the timeout, 2465 * by subtracting the current time and the time 2466 * before we started the call, and subtracting 2467 * that result from the user-supplied value. 2468 */ 2469 microtime(&tv1); 2470 timevalsub(&tv1, &tv0); 2471 timevalsub(&utv, &tv1); 2472 if (utv.tv_sec < 0) 2473 timevalclear(&utv); 2474 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2475 } 2476 return (error); 2477 } 2478 2479 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2480 int 2481 linux_pselect6_time64(struct thread *td, 2482 struct linux_pselect6_time64_args *args) 2483 { 2484 struct timespec ts, *tsp; 2485 int error; 2486 2487 if (args->tsp != NULL) { 2488 error = linux_get_timespec64(&ts, args->tsp); 2489 if (error != 0) 2490 return (error); 2491 tsp = &ts; 2492 } else 2493 tsp = NULL; 2494 2495 error = linux_common_pselect6(td, args->nfds, args->readfds, 2496 args->writefds, args->exceptfds, tsp, args->sig); 2497 2498 if (args->tsp != NULL) 2499 linux_put_timespec64(&ts, args->tsp); 2500 return (error); 2501 } 2502 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2503 2504 int 2505 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2506 { 2507 struct timespec uts, *tsp; 2508 int error; 2509 2510 if (args->tsp != NULL) { 2511 error = linux_get_timespec(&uts, args->tsp); 2512 if (error != 0) 2513 return (error); 2514 tsp = &uts; 2515 } else 2516 tsp = NULL; 2517 2518 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2519 args->sset, args->ssize); 2520 if (error == 0 && args->tsp != NULL) 2521 error = linux_put_timespec(&uts, args->tsp); 2522 return (error); 2523 } 2524 2525 static int 2526 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2527 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2528 { 2529 struct timespec ts0, ts1; 2530 struct pollfd stackfds[32]; 2531 struct pollfd *kfds; 2532 l_sigset_t l_ss; 2533 sigset_t *ssp; 2534 sigset_t ss; 2535 int error; 2536 2537 if (kern_poll_maxfds(nfds)) 2538 return (EINVAL); 2539 if (sset != NULL) { 2540 if (ssize != sizeof(l_ss)) 2541 return (EINVAL); 2542 error = copyin(sset, &l_ss, sizeof(l_ss)); 2543 if (error) 2544 return (error); 2545 linux_to_bsd_sigset(&l_ss, &ss); 2546 ssp = &ss; 2547 } else 2548 ssp = NULL; 2549 if (tsp != NULL) 2550 nanotime(&ts0); 2551 2552 if (nfds > nitems(stackfds)) 2553 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2554 else 2555 kfds = stackfds; 2556 error = linux_pollin(td, kfds, fds, nfds); 2557 if (error != 0) 2558 goto out; 2559 2560 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2561 if (error == 0) 2562 error = linux_pollout(td, kfds, fds, nfds); 2563 2564 if (error == 0 && tsp != NULL) { 2565 if (td->td_retval[0]) { 2566 nanotime(&ts1); 2567 timespecsub(&ts1, &ts0, &ts1); 2568 timespecsub(tsp, &ts1, tsp); 2569 if (tsp->tv_sec < 0) 2570 timespecclear(tsp); 2571 } else 2572 timespecclear(tsp); 2573 } 2574 2575 out: 2576 if (nfds > nitems(stackfds)) 2577 free(kfds, M_TEMP); 2578 return (error); 2579 } 2580 2581 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2582 int 2583 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2584 { 2585 struct timespec uts, *tsp; 2586 int error; 2587 2588 if (args->tsp != NULL) { 2589 error = linux_get_timespec64(&uts, args->tsp); 2590 if (error != 0) 2591 return (error); 2592 tsp = &uts; 2593 } else 2594 tsp = NULL; 2595 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2596 args->sset, args->ssize); 2597 if (error == 0 && args->tsp != NULL) 2598 error = linux_put_timespec64(&uts, args->tsp); 2599 return (error); 2600 } 2601 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2602 2603 static int 2604 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2605 { 2606 int error; 2607 u_int i; 2608 2609 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2610 if (error != 0) 2611 return (error); 2612 2613 for (i = 0; i < nfd; i++) { 2614 if (fds->events != 0) 2615 linux_to_bsd_poll_events(td, fds->fd, 2616 fds->events, &fds->events); 2617 fds++; 2618 } 2619 return (0); 2620 } 2621 2622 static int 2623 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2624 { 2625 int error = 0; 2626 u_int i, n = 0; 2627 2628 for (i = 0; i < nfd; i++) { 2629 if (fds->revents != 0) { 2630 bsd_to_linux_poll_events(fds->revents, 2631 &fds->revents); 2632 n++; 2633 } 2634 error = copyout(&fds->revents, &ufds->revents, 2635 sizeof(ufds->revents)); 2636 if (error) 2637 return (error); 2638 fds++; 2639 ufds++; 2640 } 2641 td->td_retval[0] = n; 2642 return (0); 2643 } 2644 2645 static int 2646 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2647 struct timespec *ts) 2648 { 2649 struct thread *tdt; 2650 int error; 2651 2652 /* 2653 * According to man in case the invalid pid specified 2654 * EINVAL should be returned. 2655 */ 2656 if (pid < 0) 2657 return (EINVAL); 2658 2659 tdt = linux_tdfind(td, pid, -1); 2660 if (tdt == NULL) 2661 return (ESRCH); 2662 2663 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2664 PROC_UNLOCK(tdt->td_proc); 2665 return (error); 2666 } 2667 2668 int 2669 linux_sched_rr_get_interval(struct thread *td, 2670 struct linux_sched_rr_get_interval_args *uap) 2671 { 2672 struct timespec ts; 2673 int error; 2674 2675 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2676 if (error != 0) 2677 return (error); 2678 return (linux_put_timespec(&ts, uap->interval)); 2679 } 2680 2681 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2682 int 2683 linux_sched_rr_get_interval_time64(struct thread *td, 2684 struct linux_sched_rr_get_interval_time64_args *uap) 2685 { 2686 struct timespec ts; 2687 int error; 2688 2689 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2690 if (error != 0) 2691 return (error); 2692 return (linux_put_timespec64(&ts, uap->interval)); 2693 } 2694 #endif 2695 2696 /* 2697 * In case when the Linux thread is the initial thread in 2698 * the thread group thread id is equal to the process id. 2699 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2700 */ 2701 struct thread * 2702 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2703 { 2704 struct linux_emuldata *em; 2705 struct thread *tdt; 2706 struct proc *p; 2707 2708 tdt = NULL; 2709 if (tid == 0 || tid == td->td_tid) { 2710 if (pid != -1 && td->td_proc->p_pid != pid) 2711 return (NULL); 2712 PROC_LOCK(td->td_proc); 2713 return (td); 2714 } else if (tid > PID_MAX) 2715 return (tdfind(tid, pid)); 2716 2717 /* 2718 * Initial thread where the tid equal to the pid. 2719 */ 2720 p = pfind(tid); 2721 if (p != NULL) { 2722 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2723 (pid != -1 && tid != pid)) { 2724 /* 2725 * p is not a Linuxulator process. 2726 */ 2727 PROC_UNLOCK(p); 2728 return (NULL); 2729 } 2730 FOREACH_THREAD_IN_PROC(p, tdt) { 2731 em = em_find(tdt); 2732 if (tid == em->em_tid) 2733 return (tdt); 2734 } 2735 PROC_UNLOCK(p); 2736 } 2737 return (NULL); 2738 } 2739 2740 void 2741 linux_to_bsd_waitopts(int options, int *bsdopts) 2742 { 2743 2744 if (options & LINUX_WNOHANG) 2745 *bsdopts |= WNOHANG; 2746 if (options & LINUX_WUNTRACED) 2747 *bsdopts |= WUNTRACED; 2748 if (options & LINUX_WEXITED) 2749 *bsdopts |= WEXITED; 2750 if (options & LINUX_WCONTINUED) 2751 *bsdopts |= WCONTINUED; 2752 if (options & LINUX_WNOWAIT) 2753 *bsdopts |= WNOWAIT; 2754 2755 if (options & __WCLONE) 2756 *bsdopts |= WLINUXCLONE; 2757 } 2758 2759 int 2760 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2761 { 2762 struct uio uio; 2763 struct iovec iov; 2764 int error; 2765 2766 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2767 return (EINVAL); 2768 if (args->count > INT_MAX) 2769 args->count = INT_MAX; 2770 2771 iov.iov_base = args->buf; 2772 iov.iov_len = args->count; 2773 2774 uio.uio_iov = &iov; 2775 uio.uio_iovcnt = 1; 2776 uio.uio_resid = iov.iov_len; 2777 uio.uio_segflg = UIO_USERSPACE; 2778 uio.uio_rw = UIO_READ; 2779 uio.uio_td = td; 2780 2781 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2782 if (error == 0) 2783 td->td_retval[0] = args->count - uio.uio_resid; 2784 return (error); 2785 } 2786 2787 int 2788 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2789 { 2790 2791 /* Needs to be page-aligned */ 2792 if (args->start & PAGE_MASK) 2793 return (EINVAL); 2794 return (kern_mincore(td, args->start, args->len, args->vec)); 2795 } 2796 2797 #define SYSLOG_TAG "<6>" 2798 2799 int 2800 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2801 { 2802 char buf[128], *src, *dst; 2803 u_int seq; 2804 int buflen, error; 2805 2806 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2807 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2808 return (EINVAL); 2809 } 2810 2811 if (args->len < 6) { 2812 td->td_retval[0] = 0; 2813 return (0); 2814 } 2815 2816 error = priv_check(td, PRIV_MSGBUF); 2817 if (error) 2818 return (error); 2819 2820 mtx_lock(&msgbuf_lock); 2821 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2822 mtx_unlock(&msgbuf_lock); 2823 2824 dst = args->buf; 2825 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2826 /* The -1 is to skip the trailing '\0'. */ 2827 dst += sizeof(SYSLOG_TAG) - 1; 2828 2829 while (error == 0) { 2830 mtx_lock(&msgbuf_lock); 2831 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2832 mtx_unlock(&msgbuf_lock); 2833 2834 if (buflen == 0) 2835 break; 2836 2837 for (src = buf; src < buf + buflen && error == 0; src++) { 2838 if (*src == '\0') 2839 continue; 2840 2841 if (dst >= args->buf + args->len) 2842 goto out; 2843 2844 error = copyout(src, dst, 1); 2845 dst++; 2846 2847 if (*src == '\n' && *(src + 1) != '<' && 2848 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2849 error = copyout(&SYSLOG_TAG, 2850 dst, sizeof(SYSLOG_TAG)); 2851 dst += sizeof(SYSLOG_TAG) - 1; 2852 } 2853 } 2854 } 2855 out: 2856 td->td_retval[0] = dst - args->buf; 2857 return (error); 2858 } 2859 2860 int 2861 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2862 { 2863 int cpu, error, node; 2864 2865 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2866 error = 0; 2867 node = cpuid_to_pcpu[cpu]->pc_domain; 2868 2869 if (args->cpu != NULL) 2870 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2871 if (args->node != NULL) 2872 error = copyout(&node, args->node, sizeof(l_int)); 2873 return (error); 2874 } 2875 2876 #if defined(__i386__) || defined(__amd64__) 2877 int 2878 linux_poll(struct thread *td, struct linux_poll_args *args) 2879 { 2880 struct timespec ts, *tsp; 2881 2882 if (args->timeout != INFTIM) { 2883 if (args->timeout < 0) 2884 return (EINVAL); 2885 ts.tv_sec = args->timeout / 1000; 2886 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2887 tsp = &ts; 2888 } else 2889 tsp = NULL; 2890 2891 return (linux_common_ppoll(td, args->fds, args->nfds, 2892 tsp, NULL, 0)); 2893 } 2894 #endif /* __i386__ || __amd64__ */ 2895 2896 int 2897 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2898 { 2899 2900 switch (args->op) { 2901 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2902 return (EOPNOTSUPP); 2903 default: 2904 /* 2905 * Ignore unknown operations, just like Linux kernel built 2906 * without CONFIG_SECCOMP. 2907 */ 2908 return (EINVAL); 2909 } 2910 } 2911 2912 #ifndef COMPAT_LINUX32 2913 int 2914 linux_execve(struct thread *td, struct linux_execve_args *args) 2915 { 2916 struct image_args eargs; 2917 char *path; 2918 int error; 2919 2920 LINUX_CTR(execve); 2921 2922 if (!LUSECONVPATH(td)) { 2923 error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE, 2924 args->argp, args->envp); 2925 } else { 2926 LCONVPATHEXIST(args->path, &path); 2927 error = exec_copyin_args(&eargs, path, UIO_SYSSPACE, args->argp, 2928 args->envp); 2929 LFREEPATH(path); 2930 } 2931 if (error == 0) 2932 error = linux_common_execve(td, &eargs); 2933 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2934 return (error); 2935 } 2936 #endif 2937