/*-
 * SPDX-License-Identifier: BSD-3-Clause
 *
 * Copyright (c) 2002 Doug Rabson
 * Copyright (c) 1994-1995 Søren Schmidt
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer
 *    in this position and unchanged.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the author may not be used to endorse or promote products
 *    derived from this software without specific prior written permission
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/blist.h>
#include <sys/fcntl.h>
#if defined(__i386__)
#include <sys/imgact_aout.h>
#endif
#include <sys/jail.h>
#include <sys/imgact.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/poll.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/cpuset.h>
#include <sys/uio.h>

#include <security/audit/audit.h>
#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_common.h>
#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */

static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define	LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);
static int	linux_common_pselect6(struct thread *, l_int,
			l_fd_set *, l_fd_set *, l_fd_set *,
			struct timespec *, l_uintptr_t *);
static int	linux_common_ppoll(struct thread *, struct pollfd *,
			uint32_t, struct timespec *, l_sigset_t *,
			l_size_t);
static int	linux_pollin(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
static int	linux_pollout(struct thread *, struct pollfd *,
			struct pollfd *, u_int);
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;
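
	/*
	 * Illustrative note: with the usual averunnable.fscale of 2048, a
	 * BSD load average of 0.50 is stored as ldavg == 1024 and converts
	 * to 1024 * 65536 / 2048 == 32768 here; Linux userland divides by
	 * LINUX_SYSINFO_LOADS_SCALE (65536) and prints "0.50" again.
	 */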
	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects
	 * such as shared memory segments and tmpfs files.  There is no
	 * cheap way to compute this, so just leave the field unpopulated.
	 * Linux itself only started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion
	 * of high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error __diagused;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful.  Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit and so avoid an error from it.
	 *
	 * XXX. Linux limits secs to INT_MAX on 32-bit platforms and does
	 * not limit it at all on 64-bit ones.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));
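
	/*
	 * Return the time remaining on the previous alarm, rounded to the
	 * nearest second, but never report a still-pending alarm as 0:
	 * e.g. 0.2s left reports 1, 2.4s reports 2, and 2.6s reports 3.
	 */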
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif
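
/*
 * Linux brk(2) returns the new program break on success and the current,
 * unchanged break on failure; it never returns -1 the way the BSD/POSIX
 * interface does, which is why no error is ever returned here.
 */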
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#if defined(__i386__)
/* XXX: what about amd64/linux32? */

int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	if (!LUSECONVPATH(td)) {
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_USERSPACE, args->library);
		error = namei(&ni);
	} else {
		LCONVPATHEXIST(args->library, &library);
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, library);
		error = namei(&ni);
		LFREEPATH(library);
	}
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE_PNBUF(&ni);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCES is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete.  It should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check that file_offset is page aligned.  Currently we cannot
	 * handle misaligned file offsets, and so we read in the entire
	 * image (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid.  Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
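	/*
	 * That is, with 4 KiB pages FreeBSD's PAGE_MASK is 0xfff (the
	 * offset bits), so this test rejects any address that is not page
	 * aligned; Linux's PAGE_MASK would be ~0xfff.
	 */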
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ? 0 : (uintptr_t)args->addr;
	return (error);
}

#define	LINUX_MS_ASYNC		0x0001
#define	LINUX_MS_INVALIDATE	0x0002
#define	LINUX_MS_SYNC		0x0004

int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use a hard-coded CLK_TCK value.
 * Since 2.2.1 glibc uses the value exported by the kernel via the
 * AT_CLKTCK auxiliary vector entry.
 */
#define	CLK_TCK		100

#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?	\
			    CONVNTCK(r) : CONVOTCK(r))
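
/*
 * Worked example of the tick conversion: if stclohz is 128, then 2.5
 * seconds of CPU time (tv_sec == 2, tv_usec == 500000) convert to
 * 2 * 128 + 500000 / (1000000 / 128) == 256 + 64 == 320 ticks.
 */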
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
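	/*
	 * The FreeBSD version string contains a newline; Linux's
	 * utsname.version is expected to be a single line, so terminate
	 * the copy at the first newline.
	 */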
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
#if defined(COMPAT_LINUX32)
	if (linux32_emulate_i386)
		strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
	else
#endif
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#elif defined(__aarch64__)
	strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME);
#elif defined(__i386__)
	strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

struct l_utimbuf {
	l_time_t	l_actime;
	l_time_t	l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif
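
/*
 * Convert a Linux timespec for utimensat(2) into a native one: the
 * special Linux UTIME_NOW/UTIME_OMIT nanosecond encodings are mapped to
 * their FreeBSD counterparts, and any other tv_nsec outside
 * [0, 999999999] is rejected with EINVAL.
 */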
static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	char *path = NULL;
	int error, dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/*
		 * This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (!LUSECONVPATH(td)) {
		if (pathname != NULL) {
			return (kern_utimensat(td, dfd, pathname,
			    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
		}
	}

	if (pathname != NULL)
		LCONVPATHEXIST_AT(pathname, &path, dfd);
	else if (lflags != 0)
		return (EINVAL);

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	/* Zero out the padding in compat mode. */
	l_times->tv_nsec &= 0xFFFFFFFFUL;

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST_AT(args->filename, &fname, dfd);
		error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

static int
linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp,
    int options, void *rup, l_siginfo_t *infop)
{
	l_siginfo_t lsi;
	siginfo_t siginfo;
	struct __wrusage wru;
	int error, status, tmpstat, sig;

	error = kern_wait6(td, idtype, id, &status, options,
	    rup != NULL ? &wru : NULL, &siginfo);
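
	/*
	 * The wait status layout matches Linux (exit code or stop signal
	 * in bits 8..15, termination signal in bits 0..6), but the signal
	 * numbers themselves differ, so translate them: e.g. a child
	 * killed by BSD SIGUSR1 (30) must report the Linux SIGUSR1 (10).
	 */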
	if (error == 0 && statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32))
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}
	if (error == 0 && rup != NULL)
		error = linux_copyout_rusage(&wru.wru_self, rup);
	if (error == 0 && infop != NULL && td->td_retval[0] != 0) {
		sig = bsd_to_linux_signal(siginfo.si_signo);
		siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		error = copyout(&lsi, infop, sizeof(lsi));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	struct proc *p;
	int options, id, idtype;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	/* -INT_MIN is not defined. */
	if (args->pid == INT_MIN)
		return (ESRCH);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
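
	/*
	 * Decode the pid argument with Linux wait4(2) semantics: -1 waits
	 * for any child, a value below -1 waits for the process group
	 * |pid|, 0 waits for the caller's own process group, and a
	 * positive value waits for that one process.
	 */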
	if (args->pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (args->pid < 0) {
		idtype = P_PGID;
		id = (id_t)-args->pid;
	} else if (args->pid == 0) {
		idtype = P_PGID;
		p = td->td_proc;
		PROC_LOCK(p);
		id = p->p_pgid;
		PROC_UNLOCK(p);
	} else {
		idtype = P_PID;
		id = (id_t)args->pid;
	}

	return (linux_common_wait(td, idtype, id, args->status, options,
	    args->rusage, NULL));
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	idtype_t idtype;
	int error, options;
	struct proc *p;
	pid_t id;

	if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED |
	    LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	id = args->id;
	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (linux_use54(td) && args->id == 0) {
			p = td->td_proc;
			PROC_LOCK(p);
			id = p->p_pgid;
			PROC_UNLOCK(p);
		} else if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	case LINUX_P_PIDFD:
		LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype");
		return (ENOSYS);
	default:
		return (EINVAL);
	}

	error = linux_common_wait(td, idtype, id, NULL, options,
	    args->rusage, args->info);
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;
	enum uio_seg seg;
	bool convpath;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = args->path;
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT(args->path, &path);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, seg,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, seg,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}
#endif

int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;
	enum uio_seg seg;
	bool convpath;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = __DECONST(char *, args->filename);
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT_AT(args->filename, &path, dfd);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, seg, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, seg, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

struct l_itimerval {
	l_timeval	it_interval;
	l_timeval	it_value;
};

#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;
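
/*
 * Despite the "B2L" name this macro is a plain field-by-field copy and is
 * used in both directions below; l_itimerval and itimerval have members
 * with identical names, only the field widths may differ.
 */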
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid.  Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid.  Returning the whole set
	 * here will cause a duplicate.  Exclude cr_groups[0]
	 * to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}
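
/*
 * Report canned values for Linux resource limits that have no FreeBSD
 * counterpart.  This only fires when the linux_dummy_rlimits knob (a
 * compat.linux sysctl) is enabled; with it disabled these resources fail
 * the LINUX_RLIM_NLIMITS bounds check in the callers and yield EINVAL.
 */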
static bool
linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
{

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_SIGPENDING:
	case LINUX_RLIMIT_MSGQUEUE:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	default:
		return (false);
	}
}

int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

int
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
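			/*
			 * E.g., assuming LINUX_MAX_RT_PRIO is 100 and the
			 * native rtprio range is 0..31: Linux priority 1
			 * maps to 0, and 99 maps to 98 * 32 / 99 == 31.
			 */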
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998
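
/*
 * The magic constants mirror Linux: REBOOT_MAGIC1 spells "fee1dead", and
 * the REBOOT_MAGIC2* values are dates (0x28121969 is 28 Dec 1969, Linus
 * Torvalds's birthday; the others encode family birthdays as well).
 */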
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
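	/*
	 * The raw Linux syscall does not return the -20..19 nice value
	 * itself: it returns 20 - nice (1..40), so the result is never
	 * negative, and glibc converts it back in userland.
	 */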
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set.  We ignore that (temporarily?)
	 * as it doesn't occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

#define	_LINUX_CAPABILITY_VERSION_1	0x19980330
#define	_LINUX_CAPABILITY_VERSION_2	0x20071026
#define	_LINUX_CAPABILITY_VERSION_3	0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};
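
/*
 * Version 1 of the Linux capability ABI uses one 32-bit word per
 * capability set; versions 2 and 3 widened each set to 64 bits, which is
 * why u32s below is 1 or 2 and the l_user_cap_data arrays have two
 * elements.
 */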
int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}
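
/*
 * Only a subset of prctl(2) options is handled.  LINUX_MAX_COMM_LEN is
 * assumed to match Linux's TASK_COMM_LEN (16), so PR_SET_NAME below
 * truncates thread names exactly as Linux would.
 */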
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size, arg;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related
	 *   files in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control traceability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects.  We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side.  This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		arg = args->arg2 == 1 ?
		    PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE;
		error = kern_procctl(td, P_PID, p->p_pid,
		    PROC_NO_NEW_PRIVS_CTL, &arg);
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}

int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
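			/*
			 * This is the inverse of the mapping in
			 * linux_sched_setparam(): e.g., with a native
			 * rtprio range of 0..31 and LINUX_MAX_RT_PRIO of
			 * 100, native priority 0 reads back as Linux
			 * priority 1.
			 */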
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	struct thread *tdt;
	cpuset_t *mask;
	size_t size;
	int error;
	id_t tid;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);
	tid = tdt->td_tid;
	PROC_UNLOCK(tdt->td_proc);

	mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO);
	size = min(args->len, sizeof(cpuset_t));
	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tid, size, mask);
	if (error == ERANGE)
		error = EINVAL;
	if (error == 0)
		error = copyout(mask, args->user_mask_ptr, size);
	if (error == 0)
		td->td_retval[0] = size;
	free(mask, M_LINUX);
	return (error);
}

/*
 * Set affinity of a process.
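 * The Linux call passes an explicit mask length, so only
 * min(args->len, sizeof(cpuset_t)) bytes are copied in, and any set
 * bits beyond the highest valid CPU id are simply cleared below.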
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;
	cpuset_t *mask;
	int cpu, error;
	size_t len;
	id_t tid;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);
	tid = tdt->td_tid;
	PROC_UNLOCK(tdt->td_proc);

	len = min(args->len, sizeof(cpuset_t));
	mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO);
	error = copyin(args->user_mask_ptr, mask, len);
	if (error != 0)
		goto out;
	/* Linux ignores high bits. */
	CPU_FOREACH_ISSET(cpu, mask)
		if (cpu > mp_maxid)
			CPU_CLR(cpu, mask);

	error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tid, mask);
	if (error == EDEADLK)
		error = EINVAL;
out:
	free(mask, M_TEMP);
	return (error);
}

struct linux_rlimit64 {
	uint64_t rlim_cur;
	uint64_t rlim_max;
};

int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error;

	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note: unlike on FreeBSD, where rlim is a signed 64-bit
		 * value, the Linux rlim is unsigned 64-bit.  FreeBSD treats
		 * negative limits as INFINITY, so no conversion is needed
		 * on the way in.
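		 * On the way out the infinities do differ, however, which
		 * is why the copyout path below maps RLIM_INFINITY to
		 * LINUX_RLIM_INFINITY explicitly.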
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

out:
	PRELE(p);
	return (error);
}

int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec(&ts, args->tsp);
	return (error);
}

static int
linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds,
    l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp,
    l_uintptr_t *sig)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (sig != NULL) {
		error = copyin(sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		error = linux_copyin_sigset(PTRIN(lpse6.ss),
		    lpse6.ss_len, &ss, &ssp);
		if (error != 0)
			return (error);
	}

	/*
	 * Currently glibc converts the nanosecond count to microseconds.
	 * This means losing precision, but for now it is hardly noticeable.
	 */
	if (tsp != NULL) {
		TIMESPEC_TO_TIMEVAL(&utv, tsp);
		if (itimerfix(&utv))
			return (EINVAL);

		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, nfds, readfds, writefds,
	    exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (tsp != NULL) {
		/*
		 * Compute how much of the timeout was left by taking the
		 * difference between the current time and the time before
		 * we started the call, and subtracting that elapsed time
		 * from the user-supplied value.
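		 * For example, if the caller passed 5 s and the call
		 * consumed 1.2 s, roughly 3.8 s is written back; a
		 * negative remainder is clamped to zero.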
		 */
		microtime(&tv1);
		timevalsub(&tv1, &tv0);
		timevalsub(&utv, &tv1);
		if (utv.tv_sec < 0)
			timevalclear(&utv);
		TIMEVAL_TO_TIMESPEC(&utv, tsp);
	}
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_pselect6_time64(struct thread *td,
    struct linux_pselect6_time64_args *args)
{
	struct timespec ts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&ts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &ts;
	} else
		tsp = NULL;

	error = linux_common_pselect6(td, args->nfds, args->readfds,
	    args->writefds, args->exceptfds, tsp, args->sig);

	if (args->tsp != NULL)
		linux_put_timespec64(&ts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;

	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec(&uts, args->tsp);
	return (error);
}

static int
linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds,
    struct timespec *tsp, l_sigset_t *sset, l_size_t ssize)
{
	struct timespec ts0, ts1;
	struct pollfd stackfds[32];
	struct pollfd *kfds;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (kern_poll_maxfds(nfds))
		return (EINVAL);
	if (sset != NULL) {
		error = linux_copyin_sigset(sset, ssize, &ss, &ssp);
		if (error != 0)
			return (error);
	} else
		ssp = NULL;
	if (tsp != NULL)
		nanotime(&ts0);

	if (nfds > nitems(stackfds))
		kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK);
	else
		kfds = stackfds;
	error = linux_pollin(td, kfds, fds, nfds);
	if (error != 0)
		goto out;

	error = kern_poll_kfds(td, kfds, nfds, tsp, ssp);
	if (error == 0)
		error = linux_pollout(td, kfds, fds, nfds);

	if (error == 0 && tsp != NULL) {
		if (td->td_retval[0]) {
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(tsp, &ts1, tsp);
			if (tsp->tv_sec < 0)
				timespecclear(tsp);
		} else
			timespecclear(tsp);
	}

out:
	if (nfds > nitems(stackfds))
		free(kfds, M_TEMP);
	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args)
{
	struct timespec uts, *tsp;
	int error;

	if (args->tsp != NULL) {
		error = linux_get_timespec64(&uts, args->tsp);
		if (error != 0)
			return (error);
		tsp = &uts;
	} else
		tsp = NULL;
	error = linux_common_ppoll(td, args->fds, args->nfds, tsp,
	    args->sset, args->ssize);
	if (error == 0 && args->tsp != NULL)
		error = linux_put_timespec64(&uts, args->tsp);
	return (error);
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

static int
linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds,
    u_int nfd)
{
	int error;
	u_int i;

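	/*
	 * Bulk-copy the user's pollfd array, then translate the Linux
	 * event bits to their native equivalents in place.
	 */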
	error = copyin(ufds, fds, nfd * sizeof(*fds));
	if (error != 0)
		return (error);

	for (i = 0; i < nfd; i++) {
		if (fds->events != 0)
			linux_to_bsd_poll_events(td, fds->fd,
			    fds->events, &fds->events);
		fds++;
	}
	return (0);
}

static int
linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds,
    u_int nfd)
{
	int error = 0;
	u_int i, n = 0;

	for (i = 0; i < nfd; i++) {
		if (fds->revents != 0) {
			bsd_to_linux_poll_events(fds->revents,
			    &fds->revents);
			n++;
		}
		error = copyout(&fds->revents, &ufds->revents,
		    sizeof(ufds->revents));
		if (error)
			return (error);
		fds++;
		ufds++;
	}
	td->td_retval[0] = n;
	return (0);
}

static int
linux_sched_rr_get_interval_common(struct thread *td, pid_t pid,
    struct timespec *ts)
{
	struct thread *tdt;
	int error;

	/*
	 * According to the manual page, EINVAL should be returned
	 * when an invalid pid is specified.
	 */
	if (pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, ts);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec(&ts, uap->interval));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
int
linux_sched_rr_get_interval_time64(struct thread *td,
    struct linux_sched_rr_get_interval_time64_args *uap)
{
	struct timespec ts;
	int error;

	error = linux_sched_rr_get_interval_common(td, uap->pid, &ts);
	if (error != 0)
		return (error);
	return (linux_put_timespec64(&ts, uap->interval));
}
#endif

/*
 * When the Linux thread is the initial thread in the thread group,
 * the thread id is equal to the process id.  Glibc depends on this
 * magic (assert in pthread_getattr_np.c).
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		if (pid != -1 && td->td_proc->p_pid != pid)
			return (NULL);
		PROC_LOCK(td->td_proc);
		return (td);
	} else if (tid > PID_MAX)
		return (tdfind(tid, pid));

	/*
	 * Initial thread, where the tid is equal to the pid.
	 */
	p = pfind(tid);
	if (p != NULL) {
		if (SV_PROC_ABI(p) != SV_ABI_LINUX ||
		    (pid != -1 && tid != pid)) {
			/*
			 * p is not a Linuxulator process.
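			 * (Or, when a pid was supplied, the tid/pid pair
			 * refers to some other process.)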
			 */
			PROC_UNLOCK(p);
			return (NULL);
		}
		FOREACH_THREAD_IN_PROC(p, tdt) {
			em = em_find(tdt);
			if (tid == em->em_tid)
				return (tdt);
		}
		PROC_UNLOCK(p);
	}
	return (NULL);
}

void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK | LINUX_GRND_RANDOM))
		return (EINVAL);
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned. */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}

#define	SYSLOG_TAG	"<6>"

int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128], *src, *dst;
	u_int seq;
	int buflen, error;

	if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) {
		linux_msg(td, "syslog unsupported type 0x%x", args->type);
		return (EINVAL);
	}

	if (args->len < 6) {
		td->td_retval[0] = 0;
		return (0);
	}

	error = priv_check(td, PRIV_MSGBUF);
	if (error)
		return (error);

	mtx_lock(&msgbuf_lock);
	msgbuf_peekbytes(msgbufp, NULL, 0, &seq);
	mtx_unlock(&msgbuf_lock);

	dst = args->buf;
	error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG));
	/*
	 * The -1 is to skip the trailing '\0',
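	 * which the very next copyout() then overwrites, so the tag
	 * appears as a prefix rather than as a NUL-terminated string.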
	 */
	dst += sizeof(SYSLOG_TAG) - 1;

	while (error == 0) {
		mtx_lock(&msgbuf_lock);
		buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq);
		mtx_unlock(&msgbuf_lock);

		if (buflen == 0)
			break;

		for (src = buf; src < buf + buflen && error == 0; src++) {
			if (*src == '\0')
				continue;

			if (dst >= args->buf + args->len)
				goto out;

			error = copyout(src, dst, 1);
			dst++;

			if (*src == '\n' && *(src + 1) != '<' &&
			    dst + sizeof(SYSLOG_TAG) < args->buf + args->len) {
				error = copyout(&SYSLOG_TAG,
				    dst, sizeof(SYSLOG_TAG));
				dst += sizeof(SYSLOG_TAG) - 1;
			}
		}
	}
out:
	td->td_retval[0] = dst - args->buf;
	return (error);
}

int
linux_getcpu(struct thread *td, struct linux_getcpu_args *args)
{
	int cpu, error, node;

	cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9). */
	error = 0;
	node = cpuid_to_pcpu[cpu]->pc_domain;

	if (args->cpu != NULL)
		error = copyout(&cpu, args->cpu, sizeof(l_int));
	/* Don't let a successful second copyout mask an earlier failure. */
	if (error == 0 && args->node != NULL)
		error = copyout(&node, args->node, sizeof(l_int));
	return (error);
}

#if defined(__i386__) || defined(__amd64__)
int
linux_poll(struct thread *td, struct linux_poll_args *args)
{
	struct timespec ts, *tsp;

	if (args->timeout != INFTIM) {
		if (args->timeout < 0)
			return (EINVAL);
		ts.tv_sec = args->timeout / 1000;
		ts.tv_nsec = (args->timeout % 1000) * 1000000;
		tsp = &ts;
	} else
		tsp = NULL;

	return (linux_common_ppoll(td, args->fds, args->nfds,
	    tsp, NULL, 0));
}
#endif /* __i386__ || __amd64__ */

int
linux_seccomp(struct thread *td, struct linux_seccomp_args *args)
{

	switch (args->op) {
	case LINUX_SECCOMP_GET_ACTION_AVAIL:
		return (EOPNOTSUPP);
	default:
		/*
		 * Ignore unknown operations, just like a Linux kernel
		 * built without CONFIG_SECCOMP.
		 */
		return (EINVAL);
	}
}

#ifndef COMPAT_LINUX32
int
linux_execve(struct thread *td, struct linux_execve_args *args)
{
	struct image_args eargs;
	char *path;
	int error;

	LINUX_CTR(execve);

	if (!LUSECONVPATH(td)) {
		error = exec_copyin_args(&eargs, args->path, UIO_USERSPACE,
		    args->argp, args->envp);
	} else {
		LCONVPATHEXIST(args->path, &path);
		error = exec_copyin_args(&eargs, path, UIO_SYSSPACE,
		    args->argp, args->envp);
		LFREEPATH(path);
	}
	if (error == 0)
		error = linux_common_execve(td, &eargs);
	AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td);
	return (error);
}
#endif