1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/fcntl.h> 34 #include <sys/jail.h> 35 #include <sys/imgact.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/membarrier.h> 39 #include <sys/msgbuf.h> 40 #include <sys/mqueue.h> 41 #include <sys/mutex.h> 42 #include <sys/poll.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/procctl.h> 46 #include <sys/reboot.h> 47 #include <sys/random.h> 48 #include <sys/resourcevar.h> 49 #include <sys/rtprio.h> 50 #include <sys/sched.h> 51 #include <sys/smp.h> 52 #include <sys/stat.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysent.h> 56 #include <sys/sysproto.h> 57 #include <sys/time.h> 58 #include <sys/unistd.h> 59 #include <sys/vmmeter.h> 60 #include <sys/vnode.h> 61 62 #include <security/audit/audit.h> 63 #include <security/mac/mac_framework.h> 64 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 #include <vm/swap_pager.h> 68 69 #ifdef COMPAT_LINUX32 70 #include <machine/../linux32/linux.h> 71 #include <machine/../linux32/linux32_proto.h> 72 #else 73 #include <machine/../linux/linux.h> 74 #include <machine/../linux/linux_proto.h> 75 #endif 76 77 #include <compat/linux/linux_common.h> 78 #include <compat/linux/linux_dtrace.h> 79 #include <compat/linux/linux_file.h> 80 #include <compat/linux/linux_mib.h> 81 #include <compat/linux/linux_mmap.h> 82 #include <compat/linux/linux_signal.h> 83 #include <compat/linux/linux_time.h> 84 #include <compat/linux/linux_util.h> 85 #include <compat/linux/linux_emul.h> 86 #include <compat/linux/linux_misc.h> 87 88 int stclohz; /* Statistics clock frequency */ 89 90 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 91 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 92 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 93 RLIMIT_MEMLOCK, RLIMIT_AS 94 }; 95 96 struct l_sysinfo { 97 l_long uptime; /* Seconds since boot */ 98 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 99 #define LINUX_SYSINFO_LOADS_SCALE 65536 100 l_ulong totalram; /* Total usable main memory size */ 101 l_ulong freeram; /* Available memory size */ 102 l_ulong sharedram; /* Amount of shared memory */ 103 l_ulong bufferram; /* Memory used by buffers */ 104 l_ulong totalswap; /* Total swap space size */ 105 l_ulong freeswap; /* swap space still available */ 106 l_ushort procs; /* Number of current processes */ 107 l_ushort pads; 108 l_ulong totalhigh; 109 l_ulong freehigh; 110 l_uint mem_unit; 111 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 112 }; 113 114 struct l_pselect6arg { 115 l_uintptr_t ss; 116 l_size_t ss_len; 117 }; 118 119 static int linux_utimensat_lts_to_ts(struct l_timespec *, 120 struct timespec *); 121 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 122 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 123 struct timespec *); 124 #endif 125 static int linux_common_utimensat(struct thread *, int, 126 const char *, struct timespec *, int); 127 static int linux_common_pselect6(struct thread *, l_int, 128 l_fd_set *, l_fd_set *, l_fd_set *, 129 struct timespec *, l_uintptr_t *); 130 static int linux_common_ppoll(struct thread *, struct pollfd *, 131 uint32_t, struct timespec *, l_sigset_t *, 132 l_size_t); 133 static int linux_pollin(struct thread *, struct pollfd *, 134 struct pollfd *, u_int); 135 static int linux_pollout(struct thread *, struct pollfd *, 136 struct pollfd *, u_int); 137 138 int 139 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 140 { 141 struct l_sysinfo sysinfo; 142 int i, j; 143 struct timespec ts; 144 145 bzero(&sysinfo, sizeof(sysinfo)); 146 getnanouptime(&ts); 147 if (ts.tv_nsec != 0) 148 ts.tv_sec++; 149 sysinfo.uptime = ts.tv_sec; 150 151 /* Use the information from the mib to get our load averages */ 152 for (i = 0; i < 3; i++) 153 sysinfo.loads[i] = averunnable.ldavg[i] * 154 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 155 156 sysinfo.totalram = physmem * PAGE_SIZE; 157 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 158 159 /* 160 * sharedram counts pages allocated to named, swap-backed objects such 161 * as shared memory segments and tmpfs files. There is no cheap way to 162 * compute this, so just leave the field unpopulated. Linux itself only 163 * started setting this field in the 3.x timeframe. 164 */ 165 sysinfo.sharedram = 0; 166 sysinfo.bufferram = 0; 167 168 swap_pager_status(&i, &j); 169 sysinfo.totalswap = i * PAGE_SIZE; 170 sysinfo.freeswap = (i - j) * PAGE_SIZE; 171 172 sysinfo.procs = nprocs; 173 174 /* 175 * Platforms supported by the emulation layer do not have a notion of 176 * high memory. 177 */ 178 sysinfo.totalhigh = 0; 179 sysinfo.freehigh = 0; 180 181 sysinfo.mem_unit = 1; 182 183 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 184 } 185 186 #ifdef LINUX_LEGACY_SYSCALLS 187 int 188 linux_alarm(struct thread *td, struct linux_alarm_args *args) 189 { 190 struct itimerval it, old_it; 191 u_int secs; 192 int error __diagused; 193 194 secs = args->secs; 195 /* 196 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 197 * to match kern_setitimer()'s limit to avoid error from it. 198 * 199 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 200 * platforms. 201 */ 202 if (secs > INT32_MAX / 2) 203 secs = INT32_MAX / 2; 204 205 it.it_value.tv_sec = secs; 206 it.it_value.tv_usec = 0; 207 timevalclear(&it.it_interval); 208 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 209 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 210 211 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 212 old_it.it_value.tv_usec >= 500000) 213 old_it.it_value.tv_sec++; 214 td->td_retval[0] = old_it.it_value.tv_sec; 215 return (0); 216 } 217 #endif 218 219 int 220 linux_brk(struct thread *td, struct linux_brk_args *args) 221 { 222 struct vmspace *vm = td->td_proc->p_vmspace; 223 uintptr_t new, old; 224 225 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 226 new = (uintptr_t)args->dsend; 227 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 228 td->td_retval[0] = (register_t)new; 229 else 230 td->td_retval[0] = (register_t)old; 231 232 return (0); 233 } 234 235 #ifdef LINUX_LEGACY_SYSCALLS 236 int 237 linux_select(struct thread *td, struct linux_select_args *args) 238 { 239 l_timeval ltv; 240 struct timeval tv0, tv1, utv, *tvp; 241 int error; 242 243 /* 244 * Store current time for computation of the amount of 245 * time left. 246 */ 247 if (args->timeout) { 248 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 249 goto select_out; 250 utv.tv_sec = ltv.tv_sec; 251 utv.tv_usec = ltv.tv_usec; 252 253 if (itimerfix(&utv)) { 254 /* 255 * The timeval was invalid. Convert it to something 256 * valid that will act as it does under Linux. 257 */ 258 utv.tv_sec += utv.tv_usec / 1000000; 259 utv.tv_usec %= 1000000; 260 if (utv.tv_usec < 0) { 261 utv.tv_sec -= 1; 262 utv.tv_usec += 1000000; 263 } 264 if (utv.tv_sec < 0) 265 timevalclear(&utv); 266 } 267 microtime(&tv0); 268 tvp = &utv; 269 } else 270 tvp = NULL; 271 272 error = kern_select(td, args->nfds, args->readfds, args->writefds, 273 args->exceptfds, tvp, LINUX_NFDBITS); 274 if (error) 275 goto select_out; 276 277 if (args->timeout) { 278 if (td->td_retval[0]) { 279 /* 280 * Compute how much time was left of the timeout, 281 * by subtracting the current time and the time 282 * before we started the call, and subtracting 283 * that result from the user-supplied value. 284 */ 285 microtime(&tv1); 286 timevalsub(&tv1, &tv0); 287 timevalsub(&utv, &tv1); 288 if (utv.tv_sec < 0) 289 timevalclear(&utv); 290 } else 291 timevalclear(&utv); 292 ltv.tv_sec = utv.tv_sec; 293 ltv.tv_usec = utv.tv_usec; 294 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 295 goto select_out; 296 } 297 298 select_out: 299 return (error); 300 } 301 #endif 302 303 int 304 linux_mremap(struct thread *td, struct linux_mremap_args *args) 305 { 306 uintptr_t addr; 307 size_t len; 308 int error = 0; 309 310 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 311 td->td_retval[0] = 0; 312 return (EINVAL); 313 } 314 315 /* 316 * Check for the page alignment. 317 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 318 */ 319 if (args->addr & PAGE_MASK) { 320 td->td_retval[0] = 0; 321 return (EINVAL); 322 } 323 324 args->new_len = round_page(args->new_len); 325 args->old_len = round_page(args->old_len); 326 327 if (args->new_len > args->old_len) { 328 td->td_retval[0] = 0; 329 return (ENOMEM); 330 } 331 332 if (args->new_len < args->old_len) { 333 addr = args->addr + args->new_len; 334 len = args->old_len - args->new_len; 335 error = kern_munmap(td, addr, len); 336 } 337 338 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 339 return (error); 340 } 341 342 #define LINUX_MS_ASYNC 0x0001 343 #define LINUX_MS_INVALIDATE 0x0002 344 #define LINUX_MS_SYNC 0x0004 345 346 int 347 linux_msync(struct thread *td, struct linux_msync_args *args) 348 { 349 350 return (kern_msync(td, args->addr, args->len, 351 args->fl & ~LINUX_MS_SYNC)); 352 } 353 354 int 355 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 356 { 357 358 return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, 359 uap->prot)); 360 } 361 362 int 363 linux_madvise(struct thread *td, struct linux_madvise_args *uap) 364 { 365 366 return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, 367 uap->behav)); 368 } 369 370 int 371 linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) 372 { 373 #if defined(LINUX_ARCHWANT_MMAP2PGOFF) 374 /* 375 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is 376 * implemented with mmap2 syscall and the offset is represented in 377 * multiples of page size. 378 */ 379 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 380 uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); 381 #else 382 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 383 uap->flags, uap->fd, uap->pgoff)); 384 #endif 385 } 386 387 #ifdef LINUX_LEGACY_SYSCALLS 388 int 389 linux_time(struct thread *td, struct linux_time_args *args) 390 { 391 struct timeval tv; 392 l_time_t tm; 393 int error; 394 395 microtime(&tv); 396 tm = tv.tv_sec; 397 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 398 return (error); 399 td->td_retval[0] = tm; 400 return (0); 401 } 402 #endif 403 404 struct l_times_argv { 405 l_clock_t tms_utime; 406 l_clock_t tms_stime; 407 l_clock_t tms_cutime; 408 l_clock_t tms_cstime; 409 }; 410 411 /* 412 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 413 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 414 * auxiliary vector entry. 415 */ 416 #define CLK_TCK 100 417 418 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 419 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 420 421 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ 422 CONVNTCK(r) : CONVOTCK(r)) 423 424 int 425 linux_times(struct thread *td, struct linux_times_args *args) 426 { 427 struct timeval tv, utime, stime, cutime, cstime; 428 struct l_times_argv tms; 429 struct proc *p; 430 int error; 431 432 if (args->buf != NULL) { 433 p = td->td_proc; 434 PROC_LOCK(p); 435 PROC_STATLOCK(p); 436 calcru(p, &utime, &stime); 437 PROC_STATUNLOCK(p); 438 calccru(p, &cutime, &cstime); 439 PROC_UNLOCK(p); 440 441 tms.tms_utime = CONVTCK(utime); 442 tms.tms_stime = CONVTCK(stime); 443 444 tms.tms_cutime = CONVTCK(cutime); 445 tms.tms_cstime = CONVTCK(cstime); 446 447 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 448 return (error); 449 } 450 451 microuptime(&tv); 452 td->td_retval[0] = (int)CONVTCK(tv); 453 return (0); 454 } 455 456 int 457 linux_newuname(struct thread *td, struct linux_newuname_args *args) 458 { 459 struct l_new_utsname utsname; 460 char osname[LINUX_MAX_UTSNAME]; 461 char osrelease[LINUX_MAX_UTSNAME]; 462 char *p; 463 464 linux_get_osname(td, osname); 465 linux_get_osrelease(td, osrelease); 466 467 bzero(&utsname, sizeof(utsname)); 468 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 469 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 470 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 471 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 472 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 473 for (p = utsname.version; *p != '\0'; ++p) 474 if (*p == '\n') { 475 *p = '\0'; 476 break; 477 } 478 #if defined(__amd64__) 479 /* 480 * On amd64, Linux uname(2) needs to return "x86_64" 481 * for both 64-bit and 32-bit applications. On 32-bit, 482 * the string returned by getauxval(AT_PLATFORM) needs 483 * to remain "i686", though. 484 */ 485 #if defined(COMPAT_LINUX32) 486 if (linux32_emulate_i386) 487 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 488 else 489 #endif 490 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 491 #elif defined(__aarch64__) 492 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 493 #elif defined(__i386__) 494 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 495 #endif 496 497 return (copyout(&utsname, args->buf, sizeof(utsname))); 498 } 499 500 struct l_utimbuf { 501 l_time_t l_actime; 502 l_time_t l_modtime; 503 }; 504 505 #ifdef LINUX_LEGACY_SYSCALLS 506 int 507 linux_utime(struct thread *td, struct linux_utime_args *args) 508 { 509 struct timeval tv[2], *tvp; 510 struct l_utimbuf lut; 511 int error; 512 513 if (args->times) { 514 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 515 return (error); 516 tv[0].tv_sec = lut.l_actime; 517 tv[0].tv_usec = 0; 518 tv[1].tv_sec = lut.l_modtime; 519 tv[1].tv_usec = 0; 520 tvp = tv; 521 } else 522 tvp = NULL; 523 524 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 525 tvp, UIO_SYSSPACE)); 526 } 527 #endif 528 529 #ifdef LINUX_LEGACY_SYSCALLS 530 int 531 linux_utimes(struct thread *td, struct linux_utimes_args *args) 532 { 533 l_timeval ltv[2]; 534 struct timeval tv[2], *tvp = NULL; 535 int error; 536 537 if (args->tptr != NULL) { 538 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 539 return (error); 540 tv[0].tv_sec = ltv[0].tv_sec; 541 tv[0].tv_usec = ltv[0].tv_usec; 542 tv[1].tv_sec = ltv[1].tv_sec; 543 tv[1].tv_usec = ltv[1].tv_usec; 544 tvp = tv; 545 } 546 547 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 548 tvp, UIO_SYSSPACE)); 549 } 550 #endif 551 552 static int 553 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 554 { 555 556 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 557 l_times->tv_nsec != LINUX_UTIME_NOW && 558 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 559 return (EINVAL); 560 561 times->tv_sec = l_times->tv_sec; 562 switch (l_times->tv_nsec) 563 { 564 case LINUX_UTIME_OMIT: 565 times->tv_nsec = UTIME_OMIT; 566 break; 567 case LINUX_UTIME_NOW: 568 times->tv_nsec = UTIME_NOW; 569 break; 570 default: 571 times->tv_nsec = l_times->tv_nsec; 572 } 573 574 return (0); 575 } 576 577 static int 578 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 579 struct timespec *timesp, int lflags) 580 { 581 int dfd, flags = 0; 582 583 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 584 585 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 586 return (EINVAL); 587 588 if (timesp != NULL) { 589 /* This breaks POSIX, but is what the Linux kernel does 590 * _on purpose_ (documented in the man page for utimensat(2)), 591 * so we must follow that behaviour. */ 592 if (timesp[0].tv_nsec == UTIME_OMIT && 593 timesp[1].tv_nsec == UTIME_OMIT) 594 return (0); 595 } 596 597 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 598 flags |= AT_SYMLINK_NOFOLLOW; 599 if (lflags & LINUX_AT_EMPTY_PATH) 600 flags |= AT_EMPTY_PATH; 601 602 if (pathname != NULL) 603 return (kern_utimensat(td, dfd, pathname, 604 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 605 606 if (lflags != 0) 607 return (EINVAL); 608 609 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 610 } 611 612 int 613 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 614 { 615 struct l_timespec l_times[2]; 616 struct timespec times[2], *timesp; 617 int error; 618 619 if (args->times != NULL) { 620 error = copyin(args->times, l_times, sizeof(l_times)); 621 if (error != 0) 622 return (error); 623 624 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 625 if (error != 0) 626 return (error); 627 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 628 if (error != 0) 629 return (error); 630 timesp = times; 631 } else 632 timesp = NULL; 633 634 return (linux_common_utimensat(td, args->dfd, args->pathname, 635 timesp, args->flags)); 636 } 637 638 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 639 static int 640 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 641 { 642 643 /* Zero out the padding in compat mode. */ 644 l_times->tv_nsec &= 0xFFFFFFFFUL; 645 646 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 647 l_times->tv_nsec != LINUX_UTIME_NOW && 648 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 649 return (EINVAL); 650 651 times->tv_sec = l_times->tv_sec; 652 switch (l_times->tv_nsec) 653 { 654 case LINUX_UTIME_OMIT: 655 times->tv_nsec = UTIME_OMIT; 656 break; 657 case LINUX_UTIME_NOW: 658 times->tv_nsec = UTIME_NOW; 659 break; 660 default: 661 times->tv_nsec = l_times->tv_nsec; 662 } 663 664 return (0); 665 } 666 667 int 668 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 669 { 670 struct l_timespec64 l_times[2]; 671 struct timespec times[2], *timesp; 672 int error; 673 674 if (args->times64 != NULL) { 675 error = copyin(args->times64, l_times, sizeof(l_times)); 676 if (error != 0) 677 return (error); 678 679 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 680 if (error != 0) 681 return (error); 682 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 683 if (error != 0) 684 return (error); 685 timesp = times; 686 } else 687 timesp = NULL; 688 689 return (linux_common_utimensat(td, args->dfd, args->pathname, 690 timesp, args->flags)); 691 } 692 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 693 694 #ifdef LINUX_LEGACY_SYSCALLS 695 int 696 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 697 { 698 l_timeval ltv[2]; 699 struct timeval tv[2], *tvp = NULL; 700 int error, dfd; 701 702 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 703 704 if (args->utimes != NULL) { 705 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 706 return (error); 707 tv[0].tv_sec = ltv[0].tv_sec; 708 tv[0].tv_usec = ltv[0].tv_usec; 709 tv[1].tv_sec = ltv[1].tv_sec; 710 tv[1].tv_usec = ltv[1].tv_usec; 711 tvp = tv; 712 } 713 714 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 715 tvp, UIO_SYSSPACE)); 716 } 717 #endif 718 719 static int 720 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 721 int options, void *rup, l_siginfo_t *infop) 722 { 723 l_siginfo_t lsi; 724 siginfo_t siginfo; 725 struct __wrusage wru; 726 int error, status, tmpstat, sig; 727 728 error = kern_wait6(td, idtype, id, &status, options, 729 rup != NULL ? &wru : NULL, &siginfo); 730 731 if (error == 0 && statusp) { 732 tmpstat = status & 0xffff; 733 if (WIFSIGNALED(tmpstat)) { 734 tmpstat = (tmpstat & 0xffffff80) | 735 bsd_to_linux_signal(WTERMSIG(tmpstat)); 736 } else if (WIFSTOPPED(tmpstat)) { 737 tmpstat = (tmpstat & 0xffff00ff) | 738 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 739 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 740 if (WSTOPSIG(status) == SIGTRAP) { 741 tmpstat = linux_ptrace_status(td, 742 siginfo.si_pid, tmpstat); 743 } 744 #endif 745 } else if (WIFCONTINUED(tmpstat)) { 746 tmpstat = 0xffff; 747 } 748 error = copyout(&tmpstat, statusp, sizeof(int)); 749 } 750 if (error == 0 && rup != NULL) 751 error = linux_copyout_rusage(&wru.wru_self, rup); 752 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 753 sig = bsd_to_linux_signal(siginfo.si_signo); 754 memset(&lsi, 0, sizeof(lsi)); 755 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 756 error = copyout(&lsi, infop, sizeof(lsi)); 757 } 758 759 return (error); 760 } 761 762 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 763 int 764 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 765 { 766 struct linux_wait4_args wait4_args = { 767 .pid = args->pid, 768 .status = args->status, 769 .options = args->options, 770 .rusage = NULL, 771 }; 772 773 return (linux_wait4(td, &wait4_args)); 774 } 775 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 776 777 int 778 linux_wait4(struct thread *td, struct linux_wait4_args *args) 779 { 780 struct proc *p; 781 int options, id, idtype; 782 783 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 784 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 785 return (EINVAL); 786 787 /* -INT_MIN is not defined. */ 788 if (args->pid == INT_MIN) 789 return (ESRCH); 790 791 options = 0; 792 linux_to_bsd_waitopts(args->options, &options); 793 794 /* 795 * For backward compatibility we implicitly add flags WEXITED 796 * and WTRAPPED here. 797 */ 798 options |= WEXITED | WTRAPPED; 799 800 if (args->pid == WAIT_ANY) { 801 idtype = P_ALL; 802 id = 0; 803 } else if (args->pid < 0) { 804 idtype = P_PGID; 805 id = (id_t)-args->pid; 806 } else if (args->pid == 0) { 807 idtype = P_PGID; 808 p = td->td_proc; 809 PROC_LOCK(p); 810 id = p->p_pgid; 811 PROC_UNLOCK(p); 812 } else { 813 idtype = P_PID; 814 id = (id_t)args->pid; 815 } 816 817 return (linux_common_wait(td, idtype, id, args->status, options, 818 args->rusage, NULL)); 819 } 820 821 int 822 linux_waitid(struct thread *td, struct linux_waitid_args *args) 823 { 824 idtype_t idtype; 825 int error, options; 826 struct proc *p; 827 pid_t id; 828 829 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 830 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 831 return (EINVAL); 832 833 options = 0; 834 linux_to_bsd_waitopts(args->options, &options); 835 836 id = args->id; 837 switch (args->idtype) { 838 case LINUX_P_ALL: 839 idtype = P_ALL; 840 break; 841 case LINUX_P_PID: 842 if (args->id <= 0) 843 return (EINVAL); 844 idtype = P_PID; 845 break; 846 case LINUX_P_PGID: 847 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 848 p = td->td_proc; 849 PROC_LOCK(p); 850 id = p->p_pgid; 851 PROC_UNLOCK(p); 852 } else if (args->id <= 0) 853 return (EINVAL); 854 idtype = P_PGID; 855 break; 856 case LINUX_P_PIDFD: 857 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 858 return (ENOSYS); 859 default: 860 return (EINVAL); 861 } 862 863 error = linux_common_wait(td, idtype, id, NULL, options, 864 args->rusage, args->info); 865 td->td_retval[0] = 0; 866 867 return (error); 868 } 869 870 #ifdef LINUX_LEGACY_SYSCALLS 871 int 872 linux_mknod(struct thread *td, struct linux_mknod_args *args) 873 { 874 int error; 875 876 switch (args->mode & S_IFMT) { 877 case S_IFIFO: 878 case S_IFSOCK: 879 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 880 args->mode); 881 break; 882 883 case S_IFCHR: 884 case S_IFBLK: 885 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 886 args->mode, linux_decode_dev(args->dev)); 887 break; 888 889 case S_IFDIR: 890 error = EPERM; 891 break; 892 893 case 0: 894 args->mode |= S_IFREG; 895 /* FALLTHROUGH */ 896 case S_IFREG: 897 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 898 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 899 if (error == 0) 900 kern_close(td, td->td_retval[0]); 901 break; 902 903 default: 904 error = EINVAL; 905 break; 906 } 907 return (error); 908 } 909 #endif 910 911 int 912 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 913 { 914 int error, dfd; 915 916 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 917 918 switch (args->mode & S_IFMT) { 919 case S_IFIFO: 920 case S_IFSOCK: 921 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 922 args->mode); 923 break; 924 925 case S_IFCHR: 926 case S_IFBLK: 927 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 928 args->mode, linux_decode_dev(args->dev)); 929 break; 930 931 case S_IFDIR: 932 error = EPERM; 933 break; 934 935 case 0: 936 args->mode |= S_IFREG; 937 /* FALLTHROUGH */ 938 case S_IFREG: 939 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 940 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 941 if (error == 0) 942 kern_close(td, td->td_retval[0]); 943 break; 944 945 default: 946 error = EINVAL; 947 break; 948 } 949 return (error); 950 } 951 952 /* 953 * UGH! This is just about the dumbest idea I've ever heard!! 954 */ 955 int 956 linux_personality(struct thread *td, struct linux_personality_args *args) 957 { 958 struct linux_pemuldata *pem; 959 struct proc *p = td->td_proc; 960 uint32_t old; 961 962 PROC_LOCK(p); 963 pem = pem_find(p); 964 old = pem->persona; 965 if (args->per != 0xffffffff) 966 pem->persona = args->per; 967 PROC_UNLOCK(p); 968 969 td->td_retval[0] = old; 970 return (0); 971 } 972 973 struct l_itimerval { 974 l_timeval it_interval; 975 l_timeval it_value; 976 }; 977 978 #define B2L_ITIMERVAL(bip, lip) \ 979 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 980 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 981 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 982 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 983 984 int 985 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 986 { 987 int error; 988 struct l_itimerval ls; 989 struct itimerval aitv, oitv; 990 991 if (uap->itv == NULL) { 992 uap->itv = uap->oitv; 993 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 994 } 995 996 error = copyin(uap->itv, &ls, sizeof(ls)); 997 if (error != 0) 998 return (error); 999 B2L_ITIMERVAL(&aitv, &ls); 1000 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1001 if (error != 0 || uap->oitv == NULL) 1002 return (error); 1003 B2L_ITIMERVAL(&ls, &oitv); 1004 1005 return (copyout(&ls, uap->oitv, sizeof(ls))); 1006 } 1007 1008 int 1009 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1010 { 1011 int error; 1012 struct l_itimerval ls; 1013 struct itimerval aitv; 1014 1015 error = kern_getitimer(td, uap->which, &aitv); 1016 if (error != 0) 1017 return (error); 1018 B2L_ITIMERVAL(&ls, &aitv); 1019 return (copyout(&ls, uap->itv, sizeof(ls))); 1020 } 1021 1022 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1023 int 1024 linux_nice(struct thread *td, struct linux_nice_args *args) 1025 { 1026 1027 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1028 } 1029 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1030 1031 int 1032 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1033 { 1034 const int ngrp = args->gidsetsize; 1035 struct ucred *newcred, *oldcred; 1036 l_gid_t *linux_gidset; 1037 int error; 1038 struct proc *p; 1039 1040 if (ngrp < 0 || ngrp > ngroups_max) 1041 return (EINVAL); 1042 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1043 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1044 if (error) 1045 goto out; 1046 1047 newcred = crget(); 1048 crextend(newcred, ngrp); 1049 p = td->td_proc; 1050 PROC_LOCK(p); 1051 oldcred = crcopysafe(p, newcred); 1052 1053 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1054 PROC_UNLOCK(p); 1055 crfree(newcred); 1056 goto out; 1057 } 1058 1059 newcred->cr_ngroups = ngrp; 1060 for (int i = 0; i < ngrp; i++) 1061 newcred->cr_groups[i] = linux_gidset[i]; 1062 newcred->cr_flags |= CRED_FLAG_GROUPSET; 1063 1064 setsugid(p); 1065 proc_set_cred(p, newcred); 1066 PROC_UNLOCK(p); 1067 crfree(oldcred); 1068 error = 0; 1069 out: 1070 free(linux_gidset, M_LINUX); 1071 return (error); 1072 } 1073 1074 int 1075 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1076 { 1077 const struct ucred *const cred = td->td_ucred; 1078 l_gid_t *linux_gidset; 1079 int ngrp, error; 1080 1081 ngrp = args->gidsetsize; 1082 1083 if (ngrp == 0) { 1084 td->td_retval[0] = cred->cr_ngroups; 1085 return (0); 1086 } 1087 if (ngrp < cred->cr_ngroups) 1088 return (EINVAL); 1089 1090 ngrp = cred->cr_ngroups; 1091 1092 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1093 for (int i = 0; i < ngrp; ++i) 1094 linux_gidset[i] = cred->cr_groups[i]; 1095 1096 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1097 free(linux_gidset, M_LINUX); 1098 1099 if (error != 0) 1100 return (error); 1101 1102 td->td_retval[0] = ngrp; 1103 return (0); 1104 } 1105 1106 static bool 1107 linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim) 1108 { 1109 ssize_t size; 1110 int res, error; 1111 1112 if (linux_dummy_rlimits == 0) 1113 return (false); 1114 1115 switch (resource) { 1116 case LINUX_RLIMIT_LOCKS: 1117 case LINUX_RLIMIT_RTTIME: 1118 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1119 rlim->rlim_max = LINUX_RLIM_INFINITY; 1120 return (true); 1121 case LINUX_RLIMIT_NICE: 1122 case LINUX_RLIMIT_RTPRIO: 1123 rlim->rlim_cur = 0; 1124 rlim->rlim_max = 0; 1125 return (true); 1126 case LINUX_RLIMIT_SIGPENDING: 1127 error = kernel_sysctlbyname(td, 1128 "kern.sigqueue.max_pending_per_proc", 1129 &res, &size, 0, 0, 0, 0); 1130 if (error != 0) 1131 return (false); 1132 rlim->rlim_cur = res; 1133 rlim->rlim_max = res; 1134 return (true); 1135 case LINUX_RLIMIT_MSGQUEUE: 1136 error = kernel_sysctlbyname(td, 1137 "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); 1138 if (error != 0) 1139 return (false); 1140 rlim->rlim_cur = res; 1141 rlim->rlim_max = res; 1142 return (true); 1143 default: 1144 return (false); 1145 } 1146 } 1147 1148 int 1149 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1150 { 1151 struct rlimit bsd_rlim; 1152 struct l_rlimit rlim; 1153 u_int which; 1154 int error; 1155 1156 if (args->resource >= LINUX_RLIM_NLIMITS) 1157 return (EINVAL); 1158 1159 which = linux_to_bsd_resource[args->resource]; 1160 if (which == -1) 1161 return (EINVAL); 1162 1163 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1164 if (error) 1165 return (error); 1166 1167 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1168 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1169 return (kern_setrlimit(td, which, &bsd_rlim)); 1170 } 1171 1172 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1173 int 1174 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1175 { 1176 struct l_rlimit rlim; 1177 struct rlimit bsd_rlim; 1178 u_int which; 1179 1180 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1181 rlim.rlim_cur = bsd_rlim.rlim_cur; 1182 rlim.rlim_max = bsd_rlim.rlim_max; 1183 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1184 } 1185 1186 if (args->resource >= LINUX_RLIM_NLIMITS) 1187 return (EINVAL); 1188 1189 which = linux_to_bsd_resource[args->resource]; 1190 if (which == -1) 1191 return (EINVAL); 1192 1193 lim_rlimit(td, which, &bsd_rlim); 1194 1195 #ifdef COMPAT_LINUX32 1196 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1197 if (rlim.rlim_cur == UINT_MAX) 1198 rlim.rlim_cur = INT_MAX; 1199 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1200 if (rlim.rlim_max == UINT_MAX) 1201 rlim.rlim_max = INT_MAX; 1202 #else 1203 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1204 if (rlim.rlim_cur == ULONG_MAX) 1205 rlim.rlim_cur = LONG_MAX; 1206 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1207 if (rlim.rlim_max == ULONG_MAX) 1208 rlim.rlim_max = LONG_MAX; 1209 #endif 1210 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1211 } 1212 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1213 1214 int 1215 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1216 { 1217 struct l_rlimit rlim; 1218 struct rlimit bsd_rlim; 1219 u_int which; 1220 1221 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1222 rlim.rlim_cur = bsd_rlim.rlim_cur; 1223 rlim.rlim_max = bsd_rlim.rlim_max; 1224 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1225 } 1226 1227 if (args->resource >= LINUX_RLIM_NLIMITS) 1228 return (EINVAL); 1229 1230 which = linux_to_bsd_resource[args->resource]; 1231 if (which == -1) 1232 return (EINVAL); 1233 1234 lim_rlimit(td, which, &bsd_rlim); 1235 1236 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1237 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1238 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1239 } 1240 1241 int 1242 linux_sched_setscheduler(struct thread *td, 1243 struct linux_sched_setscheduler_args *args) 1244 { 1245 struct sched_param sched_param; 1246 struct thread *tdt; 1247 int error, policy; 1248 1249 switch (args->policy) { 1250 case LINUX_SCHED_OTHER: 1251 policy = SCHED_OTHER; 1252 break; 1253 case LINUX_SCHED_FIFO: 1254 policy = SCHED_FIFO; 1255 break; 1256 case LINUX_SCHED_RR: 1257 policy = SCHED_RR; 1258 break; 1259 default: 1260 return (EINVAL); 1261 } 1262 1263 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1264 if (error) 1265 return (error); 1266 1267 if (linux_map_sched_prio) { 1268 switch (policy) { 1269 case SCHED_OTHER: 1270 if (sched_param.sched_priority != 0) 1271 return (EINVAL); 1272 1273 sched_param.sched_priority = 1274 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1275 break; 1276 case SCHED_FIFO: 1277 case SCHED_RR: 1278 if (sched_param.sched_priority < 1 || 1279 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1280 return (EINVAL); 1281 1282 /* 1283 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1284 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1285 */ 1286 sched_param.sched_priority = 1287 (sched_param.sched_priority - 1) * 1288 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1289 (LINUX_MAX_RT_PRIO - 1); 1290 break; 1291 } 1292 } 1293 1294 tdt = linux_tdfind(td, args->pid, -1); 1295 if (tdt == NULL) 1296 return (ESRCH); 1297 1298 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1299 PROC_UNLOCK(tdt->td_proc); 1300 return (error); 1301 } 1302 1303 int 1304 linux_sched_getscheduler(struct thread *td, 1305 struct linux_sched_getscheduler_args *args) 1306 { 1307 struct thread *tdt; 1308 int error, policy; 1309 1310 tdt = linux_tdfind(td, args->pid, -1); 1311 if (tdt == NULL) 1312 return (ESRCH); 1313 1314 error = kern_sched_getscheduler(td, tdt, &policy); 1315 PROC_UNLOCK(tdt->td_proc); 1316 1317 switch (policy) { 1318 case SCHED_OTHER: 1319 td->td_retval[0] = LINUX_SCHED_OTHER; 1320 break; 1321 case SCHED_FIFO: 1322 td->td_retval[0] = LINUX_SCHED_FIFO; 1323 break; 1324 case SCHED_RR: 1325 td->td_retval[0] = LINUX_SCHED_RR; 1326 break; 1327 } 1328 return (error); 1329 } 1330 1331 int 1332 linux_sched_get_priority_max(struct thread *td, 1333 struct linux_sched_get_priority_max_args *args) 1334 { 1335 struct sched_get_priority_max_args bsd; 1336 1337 if (linux_map_sched_prio) { 1338 switch (args->policy) { 1339 case LINUX_SCHED_OTHER: 1340 td->td_retval[0] = 0; 1341 return (0); 1342 case LINUX_SCHED_FIFO: 1343 case LINUX_SCHED_RR: 1344 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1345 return (0); 1346 default: 1347 return (EINVAL); 1348 } 1349 } 1350 1351 switch (args->policy) { 1352 case LINUX_SCHED_OTHER: 1353 bsd.policy = SCHED_OTHER; 1354 break; 1355 case LINUX_SCHED_FIFO: 1356 bsd.policy = SCHED_FIFO; 1357 break; 1358 case LINUX_SCHED_RR: 1359 bsd.policy = SCHED_RR; 1360 break; 1361 default: 1362 return (EINVAL); 1363 } 1364 return (sys_sched_get_priority_max(td, &bsd)); 1365 } 1366 1367 int 1368 linux_sched_get_priority_min(struct thread *td, 1369 struct linux_sched_get_priority_min_args *args) 1370 { 1371 struct sched_get_priority_min_args bsd; 1372 1373 if (linux_map_sched_prio) { 1374 switch (args->policy) { 1375 case LINUX_SCHED_OTHER: 1376 td->td_retval[0] = 0; 1377 return (0); 1378 case LINUX_SCHED_FIFO: 1379 case LINUX_SCHED_RR: 1380 td->td_retval[0] = 1; 1381 return (0); 1382 default: 1383 return (EINVAL); 1384 } 1385 } 1386 1387 switch (args->policy) { 1388 case LINUX_SCHED_OTHER: 1389 bsd.policy = SCHED_OTHER; 1390 break; 1391 case LINUX_SCHED_FIFO: 1392 bsd.policy = SCHED_FIFO; 1393 break; 1394 case LINUX_SCHED_RR: 1395 bsd.policy = SCHED_RR; 1396 break; 1397 default: 1398 return (EINVAL); 1399 } 1400 return (sys_sched_get_priority_min(td, &bsd)); 1401 } 1402 1403 #define REBOOT_CAD_ON 0x89abcdef 1404 #define REBOOT_CAD_OFF 0 1405 #define REBOOT_HALT 0xcdef0123 1406 #define REBOOT_RESTART 0x01234567 1407 #define REBOOT_RESTART2 0xA1B2C3D4 1408 #define REBOOT_POWEROFF 0x4321FEDC 1409 #define REBOOT_MAGIC1 0xfee1dead 1410 #define REBOOT_MAGIC2 0x28121969 1411 #define REBOOT_MAGIC2A 0x05121996 1412 #define REBOOT_MAGIC2B 0x16041998 1413 1414 int 1415 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1416 { 1417 struct reboot_args bsd_args; 1418 1419 if (args->magic1 != REBOOT_MAGIC1) 1420 return (EINVAL); 1421 1422 switch (args->magic2) { 1423 case REBOOT_MAGIC2: 1424 case REBOOT_MAGIC2A: 1425 case REBOOT_MAGIC2B: 1426 break; 1427 default: 1428 return (EINVAL); 1429 } 1430 1431 switch (args->cmd) { 1432 case REBOOT_CAD_ON: 1433 case REBOOT_CAD_OFF: 1434 return (priv_check(td, PRIV_REBOOT)); 1435 case REBOOT_HALT: 1436 bsd_args.opt = RB_HALT; 1437 break; 1438 case REBOOT_RESTART: 1439 case REBOOT_RESTART2: 1440 bsd_args.opt = 0; 1441 break; 1442 case REBOOT_POWEROFF: 1443 bsd_args.opt = RB_POWEROFF; 1444 break; 1445 default: 1446 return (EINVAL); 1447 } 1448 return (sys_reboot(td, &bsd_args)); 1449 } 1450 1451 int 1452 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1453 { 1454 1455 td->td_retval[0] = td->td_proc->p_pid; 1456 1457 return (0); 1458 } 1459 1460 int 1461 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1462 { 1463 struct linux_emuldata *em; 1464 1465 em = em_find(td); 1466 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1467 1468 td->td_retval[0] = em->em_tid; 1469 1470 return (0); 1471 } 1472 1473 int 1474 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1475 { 1476 1477 td->td_retval[0] = kern_getppid(td); 1478 return (0); 1479 } 1480 1481 int 1482 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1483 { 1484 1485 td->td_retval[0] = td->td_ucred->cr_rgid; 1486 return (0); 1487 } 1488 1489 int 1490 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1491 { 1492 1493 td->td_retval[0] = td->td_ucred->cr_ruid; 1494 return (0); 1495 } 1496 1497 int 1498 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1499 { 1500 1501 return (kern_getsid(td, args->pid)); 1502 } 1503 1504 int 1505 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1506 { 1507 int error; 1508 1509 error = kern_getpriority(td, args->which, args->who); 1510 td->td_retval[0] = 20 - td->td_retval[0]; 1511 return (error); 1512 } 1513 1514 int 1515 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1516 { 1517 int name[2]; 1518 1519 name[0] = CTL_KERN; 1520 name[1] = KERN_HOSTNAME; 1521 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1522 args->len, 0, 0)); 1523 } 1524 1525 int 1526 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1527 { 1528 int name[2]; 1529 1530 name[0] = CTL_KERN; 1531 name[1] = KERN_NISDOMAINNAME; 1532 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1533 args->len, 0, 0)); 1534 } 1535 1536 int 1537 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1538 { 1539 1540 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1541 args->error_code); 1542 1543 /* 1544 * XXX: we should send a signal to the parent if 1545 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1546 * as it doesnt occur often. 1547 */ 1548 exit1(td, args->error_code, 0); 1549 /* NOTREACHED */ 1550 } 1551 1552 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1553 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1554 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1555 1556 struct l_user_cap_header { 1557 l_int version; 1558 l_int pid; 1559 }; 1560 1561 struct l_user_cap_data { 1562 l_int effective; 1563 l_int permitted; 1564 l_int inheritable; 1565 }; 1566 1567 int 1568 linux_capget(struct thread *td, struct linux_capget_args *uap) 1569 { 1570 struct l_user_cap_header luch; 1571 struct l_user_cap_data lucd[2]; 1572 int error, u32s; 1573 1574 if (uap->hdrp == NULL) 1575 return (EFAULT); 1576 1577 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1578 if (error != 0) 1579 return (error); 1580 1581 switch (luch.version) { 1582 case _LINUX_CAPABILITY_VERSION_1: 1583 u32s = 1; 1584 break; 1585 case _LINUX_CAPABILITY_VERSION_2: 1586 case _LINUX_CAPABILITY_VERSION_3: 1587 u32s = 2; 1588 break; 1589 default: 1590 luch.version = _LINUX_CAPABILITY_VERSION_1; 1591 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1592 if (error) 1593 return (error); 1594 return (EINVAL); 1595 } 1596 1597 if (luch.pid) 1598 return (EPERM); 1599 1600 if (uap->datap) { 1601 /* 1602 * The current implementation doesn't support setting 1603 * a capability (it's essentially a stub) so indicate 1604 * that no capabilities are currently set or available 1605 * to request. 1606 */ 1607 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1608 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1609 } 1610 1611 return (error); 1612 } 1613 1614 int 1615 linux_capset(struct thread *td, struct linux_capset_args *uap) 1616 { 1617 struct l_user_cap_header luch; 1618 struct l_user_cap_data lucd[2]; 1619 int error, i, u32s; 1620 1621 if (uap->hdrp == NULL || uap->datap == NULL) 1622 return (EFAULT); 1623 1624 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1625 if (error != 0) 1626 return (error); 1627 1628 switch (luch.version) { 1629 case _LINUX_CAPABILITY_VERSION_1: 1630 u32s = 1; 1631 break; 1632 case _LINUX_CAPABILITY_VERSION_2: 1633 case _LINUX_CAPABILITY_VERSION_3: 1634 u32s = 2; 1635 break; 1636 default: 1637 luch.version = _LINUX_CAPABILITY_VERSION_1; 1638 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1639 if (error) 1640 return (error); 1641 return (EINVAL); 1642 } 1643 1644 if (luch.pid) 1645 return (EPERM); 1646 1647 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1648 if (error != 0) 1649 return (error); 1650 1651 /* We currently don't support setting any capabilities. */ 1652 for (i = 0; i < u32s; i++) { 1653 if (lucd[i].effective || lucd[i].permitted || 1654 lucd[i].inheritable) { 1655 linux_msg(td, 1656 "capset[%d] effective=0x%x, permitted=0x%x, " 1657 "inheritable=0x%x is not implemented", i, 1658 (int)lucd[i].effective, (int)lucd[i].permitted, 1659 (int)lucd[i].inheritable); 1660 return (EPERM); 1661 } 1662 } 1663 1664 return (0); 1665 } 1666 1667 int 1668 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1669 { 1670 int error = 0, max_size, arg; 1671 struct proc *p = td->td_proc; 1672 char comm[LINUX_MAX_COMM_LEN]; 1673 int pdeath_signal, trace_state; 1674 1675 switch (args->option) { 1676 case LINUX_PR_SET_PDEATHSIG: 1677 if (!LINUX_SIG_VALID(args->arg2)) 1678 return (EINVAL); 1679 pdeath_signal = linux_to_bsd_signal(args->arg2); 1680 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1681 &pdeath_signal)); 1682 case LINUX_PR_GET_PDEATHSIG: 1683 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1684 &pdeath_signal); 1685 if (error != 0) 1686 return (error); 1687 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1688 return (copyout(&pdeath_signal, 1689 (void *)(register_t)args->arg2, 1690 sizeof(pdeath_signal))); 1691 /* 1692 * In Linux, this flag controls if set[gu]id processes can coredump. 1693 * There are additional semantics imposed on processes that cannot 1694 * coredump: 1695 * - Such processes can not be ptraced. 1696 * - There are some semantics around ownership of process-related files 1697 * in the /proc namespace. 1698 * 1699 * In FreeBSD, we can (and by default, do) disable setuid coredump 1700 * system-wide with 'sugid_coredump.' We control tracability on a 1701 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1702 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1703 * procctl is roughly analogous to Linux's DUMPABLE. 1704 * 1705 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1706 */ 1707 case LINUX_PR_GET_DUMPABLE: 1708 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1709 &trace_state); 1710 if (error != 0) 1711 return (error); 1712 td->td_retval[0] = (trace_state != -1); 1713 return (0); 1714 case LINUX_PR_SET_DUMPABLE: 1715 /* 1716 * It is only valid for userspace to set one of these two 1717 * flags, and only one at a time. 1718 */ 1719 switch (args->arg2) { 1720 case LINUX_SUID_DUMP_DISABLE: 1721 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1722 break; 1723 case LINUX_SUID_DUMP_USER: 1724 trace_state = PROC_TRACE_CTL_ENABLE; 1725 break; 1726 default: 1727 return (EINVAL); 1728 } 1729 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1730 &trace_state)); 1731 case LINUX_PR_GET_KEEPCAPS: 1732 /* 1733 * Indicate that we always clear the effective and 1734 * permitted capability sets when the user id becomes 1735 * non-zero (actually the capability sets are simply 1736 * always zero in the current implementation). 1737 */ 1738 td->td_retval[0] = 0; 1739 break; 1740 case LINUX_PR_SET_KEEPCAPS: 1741 /* 1742 * Ignore requests to keep the effective and permitted 1743 * capability sets when the user id becomes non-zero. 1744 */ 1745 break; 1746 case LINUX_PR_SET_NAME: 1747 /* 1748 * To be on the safe side we need to make sure to not 1749 * overflow the size a Linux program expects. We already 1750 * do this here in the copyin, so that we don't need to 1751 * check on copyout. 1752 */ 1753 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1754 error = copyinstr((void *)(register_t)args->arg2, comm, 1755 max_size, NULL); 1756 1757 /* Linux silently truncates the name if it is too long. */ 1758 if (error == ENAMETOOLONG) { 1759 /* 1760 * XXX: copyinstr() isn't documented to populate the 1761 * array completely, so do a copyin() to be on the 1762 * safe side. This should be changed in case 1763 * copyinstr() is changed to guarantee this. 1764 */ 1765 error = copyin((void *)(register_t)args->arg2, comm, 1766 max_size - 1); 1767 comm[max_size - 1] = '\0'; 1768 } 1769 if (error) 1770 return (error); 1771 1772 PROC_LOCK(p); 1773 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1774 PROC_UNLOCK(p); 1775 break; 1776 case LINUX_PR_GET_NAME: 1777 PROC_LOCK(p); 1778 strlcpy(comm, p->p_comm, sizeof(comm)); 1779 PROC_UNLOCK(p); 1780 error = copyout(comm, (void *)(register_t)args->arg2, 1781 strlen(comm) + 1); 1782 break; 1783 case LINUX_PR_GET_SECCOMP: 1784 case LINUX_PR_SET_SECCOMP: 1785 /* 1786 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1787 */ 1788 error = EINVAL; 1789 break; 1790 case LINUX_PR_CAPBSET_READ: 1791 #if 0 1792 /* 1793 * This makes too much noise with Ubuntu Focal. 1794 */ 1795 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1796 (int)args->arg2); 1797 #endif 1798 error = EINVAL; 1799 break; 1800 case LINUX_PR_SET_CHILD_SUBREAPER: 1801 if (args->arg2 == 0) { 1802 return (kern_procctl(td, P_PID, 0, PROC_REAP_RELEASE, 1803 NULL)); 1804 } 1805 1806 return (kern_procctl(td, P_PID, 0, PROC_REAP_ACQUIRE, 1807 NULL)); 1808 case LINUX_PR_GET_CHILD_SUBREAPER: { 1809 struct procctl_reaper_status rs; 1810 l_int val; 1811 1812 error = kern_procctl(td, P_PID, 0, PROC_REAP_STATUS, &rs); 1813 if (error != 0) 1814 return (error); 1815 val = rs.rs_reaper == p->p_pid ? 1 : 0; 1816 error = copyout(&val, (void *)(register_t)args->arg2, 1817 sizeof(val)); 1818 break; 1819 } 1820 case LINUX_PR_SET_NO_NEW_PRIVS: 1821 arg = args->arg2 == 1 ? 1822 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1823 error = kern_procctl(td, P_PID, p->p_pid, 1824 PROC_NO_NEW_PRIVS_CTL, &arg); 1825 break; 1826 case LINUX_PR_GET_NO_NEW_PRIVS: 1827 error = kern_procctl(td, P_PID, p->p_pid, 1828 PROC_NO_NEW_PRIVS_STATUS, &arg); 1829 if (error != 0) 1830 return (error); 1831 /* Linux returns the value as the syscall return */ 1832 td->td_retval[0] = arg == PROC_NO_NEW_PRIVS_ENABLE ? 1 : 0; 1833 break; 1834 case LINUX_PR_SET_PTRACER: 1835 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1836 error = EINVAL; 1837 break; 1838 case LINUX_PR_SET_VMA: 1839 if (args->arg2 != LINUX_PR_SET_VMA_ANON_NAME) { 1840 linux_msg(td, "unsupported prctl PR_SET_VMA attr %ju", 1841 (uintmax_t)args->arg2); 1842 error = EINVAL; 1843 } 1844 break; 1845 default: 1846 linux_msg(td, "unsupported prctl option %d", args->option); 1847 error = EINVAL; 1848 break; 1849 } 1850 1851 return (error); 1852 } 1853 1854 int 1855 linux_sched_setparam(struct thread *td, 1856 struct linux_sched_setparam_args *uap) 1857 { 1858 struct sched_param sched_param; 1859 struct thread *tdt; 1860 int error, policy; 1861 1862 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1863 if (error) 1864 return (error); 1865 1866 tdt = linux_tdfind(td, uap->pid, -1); 1867 if (tdt == NULL) 1868 return (ESRCH); 1869 1870 if (linux_map_sched_prio) { 1871 error = kern_sched_getscheduler(td, tdt, &policy); 1872 if (error) 1873 goto out; 1874 1875 switch (policy) { 1876 case SCHED_OTHER: 1877 if (sched_param.sched_priority != 0) { 1878 error = EINVAL; 1879 goto out; 1880 } 1881 sched_param.sched_priority = 1882 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1883 break; 1884 case SCHED_FIFO: 1885 case SCHED_RR: 1886 if (sched_param.sched_priority < 1 || 1887 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1888 error = EINVAL; 1889 goto out; 1890 } 1891 /* 1892 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1893 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1894 */ 1895 sched_param.sched_priority = 1896 (sched_param.sched_priority - 1) * 1897 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1898 (LINUX_MAX_RT_PRIO - 1); 1899 break; 1900 } 1901 } 1902 1903 error = kern_sched_setparam(td, tdt, &sched_param); 1904 out: PROC_UNLOCK(tdt->td_proc); 1905 return (error); 1906 } 1907 1908 int 1909 linux_sched_getparam(struct thread *td, 1910 struct linux_sched_getparam_args *uap) 1911 { 1912 struct sched_param sched_param; 1913 struct thread *tdt; 1914 int error, policy; 1915 1916 tdt = linux_tdfind(td, uap->pid, -1); 1917 if (tdt == NULL) 1918 return (ESRCH); 1919 1920 error = kern_sched_getparam(td, tdt, &sched_param); 1921 if (error) { 1922 PROC_UNLOCK(tdt->td_proc); 1923 return (error); 1924 } 1925 1926 if (linux_map_sched_prio) { 1927 error = kern_sched_getscheduler(td, tdt, &policy); 1928 PROC_UNLOCK(tdt->td_proc); 1929 if (error) 1930 return (error); 1931 1932 switch (policy) { 1933 case SCHED_OTHER: 1934 sched_param.sched_priority = 0; 1935 break; 1936 case SCHED_FIFO: 1937 case SCHED_RR: 1938 /* 1939 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1940 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1941 */ 1942 sched_param.sched_priority = 1943 (sched_param.sched_priority * 1944 (LINUX_MAX_RT_PRIO - 1) + 1945 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1946 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1947 break; 1948 } 1949 } else 1950 PROC_UNLOCK(tdt->td_proc); 1951 1952 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1953 return (error); 1954 } 1955 1956 /* 1957 * Get affinity of a process. 1958 */ 1959 int 1960 linux_sched_getaffinity(struct thread *td, 1961 struct linux_sched_getaffinity_args *args) 1962 { 1963 struct thread *tdt; 1964 cpuset_t *mask; 1965 size_t size; 1966 int error; 1967 id_t tid; 1968 1969 tdt = linux_tdfind(td, args->pid, -1); 1970 if (tdt == NULL) 1971 return (ESRCH); 1972 tid = tdt->td_tid; 1973 PROC_UNLOCK(tdt->td_proc); 1974 1975 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1976 size = min(args->len, sizeof(cpuset_t)); 1977 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1978 tid, size, mask); 1979 if (error == ERANGE) 1980 error = EINVAL; 1981 if (error == 0) 1982 error = copyout(mask, args->user_mask_ptr, size); 1983 if (error == 0) 1984 td->td_retval[0] = size; 1985 free(mask, M_LINUX); 1986 return (error); 1987 } 1988 1989 /* 1990 * Set affinity of a process. 1991 */ 1992 int 1993 linux_sched_setaffinity(struct thread *td, 1994 struct linux_sched_setaffinity_args *args) 1995 { 1996 struct thread *tdt; 1997 cpuset_t *mask; 1998 int cpu, error; 1999 size_t len; 2000 id_t tid; 2001 2002 tdt = linux_tdfind(td, args->pid, -1); 2003 if (tdt == NULL) 2004 return (ESRCH); 2005 tid = tdt->td_tid; 2006 PROC_UNLOCK(tdt->td_proc); 2007 2008 len = min(args->len, sizeof(cpuset_t)); 2009 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); 2010 error = copyin(args->user_mask_ptr, mask, len); 2011 if (error != 0) 2012 goto out; 2013 /* Linux ignore high bits */ 2014 CPU_FOREACH_ISSET(cpu, mask) 2015 if (cpu > mp_maxid) 2016 CPU_CLR(cpu, mask); 2017 2018 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2019 tid, mask); 2020 if (error == EDEADLK) 2021 error = EINVAL; 2022 out: 2023 free(mask, M_TEMP); 2024 return (error); 2025 } 2026 2027 struct linux_rlimit64 { 2028 uint64_t rlim_cur; 2029 uint64_t rlim_max; 2030 }; 2031 2032 int 2033 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2034 { 2035 struct rlimit rlim, nrlim; 2036 struct linux_rlimit64 lrlim; 2037 struct proc *p; 2038 u_int which; 2039 int flags; 2040 int error; 2041 2042 if (args->new == NULL && args->old != NULL) { 2043 if (linux_get_dummy_limit(td, args->resource, &rlim)) { 2044 lrlim.rlim_cur = rlim.rlim_cur; 2045 lrlim.rlim_max = rlim.rlim_max; 2046 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2047 } 2048 } 2049 2050 if (args->resource >= LINUX_RLIM_NLIMITS) 2051 return (EINVAL); 2052 2053 which = linux_to_bsd_resource[args->resource]; 2054 if (which == -1) 2055 return (EINVAL); 2056 2057 if (args->new != NULL) { 2058 /* 2059 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2060 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2061 * as INFINITY so we do not need a conversion even. 2062 */ 2063 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2064 if (error != 0) 2065 return (error); 2066 } 2067 2068 flags = PGET_HOLD | PGET_NOTWEXIT; 2069 if (args->new != NULL) 2070 flags |= PGET_CANDEBUG; 2071 else 2072 flags |= PGET_CANSEE; 2073 if (args->pid == 0) { 2074 p = td->td_proc; 2075 PHOLD(p); 2076 } else { 2077 error = pget(args->pid, flags, &p); 2078 if (error != 0) 2079 return (error); 2080 } 2081 if (args->old != NULL) { 2082 PROC_LOCK(p); 2083 lim_rlimit_proc(p, which, &rlim); 2084 PROC_UNLOCK(p); 2085 if (rlim.rlim_cur == RLIM_INFINITY) 2086 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2087 else 2088 lrlim.rlim_cur = rlim.rlim_cur; 2089 if (rlim.rlim_max == RLIM_INFINITY) 2090 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2091 else 2092 lrlim.rlim_max = rlim.rlim_max; 2093 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2094 if (error != 0) 2095 goto out; 2096 } 2097 2098 if (args->new != NULL) 2099 error = kern_proc_setrlimit(td, p, which, &nrlim); 2100 2101 out: 2102 PRELE(p); 2103 return (error); 2104 } 2105 2106 int 2107 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2108 { 2109 struct timespec ts, *tsp; 2110 int error; 2111 2112 if (args->tsp != NULL) { 2113 error = linux_get_timespec(&ts, args->tsp); 2114 if (error != 0) 2115 return (error); 2116 tsp = &ts; 2117 } else 2118 tsp = NULL; 2119 2120 error = linux_common_pselect6(td, args->nfds, args->readfds, 2121 args->writefds, args->exceptfds, tsp, args->sig); 2122 2123 if (args->tsp != NULL) 2124 linux_put_timespec(&ts, args->tsp); 2125 return (error); 2126 } 2127 2128 static int 2129 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2130 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2131 l_uintptr_t *sig) 2132 { 2133 struct timeval utv, tv0, tv1, *tvp; 2134 struct l_pselect6arg lpse6; 2135 sigset_t *ssp; 2136 sigset_t ss; 2137 int error; 2138 2139 ssp = NULL; 2140 if (sig != NULL) { 2141 error = copyin(sig, &lpse6, sizeof(lpse6)); 2142 if (error != 0) 2143 return (error); 2144 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2145 lpse6.ss_len, &ss, &ssp); 2146 if (error != 0) 2147 return (error); 2148 } else 2149 ssp = NULL; 2150 2151 /* 2152 * Currently glibc changes nanosecond number to microsecond. 2153 * This mean losing precision but for now it is hardly seen. 2154 */ 2155 if (tsp != NULL) { 2156 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2157 if (itimerfix(&utv)) 2158 return (EINVAL); 2159 2160 microtime(&tv0); 2161 tvp = &utv; 2162 } else 2163 tvp = NULL; 2164 2165 error = kern_pselect(td, nfds, readfds, writefds, 2166 exceptfds, tvp, ssp, LINUX_NFDBITS); 2167 2168 if (tsp != NULL) { 2169 /* 2170 * Compute how much time was left of the timeout, 2171 * by subtracting the current time and the time 2172 * before we started the call, and subtracting 2173 * that result from the user-supplied value. 2174 */ 2175 microtime(&tv1); 2176 timevalsub(&tv1, &tv0); 2177 timevalsub(&utv, &tv1); 2178 if (utv.tv_sec < 0) 2179 timevalclear(&utv); 2180 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2181 } 2182 return (error); 2183 } 2184 2185 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2186 int 2187 linux_pselect6_time64(struct thread *td, 2188 struct linux_pselect6_time64_args *args) 2189 { 2190 struct timespec ts, *tsp; 2191 int error; 2192 2193 if (args->tsp != NULL) { 2194 error = linux_get_timespec64(&ts, args->tsp); 2195 if (error != 0) 2196 return (error); 2197 tsp = &ts; 2198 } else 2199 tsp = NULL; 2200 2201 error = linux_common_pselect6(td, args->nfds, args->readfds, 2202 args->writefds, args->exceptfds, tsp, args->sig); 2203 2204 if (args->tsp != NULL) 2205 linux_put_timespec64(&ts, args->tsp); 2206 return (error); 2207 } 2208 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2209 2210 int 2211 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2212 { 2213 struct timespec uts, *tsp; 2214 int error; 2215 2216 if (args->tsp != NULL) { 2217 error = linux_get_timespec(&uts, args->tsp); 2218 if (error != 0) 2219 return (error); 2220 tsp = &uts; 2221 } else 2222 tsp = NULL; 2223 2224 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2225 args->sset, args->ssize); 2226 if (error == 0 && args->tsp != NULL) 2227 error = linux_put_timespec(&uts, args->tsp); 2228 return (error); 2229 } 2230 2231 static int 2232 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2233 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2234 { 2235 struct timespec ts0, ts1; 2236 struct pollfd stackfds[32]; 2237 struct pollfd *kfds; 2238 sigset_t *ssp; 2239 sigset_t ss; 2240 int error; 2241 2242 if (kern_poll_maxfds(nfds)) 2243 return (EINVAL); 2244 if (sset != NULL) { 2245 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2246 if (error != 0) 2247 return (error); 2248 } else 2249 ssp = NULL; 2250 if (tsp != NULL) 2251 nanotime(&ts0); 2252 2253 if (nfds > nitems(stackfds)) 2254 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2255 else 2256 kfds = stackfds; 2257 error = linux_pollin(td, kfds, fds, nfds); 2258 if (error != 0) 2259 goto out; 2260 2261 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2262 if (error == 0) 2263 error = linux_pollout(td, kfds, fds, nfds); 2264 2265 if (error == 0 && tsp != NULL) { 2266 if (td->td_retval[0]) { 2267 nanotime(&ts1); 2268 timespecsub(&ts1, &ts0, &ts1); 2269 timespecsub(tsp, &ts1, tsp); 2270 if (tsp->tv_sec < 0) 2271 timespecclear(tsp); 2272 } else 2273 timespecclear(tsp); 2274 } 2275 2276 out: 2277 if (nfds > nitems(stackfds)) 2278 free(kfds, M_TEMP); 2279 return (error); 2280 } 2281 2282 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2283 int 2284 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2285 { 2286 struct timespec uts, *tsp; 2287 int error; 2288 2289 if (args->tsp != NULL) { 2290 error = linux_get_timespec64(&uts, args->tsp); 2291 if (error != 0) 2292 return (error); 2293 tsp = &uts; 2294 } else 2295 tsp = NULL; 2296 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2297 args->sset, args->ssize); 2298 if (error == 0 && args->tsp != NULL) 2299 error = linux_put_timespec64(&uts, args->tsp); 2300 return (error); 2301 } 2302 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2303 2304 static int 2305 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2306 { 2307 int error; 2308 u_int i; 2309 2310 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2311 if (error != 0) 2312 return (error); 2313 2314 for (i = 0; i < nfd; i++) { 2315 if (fds->events != 0) 2316 linux_to_bsd_poll_events(td, fds->fd, 2317 fds->events, &fds->events); 2318 fds++; 2319 } 2320 return (0); 2321 } 2322 2323 static int 2324 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2325 { 2326 int error = 0; 2327 u_int i, n = 0; 2328 2329 for (i = 0; i < nfd; i++) { 2330 if (fds->revents != 0) { 2331 bsd_to_linux_poll_events(fds->revents, 2332 &fds->revents); 2333 n++; 2334 } 2335 error = copyout(&fds->revents, &ufds->revents, 2336 sizeof(ufds->revents)); 2337 if (error) 2338 return (error); 2339 fds++; 2340 ufds++; 2341 } 2342 td->td_retval[0] = n; 2343 return (0); 2344 } 2345 2346 static int 2347 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2348 struct timespec *ts) 2349 { 2350 struct thread *tdt; 2351 int error; 2352 2353 /* 2354 * According to man in case the invalid pid specified 2355 * EINVAL should be returned. 2356 */ 2357 if (pid < 0) 2358 return (EINVAL); 2359 2360 tdt = linux_tdfind(td, pid, -1); 2361 if (tdt == NULL) 2362 return (ESRCH); 2363 2364 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2365 PROC_UNLOCK(tdt->td_proc); 2366 return (error); 2367 } 2368 2369 int 2370 linux_sched_rr_get_interval(struct thread *td, 2371 struct linux_sched_rr_get_interval_args *uap) 2372 { 2373 struct timespec ts; 2374 int error; 2375 2376 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2377 if (error != 0) 2378 return (error); 2379 return (linux_put_timespec(&ts, uap->interval)); 2380 } 2381 2382 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2383 int 2384 linux_sched_rr_get_interval_time64(struct thread *td, 2385 struct linux_sched_rr_get_interval_time64_args *uap) 2386 { 2387 struct timespec ts; 2388 int error; 2389 2390 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2391 if (error != 0) 2392 return (error); 2393 return (linux_put_timespec64(&ts, uap->interval)); 2394 } 2395 #endif 2396 2397 /* 2398 * In case when the Linux thread is the initial thread in 2399 * the thread group thread id is equal to the process id. 2400 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2401 */ 2402 struct thread * 2403 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2404 { 2405 struct linux_emuldata *em; 2406 struct thread *tdt; 2407 struct proc *p; 2408 2409 tdt = NULL; 2410 if (tid == 0 || tid == td->td_tid) { 2411 if (pid != -1 && td->td_proc->p_pid != pid) 2412 return (NULL); 2413 PROC_LOCK(td->td_proc); 2414 return (td); 2415 } else if (tid > PID_MAX) 2416 return (tdfind(tid, pid)); 2417 2418 /* 2419 * Initial thread where the tid equal to the pid. 2420 */ 2421 p = pfind(tid); 2422 if (p != NULL) { 2423 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2424 (pid != -1 && tid != pid)) { 2425 /* 2426 * p is not a Linuxulator process. 2427 */ 2428 PROC_UNLOCK(p); 2429 return (NULL); 2430 } 2431 FOREACH_THREAD_IN_PROC(p, tdt) { 2432 em = em_find(tdt); 2433 if (tid == em->em_tid) 2434 return (tdt); 2435 } 2436 PROC_UNLOCK(p); 2437 } 2438 return (NULL); 2439 } 2440 2441 void 2442 linux_to_bsd_waitopts(int options, int *bsdopts) 2443 { 2444 2445 if (options & LINUX_WNOHANG) 2446 *bsdopts |= WNOHANG; 2447 if (options & LINUX_WUNTRACED) 2448 *bsdopts |= WUNTRACED; 2449 if (options & LINUX_WEXITED) 2450 *bsdopts |= WEXITED; 2451 if (options & LINUX_WCONTINUED) 2452 *bsdopts |= WCONTINUED; 2453 if (options & LINUX_WNOWAIT) 2454 *bsdopts |= WNOWAIT; 2455 2456 if (options & __WCLONE) 2457 *bsdopts |= WLINUXCLONE; 2458 } 2459 2460 int 2461 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2462 { 2463 struct uio uio; 2464 struct iovec iov; 2465 int error; 2466 2467 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2468 return (EINVAL); 2469 if (args->count > INT_MAX) 2470 args->count = INT_MAX; 2471 2472 iov.iov_base = args->buf; 2473 iov.iov_len = args->count; 2474 2475 uio.uio_iov = &iov; 2476 uio.uio_iovcnt = 1; 2477 uio.uio_resid = iov.iov_len; 2478 uio.uio_segflg = UIO_USERSPACE; 2479 uio.uio_rw = UIO_READ; 2480 uio.uio_td = td; 2481 2482 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2483 if (error == 0) 2484 td->td_retval[0] = args->count - uio.uio_resid; 2485 return (error); 2486 } 2487 2488 int 2489 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2490 { 2491 2492 /* Needs to be page-aligned */ 2493 if (args->start & PAGE_MASK) 2494 return (EINVAL); 2495 return (kern_mincore(td, args->start, args->len, args->vec)); 2496 } 2497 2498 #define SYSLOG_TAG "<6>" 2499 2500 int 2501 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2502 { 2503 char buf[128], *src, *dst; 2504 u_int seq; 2505 int buflen, error; 2506 2507 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2508 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2509 return (EINVAL); 2510 } 2511 2512 if (args->len < 6) { 2513 td->td_retval[0] = 0; 2514 return (0); 2515 } 2516 2517 error = priv_check(td, PRIV_MSGBUF); 2518 if (error) 2519 return (error); 2520 2521 mtx_lock(&msgbuf_lock); 2522 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2523 mtx_unlock(&msgbuf_lock); 2524 2525 dst = args->buf; 2526 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2527 /* The -1 is to skip the trailing '\0'. */ 2528 dst += sizeof(SYSLOG_TAG) - 1; 2529 2530 while (error == 0) { 2531 mtx_lock(&msgbuf_lock); 2532 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2533 mtx_unlock(&msgbuf_lock); 2534 2535 if (buflen == 0) 2536 break; 2537 2538 for (src = buf; src < buf + buflen && error == 0; src++) { 2539 if (*src == '\0') 2540 continue; 2541 2542 if (dst >= args->buf + args->len) 2543 goto out; 2544 2545 error = copyout(src, dst, 1); 2546 dst++; 2547 2548 if (*src == '\n' && *(src + 1) != '<' && 2549 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2550 error = copyout(&SYSLOG_TAG, 2551 dst, sizeof(SYSLOG_TAG)); 2552 dst += sizeof(SYSLOG_TAG) - 1; 2553 } 2554 } 2555 } 2556 out: 2557 td->td_retval[0] = dst - args->buf; 2558 return (error); 2559 } 2560 2561 int 2562 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2563 { 2564 int cpu, error, node; 2565 2566 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2567 error = 0; 2568 node = cpuid_to_pcpu[cpu]->pc_domain; 2569 2570 if (args->cpu != NULL) 2571 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2572 if (args->node != NULL) 2573 error = copyout(&node, args->node, sizeof(l_int)); 2574 return (error); 2575 } 2576 2577 #if defined(__i386__) || defined(__amd64__) 2578 int 2579 linux_poll(struct thread *td, struct linux_poll_args *args) 2580 { 2581 struct timespec ts, *tsp; 2582 2583 if (args->timeout != INFTIM) { 2584 if (args->timeout < 0) 2585 return (EINVAL); 2586 ts.tv_sec = args->timeout / 1000; 2587 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2588 tsp = &ts; 2589 } else 2590 tsp = NULL; 2591 2592 return (linux_common_ppoll(td, args->fds, args->nfds, 2593 tsp, NULL, 0)); 2594 } 2595 #endif /* __i386__ || __amd64__ */ 2596 2597 int 2598 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2599 { 2600 2601 switch (args->op) { 2602 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2603 return (EOPNOTSUPP); 2604 default: 2605 /* 2606 * Ignore unknown operations, just like Linux kernel built 2607 * without CONFIG_SECCOMP. 2608 */ 2609 return (EINVAL); 2610 } 2611 } 2612 2613 /* 2614 * Custom version of exec_copyin_args(), to copy out argument and environment 2615 * strings from the old process address space into the temporary string buffer. 2616 * Based on freebsd32_exec_copyin_args. 2617 */ 2618 static int 2619 linux_exec_copyin_args(struct image_args *args, const char *fname, 2620 l_uintptr_t *argv, l_uintptr_t *envv) 2621 { 2622 char *argp, *envp; 2623 l_uintptr_t *ptr, arg; 2624 int error; 2625 2626 bzero(args, sizeof(*args)); 2627 if (argv == NULL) 2628 return (EFAULT); 2629 2630 /* 2631 * Allocate demand-paged memory for the file name, argument, and 2632 * environment strings. 2633 */ 2634 error = exec_alloc_args(args); 2635 if (error != 0) 2636 return (error); 2637 2638 /* 2639 * Copy the file name. 2640 */ 2641 error = exec_args_add_fname(args, fname, UIO_USERSPACE); 2642 if (error != 0) 2643 goto err_exit; 2644 2645 /* 2646 * extract arguments first 2647 */ 2648 ptr = argv; 2649 for (;;) { 2650 error = copyin(ptr++, &arg, sizeof(arg)); 2651 if (error) 2652 goto err_exit; 2653 if (arg == 0) 2654 break; 2655 argp = PTRIN(arg); 2656 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2657 if (error != 0) 2658 goto err_exit; 2659 } 2660 2661 /* 2662 * This comment is from Linux do_execveat_common: 2663 * When argv is empty, add an empty string ("") as argv[0] to 2664 * ensure confused userspace programs that start processing 2665 * from argv[1] won't end up walking envp. 2666 */ 2667 if (args->argc == 0 && 2668 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2669 goto err_exit; 2670 2671 /* 2672 * extract environment strings 2673 */ 2674 if (envv) { 2675 ptr = envv; 2676 for (;;) { 2677 error = copyin(ptr++, &arg, sizeof(arg)); 2678 if (error) 2679 goto err_exit; 2680 if (arg == 0) 2681 break; 2682 envp = PTRIN(arg); 2683 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2684 if (error != 0) 2685 goto err_exit; 2686 } 2687 } 2688 2689 return (0); 2690 2691 err_exit: 2692 exec_free_args(args); 2693 return (error); 2694 } 2695 2696 int 2697 linux_execve(struct thread *td, struct linux_execve_args *args) 2698 { 2699 struct image_args eargs; 2700 int error; 2701 2702 LINUX_CTR(execve); 2703 2704 error = linux_exec_copyin_args(&eargs, args->path, args->argp, 2705 args->envp); 2706 if (error == 0) 2707 error = linux_common_execve(td, &eargs); 2708 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2709 return (error); 2710 } 2711 2712 static void 2713 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) 2714 { 2715 struct rtprio rtp2; 2716 2717 pri_to_rtp(td1, &rtp2); 2718 if (rtp2.type < rtp->type || 2719 (rtp2.type == rtp->type && 2720 rtp2.prio < rtp->prio)) { 2721 rtp->type = rtp2.type; 2722 rtp->prio = rtp2.prio; 2723 } 2724 } 2725 2726 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX 2727 2728 static int 2729 linux_rtprio2ioprio(struct rtprio *rtp) 2730 { 2731 int ioprio, prio; 2732 2733 switch (rtp->type) { 2734 case RTP_PRIO_IDLE: 2735 prio = RTP_PRIO_MIN; 2736 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); 2737 break; 2738 case RTP_PRIO_NORMAL: 2739 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2740 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); 2741 break; 2742 case RTP_PRIO_REALTIME: 2743 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2744 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); 2745 break; 2746 default: 2747 prio = RTP_PRIO_MIN; 2748 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); 2749 break; 2750 } 2751 return (ioprio); 2752 } 2753 2754 static int 2755 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) 2756 { 2757 2758 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { 2759 case LINUX_IOPRIO_CLASS_IDLE: 2760 rtp->prio = RTP_PRIO_MIN; 2761 rtp->type = RTP_PRIO_IDLE; 2762 break; 2763 case LINUX_IOPRIO_CLASS_BE: 2764 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2765 rtp->type = RTP_PRIO_NORMAL; 2766 break; 2767 case LINUX_IOPRIO_CLASS_RT: 2768 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2769 rtp->type = RTP_PRIO_REALTIME; 2770 break; 2771 default: 2772 return (EINVAL); 2773 } 2774 return (0); 2775 } 2776 #undef LINUX_PRIO_DIVIDER 2777 2778 int 2779 linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) 2780 { 2781 struct thread *td1; 2782 struct rtprio rtp; 2783 struct pgrp *pg; 2784 struct proc *p; 2785 int error, found; 2786 2787 p = NULL; 2788 td1 = NULL; 2789 error = 0; 2790 found = 0; 2791 rtp.type = RTP_PRIO_IDLE; 2792 rtp.prio = RTP_PRIO_MAX; 2793 switch (args->which) { 2794 case LINUX_IOPRIO_WHO_PROCESS: 2795 if (args->who == 0) { 2796 td1 = td; 2797 p = td1->td_proc; 2798 PROC_LOCK(p); 2799 } else if (args->who > PID_MAX) { 2800 td1 = linux_tdfind(td, args->who, -1); 2801 if (td1 != NULL) 2802 p = td1->td_proc; 2803 } else 2804 p = pfind(args->who); 2805 if (p == NULL) 2806 return (ESRCH); 2807 if ((error = p_cansee(td, p))) { 2808 PROC_UNLOCK(p); 2809 break; 2810 } 2811 if (td1 != NULL) { 2812 pri_to_rtp(td1, &rtp); 2813 } else { 2814 FOREACH_THREAD_IN_PROC(p, td1) { 2815 linux_up_rtprio_if(td1, &rtp); 2816 } 2817 } 2818 found++; 2819 PROC_UNLOCK(p); 2820 break; 2821 case LINUX_IOPRIO_WHO_PGRP: 2822 sx_slock(&proctree_lock); 2823 if (args->who == 0) { 2824 pg = td->td_proc->p_pgrp; 2825 PGRP_LOCK(pg); 2826 } else { 2827 pg = pgfind(args->who); 2828 if (pg == NULL) { 2829 sx_sunlock(&proctree_lock); 2830 error = ESRCH; 2831 break; 2832 } 2833 } 2834 sx_sunlock(&proctree_lock); 2835 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2836 PROC_LOCK(p); 2837 if (p->p_state == PRS_NORMAL && 2838 p_cansee(td, p) == 0) { 2839 FOREACH_THREAD_IN_PROC(p, td1) { 2840 linux_up_rtprio_if(td1, &rtp); 2841 found++; 2842 } 2843 } 2844 PROC_UNLOCK(p); 2845 } 2846 PGRP_UNLOCK(pg); 2847 break; 2848 case LINUX_IOPRIO_WHO_USER: 2849 if (args->who == 0) 2850 args->who = td->td_ucred->cr_uid; 2851 sx_slock(&allproc_lock); 2852 FOREACH_PROC_IN_SYSTEM(p) { 2853 PROC_LOCK(p); 2854 if (p->p_state == PRS_NORMAL && 2855 p->p_ucred->cr_uid == args->who && 2856 p_cansee(td, p) == 0) { 2857 FOREACH_THREAD_IN_PROC(p, td1) { 2858 linux_up_rtprio_if(td1, &rtp); 2859 found++; 2860 } 2861 } 2862 PROC_UNLOCK(p); 2863 } 2864 sx_sunlock(&allproc_lock); 2865 break; 2866 default: 2867 error = EINVAL; 2868 break; 2869 } 2870 if (error == 0) { 2871 if (found != 0) 2872 td->td_retval[0] = linux_rtprio2ioprio(&rtp); 2873 else 2874 error = ESRCH; 2875 } 2876 return (error); 2877 } 2878 2879 int 2880 linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) 2881 { 2882 struct thread *td1; 2883 struct rtprio rtp; 2884 struct pgrp *pg; 2885 struct proc *p; 2886 int error; 2887 2888 if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) 2889 return (error); 2890 /* Attempts to set high priorities (REALTIME) require su privileges. */ 2891 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && 2892 (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) 2893 return (error); 2894 2895 p = NULL; 2896 td1 = NULL; 2897 switch (args->which) { 2898 case LINUX_IOPRIO_WHO_PROCESS: 2899 if (args->who == 0) { 2900 td1 = td; 2901 p = td1->td_proc; 2902 PROC_LOCK(p); 2903 } else if (args->who > PID_MAX) { 2904 td1 = linux_tdfind(td, args->who, -1); 2905 if (td1 != NULL) 2906 p = td1->td_proc; 2907 } else 2908 p = pfind(args->who); 2909 if (p == NULL) 2910 return (ESRCH); 2911 if ((error = p_cansched(td, p))) { 2912 PROC_UNLOCK(p); 2913 break; 2914 } 2915 if (td1 != NULL) { 2916 error = rtp_to_pri(&rtp, td1); 2917 } else { 2918 FOREACH_THREAD_IN_PROC(p, td1) { 2919 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2920 break; 2921 } 2922 } 2923 PROC_UNLOCK(p); 2924 break; 2925 case LINUX_IOPRIO_WHO_PGRP: 2926 sx_slock(&proctree_lock); 2927 if (args->who == 0) { 2928 pg = td->td_proc->p_pgrp; 2929 PGRP_LOCK(pg); 2930 } else { 2931 pg = pgfind(args->who); 2932 if (pg == NULL) { 2933 sx_sunlock(&proctree_lock); 2934 error = ESRCH; 2935 break; 2936 } 2937 } 2938 sx_sunlock(&proctree_lock); 2939 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2940 PROC_LOCK(p); 2941 if (p->p_state == PRS_NORMAL && 2942 p_cansched(td, p) == 0) { 2943 FOREACH_THREAD_IN_PROC(p, td1) { 2944 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2945 break; 2946 } 2947 } 2948 PROC_UNLOCK(p); 2949 if (error != 0) 2950 break; 2951 } 2952 PGRP_UNLOCK(pg); 2953 break; 2954 case LINUX_IOPRIO_WHO_USER: 2955 if (args->who == 0) 2956 args->who = td->td_ucred->cr_uid; 2957 sx_slock(&allproc_lock); 2958 FOREACH_PROC_IN_SYSTEM(p) { 2959 PROC_LOCK(p); 2960 if (p->p_state == PRS_NORMAL && 2961 p->p_ucred->cr_uid == args->who && 2962 p_cansched(td, p) == 0) { 2963 FOREACH_THREAD_IN_PROC(p, td1) { 2964 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2965 break; 2966 } 2967 } 2968 PROC_UNLOCK(p); 2969 if (error != 0) 2970 break; 2971 } 2972 sx_sunlock(&allproc_lock); 2973 break; 2974 default: 2975 error = EINVAL; 2976 break; 2977 } 2978 return (error); 2979 } 2980 2981 /* The only flag is O_NONBLOCK */ 2982 #define B2L_MQ_FLAGS(bflags) ((bflags) != 0 ? LINUX_O_NONBLOCK : 0) 2983 #define L2B_MQ_FLAGS(lflags) ((lflags) != 0 ? O_NONBLOCK : 0) 2984 2985 int 2986 linux_mq_open(struct thread *td, struct linux_mq_open_args *args) 2987 { 2988 struct mq_attr attr; 2989 int error, flags; 2990 2991 flags = linux_common_openflags(args->oflag); 2992 if ((flags & O_ACCMODE) == O_ACCMODE || (flags & O_EXEC) != 0) 2993 return (EINVAL); 2994 flags = FFLAGS(flags); 2995 if ((flags & O_CREAT) != 0 && args->attr != NULL) { 2996 error = copyin(args->attr, &attr, sizeof(attr)); 2997 if (error != 0) 2998 return (error); 2999 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 3000 } 3001 3002 return (kern_kmq_open(td, args->name, flags, args->mode, 3003 args->attr != NULL ? &attr : NULL)); 3004 } 3005 3006 int 3007 linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 3008 { 3009 struct kmq_unlink_args bsd_args = { 3010 .path = PTRIN(args->name) 3011 }; 3012 3013 return (sys_kmq_unlink(td, &bsd_args)); 3014 } 3015 3016 int 3017 linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 3018 { 3019 struct timespec ts, *abs_timeout; 3020 int error; 3021 3022 if (args->abs_timeout == NULL) 3023 abs_timeout = NULL; 3024 else { 3025 error = linux_get_timespec(&ts, args->abs_timeout); 3026 if (error != 0) 3027 return (error); 3028 abs_timeout = &ts; 3029 } 3030 3031 return (kern_kmq_timedsend(td, args->mqd, PTRIN(args->msg_ptr), 3032 args->msg_len, args->msg_prio, abs_timeout)); 3033 } 3034 3035 int 3036 linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 3037 { 3038 struct timespec ts, *abs_timeout; 3039 int error; 3040 3041 if (args->abs_timeout == NULL) 3042 abs_timeout = NULL; 3043 else { 3044 error = linux_get_timespec(&ts, args->abs_timeout); 3045 if (error != 0) 3046 return (error); 3047 abs_timeout = &ts; 3048 } 3049 3050 return (kern_kmq_timedreceive(td, args->mqd, PTRIN(args->msg_ptr), 3051 args->msg_len, args->msg_prio, abs_timeout)); 3052 } 3053 3054 int 3055 linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 3056 { 3057 struct sigevent ev, *evp; 3058 struct l_sigevent l_ev; 3059 int error; 3060 3061 if (args->sevp == NULL) 3062 evp = NULL; 3063 else { 3064 error = copyin(args->sevp, &l_ev, sizeof(l_ev)); 3065 if (error != 0) 3066 return (error); 3067 error = linux_convert_l_sigevent(&l_ev, &ev); 3068 if (error != 0) 3069 return (error); 3070 evp = &ev; 3071 } 3072 3073 return (kern_kmq_notify(td, args->mqd, evp)); 3074 } 3075 3076 int 3077 linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 3078 { 3079 struct mq_attr attr, oattr; 3080 int error; 3081 3082 if (args->attr != NULL) { 3083 error = copyin(args->attr, &attr, sizeof(attr)); 3084 if (error != 0) 3085 return (error); 3086 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 3087 } 3088 3089 error = kern_kmq_setattr(td, args->mqd, args->attr != NULL ? &attr : NULL, 3090 &oattr); 3091 if (error == 0 && args->oattr != NULL) { 3092 oattr.mq_flags = B2L_MQ_FLAGS(oattr.mq_flags); 3093 bzero(oattr.__reserved, sizeof(oattr.__reserved)); 3094 error = copyout(&oattr, args->oattr, sizeof(oattr)); 3095 } 3096 3097 return (error); 3098 } 3099 3100 int 3101 linux_kcmp(struct thread *td, struct linux_kcmp_args *args) 3102 { 3103 int type; 3104 3105 switch (args->type) { 3106 case LINUX_KCMP_FILE: 3107 type = KCMP_FILE; 3108 break; 3109 case LINUX_KCMP_FILES: 3110 type = KCMP_FILES; 3111 break; 3112 case LINUX_KCMP_SIGHAND: 3113 type = KCMP_SIGHAND; 3114 break; 3115 case LINUX_KCMP_VM: 3116 type = KCMP_VM; 3117 break; 3118 default: 3119 return (EINVAL); 3120 } 3121 3122 return (kern_kcmp(td, args->pid1, args->pid2, type, args->idx1, 3123 args->idx)); 3124 } 3125 3126 int 3127 linux_membarrier(struct thread *td, struct linux_membarrier_args *args) 3128 { 3129 static const struct { 3130 int linux_cmd; 3131 int freebsd_cmd; 3132 } cmds[] = { 3133 { LINUX_MEMBARRIER_CMD_QUERY, 3134 MEMBARRIER_CMD_QUERY }, 3135 { LINUX_MEMBARRIER_CMD_GLOBAL, 3136 MEMBARRIER_CMD_GLOBAL }, 3137 { LINUX_MEMBARRIER_CMD_GLOBAL_EXPEDITED, 3138 MEMBARRIER_CMD_GLOBAL_EXPEDITED }, 3139 { LINUX_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 3140 MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED }, 3141 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED, 3142 MEMBARRIER_CMD_PRIVATE_EXPEDITED }, 3143 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 3144 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED }, 3145 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 3146 MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE }, 3147 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 3148 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE }, 3149 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 3150 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ }, 3151 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 3152 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ }, 3153 { LINUX_MEMBARRIER_CMD_GET_REGISTRATIONS, 3154 MEMBARRIER_CMD_GET_REGISTRATIONS }, 3155 }; 3156 int cmd, error, flags, i, mask; 3157 3158 cmd = -1; 3159 for (i = 0; i < nitems(cmds); i++) { 3160 if (args->cmd == cmds[i].linux_cmd) { 3161 cmd = cmds[i].freebsd_cmd; 3162 break; 3163 } 3164 } 3165 3166 if (cmd == -1 || (args->flags & ~LINUX_MEMBARRIER_CMD_FLAG_CPU) != 0) 3167 return (EINVAL); 3168 3169 flags = 0; 3170 if ((args->flags & LINUX_MEMBARRIER_CMD_FLAG_CPU) != 0) 3171 flags |= MEMBARRIER_CMD_FLAG_CPU; 3172 3173 error = kern_membarrier(td, cmd, flags, args->cpu_id); 3174 if (error != 0) 3175 return (error); 3176 3177 if (args->cmd == LINUX_MEMBARRIER_CMD_QUERY || 3178 args->cmd == LINUX_MEMBARRIER_CMD_GET_REGISTRATIONS) { 3179 mask = td->td_retval[0]; 3180 td->td_retval[0] = 0; 3181 for (i = 0; i < nitems(cmds); i++) 3182 if ((mask & cmds[i].freebsd_cmd) != 0) 3183 td->td_retval[0] |= cmds[i].linux_cmd; 3184 } 3185 3186 return (0); 3187 } 3188 3189 /* 3190 * setfsuid() & setfsgid() exist to decouple the Linux filesystem credentials 3191 * from the effective credentials, avoiding signal exposure during privilege 3192 * transitions. The signal permission model that motivated this was revised in 3193 * Linux 2.0, making these syscalls obsolete for new applications. 3194 * 3195 * As there's no FreeBSD equivalent, implement both syscalls as no-ops that 3196 * return the current effective UID/GID as the previous filesystem UID/GID. 3197 * Linux returns the previous filesystem UID/GID for these syscalls, with no 3198 * error indication. 3199 */ 3200 3201 int 3202 linux_setfsuid(struct thread *td, struct linux_setfsuid_args *args) 3203 { 3204 td->td_retval[0] = td->td_ucred->cr_uid; 3205 return (0); 3206 } 3207 3208 int 3209 linux_setfsgid(struct thread *td, struct linux_setfsgid_args *args) 3210 { 3211 td->td_retval[0] = td->td_ucred->cr_gid; 3212 return (0); 3213 } 3214 3215 MODULE_DEPEND(linux, mqueuefs, 1, 1, 1); 3216