1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/fcntl.h> 34 #include <sys/jail.h> 35 #include <sys/imgact.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/membarrier.h> 39 #include <sys/msgbuf.h> 40 #include <sys/mqueue.h> 41 #include <sys/mutex.h> 42 #include <sys/poll.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/procctl.h> 46 #include <sys/reboot.h> 47 #include <sys/random.h> 48 #include <sys/resourcevar.h> 49 #include <sys/rtprio.h> 50 #include <sys/sched.h> 51 #include <sys/smp.h> 52 #include <sys/stat.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysent.h> 56 #include <sys/sysproto.h> 57 #include <sys/time.h> 58 #include <sys/unistd.h> 59 #include <sys/vmmeter.h> 60 #include <sys/vnode.h> 61 62 #include <security/audit/audit.h> 63 #include <security/mac/mac_framework.h> 64 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 #include <vm/swap_pager.h> 68 69 #ifdef COMPAT_LINUX32 70 #include <machine/../linux32/linux.h> 71 #include <machine/../linux32/linux32_proto.h> 72 #else 73 #include <machine/../linux/linux.h> 74 #include <machine/../linux/linux_proto.h> 75 #endif 76 77 #include <compat/linux/linux_common.h> 78 #include <compat/linux/linux_dtrace.h> 79 #include <compat/linux/linux_file.h> 80 #include <compat/linux/linux_mib.h> 81 #include <compat/linux/linux_mmap.h> 82 #include <compat/linux/linux_signal.h> 83 #include <compat/linux/linux_time.h> 84 #include <compat/linux/linux_util.h> 85 #include <compat/linux/linux_emul.h> 86 #include <compat/linux/linux_misc.h> 87 88 int stclohz; /* Statistics clock frequency */ 89 90 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 91 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 92 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 93 RLIMIT_MEMLOCK, RLIMIT_AS 94 }; 95 96 struct l_sysinfo { 97 l_long uptime; /* Seconds since boot */ 98 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 99 #define LINUX_SYSINFO_LOADS_SCALE 65536 100 l_ulong totalram; /* Total usable main memory size */ 101 l_ulong freeram; /* Available memory size */ 102 l_ulong sharedram; /* Amount of shared memory */ 103 l_ulong bufferram; /* Memory used by buffers */ 104 l_ulong totalswap; /* Total swap space size */ 105 l_ulong freeswap; /* swap space still available */ 106 l_ushort procs; /* Number of current processes */ 107 l_ushort pads; 108 l_ulong totalhigh; 109 l_ulong freehigh; 110 l_uint mem_unit; 111 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 112 }; 113 114 struct l_pselect6arg { 115 l_uintptr_t ss; 116 l_size_t ss_len; 117 }; 118 119 static int linux_utimensat_lts_to_ts(struct l_timespec *, 120 struct timespec *); 121 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 122 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 123 struct timespec *); 124 #endif 125 static int linux_common_utimensat(struct thread *, int, 126 const char *, struct timespec *, int); 127 static int linux_common_pselect6(struct thread *, l_int, 128 l_fd_set *, l_fd_set *, l_fd_set *, 129 struct timespec *, l_uintptr_t *); 130 static int linux_common_ppoll(struct thread *, struct pollfd *, 131 uint32_t, struct timespec *, l_sigset_t *, 132 l_size_t); 133 static int linux_pollin(struct thread *, struct pollfd *, 134 struct pollfd *, u_int); 135 static int linux_pollout(struct thread *, struct pollfd *, 136 struct pollfd *, u_int); 137 138 int 139 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 140 { 141 struct l_sysinfo sysinfo; 142 int i, j; 143 struct timespec ts; 144 145 bzero(&sysinfo, sizeof(sysinfo)); 146 getnanouptime(&ts); 147 if (ts.tv_nsec != 0) 148 ts.tv_sec++; 149 sysinfo.uptime = ts.tv_sec; 150 151 /* Use the information from the mib to get our load averages */ 152 for (i = 0; i < 3; i++) 153 sysinfo.loads[i] = averunnable.ldavg[i] * 154 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 155 156 sysinfo.totalram = physmem * PAGE_SIZE; 157 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 158 159 /* 160 * sharedram counts pages allocated to named, swap-backed objects such 161 * as shared memory segments and tmpfs files. There is no cheap way to 162 * compute this, so just leave the field unpopulated. Linux itself only 163 * started setting this field in the 3.x timeframe. 164 */ 165 sysinfo.sharedram = 0; 166 sysinfo.bufferram = 0; 167 168 swap_pager_status(&i, &j); 169 sysinfo.totalswap = i * PAGE_SIZE; 170 sysinfo.freeswap = (i - j) * PAGE_SIZE; 171 172 sysinfo.procs = nprocs; 173 174 /* 175 * Platforms supported by the emulation layer do not have a notion of 176 * high memory. 177 */ 178 sysinfo.totalhigh = 0; 179 sysinfo.freehigh = 0; 180 181 sysinfo.mem_unit = 1; 182 183 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 184 } 185 186 #ifdef LINUX_LEGACY_SYSCALLS 187 int 188 linux_alarm(struct thread *td, struct linux_alarm_args *args) 189 { 190 struct itimerval it, old_it; 191 u_int secs; 192 int error __diagused; 193 194 secs = args->secs; 195 /* 196 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 197 * to match kern_setitimer()'s limit to avoid error from it. 198 * 199 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 200 * platforms. 201 */ 202 if (secs > INT32_MAX / 2) 203 secs = INT32_MAX / 2; 204 205 it.it_value.tv_sec = secs; 206 it.it_value.tv_usec = 0; 207 timevalclear(&it.it_interval); 208 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 209 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 210 211 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 212 old_it.it_value.tv_usec >= 500000) 213 old_it.it_value.tv_sec++; 214 td->td_retval[0] = old_it.it_value.tv_sec; 215 return (0); 216 } 217 #endif 218 219 int 220 linux_brk(struct thread *td, struct linux_brk_args *args) 221 { 222 struct vmspace *vm = td->td_proc->p_vmspace; 223 uintptr_t new, old; 224 225 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 226 new = (uintptr_t)args->dsend; 227 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 228 td->td_retval[0] = (register_t)new; 229 else 230 td->td_retval[0] = (register_t)old; 231 232 return (0); 233 } 234 235 #ifdef LINUX_LEGACY_SYSCALLS 236 int 237 linux_select(struct thread *td, struct linux_select_args *args) 238 { 239 l_timeval ltv; 240 struct timeval tv0, tv1, utv, *tvp; 241 int error; 242 243 /* 244 * Store current time for computation of the amount of 245 * time left. 246 */ 247 if (args->timeout) { 248 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 249 goto select_out; 250 utv.tv_sec = ltv.tv_sec; 251 utv.tv_usec = ltv.tv_usec; 252 253 if (itimerfix(&utv)) { 254 /* 255 * The timeval was invalid. Convert it to something 256 * valid that will act as it does under Linux. 257 */ 258 utv.tv_sec += utv.tv_usec / 1000000; 259 utv.tv_usec %= 1000000; 260 if (utv.tv_usec < 0) { 261 utv.tv_sec -= 1; 262 utv.tv_usec += 1000000; 263 } 264 if (utv.tv_sec < 0) 265 timevalclear(&utv); 266 } 267 microtime(&tv0); 268 tvp = &utv; 269 } else 270 tvp = NULL; 271 272 error = kern_select(td, args->nfds, args->readfds, args->writefds, 273 args->exceptfds, tvp, LINUX_NFDBITS); 274 if (error) 275 goto select_out; 276 277 if (args->timeout) { 278 if (td->td_retval[0]) { 279 /* 280 * Compute how much time was left of the timeout, 281 * by subtracting the current time and the time 282 * before we started the call, and subtracting 283 * that result from the user-supplied value. 284 */ 285 microtime(&tv1); 286 timevalsub(&tv1, &tv0); 287 timevalsub(&utv, &tv1); 288 if (utv.tv_sec < 0) 289 timevalclear(&utv); 290 } else 291 timevalclear(&utv); 292 ltv.tv_sec = utv.tv_sec; 293 ltv.tv_usec = utv.tv_usec; 294 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 295 goto select_out; 296 } 297 298 select_out: 299 return (error); 300 } 301 #endif 302 303 int 304 linux_mremap(struct thread *td, struct linux_mremap_args *args) 305 { 306 uintptr_t addr; 307 size_t len; 308 int error = 0; 309 310 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 311 td->td_retval[0] = 0; 312 return (EINVAL); 313 } 314 315 /* 316 * Check for the page alignment. 317 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 318 */ 319 if (args->addr & PAGE_MASK) { 320 td->td_retval[0] = 0; 321 return (EINVAL); 322 } 323 324 args->new_len = round_page(args->new_len); 325 args->old_len = round_page(args->old_len); 326 327 if (args->new_len > args->old_len) { 328 td->td_retval[0] = 0; 329 return (ENOMEM); 330 } 331 332 if (args->new_len < args->old_len) { 333 addr = args->addr + args->new_len; 334 len = args->old_len - args->new_len; 335 error = kern_munmap(td, addr, len); 336 } 337 338 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 339 return (error); 340 } 341 342 #define LINUX_MS_ASYNC 0x0001 343 #define LINUX_MS_INVALIDATE 0x0002 344 #define LINUX_MS_SYNC 0x0004 345 346 int 347 linux_msync(struct thread *td, struct linux_msync_args *args) 348 { 349 350 return (kern_msync(td, args->addr, args->len, 351 args->fl & ~LINUX_MS_SYNC)); 352 } 353 354 int 355 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 356 { 357 358 return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, 359 uap->prot)); 360 } 361 362 int 363 linux_madvise(struct thread *td, struct linux_madvise_args *uap) 364 { 365 366 return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, 367 uap->behav)); 368 } 369 370 int 371 linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) 372 { 373 #if defined(LINUX_ARCHWANT_MMAP2PGOFF) 374 /* 375 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is 376 * implemented with mmap2 syscall and the offset is represented in 377 * multiples of page size. 378 */ 379 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 380 uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); 381 #else 382 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 383 uap->flags, uap->fd, uap->pgoff)); 384 #endif 385 } 386 387 #ifdef LINUX_LEGACY_SYSCALLS 388 int 389 linux_time(struct thread *td, struct linux_time_args *args) 390 { 391 struct timeval tv; 392 l_time_t tm; 393 int error; 394 395 microtime(&tv); 396 tm = tv.tv_sec; 397 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 398 return (error); 399 td->td_retval[0] = tm; 400 return (0); 401 } 402 #endif 403 404 struct l_times_argv { 405 l_clock_t tms_utime; 406 l_clock_t tms_stime; 407 l_clock_t tms_cutime; 408 l_clock_t tms_cstime; 409 }; 410 411 /* 412 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 413 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 414 * auxiliary vector entry. 415 */ 416 #define CLK_TCK 100 417 418 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 419 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 420 421 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ 422 CONVNTCK(r) : CONVOTCK(r)) 423 424 int 425 linux_times(struct thread *td, struct linux_times_args *args) 426 { 427 struct timeval tv, utime, stime, cutime, cstime; 428 struct l_times_argv tms; 429 struct proc *p; 430 int error; 431 432 if (args->buf != NULL) { 433 p = td->td_proc; 434 PROC_LOCK(p); 435 PROC_STATLOCK(p); 436 calcru(p, &utime, &stime); 437 PROC_STATUNLOCK(p); 438 calccru(p, &cutime, &cstime); 439 PROC_UNLOCK(p); 440 441 tms.tms_utime = CONVTCK(utime); 442 tms.tms_stime = CONVTCK(stime); 443 444 tms.tms_cutime = CONVTCK(cutime); 445 tms.tms_cstime = CONVTCK(cstime); 446 447 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 448 return (error); 449 } 450 451 microuptime(&tv); 452 td->td_retval[0] = (int)CONVTCK(tv); 453 return (0); 454 } 455 456 int 457 linux_newuname(struct thread *td, struct linux_newuname_args *args) 458 { 459 struct l_new_utsname utsname; 460 char osname[LINUX_MAX_UTSNAME]; 461 char osrelease[LINUX_MAX_UTSNAME]; 462 char *p; 463 464 linux_get_osname(td, osname); 465 linux_get_osrelease(td, osrelease); 466 467 bzero(&utsname, sizeof(utsname)); 468 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 469 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 470 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 471 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 472 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 473 for (p = utsname.version; *p != '\0'; ++p) 474 if (*p == '\n') { 475 *p = '\0'; 476 break; 477 } 478 #if defined(__amd64__) 479 /* 480 * On amd64, Linux uname(2) needs to return "x86_64" 481 * for both 64-bit and 32-bit applications. On 32-bit, 482 * the string returned by getauxval(AT_PLATFORM) needs 483 * to remain "i686", though. 484 */ 485 #if defined(COMPAT_LINUX32) 486 if (linux32_emulate_i386) 487 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 488 else 489 #endif 490 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 491 #elif defined(__aarch64__) 492 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 493 #elif defined(__i386__) 494 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 495 #endif 496 497 return (copyout(&utsname, args->buf, sizeof(utsname))); 498 } 499 500 struct l_utimbuf { 501 l_time_t l_actime; 502 l_time_t l_modtime; 503 }; 504 505 #ifdef LINUX_LEGACY_SYSCALLS 506 int 507 linux_utime(struct thread *td, struct linux_utime_args *args) 508 { 509 struct timeval tv[2], *tvp; 510 struct l_utimbuf lut; 511 int error; 512 513 if (args->times) { 514 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 515 return (error); 516 tv[0].tv_sec = lut.l_actime; 517 tv[0].tv_usec = 0; 518 tv[1].tv_sec = lut.l_modtime; 519 tv[1].tv_usec = 0; 520 tvp = tv; 521 } else 522 tvp = NULL; 523 524 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 525 tvp, UIO_SYSSPACE)); 526 } 527 #endif 528 529 #ifdef LINUX_LEGACY_SYSCALLS 530 int 531 linux_utimes(struct thread *td, struct linux_utimes_args *args) 532 { 533 l_timeval ltv[2]; 534 struct timeval tv[2], *tvp = NULL; 535 int error; 536 537 if (args->tptr != NULL) { 538 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 539 return (error); 540 tv[0].tv_sec = ltv[0].tv_sec; 541 tv[0].tv_usec = ltv[0].tv_usec; 542 tv[1].tv_sec = ltv[1].tv_sec; 543 tv[1].tv_usec = ltv[1].tv_usec; 544 tvp = tv; 545 } 546 547 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 548 tvp, UIO_SYSSPACE)); 549 } 550 #endif 551 552 static int 553 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 554 { 555 556 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 557 l_times->tv_nsec != LINUX_UTIME_NOW && 558 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 559 return (EINVAL); 560 561 times->tv_sec = l_times->tv_sec; 562 switch (l_times->tv_nsec) 563 { 564 case LINUX_UTIME_OMIT: 565 times->tv_nsec = UTIME_OMIT; 566 break; 567 case LINUX_UTIME_NOW: 568 times->tv_nsec = UTIME_NOW; 569 break; 570 default: 571 times->tv_nsec = l_times->tv_nsec; 572 } 573 574 return (0); 575 } 576 577 static int 578 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 579 struct timespec *timesp, int lflags) 580 { 581 int dfd, flags = 0; 582 583 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 584 585 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 586 return (EINVAL); 587 588 if (timesp != NULL) { 589 /* This breaks POSIX, but is what the Linux kernel does 590 * _on purpose_ (documented in the man page for utimensat(2)), 591 * so we must follow that behaviour. */ 592 if (timesp[0].tv_nsec == UTIME_OMIT && 593 timesp[1].tv_nsec == UTIME_OMIT) 594 return (0); 595 } 596 597 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 598 flags |= AT_SYMLINK_NOFOLLOW; 599 if (lflags & LINUX_AT_EMPTY_PATH) 600 flags |= AT_EMPTY_PATH; 601 602 if (pathname != NULL) 603 return (kern_utimensat(td, dfd, pathname, 604 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 605 606 if (lflags != 0) 607 return (EINVAL); 608 609 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 610 } 611 612 int 613 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 614 { 615 struct l_timespec l_times[2]; 616 struct timespec times[2], *timesp; 617 int error; 618 619 if (args->times != NULL) { 620 error = copyin(args->times, l_times, sizeof(l_times)); 621 if (error != 0) 622 return (error); 623 624 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 625 if (error != 0) 626 return (error); 627 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 628 if (error != 0) 629 return (error); 630 timesp = times; 631 } else 632 timesp = NULL; 633 634 return (linux_common_utimensat(td, args->dfd, args->pathname, 635 timesp, args->flags)); 636 } 637 638 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 639 static int 640 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 641 { 642 643 /* Zero out the padding in compat mode. */ 644 l_times->tv_nsec &= 0xFFFFFFFFUL; 645 646 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 647 l_times->tv_nsec != LINUX_UTIME_NOW && 648 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 649 return (EINVAL); 650 651 times->tv_sec = l_times->tv_sec; 652 switch (l_times->tv_nsec) 653 { 654 case LINUX_UTIME_OMIT: 655 times->tv_nsec = UTIME_OMIT; 656 break; 657 case LINUX_UTIME_NOW: 658 times->tv_nsec = UTIME_NOW; 659 break; 660 default: 661 times->tv_nsec = l_times->tv_nsec; 662 } 663 664 return (0); 665 } 666 667 int 668 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 669 { 670 struct l_timespec64 l_times[2]; 671 struct timespec times[2], *timesp; 672 int error; 673 674 if (args->times64 != NULL) { 675 error = copyin(args->times64, l_times, sizeof(l_times)); 676 if (error != 0) 677 return (error); 678 679 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 680 if (error != 0) 681 return (error); 682 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 683 if (error != 0) 684 return (error); 685 timesp = times; 686 } else 687 timesp = NULL; 688 689 return (linux_common_utimensat(td, args->dfd, args->pathname, 690 timesp, args->flags)); 691 } 692 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 693 694 #ifdef LINUX_LEGACY_SYSCALLS 695 int 696 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 697 { 698 l_timeval ltv[2]; 699 struct timeval tv[2], *tvp = NULL; 700 int error, dfd; 701 702 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 703 704 if (args->utimes != NULL) { 705 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 706 return (error); 707 tv[0].tv_sec = ltv[0].tv_sec; 708 tv[0].tv_usec = ltv[0].tv_usec; 709 tv[1].tv_sec = ltv[1].tv_sec; 710 tv[1].tv_usec = ltv[1].tv_usec; 711 tvp = tv; 712 } 713 714 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 715 tvp, UIO_SYSSPACE)); 716 } 717 #endif 718 719 static int 720 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 721 int options, void *rup, l_siginfo_t *infop) 722 { 723 l_siginfo_t lsi; 724 siginfo_t siginfo; 725 struct __wrusage wru; 726 int error, status, tmpstat, sig; 727 728 error = kern_wait6(td, idtype, id, &status, options, 729 rup != NULL ? &wru : NULL, &siginfo); 730 731 if (error == 0 && statusp) { 732 tmpstat = status & 0xffff; 733 if (WIFSIGNALED(tmpstat)) { 734 tmpstat = (tmpstat & 0xffffff80) | 735 bsd_to_linux_signal(WTERMSIG(tmpstat)); 736 } else if (WIFSTOPPED(tmpstat)) { 737 tmpstat = (tmpstat & 0xffff00ff) | 738 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 739 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 740 if (WSTOPSIG(status) == SIGTRAP) { 741 tmpstat = linux_ptrace_status(td, 742 siginfo.si_pid, tmpstat); 743 } 744 #endif 745 } else if (WIFCONTINUED(tmpstat)) { 746 tmpstat = 0xffff; 747 } 748 error = copyout(&tmpstat, statusp, sizeof(int)); 749 } 750 if (error == 0 && rup != NULL) 751 error = linux_copyout_rusage(&wru.wru_self, rup); 752 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 753 sig = bsd_to_linux_signal(siginfo.si_signo); 754 memset(&lsi, 0, sizeof(lsi)); 755 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 756 error = copyout(&lsi, infop, sizeof(lsi)); 757 } 758 759 return (error); 760 } 761 762 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 763 int 764 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 765 { 766 struct linux_wait4_args wait4_args = { 767 .pid = args->pid, 768 .status = args->status, 769 .options = args->options, 770 .rusage = NULL, 771 }; 772 773 return (linux_wait4(td, &wait4_args)); 774 } 775 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 776 777 int 778 linux_wait4(struct thread *td, struct linux_wait4_args *args) 779 { 780 struct proc *p; 781 int options, id, idtype; 782 783 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 784 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 785 return (EINVAL); 786 787 /* -INT_MIN is not defined. */ 788 if (args->pid == INT_MIN) 789 return (ESRCH); 790 791 options = 0; 792 linux_to_bsd_waitopts(args->options, &options); 793 794 /* 795 * For backward compatibility we implicitly add flags WEXITED 796 * and WTRAPPED here. 797 */ 798 options |= WEXITED | WTRAPPED; 799 800 if (args->pid == WAIT_ANY) { 801 idtype = P_ALL; 802 id = 0; 803 } else if (args->pid < 0) { 804 idtype = P_PGID; 805 id = (id_t)-args->pid; 806 } else if (args->pid == 0) { 807 idtype = P_PGID; 808 p = td->td_proc; 809 PROC_LOCK(p); 810 id = p->p_pgid; 811 PROC_UNLOCK(p); 812 } else { 813 idtype = P_PID; 814 id = (id_t)args->pid; 815 } 816 817 return (linux_common_wait(td, idtype, id, args->status, options, 818 args->rusage, NULL)); 819 } 820 821 int 822 linux_waitid(struct thread *td, struct linux_waitid_args *args) 823 { 824 idtype_t idtype; 825 int error, options; 826 struct proc *p; 827 pid_t id; 828 829 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 830 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 831 return (EINVAL); 832 833 options = 0; 834 linux_to_bsd_waitopts(args->options, &options); 835 836 id = args->id; 837 switch (args->idtype) { 838 case LINUX_P_ALL: 839 idtype = P_ALL; 840 break; 841 case LINUX_P_PID: 842 if (args->id <= 0) 843 return (EINVAL); 844 idtype = P_PID; 845 break; 846 case LINUX_P_PGID: 847 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 848 p = td->td_proc; 849 PROC_LOCK(p); 850 id = p->p_pgid; 851 PROC_UNLOCK(p); 852 } else if (args->id <= 0) 853 return (EINVAL); 854 idtype = P_PGID; 855 break; 856 case LINUX_P_PIDFD: 857 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 858 return (ENOSYS); 859 default: 860 return (EINVAL); 861 } 862 863 error = linux_common_wait(td, idtype, id, NULL, options, 864 args->rusage, args->info); 865 td->td_retval[0] = 0; 866 867 return (error); 868 } 869 870 #ifdef LINUX_LEGACY_SYSCALLS 871 int 872 linux_mknod(struct thread *td, struct linux_mknod_args *args) 873 { 874 int error; 875 876 switch (args->mode & S_IFMT) { 877 case S_IFIFO: 878 case S_IFSOCK: 879 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 880 args->mode); 881 break; 882 883 case S_IFCHR: 884 case S_IFBLK: 885 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 886 args->mode, linux_decode_dev(args->dev)); 887 break; 888 889 case S_IFDIR: 890 error = EPERM; 891 break; 892 893 case 0: 894 args->mode |= S_IFREG; 895 /* FALLTHROUGH */ 896 case S_IFREG: 897 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 898 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 899 if (error == 0) 900 kern_close(td, td->td_retval[0]); 901 break; 902 903 default: 904 error = EINVAL; 905 break; 906 } 907 return (error); 908 } 909 #endif 910 911 int 912 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 913 { 914 int error, dfd; 915 916 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 917 918 switch (args->mode & S_IFMT) { 919 case S_IFIFO: 920 case S_IFSOCK: 921 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 922 args->mode); 923 break; 924 925 case S_IFCHR: 926 case S_IFBLK: 927 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 928 args->mode, linux_decode_dev(args->dev)); 929 break; 930 931 case S_IFDIR: 932 error = EPERM; 933 break; 934 935 case 0: 936 args->mode |= S_IFREG; 937 /* FALLTHROUGH */ 938 case S_IFREG: 939 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 940 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 941 if (error == 0) 942 kern_close(td, td->td_retval[0]); 943 break; 944 945 default: 946 error = EINVAL; 947 break; 948 } 949 return (error); 950 } 951 952 /* 953 * UGH! This is just about the dumbest idea I've ever heard!! 954 */ 955 int 956 linux_personality(struct thread *td, struct linux_personality_args *args) 957 { 958 struct linux_pemuldata *pem; 959 struct proc *p = td->td_proc; 960 uint32_t old; 961 962 PROC_LOCK(p); 963 pem = pem_find(p); 964 old = pem->persona; 965 if (args->per != 0xffffffff) 966 pem->persona = args->per; 967 PROC_UNLOCK(p); 968 969 td->td_retval[0] = old; 970 return (0); 971 } 972 973 struct l_itimerval { 974 l_timeval it_interval; 975 l_timeval it_value; 976 }; 977 978 #define B2L_ITIMERVAL(bip, lip) \ 979 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 980 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 981 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 982 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 983 984 int 985 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 986 { 987 int error; 988 struct l_itimerval ls; 989 struct itimerval aitv, oitv; 990 991 if (uap->itv == NULL) { 992 uap->itv = uap->oitv; 993 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 994 } 995 996 error = copyin(uap->itv, &ls, sizeof(ls)); 997 if (error != 0) 998 return (error); 999 B2L_ITIMERVAL(&aitv, &ls); 1000 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1001 if (error != 0 || uap->oitv == NULL) 1002 return (error); 1003 B2L_ITIMERVAL(&ls, &oitv); 1004 1005 return (copyout(&ls, uap->oitv, sizeof(ls))); 1006 } 1007 1008 int 1009 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1010 { 1011 int error; 1012 struct l_itimerval ls; 1013 struct itimerval aitv; 1014 1015 error = kern_getitimer(td, uap->which, &aitv); 1016 if (error != 0) 1017 return (error); 1018 B2L_ITIMERVAL(&ls, &aitv); 1019 return (copyout(&ls, uap->itv, sizeof(ls))); 1020 } 1021 1022 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1023 int 1024 linux_nice(struct thread *td, struct linux_nice_args *args) 1025 { 1026 1027 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1028 } 1029 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1030 1031 int 1032 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1033 { 1034 const int ngrp = args->gidsetsize; 1035 struct ucred *newcred, *oldcred; 1036 l_gid_t *linux_gidset; 1037 int error; 1038 struct proc *p; 1039 1040 if (ngrp < 0 || ngrp > ngroups_max) 1041 return (EINVAL); 1042 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1043 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1044 if (error) 1045 goto out; 1046 1047 newcred = crget(); 1048 crextend(newcred, ngrp); 1049 p = td->td_proc; 1050 PROC_LOCK(p); 1051 oldcred = crcopysafe(p, newcred); 1052 1053 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1054 PROC_UNLOCK(p); 1055 crfree(newcred); 1056 goto out; 1057 } 1058 1059 newcred->cr_ngroups = ngrp; 1060 for (int i = 0; i < ngrp; i++) 1061 newcred->cr_groups[i] = linux_gidset[i]; 1062 newcred->cr_flags |= CRED_FLAG_GROUPSET; 1063 1064 setsugid(p); 1065 proc_set_cred(p, newcred); 1066 PROC_UNLOCK(p); 1067 crfree(oldcred); 1068 error = 0; 1069 out: 1070 free(linux_gidset, M_LINUX); 1071 return (error); 1072 } 1073 1074 int 1075 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1076 { 1077 const struct ucred *const cred = td->td_ucred; 1078 l_gid_t *linux_gidset; 1079 int ngrp, error; 1080 1081 ngrp = args->gidsetsize; 1082 1083 if (ngrp == 0) { 1084 td->td_retval[0] = cred->cr_ngroups; 1085 return (0); 1086 } 1087 if (ngrp < cred->cr_ngroups) 1088 return (EINVAL); 1089 1090 ngrp = cred->cr_ngroups; 1091 1092 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1093 for (int i = 0; i < ngrp; ++i) 1094 linux_gidset[i] = cred->cr_groups[i]; 1095 1096 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1097 free(linux_gidset, M_LINUX); 1098 1099 if (error != 0) 1100 return (error); 1101 1102 td->td_retval[0] = ngrp; 1103 return (0); 1104 } 1105 1106 static bool 1107 linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim) 1108 { 1109 ssize_t size; 1110 int res, error; 1111 1112 if (linux_dummy_rlimits == 0) 1113 return (false); 1114 1115 switch (resource) { 1116 case LINUX_RLIMIT_LOCKS: 1117 case LINUX_RLIMIT_RTTIME: 1118 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1119 rlim->rlim_max = LINUX_RLIM_INFINITY; 1120 return (true); 1121 case LINUX_RLIMIT_NICE: 1122 case LINUX_RLIMIT_RTPRIO: 1123 rlim->rlim_cur = 0; 1124 rlim->rlim_max = 0; 1125 return (true); 1126 case LINUX_RLIMIT_SIGPENDING: 1127 error = kernel_sysctlbyname(td, 1128 "kern.sigqueue.max_pending_per_proc", 1129 &res, &size, 0, 0, 0, 0); 1130 if (error != 0) 1131 return (false); 1132 rlim->rlim_cur = res; 1133 rlim->rlim_max = res; 1134 return (true); 1135 case LINUX_RLIMIT_MSGQUEUE: 1136 error = kernel_sysctlbyname(td, 1137 "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); 1138 if (error != 0) 1139 return (false); 1140 rlim->rlim_cur = res; 1141 rlim->rlim_max = res; 1142 return (true); 1143 default: 1144 return (false); 1145 } 1146 } 1147 1148 int 1149 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1150 { 1151 struct rlimit bsd_rlim; 1152 struct l_rlimit rlim; 1153 u_int which; 1154 int error; 1155 1156 if (args->resource >= LINUX_RLIM_NLIMITS) 1157 return (EINVAL); 1158 1159 which = linux_to_bsd_resource[args->resource]; 1160 if (which == -1) 1161 return (EINVAL); 1162 1163 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1164 if (error) 1165 return (error); 1166 1167 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1168 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1169 return (kern_setrlimit(td, which, &bsd_rlim)); 1170 } 1171 1172 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1173 int 1174 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1175 { 1176 struct l_rlimit rlim; 1177 struct rlimit bsd_rlim; 1178 u_int which; 1179 1180 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1181 rlim.rlim_cur = bsd_rlim.rlim_cur; 1182 rlim.rlim_max = bsd_rlim.rlim_max; 1183 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1184 } 1185 1186 if (args->resource >= LINUX_RLIM_NLIMITS) 1187 return (EINVAL); 1188 1189 which = linux_to_bsd_resource[args->resource]; 1190 if (which == -1) 1191 return (EINVAL); 1192 1193 lim_rlimit(td, which, &bsd_rlim); 1194 1195 #ifdef COMPAT_LINUX32 1196 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1197 if (rlim.rlim_cur == UINT_MAX) 1198 rlim.rlim_cur = INT_MAX; 1199 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1200 if (rlim.rlim_max == UINT_MAX) 1201 rlim.rlim_max = INT_MAX; 1202 #else 1203 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1204 if (rlim.rlim_cur == ULONG_MAX) 1205 rlim.rlim_cur = LONG_MAX; 1206 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1207 if (rlim.rlim_max == ULONG_MAX) 1208 rlim.rlim_max = LONG_MAX; 1209 #endif 1210 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1211 } 1212 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1213 1214 int 1215 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1216 { 1217 struct l_rlimit rlim; 1218 struct rlimit bsd_rlim; 1219 u_int which; 1220 1221 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1222 rlim.rlim_cur = bsd_rlim.rlim_cur; 1223 rlim.rlim_max = bsd_rlim.rlim_max; 1224 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1225 } 1226 1227 if (args->resource >= LINUX_RLIM_NLIMITS) 1228 return (EINVAL); 1229 1230 which = linux_to_bsd_resource[args->resource]; 1231 if (which == -1) 1232 return (EINVAL); 1233 1234 lim_rlimit(td, which, &bsd_rlim); 1235 1236 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1237 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1238 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1239 } 1240 1241 int 1242 linux_sched_setscheduler(struct thread *td, 1243 struct linux_sched_setscheduler_args *args) 1244 { 1245 struct sched_param sched_param; 1246 struct thread *tdt; 1247 int error, policy; 1248 1249 switch (args->policy) { 1250 case LINUX_SCHED_OTHER: 1251 policy = SCHED_OTHER; 1252 break; 1253 case LINUX_SCHED_FIFO: 1254 policy = SCHED_FIFO; 1255 break; 1256 case LINUX_SCHED_RR: 1257 policy = SCHED_RR; 1258 break; 1259 default: 1260 return (EINVAL); 1261 } 1262 1263 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1264 if (error) 1265 return (error); 1266 1267 if (linux_map_sched_prio) { 1268 switch (policy) { 1269 case SCHED_OTHER: 1270 if (sched_param.sched_priority != 0) 1271 return (EINVAL); 1272 1273 sched_param.sched_priority = 1274 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1275 break; 1276 case SCHED_FIFO: 1277 case SCHED_RR: 1278 if (sched_param.sched_priority < 1 || 1279 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1280 return (EINVAL); 1281 1282 /* 1283 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1284 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1285 */ 1286 sched_param.sched_priority = 1287 (sched_param.sched_priority - 1) * 1288 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1289 (LINUX_MAX_RT_PRIO - 1); 1290 break; 1291 } 1292 } 1293 1294 tdt = linux_tdfind(td, args->pid, -1); 1295 if (tdt == NULL) 1296 return (ESRCH); 1297 1298 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1299 PROC_UNLOCK(tdt->td_proc); 1300 return (error); 1301 } 1302 1303 int 1304 linux_sched_getscheduler(struct thread *td, 1305 struct linux_sched_getscheduler_args *args) 1306 { 1307 struct thread *tdt; 1308 int error, policy; 1309 1310 tdt = linux_tdfind(td, args->pid, -1); 1311 if (tdt == NULL) 1312 return (ESRCH); 1313 1314 error = kern_sched_getscheduler(td, tdt, &policy); 1315 PROC_UNLOCK(tdt->td_proc); 1316 1317 switch (policy) { 1318 case SCHED_OTHER: 1319 td->td_retval[0] = LINUX_SCHED_OTHER; 1320 break; 1321 case SCHED_FIFO: 1322 td->td_retval[0] = LINUX_SCHED_FIFO; 1323 break; 1324 case SCHED_RR: 1325 td->td_retval[0] = LINUX_SCHED_RR; 1326 break; 1327 } 1328 return (error); 1329 } 1330 1331 int 1332 linux_sched_get_priority_max(struct thread *td, 1333 struct linux_sched_get_priority_max_args *args) 1334 { 1335 struct sched_get_priority_max_args bsd; 1336 1337 if (linux_map_sched_prio) { 1338 switch (args->policy) { 1339 case LINUX_SCHED_OTHER: 1340 td->td_retval[0] = 0; 1341 return (0); 1342 case LINUX_SCHED_FIFO: 1343 case LINUX_SCHED_RR: 1344 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1345 return (0); 1346 default: 1347 return (EINVAL); 1348 } 1349 } 1350 1351 switch (args->policy) { 1352 case LINUX_SCHED_OTHER: 1353 bsd.policy = SCHED_OTHER; 1354 break; 1355 case LINUX_SCHED_FIFO: 1356 bsd.policy = SCHED_FIFO; 1357 break; 1358 case LINUX_SCHED_RR: 1359 bsd.policy = SCHED_RR; 1360 break; 1361 default: 1362 return (EINVAL); 1363 } 1364 return (sys_sched_get_priority_max(td, &bsd)); 1365 } 1366 1367 int 1368 linux_sched_get_priority_min(struct thread *td, 1369 struct linux_sched_get_priority_min_args *args) 1370 { 1371 struct sched_get_priority_min_args bsd; 1372 1373 if (linux_map_sched_prio) { 1374 switch (args->policy) { 1375 case LINUX_SCHED_OTHER: 1376 td->td_retval[0] = 0; 1377 return (0); 1378 case LINUX_SCHED_FIFO: 1379 case LINUX_SCHED_RR: 1380 td->td_retval[0] = 1; 1381 return (0); 1382 default: 1383 return (EINVAL); 1384 } 1385 } 1386 1387 switch (args->policy) { 1388 case LINUX_SCHED_OTHER: 1389 bsd.policy = SCHED_OTHER; 1390 break; 1391 case LINUX_SCHED_FIFO: 1392 bsd.policy = SCHED_FIFO; 1393 break; 1394 case LINUX_SCHED_RR: 1395 bsd.policy = SCHED_RR; 1396 break; 1397 default: 1398 return (EINVAL); 1399 } 1400 return (sys_sched_get_priority_min(td, &bsd)); 1401 } 1402 1403 #define REBOOT_CAD_ON 0x89abcdef 1404 #define REBOOT_CAD_OFF 0 1405 #define REBOOT_HALT 0xcdef0123 1406 #define REBOOT_RESTART 0x01234567 1407 #define REBOOT_RESTART2 0xA1B2C3D4 1408 #define REBOOT_POWEROFF 0x4321FEDC 1409 #define REBOOT_MAGIC1 0xfee1dead 1410 #define REBOOT_MAGIC2 0x28121969 1411 #define REBOOT_MAGIC2A 0x05121996 1412 #define REBOOT_MAGIC2B 0x16041998 1413 1414 int 1415 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1416 { 1417 struct reboot_args bsd_args; 1418 1419 if (args->magic1 != REBOOT_MAGIC1) 1420 return (EINVAL); 1421 1422 switch (args->magic2) { 1423 case REBOOT_MAGIC2: 1424 case REBOOT_MAGIC2A: 1425 case REBOOT_MAGIC2B: 1426 break; 1427 default: 1428 return (EINVAL); 1429 } 1430 1431 switch (args->cmd) { 1432 case REBOOT_CAD_ON: 1433 case REBOOT_CAD_OFF: 1434 return (priv_check(td, PRIV_REBOOT)); 1435 case REBOOT_HALT: 1436 bsd_args.opt = RB_HALT; 1437 break; 1438 case REBOOT_RESTART: 1439 case REBOOT_RESTART2: 1440 bsd_args.opt = 0; 1441 break; 1442 case REBOOT_POWEROFF: 1443 bsd_args.opt = RB_POWEROFF; 1444 break; 1445 default: 1446 return (EINVAL); 1447 } 1448 return (sys_reboot(td, &bsd_args)); 1449 } 1450 1451 int 1452 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1453 { 1454 1455 td->td_retval[0] = td->td_proc->p_pid; 1456 1457 return (0); 1458 } 1459 1460 int 1461 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1462 { 1463 struct linux_emuldata *em; 1464 1465 em = em_find(td); 1466 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1467 1468 td->td_retval[0] = em->em_tid; 1469 1470 return (0); 1471 } 1472 1473 int 1474 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1475 { 1476 1477 td->td_retval[0] = kern_getppid(td); 1478 return (0); 1479 } 1480 1481 int 1482 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1483 { 1484 1485 td->td_retval[0] = td->td_ucred->cr_rgid; 1486 return (0); 1487 } 1488 1489 int 1490 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1491 { 1492 1493 td->td_retval[0] = td->td_ucred->cr_ruid; 1494 return (0); 1495 } 1496 1497 int 1498 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1499 { 1500 1501 return (kern_getsid(td, args->pid)); 1502 } 1503 1504 int 1505 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1506 { 1507 int error; 1508 1509 error = kern_getpriority(td, args->which, args->who); 1510 td->td_retval[0] = 20 - td->td_retval[0]; 1511 return (error); 1512 } 1513 1514 int 1515 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1516 { 1517 int name[2]; 1518 1519 name[0] = CTL_KERN; 1520 name[1] = KERN_HOSTNAME; 1521 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1522 args->len, 0, 0)); 1523 } 1524 1525 int 1526 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1527 { 1528 int name[2]; 1529 1530 name[0] = CTL_KERN; 1531 name[1] = KERN_NISDOMAINNAME; 1532 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1533 args->len, 0, 0)); 1534 } 1535 1536 int 1537 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1538 { 1539 1540 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1541 args->error_code); 1542 1543 /* 1544 * XXX: we should send a signal to the parent if 1545 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1546 * as it doesnt occur often. 1547 */ 1548 exit1(td, args->error_code, 0); 1549 /* NOTREACHED */ 1550 } 1551 1552 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1553 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1554 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1555 1556 struct l_user_cap_header { 1557 l_int version; 1558 l_int pid; 1559 }; 1560 1561 struct l_user_cap_data { 1562 l_int effective; 1563 l_int permitted; 1564 l_int inheritable; 1565 }; 1566 1567 int 1568 linux_capget(struct thread *td, struct linux_capget_args *uap) 1569 { 1570 struct l_user_cap_header luch; 1571 struct l_user_cap_data lucd[2]; 1572 int error, u32s; 1573 1574 if (uap->hdrp == NULL) 1575 return (EFAULT); 1576 1577 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1578 if (error != 0) 1579 return (error); 1580 1581 switch (luch.version) { 1582 case _LINUX_CAPABILITY_VERSION_1: 1583 u32s = 1; 1584 break; 1585 case _LINUX_CAPABILITY_VERSION_2: 1586 case _LINUX_CAPABILITY_VERSION_3: 1587 u32s = 2; 1588 break; 1589 default: 1590 luch.version = _LINUX_CAPABILITY_VERSION_1; 1591 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1592 if (error) 1593 return (error); 1594 return (EINVAL); 1595 } 1596 1597 if (luch.pid) 1598 return (EPERM); 1599 1600 if (uap->datap) { 1601 /* 1602 * The current implementation doesn't support setting 1603 * a capability (it's essentially a stub) so indicate 1604 * that no capabilities are currently set or available 1605 * to request. 1606 */ 1607 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1608 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1609 } 1610 1611 return (error); 1612 } 1613 1614 int 1615 linux_capset(struct thread *td, struct linux_capset_args *uap) 1616 { 1617 struct l_user_cap_header luch; 1618 struct l_user_cap_data lucd[2]; 1619 int error, i, u32s; 1620 1621 if (uap->hdrp == NULL || uap->datap == NULL) 1622 return (EFAULT); 1623 1624 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1625 if (error != 0) 1626 return (error); 1627 1628 switch (luch.version) { 1629 case _LINUX_CAPABILITY_VERSION_1: 1630 u32s = 1; 1631 break; 1632 case _LINUX_CAPABILITY_VERSION_2: 1633 case _LINUX_CAPABILITY_VERSION_3: 1634 u32s = 2; 1635 break; 1636 default: 1637 luch.version = _LINUX_CAPABILITY_VERSION_1; 1638 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1639 if (error) 1640 return (error); 1641 return (EINVAL); 1642 } 1643 1644 if (luch.pid) 1645 return (EPERM); 1646 1647 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1648 if (error != 0) 1649 return (error); 1650 1651 /* We currently don't support setting any capabilities. */ 1652 for (i = 0; i < u32s; i++) { 1653 if (lucd[i].effective || lucd[i].permitted || 1654 lucd[i].inheritable) { 1655 linux_msg(td, 1656 "capset[%d] effective=0x%x, permitted=0x%x, " 1657 "inheritable=0x%x is not implemented", i, 1658 (int)lucd[i].effective, (int)lucd[i].permitted, 1659 (int)lucd[i].inheritable); 1660 return (EPERM); 1661 } 1662 } 1663 1664 return (0); 1665 } 1666 1667 int 1668 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1669 { 1670 int error = 0, max_size, arg; 1671 struct proc *p = td->td_proc; 1672 char comm[LINUX_MAX_COMM_LEN]; 1673 int pdeath_signal, trace_state; 1674 1675 switch (args->option) { 1676 case LINUX_PR_SET_PDEATHSIG: 1677 if (!LINUX_SIG_VALID(args->arg2)) 1678 return (EINVAL); 1679 pdeath_signal = linux_to_bsd_signal(args->arg2); 1680 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1681 &pdeath_signal)); 1682 case LINUX_PR_GET_PDEATHSIG: 1683 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1684 &pdeath_signal); 1685 if (error != 0) 1686 return (error); 1687 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1688 return (copyout(&pdeath_signal, 1689 (void *)(register_t)args->arg2, 1690 sizeof(pdeath_signal))); 1691 /* 1692 * In Linux, this flag controls if set[gu]id processes can coredump. 1693 * There are additional semantics imposed on processes that cannot 1694 * coredump: 1695 * - Such processes can not be ptraced. 1696 * - There are some semantics around ownership of process-related files 1697 * in the /proc namespace. 1698 * 1699 * In FreeBSD, we can (and by default, do) disable setuid coredump 1700 * system-wide with 'sugid_coredump.' We control tracability on a 1701 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1702 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1703 * procctl is roughly analogous to Linux's DUMPABLE. 1704 * 1705 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1706 */ 1707 case LINUX_PR_GET_DUMPABLE: 1708 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1709 &trace_state); 1710 if (error != 0) 1711 return (error); 1712 td->td_retval[0] = (trace_state != -1); 1713 return (0); 1714 case LINUX_PR_SET_DUMPABLE: 1715 /* 1716 * It is only valid for userspace to set one of these two 1717 * flags, and only one at a time. 1718 */ 1719 switch (args->arg2) { 1720 case LINUX_SUID_DUMP_DISABLE: 1721 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1722 break; 1723 case LINUX_SUID_DUMP_USER: 1724 trace_state = PROC_TRACE_CTL_ENABLE; 1725 break; 1726 default: 1727 return (EINVAL); 1728 } 1729 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1730 &trace_state)); 1731 case LINUX_PR_GET_KEEPCAPS: 1732 /* 1733 * Indicate that we always clear the effective and 1734 * permitted capability sets when the user id becomes 1735 * non-zero (actually the capability sets are simply 1736 * always zero in the current implementation). 1737 */ 1738 td->td_retval[0] = 0; 1739 break; 1740 case LINUX_PR_SET_KEEPCAPS: 1741 /* 1742 * Ignore requests to keep the effective and permitted 1743 * capability sets when the user id becomes non-zero. 1744 */ 1745 break; 1746 case LINUX_PR_SET_NAME: 1747 /* 1748 * To be on the safe side we need to make sure to not 1749 * overflow the size a Linux program expects. We already 1750 * do this here in the copyin, so that we don't need to 1751 * check on copyout. 1752 */ 1753 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1754 error = copyinstr((void *)(register_t)args->arg2, comm, 1755 max_size, NULL); 1756 1757 /* Linux silently truncates the name if it is too long. */ 1758 if (error == ENAMETOOLONG) { 1759 /* 1760 * XXX: copyinstr() isn't documented to populate the 1761 * array completely, so do a copyin() to be on the 1762 * safe side. This should be changed in case 1763 * copyinstr() is changed to guarantee this. 1764 */ 1765 error = copyin((void *)(register_t)args->arg2, comm, 1766 max_size - 1); 1767 comm[max_size - 1] = '\0'; 1768 } 1769 if (error) 1770 return (error); 1771 1772 PROC_LOCK(p); 1773 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1774 PROC_UNLOCK(p); 1775 break; 1776 case LINUX_PR_GET_NAME: 1777 PROC_LOCK(p); 1778 strlcpy(comm, p->p_comm, sizeof(comm)); 1779 PROC_UNLOCK(p); 1780 error = copyout(comm, (void *)(register_t)args->arg2, 1781 strlen(comm) + 1); 1782 break; 1783 case LINUX_PR_GET_SECCOMP: 1784 case LINUX_PR_SET_SECCOMP: 1785 /* 1786 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1787 */ 1788 error = EINVAL; 1789 break; 1790 case LINUX_PR_CAPBSET_READ: 1791 #if 0 1792 /* 1793 * This makes too much noise with Ubuntu Focal. 1794 */ 1795 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1796 (int)args->arg2); 1797 #endif 1798 error = EINVAL; 1799 break; 1800 case LINUX_PR_SET_CHILD_SUBREAPER: 1801 if (args->arg2 == 0) { 1802 return (kern_procctl(td, P_PID, 0, PROC_REAP_RELEASE, 1803 NULL)); 1804 } 1805 1806 return (kern_procctl(td, P_PID, 0, PROC_REAP_ACQUIRE, 1807 NULL)); 1808 case LINUX_PR_GET_CHILD_SUBREAPER: { 1809 struct procctl_reaper_status rs; 1810 l_int val; 1811 1812 error = kern_procctl(td, P_PID, 0, PROC_REAP_STATUS, &rs); 1813 if (error != 0) 1814 return (error); 1815 val = rs.rs_reaper == p->p_pid ? 1 : 0; 1816 error = copyout(&val, (void *)(register_t)args->arg2, 1817 sizeof(val)); 1818 break; 1819 } 1820 case LINUX_PR_SET_NO_NEW_PRIVS: 1821 arg = args->arg2 == 1 ? 1822 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1823 error = kern_procctl(td, P_PID, p->p_pid, 1824 PROC_NO_NEW_PRIVS_CTL, &arg); 1825 break; 1826 case LINUX_PR_GET_NO_NEW_PRIVS: 1827 error = kern_procctl(td, P_PID, p->p_pid, 1828 PROC_NO_NEW_PRIVS_STATUS, &arg); 1829 if (error != 0) 1830 return (error); 1831 /* Linux returns the value as the syscall return */ 1832 td->td_retval[0] = arg == PROC_NO_NEW_PRIVS_ENABLE ? 1 : 0; 1833 break; 1834 case LINUX_PR_SET_PTRACER: 1835 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1836 error = EINVAL; 1837 break; 1838 default: 1839 linux_msg(td, "unsupported prctl option %d", args->option); 1840 error = EINVAL; 1841 break; 1842 } 1843 1844 return (error); 1845 } 1846 1847 int 1848 linux_sched_setparam(struct thread *td, 1849 struct linux_sched_setparam_args *uap) 1850 { 1851 struct sched_param sched_param; 1852 struct thread *tdt; 1853 int error, policy; 1854 1855 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1856 if (error) 1857 return (error); 1858 1859 tdt = linux_tdfind(td, uap->pid, -1); 1860 if (tdt == NULL) 1861 return (ESRCH); 1862 1863 if (linux_map_sched_prio) { 1864 error = kern_sched_getscheduler(td, tdt, &policy); 1865 if (error) 1866 goto out; 1867 1868 switch (policy) { 1869 case SCHED_OTHER: 1870 if (sched_param.sched_priority != 0) { 1871 error = EINVAL; 1872 goto out; 1873 } 1874 sched_param.sched_priority = 1875 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1876 break; 1877 case SCHED_FIFO: 1878 case SCHED_RR: 1879 if (sched_param.sched_priority < 1 || 1880 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1881 error = EINVAL; 1882 goto out; 1883 } 1884 /* 1885 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1886 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1887 */ 1888 sched_param.sched_priority = 1889 (sched_param.sched_priority - 1) * 1890 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1891 (LINUX_MAX_RT_PRIO - 1); 1892 break; 1893 } 1894 } 1895 1896 error = kern_sched_setparam(td, tdt, &sched_param); 1897 out: PROC_UNLOCK(tdt->td_proc); 1898 return (error); 1899 } 1900 1901 int 1902 linux_sched_getparam(struct thread *td, 1903 struct linux_sched_getparam_args *uap) 1904 { 1905 struct sched_param sched_param; 1906 struct thread *tdt; 1907 int error, policy; 1908 1909 tdt = linux_tdfind(td, uap->pid, -1); 1910 if (tdt == NULL) 1911 return (ESRCH); 1912 1913 error = kern_sched_getparam(td, tdt, &sched_param); 1914 if (error) { 1915 PROC_UNLOCK(tdt->td_proc); 1916 return (error); 1917 } 1918 1919 if (linux_map_sched_prio) { 1920 error = kern_sched_getscheduler(td, tdt, &policy); 1921 PROC_UNLOCK(tdt->td_proc); 1922 if (error) 1923 return (error); 1924 1925 switch (policy) { 1926 case SCHED_OTHER: 1927 sched_param.sched_priority = 0; 1928 break; 1929 case SCHED_FIFO: 1930 case SCHED_RR: 1931 /* 1932 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1933 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1934 */ 1935 sched_param.sched_priority = 1936 (sched_param.sched_priority * 1937 (LINUX_MAX_RT_PRIO - 1) + 1938 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1939 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1940 break; 1941 } 1942 } else 1943 PROC_UNLOCK(tdt->td_proc); 1944 1945 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1946 return (error); 1947 } 1948 1949 /* 1950 * Get affinity of a process. 1951 */ 1952 int 1953 linux_sched_getaffinity(struct thread *td, 1954 struct linux_sched_getaffinity_args *args) 1955 { 1956 struct thread *tdt; 1957 cpuset_t *mask; 1958 size_t size; 1959 int error; 1960 id_t tid; 1961 1962 tdt = linux_tdfind(td, args->pid, -1); 1963 if (tdt == NULL) 1964 return (ESRCH); 1965 tid = tdt->td_tid; 1966 PROC_UNLOCK(tdt->td_proc); 1967 1968 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1969 size = min(args->len, sizeof(cpuset_t)); 1970 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1971 tid, size, mask); 1972 if (error == ERANGE) 1973 error = EINVAL; 1974 if (error == 0) 1975 error = copyout(mask, args->user_mask_ptr, size); 1976 if (error == 0) 1977 td->td_retval[0] = size; 1978 free(mask, M_LINUX); 1979 return (error); 1980 } 1981 1982 /* 1983 * Set affinity of a process. 1984 */ 1985 int 1986 linux_sched_setaffinity(struct thread *td, 1987 struct linux_sched_setaffinity_args *args) 1988 { 1989 struct thread *tdt; 1990 cpuset_t *mask; 1991 int cpu, error; 1992 size_t len; 1993 id_t tid; 1994 1995 tdt = linux_tdfind(td, args->pid, -1); 1996 if (tdt == NULL) 1997 return (ESRCH); 1998 tid = tdt->td_tid; 1999 PROC_UNLOCK(tdt->td_proc); 2000 2001 len = min(args->len, sizeof(cpuset_t)); 2002 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); 2003 error = copyin(args->user_mask_ptr, mask, len); 2004 if (error != 0) 2005 goto out; 2006 /* Linux ignore high bits */ 2007 CPU_FOREACH_ISSET(cpu, mask) 2008 if (cpu > mp_maxid) 2009 CPU_CLR(cpu, mask); 2010 2011 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2012 tid, mask); 2013 if (error == EDEADLK) 2014 error = EINVAL; 2015 out: 2016 free(mask, M_TEMP); 2017 return (error); 2018 } 2019 2020 struct linux_rlimit64 { 2021 uint64_t rlim_cur; 2022 uint64_t rlim_max; 2023 }; 2024 2025 int 2026 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2027 { 2028 struct rlimit rlim, nrlim; 2029 struct linux_rlimit64 lrlim; 2030 struct proc *p; 2031 u_int which; 2032 int flags; 2033 int error; 2034 2035 if (args->new == NULL && args->old != NULL) { 2036 if (linux_get_dummy_limit(td, args->resource, &rlim)) { 2037 lrlim.rlim_cur = rlim.rlim_cur; 2038 lrlim.rlim_max = rlim.rlim_max; 2039 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2040 } 2041 } 2042 2043 if (args->resource >= LINUX_RLIM_NLIMITS) 2044 return (EINVAL); 2045 2046 which = linux_to_bsd_resource[args->resource]; 2047 if (which == -1) 2048 return (EINVAL); 2049 2050 if (args->new != NULL) { 2051 /* 2052 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2053 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2054 * as INFINITY so we do not need a conversion even. 2055 */ 2056 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2057 if (error != 0) 2058 return (error); 2059 } 2060 2061 flags = PGET_HOLD | PGET_NOTWEXIT; 2062 if (args->new != NULL) 2063 flags |= PGET_CANDEBUG; 2064 else 2065 flags |= PGET_CANSEE; 2066 if (args->pid == 0) { 2067 p = td->td_proc; 2068 PHOLD(p); 2069 } else { 2070 error = pget(args->pid, flags, &p); 2071 if (error != 0) 2072 return (error); 2073 } 2074 if (args->old != NULL) { 2075 PROC_LOCK(p); 2076 lim_rlimit_proc(p, which, &rlim); 2077 PROC_UNLOCK(p); 2078 if (rlim.rlim_cur == RLIM_INFINITY) 2079 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2080 else 2081 lrlim.rlim_cur = rlim.rlim_cur; 2082 if (rlim.rlim_max == RLIM_INFINITY) 2083 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2084 else 2085 lrlim.rlim_max = rlim.rlim_max; 2086 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2087 if (error != 0) 2088 goto out; 2089 } 2090 2091 if (args->new != NULL) 2092 error = kern_proc_setrlimit(td, p, which, &nrlim); 2093 2094 out: 2095 PRELE(p); 2096 return (error); 2097 } 2098 2099 int 2100 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2101 { 2102 struct timespec ts, *tsp; 2103 int error; 2104 2105 if (args->tsp != NULL) { 2106 error = linux_get_timespec(&ts, args->tsp); 2107 if (error != 0) 2108 return (error); 2109 tsp = &ts; 2110 } else 2111 tsp = NULL; 2112 2113 error = linux_common_pselect6(td, args->nfds, args->readfds, 2114 args->writefds, args->exceptfds, tsp, args->sig); 2115 2116 if (args->tsp != NULL) 2117 linux_put_timespec(&ts, args->tsp); 2118 return (error); 2119 } 2120 2121 static int 2122 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2123 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2124 l_uintptr_t *sig) 2125 { 2126 struct timeval utv, tv0, tv1, *tvp; 2127 struct l_pselect6arg lpse6; 2128 sigset_t *ssp; 2129 sigset_t ss; 2130 int error; 2131 2132 ssp = NULL; 2133 if (sig != NULL) { 2134 error = copyin(sig, &lpse6, sizeof(lpse6)); 2135 if (error != 0) 2136 return (error); 2137 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2138 lpse6.ss_len, &ss, &ssp); 2139 if (error != 0) 2140 return (error); 2141 } else 2142 ssp = NULL; 2143 2144 /* 2145 * Currently glibc changes nanosecond number to microsecond. 2146 * This mean losing precision but for now it is hardly seen. 2147 */ 2148 if (tsp != NULL) { 2149 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2150 if (itimerfix(&utv)) 2151 return (EINVAL); 2152 2153 microtime(&tv0); 2154 tvp = &utv; 2155 } else 2156 tvp = NULL; 2157 2158 error = kern_pselect(td, nfds, readfds, writefds, 2159 exceptfds, tvp, ssp, LINUX_NFDBITS); 2160 2161 if (tsp != NULL) { 2162 /* 2163 * Compute how much time was left of the timeout, 2164 * by subtracting the current time and the time 2165 * before we started the call, and subtracting 2166 * that result from the user-supplied value. 2167 */ 2168 microtime(&tv1); 2169 timevalsub(&tv1, &tv0); 2170 timevalsub(&utv, &tv1); 2171 if (utv.tv_sec < 0) 2172 timevalclear(&utv); 2173 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2174 } 2175 return (error); 2176 } 2177 2178 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2179 int 2180 linux_pselect6_time64(struct thread *td, 2181 struct linux_pselect6_time64_args *args) 2182 { 2183 struct timespec ts, *tsp; 2184 int error; 2185 2186 if (args->tsp != NULL) { 2187 error = linux_get_timespec64(&ts, args->tsp); 2188 if (error != 0) 2189 return (error); 2190 tsp = &ts; 2191 } else 2192 tsp = NULL; 2193 2194 error = linux_common_pselect6(td, args->nfds, args->readfds, 2195 args->writefds, args->exceptfds, tsp, args->sig); 2196 2197 if (args->tsp != NULL) 2198 linux_put_timespec64(&ts, args->tsp); 2199 return (error); 2200 } 2201 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2202 2203 int 2204 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2205 { 2206 struct timespec uts, *tsp; 2207 int error; 2208 2209 if (args->tsp != NULL) { 2210 error = linux_get_timespec(&uts, args->tsp); 2211 if (error != 0) 2212 return (error); 2213 tsp = &uts; 2214 } else 2215 tsp = NULL; 2216 2217 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2218 args->sset, args->ssize); 2219 if (error == 0 && args->tsp != NULL) 2220 error = linux_put_timespec(&uts, args->tsp); 2221 return (error); 2222 } 2223 2224 static int 2225 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2226 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2227 { 2228 struct timespec ts0, ts1; 2229 struct pollfd stackfds[32]; 2230 struct pollfd *kfds; 2231 sigset_t *ssp; 2232 sigset_t ss; 2233 int error; 2234 2235 if (kern_poll_maxfds(nfds)) 2236 return (EINVAL); 2237 if (sset != NULL) { 2238 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2239 if (error != 0) 2240 return (error); 2241 } else 2242 ssp = NULL; 2243 if (tsp != NULL) 2244 nanotime(&ts0); 2245 2246 if (nfds > nitems(stackfds)) 2247 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2248 else 2249 kfds = stackfds; 2250 error = linux_pollin(td, kfds, fds, nfds); 2251 if (error != 0) 2252 goto out; 2253 2254 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2255 if (error == 0) 2256 error = linux_pollout(td, kfds, fds, nfds); 2257 2258 if (error == 0 && tsp != NULL) { 2259 if (td->td_retval[0]) { 2260 nanotime(&ts1); 2261 timespecsub(&ts1, &ts0, &ts1); 2262 timespecsub(tsp, &ts1, tsp); 2263 if (tsp->tv_sec < 0) 2264 timespecclear(tsp); 2265 } else 2266 timespecclear(tsp); 2267 } 2268 2269 out: 2270 if (nfds > nitems(stackfds)) 2271 free(kfds, M_TEMP); 2272 return (error); 2273 } 2274 2275 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2276 int 2277 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2278 { 2279 struct timespec uts, *tsp; 2280 int error; 2281 2282 if (args->tsp != NULL) { 2283 error = linux_get_timespec64(&uts, args->tsp); 2284 if (error != 0) 2285 return (error); 2286 tsp = &uts; 2287 } else 2288 tsp = NULL; 2289 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2290 args->sset, args->ssize); 2291 if (error == 0 && args->tsp != NULL) 2292 error = linux_put_timespec64(&uts, args->tsp); 2293 return (error); 2294 } 2295 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2296 2297 static int 2298 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2299 { 2300 int error; 2301 u_int i; 2302 2303 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2304 if (error != 0) 2305 return (error); 2306 2307 for (i = 0; i < nfd; i++) { 2308 if (fds->events != 0) 2309 linux_to_bsd_poll_events(td, fds->fd, 2310 fds->events, &fds->events); 2311 fds++; 2312 } 2313 return (0); 2314 } 2315 2316 static int 2317 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2318 { 2319 int error = 0; 2320 u_int i, n = 0; 2321 2322 for (i = 0; i < nfd; i++) { 2323 if (fds->revents != 0) { 2324 bsd_to_linux_poll_events(fds->revents, 2325 &fds->revents); 2326 n++; 2327 } 2328 error = copyout(&fds->revents, &ufds->revents, 2329 sizeof(ufds->revents)); 2330 if (error) 2331 return (error); 2332 fds++; 2333 ufds++; 2334 } 2335 td->td_retval[0] = n; 2336 return (0); 2337 } 2338 2339 static int 2340 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2341 struct timespec *ts) 2342 { 2343 struct thread *tdt; 2344 int error; 2345 2346 /* 2347 * According to man in case the invalid pid specified 2348 * EINVAL should be returned. 2349 */ 2350 if (pid < 0) 2351 return (EINVAL); 2352 2353 tdt = linux_tdfind(td, pid, -1); 2354 if (tdt == NULL) 2355 return (ESRCH); 2356 2357 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2358 PROC_UNLOCK(tdt->td_proc); 2359 return (error); 2360 } 2361 2362 int 2363 linux_sched_rr_get_interval(struct thread *td, 2364 struct linux_sched_rr_get_interval_args *uap) 2365 { 2366 struct timespec ts; 2367 int error; 2368 2369 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2370 if (error != 0) 2371 return (error); 2372 return (linux_put_timespec(&ts, uap->interval)); 2373 } 2374 2375 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2376 int 2377 linux_sched_rr_get_interval_time64(struct thread *td, 2378 struct linux_sched_rr_get_interval_time64_args *uap) 2379 { 2380 struct timespec ts; 2381 int error; 2382 2383 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2384 if (error != 0) 2385 return (error); 2386 return (linux_put_timespec64(&ts, uap->interval)); 2387 } 2388 #endif 2389 2390 /* 2391 * In case when the Linux thread is the initial thread in 2392 * the thread group thread id is equal to the process id. 2393 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2394 */ 2395 struct thread * 2396 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2397 { 2398 struct linux_emuldata *em; 2399 struct thread *tdt; 2400 struct proc *p; 2401 2402 tdt = NULL; 2403 if (tid == 0 || tid == td->td_tid) { 2404 if (pid != -1 && td->td_proc->p_pid != pid) 2405 return (NULL); 2406 PROC_LOCK(td->td_proc); 2407 return (td); 2408 } else if (tid > PID_MAX) 2409 return (tdfind(tid, pid)); 2410 2411 /* 2412 * Initial thread where the tid equal to the pid. 2413 */ 2414 p = pfind(tid); 2415 if (p != NULL) { 2416 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2417 (pid != -1 && tid != pid)) { 2418 /* 2419 * p is not a Linuxulator process. 2420 */ 2421 PROC_UNLOCK(p); 2422 return (NULL); 2423 } 2424 FOREACH_THREAD_IN_PROC(p, tdt) { 2425 em = em_find(tdt); 2426 if (tid == em->em_tid) 2427 return (tdt); 2428 } 2429 PROC_UNLOCK(p); 2430 } 2431 return (NULL); 2432 } 2433 2434 void 2435 linux_to_bsd_waitopts(int options, int *bsdopts) 2436 { 2437 2438 if (options & LINUX_WNOHANG) 2439 *bsdopts |= WNOHANG; 2440 if (options & LINUX_WUNTRACED) 2441 *bsdopts |= WUNTRACED; 2442 if (options & LINUX_WEXITED) 2443 *bsdopts |= WEXITED; 2444 if (options & LINUX_WCONTINUED) 2445 *bsdopts |= WCONTINUED; 2446 if (options & LINUX_WNOWAIT) 2447 *bsdopts |= WNOWAIT; 2448 2449 if (options & __WCLONE) 2450 *bsdopts |= WLINUXCLONE; 2451 } 2452 2453 int 2454 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2455 { 2456 struct uio uio; 2457 struct iovec iov; 2458 int error; 2459 2460 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2461 return (EINVAL); 2462 if (args->count > INT_MAX) 2463 args->count = INT_MAX; 2464 2465 iov.iov_base = args->buf; 2466 iov.iov_len = args->count; 2467 2468 uio.uio_iov = &iov; 2469 uio.uio_iovcnt = 1; 2470 uio.uio_resid = iov.iov_len; 2471 uio.uio_segflg = UIO_USERSPACE; 2472 uio.uio_rw = UIO_READ; 2473 uio.uio_td = td; 2474 2475 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2476 if (error == 0) 2477 td->td_retval[0] = args->count - uio.uio_resid; 2478 return (error); 2479 } 2480 2481 int 2482 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2483 { 2484 2485 /* Needs to be page-aligned */ 2486 if (args->start & PAGE_MASK) 2487 return (EINVAL); 2488 return (kern_mincore(td, args->start, args->len, args->vec)); 2489 } 2490 2491 #define SYSLOG_TAG "<6>" 2492 2493 int 2494 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2495 { 2496 char buf[128], *src, *dst; 2497 u_int seq; 2498 int buflen, error; 2499 2500 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2501 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2502 return (EINVAL); 2503 } 2504 2505 if (args->len < 6) { 2506 td->td_retval[0] = 0; 2507 return (0); 2508 } 2509 2510 error = priv_check(td, PRIV_MSGBUF); 2511 if (error) 2512 return (error); 2513 2514 mtx_lock(&msgbuf_lock); 2515 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2516 mtx_unlock(&msgbuf_lock); 2517 2518 dst = args->buf; 2519 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2520 /* The -1 is to skip the trailing '\0'. */ 2521 dst += sizeof(SYSLOG_TAG) - 1; 2522 2523 while (error == 0) { 2524 mtx_lock(&msgbuf_lock); 2525 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2526 mtx_unlock(&msgbuf_lock); 2527 2528 if (buflen == 0) 2529 break; 2530 2531 for (src = buf; src < buf + buflen && error == 0; src++) { 2532 if (*src == '\0') 2533 continue; 2534 2535 if (dst >= args->buf + args->len) 2536 goto out; 2537 2538 error = copyout(src, dst, 1); 2539 dst++; 2540 2541 if (*src == '\n' && *(src + 1) != '<' && 2542 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2543 error = copyout(&SYSLOG_TAG, 2544 dst, sizeof(SYSLOG_TAG)); 2545 dst += sizeof(SYSLOG_TAG) - 1; 2546 } 2547 } 2548 } 2549 out: 2550 td->td_retval[0] = dst - args->buf; 2551 return (error); 2552 } 2553 2554 int 2555 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2556 { 2557 int cpu, error, node; 2558 2559 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2560 error = 0; 2561 node = cpuid_to_pcpu[cpu]->pc_domain; 2562 2563 if (args->cpu != NULL) 2564 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2565 if (args->node != NULL) 2566 error = copyout(&node, args->node, sizeof(l_int)); 2567 return (error); 2568 } 2569 2570 #if defined(__i386__) || defined(__amd64__) 2571 int 2572 linux_poll(struct thread *td, struct linux_poll_args *args) 2573 { 2574 struct timespec ts, *tsp; 2575 2576 if (args->timeout != INFTIM) { 2577 if (args->timeout < 0) 2578 return (EINVAL); 2579 ts.tv_sec = args->timeout / 1000; 2580 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2581 tsp = &ts; 2582 } else 2583 tsp = NULL; 2584 2585 return (linux_common_ppoll(td, args->fds, args->nfds, 2586 tsp, NULL, 0)); 2587 } 2588 #endif /* __i386__ || __amd64__ */ 2589 2590 int 2591 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2592 { 2593 2594 switch (args->op) { 2595 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2596 return (EOPNOTSUPP); 2597 default: 2598 /* 2599 * Ignore unknown operations, just like Linux kernel built 2600 * without CONFIG_SECCOMP. 2601 */ 2602 return (EINVAL); 2603 } 2604 } 2605 2606 /* 2607 * Custom version of exec_copyin_args(), to copy out argument and environment 2608 * strings from the old process address space into the temporary string buffer. 2609 * Based on freebsd32_exec_copyin_args. 2610 */ 2611 static int 2612 linux_exec_copyin_args(struct image_args *args, const char *fname, 2613 l_uintptr_t *argv, l_uintptr_t *envv) 2614 { 2615 char *argp, *envp; 2616 l_uintptr_t *ptr, arg; 2617 int error; 2618 2619 bzero(args, sizeof(*args)); 2620 if (argv == NULL) 2621 return (EFAULT); 2622 2623 /* 2624 * Allocate demand-paged memory for the file name, argument, and 2625 * environment strings. 2626 */ 2627 error = exec_alloc_args(args); 2628 if (error != 0) 2629 return (error); 2630 2631 /* 2632 * Copy the file name. 2633 */ 2634 error = exec_args_add_fname(args, fname, UIO_USERSPACE); 2635 if (error != 0) 2636 goto err_exit; 2637 2638 /* 2639 * extract arguments first 2640 */ 2641 ptr = argv; 2642 for (;;) { 2643 error = copyin(ptr++, &arg, sizeof(arg)); 2644 if (error) 2645 goto err_exit; 2646 if (arg == 0) 2647 break; 2648 argp = PTRIN(arg); 2649 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2650 if (error != 0) 2651 goto err_exit; 2652 } 2653 2654 /* 2655 * This comment is from Linux do_execveat_common: 2656 * When argv is empty, add an empty string ("") as argv[0] to 2657 * ensure confused userspace programs that start processing 2658 * from argv[1] won't end up walking envp. 2659 */ 2660 if (args->argc == 0 && 2661 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2662 goto err_exit; 2663 2664 /* 2665 * extract environment strings 2666 */ 2667 if (envv) { 2668 ptr = envv; 2669 for (;;) { 2670 error = copyin(ptr++, &arg, sizeof(arg)); 2671 if (error) 2672 goto err_exit; 2673 if (arg == 0) 2674 break; 2675 envp = PTRIN(arg); 2676 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2677 if (error != 0) 2678 goto err_exit; 2679 } 2680 } 2681 2682 return (0); 2683 2684 err_exit: 2685 exec_free_args(args); 2686 return (error); 2687 } 2688 2689 int 2690 linux_execve(struct thread *td, struct linux_execve_args *args) 2691 { 2692 struct image_args eargs; 2693 int error; 2694 2695 LINUX_CTR(execve); 2696 2697 error = linux_exec_copyin_args(&eargs, args->path, args->argp, 2698 args->envp); 2699 if (error == 0) 2700 error = linux_common_execve(td, &eargs); 2701 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2702 return (error); 2703 } 2704 2705 static void 2706 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) 2707 { 2708 struct rtprio rtp2; 2709 2710 pri_to_rtp(td1, &rtp2); 2711 if (rtp2.type < rtp->type || 2712 (rtp2.type == rtp->type && 2713 rtp2.prio < rtp->prio)) { 2714 rtp->type = rtp2.type; 2715 rtp->prio = rtp2.prio; 2716 } 2717 } 2718 2719 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX 2720 2721 static int 2722 linux_rtprio2ioprio(struct rtprio *rtp) 2723 { 2724 int ioprio, prio; 2725 2726 switch (rtp->type) { 2727 case RTP_PRIO_IDLE: 2728 prio = RTP_PRIO_MIN; 2729 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); 2730 break; 2731 case RTP_PRIO_NORMAL: 2732 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2733 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); 2734 break; 2735 case RTP_PRIO_REALTIME: 2736 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2737 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); 2738 break; 2739 default: 2740 prio = RTP_PRIO_MIN; 2741 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); 2742 break; 2743 } 2744 return (ioprio); 2745 } 2746 2747 static int 2748 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) 2749 { 2750 2751 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { 2752 case LINUX_IOPRIO_CLASS_IDLE: 2753 rtp->prio = RTP_PRIO_MIN; 2754 rtp->type = RTP_PRIO_IDLE; 2755 break; 2756 case LINUX_IOPRIO_CLASS_BE: 2757 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2758 rtp->type = RTP_PRIO_NORMAL; 2759 break; 2760 case LINUX_IOPRIO_CLASS_RT: 2761 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2762 rtp->type = RTP_PRIO_REALTIME; 2763 break; 2764 default: 2765 return (EINVAL); 2766 } 2767 return (0); 2768 } 2769 #undef LINUX_PRIO_DIVIDER 2770 2771 int 2772 linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) 2773 { 2774 struct thread *td1; 2775 struct rtprio rtp; 2776 struct pgrp *pg; 2777 struct proc *p; 2778 int error, found; 2779 2780 p = NULL; 2781 td1 = NULL; 2782 error = 0; 2783 found = 0; 2784 rtp.type = RTP_PRIO_IDLE; 2785 rtp.prio = RTP_PRIO_MAX; 2786 switch (args->which) { 2787 case LINUX_IOPRIO_WHO_PROCESS: 2788 if (args->who == 0) { 2789 td1 = td; 2790 p = td1->td_proc; 2791 PROC_LOCK(p); 2792 } else if (args->who > PID_MAX) { 2793 td1 = linux_tdfind(td, args->who, -1); 2794 if (td1 != NULL) 2795 p = td1->td_proc; 2796 } else 2797 p = pfind(args->who); 2798 if (p == NULL) 2799 return (ESRCH); 2800 if ((error = p_cansee(td, p))) { 2801 PROC_UNLOCK(p); 2802 break; 2803 } 2804 if (td1 != NULL) { 2805 pri_to_rtp(td1, &rtp); 2806 } else { 2807 FOREACH_THREAD_IN_PROC(p, td1) { 2808 linux_up_rtprio_if(td1, &rtp); 2809 } 2810 } 2811 found++; 2812 PROC_UNLOCK(p); 2813 break; 2814 case LINUX_IOPRIO_WHO_PGRP: 2815 sx_slock(&proctree_lock); 2816 if (args->who == 0) { 2817 pg = td->td_proc->p_pgrp; 2818 PGRP_LOCK(pg); 2819 } else { 2820 pg = pgfind(args->who); 2821 if (pg == NULL) { 2822 sx_sunlock(&proctree_lock); 2823 error = ESRCH; 2824 break; 2825 } 2826 } 2827 sx_sunlock(&proctree_lock); 2828 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2829 PROC_LOCK(p); 2830 if (p->p_state == PRS_NORMAL && 2831 p_cansee(td, p) == 0) { 2832 FOREACH_THREAD_IN_PROC(p, td1) { 2833 linux_up_rtprio_if(td1, &rtp); 2834 found++; 2835 } 2836 } 2837 PROC_UNLOCK(p); 2838 } 2839 PGRP_UNLOCK(pg); 2840 break; 2841 case LINUX_IOPRIO_WHO_USER: 2842 if (args->who == 0) 2843 args->who = td->td_ucred->cr_uid; 2844 sx_slock(&allproc_lock); 2845 FOREACH_PROC_IN_SYSTEM(p) { 2846 PROC_LOCK(p); 2847 if (p->p_state == PRS_NORMAL && 2848 p->p_ucred->cr_uid == args->who && 2849 p_cansee(td, p) == 0) { 2850 FOREACH_THREAD_IN_PROC(p, td1) { 2851 linux_up_rtprio_if(td1, &rtp); 2852 found++; 2853 } 2854 } 2855 PROC_UNLOCK(p); 2856 } 2857 sx_sunlock(&allproc_lock); 2858 break; 2859 default: 2860 error = EINVAL; 2861 break; 2862 } 2863 if (error == 0) { 2864 if (found != 0) 2865 td->td_retval[0] = linux_rtprio2ioprio(&rtp); 2866 else 2867 error = ESRCH; 2868 } 2869 return (error); 2870 } 2871 2872 int 2873 linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) 2874 { 2875 struct thread *td1; 2876 struct rtprio rtp; 2877 struct pgrp *pg; 2878 struct proc *p; 2879 int error; 2880 2881 if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) 2882 return (error); 2883 /* Attempts to set high priorities (REALTIME) require su privileges. */ 2884 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && 2885 (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) 2886 return (error); 2887 2888 p = NULL; 2889 td1 = NULL; 2890 switch (args->which) { 2891 case LINUX_IOPRIO_WHO_PROCESS: 2892 if (args->who == 0) { 2893 td1 = td; 2894 p = td1->td_proc; 2895 PROC_LOCK(p); 2896 } else if (args->who > PID_MAX) { 2897 td1 = linux_tdfind(td, args->who, -1); 2898 if (td1 != NULL) 2899 p = td1->td_proc; 2900 } else 2901 p = pfind(args->who); 2902 if (p == NULL) 2903 return (ESRCH); 2904 if ((error = p_cansched(td, p))) { 2905 PROC_UNLOCK(p); 2906 break; 2907 } 2908 if (td1 != NULL) { 2909 error = rtp_to_pri(&rtp, td1); 2910 } else { 2911 FOREACH_THREAD_IN_PROC(p, td1) { 2912 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2913 break; 2914 } 2915 } 2916 PROC_UNLOCK(p); 2917 break; 2918 case LINUX_IOPRIO_WHO_PGRP: 2919 sx_slock(&proctree_lock); 2920 if (args->who == 0) { 2921 pg = td->td_proc->p_pgrp; 2922 PGRP_LOCK(pg); 2923 } else { 2924 pg = pgfind(args->who); 2925 if (pg == NULL) { 2926 sx_sunlock(&proctree_lock); 2927 error = ESRCH; 2928 break; 2929 } 2930 } 2931 sx_sunlock(&proctree_lock); 2932 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2933 PROC_LOCK(p); 2934 if (p->p_state == PRS_NORMAL && 2935 p_cansched(td, p) == 0) { 2936 FOREACH_THREAD_IN_PROC(p, td1) { 2937 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2938 break; 2939 } 2940 } 2941 PROC_UNLOCK(p); 2942 if (error != 0) 2943 break; 2944 } 2945 PGRP_UNLOCK(pg); 2946 break; 2947 case LINUX_IOPRIO_WHO_USER: 2948 if (args->who == 0) 2949 args->who = td->td_ucred->cr_uid; 2950 sx_slock(&allproc_lock); 2951 FOREACH_PROC_IN_SYSTEM(p) { 2952 PROC_LOCK(p); 2953 if (p->p_state == PRS_NORMAL && 2954 p->p_ucred->cr_uid == args->who && 2955 p_cansched(td, p) == 0) { 2956 FOREACH_THREAD_IN_PROC(p, td1) { 2957 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2958 break; 2959 } 2960 } 2961 PROC_UNLOCK(p); 2962 if (error != 0) 2963 break; 2964 } 2965 sx_sunlock(&allproc_lock); 2966 break; 2967 default: 2968 error = EINVAL; 2969 break; 2970 } 2971 return (error); 2972 } 2973 2974 /* The only flag is O_NONBLOCK */ 2975 #define B2L_MQ_FLAGS(bflags) ((bflags) != 0 ? LINUX_O_NONBLOCK : 0) 2976 #define L2B_MQ_FLAGS(lflags) ((lflags) != 0 ? O_NONBLOCK : 0) 2977 2978 int 2979 linux_mq_open(struct thread *td, struct linux_mq_open_args *args) 2980 { 2981 struct mq_attr attr; 2982 int error, flags; 2983 2984 flags = linux_common_openflags(args->oflag); 2985 if ((flags & O_ACCMODE) == O_ACCMODE || (flags & O_EXEC) != 0) 2986 return (EINVAL); 2987 flags = FFLAGS(flags); 2988 if ((flags & O_CREAT) != 0 && args->attr != NULL) { 2989 error = copyin(args->attr, &attr, sizeof(attr)); 2990 if (error != 0) 2991 return (error); 2992 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 2993 } 2994 2995 return (kern_kmq_open(td, args->name, flags, args->mode, 2996 args->attr != NULL ? &attr : NULL)); 2997 } 2998 2999 int 3000 linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 3001 { 3002 struct kmq_unlink_args bsd_args = { 3003 .path = PTRIN(args->name) 3004 }; 3005 3006 return (sys_kmq_unlink(td, &bsd_args)); 3007 } 3008 3009 int 3010 linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 3011 { 3012 struct timespec ts, *abs_timeout; 3013 int error; 3014 3015 if (args->abs_timeout == NULL) 3016 abs_timeout = NULL; 3017 else { 3018 error = linux_get_timespec(&ts, args->abs_timeout); 3019 if (error != 0) 3020 return (error); 3021 abs_timeout = &ts; 3022 } 3023 3024 return (kern_kmq_timedsend(td, args->mqd, PTRIN(args->msg_ptr), 3025 args->msg_len, args->msg_prio, abs_timeout)); 3026 } 3027 3028 int 3029 linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 3030 { 3031 struct timespec ts, *abs_timeout; 3032 int error; 3033 3034 if (args->abs_timeout == NULL) 3035 abs_timeout = NULL; 3036 else { 3037 error = linux_get_timespec(&ts, args->abs_timeout); 3038 if (error != 0) 3039 return (error); 3040 abs_timeout = &ts; 3041 } 3042 3043 return (kern_kmq_timedreceive(td, args->mqd, PTRIN(args->msg_ptr), 3044 args->msg_len, args->msg_prio, abs_timeout)); 3045 } 3046 3047 int 3048 linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 3049 { 3050 struct sigevent ev, *evp; 3051 struct l_sigevent l_ev; 3052 int error; 3053 3054 if (args->sevp == NULL) 3055 evp = NULL; 3056 else { 3057 error = copyin(args->sevp, &l_ev, sizeof(l_ev)); 3058 if (error != 0) 3059 return (error); 3060 error = linux_convert_l_sigevent(&l_ev, &ev); 3061 if (error != 0) 3062 return (error); 3063 evp = &ev; 3064 } 3065 3066 return (kern_kmq_notify(td, args->mqd, evp)); 3067 } 3068 3069 int 3070 linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 3071 { 3072 struct mq_attr attr, oattr; 3073 int error; 3074 3075 if (args->attr != NULL) { 3076 error = copyin(args->attr, &attr, sizeof(attr)); 3077 if (error != 0) 3078 return (error); 3079 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 3080 } 3081 3082 error = kern_kmq_setattr(td, args->mqd, args->attr != NULL ? &attr : NULL, 3083 &oattr); 3084 if (error == 0 && args->oattr != NULL) { 3085 oattr.mq_flags = B2L_MQ_FLAGS(oattr.mq_flags); 3086 bzero(oattr.__reserved, sizeof(oattr.__reserved)); 3087 error = copyout(&oattr, args->oattr, sizeof(oattr)); 3088 } 3089 3090 return (error); 3091 } 3092 3093 int 3094 linux_kcmp(struct thread *td, struct linux_kcmp_args *args) 3095 { 3096 int type; 3097 3098 switch (args->type) { 3099 case LINUX_KCMP_FILE: 3100 type = KCMP_FILE; 3101 break; 3102 case LINUX_KCMP_FILES: 3103 type = KCMP_FILES; 3104 break; 3105 case LINUX_KCMP_SIGHAND: 3106 type = KCMP_SIGHAND; 3107 break; 3108 case LINUX_KCMP_VM: 3109 type = KCMP_VM; 3110 break; 3111 default: 3112 return (EINVAL); 3113 } 3114 3115 return (kern_kcmp(td, args->pid1, args->pid2, type, args->idx1, 3116 args->idx)); 3117 } 3118 3119 int 3120 linux_membarrier(struct thread *td, struct linux_membarrier_args *args) 3121 { 3122 static const struct { 3123 int linux_cmd; 3124 int freebsd_cmd; 3125 } cmds[] = { 3126 { LINUX_MEMBARRIER_CMD_QUERY, 3127 MEMBARRIER_CMD_QUERY }, 3128 { LINUX_MEMBARRIER_CMD_GLOBAL, 3129 MEMBARRIER_CMD_GLOBAL }, 3130 { LINUX_MEMBARRIER_CMD_GLOBAL_EXPEDITED, 3131 MEMBARRIER_CMD_GLOBAL_EXPEDITED }, 3132 { LINUX_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 3133 MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED }, 3134 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED, 3135 MEMBARRIER_CMD_PRIVATE_EXPEDITED }, 3136 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 3137 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED }, 3138 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 3139 MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE }, 3140 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 3141 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE }, 3142 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 3143 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ }, 3144 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 3145 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ }, 3146 { LINUX_MEMBARRIER_CMD_GET_REGISTRATIONS, 3147 MEMBARRIER_CMD_GET_REGISTRATIONS }, 3148 }; 3149 int cmd, error, flags, i, mask; 3150 3151 cmd = -1; 3152 for (i = 0; i < nitems(cmds); i++) { 3153 if (args->cmd == cmds[i].linux_cmd) { 3154 cmd = cmds[i].freebsd_cmd; 3155 break; 3156 } 3157 } 3158 3159 if (cmd == -1 || (args->flags & ~LINUX_MEMBARRIER_CMD_FLAG_CPU) != 0) 3160 return (EINVAL); 3161 3162 flags = 0; 3163 if ((args->flags & LINUX_MEMBARRIER_CMD_FLAG_CPU) != 0) 3164 flags |= MEMBARRIER_CMD_FLAG_CPU; 3165 3166 error = kern_membarrier(td, cmd, flags, args->cpu_id); 3167 if (error != 0) 3168 return (error); 3169 3170 if (args->cmd == LINUX_MEMBARRIER_CMD_QUERY || 3171 args->cmd == LINUX_MEMBARRIER_CMD_GET_REGISTRATIONS) { 3172 mask = td->td_retval[0]; 3173 td->td_retval[0] = 0; 3174 for (i = 0; i < nitems(cmds); i++) 3175 if ((mask & cmds[i].freebsd_cmd) != 0) 3176 td->td_retval[0] |= cmds[i].linux_cmd; 3177 } 3178 3179 return (0); 3180 } 3181 3182 MODULE_DEPEND(linux, mqueuefs, 1, 1, 1); 3183