1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/fcntl.h> 34 #include <sys/jail.h> 35 #include <sys/imgact.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/membarrier.h> 39 #include <sys/msgbuf.h> 40 #include <sys/mqueue.h> 41 #include <sys/mutex.h> 42 #include <sys/poll.h> 43 #include <sys/priv.h> 44 #include <sys/proc.h> 45 #include <sys/procctl.h> 46 #include <sys/reboot.h> 47 #include <sys/random.h> 48 #include <sys/resourcevar.h> 49 #include <sys/rtprio.h> 50 #include <sys/sched.h> 51 #include <sys/smp.h> 52 #include <sys/stat.h> 53 #include <sys/syscallsubr.h> 54 #include <sys/sysctl.h> 55 #include <sys/sysent.h> 56 #include <sys/sysproto.h> 57 #include <sys/time.h> 58 #include <sys/unistd.h> 59 #include <sys/vmmeter.h> 60 #include <sys/vnode.h> 61 62 #include <security/audit/audit.h> 63 #include <security/mac/mac_framework.h> 64 65 #include <vm/pmap.h> 66 #include <vm/vm_map.h> 67 #include <vm/swap_pager.h> 68 69 #ifdef COMPAT_LINUX32 70 #include <machine/../linux32/linux.h> 71 #include <machine/../linux32/linux32_proto.h> 72 #else 73 #include <machine/../linux/linux.h> 74 #include <machine/../linux/linux_proto.h> 75 #endif 76 77 #include <compat/linux/linux_common.h> 78 #include <compat/linux/linux_dtrace.h> 79 #include <compat/linux/linux_file.h> 80 #include <compat/linux/linux_mib.h> 81 #include <compat/linux/linux_mmap.h> 82 #include <compat/linux/linux_signal.h> 83 #include <compat/linux/linux_time.h> 84 #include <compat/linux/linux_util.h> 85 #include <compat/linux/linux_emul.h> 86 #include <compat/linux/linux_misc.h> 87 88 int stclohz; /* Statistics clock frequency */ 89 90 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 91 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 92 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 93 RLIMIT_MEMLOCK, RLIMIT_AS 94 }; 95 96 struct l_sysinfo { 97 l_long uptime; /* Seconds since boot */ 98 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 99 #define LINUX_SYSINFO_LOADS_SCALE 65536 100 l_ulong totalram; /* Total usable main memory size */ 101 l_ulong freeram; /* Available memory size */ 102 l_ulong sharedram; /* Amount of shared memory */ 103 l_ulong bufferram; /* Memory used by buffers */ 104 l_ulong totalswap; /* Total swap space size */ 105 l_ulong freeswap; /* swap space still available */ 106 l_ushort procs; /* Number of current processes */ 107 l_ushort pads; 108 l_ulong totalhigh; 109 l_ulong freehigh; 110 l_uint mem_unit; 111 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 112 }; 113 114 struct l_pselect6arg { 115 l_uintptr_t ss; 116 l_size_t ss_len; 117 }; 118 119 static int linux_utimensat_lts_to_ts(struct l_timespec *, 120 struct timespec *); 121 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 122 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 123 struct timespec *); 124 #endif 125 static int linux_common_utimensat(struct thread *, int, 126 const char *, struct timespec *, int); 127 static int linux_common_pselect6(struct thread *, l_int, 128 l_fd_set *, l_fd_set *, l_fd_set *, 129 struct timespec *, l_uintptr_t *); 130 static int linux_common_ppoll(struct thread *, struct pollfd *, 131 uint32_t, struct timespec *, l_sigset_t *, 132 l_size_t); 133 static int linux_pollin(struct thread *, struct pollfd *, 134 struct pollfd *, u_int); 135 static int linux_pollout(struct thread *, struct pollfd *, 136 struct pollfd *, u_int); 137 138 int 139 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 140 { 141 struct l_sysinfo sysinfo; 142 int i, j; 143 struct timespec ts; 144 145 bzero(&sysinfo, sizeof(sysinfo)); 146 getnanouptime(&ts); 147 if (ts.tv_nsec != 0) 148 ts.tv_sec++; 149 sysinfo.uptime = ts.tv_sec; 150 151 /* Use the information from the mib to get our load averages */ 152 for (i = 0; i < 3; i++) 153 sysinfo.loads[i] = averunnable.ldavg[i] * 154 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 155 156 sysinfo.totalram = physmem * PAGE_SIZE; 157 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 158 159 /* 160 * sharedram counts pages allocated to named, swap-backed objects such 161 * as shared memory segments and tmpfs files. There is no cheap way to 162 * compute this, so just leave the field unpopulated. Linux itself only 163 * started setting this field in the 3.x timeframe. 164 */ 165 sysinfo.sharedram = 0; 166 sysinfo.bufferram = 0; 167 168 swap_pager_status(&i, &j); 169 sysinfo.totalswap = i * PAGE_SIZE; 170 sysinfo.freeswap = (i - j) * PAGE_SIZE; 171 172 sysinfo.procs = nprocs; 173 174 /* 175 * Platforms supported by the emulation layer do not have a notion of 176 * high memory. 177 */ 178 sysinfo.totalhigh = 0; 179 sysinfo.freehigh = 0; 180 181 sysinfo.mem_unit = 1; 182 183 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 184 } 185 186 #ifdef LINUX_LEGACY_SYSCALLS 187 int 188 linux_alarm(struct thread *td, struct linux_alarm_args *args) 189 { 190 struct itimerval it, old_it; 191 u_int secs; 192 int error __diagused; 193 194 secs = args->secs; 195 /* 196 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 197 * to match kern_setitimer()'s limit to avoid error from it. 198 * 199 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 200 * platforms. 201 */ 202 if (secs > INT32_MAX / 2) 203 secs = INT32_MAX / 2; 204 205 it.it_value.tv_sec = secs; 206 it.it_value.tv_usec = 0; 207 timevalclear(&it.it_interval); 208 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 209 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 210 211 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 212 old_it.it_value.tv_usec >= 500000) 213 old_it.it_value.tv_sec++; 214 td->td_retval[0] = old_it.it_value.tv_sec; 215 return (0); 216 } 217 #endif 218 219 int 220 linux_brk(struct thread *td, struct linux_brk_args *args) 221 { 222 struct vmspace *vm = td->td_proc->p_vmspace; 223 uintptr_t new, old; 224 225 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 226 new = (uintptr_t)args->dsend; 227 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 228 td->td_retval[0] = (register_t)new; 229 else 230 td->td_retval[0] = (register_t)old; 231 232 return (0); 233 } 234 235 #ifdef LINUX_LEGACY_SYSCALLS 236 int 237 linux_select(struct thread *td, struct linux_select_args *args) 238 { 239 l_timeval ltv; 240 struct timeval tv0, tv1, utv, *tvp; 241 int error; 242 243 /* 244 * Store current time for computation of the amount of 245 * time left. 246 */ 247 if (args->timeout) { 248 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 249 goto select_out; 250 utv.tv_sec = ltv.tv_sec; 251 utv.tv_usec = ltv.tv_usec; 252 253 if (itimerfix(&utv)) { 254 /* 255 * The timeval was invalid. Convert it to something 256 * valid that will act as it does under Linux. 257 */ 258 utv.tv_sec += utv.tv_usec / 1000000; 259 utv.tv_usec %= 1000000; 260 if (utv.tv_usec < 0) { 261 utv.tv_sec -= 1; 262 utv.tv_usec += 1000000; 263 } 264 if (utv.tv_sec < 0) 265 timevalclear(&utv); 266 } 267 microtime(&tv0); 268 tvp = &utv; 269 } else 270 tvp = NULL; 271 272 error = kern_select(td, args->nfds, args->readfds, args->writefds, 273 args->exceptfds, tvp, LINUX_NFDBITS); 274 if (error) 275 goto select_out; 276 277 if (args->timeout) { 278 if (td->td_retval[0]) { 279 /* 280 * Compute how much time was left of the timeout, 281 * by subtracting the current time and the time 282 * before we started the call, and subtracting 283 * that result from the user-supplied value. 284 */ 285 microtime(&tv1); 286 timevalsub(&tv1, &tv0); 287 timevalsub(&utv, &tv1); 288 if (utv.tv_sec < 0) 289 timevalclear(&utv); 290 } else 291 timevalclear(&utv); 292 ltv.tv_sec = utv.tv_sec; 293 ltv.tv_usec = utv.tv_usec; 294 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 295 goto select_out; 296 } 297 298 select_out: 299 return (error); 300 } 301 #endif 302 303 int 304 linux_mremap(struct thread *td, struct linux_mremap_args *args) 305 { 306 uintptr_t addr; 307 size_t len; 308 int error = 0; 309 310 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 311 td->td_retval[0] = 0; 312 return (EINVAL); 313 } 314 315 /* 316 * Check for the page alignment. 317 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 318 */ 319 if (args->addr & PAGE_MASK) { 320 td->td_retval[0] = 0; 321 return (EINVAL); 322 } 323 324 args->new_len = round_page(args->new_len); 325 args->old_len = round_page(args->old_len); 326 327 if (args->new_len > args->old_len) { 328 td->td_retval[0] = 0; 329 return (ENOMEM); 330 } 331 332 if (args->new_len < args->old_len) { 333 addr = args->addr + args->new_len; 334 len = args->old_len - args->new_len; 335 error = kern_munmap(td, addr, len); 336 } 337 338 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 339 return (error); 340 } 341 342 #define LINUX_MS_ASYNC 0x0001 343 #define LINUX_MS_INVALIDATE 0x0002 344 #define LINUX_MS_SYNC 0x0004 345 346 int 347 linux_msync(struct thread *td, struct linux_msync_args *args) 348 { 349 350 return (kern_msync(td, args->addr, args->len, 351 args->fl & ~LINUX_MS_SYNC)); 352 } 353 354 int 355 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 356 { 357 358 return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, 359 uap->prot)); 360 } 361 362 int 363 linux_madvise(struct thread *td, struct linux_madvise_args *uap) 364 { 365 366 return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, 367 uap->behav)); 368 } 369 370 int 371 linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) 372 { 373 #if defined(LINUX_ARCHWANT_MMAP2PGOFF) 374 /* 375 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is 376 * implemented with mmap2 syscall and the offset is represented in 377 * multiples of page size. 378 */ 379 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 380 uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); 381 #else 382 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 383 uap->flags, uap->fd, uap->pgoff)); 384 #endif 385 } 386 387 #ifdef LINUX_LEGACY_SYSCALLS 388 int 389 linux_time(struct thread *td, struct linux_time_args *args) 390 { 391 struct timeval tv; 392 l_time_t tm; 393 int error; 394 395 microtime(&tv); 396 tm = tv.tv_sec; 397 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 398 return (error); 399 td->td_retval[0] = tm; 400 return (0); 401 } 402 #endif 403 404 struct l_times_argv { 405 l_clock_t tms_utime; 406 l_clock_t tms_stime; 407 l_clock_t tms_cutime; 408 l_clock_t tms_cstime; 409 }; 410 411 /* 412 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 413 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 414 * auxiliary vector entry. 415 */ 416 #define CLK_TCK 100 417 418 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 419 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 420 421 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ 422 CONVNTCK(r) : CONVOTCK(r)) 423 424 int 425 linux_times(struct thread *td, struct linux_times_args *args) 426 { 427 struct timeval tv, utime, stime, cutime, cstime; 428 struct l_times_argv tms; 429 struct proc *p; 430 int error; 431 432 if (args->buf != NULL) { 433 p = td->td_proc; 434 PROC_LOCK(p); 435 PROC_STATLOCK(p); 436 calcru(p, &utime, &stime); 437 PROC_STATUNLOCK(p); 438 calccru(p, &cutime, &cstime); 439 PROC_UNLOCK(p); 440 441 tms.tms_utime = CONVTCK(utime); 442 tms.tms_stime = CONVTCK(stime); 443 444 tms.tms_cutime = CONVTCK(cutime); 445 tms.tms_cstime = CONVTCK(cstime); 446 447 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 448 return (error); 449 } 450 451 microuptime(&tv); 452 td->td_retval[0] = (int)CONVTCK(tv); 453 return (0); 454 } 455 456 int 457 linux_newuname(struct thread *td, struct linux_newuname_args *args) 458 { 459 struct l_new_utsname utsname; 460 char osname[LINUX_MAX_UTSNAME]; 461 char osrelease[LINUX_MAX_UTSNAME]; 462 char *p; 463 464 linux_get_osname(td, osname); 465 linux_get_osrelease(td, osrelease); 466 467 bzero(&utsname, sizeof(utsname)); 468 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 469 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 470 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 471 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 472 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 473 for (p = utsname.version; *p != '\0'; ++p) 474 if (*p == '\n') { 475 *p = '\0'; 476 break; 477 } 478 #if defined(__amd64__) 479 /* 480 * On amd64, Linux uname(2) needs to return "x86_64" 481 * for both 64-bit and 32-bit applications. On 32-bit, 482 * the string returned by getauxval(AT_PLATFORM) needs 483 * to remain "i686", though. 484 */ 485 #if defined(COMPAT_LINUX32) 486 if (linux32_emulate_i386) 487 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 488 else 489 #endif 490 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 491 #elif defined(__aarch64__) 492 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 493 #elif defined(__i386__) 494 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 495 #endif 496 497 return (copyout(&utsname, args->buf, sizeof(utsname))); 498 } 499 500 struct l_utimbuf { 501 l_time_t l_actime; 502 l_time_t l_modtime; 503 }; 504 505 #ifdef LINUX_LEGACY_SYSCALLS 506 int 507 linux_utime(struct thread *td, struct linux_utime_args *args) 508 { 509 struct timeval tv[2], *tvp; 510 struct l_utimbuf lut; 511 int error; 512 513 if (args->times) { 514 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 515 return (error); 516 tv[0].tv_sec = lut.l_actime; 517 tv[0].tv_usec = 0; 518 tv[1].tv_sec = lut.l_modtime; 519 tv[1].tv_usec = 0; 520 tvp = tv; 521 } else 522 tvp = NULL; 523 524 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 525 tvp, UIO_SYSSPACE)); 526 } 527 #endif 528 529 #ifdef LINUX_LEGACY_SYSCALLS 530 int 531 linux_utimes(struct thread *td, struct linux_utimes_args *args) 532 { 533 l_timeval ltv[2]; 534 struct timeval tv[2], *tvp = NULL; 535 int error; 536 537 if (args->tptr != NULL) { 538 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 539 return (error); 540 tv[0].tv_sec = ltv[0].tv_sec; 541 tv[0].tv_usec = ltv[0].tv_usec; 542 tv[1].tv_sec = ltv[1].tv_sec; 543 tv[1].tv_usec = ltv[1].tv_usec; 544 tvp = tv; 545 } 546 547 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 548 tvp, UIO_SYSSPACE)); 549 } 550 #endif 551 552 static int 553 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 554 { 555 556 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 557 l_times->tv_nsec != LINUX_UTIME_NOW && 558 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 559 return (EINVAL); 560 561 times->tv_sec = l_times->tv_sec; 562 switch (l_times->tv_nsec) 563 { 564 case LINUX_UTIME_OMIT: 565 times->tv_nsec = UTIME_OMIT; 566 break; 567 case LINUX_UTIME_NOW: 568 times->tv_nsec = UTIME_NOW; 569 break; 570 default: 571 times->tv_nsec = l_times->tv_nsec; 572 } 573 574 return (0); 575 } 576 577 static int 578 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 579 struct timespec *timesp, int lflags) 580 { 581 int dfd, flags = 0; 582 583 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 584 585 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 586 return (EINVAL); 587 588 if (timesp != NULL) { 589 /* This breaks POSIX, but is what the Linux kernel does 590 * _on purpose_ (documented in the man page for utimensat(2)), 591 * so we must follow that behaviour. */ 592 if (timesp[0].tv_nsec == UTIME_OMIT && 593 timesp[1].tv_nsec == UTIME_OMIT) 594 return (0); 595 } 596 597 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 598 flags |= AT_SYMLINK_NOFOLLOW; 599 if (lflags & LINUX_AT_EMPTY_PATH) 600 flags |= AT_EMPTY_PATH; 601 602 if (pathname != NULL) 603 return (kern_utimensat(td, dfd, pathname, 604 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 605 606 if (lflags != 0) 607 return (EINVAL); 608 609 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 610 } 611 612 int 613 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 614 { 615 struct l_timespec l_times[2]; 616 struct timespec times[2], *timesp; 617 int error; 618 619 if (args->times != NULL) { 620 error = copyin(args->times, l_times, sizeof(l_times)); 621 if (error != 0) 622 return (error); 623 624 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 625 if (error != 0) 626 return (error); 627 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 628 if (error != 0) 629 return (error); 630 timesp = times; 631 } else 632 timesp = NULL; 633 634 return (linux_common_utimensat(td, args->dfd, args->pathname, 635 timesp, args->flags)); 636 } 637 638 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 639 static int 640 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 641 { 642 643 /* Zero out the padding in compat mode. */ 644 l_times->tv_nsec &= 0xFFFFFFFFUL; 645 646 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 647 l_times->tv_nsec != LINUX_UTIME_NOW && 648 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 649 return (EINVAL); 650 651 times->tv_sec = l_times->tv_sec; 652 switch (l_times->tv_nsec) 653 { 654 case LINUX_UTIME_OMIT: 655 times->tv_nsec = UTIME_OMIT; 656 break; 657 case LINUX_UTIME_NOW: 658 times->tv_nsec = UTIME_NOW; 659 break; 660 default: 661 times->tv_nsec = l_times->tv_nsec; 662 } 663 664 return (0); 665 } 666 667 int 668 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 669 { 670 struct l_timespec64 l_times[2]; 671 struct timespec times[2], *timesp; 672 int error; 673 674 if (args->times64 != NULL) { 675 error = copyin(args->times64, l_times, sizeof(l_times)); 676 if (error != 0) 677 return (error); 678 679 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 680 if (error != 0) 681 return (error); 682 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 683 if (error != 0) 684 return (error); 685 timesp = times; 686 } else 687 timesp = NULL; 688 689 return (linux_common_utimensat(td, args->dfd, args->pathname, 690 timesp, args->flags)); 691 } 692 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 693 694 #ifdef LINUX_LEGACY_SYSCALLS 695 int 696 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 697 { 698 l_timeval ltv[2]; 699 struct timeval tv[2], *tvp = NULL; 700 int error, dfd; 701 702 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 703 704 if (args->utimes != NULL) { 705 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 706 return (error); 707 tv[0].tv_sec = ltv[0].tv_sec; 708 tv[0].tv_usec = ltv[0].tv_usec; 709 tv[1].tv_sec = ltv[1].tv_sec; 710 tv[1].tv_usec = ltv[1].tv_usec; 711 tvp = tv; 712 } 713 714 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 715 tvp, UIO_SYSSPACE)); 716 } 717 #endif 718 719 static int 720 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 721 int options, void *rup, l_siginfo_t *infop) 722 { 723 l_siginfo_t lsi; 724 siginfo_t siginfo; 725 struct __wrusage wru; 726 int error, status, tmpstat, sig; 727 728 error = kern_wait6(td, idtype, id, &status, options, 729 rup != NULL ? &wru : NULL, &siginfo); 730 731 if (error == 0 && statusp) { 732 tmpstat = status & 0xffff; 733 if (WIFSIGNALED(tmpstat)) { 734 tmpstat = (tmpstat & 0xffffff80) | 735 bsd_to_linux_signal(WTERMSIG(tmpstat)); 736 } else if (WIFSTOPPED(tmpstat)) { 737 tmpstat = (tmpstat & 0xffff00ff) | 738 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 739 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 740 if (WSTOPSIG(status) == SIGTRAP) { 741 tmpstat = linux_ptrace_status(td, 742 siginfo.si_pid, tmpstat); 743 } 744 #endif 745 } else if (WIFCONTINUED(tmpstat)) { 746 tmpstat = 0xffff; 747 } 748 error = copyout(&tmpstat, statusp, sizeof(int)); 749 } 750 if (error == 0 && rup != NULL) 751 error = linux_copyout_rusage(&wru.wru_self, rup); 752 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 753 sig = bsd_to_linux_signal(siginfo.si_signo); 754 memset(&lsi, 0, sizeof(lsi)); 755 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 756 error = copyout(&lsi, infop, sizeof(lsi)); 757 } 758 759 return (error); 760 } 761 762 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 763 int 764 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 765 { 766 struct linux_wait4_args wait4_args = { 767 .pid = args->pid, 768 .status = args->status, 769 .options = args->options, 770 .rusage = NULL, 771 }; 772 773 return (linux_wait4(td, &wait4_args)); 774 } 775 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 776 777 int 778 linux_wait4(struct thread *td, struct linux_wait4_args *args) 779 { 780 struct proc *p; 781 int options, id, idtype; 782 783 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 784 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 785 return (EINVAL); 786 787 /* -INT_MIN is not defined. */ 788 if (args->pid == INT_MIN) 789 return (ESRCH); 790 791 options = 0; 792 linux_to_bsd_waitopts(args->options, &options); 793 794 /* 795 * For backward compatibility we implicitly add flags WEXITED 796 * and WTRAPPED here. 797 */ 798 options |= WEXITED | WTRAPPED; 799 800 if (args->pid == WAIT_ANY) { 801 idtype = P_ALL; 802 id = 0; 803 } else if (args->pid < 0) { 804 idtype = P_PGID; 805 id = (id_t)-args->pid; 806 } else if (args->pid == 0) { 807 idtype = P_PGID; 808 p = td->td_proc; 809 PROC_LOCK(p); 810 id = p->p_pgid; 811 PROC_UNLOCK(p); 812 } else { 813 idtype = P_PID; 814 id = (id_t)args->pid; 815 } 816 817 return (linux_common_wait(td, idtype, id, args->status, options, 818 args->rusage, NULL)); 819 } 820 821 int 822 linux_waitid(struct thread *td, struct linux_waitid_args *args) 823 { 824 idtype_t idtype; 825 int error, options; 826 struct proc *p; 827 pid_t id; 828 829 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 830 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 831 return (EINVAL); 832 833 options = 0; 834 linux_to_bsd_waitopts(args->options, &options); 835 836 id = args->id; 837 switch (args->idtype) { 838 case LINUX_P_ALL: 839 idtype = P_ALL; 840 break; 841 case LINUX_P_PID: 842 if (args->id <= 0) 843 return (EINVAL); 844 idtype = P_PID; 845 break; 846 case LINUX_P_PGID: 847 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 848 p = td->td_proc; 849 PROC_LOCK(p); 850 id = p->p_pgid; 851 PROC_UNLOCK(p); 852 } else if (args->id <= 0) 853 return (EINVAL); 854 idtype = P_PGID; 855 break; 856 case LINUX_P_PIDFD: 857 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 858 return (ENOSYS); 859 default: 860 return (EINVAL); 861 } 862 863 error = linux_common_wait(td, idtype, id, NULL, options, 864 args->rusage, args->info); 865 td->td_retval[0] = 0; 866 867 return (error); 868 } 869 870 #ifdef LINUX_LEGACY_SYSCALLS 871 int 872 linux_mknod(struct thread *td, struct linux_mknod_args *args) 873 { 874 int error; 875 876 switch (args->mode & S_IFMT) { 877 case S_IFIFO: 878 case S_IFSOCK: 879 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 880 args->mode); 881 break; 882 883 case S_IFCHR: 884 case S_IFBLK: 885 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 886 args->mode, linux_decode_dev(args->dev)); 887 break; 888 889 case S_IFDIR: 890 error = EPERM; 891 break; 892 893 case 0: 894 args->mode |= S_IFREG; 895 /* FALLTHROUGH */ 896 case S_IFREG: 897 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 898 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 899 if (error == 0) 900 kern_close(td, td->td_retval[0]); 901 break; 902 903 default: 904 error = EINVAL; 905 break; 906 } 907 return (error); 908 } 909 #endif 910 911 int 912 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 913 { 914 int error, dfd; 915 916 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 917 918 switch (args->mode & S_IFMT) { 919 case S_IFIFO: 920 case S_IFSOCK: 921 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 922 args->mode); 923 break; 924 925 case S_IFCHR: 926 case S_IFBLK: 927 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 928 args->mode, linux_decode_dev(args->dev)); 929 break; 930 931 case S_IFDIR: 932 error = EPERM; 933 break; 934 935 case 0: 936 args->mode |= S_IFREG; 937 /* FALLTHROUGH */ 938 case S_IFREG: 939 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 940 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 941 if (error == 0) 942 kern_close(td, td->td_retval[0]); 943 break; 944 945 default: 946 error = EINVAL; 947 break; 948 } 949 return (error); 950 } 951 952 /* 953 * UGH! This is just about the dumbest idea I've ever heard!! 954 */ 955 int 956 linux_personality(struct thread *td, struct linux_personality_args *args) 957 { 958 struct linux_pemuldata *pem; 959 struct proc *p = td->td_proc; 960 uint32_t old; 961 962 PROC_LOCK(p); 963 pem = pem_find(p); 964 old = pem->persona; 965 if (args->per != 0xffffffff) 966 pem->persona = args->per; 967 PROC_UNLOCK(p); 968 969 td->td_retval[0] = old; 970 return (0); 971 } 972 973 struct l_itimerval { 974 l_timeval it_interval; 975 l_timeval it_value; 976 }; 977 978 #define B2L_ITIMERVAL(bip, lip) \ 979 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 980 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 981 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 982 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 983 984 int 985 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 986 { 987 int error; 988 struct l_itimerval ls; 989 struct itimerval aitv, oitv; 990 991 if (uap->itv == NULL) { 992 uap->itv = uap->oitv; 993 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 994 } 995 996 error = copyin(uap->itv, &ls, sizeof(ls)); 997 if (error != 0) 998 return (error); 999 B2L_ITIMERVAL(&aitv, &ls); 1000 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1001 if (error != 0 || uap->oitv == NULL) 1002 return (error); 1003 B2L_ITIMERVAL(&ls, &oitv); 1004 1005 return (copyout(&ls, uap->oitv, sizeof(ls))); 1006 } 1007 1008 int 1009 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1010 { 1011 int error; 1012 struct l_itimerval ls; 1013 struct itimerval aitv; 1014 1015 error = kern_getitimer(td, uap->which, &aitv); 1016 if (error != 0) 1017 return (error); 1018 B2L_ITIMERVAL(&ls, &aitv); 1019 return (copyout(&ls, uap->itv, sizeof(ls))); 1020 } 1021 1022 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1023 int 1024 linux_nice(struct thread *td, struct linux_nice_args *args) 1025 { 1026 1027 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1028 } 1029 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1030 1031 int 1032 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1033 { 1034 const int ngrp = args->gidsetsize; 1035 struct ucred *newcred, *oldcred; 1036 l_gid_t *linux_gidset; 1037 int error; 1038 struct proc *p; 1039 1040 if (ngrp < 0 || ngrp > ngroups_max) 1041 return (EINVAL); 1042 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1043 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1044 if (error) 1045 goto out; 1046 1047 newcred = crget(); 1048 crextend(newcred, ngrp); 1049 p = td->td_proc; 1050 PROC_LOCK(p); 1051 oldcred = crcopysafe(p, newcred); 1052 1053 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1054 PROC_UNLOCK(p); 1055 crfree(newcred); 1056 goto out; 1057 } 1058 1059 newcred->cr_ngroups = ngrp; 1060 for (int i = 0; i < ngrp; i++) 1061 newcred->cr_groups[i] = linux_gidset[i]; 1062 newcred->cr_flags |= CRED_FLAG_GROUPSET; 1063 1064 setsugid(p); 1065 proc_set_cred(p, newcred); 1066 PROC_UNLOCK(p); 1067 crfree(oldcred); 1068 error = 0; 1069 out: 1070 free(linux_gidset, M_LINUX); 1071 return (error); 1072 } 1073 1074 int 1075 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1076 { 1077 const struct ucred *const cred = td->td_ucred; 1078 l_gid_t *linux_gidset; 1079 int ngrp, error; 1080 1081 ngrp = args->gidsetsize; 1082 1083 if (ngrp == 0) { 1084 td->td_retval[0] = cred->cr_ngroups; 1085 return (0); 1086 } 1087 if (ngrp < cred->cr_ngroups) 1088 return (EINVAL); 1089 1090 ngrp = cred->cr_ngroups; 1091 1092 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1093 for (int i = 0; i < ngrp; ++i) 1094 linux_gidset[i] = cred->cr_groups[i]; 1095 1096 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1097 free(linux_gidset, M_LINUX); 1098 1099 if (error != 0) 1100 return (error); 1101 1102 td->td_retval[0] = ngrp; 1103 return (0); 1104 } 1105 1106 static bool 1107 linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim) 1108 { 1109 ssize_t size; 1110 int res, error; 1111 1112 if (linux_dummy_rlimits == 0) 1113 return (false); 1114 1115 switch (resource) { 1116 case LINUX_RLIMIT_LOCKS: 1117 case LINUX_RLIMIT_RTTIME: 1118 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1119 rlim->rlim_max = LINUX_RLIM_INFINITY; 1120 return (true); 1121 case LINUX_RLIMIT_NICE: 1122 case LINUX_RLIMIT_RTPRIO: 1123 rlim->rlim_cur = 0; 1124 rlim->rlim_max = 0; 1125 return (true); 1126 case LINUX_RLIMIT_SIGPENDING: 1127 error = kernel_sysctlbyname(td, 1128 "kern.sigqueue.max_pending_per_proc", 1129 &res, &size, 0, 0, 0, 0); 1130 if (error != 0) 1131 return (false); 1132 rlim->rlim_cur = res; 1133 rlim->rlim_max = res; 1134 return (true); 1135 case LINUX_RLIMIT_MSGQUEUE: 1136 error = kernel_sysctlbyname(td, 1137 "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); 1138 if (error != 0) 1139 return (false); 1140 rlim->rlim_cur = res; 1141 rlim->rlim_max = res; 1142 return (true); 1143 default: 1144 return (false); 1145 } 1146 } 1147 1148 int 1149 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1150 { 1151 struct rlimit bsd_rlim; 1152 struct l_rlimit rlim; 1153 u_int which; 1154 int error; 1155 1156 if (args->resource >= LINUX_RLIM_NLIMITS) 1157 return (EINVAL); 1158 1159 which = linux_to_bsd_resource[args->resource]; 1160 if (which == -1) 1161 return (EINVAL); 1162 1163 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1164 if (error) 1165 return (error); 1166 1167 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1168 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1169 return (kern_setrlimit(td, which, &bsd_rlim)); 1170 } 1171 1172 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1173 int 1174 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1175 { 1176 struct l_rlimit rlim; 1177 struct rlimit bsd_rlim; 1178 u_int which; 1179 1180 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1181 rlim.rlim_cur = bsd_rlim.rlim_cur; 1182 rlim.rlim_max = bsd_rlim.rlim_max; 1183 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1184 } 1185 1186 if (args->resource >= LINUX_RLIM_NLIMITS) 1187 return (EINVAL); 1188 1189 which = linux_to_bsd_resource[args->resource]; 1190 if (which == -1) 1191 return (EINVAL); 1192 1193 lim_rlimit(td, which, &bsd_rlim); 1194 1195 #ifdef COMPAT_LINUX32 1196 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1197 if (rlim.rlim_cur == UINT_MAX) 1198 rlim.rlim_cur = INT_MAX; 1199 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1200 if (rlim.rlim_max == UINT_MAX) 1201 rlim.rlim_max = INT_MAX; 1202 #else 1203 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1204 if (rlim.rlim_cur == ULONG_MAX) 1205 rlim.rlim_cur = LONG_MAX; 1206 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1207 if (rlim.rlim_max == ULONG_MAX) 1208 rlim.rlim_max = LONG_MAX; 1209 #endif 1210 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1211 } 1212 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1213 1214 int 1215 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1216 { 1217 struct l_rlimit rlim; 1218 struct rlimit bsd_rlim; 1219 u_int which; 1220 1221 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1222 rlim.rlim_cur = bsd_rlim.rlim_cur; 1223 rlim.rlim_max = bsd_rlim.rlim_max; 1224 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1225 } 1226 1227 if (args->resource >= LINUX_RLIM_NLIMITS) 1228 return (EINVAL); 1229 1230 which = linux_to_bsd_resource[args->resource]; 1231 if (which == -1) 1232 return (EINVAL); 1233 1234 lim_rlimit(td, which, &bsd_rlim); 1235 1236 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1237 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1238 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1239 } 1240 1241 int 1242 linux_sched_setscheduler(struct thread *td, 1243 struct linux_sched_setscheduler_args *args) 1244 { 1245 struct sched_param sched_param; 1246 struct thread *tdt; 1247 int error, policy; 1248 1249 switch (args->policy) { 1250 case LINUX_SCHED_OTHER: 1251 policy = SCHED_OTHER; 1252 break; 1253 case LINUX_SCHED_FIFO: 1254 policy = SCHED_FIFO; 1255 break; 1256 case LINUX_SCHED_RR: 1257 policy = SCHED_RR; 1258 break; 1259 default: 1260 return (EINVAL); 1261 } 1262 1263 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1264 if (error) 1265 return (error); 1266 1267 if (linux_map_sched_prio) { 1268 switch (policy) { 1269 case SCHED_OTHER: 1270 if (sched_param.sched_priority != 0) 1271 return (EINVAL); 1272 1273 sched_param.sched_priority = 1274 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1275 break; 1276 case SCHED_FIFO: 1277 case SCHED_RR: 1278 if (sched_param.sched_priority < 1 || 1279 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1280 return (EINVAL); 1281 1282 /* 1283 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1284 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1285 */ 1286 sched_param.sched_priority = 1287 (sched_param.sched_priority - 1) * 1288 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1289 (LINUX_MAX_RT_PRIO - 1); 1290 break; 1291 } 1292 } 1293 1294 tdt = linux_tdfind(td, args->pid, -1); 1295 if (tdt == NULL) 1296 return (ESRCH); 1297 1298 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1299 PROC_UNLOCK(tdt->td_proc); 1300 return (error); 1301 } 1302 1303 int 1304 linux_sched_getscheduler(struct thread *td, 1305 struct linux_sched_getscheduler_args *args) 1306 { 1307 struct thread *tdt; 1308 int error, policy; 1309 1310 tdt = linux_tdfind(td, args->pid, -1); 1311 if (tdt == NULL) 1312 return (ESRCH); 1313 1314 error = kern_sched_getscheduler(td, tdt, &policy); 1315 PROC_UNLOCK(tdt->td_proc); 1316 1317 switch (policy) { 1318 case SCHED_OTHER: 1319 td->td_retval[0] = LINUX_SCHED_OTHER; 1320 break; 1321 case SCHED_FIFO: 1322 td->td_retval[0] = LINUX_SCHED_FIFO; 1323 break; 1324 case SCHED_RR: 1325 td->td_retval[0] = LINUX_SCHED_RR; 1326 break; 1327 } 1328 return (error); 1329 } 1330 1331 int 1332 linux_sched_get_priority_max(struct thread *td, 1333 struct linux_sched_get_priority_max_args *args) 1334 { 1335 struct sched_get_priority_max_args bsd; 1336 1337 if (linux_map_sched_prio) { 1338 switch (args->policy) { 1339 case LINUX_SCHED_OTHER: 1340 td->td_retval[0] = 0; 1341 return (0); 1342 case LINUX_SCHED_FIFO: 1343 case LINUX_SCHED_RR: 1344 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1345 return (0); 1346 default: 1347 return (EINVAL); 1348 } 1349 } 1350 1351 switch (args->policy) { 1352 case LINUX_SCHED_OTHER: 1353 bsd.policy = SCHED_OTHER; 1354 break; 1355 case LINUX_SCHED_FIFO: 1356 bsd.policy = SCHED_FIFO; 1357 break; 1358 case LINUX_SCHED_RR: 1359 bsd.policy = SCHED_RR; 1360 break; 1361 default: 1362 return (EINVAL); 1363 } 1364 return (sys_sched_get_priority_max(td, &bsd)); 1365 } 1366 1367 int 1368 linux_sched_get_priority_min(struct thread *td, 1369 struct linux_sched_get_priority_min_args *args) 1370 { 1371 struct sched_get_priority_min_args bsd; 1372 1373 if (linux_map_sched_prio) { 1374 switch (args->policy) { 1375 case LINUX_SCHED_OTHER: 1376 td->td_retval[0] = 0; 1377 return (0); 1378 case LINUX_SCHED_FIFO: 1379 case LINUX_SCHED_RR: 1380 td->td_retval[0] = 1; 1381 return (0); 1382 default: 1383 return (EINVAL); 1384 } 1385 } 1386 1387 switch (args->policy) { 1388 case LINUX_SCHED_OTHER: 1389 bsd.policy = SCHED_OTHER; 1390 break; 1391 case LINUX_SCHED_FIFO: 1392 bsd.policy = SCHED_FIFO; 1393 break; 1394 case LINUX_SCHED_RR: 1395 bsd.policy = SCHED_RR; 1396 break; 1397 default: 1398 return (EINVAL); 1399 } 1400 return (sys_sched_get_priority_min(td, &bsd)); 1401 } 1402 1403 #define REBOOT_CAD_ON 0x89abcdef 1404 #define REBOOT_CAD_OFF 0 1405 #define REBOOT_HALT 0xcdef0123 1406 #define REBOOT_RESTART 0x01234567 1407 #define REBOOT_RESTART2 0xA1B2C3D4 1408 #define REBOOT_POWEROFF 0x4321FEDC 1409 #define REBOOT_MAGIC1 0xfee1dead 1410 #define REBOOT_MAGIC2 0x28121969 1411 #define REBOOT_MAGIC2A 0x05121996 1412 #define REBOOT_MAGIC2B 0x16041998 1413 1414 int 1415 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1416 { 1417 struct reboot_args bsd_args; 1418 1419 if (args->magic1 != REBOOT_MAGIC1) 1420 return (EINVAL); 1421 1422 switch (args->magic2) { 1423 case REBOOT_MAGIC2: 1424 case REBOOT_MAGIC2A: 1425 case REBOOT_MAGIC2B: 1426 break; 1427 default: 1428 return (EINVAL); 1429 } 1430 1431 switch (args->cmd) { 1432 case REBOOT_CAD_ON: 1433 case REBOOT_CAD_OFF: 1434 return (priv_check(td, PRIV_REBOOT)); 1435 case REBOOT_HALT: 1436 bsd_args.opt = RB_HALT; 1437 break; 1438 case REBOOT_RESTART: 1439 case REBOOT_RESTART2: 1440 bsd_args.opt = 0; 1441 break; 1442 case REBOOT_POWEROFF: 1443 bsd_args.opt = RB_POWEROFF; 1444 break; 1445 default: 1446 return (EINVAL); 1447 } 1448 return (sys_reboot(td, &bsd_args)); 1449 } 1450 1451 int 1452 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1453 { 1454 1455 td->td_retval[0] = td->td_proc->p_pid; 1456 1457 return (0); 1458 } 1459 1460 int 1461 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1462 { 1463 struct linux_emuldata *em; 1464 1465 em = em_find(td); 1466 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1467 1468 td->td_retval[0] = em->em_tid; 1469 1470 return (0); 1471 } 1472 1473 int 1474 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1475 { 1476 1477 td->td_retval[0] = kern_getppid(td); 1478 return (0); 1479 } 1480 1481 int 1482 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1483 { 1484 1485 td->td_retval[0] = td->td_ucred->cr_rgid; 1486 return (0); 1487 } 1488 1489 int 1490 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1491 { 1492 1493 td->td_retval[0] = td->td_ucred->cr_ruid; 1494 return (0); 1495 } 1496 1497 int 1498 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1499 { 1500 1501 return (kern_getsid(td, args->pid)); 1502 } 1503 1504 int 1505 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1506 { 1507 int error; 1508 1509 error = kern_getpriority(td, args->which, args->who); 1510 td->td_retval[0] = 20 - td->td_retval[0]; 1511 return (error); 1512 } 1513 1514 int 1515 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1516 { 1517 int name[2]; 1518 1519 name[0] = CTL_KERN; 1520 name[1] = KERN_HOSTNAME; 1521 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1522 args->len, 0, 0)); 1523 } 1524 1525 int 1526 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1527 { 1528 int name[2]; 1529 1530 name[0] = CTL_KERN; 1531 name[1] = KERN_NISDOMAINNAME; 1532 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1533 args->len, 0, 0)); 1534 } 1535 1536 int 1537 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1538 { 1539 1540 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1541 args->error_code); 1542 1543 /* 1544 * XXX: we should send a signal to the parent if 1545 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1546 * as it doesnt occur often. 1547 */ 1548 kern_exit(td, args->error_code, 0); 1549 return (0); 1550 } 1551 1552 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1553 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1554 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1555 1556 struct l_user_cap_header { 1557 l_int version; 1558 l_int pid; 1559 }; 1560 1561 struct l_user_cap_data { 1562 l_int effective; 1563 l_int permitted; 1564 l_int inheritable; 1565 }; 1566 1567 int 1568 linux_capget(struct thread *td, struct linux_capget_args *uap) 1569 { 1570 struct l_user_cap_header luch; 1571 struct l_user_cap_data lucd[2]; 1572 int error, u32s; 1573 1574 if (uap->hdrp == NULL) 1575 return (EFAULT); 1576 1577 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1578 if (error != 0) 1579 return (error); 1580 1581 switch (luch.version) { 1582 case _LINUX_CAPABILITY_VERSION_1: 1583 u32s = 1; 1584 break; 1585 case _LINUX_CAPABILITY_VERSION_2: 1586 case _LINUX_CAPABILITY_VERSION_3: 1587 u32s = 2; 1588 break; 1589 default: 1590 luch.version = _LINUX_CAPABILITY_VERSION_1; 1591 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1592 if (error) 1593 return (error); 1594 return (EINVAL); 1595 } 1596 1597 if (luch.pid) 1598 return (EPERM); 1599 1600 if (uap->datap) { 1601 /* 1602 * The current implementation doesn't support setting 1603 * a capability (it's essentially a stub) so indicate 1604 * that no capabilities are currently set or available 1605 * to request. 1606 */ 1607 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1608 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1609 } 1610 1611 return (error); 1612 } 1613 1614 int 1615 linux_capset(struct thread *td, struct linux_capset_args *uap) 1616 { 1617 struct l_user_cap_header luch; 1618 struct l_user_cap_data lucd[2]; 1619 int error, i, u32s; 1620 1621 if (uap->hdrp == NULL || uap->datap == NULL) 1622 return (EFAULT); 1623 1624 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1625 if (error != 0) 1626 return (error); 1627 1628 switch (luch.version) { 1629 case _LINUX_CAPABILITY_VERSION_1: 1630 u32s = 1; 1631 break; 1632 case _LINUX_CAPABILITY_VERSION_2: 1633 case _LINUX_CAPABILITY_VERSION_3: 1634 u32s = 2; 1635 break; 1636 default: 1637 luch.version = _LINUX_CAPABILITY_VERSION_1; 1638 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1639 if (error) 1640 return (error); 1641 return (EINVAL); 1642 } 1643 1644 if (luch.pid) 1645 return (EPERM); 1646 1647 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1648 if (error != 0) 1649 return (error); 1650 1651 /* We currently don't support setting any capabilities. */ 1652 for (i = 0; i < u32s; i++) { 1653 if (lucd[i].effective || lucd[i].permitted || 1654 lucd[i].inheritable) { 1655 linux_msg(td, 1656 "capset[%d] effective=0x%x, permitted=0x%x, " 1657 "inheritable=0x%x is not implemented", i, 1658 (int)lucd[i].effective, (int)lucd[i].permitted, 1659 (int)lucd[i].inheritable); 1660 return (EPERM); 1661 } 1662 } 1663 1664 return (0); 1665 } 1666 1667 int 1668 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1669 { 1670 int error = 0, max_size, arg; 1671 struct proc *p = td->td_proc; 1672 char comm[LINUX_MAX_COMM_LEN]; 1673 int pdeath_signal, trace_state; 1674 1675 switch (args->option) { 1676 case LINUX_PR_SET_PDEATHSIG: 1677 if (!LINUX_SIG_VALID(args->arg2)) 1678 return (EINVAL); 1679 pdeath_signal = linux_to_bsd_signal(args->arg2); 1680 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1681 &pdeath_signal)); 1682 case LINUX_PR_GET_PDEATHSIG: 1683 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1684 &pdeath_signal); 1685 if (error != 0) 1686 return (error); 1687 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1688 return (copyout(&pdeath_signal, 1689 (void *)(register_t)args->arg2, 1690 sizeof(pdeath_signal))); 1691 /* 1692 * In Linux, this flag controls if set[gu]id processes can coredump. 1693 * There are additional semantics imposed on processes that cannot 1694 * coredump: 1695 * - Such processes can not be ptraced. 1696 * - There are some semantics around ownership of process-related files 1697 * in the /proc namespace. 1698 * 1699 * In FreeBSD, we can (and by default, do) disable setuid coredump 1700 * system-wide with 'sugid_coredump.' We control tracability on a 1701 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1702 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1703 * procctl is roughly analogous to Linux's DUMPABLE. 1704 * 1705 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1706 */ 1707 case LINUX_PR_GET_DUMPABLE: 1708 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1709 &trace_state); 1710 if (error != 0) 1711 return (error); 1712 td->td_retval[0] = (trace_state != -1); 1713 return (0); 1714 case LINUX_PR_SET_DUMPABLE: 1715 /* 1716 * It is only valid for userspace to set one of these two 1717 * flags, and only one at a time. 1718 */ 1719 switch (args->arg2) { 1720 case LINUX_SUID_DUMP_DISABLE: 1721 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1722 break; 1723 case LINUX_SUID_DUMP_USER: 1724 trace_state = PROC_TRACE_CTL_ENABLE; 1725 break; 1726 default: 1727 return (EINVAL); 1728 } 1729 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1730 &trace_state)); 1731 case LINUX_PR_GET_KEEPCAPS: 1732 /* 1733 * Indicate that we always clear the effective and 1734 * permitted capability sets when the user id becomes 1735 * non-zero (actually the capability sets are simply 1736 * always zero in the current implementation). 1737 */ 1738 td->td_retval[0] = 0; 1739 break; 1740 case LINUX_PR_SET_KEEPCAPS: 1741 /* 1742 * Ignore requests to keep the effective and permitted 1743 * capability sets when the user id becomes non-zero. 1744 */ 1745 break; 1746 case LINUX_PR_SET_NAME: 1747 /* 1748 * To be on the safe side we need to make sure to not 1749 * overflow the size a Linux program expects. We already 1750 * do this here in the copyin, so that we don't need to 1751 * check on copyout. 1752 */ 1753 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1754 error = copyinstr((void *)(register_t)args->arg2, comm, 1755 max_size, NULL); 1756 1757 /* Linux silently truncates the name if it is too long. */ 1758 if (error == ENAMETOOLONG) { 1759 /* 1760 * XXX: copyinstr() isn't documented to populate the 1761 * array completely, so do a copyin() to be on the 1762 * safe side. This should be changed in case 1763 * copyinstr() is changed to guarantee this. 1764 */ 1765 error = copyin((void *)(register_t)args->arg2, comm, 1766 max_size - 1); 1767 comm[max_size - 1] = '\0'; 1768 } 1769 if (error) 1770 return (error); 1771 1772 PROC_LOCK(p); 1773 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1774 PROC_UNLOCK(p); 1775 break; 1776 case LINUX_PR_GET_NAME: 1777 PROC_LOCK(p); 1778 strlcpy(comm, p->p_comm, sizeof(comm)); 1779 PROC_UNLOCK(p); 1780 error = copyout(comm, (void *)(register_t)args->arg2, 1781 strlen(comm) + 1); 1782 break; 1783 case LINUX_PR_GET_SECCOMP: 1784 case LINUX_PR_SET_SECCOMP: 1785 /* 1786 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1787 */ 1788 error = EINVAL; 1789 break; 1790 case LINUX_PR_CAPBSET_READ: 1791 #if 0 1792 /* 1793 * This makes too much noise with Ubuntu Focal. 1794 */ 1795 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1796 (int)args->arg2); 1797 #endif 1798 error = EINVAL; 1799 break; 1800 case LINUX_PR_SET_CHILD_SUBREAPER: 1801 if (args->arg2 == 0) { 1802 return (kern_procctl(td, P_PID, 0, PROC_REAP_RELEASE, 1803 NULL)); 1804 } 1805 1806 return (kern_procctl(td, P_PID, 0, PROC_REAP_ACQUIRE, 1807 NULL)); 1808 case LINUX_PR_GET_CHILD_SUBREAPER: { 1809 struct procctl_reaper_status rs; 1810 l_int val; 1811 1812 error = kern_procctl(td, P_PID, 0, PROC_REAP_STATUS, &rs); 1813 if (error != 0) 1814 return (error); 1815 val = rs.rs_reaper == p->p_pid ? 1 : 0; 1816 error = copyout(&val, (void *)(register_t)args->arg2, 1817 sizeof(val)); 1818 break; 1819 } 1820 case LINUX_PR_SET_NO_NEW_PRIVS: 1821 arg = args->arg2 == 1 ? 1822 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1823 error = kern_procctl(td, P_PID, p->p_pid, 1824 PROC_NO_NEW_PRIVS_CTL, &arg); 1825 break; 1826 case LINUX_PR_GET_NO_NEW_PRIVS: 1827 error = kern_procctl(td, P_PID, p->p_pid, 1828 PROC_NO_NEW_PRIVS_STATUS, &arg); 1829 if (error != 0) 1830 return (error); 1831 /* Linux returns the value as the syscall return */ 1832 td->td_retval[0] = arg == PROC_NO_NEW_PRIVS_ENABLE ? 1 : 0; 1833 break; 1834 case LINUX_PR_SET_PTRACER: 1835 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1836 error = EINVAL; 1837 break; 1838 case LINUX_PR_SET_VMA: 1839 if (args->arg2 != LINUX_PR_SET_VMA_ANON_NAME) { 1840 linux_msg(td, "unsupported prctl PR_SET_VMA attr %ju", 1841 (uintmax_t)args->arg2); 1842 error = EINVAL; 1843 } 1844 break; 1845 default: 1846 linux_msg(td, "unsupported prctl option %d", args->option); 1847 error = EINVAL; 1848 break; 1849 } 1850 1851 return (error); 1852 } 1853 1854 int 1855 linux_sched_setparam(struct thread *td, 1856 struct linux_sched_setparam_args *uap) 1857 { 1858 struct sched_param sched_param; 1859 struct thread *tdt; 1860 int error, policy; 1861 1862 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1863 if (error) 1864 return (error); 1865 1866 tdt = linux_tdfind(td, uap->pid, -1); 1867 if (tdt == NULL) 1868 return (ESRCH); 1869 1870 if (linux_map_sched_prio) { 1871 error = kern_sched_getscheduler(td, tdt, &policy); 1872 if (error) 1873 goto out; 1874 1875 switch (policy) { 1876 case SCHED_OTHER: 1877 if (sched_param.sched_priority != 0) { 1878 error = EINVAL; 1879 goto out; 1880 } 1881 sched_param.sched_priority = 1882 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1883 break; 1884 case SCHED_FIFO: 1885 case SCHED_RR: 1886 if (sched_param.sched_priority < 1 || 1887 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1888 error = EINVAL; 1889 goto out; 1890 } 1891 /* 1892 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1893 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1894 */ 1895 sched_param.sched_priority = 1896 (sched_param.sched_priority - 1) * 1897 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1898 (LINUX_MAX_RT_PRIO - 1); 1899 break; 1900 } 1901 } 1902 1903 error = kern_sched_setparam(td, tdt, &sched_param); 1904 out: PROC_UNLOCK(tdt->td_proc); 1905 return (error); 1906 } 1907 1908 int 1909 linux_sched_getparam(struct thread *td, 1910 struct linux_sched_getparam_args *uap) 1911 { 1912 struct sched_param sched_param; 1913 struct thread *tdt; 1914 int error, policy; 1915 1916 tdt = linux_tdfind(td, uap->pid, -1); 1917 if (tdt == NULL) 1918 return (ESRCH); 1919 1920 error = kern_sched_getparam(td, tdt, &sched_param); 1921 if (error) { 1922 PROC_UNLOCK(tdt->td_proc); 1923 return (error); 1924 } 1925 1926 if (linux_map_sched_prio) { 1927 error = kern_sched_getscheduler(td, tdt, &policy); 1928 PROC_UNLOCK(tdt->td_proc); 1929 if (error) 1930 return (error); 1931 1932 switch (policy) { 1933 case SCHED_OTHER: 1934 sched_param.sched_priority = 0; 1935 break; 1936 case SCHED_FIFO: 1937 case SCHED_RR: 1938 /* 1939 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1940 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1941 */ 1942 sched_param.sched_priority = 1943 (sched_param.sched_priority * 1944 (LINUX_MAX_RT_PRIO - 1) + 1945 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1946 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1947 break; 1948 } 1949 } else 1950 PROC_UNLOCK(tdt->td_proc); 1951 1952 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1953 return (error); 1954 } 1955 1956 /* 1957 * Get affinity of a process. 1958 */ 1959 int 1960 linux_sched_getaffinity(struct thread *td, 1961 struct linux_sched_getaffinity_args *args) 1962 { 1963 struct thread *tdt; 1964 cpuset_t *mask; 1965 size_t size; 1966 int error; 1967 id_t tid; 1968 1969 tdt = linux_tdfind(td, args->pid, -1); 1970 if (tdt == NULL) 1971 return (ESRCH); 1972 tid = tdt->td_tid; 1973 PROC_UNLOCK(tdt->td_proc); 1974 1975 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1976 size = min(args->len, sizeof(cpuset_t)); 1977 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1978 tid, size, mask); 1979 if (error == ERANGE) 1980 error = EINVAL; 1981 if (error == 0) 1982 error = copyout(mask, args->user_mask_ptr, size); 1983 if (error == 0) 1984 td->td_retval[0] = size; 1985 free(mask, M_LINUX); 1986 return (error); 1987 } 1988 1989 /* 1990 * Set affinity of a process. 1991 */ 1992 int 1993 linux_sched_setaffinity(struct thread *td, 1994 struct linux_sched_setaffinity_args *args) 1995 { 1996 struct thread *tdt; 1997 cpuset_t *mask; 1998 int cpu, error; 1999 size_t len; 2000 id_t tid; 2001 2002 tdt = linux_tdfind(td, args->pid, -1); 2003 if (tdt == NULL) 2004 return (ESRCH); 2005 tid = tdt->td_tid; 2006 PROC_UNLOCK(tdt->td_proc); 2007 2008 len = min(args->len, sizeof(cpuset_t)); 2009 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); 2010 error = copyin(args->user_mask_ptr, mask, len); 2011 if (error != 0) 2012 goto out; 2013 /* Linux ignore high bits */ 2014 CPU_FOREACH_ISSET(cpu, mask) 2015 if (cpu > mp_maxid) 2016 CPU_CLR(cpu, mask); 2017 2018 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2019 tid, mask); 2020 if (error == EDEADLK) 2021 error = EINVAL; 2022 out: 2023 free(mask, M_TEMP); 2024 return (error); 2025 } 2026 2027 struct linux_rlimit64 { 2028 uint64_t rlim_cur; 2029 uint64_t rlim_max; 2030 }; 2031 2032 int 2033 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2034 { 2035 struct rlimit rlim, nrlim; 2036 struct linux_rlimit64 lrlim; 2037 struct proc *p; 2038 u_int which; 2039 int flags; 2040 int error; 2041 bool exec_blocked; 2042 2043 if (args->new == NULL && args->old != NULL) { 2044 if (linux_get_dummy_limit(td, args->resource, &rlim)) { 2045 lrlim.rlim_cur = rlim.rlim_cur; 2046 lrlim.rlim_max = rlim.rlim_max; 2047 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2048 } 2049 } 2050 2051 if (args->resource >= LINUX_RLIM_NLIMITS) 2052 return (EINVAL); 2053 2054 which = linux_to_bsd_resource[args->resource]; 2055 if (which == -1) 2056 return (EINVAL); 2057 2058 if (args->new != NULL) { 2059 /* 2060 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2061 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2062 * as INFINITY so we do not need a conversion even. 2063 */ 2064 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2065 if (error != 0) 2066 return (error); 2067 } 2068 2069 exec_blocked = false; 2070 flags = PGET_HOLD | PGET_NOTWEXIT; 2071 if (args->new != NULL) 2072 flags |= PGET_CANDEBUG; 2073 else 2074 flags |= PGET_CANSEE; 2075 if (args->pid == 0) { 2076 p = td->td_proc; 2077 PHOLD(p); 2078 } else { 2079 error = pget(args->pid, flags, &p); 2080 if (error != 0) 2081 return (error); 2082 exec_blocked = true; 2083 PROC_LOCK(p); 2084 execve_block_wait(td, p); 2085 error = args->new != NULL ? p_candebug(td, p) : 2086 p_cansee(td, p); 2087 PROC_UNLOCK(p); 2088 if (error != 0) 2089 goto out; 2090 } 2091 if (args->old != NULL) { 2092 PROC_LOCK(p); 2093 lim_rlimit_proc(p, which, &rlim); 2094 PROC_UNLOCK(p); 2095 if (rlim.rlim_cur == RLIM_INFINITY) 2096 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2097 else 2098 lrlim.rlim_cur = rlim.rlim_cur; 2099 if (rlim.rlim_max == RLIM_INFINITY) 2100 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2101 else 2102 lrlim.rlim_max = rlim.rlim_max; 2103 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2104 if (error != 0) 2105 goto out; 2106 } 2107 2108 if (args->new != NULL) 2109 error = kern_proc_setrlimit(td, p, which, &nrlim); 2110 2111 out: 2112 if (exec_blocked) { 2113 PROC_LOCK(p); 2114 execve_unblock(td, p); 2115 PROC_UNLOCK(p); 2116 } 2117 PRELE(p); 2118 return (error); 2119 } 2120 2121 int 2122 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2123 { 2124 struct timespec ts, *tsp; 2125 int error; 2126 2127 if (args->tsp != NULL) { 2128 error = linux_get_timespec(&ts, args->tsp); 2129 if (error != 0) 2130 return (error); 2131 tsp = &ts; 2132 } else 2133 tsp = NULL; 2134 2135 error = linux_common_pselect6(td, args->nfds, args->readfds, 2136 args->writefds, args->exceptfds, tsp, args->sig); 2137 2138 if (args->tsp != NULL) 2139 linux_put_timespec(&ts, args->tsp); 2140 return (error); 2141 } 2142 2143 static int 2144 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2145 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2146 l_uintptr_t *sig) 2147 { 2148 struct timeval utv, tv0, tv1, *tvp; 2149 struct l_pselect6arg lpse6; 2150 sigset_t *ssp; 2151 sigset_t ss; 2152 int error; 2153 2154 ssp = NULL; 2155 if (sig != NULL) { 2156 error = copyin(sig, &lpse6, sizeof(lpse6)); 2157 if (error != 0) 2158 return (error); 2159 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2160 lpse6.ss_len, &ss, &ssp); 2161 if (error != 0) 2162 return (error); 2163 } else 2164 ssp = NULL; 2165 2166 /* 2167 * Currently glibc changes nanosecond number to microsecond. 2168 * This mean losing precision but for now it is hardly seen. 2169 */ 2170 if (tsp != NULL) { 2171 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2172 if (itimerfix(&utv)) 2173 return (EINVAL); 2174 2175 microtime(&tv0); 2176 tvp = &utv; 2177 } else 2178 tvp = NULL; 2179 2180 error = kern_pselect(td, nfds, readfds, writefds, 2181 exceptfds, tvp, ssp, LINUX_NFDBITS); 2182 2183 if (tsp != NULL) { 2184 /* 2185 * Compute how much time was left of the timeout, 2186 * by subtracting the current time and the time 2187 * before we started the call, and subtracting 2188 * that result from the user-supplied value. 2189 */ 2190 microtime(&tv1); 2191 timevalsub(&tv1, &tv0); 2192 timevalsub(&utv, &tv1); 2193 if (utv.tv_sec < 0) 2194 timevalclear(&utv); 2195 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2196 } 2197 return (error); 2198 } 2199 2200 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2201 int 2202 linux_pselect6_time64(struct thread *td, 2203 struct linux_pselect6_time64_args *args) 2204 { 2205 struct timespec ts, *tsp; 2206 int error; 2207 2208 if (args->tsp != NULL) { 2209 error = linux_get_timespec64(&ts, args->tsp); 2210 if (error != 0) 2211 return (error); 2212 tsp = &ts; 2213 } else 2214 tsp = NULL; 2215 2216 error = linux_common_pselect6(td, args->nfds, args->readfds, 2217 args->writefds, args->exceptfds, tsp, args->sig); 2218 2219 if (args->tsp != NULL) 2220 linux_put_timespec64(&ts, args->tsp); 2221 return (error); 2222 } 2223 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2224 2225 int 2226 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2227 { 2228 struct timespec uts, *tsp; 2229 int error; 2230 2231 if (args->tsp != NULL) { 2232 error = linux_get_timespec(&uts, args->tsp); 2233 if (error != 0) 2234 return (error); 2235 tsp = &uts; 2236 } else 2237 tsp = NULL; 2238 2239 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2240 args->sset, args->ssize); 2241 if (error == 0 && args->tsp != NULL) 2242 error = linux_put_timespec(&uts, args->tsp); 2243 return (error); 2244 } 2245 2246 static int 2247 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2248 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2249 { 2250 struct timespec ts0, ts1; 2251 struct pollfd stackfds[32]; 2252 struct pollfd *kfds; 2253 sigset_t *ssp; 2254 sigset_t ss; 2255 int error; 2256 2257 if (kern_poll_maxfds(nfds)) 2258 return (EINVAL); 2259 if (sset != NULL) { 2260 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2261 if (error != 0) 2262 return (error); 2263 } else 2264 ssp = NULL; 2265 if (tsp != NULL) 2266 nanotime(&ts0); 2267 2268 if (nfds > nitems(stackfds)) 2269 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2270 else 2271 kfds = stackfds; 2272 error = linux_pollin(td, kfds, fds, nfds); 2273 if (error != 0) 2274 goto out; 2275 2276 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2277 if (error == 0) 2278 error = linux_pollout(td, kfds, fds, nfds); 2279 2280 if (error == 0 && tsp != NULL) { 2281 if (td->td_retval[0]) { 2282 nanotime(&ts1); 2283 timespecsub(&ts1, &ts0, &ts1); 2284 timespecsub(tsp, &ts1, tsp); 2285 if (tsp->tv_sec < 0) 2286 timespecclear(tsp); 2287 } else 2288 timespecclear(tsp); 2289 } 2290 2291 out: 2292 if (nfds > nitems(stackfds)) 2293 free(kfds, M_TEMP); 2294 return (error); 2295 } 2296 2297 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2298 int 2299 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2300 { 2301 struct timespec uts, *tsp; 2302 int error; 2303 2304 if (args->tsp != NULL) { 2305 error = linux_get_timespec64(&uts, args->tsp); 2306 if (error != 0) 2307 return (error); 2308 tsp = &uts; 2309 } else 2310 tsp = NULL; 2311 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2312 args->sset, args->ssize); 2313 if (error == 0 && args->tsp != NULL) 2314 error = linux_put_timespec64(&uts, args->tsp); 2315 return (error); 2316 } 2317 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2318 2319 static int 2320 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2321 { 2322 int error; 2323 u_int i; 2324 2325 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2326 if (error != 0) 2327 return (error); 2328 2329 for (i = 0; i < nfd; i++) { 2330 if (fds->events != 0) 2331 linux_to_bsd_poll_events(td, fds->fd, 2332 fds->events, &fds->events); 2333 fds++; 2334 } 2335 return (0); 2336 } 2337 2338 static int 2339 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2340 { 2341 int error = 0; 2342 u_int i, n = 0; 2343 2344 for (i = 0; i < nfd; i++) { 2345 if (fds->revents != 0) { 2346 bsd_to_linux_poll_events(fds->revents, 2347 &fds->revents); 2348 n++; 2349 } 2350 error = copyout(&fds->revents, &ufds->revents, 2351 sizeof(ufds->revents)); 2352 if (error) 2353 return (error); 2354 fds++; 2355 ufds++; 2356 } 2357 td->td_retval[0] = n; 2358 return (0); 2359 } 2360 2361 static int 2362 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2363 struct timespec *ts) 2364 { 2365 struct thread *tdt; 2366 int error; 2367 2368 /* 2369 * According to man in case the invalid pid specified 2370 * EINVAL should be returned. 2371 */ 2372 if (pid < 0) 2373 return (EINVAL); 2374 2375 tdt = linux_tdfind(td, pid, -1); 2376 if (tdt == NULL) 2377 return (ESRCH); 2378 2379 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2380 PROC_UNLOCK(tdt->td_proc); 2381 return (error); 2382 } 2383 2384 int 2385 linux_sched_rr_get_interval(struct thread *td, 2386 struct linux_sched_rr_get_interval_args *uap) 2387 { 2388 struct timespec ts; 2389 int error; 2390 2391 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2392 if (error != 0) 2393 return (error); 2394 return (linux_put_timespec(&ts, uap->interval)); 2395 } 2396 2397 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2398 int 2399 linux_sched_rr_get_interval_time64(struct thread *td, 2400 struct linux_sched_rr_get_interval_time64_args *uap) 2401 { 2402 struct timespec ts; 2403 int error; 2404 2405 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2406 if (error != 0) 2407 return (error); 2408 return (linux_put_timespec64(&ts, uap->interval)); 2409 } 2410 #endif 2411 2412 /* 2413 * In case when the Linux thread is the initial thread in 2414 * the thread group thread id is equal to the process id. 2415 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2416 */ 2417 struct thread * 2418 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2419 { 2420 struct linux_emuldata *em; 2421 struct thread *tdt; 2422 struct proc *p; 2423 2424 tdt = NULL; 2425 if (tid == 0 || tid == td->td_tid) { 2426 if (pid != -1 && td->td_proc->p_pid != pid) 2427 return (NULL); 2428 PROC_LOCK(td->td_proc); 2429 return (td); 2430 } else if (tid > PID_MAX) 2431 return (tdfind(tid, pid)); 2432 2433 /* 2434 * Initial thread where the tid equal to the pid. 2435 */ 2436 p = pfind(tid); 2437 if (p != NULL) { 2438 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2439 (pid != -1 && tid != pid)) { 2440 /* 2441 * p is not a Linuxulator process. 2442 */ 2443 PROC_UNLOCK(p); 2444 return (NULL); 2445 } 2446 FOREACH_THREAD_IN_PROC(p, tdt) { 2447 em = em_find(tdt); 2448 if (tid == em->em_tid) 2449 return (tdt); 2450 } 2451 PROC_UNLOCK(p); 2452 } 2453 return (NULL); 2454 } 2455 2456 void 2457 linux_to_bsd_waitopts(int options, int *bsdopts) 2458 { 2459 2460 if (options & LINUX_WNOHANG) 2461 *bsdopts |= WNOHANG; 2462 if (options & LINUX_WUNTRACED) 2463 *bsdopts |= WUNTRACED; 2464 if (options & LINUX_WEXITED) 2465 *bsdopts |= WEXITED; 2466 if (options & LINUX_WCONTINUED) 2467 *bsdopts |= WCONTINUED; 2468 if (options & LINUX_WNOWAIT) 2469 *bsdopts |= WNOWAIT; 2470 2471 if (options & __WCLONE) 2472 *bsdopts |= WLINUXCLONE; 2473 } 2474 2475 int 2476 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2477 { 2478 struct uio uio; 2479 struct iovec iov; 2480 int error; 2481 2482 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2483 return (EINVAL); 2484 if (args->count > INT_MAX) 2485 args->count = INT_MAX; 2486 2487 iov.iov_base = args->buf; 2488 iov.iov_len = args->count; 2489 2490 uio.uio_iov = &iov; 2491 uio.uio_iovcnt = 1; 2492 uio.uio_resid = iov.iov_len; 2493 uio.uio_segflg = UIO_USERSPACE; 2494 uio.uio_rw = UIO_READ; 2495 uio.uio_td = td; 2496 2497 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2498 if (error == 0) 2499 td->td_retval[0] = args->count - uio.uio_resid; 2500 return (error); 2501 } 2502 2503 int 2504 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2505 { 2506 2507 /* Needs to be page-aligned */ 2508 if (args->start & PAGE_MASK) 2509 return (EINVAL); 2510 return (kern_mincore(td, args->start, args->len, args->vec)); 2511 } 2512 2513 #define SYSLOG_TAG "<6>" 2514 2515 int 2516 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2517 { 2518 char buf[128], *src, *dst; 2519 u_int seq; 2520 int buflen, error; 2521 2522 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2523 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2524 return (EINVAL); 2525 } 2526 2527 if (args->len < 6) { 2528 td->td_retval[0] = 0; 2529 return (0); 2530 } 2531 2532 error = priv_check(td, PRIV_MSGBUF); 2533 if (error) 2534 return (error); 2535 2536 mtx_lock(&msgbuf_lock); 2537 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2538 mtx_unlock(&msgbuf_lock); 2539 2540 dst = args->buf; 2541 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2542 /* The -1 is to skip the trailing '\0'. */ 2543 dst += sizeof(SYSLOG_TAG) - 1; 2544 2545 while (error == 0) { 2546 mtx_lock(&msgbuf_lock); 2547 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2548 mtx_unlock(&msgbuf_lock); 2549 2550 if (buflen == 0) 2551 break; 2552 2553 for (src = buf; src < buf + buflen && error == 0; src++) { 2554 if (*src == '\0') 2555 continue; 2556 2557 if (dst >= args->buf + args->len) 2558 goto out; 2559 2560 error = copyout(src, dst, 1); 2561 dst++; 2562 2563 if (*src == '\n' && *(src + 1) != '<' && 2564 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2565 error = copyout(&SYSLOG_TAG, 2566 dst, sizeof(SYSLOG_TAG)); 2567 dst += sizeof(SYSLOG_TAG) - 1; 2568 } 2569 } 2570 } 2571 out: 2572 td->td_retval[0] = dst - args->buf; 2573 return (error); 2574 } 2575 2576 int 2577 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2578 { 2579 int cpu, error, node; 2580 2581 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2582 error = 0; 2583 node = cpuid_to_pcpu[cpu]->pc_domain; 2584 2585 if (args->cpu != NULL) 2586 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2587 if (args->node != NULL) 2588 error = copyout(&node, args->node, sizeof(l_int)); 2589 return (error); 2590 } 2591 2592 #if defined(__i386__) || defined(__amd64__) 2593 int 2594 linux_poll(struct thread *td, struct linux_poll_args *args) 2595 { 2596 struct timespec ts, *tsp; 2597 2598 if (args->timeout != INFTIM) { 2599 if (args->timeout < 0) 2600 return (EINVAL); 2601 ts.tv_sec = args->timeout / 1000; 2602 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2603 tsp = &ts; 2604 } else 2605 tsp = NULL; 2606 2607 return (linux_common_ppoll(td, args->fds, args->nfds, 2608 tsp, NULL, 0)); 2609 } 2610 #endif /* __i386__ || __amd64__ */ 2611 2612 int 2613 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2614 { 2615 2616 switch (args->op) { 2617 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2618 return (EOPNOTSUPP); 2619 default: 2620 /* 2621 * Ignore unknown operations, just like Linux kernel built 2622 * without CONFIG_SECCOMP. 2623 */ 2624 return (EINVAL); 2625 } 2626 } 2627 2628 /* 2629 * Custom version of exec_copyin_args(), to copy out argument and environment 2630 * strings from the old process address space into the temporary string buffer. 2631 * Based on freebsd32_exec_copyin_args. 2632 */ 2633 static int 2634 linux_exec_copyin_args(struct image_args *args, const char *fname, 2635 l_uintptr_t *argv, l_uintptr_t *envv) 2636 { 2637 char *argp, *envp; 2638 l_uintptr_t *ptr, arg; 2639 int error; 2640 2641 bzero(args, sizeof(*args)); 2642 if (argv == NULL) 2643 return (EFAULT); 2644 2645 /* 2646 * Allocate demand-paged memory for the file name, argument, and 2647 * environment strings. 2648 */ 2649 error = exec_alloc_args(args); 2650 if (error != 0) 2651 return (error); 2652 2653 /* 2654 * Copy the file name. 2655 */ 2656 error = exec_args_add_fname(args, fname, UIO_USERSPACE); 2657 if (error != 0) 2658 goto err_exit; 2659 2660 /* 2661 * extract arguments first 2662 */ 2663 ptr = argv; 2664 for (;;) { 2665 error = copyin(ptr++, &arg, sizeof(arg)); 2666 if (error) 2667 goto err_exit; 2668 if (arg == 0) 2669 break; 2670 argp = PTRIN(arg); 2671 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2672 if (error != 0) 2673 goto err_exit; 2674 } 2675 2676 /* 2677 * This comment is from Linux do_execveat_common: 2678 * When argv is empty, add an empty string ("") as argv[0] to 2679 * ensure confused userspace programs that start processing 2680 * from argv[1] won't end up walking envp. 2681 */ 2682 if (args->argc == 0 && 2683 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2684 goto err_exit; 2685 2686 /* 2687 * extract environment strings 2688 */ 2689 if (envv) { 2690 ptr = envv; 2691 for (;;) { 2692 error = copyin(ptr++, &arg, sizeof(arg)); 2693 if (error) 2694 goto err_exit; 2695 if (arg == 0) 2696 break; 2697 envp = PTRIN(arg); 2698 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2699 if (error != 0) 2700 goto err_exit; 2701 } 2702 } 2703 2704 return (0); 2705 2706 err_exit: 2707 exec_free_args(args); 2708 return (error); 2709 } 2710 2711 int 2712 linux_execve(struct thread *td, struct linux_execve_args *args) 2713 { 2714 struct image_args eargs; 2715 int error; 2716 2717 LINUX_CTR(execve); 2718 2719 error = linux_exec_copyin_args(&eargs, args->path, args->argp, 2720 args->envp); 2721 if (error == 0) 2722 error = linux_common_execve(td, &eargs); 2723 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2724 return (error); 2725 } 2726 2727 static void 2728 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) 2729 { 2730 struct rtprio rtp2; 2731 2732 pri_to_rtp(td1, &rtp2); 2733 if (rtp2.type < rtp->type || 2734 (rtp2.type == rtp->type && 2735 rtp2.prio < rtp->prio)) { 2736 rtp->type = rtp2.type; 2737 rtp->prio = rtp2.prio; 2738 } 2739 } 2740 2741 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX 2742 2743 static int 2744 linux_rtprio2ioprio(struct rtprio *rtp) 2745 { 2746 int ioprio, prio; 2747 2748 switch (rtp->type) { 2749 case RTP_PRIO_IDLE: 2750 prio = RTP_PRIO_MIN; 2751 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); 2752 break; 2753 case RTP_PRIO_NORMAL: 2754 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2755 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); 2756 break; 2757 case RTP_PRIO_REALTIME: 2758 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2759 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); 2760 break; 2761 default: 2762 prio = RTP_PRIO_MIN; 2763 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); 2764 break; 2765 } 2766 return (ioprio); 2767 } 2768 2769 static int 2770 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) 2771 { 2772 2773 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { 2774 case LINUX_IOPRIO_CLASS_IDLE: 2775 rtp->prio = RTP_PRIO_MIN; 2776 rtp->type = RTP_PRIO_IDLE; 2777 break; 2778 case LINUX_IOPRIO_CLASS_BE: 2779 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2780 rtp->type = RTP_PRIO_NORMAL; 2781 break; 2782 case LINUX_IOPRIO_CLASS_RT: 2783 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2784 rtp->type = RTP_PRIO_REALTIME; 2785 break; 2786 default: 2787 return (EINVAL); 2788 } 2789 return (0); 2790 } 2791 #undef LINUX_PRIO_DIVIDER 2792 2793 int 2794 linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) 2795 { 2796 struct thread *td1; 2797 struct rtprio rtp; 2798 struct pgrp *pg; 2799 struct proc *p; 2800 int error, found; 2801 2802 p = NULL; 2803 td1 = NULL; 2804 error = 0; 2805 found = 0; 2806 rtp.type = RTP_PRIO_IDLE; 2807 rtp.prio = RTP_PRIO_MAX; 2808 switch (args->which) { 2809 case LINUX_IOPRIO_WHO_PROCESS: 2810 if (args->who == 0) { 2811 td1 = td; 2812 p = td1->td_proc; 2813 PROC_LOCK(p); 2814 } else if (args->who > PID_MAX) { 2815 td1 = linux_tdfind(td, args->who, -1); 2816 if (td1 != NULL) 2817 p = td1->td_proc; 2818 } else 2819 p = pfind(args->who); 2820 if (p == NULL) 2821 return (ESRCH); 2822 if ((error = p_cansee(td, p))) { 2823 PROC_UNLOCK(p); 2824 break; 2825 } 2826 if (td1 != NULL) { 2827 pri_to_rtp(td1, &rtp); 2828 } else { 2829 FOREACH_THREAD_IN_PROC(p, td1) { 2830 linux_up_rtprio_if(td1, &rtp); 2831 } 2832 } 2833 found++; 2834 PROC_UNLOCK(p); 2835 break; 2836 case LINUX_IOPRIO_WHO_PGRP: 2837 sx_slock(&proctree_lock); 2838 if (args->who == 0) { 2839 pg = td->td_proc->p_pgrp; 2840 PGRP_LOCK(pg); 2841 } else { 2842 pg = pgfind(args->who); 2843 if (pg == NULL) { 2844 sx_sunlock(&proctree_lock); 2845 error = ESRCH; 2846 break; 2847 } 2848 } 2849 sx_sunlock(&proctree_lock); 2850 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2851 PROC_LOCK(p); 2852 if (p->p_state == PRS_NORMAL && 2853 p_cansee(td, p) == 0) { 2854 FOREACH_THREAD_IN_PROC(p, td1) { 2855 linux_up_rtprio_if(td1, &rtp); 2856 found++; 2857 } 2858 } 2859 PROC_UNLOCK(p); 2860 } 2861 PGRP_UNLOCK(pg); 2862 break; 2863 case LINUX_IOPRIO_WHO_USER: 2864 if (args->who == 0) 2865 args->who = td->td_ucred->cr_uid; 2866 sx_slock(&allproc_lock); 2867 FOREACH_PROC_IN_SYSTEM(p) { 2868 PROC_LOCK(p); 2869 if (p->p_state == PRS_NORMAL && 2870 p->p_ucred->cr_uid == args->who && 2871 p_cansee(td, p) == 0) { 2872 FOREACH_THREAD_IN_PROC(p, td1) { 2873 linux_up_rtprio_if(td1, &rtp); 2874 found++; 2875 } 2876 } 2877 PROC_UNLOCK(p); 2878 } 2879 sx_sunlock(&allproc_lock); 2880 break; 2881 default: 2882 error = EINVAL; 2883 break; 2884 } 2885 if (error == 0) { 2886 if (found != 0) 2887 td->td_retval[0] = linux_rtprio2ioprio(&rtp); 2888 else 2889 error = ESRCH; 2890 } 2891 return (error); 2892 } 2893 2894 int 2895 linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) 2896 { 2897 struct thread *td1; 2898 struct rtprio rtp; 2899 struct pgrp *pg; 2900 struct proc *p; 2901 int error; 2902 2903 if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) 2904 return (error); 2905 /* Attempts to set high priorities (REALTIME) require su privileges. */ 2906 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && 2907 (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) 2908 return (error); 2909 2910 p = NULL; 2911 td1 = NULL; 2912 switch (args->which) { 2913 case LINUX_IOPRIO_WHO_PROCESS: 2914 if (args->who == 0) { 2915 td1 = td; 2916 p = td1->td_proc; 2917 PROC_LOCK(p); 2918 } else if (args->who > PID_MAX) { 2919 td1 = linux_tdfind(td, args->who, -1); 2920 if (td1 != NULL) 2921 p = td1->td_proc; 2922 } else 2923 p = pfind(args->who); 2924 if (p == NULL) 2925 return (ESRCH); 2926 if ((error = p_cansched(td, p))) { 2927 PROC_UNLOCK(p); 2928 break; 2929 } 2930 if (td1 != NULL) { 2931 error = rtp_to_pri(&rtp, td1); 2932 } else { 2933 FOREACH_THREAD_IN_PROC(p, td1) { 2934 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2935 break; 2936 } 2937 } 2938 PROC_UNLOCK(p); 2939 break; 2940 case LINUX_IOPRIO_WHO_PGRP: 2941 sx_slock(&proctree_lock); 2942 if (args->who == 0) { 2943 pg = td->td_proc->p_pgrp; 2944 PGRP_LOCK(pg); 2945 } else { 2946 pg = pgfind(args->who); 2947 if (pg == NULL) { 2948 sx_sunlock(&proctree_lock); 2949 error = ESRCH; 2950 break; 2951 } 2952 } 2953 sx_sunlock(&proctree_lock); 2954 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2955 PROC_LOCK(p); 2956 if (p->p_state == PRS_NORMAL && 2957 p_cansched(td, p) == 0) { 2958 FOREACH_THREAD_IN_PROC(p, td1) { 2959 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2960 break; 2961 } 2962 } 2963 PROC_UNLOCK(p); 2964 if (error != 0) 2965 break; 2966 } 2967 PGRP_UNLOCK(pg); 2968 break; 2969 case LINUX_IOPRIO_WHO_USER: 2970 if (args->who == 0) 2971 args->who = td->td_ucred->cr_uid; 2972 sx_slock(&allproc_lock); 2973 FOREACH_PROC_IN_SYSTEM(p) { 2974 PROC_LOCK(p); 2975 if (p->p_state == PRS_NORMAL && 2976 p->p_ucred->cr_uid == args->who && 2977 p_cansched(td, p) == 0) { 2978 FOREACH_THREAD_IN_PROC(p, td1) { 2979 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2980 break; 2981 } 2982 } 2983 PROC_UNLOCK(p); 2984 if (error != 0) 2985 break; 2986 } 2987 sx_sunlock(&allproc_lock); 2988 break; 2989 default: 2990 error = EINVAL; 2991 break; 2992 } 2993 return (error); 2994 } 2995 2996 /* The only flag is O_NONBLOCK */ 2997 #define B2L_MQ_FLAGS(bflags) ((bflags) != 0 ? LINUX_O_NONBLOCK : 0) 2998 #define L2B_MQ_FLAGS(lflags) ((lflags) != 0 ? O_NONBLOCK : 0) 2999 3000 int 3001 linux_mq_open(struct thread *td, struct linux_mq_open_args *args) 3002 { 3003 struct mq_attr attr; 3004 int error, flags; 3005 3006 flags = linux_common_openflags(args->oflag); 3007 if ((flags & O_ACCMODE) == O_ACCMODE || (flags & O_EXEC) != 0) 3008 return (EINVAL); 3009 flags = FFLAGS(flags); 3010 if ((flags & O_CREAT) != 0 && args->attr != NULL) { 3011 error = copyin(args->attr, &attr, sizeof(attr)); 3012 if (error != 0) 3013 return (error); 3014 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 3015 } 3016 3017 return (kern_kmq_open(td, args->name, flags, args->mode, 3018 args->attr != NULL ? &attr : NULL)); 3019 } 3020 3021 int 3022 linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 3023 { 3024 struct kmq_unlink_args bsd_args = { 3025 .path = PTRIN(args->name) 3026 }; 3027 3028 return (sys_kmq_unlink(td, &bsd_args)); 3029 } 3030 3031 int 3032 linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 3033 { 3034 struct timespec ts, *abs_timeout; 3035 int error; 3036 3037 if (args->abs_timeout == NULL) 3038 abs_timeout = NULL; 3039 else { 3040 error = linux_get_timespec(&ts, args->abs_timeout); 3041 if (error != 0) 3042 return (error); 3043 abs_timeout = &ts; 3044 } 3045 3046 return (kern_kmq_timedsend(td, args->mqd, PTRIN(args->msg_ptr), 3047 args->msg_len, args->msg_prio, abs_timeout)); 3048 } 3049 3050 int 3051 linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 3052 { 3053 struct timespec ts, *abs_timeout; 3054 int error; 3055 3056 if (args->abs_timeout == NULL) 3057 abs_timeout = NULL; 3058 else { 3059 error = linux_get_timespec(&ts, args->abs_timeout); 3060 if (error != 0) 3061 return (error); 3062 abs_timeout = &ts; 3063 } 3064 3065 return (kern_kmq_timedreceive(td, args->mqd, PTRIN(args->msg_ptr), 3066 args->msg_len, args->msg_prio, abs_timeout)); 3067 } 3068 3069 int 3070 linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 3071 { 3072 struct sigevent ev, *evp; 3073 struct l_sigevent l_ev; 3074 int error; 3075 3076 if (args->sevp == NULL) 3077 evp = NULL; 3078 else { 3079 error = copyin(args->sevp, &l_ev, sizeof(l_ev)); 3080 if (error != 0) 3081 return (error); 3082 error = linux_convert_l_sigevent(&l_ev, &ev); 3083 if (error != 0) 3084 return (error); 3085 evp = &ev; 3086 } 3087 3088 return (kern_kmq_notify(td, args->mqd, evp)); 3089 } 3090 3091 int 3092 linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 3093 { 3094 struct mq_attr attr, oattr; 3095 int error; 3096 3097 if (args->attr != NULL) { 3098 error = copyin(args->attr, &attr, sizeof(attr)); 3099 if (error != 0) 3100 return (error); 3101 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 3102 } 3103 3104 error = kern_kmq_setattr(td, args->mqd, args->attr != NULL ? &attr : NULL, 3105 &oattr); 3106 if (error == 0 && args->oattr != NULL) { 3107 oattr.mq_flags = B2L_MQ_FLAGS(oattr.mq_flags); 3108 bzero(oattr.__reserved, sizeof(oattr.__reserved)); 3109 error = copyout(&oattr, args->oattr, sizeof(oattr)); 3110 } 3111 3112 return (error); 3113 } 3114 3115 int 3116 linux_kcmp(struct thread *td, struct linux_kcmp_args *args) 3117 { 3118 int type; 3119 3120 switch (args->type) { 3121 case LINUX_KCMP_FILE: 3122 type = KCMP_FILE; 3123 break; 3124 case LINUX_KCMP_FILES: 3125 type = KCMP_FILES; 3126 break; 3127 case LINUX_KCMP_SIGHAND: 3128 type = KCMP_SIGHAND; 3129 break; 3130 case LINUX_KCMP_VM: 3131 type = KCMP_VM; 3132 break; 3133 default: 3134 return (EINVAL); 3135 } 3136 3137 return (kern_kcmp(td, args->pid1, args->pid2, type, args->idx1, 3138 args->idx)); 3139 } 3140 3141 int 3142 linux_membarrier(struct thread *td, struct linux_membarrier_args *args) 3143 { 3144 static const struct { 3145 int linux_cmd; 3146 int freebsd_cmd; 3147 } cmds[] = { 3148 { LINUX_MEMBARRIER_CMD_QUERY, 3149 MEMBARRIER_CMD_QUERY }, 3150 { LINUX_MEMBARRIER_CMD_GLOBAL, 3151 MEMBARRIER_CMD_GLOBAL }, 3152 { LINUX_MEMBARRIER_CMD_GLOBAL_EXPEDITED, 3153 MEMBARRIER_CMD_GLOBAL_EXPEDITED }, 3154 { LINUX_MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED, 3155 MEMBARRIER_CMD_REGISTER_GLOBAL_EXPEDITED }, 3156 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED, 3157 MEMBARRIER_CMD_PRIVATE_EXPEDITED }, 3158 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED, 3159 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED }, 3160 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE, 3161 MEMBARRIER_CMD_PRIVATE_EXPEDITED_SYNC_CORE }, 3162 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE, 3163 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_SYNC_CORE }, 3164 { LINUX_MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ, 3165 MEMBARRIER_CMD_PRIVATE_EXPEDITED_RSEQ }, 3166 { LINUX_MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ, 3167 MEMBARRIER_CMD_REGISTER_PRIVATE_EXPEDITED_RSEQ }, 3168 { LINUX_MEMBARRIER_CMD_GET_REGISTRATIONS, 3169 MEMBARRIER_CMD_GET_REGISTRATIONS }, 3170 }; 3171 int cmd, error, flags, i, mask; 3172 3173 cmd = -1; 3174 for (i = 0; i < nitems(cmds); i++) { 3175 if (args->cmd == cmds[i].linux_cmd) { 3176 cmd = cmds[i].freebsd_cmd; 3177 break; 3178 } 3179 } 3180 3181 if (cmd == -1 || (args->flags & ~LINUX_MEMBARRIER_CMD_FLAG_CPU) != 0) 3182 return (EINVAL); 3183 3184 flags = 0; 3185 if ((args->flags & LINUX_MEMBARRIER_CMD_FLAG_CPU) != 0) 3186 flags |= MEMBARRIER_CMD_FLAG_CPU; 3187 3188 error = kern_membarrier(td, cmd, flags, args->cpu_id); 3189 if (error != 0) 3190 return (error); 3191 3192 if (args->cmd == LINUX_MEMBARRIER_CMD_QUERY || 3193 args->cmd == LINUX_MEMBARRIER_CMD_GET_REGISTRATIONS) { 3194 mask = td->td_retval[0]; 3195 td->td_retval[0] = 0; 3196 for (i = 0; i < nitems(cmds); i++) 3197 if ((mask & cmds[i].freebsd_cmd) != 0) 3198 td->td_retval[0] |= cmds[i].linux_cmd; 3199 } 3200 3201 return (0); 3202 } 3203 3204 /* 3205 * setfsuid() & setfsgid() exist to decouple the Linux filesystem credentials 3206 * from the effective credentials, avoiding signal exposure during privilege 3207 * transitions. The signal permission model that motivated this was revised in 3208 * Linux 2.0, making these syscalls obsolete for new applications. 3209 * 3210 * As there's no FreeBSD equivalent, implement both syscalls as no-ops that 3211 * return the current effective UID/GID as the previous filesystem UID/GID. 3212 * Linux returns the previous filesystem UID/GID for these syscalls, with no 3213 * error indication. 3214 */ 3215 3216 int 3217 linux_setfsuid(struct thread *td, struct linux_setfsuid_args *args) 3218 { 3219 td->td_retval[0] = td->td_ucred->cr_uid; 3220 return (0); 3221 } 3222 3223 int 3224 linux_setfsgid(struct thread *td, struct linux_setfsgid_args *args) 3225 { 3226 td->td_retval[0] = td->td_ucred->cr_gid; 3227 return (0); 3228 } 3229 3230 MODULE_DEPEND(linux, mqueuefs, 1, 1, 1); 3231