1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32 #include <sys/param.h> 33 #include <sys/fcntl.h> 34 #include <sys/jail.h> 35 #include <sys/imgact.h> 36 #include <sys/limits.h> 37 #include <sys/lock.h> 38 #include <sys/msgbuf.h> 39 #include <sys/mqueue.h> 40 #include <sys/mutex.h> 41 #include <sys/poll.h> 42 #include <sys/priv.h> 43 #include <sys/proc.h> 44 #include <sys/procctl.h> 45 #include <sys/reboot.h> 46 #include <sys/random.h> 47 #include <sys/resourcevar.h> 48 #include <sys/rtprio.h> 49 #include <sys/sched.h> 50 #include <sys/smp.h> 51 #include <sys/stat.h> 52 #include <sys/syscallsubr.h> 53 #include <sys/sysctl.h> 54 #include <sys/sysent.h> 55 #include <sys/sysproto.h> 56 #include <sys/time.h> 57 #include <sys/unistd.h> 58 #include <sys/vmmeter.h> 59 #include <sys/vnode.h> 60 61 #include <security/audit/audit.h> 62 #include <security/mac/mac_framework.h> 63 64 #include <vm/pmap.h> 65 #include <vm/vm_map.h> 66 #include <vm/swap_pager.h> 67 68 #ifdef COMPAT_LINUX32 69 #include <machine/../linux32/linux.h> 70 #include <machine/../linux32/linux32_proto.h> 71 #else 72 #include <machine/../linux/linux.h> 73 #include <machine/../linux/linux_proto.h> 74 #endif 75 76 #include <compat/linux/linux_common.h> 77 #include <compat/linux/linux_dtrace.h> 78 #include <compat/linux/linux_file.h> 79 #include <compat/linux/linux_mib.h> 80 #include <compat/linux/linux_mmap.h> 81 #include <compat/linux/linux_signal.h> 82 #include <compat/linux/linux_time.h> 83 #include <compat/linux/linux_util.h> 84 #include <compat/linux/linux_emul.h> 85 #include <compat/linux/linux_misc.h> 86 87 int stclohz; /* Statistics clock frequency */ 88 89 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 90 RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK, 91 RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE, 92 RLIMIT_MEMLOCK, RLIMIT_AS 93 }; 94 95 struct l_sysinfo { 96 l_long uptime; /* Seconds since boot */ 97 l_ulong loads[3]; /* 1, 5, and 15 minute load averages */ 98 #define LINUX_SYSINFO_LOADS_SCALE 65536 99 l_ulong totalram; /* Total usable main memory size */ 100 l_ulong freeram; /* Available memory size */ 101 l_ulong sharedram; /* Amount of shared memory */ 102 l_ulong bufferram; /* Memory used by buffers */ 103 l_ulong totalswap; /* Total swap space size */ 104 l_ulong freeswap; /* swap space still available */ 105 l_ushort procs; /* Number of current processes */ 106 l_ushort pads; 107 l_ulong totalhigh; 108 l_ulong freehigh; 109 l_uint mem_unit; 110 char _f[20-2*sizeof(l_long)-sizeof(l_int)]; /* padding */ 111 }; 112 113 struct l_pselect6arg { 114 l_uintptr_t ss; 115 l_size_t ss_len; 116 }; 117 118 static int linux_utimensat_lts_to_ts(struct l_timespec *, 119 struct timespec *); 120 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 121 static int linux_utimensat_lts64_to_ts(struct l_timespec64 *, 122 struct timespec *); 123 #endif 124 static int linux_common_utimensat(struct thread *, int, 125 const char *, struct timespec *, int); 126 static int linux_common_pselect6(struct thread *, l_int, 127 l_fd_set *, l_fd_set *, l_fd_set *, 128 struct timespec *, l_uintptr_t *); 129 static int linux_common_ppoll(struct thread *, struct pollfd *, 130 uint32_t, struct timespec *, l_sigset_t *, 131 l_size_t); 132 static int linux_pollin(struct thread *, struct pollfd *, 133 struct pollfd *, u_int); 134 static int linux_pollout(struct thread *, struct pollfd *, 135 struct pollfd *, u_int); 136 137 int 138 linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args) 139 { 140 struct l_sysinfo sysinfo; 141 int i, j; 142 struct timespec ts; 143 144 bzero(&sysinfo, sizeof(sysinfo)); 145 getnanouptime(&ts); 146 if (ts.tv_nsec != 0) 147 ts.tv_sec++; 148 sysinfo.uptime = ts.tv_sec; 149 150 /* Use the information from the mib to get our load averages */ 151 for (i = 0; i < 3; i++) 152 sysinfo.loads[i] = averunnable.ldavg[i] * 153 LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale; 154 155 sysinfo.totalram = physmem * PAGE_SIZE; 156 sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE; 157 158 /* 159 * sharedram counts pages allocated to named, swap-backed objects such 160 * as shared memory segments and tmpfs files. There is no cheap way to 161 * compute this, so just leave the field unpopulated. Linux itself only 162 * started setting this field in the 3.x timeframe. 163 */ 164 sysinfo.sharedram = 0; 165 sysinfo.bufferram = 0; 166 167 swap_pager_status(&i, &j); 168 sysinfo.totalswap = i * PAGE_SIZE; 169 sysinfo.freeswap = (i - j) * PAGE_SIZE; 170 171 sysinfo.procs = nprocs; 172 173 /* 174 * Platforms supported by the emulation layer do not have a notion of 175 * high memory. 176 */ 177 sysinfo.totalhigh = 0; 178 sysinfo.freehigh = 0; 179 180 sysinfo.mem_unit = 1; 181 182 return (copyout(&sysinfo, args->info, sizeof(sysinfo))); 183 } 184 185 #ifdef LINUX_LEGACY_SYSCALLS 186 int 187 linux_alarm(struct thread *td, struct linux_alarm_args *args) 188 { 189 struct itimerval it, old_it; 190 u_int secs; 191 int error __diagused; 192 193 secs = args->secs; 194 /* 195 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2 196 * to match kern_setitimer()'s limit to avoid error from it. 197 * 198 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit 199 * platforms. 200 */ 201 if (secs > INT32_MAX / 2) 202 secs = INT32_MAX / 2; 203 204 it.it_value.tv_sec = secs; 205 it.it_value.tv_usec = 0; 206 timevalclear(&it.it_interval); 207 error = kern_setitimer(td, ITIMER_REAL, &it, &old_it); 208 KASSERT(error == 0, ("kern_setitimer returns %d", error)); 209 210 if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) || 211 old_it.it_value.tv_usec >= 500000) 212 old_it.it_value.tv_sec++; 213 td->td_retval[0] = old_it.it_value.tv_sec; 214 return (0); 215 } 216 #endif 217 218 int 219 linux_brk(struct thread *td, struct linux_brk_args *args) 220 { 221 struct vmspace *vm = td->td_proc->p_vmspace; 222 uintptr_t new, old; 223 224 old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize); 225 new = (uintptr_t)args->dsend; 226 if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new)) 227 td->td_retval[0] = (register_t)new; 228 else 229 td->td_retval[0] = (register_t)old; 230 231 return (0); 232 } 233 234 #ifdef LINUX_LEGACY_SYSCALLS 235 int 236 linux_select(struct thread *td, struct linux_select_args *args) 237 { 238 l_timeval ltv; 239 struct timeval tv0, tv1, utv, *tvp; 240 int error; 241 242 /* 243 * Store current time for computation of the amount of 244 * time left. 245 */ 246 if (args->timeout) { 247 if ((error = copyin(args->timeout, <v, sizeof(ltv)))) 248 goto select_out; 249 utv.tv_sec = ltv.tv_sec; 250 utv.tv_usec = ltv.tv_usec; 251 252 if (itimerfix(&utv)) { 253 /* 254 * The timeval was invalid. Convert it to something 255 * valid that will act as it does under Linux. 256 */ 257 utv.tv_sec += utv.tv_usec / 1000000; 258 utv.tv_usec %= 1000000; 259 if (utv.tv_usec < 0) { 260 utv.tv_sec -= 1; 261 utv.tv_usec += 1000000; 262 } 263 if (utv.tv_sec < 0) 264 timevalclear(&utv); 265 } 266 microtime(&tv0); 267 tvp = &utv; 268 } else 269 tvp = NULL; 270 271 error = kern_select(td, args->nfds, args->readfds, args->writefds, 272 args->exceptfds, tvp, LINUX_NFDBITS); 273 if (error) 274 goto select_out; 275 276 if (args->timeout) { 277 if (td->td_retval[0]) { 278 /* 279 * Compute how much time was left of the timeout, 280 * by subtracting the current time and the time 281 * before we started the call, and subtracting 282 * that result from the user-supplied value. 283 */ 284 microtime(&tv1); 285 timevalsub(&tv1, &tv0); 286 timevalsub(&utv, &tv1); 287 if (utv.tv_sec < 0) 288 timevalclear(&utv); 289 } else 290 timevalclear(&utv); 291 ltv.tv_sec = utv.tv_sec; 292 ltv.tv_usec = utv.tv_usec; 293 if ((error = copyout(<v, args->timeout, sizeof(ltv)))) 294 goto select_out; 295 } 296 297 select_out: 298 return (error); 299 } 300 #endif 301 302 int 303 linux_mremap(struct thread *td, struct linux_mremap_args *args) 304 { 305 uintptr_t addr; 306 size_t len; 307 int error = 0; 308 309 if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) { 310 td->td_retval[0] = 0; 311 return (EINVAL); 312 } 313 314 /* 315 * Check for the page alignment. 316 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK. 317 */ 318 if (args->addr & PAGE_MASK) { 319 td->td_retval[0] = 0; 320 return (EINVAL); 321 } 322 323 args->new_len = round_page(args->new_len); 324 args->old_len = round_page(args->old_len); 325 326 if (args->new_len > args->old_len) { 327 td->td_retval[0] = 0; 328 return (ENOMEM); 329 } 330 331 if (args->new_len < args->old_len) { 332 addr = args->addr + args->new_len; 333 len = args->old_len - args->new_len; 334 error = kern_munmap(td, addr, len); 335 } 336 337 td->td_retval[0] = error ? 0 : (uintptr_t)args->addr; 338 return (error); 339 } 340 341 #define LINUX_MS_ASYNC 0x0001 342 #define LINUX_MS_INVALIDATE 0x0002 343 #define LINUX_MS_SYNC 0x0004 344 345 int 346 linux_msync(struct thread *td, struct linux_msync_args *args) 347 { 348 349 return (kern_msync(td, args->addr, args->len, 350 args->fl & ~LINUX_MS_SYNC)); 351 } 352 353 int 354 linux_mprotect(struct thread *td, struct linux_mprotect_args *uap) 355 { 356 357 return (linux_mprotect_common(td, PTROUT(uap->addr), uap->len, 358 uap->prot)); 359 } 360 361 int 362 linux_madvise(struct thread *td, struct linux_madvise_args *uap) 363 { 364 365 return (linux_madvise_common(td, PTROUT(uap->addr), uap->len, 366 uap->behav)); 367 } 368 369 int 370 linux_mmap2(struct thread *td, struct linux_mmap2_args *uap) 371 { 372 #if defined(LINUX_ARCHWANT_MMAP2PGOFF) 373 /* 374 * For architectures with sizeof (off_t) < sizeof (loff_t) mmap is 375 * implemented with mmap2 syscall and the offset is represented in 376 * multiples of page size. 377 */ 378 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 379 uap->flags, uap->fd, (uint64_t)(uint32_t)uap->pgoff * PAGE_SIZE)); 380 #else 381 return (linux_mmap_common(td, PTROUT(uap->addr), uap->len, uap->prot, 382 uap->flags, uap->fd, uap->pgoff)); 383 #endif 384 } 385 386 #ifdef LINUX_LEGACY_SYSCALLS 387 int 388 linux_time(struct thread *td, struct linux_time_args *args) 389 { 390 struct timeval tv; 391 l_time_t tm; 392 int error; 393 394 microtime(&tv); 395 tm = tv.tv_sec; 396 if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm)))) 397 return (error); 398 td->td_retval[0] = tm; 399 return (0); 400 } 401 #endif 402 403 struct l_times_argv { 404 l_clock_t tms_utime; 405 l_clock_t tms_stime; 406 l_clock_t tms_cutime; 407 l_clock_t tms_cstime; 408 }; 409 410 /* 411 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value. 412 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK 413 * auxiliary vector entry. 414 */ 415 #define CLK_TCK 100 416 417 #define CONVOTCK(r) (r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK)) 418 #define CONVNTCK(r) (r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz)) 419 420 #define CONVTCK(r) (linux_kernver(td) >= LINUX_KERNVER(2,4,0) ? \ 421 CONVNTCK(r) : CONVOTCK(r)) 422 423 int 424 linux_times(struct thread *td, struct linux_times_args *args) 425 { 426 struct timeval tv, utime, stime, cutime, cstime; 427 struct l_times_argv tms; 428 struct proc *p; 429 int error; 430 431 if (args->buf != NULL) { 432 p = td->td_proc; 433 PROC_LOCK(p); 434 PROC_STATLOCK(p); 435 calcru(p, &utime, &stime); 436 PROC_STATUNLOCK(p); 437 calccru(p, &cutime, &cstime); 438 PROC_UNLOCK(p); 439 440 tms.tms_utime = CONVTCK(utime); 441 tms.tms_stime = CONVTCK(stime); 442 443 tms.tms_cutime = CONVTCK(cutime); 444 tms.tms_cstime = CONVTCK(cstime); 445 446 if ((error = copyout(&tms, args->buf, sizeof(tms)))) 447 return (error); 448 } 449 450 microuptime(&tv); 451 td->td_retval[0] = (int)CONVTCK(tv); 452 return (0); 453 } 454 455 int 456 linux_newuname(struct thread *td, struct linux_newuname_args *args) 457 { 458 struct l_new_utsname utsname; 459 char osname[LINUX_MAX_UTSNAME]; 460 char osrelease[LINUX_MAX_UTSNAME]; 461 char *p; 462 463 linux_get_osname(td, osname); 464 linux_get_osrelease(td, osrelease); 465 466 bzero(&utsname, sizeof(utsname)); 467 strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME); 468 getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME); 469 getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME); 470 strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME); 471 strlcpy(utsname.version, version, LINUX_MAX_UTSNAME); 472 for (p = utsname.version; *p != '\0'; ++p) 473 if (*p == '\n') { 474 *p = '\0'; 475 break; 476 } 477 #if defined(__amd64__) 478 /* 479 * On amd64, Linux uname(2) needs to return "x86_64" 480 * for both 64-bit and 32-bit applications. On 32-bit, 481 * the string returned by getauxval(AT_PLATFORM) needs 482 * to remain "i686", though. 483 */ 484 #if defined(COMPAT_LINUX32) 485 if (linux32_emulate_i386) 486 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 487 else 488 #endif 489 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 490 #elif defined(__aarch64__) 491 strlcpy(utsname.machine, "aarch64", LINUX_MAX_UTSNAME); 492 #elif defined(__i386__) 493 strlcpy(utsname.machine, "i686", LINUX_MAX_UTSNAME); 494 #endif 495 496 return (copyout(&utsname, args->buf, sizeof(utsname))); 497 } 498 499 struct l_utimbuf { 500 l_time_t l_actime; 501 l_time_t l_modtime; 502 }; 503 504 #ifdef LINUX_LEGACY_SYSCALLS 505 int 506 linux_utime(struct thread *td, struct linux_utime_args *args) 507 { 508 struct timeval tv[2], *tvp; 509 struct l_utimbuf lut; 510 int error; 511 512 if (args->times) { 513 if ((error = copyin(args->times, &lut, sizeof lut)) != 0) 514 return (error); 515 tv[0].tv_sec = lut.l_actime; 516 tv[0].tv_usec = 0; 517 tv[1].tv_sec = lut.l_modtime; 518 tv[1].tv_usec = 0; 519 tvp = tv; 520 } else 521 tvp = NULL; 522 523 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 524 tvp, UIO_SYSSPACE)); 525 } 526 #endif 527 528 #ifdef LINUX_LEGACY_SYSCALLS 529 int 530 linux_utimes(struct thread *td, struct linux_utimes_args *args) 531 { 532 l_timeval ltv[2]; 533 struct timeval tv[2], *tvp = NULL; 534 int error; 535 536 if (args->tptr != NULL) { 537 if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0) 538 return (error); 539 tv[0].tv_sec = ltv[0].tv_sec; 540 tv[0].tv_usec = ltv[0].tv_usec; 541 tv[1].tv_sec = ltv[1].tv_sec; 542 tv[1].tv_usec = ltv[1].tv_usec; 543 tvp = tv; 544 } 545 546 return (kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 547 tvp, UIO_SYSSPACE)); 548 } 549 #endif 550 551 static int 552 linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times) 553 { 554 555 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 556 l_times->tv_nsec != LINUX_UTIME_NOW && 557 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 558 return (EINVAL); 559 560 times->tv_sec = l_times->tv_sec; 561 switch (l_times->tv_nsec) 562 { 563 case LINUX_UTIME_OMIT: 564 times->tv_nsec = UTIME_OMIT; 565 break; 566 case LINUX_UTIME_NOW: 567 times->tv_nsec = UTIME_NOW; 568 break; 569 default: 570 times->tv_nsec = l_times->tv_nsec; 571 } 572 573 return (0); 574 } 575 576 static int 577 linux_common_utimensat(struct thread *td, int ldfd, const char *pathname, 578 struct timespec *timesp, int lflags) 579 { 580 int dfd, flags = 0; 581 582 dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd; 583 584 if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH)) 585 return (EINVAL); 586 587 if (timesp != NULL) { 588 /* This breaks POSIX, but is what the Linux kernel does 589 * _on purpose_ (documented in the man page for utimensat(2)), 590 * so we must follow that behaviour. */ 591 if (timesp[0].tv_nsec == UTIME_OMIT && 592 timesp[1].tv_nsec == UTIME_OMIT) 593 return (0); 594 } 595 596 if (lflags & LINUX_AT_SYMLINK_NOFOLLOW) 597 flags |= AT_SYMLINK_NOFOLLOW; 598 if (lflags & LINUX_AT_EMPTY_PATH) 599 flags |= AT_EMPTY_PATH; 600 601 if (pathname != NULL) 602 return (kern_utimensat(td, dfd, pathname, 603 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 604 605 if (lflags != 0) 606 return (EINVAL); 607 608 return (kern_futimens(td, dfd, timesp, UIO_SYSSPACE)); 609 } 610 611 int 612 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 613 { 614 struct l_timespec l_times[2]; 615 struct timespec times[2], *timesp; 616 int error; 617 618 if (args->times != NULL) { 619 error = copyin(args->times, l_times, sizeof(l_times)); 620 if (error != 0) 621 return (error); 622 623 error = linux_utimensat_lts_to_ts(&l_times[0], ×[0]); 624 if (error != 0) 625 return (error); 626 error = linux_utimensat_lts_to_ts(&l_times[1], ×[1]); 627 if (error != 0) 628 return (error); 629 timesp = times; 630 } else 631 timesp = NULL; 632 633 return (linux_common_utimensat(td, args->dfd, args->pathname, 634 timesp, args->flags)); 635 } 636 637 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 638 static int 639 linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times) 640 { 641 642 /* Zero out the padding in compat mode. */ 643 l_times->tv_nsec &= 0xFFFFFFFFUL; 644 645 if (l_times->tv_nsec != LINUX_UTIME_OMIT && 646 l_times->tv_nsec != LINUX_UTIME_NOW && 647 (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999)) 648 return (EINVAL); 649 650 times->tv_sec = l_times->tv_sec; 651 switch (l_times->tv_nsec) 652 { 653 case LINUX_UTIME_OMIT: 654 times->tv_nsec = UTIME_OMIT; 655 break; 656 case LINUX_UTIME_NOW: 657 times->tv_nsec = UTIME_NOW; 658 break; 659 default: 660 times->tv_nsec = l_times->tv_nsec; 661 } 662 663 return (0); 664 } 665 666 int 667 linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args) 668 { 669 struct l_timespec64 l_times[2]; 670 struct timespec times[2], *timesp; 671 int error; 672 673 if (args->times64 != NULL) { 674 error = copyin(args->times64, l_times, sizeof(l_times)); 675 if (error != 0) 676 return (error); 677 678 error = linux_utimensat_lts64_to_ts(&l_times[0], ×[0]); 679 if (error != 0) 680 return (error); 681 error = linux_utimensat_lts64_to_ts(&l_times[1], ×[1]); 682 if (error != 0) 683 return (error); 684 timesp = times; 685 } else 686 timesp = NULL; 687 688 return (linux_common_utimensat(td, args->dfd, args->pathname, 689 timesp, args->flags)); 690 } 691 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 692 693 #ifdef LINUX_LEGACY_SYSCALLS 694 int 695 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 696 { 697 l_timeval ltv[2]; 698 struct timeval tv[2], *tvp = NULL; 699 int error, dfd; 700 701 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 702 703 if (args->utimes != NULL) { 704 if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0) 705 return (error); 706 tv[0].tv_sec = ltv[0].tv_sec; 707 tv[0].tv_usec = ltv[0].tv_usec; 708 tv[1].tv_sec = ltv[1].tv_sec; 709 tv[1].tv_usec = ltv[1].tv_usec; 710 tvp = tv; 711 } 712 713 return (kern_utimesat(td, dfd, args->filename, UIO_USERSPACE, 714 tvp, UIO_SYSSPACE)); 715 } 716 #endif 717 718 static int 719 linux_common_wait(struct thread *td, idtype_t idtype, int id, int *statusp, 720 int options, void *rup, l_siginfo_t *infop) 721 { 722 l_siginfo_t lsi; 723 siginfo_t siginfo; 724 struct __wrusage wru; 725 int error, status, tmpstat, sig; 726 727 error = kern_wait6(td, idtype, id, &status, options, 728 rup != NULL ? &wru : NULL, &siginfo); 729 730 if (error == 0 && statusp) { 731 tmpstat = status & 0xffff; 732 if (WIFSIGNALED(tmpstat)) { 733 tmpstat = (tmpstat & 0xffffff80) | 734 bsd_to_linux_signal(WTERMSIG(tmpstat)); 735 } else if (WIFSTOPPED(tmpstat)) { 736 tmpstat = (tmpstat & 0xffff00ff) | 737 (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8); 738 #if defined(__aarch64__) || (defined(__amd64__) && !defined(COMPAT_LINUX32)) 739 if (WSTOPSIG(status) == SIGTRAP) { 740 tmpstat = linux_ptrace_status(td, 741 siginfo.si_pid, tmpstat); 742 } 743 #endif 744 } else if (WIFCONTINUED(tmpstat)) { 745 tmpstat = 0xffff; 746 } 747 error = copyout(&tmpstat, statusp, sizeof(int)); 748 } 749 if (error == 0 && rup != NULL) 750 error = linux_copyout_rusage(&wru.wru_self, rup); 751 if (error == 0 && infop != NULL && td->td_retval[0] != 0) { 752 sig = bsd_to_linux_signal(siginfo.si_signo); 753 memset(&lsi, 0, sizeof(lsi)); 754 siginfo_to_lsiginfo(&siginfo, &lsi, sig); 755 error = copyout(&lsi, infop, sizeof(lsi)); 756 } 757 758 return (error); 759 } 760 761 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 762 int 763 linux_waitpid(struct thread *td, struct linux_waitpid_args *args) 764 { 765 struct linux_wait4_args wait4_args = { 766 .pid = args->pid, 767 .status = args->status, 768 .options = args->options, 769 .rusage = NULL, 770 }; 771 772 return (linux_wait4(td, &wait4_args)); 773 } 774 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 775 776 int 777 linux_wait4(struct thread *td, struct linux_wait4_args *args) 778 { 779 struct proc *p; 780 int options, id, idtype; 781 782 if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG | 783 LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 784 return (EINVAL); 785 786 /* -INT_MIN is not defined. */ 787 if (args->pid == INT_MIN) 788 return (ESRCH); 789 790 options = 0; 791 linux_to_bsd_waitopts(args->options, &options); 792 793 /* 794 * For backward compatibility we implicitly add flags WEXITED 795 * and WTRAPPED here. 796 */ 797 options |= WEXITED | WTRAPPED; 798 799 if (args->pid == WAIT_ANY) { 800 idtype = P_ALL; 801 id = 0; 802 } else if (args->pid < 0) { 803 idtype = P_PGID; 804 id = (id_t)-args->pid; 805 } else if (args->pid == 0) { 806 idtype = P_PGID; 807 p = td->td_proc; 808 PROC_LOCK(p); 809 id = p->p_pgid; 810 PROC_UNLOCK(p); 811 } else { 812 idtype = P_PID; 813 id = (id_t)args->pid; 814 } 815 816 return (linux_common_wait(td, idtype, id, args->status, options, 817 args->rusage, NULL)); 818 } 819 820 int 821 linux_waitid(struct thread *td, struct linux_waitid_args *args) 822 { 823 idtype_t idtype; 824 int error, options; 825 struct proc *p; 826 pid_t id; 827 828 if (args->options & ~(LINUX_WNOHANG | LINUX_WNOWAIT | LINUX_WEXITED | 829 LINUX_WSTOPPED | LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL)) 830 return (EINVAL); 831 832 options = 0; 833 linux_to_bsd_waitopts(args->options, &options); 834 835 id = args->id; 836 switch (args->idtype) { 837 case LINUX_P_ALL: 838 idtype = P_ALL; 839 break; 840 case LINUX_P_PID: 841 if (args->id <= 0) 842 return (EINVAL); 843 idtype = P_PID; 844 break; 845 case LINUX_P_PGID: 846 if (linux_kernver(td) >= LINUX_KERNVER(5,4,0) && args->id == 0) { 847 p = td->td_proc; 848 PROC_LOCK(p); 849 id = p->p_pgid; 850 PROC_UNLOCK(p); 851 } else if (args->id <= 0) 852 return (EINVAL); 853 idtype = P_PGID; 854 break; 855 case LINUX_P_PIDFD: 856 LINUX_RATELIMIT_MSG("unsupported waitid P_PIDFD idtype"); 857 return (ENOSYS); 858 default: 859 return (EINVAL); 860 } 861 862 error = linux_common_wait(td, idtype, id, NULL, options, 863 args->rusage, args->info); 864 td->td_retval[0] = 0; 865 866 return (error); 867 } 868 869 #ifdef LINUX_LEGACY_SYSCALLS 870 int 871 linux_mknod(struct thread *td, struct linux_mknod_args *args) 872 { 873 int error; 874 875 switch (args->mode & S_IFMT) { 876 case S_IFIFO: 877 case S_IFSOCK: 878 error = kern_mkfifoat(td, AT_FDCWD, args->path, UIO_USERSPACE, 879 args->mode); 880 break; 881 882 case S_IFCHR: 883 case S_IFBLK: 884 error = kern_mknodat(td, AT_FDCWD, args->path, UIO_USERSPACE, 885 args->mode, linux_decode_dev(args->dev)); 886 break; 887 888 case S_IFDIR: 889 error = EPERM; 890 break; 891 892 case 0: 893 args->mode |= S_IFREG; 894 /* FALLTHROUGH */ 895 case S_IFREG: 896 error = kern_openat(td, AT_FDCWD, args->path, UIO_USERSPACE, 897 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 898 if (error == 0) 899 kern_close(td, td->td_retval[0]); 900 break; 901 902 default: 903 error = EINVAL; 904 break; 905 } 906 return (error); 907 } 908 #endif 909 910 int 911 linux_mknodat(struct thread *td, struct linux_mknodat_args *args) 912 { 913 int error, dfd; 914 915 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 916 917 switch (args->mode & S_IFMT) { 918 case S_IFIFO: 919 case S_IFSOCK: 920 error = kern_mkfifoat(td, dfd, args->filename, UIO_USERSPACE, 921 args->mode); 922 break; 923 924 case S_IFCHR: 925 case S_IFBLK: 926 error = kern_mknodat(td, dfd, args->filename, UIO_USERSPACE, 927 args->mode, linux_decode_dev(args->dev)); 928 break; 929 930 case S_IFDIR: 931 error = EPERM; 932 break; 933 934 case 0: 935 args->mode |= S_IFREG; 936 /* FALLTHROUGH */ 937 case S_IFREG: 938 error = kern_openat(td, dfd, args->filename, UIO_USERSPACE, 939 O_WRONLY | O_CREAT | O_TRUNC, args->mode); 940 if (error == 0) 941 kern_close(td, td->td_retval[0]); 942 break; 943 944 default: 945 error = EINVAL; 946 break; 947 } 948 return (error); 949 } 950 951 /* 952 * UGH! This is just about the dumbest idea I've ever heard!! 953 */ 954 int 955 linux_personality(struct thread *td, struct linux_personality_args *args) 956 { 957 struct linux_pemuldata *pem; 958 struct proc *p = td->td_proc; 959 uint32_t old; 960 961 PROC_LOCK(p); 962 pem = pem_find(p); 963 old = pem->persona; 964 if (args->per != 0xffffffff) 965 pem->persona = args->per; 966 PROC_UNLOCK(p); 967 968 td->td_retval[0] = old; 969 return (0); 970 } 971 972 struct l_itimerval { 973 l_timeval it_interval; 974 l_timeval it_value; 975 }; 976 977 #define B2L_ITIMERVAL(bip, lip) \ 978 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 979 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 980 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 981 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 982 983 int 984 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 985 { 986 int error; 987 struct l_itimerval ls; 988 struct itimerval aitv, oitv; 989 990 if (uap->itv == NULL) { 991 uap->itv = uap->oitv; 992 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 993 } 994 995 error = copyin(uap->itv, &ls, sizeof(ls)); 996 if (error != 0) 997 return (error); 998 B2L_ITIMERVAL(&aitv, &ls); 999 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1000 if (error != 0 || uap->oitv == NULL) 1001 return (error); 1002 B2L_ITIMERVAL(&ls, &oitv); 1003 1004 return (copyout(&ls, uap->oitv, sizeof(ls))); 1005 } 1006 1007 int 1008 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1009 { 1010 int error; 1011 struct l_itimerval ls; 1012 struct itimerval aitv; 1013 1014 error = kern_getitimer(td, uap->which, &aitv); 1015 if (error != 0) 1016 return (error); 1017 B2L_ITIMERVAL(&ls, &aitv); 1018 return (copyout(&ls, uap->itv, sizeof(ls))); 1019 } 1020 1021 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1022 int 1023 linux_nice(struct thread *td, struct linux_nice_args *args) 1024 { 1025 1026 return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc)); 1027 } 1028 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1029 1030 int 1031 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1032 { 1033 const int ngrp = args->gidsetsize; 1034 struct ucred *newcred, *oldcred; 1035 l_gid_t *linux_gidset; 1036 int error; 1037 struct proc *p; 1038 1039 if (ngrp < 0 || ngrp > ngroups_max) 1040 return (EINVAL); 1041 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1042 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1043 if (error) 1044 goto out; 1045 1046 newcred = crget(); 1047 crextend(newcred, ngrp); 1048 p = td->td_proc; 1049 PROC_LOCK(p); 1050 oldcred = crcopysafe(p, newcred); 1051 1052 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1053 PROC_UNLOCK(p); 1054 crfree(newcred); 1055 goto out; 1056 } 1057 1058 newcred->cr_ngroups = ngrp; 1059 for (int i = 0; i < ngrp; i++) 1060 newcred->cr_groups[i] = linux_gidset[i]; 1061 newcred->cr_flags |= CRED_FLAG_GROUPSET; 1062 1063 setsugid(p); 1064 proc_set_cred(p, newcred); 1065 PROC_UNLOCK(p); 1066 crfree(oldcred); 1067 error = 0; 1068 out: 1069 free(linux_gidset, M_LINUX); 1070 return (error); 1071 } 1072 1073 int 1074 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1075 { 1076 const struct ucred *const cred = td->td_ucred; 1077 l_gid_t *linux_gidset; 1078 int ngrp, error; 1079 1080 ngrp = args->gidsetsize; 1081 1082 if (ngrp == 0) { 1083 td->td_retval[0] = cred->cr_ngroups; 1084 return (0); 1085 } 1086 if (ngrp < cred->cr_ngroups) 1087 return (EINVAL); 1088 1089 ngrp = cred->cr_ngroups; 1090 1091 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1092 for (int i = 0; i < ngrp; ++i) 1093 linux_gidset[i] = cred->cr_groups[i]; 1094 1095 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1096 free(linux_gidset, M_LINUX); 1097 1098 if (error != 0) 1099 return (error); 1100 1101 td->td_retval[0] = ngrp; 1102 return (0); 1103 } 1104 1105 static bool 1106 linux_get_dummy_limit(struct thread *td, l_uint resource, struct rlimit *rlim) 1107 { 1108 ssize_t size; 1109 int res, error; 1110 1111 if (linux_dummy_rlimits == 0) 1112 return (false); 1113 1114 switch (resource) { 1115 case LINUX_RLIMIT_LOCKS: 1116 case LINUX_RLIMIT_RTTIME: 1117 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1118 rlim->rlim_max = LINUX_RLIM_INFINITY; 1119 return (true); 1120 case LINUX_RLIMIT_NICE: 1121 case LINUX_RLIMIT_RTPRIO: 1122 rlim->rlim_cur = 0; 1123 rlim->rlim_max = 0; 1124 return (true); 1125 case LINUX_RLIMIT_SIGPENDING: 1126 error = kernel_sysctlbyname(td, 1127 "kern.sigqueue.max_pending_per_proc", 1128 &res, &size, 0, 0, 0, 0); 1129 if (error != 0) 1130 return (false); 1131 rlim->rlim_cur = res; 1132 rlim->rlim_max = res; 1133 return (true); 1134 case LINUX_RLIMIT_MSGQUEUE: 1135 error = kernel_sysctlbyname(td, 1136 "kern.ipc.msgmnb", &res, &size, 0, 0, 0, 0); 1137 if (error != 0) 1138 return (false); 1139 rlim->rlim_cur = res; 1140 rlim->rlim_max = res; 1141 return (true); 1142 default: 1143 return (false); 1144 } 1145 } 1146 1147 int 1148 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1149 { 1150 struct rlimit bsd_rlim; 1151 struct l_rlimit rlim; 1152 u_int which; 1153 int error; 1154 1155 if (args->resource >= LINUX_RLIM_NLIMITS) 1156 return (EINVAL); 1157 1158 which = linux_to_bsd_resource[args->resource]; 1159 if (which == -1) 1160 return (EINVAL); 1161 1162 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1163 if (error) 1164 return (error); 1165 1166 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1167 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1168 return (kern_setrlimit(td, which, &bsd_rlim)); 1169 } 1170 1171 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1172 int 1173 linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args) 1174 { 1175 struct l_rlimit rlim; 1176 struct rlimit bsd_rlim; 1177 u_int which; 1178 1179 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1180 rlim.rlim_cur = bsd_rlim.rlim_cur; 1181 rlim.rlim_max = bsd_rlim.rlim_max; 1182 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1183 } 1184 1185 if (args->resource >= LINUX_RLIM_NLIMITS) 1186 return (EINVAL); 1187 1188 which = linux_to_bsd_resource[args->resource]; 1189 if (which == -1) 1190 return (EINVAL); 1191 1192 lim_rlimit(td, which, &bsd_rlim); 1193 1194 #ifdef COMPAT_LINUX32 1195 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1196 if (rlim.rlim_cur == UINT_MAX) 1197 rlim.rlim_cur = INT_MAX; 1198 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1199 if (rlim.rlim_max == UINT_MAX) 1200 rlim.rlim_max = INT_MAX; 1201 #else 1202 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1203 if (rlim.rlim_cur == ULONG_MAX) 1204 rlim.rlim_cur = LONG_MAX; 1205 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1206 if (rlim.rlim_max == ULONG_MAX) 1207 rlim.rlim_max = LONG_MAX; 1208 #endif 1209 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1210 } 1211 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1212 1213 int 1214 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1215 { 1216 struct l_rlimit rlim; 1217 struct rlimit bsd_rlim; 1218 u_int which; 1219 1220 if (linux_get_dummy_limit(td, args->resource, &bsd_rlim)) { 1221 rlim.rlim_cur = bsd_rlim.rlim_cur; 1222 rlim.rlim_max = bsd_rlim.rlim_max; 1223 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1224 } 1225 1226 if (args->resource >= LINUX_RLIM_NLIMITS) 1227 return (EINVAL); 1228 1229 which = linux_to_bsd_resource[args->resource]; 1230 if (which == -1) 1231 return (EINVAL); 1232 1233 lim_rlimit(td, which, &bsd_rlim); 1234 1235 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1236 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1237 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1238 } 1239 1240 int 1241 linux_sched_setscheduler(struct thread *td, 1242 struct linux_sched_setscheduler_args *args) 1243 { 1244 struct sched_param sched_param; 1245 struct thread *tdt; 1246 int error, policy; 1247 1248 switch (args->policy) { 1249 case LINUX_SCHED_OTHER: 1250 policy = SCHED_OTHER; 1251 break; 1252 case LINUX_SCHED_FIFO: 1253 policy = SCHED_FIFO; 1254 break; 1255 case LINUX_SCHED_RR: 1256 policy = SCHED_RR; 1257 break; 1258 default: 1259 return (EINVAL); 1260 } 1261 1262 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1263 if (error) 1264 return (error); 1265 1266 if (linux_map_sched_prio) { 1267 switch (policy) { 1268 case SCHED_OTHER: 1269 if (sched_param.sched_priority != 0) 1270 return (EINVAL); 1271 1272 sched_param.sched_priority = 1273 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1274 break; 1275 case SCHED_FIFO: 1276 case SCHED_RR: 1277 if (sched_param.sched_priority < 1 || 1278 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1279 return (EINVAL); 1280 1281 /* 1282 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1283 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1284 */ 1285 sched_param.sched_priority = 1286 (sched_param.sched_priority - 1) * 1287 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1288 (LINUX_MAX_RT_PRIO - 1); 1289 break; 1290 } 1291 } 1292 1293 tdt = linux_tdfind(td, args->pid, -1); 1294 if (tdt == NULL) 1295 return (ESRCH); 1296 1297 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1298 PROC_UNLOCK(tdt->td_proc); 1299 return (error); 1300 } 1301 1302 int 1303 linux_sched_getscheduler(struct thread *td, 1304 struct linux_sched_getscheduler_args *args) 1305 { 1306 struct thread *tdt; 1307 int error, policy; 1308 1309 tdt = linux_tdfind(td, args->pid, -1); 1310 if (tdt == NULL) 1311 return (ESRCH); 1312 1313 error = kern_sched_getscheduler(td, tdt, &policy); 1314 PROC_UNLOCK(tdt->td_proc); 1315 1316 switch (policy) { 1317 case SCHED_OTHER: 1318 td->td_retval[0] = LINUX_SCHED_OTHER; 1319 break; 1320 case SCHED_FIFO: 1321 td->td_retval[0] = LINUX_SCHED_FIFO; 1322 break; 1323 case SCHED_RR: 1324 td->td_retval[0] = LINUX_SCHED_RR; 1325 break; 1326 } 1327 return (error); 1328 } 1329 1330 int 1331 linux_sched_get_priority_max(struct thread *td, 1332 struct linux_sched_get_priority_max_args *args) 1333 { 1334 struct sched_get_priority_max_args bsd; 1335 1336 if (linux_map_sched_prio) { 1337 switch (args->policy) { 1338 case LINUX_SCHED_OTHER: 1339 td->td_retval[0] = 0; 1340 return (0); 1341 case LINUX_SCHED_FIFO: 1342 case LINUX_SCHED_RR: 1343 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1344 return (0); 1345 default: 1346 return (EINVAL); 1347 } 1348 } 1349 1350 switch (args->policy) { 1351 case LINUX_SCHED_OTHER: 1352 bsd.policy = SCHED_OTHER; 1353 break; 1354 case LINUX_SCHED_FIFO: 1355 bsd.policy = SCHED_FIFO; 1356 break; 1357 case LINUX_SCHED_RR: 1358 bsd.policy = SCHED_RR; 1359 break; 1360 default: 1361 return (EINVAL); 1362 } 1363 return (sys_sched_get_priority_max(td, &bsd)); 1364 } 1365 1366 int 1367 linux_sched_get_priority_min(struct thread *td, 1368 struct linux_sched_get_priority_min_args *args) 1369 { 1370 struct sched_get_priority_min_args bsd; 1371 1372 if (linux_map_sched_prio) { 1373 switch (args->policy) { 1374 case LINUX_SCHED_OTHER: 1375 td->td_retval[0] = 0; 1376 return (0); 1377 case LINUX_SCHED_FIFO: 1378 case LINUX_SCHED_RR: 1379 td->td_retval[0] = 1; 1380 return (0); 1381 default: 1382 return (EINVAL); 1383 } 1384 } 1385 1386 switch (args->policy) { 1387 case LINUX_SCHED_OTHER: 1388 bsd.policy = SCHED_OTHER; 1389 break; 1390 case LINUX_SCHED_FIFO: 1391 bsd.policy = SCHED_FIFO; 1392 break; 1393 case LINUX_SCHED_RR: 1394 bsd.policy = SCHED_RR; 1395 break; 1396 default: 1397 return (EINVAL); 1398 } 1399 return (sys_sched_get_priority_min(td, &bsd)); 1400 } 1401 1402 #define REBOOT_CAD_ON 0x89abcdef 1403 #define REBOOT_CAD_OFF 0 1404 #define REBOOT_HALT 0xcdef0123 1405 #define REBOOT_RESTART 0x01234567 1406 #define REBOOT_RESTART2 0xA1B2C3D4 1407 #define REBOOT_POWEROFF 0x4321FEDC 1408 #define REBOOT_MAGIC1 0xfee1dead 1409 #define REBOOT_MAGIC2 0x28121969 1410 #define REBOOT_MAGIC2A 0x05121996 1411 #define REBOOT_MAGIC2B 0x16041998 1412 1413 int 1414 linux_reboot(struct thread *td, struct linux_reboot_args *args) 1415 { 1416 struct reboot_args bsd_args; 1417 1418 if (args->magic1 != REBOOT_MAGIC1) 1419 return (EINVAL); 1420 1421 switch (args->magic2) { 1422 case REBOOT_MAGIC2: 1423 case REBOOT_MAGIC2A: 1424 case REBOOT_MAGIC2B: 1425 break; 1426 default: 1427 return (EINVAL); 1428 } 1429 1430 switch (args->cmd) { 1431 case REBOOT_CAD_ON: 1432 case REBOOT_CAD_OFF: 1433 return (priv_check(td, PRIV_REBOOT)); 1434 case REBOOT_HALT: 1435 bsd_args.opt = RB_HALT; 1436 break; 1437 case REBOOT_RESTART: 1438 case REBOOT_RESTART2: 1439 bsd_args.opt = 0; 1440 break; 1441 case REBOOT_POWEROFF: 1442 bsd_args.opt = RB_POWEROFF; 1443 break; 1444 default: 1445 return (EINVAL); 1446 } 1447 return (sys_reboot(td, &bsd_args)); 1448 } 1449 1450 int 1451 linux_getpid(struct thread *td, struct linux_getpid_args *args) 1452 { 1453 1454 td->td_retval[0] = td->td_proc->p_pid; 1455 1456 return (0); 1457 } 1458 1459 int 1460 linux_gettid(struct thread *td, struct linux_gettid_args *args) 1461 { 1462 struct linux_emuldata *em; 1463 1464 em = em_find(td); 1465 KASSERT(em != NULL, ("gettid: emuldata not found.\n")); 1466 1467 td->td_retval[0] = em->em_tid; 1468 1469 return (0); 1470 } 1471 1472 int 1473 linux_getppid(struct thread *td, struct linux_getppid_args *args) 1474 { 1475 1476 td->td_retval[0] = kern_getppid(td); 1477 return (0); 1478 } 1479 1480 int 1481 linux_getgid(struct thread *td, struct linux_getgid_args *args) 1482 { 1483 1484 td->td_retval[0] = td->td_ucred->cr_rgid; 1485 return (0); 1486 } 1487 1488 int 1489 linux_getuid(struct thread *td, struct linux_getuid_args *args) 1490 { 1491 1492 td->td_retval[0] = td->td_ucred->cr_ruid; 1493 return (0); 1494 } 1495 1496 int 1497 linux_getsid(struct thread *td, struct linux_getsid_args *args) 1498 { 1499 1500 return (kern_getsid(td, args->pid)); 1501 } 1502 1503 int 1504 linux_getpriority(struct thread *td, struct linux_getpriority_args *args) 1505 { 1506 int error; 1507 1508 error = kern_getpriority(td, args->which, args->who); 1509 td->td_retval[0] = 20 - td->td_retval[0]; 1510 return (error); 1511 } 1512 1513 int 1514 linux_sethostname(struct thread *td, struct linux_sethostname_args *args) 1515 { 1516 int name[2]; 1517 1518 name[0] = CTL_KERN; 1519 name[1] = KERN_HOSTNAME; 1520 return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname, 1521 args->len, 0, 0)); 1522 } 1523 1524 int 1525 linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args) 1526 { 1527 int name[2]; 1528 1529 name[0] = CTL_KERN; 1530 name[1] = KERN_NISDOMAINNAME; 1531 return (userland_sysctl(td, name, 2, 0, 0, 0, args->name, 1532 args->len, 0, 0)); 1533 } 1534 1535 int 1536 linux_exit_group(struct thread *td, struct linux_exit_group_args *args) 1537 { 1538 1539 LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1540 args->error_code); 1541 1542 /* 1543 * XXX: we should send a signal to the parent if 1544 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1545 * as it doesnt occur often. 1546 */ 1547 exit1(td, args->error_code, 0); 1548 /* NOTREACHED */ 1549 } 1550 1551 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1552 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1553 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1554 1555 struct l_user_cap_header { 1556 l_int version; 1557 l_int pid; 1558 }; 1559 1560 struct l_user_cap_data { 1561 l_int effective; 1562 l_int permitted; 1563 l_int inheritable; 1564 }; 1565 1566 int 1567 linux_capget(struct thread *td, struct linux_capget_args *uap) 1568 { 1569 struct l_user_cap_header luch; 1570 struct l_user_cap_data lucd[2]; 1571 int error, u32s; 1572 1573 if (uap->hdrp == NULL) 1574 return (EFAULT); 1575 1576 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1577 if (error != 0) 1578 return (error); 1579 1580 switch (luch.version) { 1581 case _LINUX_CAPABILITY_VERSION_1: 1582 u32s = 1; 1583 break; 1584 case _LINUX_CAPABILITY_VERSION_2: 1585 case _LINUX_CAPABILITY_VERSION_3: 1586 u32s = 2; 1587 break; 1588 default: 1589 luch.version = _LINUX_CAPABILITY_VERSION_1; 1590 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1591 if (error) 1592 return (error); 1593 return (EINVAL); 1594 } 1595 1596 if (luch.pid) 1597 return (EPERM); 1598 1599 if (uap->datap) { 1600 /* 1601 * The current implementation doesn't support setting 1602 * a capability (it's essentially a stub) so indicate 1603 * that no capabilities are currently set or available 1604 * to request. 1605 */ 1606 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1607 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1608 } 1609 1610 return (error); 1611 } 1612 1613 int 1614 linux_capset(struct thread *td, struct linux_capset_args *uap) 1615 { 1616 struct l_user_cap_header luch; 1617 struct l_user_cap_data lucd[2]; 1618 int error, i, u32s; 1619 1620 if (uap->hdrp == NULL || uap->datap == NULL) 1621 return (EFAULT); 1622 1623 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1624 if (error != 0) 1625 return (error); 1626 1627 switch (luch.version) { 1628 case _LINUX_CAPABILITY_VERSION_1: 1629 u32s = 1; 1630 break; 1631 case _LINUX_CAPABILITY_VERSION_2: 1632 case _LINUX_CAPABILITY_VERSION_3: 1633 u32s = 2; 1634 break; 1635 default: 1636 luch.version = _LINUX_CAPABILITY_VERSION_1; 1637 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1638 if (error) 1639 return (error); 1640 return (EINVAL); 1641 } 1642 1643 if (luch.pid) 1644 return (EPERM); 1645 1646 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1647 if (error != 0) 1648 return (error); 1649 1650 /* We currently don't support setting any capabilities. */ 1651 for (i = 0; i < u32s; i++) { 1652 if (lucd[i].effective || lucd[i].permitted || 1653 lucd[i].inheritable) { 1654 linux_msg(td, 1655 "capset[%d] effective=0x%x, permitted=0x%x, " 1656 "inheritable=0x%x is not implemented", i, 1657 (int)lucd[i].effective, (int)lucd[i].permitted, 1658 (int)lucd[i].inheritable); 1659 return (EPERM); 1660 } 1661 } 1662 1663 return (0); 1664 } 1665 1666 int 1667 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1668 { 1669 int error = 0, max_size, arg; 1670 struct proc *p = td->td_proc; 1671 char comm[LINUX_MAX_COMM_LEN]; 1672 int pdeath_signal, trace_state; 1673 1674 switch (args->option) { 1675 case LINUX_PR_SET_PDEATHSIG: 1676 if (!LINUX_SIG_VALID(args->arg2)) 1677 return (EINVAL); 1678 pdeath_signal = linux_to_bsd_signal(args->arg2); 1679 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1680 &pdeath_signal)); 1681 case LINUX_PR_GET_PDEATHSIG: 1682 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1683 &pdeath_signal); 1684 if (error != 0) 1685 return (error); 1686 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1687 return (copyout(&pdeath_signal, 1688 (void *)(register_t)args->arg2, 1689 sizeof(pdeath_signal))); 1690 /* 1691 * In Linux, this flag controls if set[gu]id processes can coredump. 1692 * There are additional semantics imposed on processes that cannot 1693 * coredump: 1694 * - Such processes can not be ptraced. 1695 * - There are some semantics around ownership of process-related files 1696 * in the /proc namespace. 1697 * 1698 * In FreeBSD, we can (and by default, do) disable setuid coredump 1699 * system-wide with 'sugid_coredump.' We control tracability on a 1700 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1701 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1702 * procctl is roughly analogous to Linux's DUMPABLE. 1703 * 1704 * So, proxy these knobs to the corresponding PROC_TRACE setting. 1705 */ 1706 case LINUX_PR_GET_DUMPABLE: 1707 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1708 &trace_state); 1709 if (error != 0) 1710 return (error); 1711 td->td_retval[0] = (trace_state != -1); 1712 return (0); 1713 case LINUX_PR_SET_DUMPABLE: 1714 /* 1715 * It is only valid for userspace to set one of these two 1716 * flags, and only one at a time. 1717 */ 1718 switch (args->arg2) { 1719 case LINUX_SUID_DUMP_DISABLE: 1720 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1721 break; 1722 case LINUX_SUID_DUMP_USER: 1723 trace_state = PROC_TRACE_CTL_ENABLE; 1724 break; 1725 default: 1726 return (EINVAL); 1727 } 1728 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1729 &trace_state)); 1730 case LINUX_PR_GET_KEEPCAPS: 1731 /* 1732 * Indicate that we always clear the effective and 1733 * permitted capability sets when the user id becomes 1734 * non-zero (actually the capability sets are simply 1735 * always zero in the current implementation). 1736 */ 1737 td->td_retval[0] = 0; 1738 break; 1739 case LINUX_PR_SET_KEEPCAPS: 1740 /* 1741 * Ignore requests to keep the effective and permitted 1742 * capability sets when the user id becomes non-zero. 1743 */ 1744 break; 1745 case LINUX_PR_SET_NAME: 1746 /* 1747 * To be on the safe side we need to make sure to not 1748 * overflow the size a Linux program expects. We already 1749 * do this here in the copyin, so that we don't need to 1750 * check on copyout. 1751 */ 1752 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 1753 error = copyinstr((void *)(register_t)args->arg2, comm, 1754 max_size, NULL); 1755 1756 /* Linux silently truncates the name if it is too long. */ 1757 if (error == ENAMETOOLONG) { 1758 /* 1759 * XXX: copyinstr() isn't documented to populate the 1760 * array completely, so do a copyin() to be on the 1761 * safe side. This should be changed in case 1762 * copyinstr() is changed to guarantee this. 1763 */ 1764 error = copyin((void *)(register_t)args->arg2, comm, 1765 max_size - 1); 1766 comm[max_size - 1] = '\0'; 1767 } 1768 if (error) 1769 return (error); 1770 1771 PROC_LOCK(p); 1772 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 1773 PROC_UNLOCK(p); 1774 break; 1775 case LINUX_PR_GET_NAME: 1776 PROC_LOCK(p); 1777 strlcpy(comm, p->p_comm, sizeof(comm)); 1778 PROC_UNLOCK(p); 1779 error = copyout(comm, (void *)(register_t)args->arg2, 1780 strlen(comm) + 1); 1781 break; 1782 case LINUX_PR_GET_SECCOMP: 1783 case LINUX_PR_SET_SECCOMP: 1784 /* 1785 * Same as returned by Linux without CONFIG_SECCOMP enabled. 1786 */ 1787 error = EINVAL; 1788 break; 1789 case LINUX_PR_CAPBSET_READ: 1790 #if 0 1791 /* 1792 * This makes too much noise with Ubuntu Focal. 1793 */ 1794 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 1795 (int)args->arg2); 1796 #endif 1797 error = EINVAL; 1798 break; 1799 case LINUX_PR_SET_CHILD_SUBREAPER: 1800 if (args->arg2 == 0) { 1801 return (kern_procctl(td, P_PID, 0, PROC_REAP_RELEASE, 1802 NULL)); 1803 } 1804 1805 return (kern_procctl(td, P_PID, 0, PROC_REAP_ACQUIRE, 1806 NULL)); 1807 case LINUX_PR_GET_CHILD_SUBREAPER: { 1808 struct procctl_reaper_status rs; 1809 l_int val; 1810 1811 error = kern_procctl(td, P_PID, 0, PROC_REAP_STATUS, &rs); 1812 if (error != 0) 1813 return (error); 1814 val = rs.rs_reaper == p->p_pid ? 1 : 0; 1815 error = copyout(&val, (void *)(register_t)args->arg2, 1816 sizeof(val)); 1817 break; 1818 } 1819 case LINUX_PR_SET_NO_NEW_PRIVS: 1820 arg = args->arg2 == 1 ? 1821 PROC_NO_NEW_PRIVS_ENABLE : PROC_NO_NEW_PRIVS_DISABLE; 1822 error = kern_procctl(td, P_PID, p->p_pid, 1823 PROC_NO_NEW_PRIVS_CTL, &arg); 1824 break; 1825 case LINUX_PR_GET_NO_NEW_PRIVS: 1826 error = kern_procctl(td, P_PID, p->p_pid, 1827 PROC_NO_NEW_PRIVS_STATUS, &arg); 1828 if (error != 0) 1829 return (error); 1830 /* Linux returns the value as the syscall return */ 1831 td->td_retval[0] = arg == PROC_NO_NEW_PRIVS_ENABLE ? 1 : 0; 1832 break; 1833 case LINUX_PR_SET_PTRACER: 1834 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 1835 error = EINVAL; 1836 break; 1837 default: 1838 linux_msg(td, "unsupported prctl option %d", args->option); 1839 error = EINVAL; 1840 break; 1841 } 1842 1843 return (error); 1844 } 1845 1846 int 1847 linux_sched_setparam(struct thread *td, 1848 struct linux_sched_setparam_args *uap) 1849 { 1850 struct sched_param sched_param; 1851 struct thread *tdt; 1852 int error, policy; 1853 1854 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 1855 if (error) 1856 return (error); 1857 1858 tdt = linux_tdfind(td, uap->pid, -1); 1859 if (tdt == NULL) 1860 return (ESRCH); 1861 1862 if (linux_map_sched_prio) { 1863 error = kern_sched_getscheduler(td, tdt, &policy); 1864 if (error) 1865 goto out; 1866 1867 switch (policy) { 1868 case SCHED_OTHER: 1869 if (sched_param.sched_priority != 0) { 1870 error = EINVAL; 1871 goto out; 1872 } 1873 sched_param.sched_priority = 1874 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1875 break; 1876 case SCHED_FIFO: 1877 case SCHED_RR: 1878 if (sched_param.sched_priority < 1 || 1879 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 1880 error = EINVAL; 1881 goto out; 1882 } 1883 /* 1884 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1885 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 1886 */ 1887 sched_param.sched_priority = 1888 (sched_param.sched_priority - 1) * 1889 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1890 (LINUX_MAX_RT_PRIO - 1); 1891 break; 1892 } 1893 } 1894 1895 error = kern_sched_setparam(td, tdt, &sched_param); 1896 out: PROC_UNLOCK(tdt->td_proc); 1897 return (error); 1898 } 1899 1900 int 1901 linux_sched_getparam(struct thread *td, 1902 struct linux_sched_getparam_args *uap) 1903 { 1904 struct sched_param sched_param; 1905 struct thread *tdt; 1906 int error, policy; 1907 1908 tdt = linux_tdfind(td, uap->pid, -1); 1909 if (tdt == NULL) 1910 return (ESRCH); 1911 1912 error = kern_sched_getparam(td, tdt, &sched_param); 1913 if (error) { 1914 PROC_UNLOCK(tdt->td_proc); 1915 return (error); 1916 } 1917 1918 if (linux_map_sched_prio) { 1919 error = kern_sched_getscheduler(td, tdt, &policy); 1920 PROC_UNLOCK(tdt->td_proc); 1921 if (error) 1922 return (error); 1923 1924 switch (policy) { 1925 case SCHED_OTHER: 1926 sched_param.sched_priority = 0; 1927 break; 1928 case SCHED_FIFO: 1929 case SCHED_RR: 1930 /* 1931 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 1932 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 1933 */ 1934 sched_param.sched_priority = 1935 (sched_param.sched_priority * 1936 (LINUX_MAX_RT_PRIO - 1) + 1937 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 1938 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 1939 break; 1940 } 1941 } else 1942 PROC_UNLOCK(tdt->td_proc); 1943 1944 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 1945 return (error); 1946 } 1947 1948 /* 1949 * Get affinity of a process. 1950 */ 1951 int 1952 linux_sched_getaffinity(struct thread *td, 1953 struct linux_sched_getaffinity_args *args) 1954 { 1955 struct thread *tdt; 1956 cpuset_t *mask; 1957 size_t size; 1958 int error; 1959 id_t tid; 1960 1961 tdt = linux_tdfind(td, args->pid, -1); 1962 if (tdt == NULL) 1963 return (ESRCH); 1964 tid = tdt->td_tid; 1965 PROC_UNLOCK(tdt->td_proc); 1966 1967 mask = malloc(sizeof(cpuset_t), M_LINUX, M_WAITOK | M_ZERO); 1968 size = min(args->len, sizeof(cpuset_t)); 1969 error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 1970 tid, size, mask); 1971 if (error == ERANGE) 1972 error = EINVAL; 1973 if (error == 0) 1974 error = copyout(mask, args->user_mask_ptr, size); 1975 if (error == 0) 1976 td->td_retval[0] = size; 1977 free(mask, M_LINUX); 1978 return (error); 1979 } 1980 1981 /* 1982 * Set affinity of a process. 1983 */ 1984 int 1985 linux_sched_setaffinity(struct thread *td, 1986 struct linux_sched_setaffinity_args *args) 1987 { 1988 struct thread *tdt; 1989 cpuset_t *mask; 1990 int cpu, error; 1991 size_t len; 1992 id_t tid; 1993 1994 tdt = linux_tdfind(td, args->pid, -1); 1995 if (tdt == NULL) 1996 return (ESRCH); 1997 tid = tdt->td_tid; 1998 PROC_UNLOCK(tdt->td_proc); 1999 2000 len = min(args->len, sizeof(cpuset_t)); 2001 mask = malloc(sizeof(cpuset_t), M_TEMP, M_WAITOK | M_ZERO); 2002 error = copyin(args->user_mask_ptr, mask, len); 2003 if (error != 0) 2004 goto out; 2005 /* Linux ignore high bits */ 2006 CPU_FOREACH_ISSET(cpu, mask) 2007 if (cpu > mp_maxid) 2008 CPU_CLR(cpu, mask); 2009 2010 error = kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID, 2011 tid, mask); 2012 if (error == EDEADLK) 2013 error = EINVAL; 2014 out: 2015 free(mask, M_TEMP); 2016 return (error); 2017 } 2018 2019 struct linux_rlimit64 { 2020 uint64_t rlim_cur; 2021 uint64_t rlim_max; 2022 }; 2023 2024 int 2025 linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args) 2026 { 2027 struct rlimit rlim, nrlim; 2028 struct linux_rlimit64 lrlim; 2029 struct proc *p; 2030 u_int which; 2031 int flags; 2032 int error; 2033 2034 if (args->new == NULL && args->old != NULL) { 2035 if (linux_get_dummy_limit(td, args->resource, &rlim)) { 2036 lrlim.rlim_cur = rlim.rlim_cur; 2037 lrlim.rlim_max = rlim.rlim_max; 2038 return (copyout(&lrlim, args->old, sizeof(lrlim))); 2039 } 2040 } 2041 2042 if (args->resource >= LINUX_RLIM_NLIMITS) 2043 return (EINVAL); 2044 2045 which = linux_to_bsd_resource[args->resource]; 2046 if (which == -1) 2047 return (EINVAL); 2048 2049 if (args->new != NULL) { 2050 /* 2051 * Note. Unlike FreeBSD where rlim is signed 64-bit Linux 2052 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2053 * as INFINITY so we do not need a conversion even. 2054 */ 2055 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2056 if (error != 0) 2057 return (error); 2058 } 2059 2060 flags = PGET_HOLD | PGET_NOTWEXIT; 2061 if (args->new != NULL) 2062 flags |= PGET_CANDEBUG; 2063 else 2064 flags |= PGET_CANSEE; 2065 if (args->pid == 0) { 2066 p = td->td_proc; 2067 PHOLD(p); 2068 } else { 2069 error = pget(args->pid, flags, &p); 2070 if (error != 0) 2071 return (error); 2072 } 2073 if (args->old != NULL) { 2074 PROC_LOCK(p); 2075 lim_rlimit_proc(p, which, &rlim); 2076 PROC_UNLOCK(p); 2077 if (rlim.rlim_cur == RLIM_INFINITY) 2078 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2079 else 2080 lrlim.rlim_cur = rlim.rlim_cur; 2081 if (rlim.rlim_max == RLIM_INFINITY) 2082 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2083 else 2084 lrlim.rlim_max = rlim.rlim_max; 2085 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2086 if (error != 0) 2087 goto out; 2088 } 2089 2090 if (args->new != NULL) 2091 error = kern_proc_setrlimit(td, p, which, &nrlim); 2092 2093 out: 2094 PRELE(p); 2095 return (error); 2096 } 2097 2098 int 2099 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2100 { 2101 struct timespec ts, *tsp; 2102 int error; 2103 2104 if (args->tsp != NULL) { 2105 error = linux_get_timespec(&ts, args->tsp); 2106 if (error != 0) 2107 return (error); 2108 tsp = &ts; 2109 } else 2110 tsp = NULL; 2111 2112 error = linux_common_pselect6(td, args->nfds, args->readfds, 2113 args->writefds, args->exceptfds, tsp, args->sig); 2114 2115 if (args->tsp != NULL) 2116 linux_put_timespec(&ts, args->tsp); 2117 return (error); 2118 } 2119 2120 static int 2121 linux_common_pselect6(struct thread *td, l_int nfds, l_fd_set *readfds, 2122 l_fd_set *writefds, l_fd_set *exceptfds, struct timespec *tsp, 2123 l_uintptr_t *sig) 2124 { 2125 struct timeval utv, tv0, tv1, *tvp; 2126 struct l_pselect6arg lpse6; 2127 sigset_t *ssp; 2128 sigset_t ss; 2129 int error; 2130 2131 ssp = NULL; 2132 if (sig != NULL) { 2133 error = copyin(sig, &lpse6, sizeof(lpse6)); 2134 if (error != 0) 2135 return (error); 2136 error = linux_copyin_sigset(td, PTRIN(lpse6.ss), 2137 lpse6.ss_len, &ss, &ssp); 2138 if (error != 0) 2139 return (error); 2140 } else 2141 ssp = NULL; 2142 2143 /* 2144 * Currently glibc changes nanosecond number to microsecond. 2145 * This mean losing precision but for now it is hardly seen. 2146 */ 2147 if (tsp != NULL) { 2148 TIMESPEC_TO_TIMEVAL(&utv, tsp); 2149 if (itimerfix(&utv)) 2150 return (EINVAL); 2151 2152 microtime(&tv0); 2153 tvp = &utv; 2154 } else 2155 tvp = NULL; 2156 2157 error = kern_pselect(td, nfds, readfds, writefds, 2158 exceptfds, tvp, ssp, LINUX_NFDBITS); 2159 2160 if (tsp != NULL) { 2161 /* 2162 * Compute how much time was left of the timeout, 2163 * by subtracting the current time and the time 2164 * before we started the call, and subtracting 2165 * that result from the user-supplied value. 2166 */ 2167 microtime(&tv1); 2168 timevalsub(&tv1, &tv0); 2169 timevalsub(&utv, &tv1); 2170 if (utv.tv_sec < 0) 2171 timevalclear(&utv); 2172 TIMEVAL_TO_TIMESPEC(&utv, tsp); 2173 } 2174 return (error); 2175 } 2176 2177 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2178 int 2179 linux_pselect6_time64(struct thread *td, 2180 struct linux_pselect6_time64_args *args) 2181 { 2182 struct timespec ts, *tsp; 2183 int error; 2184 2185 if (args->tsp != NULL) { 2186 error = linux_get_timespec64(&ts, args->tsp); 2187 if (error != 0) 2188 return (error); 2189 tsp = &ts; 2190 } else 2191 tsp = NULL; 2192 2193 error = linux_common_pselect6(td, args->nfds, args->readfds, 2194 args->writefds, args->exceptfds, tsp, args->sig); 2195 2196 if (args->tsp != NULL) 2197 linux_put_timespec64(&ts, args->tsp); 2198 return (error); 2199 } 2200 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2201 2202 int 2203 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2204 { 2205 struct timespec uts, *tsp; 2206 int error; 2207 2208 if (args->tsp != NULL) { 2209 error = linux_get_timespec(&uts, args->tsp); 2210 if (error != 0) 2211 return (error); 2212 tsp = &uts; 2213 } else 2214 tsp = NULL; 2215 2216 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2217 args->sset, args->ssize); 2218 if (error == 0 && args->tsp != NULL) 2219 error = linux_put_timespec(&uts, args->tsp); 2220 return (error); 2221 } 2222 2223 static int 2224 linux_common_ppoll(struct thread *td, struct pollfd *fds, uint32_t nfds, 2225 struct timespec *tsp, l_sigset_t *sset, l_size_t ssize) 2226 { 2227 struct timespec ts0, ts1; 2228 struct pollfd stackfds[32]; 2229 struct pollfd *kfds; 2230 sigset_t *ssp; 2231 sigset_t ss; 2232 int error; 2233 2234 if (kern_poll_maxfds(nfds)) 2235 return (EINVAL); 2236 if (sset != NULL) { 2237 error = linux_copyin_sigset(td, sset, ssize, &ss, &ssp); 2238 if (error != 0) 2239 return (error); 2240 } else 2241 ssp = NULL; 2242 if (tsp != NULL) 2243 nanotime(&ts0); 2244 2245 if (nfds > nitems(stackfds)) 2246 kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); 2247 else 2248 kfds = stackfds; 2249 error = linux_pollin(td, kfds, fds, nfds); 2250 if (error != 0) 2251 goto out; 2252 2253 error = kern_poll_kfds(td, kfds, nfds, tsp, ssp); 2254 if (error == 0) 2255 error = linux_pollout(td, kfds, fds, nfds); 2256 2257 if (error == 0 && tsp != NULL) { 2258 if (td->td_retval[0]) { 2259 nanotime(&ts1); 2260 timespecsub(&ts1, &ts0, &ts1); 2261 timespecsub(tsp, &ts1, tsp); 2262 if (tsp->tv_sec < 0) 2263 timespecclear(tsp); 2264 } else 2265 timespecclear(tsp); 2266 } 2267 2268 out: 2269 if (nfds > nitems(stackfds)) 2270 free(kfds, M_TEMP); 2271 return (error); 2272 } 2273 2274 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2275 int 2276 linux_ppoll_time64(struct thread *td, struct linux_ppoll_time64_args *args) 2277 { 2278 struct timespec uts, *tsp; 2279 int error; 2280 2281 if (args->tsp != NULL) { 2282 error = linux_get_timespec64(&uts, args->tsp); 2283 if (error != 0) 2284 return (error); 2285 tsp = &uts; 2286 } else 2287 tsp = NULL; 2288 error = linux_common_ppoll(td, args->fds, args->nfds, tsp, 2289 args->sset, args->ssize); 2290 if (error == 0 && args->tsp != NULL) 2291 error = linux_put_timespec64(&uts, args->tsp); 2292 return (error); 2293 } 2294 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 2295 2296 static int 2297 linux_pollin(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2298 { 2299 int error; 2300 u_int i; 2301 2302 error = copyin(ufds, fds, nfd * sizeof(*fds)); 2303 if (error != 0) 2304 return (error); 2305 2306 for (i = 0; i < nfd; i++) { 2307 if (fds->events != 0) 2308 linux_to_bsd_poll_events(td, fds->fd, 2309 fds->events, &fds->events); 2310 fds++; 2311 } 2312 return (0); 2313 } 2314 2315 static int 2316 linux_pollout(struct thread *td, struct pollfd *fds, struct pollfd *ufds, u_int nfd) 2317 { 2318 int error = 0; 2319 u_int i, n = 0; 2320 2321 for (i = 0; i < nfd; i++) { 2322 if (fds->revents != 0) { 2323 bsd_to_linux_poll_events(fds->revents, 2324 &fds->revents); 2325 n++; 2326 } 2327 error = copyout(&fds->revents, &ufds->revents, 2328 sizeof(ufds->revents)); 2329 if (error) 2330 return (error); 2331 fds++; 2332 ufds++; 2333 } 2334 td->td_retval[0] = n; 2335 return (0); 2336 } 2337 2338 static int 2339 linux_sched_rr_get_interval_common(struct thread *td, pid_t pid, 2340 struct timespec *ts) 2341 { 2342 struct thread *tdt; 2343 int error; 2344 2345 /* 2346 * According to man in case the invalid pid specified 2347 * EINVAL should be returned. 2348 */ 2349 if (pid < 0) 2350 return (EINVAL); 2351 2352 tdt = linux_tdfind(td, pid, -1); 2353 if (tdt == NULL) 2354 return (ESRCH); 2355 2356 error = kern_sched_rr_get_interval_td(td, tdt, ts); 2357 PROC_UNLOCK(tdt->td_proc); 2358 return (error); 2359 } 2360 2361 int 2362 linux_sched_rr_get_interval(struct thread *td, 2363 struct linux_sched_rr_get_interval_args *uap) 2364 { 2365 struct timespec ts; 2366 int error; 2367 2368 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2369 if (error != 0) 2370 return (error); 2371 return (linux_put_timespec(&ts, uap->interval)); 2372 } 2373 2374 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 2375 int 2376 linux_sched_rr_get_interval_time64(struct thread *td, 2377 struct linux_sched_rr_get_interval_time64_args *uap) 2378 { 2379 struct timespec ts; 2380 int error; 2381 2382 error = linux_sched_rr_get_interval_common(td, uap->pid, &ts); 2383 if (error != 0) 2384 return (error); 2385 return (linux_put_timespec64(&ts, uap->interval)); 2386 } 2387 #endif 2388 2389 /* 2390 * In case when the Linux thread is the initial thread in 2391 * the thread group thread id is equal to the process id. 2392 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2393 */ 2394 struct thread * 2395 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2396 { 2397 struct linux_emuldata *em; 2398 struct thread *tdt; 2399 struct proc *p; 2400 2401 tdt = NULL; 2402 if (tid == 0 || tid == td->td_tid) { 2403 if (pid != -1 && td->td_proc->p_pid != pid) 2404 return (NULL); 2405 PROC_LOCK(td->td_proc); 2406 return (td); 2407 } else if (tid > PID_MAX) 2408 return (tdfind(tid, pid)); 2409 2410 /* 2411 * Initial thread where the tid equal to the pid. 2412 */ 2413 p = pfind(tid); 2414 if (p != NULL) { 2415 if (SV_PROC_ABI(p) != SV_ABI_LINUX || 2416 (pid != -1 && tid != pid)) { 2417 /* 2418 * p is not a Linuxulator process. 2419 */ 2420 PROC_UNLOCK(p); 2421 return (NULL); 2422 } 2423 FOREACH_THREAD_IN_PROC(p, tdt) { 2424 em = em_find(tdt); 2425 if (tid == em->em_tid) 2426 return (tdt); 2427 } 2428 PROC_UNLOCK(p); 2429 } 2430 return (NULL); 2431 } 2432 2433 void 2434 linux_to_bsd_waitopts(int options, int *bsdopts) 2435 { 2436 2437 if (options & LINUX_WNOHANG) 2438 *bsdopts |= WNOHANG; 2439 if (options & LINUX_WUNTRACED) 2440 *bsdopts |= WUNTRACED; 2441 if (options & LINUX_WEXITED) 2442 *bsdopts |= WEXITED; 2443 if (options & LINUX_WCONTINUED) 2444 *bsdopts |= WCONTINUED; 2445 if (options & LINUX_WNOWAIT) 2446 *bsdopts |= WNOWAIT; 2447 2448 if (options & __WCLONE) 2449 *bsdopts |= WLINUXCLONE; 2450 } 2451 2452 int 2453 linux_getrandom(struct thread *td, struct linux_getrandom_args *args) 2454 { 2455 struct uio uio; 2456 struct iovec iov; 2457 int error; 2458 2459 if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM)) 2460 return (EINVAL); 2461 if (args->count > INT_MAX) 2462 args->count = INT_MAX; 2463 2464 iov.iov_base = args->buf; 2465 iov.iov_len = args->count; 2466 2467 uio.uio_iov = &iov; 2468 uio.uio_iovcnt = 1; 2469 uio.uio_resid = iov.iov_len; 2470 uio.uio_segflg = UIO_USERSPACE; 2471 uio.uio_rw = UIO_READ; 2472 uio.uio_td = td; 2473 2474 error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK); 2475 if (error == 0) 2476 td->td_retval[0] = args->count - uio.uio_resid; 2477 return (error); 2478 } 2479 2480 int 2481 linux_mincore(struct thread *td, struct linux_mincore_args *args) 2482 { 2483 2484 /* Needs to be page-aligned */ 2485 if (args->start & PAGE_MASK) 2486 return (EINVAL); 2487 return (kern_mincore(td, args->start, args->len, args->vec)); 2488 } 2489 2490 #define SYSLOG_TAG "<6>" 2491 2492 int 2493 linux_syslog(struct thread *td, struct linux_syslog_args *args) 2494 { 2495 char buf[128], *src, *dst; 2496 u_int seq; 2497 int buflen, error; 2498 2499 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2500 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2501 return (EINVAL); 2502 } 2503 2504 if (args->len < 6) { 2505 td->td_retval[0] = 0; 2506 return (0); 2507 } 2508 2509 error = priv_check(td, PRIV_MSGBUF); 2510 if (error) 2511 return (error); 2512 2513 mtx_lock(&msgbuf_lock); 2514 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2515 mtx_unlock(&msgbuf_lock); 2516 2517 dst = args->buf; 2518 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2519 /* The -1 is to skip the trailing '\0'. */ 2520 dst += sizeof(SYSLOG_TAG) - 1; 2521 2522 while (error == 0) { 2523 mtx_lock(&msgbuf_lock); 2524 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2525 mtx_unlock(&msgbuf_lock); 2526 2527 if (buflen == 0) 2528 break; 2529 2530 for (src = buf; src < buf + buflen && error == 0; src++) { 2531 if (*src == '\0') 2532 continue; 2533 2534 if (dst >= args->buf + args->len) 2535 goto out; 2536 2537 error = copyout(src, dst, 1); 2538 dst++; 2539 2540 if (*src == '\n' && *(src + 1) != '<' && 2541 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2542 error = copyout(&SYSLOG_TAG, 2543 dst, sizeof(SYSLOG_TAG)); 2544 dst += sizeof(SYSLOG_TAG) - 1; 2545 } 2546 } 2547 } 2548 out: 2549 td->td_retval[0] = dst - args->buf; 2550 return (error); 2551 } 2552 2553 int 2554 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2555 { 2556 int cpu, error, node; 2557 2558 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2559 error = 0; 2560 node = cpuid_to_pcpu[cpu]->pc_domain; 2561 2562 if (args->cpu != NULL) 2563 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2564 if (args->node != NULL) 2565 error = copyout(&node, args->node, sizeof(l_int)); 2566 return (error); 2567 } 2568 2569 #if defined(__i386__) || defined(__amd64__) 2570 int 2571 linux_poll(struct thread *td, struct linux_poll_args *args) 2572 { 2573 struct timespec ts, *tsp; 2574 2575 if (args->timeout != INFTIM) { 2576 if (args->timeout < 0) 2577 return (EINVAL); 2578 ts.tv_sec = args->timeout / 1000; 2579 ts.tv_nsec = (args->timeout % 1000) * 1000000; 2580 tsp = &ts; 2581 } else 2582 tsp = NULL; 2583 2584 return (linux_common_ppoll(td, args->fds, args->nfds, 2585 tsp, NULL, 0)); 2586 } 2587 #endif /* __i386__ || __amd64__ */ 2588 2589 int 2590 linux_seccomp(struct thread *td, struct linux_seccomp_args *args) 2591 { 2592 2593 switch (args->op) { 2594 case LINUX_SECCOMP_GET_ACTION_AVAIL: 2595 return (EOPNOTSUPP); 2596 default: 2597 /* 2598 * Ignore unknown operations, just like Linux kernel built 2599 * without CONFIG_SECCOMP. 2600 */ 2601 return (EINVAL); 2602 } 2603 } 2604 2605 /* 2606 * Custom version of exec_copyin_args(), to copy out argument and environment 2607 * strings from the old process address space into the temporary string buffer. 2608 * Based on freebsd32_exec_copyin_args. 2609 */ 2610 static int 2611 linux_exec_copyin_args(struct image_args *args, const char *fname, 2612 l_uintptr_t *argv, l_uintptr_t *envv) 2613 { 2614 char *argp, *envp; 2615 l_uintptr_t *ptr, arg; 2616 int error; 2617 2618 bzero(args, sizeof(*args)); 2619 if (argv == NULL) 2620 return (EFAULT); 2621 2622 /* 2623 * Allocate demand-paged memory for the file name, argument, and 2624 * environment strings. 2625 */ 2626 error = exec_alloc_args(args); 2627 if (error != 0) 2628 return (error); 2629 2630 /* 2631 * Copy the file name. 2632 */ 2633 error = exec_args_add_fname(args, fname, UIO_USERSPACE); 2634 if (error != 0) 2635 goto err_exit; 2636 2637 /* 2638 * extract arguments first 2639 */ 2640 ptr = argv; 2641 for (;;) { 2642 error = copyin(ptr++, &arg, sizeof(arg)); 2643 if (error) 2644 goto err_exit; 2645 if (arg == 0) 2646 break; 2647 argp = PTRIN(arg); 2648 error = exec_args_add_arg(args, argp, UIO_USERSPACE); 2649 if (error != 0) 2650 goto err_exit; 2651 } 2652 2653 /* 2654 * This comment is from Linux do_execveat_common: 2655 * When argv is empty, add an empty string ("") as argv[0] to 2656 * ensure confused userspace programs that start processing 2657 * from argv[1] won't end up walking envp. 2658 */ 2659 if (args->argc == 0 && 2660 (error = exec_args_add_arg(args, "", UIO_SYSSPACE) != 0)) 2661 goto err_exit; 2662 2663 /* 2664 * extract environment strings 2665 */ 2666 if (envv) { 2667 ptr = envv; 2668 for (;;) { 2669 error = copyin(ptr++, &arg, sizeof(arg)); 2670 if (error) 2671 goto err_exit; 2672 if (arg == 0) 2673 break; 2674 envp = PTRIN(arg); 2675 error = exec_args_add_env(args, envp, UIO_USERSPACE); 2676 if (error != 0) 2677 goto err_exit; 2678 } 2679 } 2680 2681 return (0); 2682 2683 err_exit: 2684 exec_free_args(args); 2685 return (error); 2686 } 2687 2688 int 2689 linux_execve(struct thread *td, struct linux_execve_args *args) 2690 { 2691 struct image_args eargs; 2692 int error; 2693 2694 LINUX_CTR(execve); 2695 2696 error = linux_exec_copyin_args(&eargs, args->path, args->argp, 2697 args->envp); 2698 if (error == 0) 2699 error = linux_common_execve(td, &eargs); 2700 AUDIT_SYSCALL_EXIT(error == EJUSTRETURN ? 0 : error, td); 2701 return (error); 2702 } 2703 2704 static void 2705 linux_up_rtprio_if(struct thread *td1, struct rtprio *rtp) 2706 { 2707 struct rtprio rtp2; 2708 2709 pri_to_rtp(td1, &rtp2); 2710 if (rtp2.type < rtp->type || 2711 (rtp2.type == rtp->type && 2712 rtp2.prio < rtp->prio)) { 2713 rtp->type = rtp2.type; 2714 rtp->prio = rtp2.prio; 2715 } 2716 } 2717 2718 #define LINUX_PRIO_DIVIDER RTP_PRIO_MAX / LINUX_IOPRIO_MAX 2719 2720 static int 2721 linux_rtprio2ioprio(struct rtprio *rtp) 2722 { 2723 int ioprio, prio; 2724 2725 switch (rtp->type) { 2726 case RTP_PRIO_IDLE: 2727 prio = RTP_PRIO_MIN; 2728 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_IDLE, prio); 2729 break; 2730 case RTP_PRIO_NORMAL: 2731 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2732 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_BE, prio); 2733 break; 2734 case RTP_PRIO_REALTIME: 2735 prio = rtp->prio / LINUX_PRIO_DIVIDER; 2736 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_RT, prio); 2737 break; 2738 default: 2739 prio = RTP_PRIO_MIN; 2740 ioprio = LINUX_IOPRIO_PRIO(LINUX_IOPRIO_CLASS_NONE, prio); 2741 break; 2742 } 2743 return (ioprio); 2744 } 2745 2746 static int 2747 linux_ioprio2rtprio(int ioprio, struct rtprio *rtp) 2748 { 2749 2750 switch (LINUX_IOPRIO_PRIO_CLASS(ioprio)) { 2751 case LINUX_IOPRIO_CLASS_IDLE: 2752 rtp->prio = RTP_PRIO_MIN; 2753 rtp->type = RTP_PRIO_IDLE; 2754 break; 2755 case LINUX_IOPRIO_CLASS_BE: 2756 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2757 rtp->type = RTP_PRIO_NORMAL; 2758 break; 2759 case LINUX_IOPRIO_CLASS_RT: 2760 rtp->prio = LINUX_IOPRIO_PRIO_DATA(ioprio) * LINUX_PRIO_DIVIDER; 2761 rtp->type = RTP_PRIO_REALTIME; 2762 break; 2763 default: 2764 return (EINVAL); 2765 } 2766 return (0); 2767 } 2768 #undef LINUX_PRIO_DIVIDER 2769 2770 int 2771 linux_ioprio_get(struct thread *td, struct linux_ioprio_get_args *args) 2772 { 2773 struct thread *td1; 2774 struct rtprio rtp; 2775 struct pgrp *pg; 2776 struct proc *p; 2777 int error, found; 2778 2779 p = NULL; 2780 td1 = NULL; 2781 error = 0; 2782 found = 0; 2783 rtp.type = RTP_PRIO_IDLE; 2784 rtp.prio = RTP_PRIO_MAX; 2785 switch (args->which) { 2786 case LINUX_IOPRIO_WHO_PROCESS: 2787 if (args->who == 0) { 2788 td1 = td; 2789 p = td1->td_proc; 2790 PROC_LOCK(p); 2791 } else if (args->who > PID_MAX) { 2792 td1 = linux_tdfind(td, args->who, -1); 2793 if (td1 != NULL) 2794 p = td1->td_proc; 2795 } else 2796 p = pfind(args->who); 2797 if (p == NULL) 2798 return (ESRCH); 2799 if ((error = p_cansee(td, p))) { 2800 PROC_UNLOCK(p); 2801 break; 2802 } 2803 if (td1 != NULL) { 2804 pri_to_rtp(td1, &rtp); 2805 } else { 2806 FOREACH_THREAD_IN_PROC(p, td1) { 2807 linux_up_rtprio_if(td1, &rtp); 2808 } 2809 } 2810 found++; 2811 PROC_UNLOCK(p); 2812 break; 2813 case LINUX_IOPRIO_WHO_PGRP: 2814 sx_slock(&proctree_lock); 2815 if (args->who == 0) { 2816 pg = td->td_proc->p_pgrp; 2817 PGRP_LOCK(pg); 2818 } else { 2819 pg = pgfind(args->who); 2820 if (pg == NULL) { 2821 sx_sunlock(&proctree_lock); 2822 error = ESRCH; 2823 break; 2824 } 2825 } 2826 sx_sunlock(&proctree_lock); 2827 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2828 PROC_LOCK(p); 2829 if (p->p_state == PRS_NORMAL && 2830 p_cansee(td, p) == 0) { 2831 FOREACH_THREAD_IN_PROC(p, td1) { 2832 linux_up_rtprio_if(td1, &rtp); 2833 found++; 2834 } 2835 } 2836 PROC_UNLOCK(p); 2837 } 2838 PGRP_UNLOCK(pg); 2839 break; 2840 case LINUX_IOPRIO_WHO_USER: 2841 if (args->who == 0) 2842 args->who = td->td_ucred->cr_uid; 2843 sx_slock(&allproc_lock); 2844 FOREACH_PROC_IN_SYSTEM(p) { 2845 PROC_LOCK(p); 2846 if (p->p_state == PRS_NORMAL && 2847 p->p_ucred->cr_uid == args->who && 2848 p_cansee(td, p) == 0) { 2849 FOREACH_THREAD_IN_PROC(p, td1) { 2850 linux_up_rtprio_if(td1, &rtp); 2851 found++; 2852 } 2853 } 2854 PROC_UNLOCK(p); 2855 } 2856 sx_sunlock(&allproc_lock); 2857 break; 2858 default: 2859 error = EINVAL; 2860 break; 2861 } 2862 if (error == 0) { 2863 if (found != 0) 2864 td->td_retval[0] = linux_rtprio2ioprio(&rtp); 2865 else 2866 error = ESRCH; 2867 } 2868 return (error); 2869 } 2870 2871 int 2872 linux_ioprio_set(struct thread *td, struct linux_ioprio_set_args *args) 2873 { 2874 struct thread *td1; 2875 struct rtprio rtp; 2876 struct pgrp *pg; 2877 struct proc *p; 2878 int error; 2879 2880 if ((error = linux_ioprio2rtprio(args->ioprio, &rtp)) != 0) 2881 return (error); 2882 /* Attempts to set high priorities (REALTIME) require su privileges. */ 2883 if (RTP_PRIO_BASE(rtp.type) == RTP_PRIO_REALTIME && 2884 (error = priv_check(td, PRIV_SCHED_RTPRIO)) != 0) 2885 return (error); 2886 2887 p = NULL; 2888 td1 = NULL; 2889 switch (args->which) { 2890 case LINUX_IOPRIO_WHO_PROCESS: 2891 if (args->who == 0) { 2892 td1 = td; 2893 p = td1->td_proc; 2894 PROC_LOCK(p); 2895 } else if (args->who > PID_MAX) { 2896 td1 = linux_tdfind(td, args->who, -1); 2897 if (td1 != NULL) 2898 p = td1->td_proc; 2899 } else 2900 p = pfind(args->who); 2901 if (p == NULL) 2902 return (ESRCH); 2903 if ((error = p_cansched(td, p))) { 2904 PROC_UNLOCK(p); 2905 break; 2906 } 2907 if (td1 != NULL) { 2908 error = rtp_to_pri(&rtp, td1); 2909 } else { 2910 FOREACH_THREAD_IN_PROC(p, td1) { 2911 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2912 break; 2913 } 2914 } 2915 PROC_UNLOCK(p); 2916 break; 2917 case LINUX_IOPRIO_WHO_PGRP: 2918 sx_slock(&proctree_lock); 2919 if (args->who == 0) { 2920 pg = td->td_proc->p_pgrp; 2921 PGRP_LOCK(pg); 2922 } else { 2923 pg = pgfind(args->who); 2924 if (pg == NULL) { 2925 sx_sunlock(&proctree_lock); 2926 error = ESRCH; 2927 break; 2928 } 2929 } 2930 sx_sunlock(&proctree_lock); 2931 LIST_FOREACH(p, &pg->pg_members, p_pglist) { 2932 PROC_LOCK(p); 2933 if (p->p_state == PRS_NORMAL && 2934 p_cansched(td, p) == 0) { 2935 FOREACH_THREAD_IN_PROC(p, td1) { 2936 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2937 break; 2938 } 2939 } 2940 PROC_UNLOCK(p); 2941 if (error != 0) 2942 break; 2943 } 2944 PGRP_UNLOCK(pg); 2945 break; 2946 case LINUX_IOPRIO_WHO_USER: 2947 if (args->who == 0) 2948 args->who = td->td_ucred->cr_uid; 2949 sx_slock(&allproc_lock); 2950 FOREACH_PROC_IN_SYSTEM(p) { 2951 PROC_LOCK(p); 2952 if (p->p_state == PRS_NORMAL && 2953 p->p_ucred->cr_uid == args->who && 2954 p_cansched(td, p) == 0) { 2955 FOREACH_THREAD_IN_PROC(p, td1) { 2956 if ((error = rtp_to_pri(&rtp, td1)) != 0) 2957 break; 2958 } 2959 } 2960 PROC_UNLOCK(p); 2961 if (error != 0) 2962 break; 2963 } 2964 sx_sunlock(&allproc_lock); 2965 break; 2966 default: 2967 error = EINVAL; 2968 break; 2969 } 2970 return (error); 2971 } 2972 2973 /* The only flag is O_NONBLOCK */ 2974 #define B2L_MQ_FLAGS(bflags) ((bflags) != 0 ? LINUX_O_NONBLOCK : 0) 2975 #define L2B_MQ_FLAGS(lflags) ((lflags) != 0 ? O_NONBLOCK : 0) 2976 2977 int 2978 linux_mq_open(struct thread *td, struct linux_mq_open_args *args) 2979 { 2980 struct mq_attr attr; 2981 int error, flags; 2982 2983 flags = linux_common_openflags(args->oflag); 2984 if ((flags & O_ACCMODE) == O_ACCMODE || (flags & O_EXEC) != 0) 2985 return (EINVAL); 2986 flags = FFLAGS(flags); 2987 if ((flags & O_CREAT) != 0 && args->attr != NULL) { 2988 error = copyin(args->attr, &attr, sizeof(attr)); 2989 if (error != 0) 2990 return (error); 2991 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 2992 } 2993 2994 return (kern_kmq_open(td, args->name, flags, args->mode, 2995 args->attr != NULL ? &attr : NULL)); 2996 } 2997 2998 int 2999 linux_mq_unlink(struct thread *td, struct linux_mq_unlink_args *args) 3000 { 3001 struct kmq_unlink_args bsd_args = { 3002 .path = PTRIN(args->name) 3003 }; 3004 3005 return (sys_kmq_unlink(td, &bsd_args)); 3006 } 3007 3008 int 3009 linux_mq_timedsend(struct thread *td, struct linux_mq_timedsend_args *args) 3010 { 3011 struct timespec ts, *abs_timeout; 3012 int error; 3013 3014 if (args->abs_timeout == NULL) 3015 abs_timeout = NULL; 3016 else { 3017 error = linux_get_timespec(&ts, args->abs_timeout); 3018 if (error != 0) 3019 return (error); 3020 abs_timeout = &ts; 3021 } 3022 3023 return (kern_kmq_timedsend(td, args->mqd, PTRIN(args->msg_ptr), 3024 args->msg_len, args->msg_prio, abs_timeout)); 3025 } 3026 3027 int 3028 linux_mq_timedreceive(struct thread *td, struct linux_mq_timedreceive_args *args) 3029 { 3030 struct timespec ts, *abs_timeout; 3031 int error; 3032 3033 if (args->abs_timeout == NULL) 3034 abs_timeout = NULL; 3035 else { 3036 error = linux_get_timespec(&ts, args->abs_timeout); 3037 if (error != 0) 3038 return (error); 3039 abs_timeout = &ts; 3040 } 3041 3042 return (kern_kmq_timedreceive(td, args->mqd, PTRIN(args->msg_ptr), 3043 args->msg_len, args->msg_prio, abs_timeout)); 3044 } 3045 3046 int 3047 linux_mq_notify(struct thread *td, struct linux_mq_notify_args *args) 3048 { 3049 struct sigevent ev, *evp; 3050 struct l_sigevent l_ev; 3051 int error; 3052 3053 if (args->sevp == NULL) 3054 evp = NULL; 3055 else { 3056 error = copyin(args->sevp, &l_ev, sizeof(l_ev)); 3057 if (error != 0) 3058 return (error); 3059 error = linux_convert_l_sigevent(&l_ev, &ev); 3060 if (error != 0) 3061 return (error); 3062 evp = &ev; 3063 } 3064 3065 return (kern_kmq_notify(td, args->mqd, evp)); 3066 } 3067 3068 int 3069 linux_mq_getsetattr(struct thread *td, struct linux_mq_getsetattr_args *args) 3070 { 3071 struct mq_attr attr, oattr; 3072 int error; 3073 3074 if (args->attr != NULL) { 3075 error = copyin(args->attr, &attr, sizeof(attr)); 3076 if (error != 0) 3077 return (error); 3078 attr.mq_flags = L2B_MQ_FLAGS(attr.mq_flags); 3079 } 3080 3081 error = kern_kmq_setattr(td, args->mqd, args->attr != NULL ? &attr : NULL, 3082 &oattr); 3083 if (error == 0 && args->oattr != NULL) { 3084 oattr.mq_flags = B2L_MQ_FLAGS(oattr.mq_flags); 3085 bzero(oattr.__reserved, sizeof(oattr.__reserved)); 3086 error = copyout(&oattr, args->oattr, sizeof(oattr)); 3087 } 3088 3089 return (error); 3090 } 3091 3092 int 3093 linux_kcmp(struct thread *td, struct linux_kcmp_args *args) 3094 { 3095 int type; 3096 3097 switch (args->type) { 3098 case LINUX_KCMP_FILE: 3099 type = KCMP_FILE; 3100 break; 3101 case LINUX_KCMP_FILES: 3102 type = KCMP_FILES; 3103 break; 3104 case LINUX_KCMP_SIGHAND: 3105 type = KCMP_SIGHAND; 3106 break; 3107 case LINUX_KCMP_VM: 3108 type = KCMP_VM; 3109 break; 3110 default: 3111 return (EINVAL); 3112 } 3113 3114 return (kern_kcmp(td, args->pid1, args->pid2, type, args->idx1, 3115 args->idx)); 3116 } 3117 3118 MODULE_DEPEND(linux, mqueuefs, 1, 1, 1); 3119