1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 */ 31 32 #include <sys/cdefs.h> 33 __FBSDID("$FreeBSD$"); 34 35 #include "opt_compat.h" 36 37 #include <sys/param.h> 38 #include <sys/blist.h> 39 #include <sys/fcntl.h> 40 #if defined(__i386__) 41 #include <sys/imgact_aout.h> 42 #endif 43 #include <sys/jail.h> 44 #include <sys/kernel.h> 45 #include <sys/limits.h> 46 #include <sys/lock.h> 47 #include <sys/malloc.h> 48 #include <sys/mman.h> 49 #include <sys/mount.h> 50 #include <sys/msgbuf.h> 51 #include <sys/mutex.h> 52 #include <sys/namei.h> 53 #include <sys/priv.h> 54 #include <sys/proc.h> 55 #include <sys/procctl.h> 56 #include <sys/reboot.h> 57 #include <sys/racct.h> 58 #include <sys/random.h> 59 #include <sys/resourcevar.h> 60 #include <sys/sched.h> 61 #include <sys/sdt.h> 62 #include <sys/signalvar.h> 63 #include <sys/stat.h> 64 #include <sys/syscallsubr.h> 65 #include <sys/sysctl.h> 66 #include <sys/sysproto.h> 67 #include <sys/systm.h> 68 #include <sys/time.h> 69 #include <sys/vmmeter.h> 70 #include <sys/vnode.h> 71 #include <sys/wait.h> 72 #include <sys/cpuset.h> 73 #include <sys/uio.h> 74 75 #include <security/mac/mac_framework.h> 76 77 #include <vm/vm.h> 78 #include <vm/pmap.h> 79 #include <vm/vm_kern.h> 80 #include <vm/vm_map.h> 81 #include <vm/vm_extern.h> 82 #include <vm/swap_pager.h> 83 84 #ifdef COMPAT_LINUX32 85 #include <machine/../linux32/linux.h> 86 #include <machine/../linux32/linux32_proto.h> 87 #else 88 #include <machine/../linux/linux.h> 89 #include <machine/../linux/linux_proto.h> 90 #endif 91 92 #include <compat/linux/linux_dtrace.h> 93 #include <compat/linux/linux_file.h> 94 #include <compat/linux/linux_mib.h> 95 #include <compat/linux/linux_signal.h> 96 #include <compat/linux/linux_timer.h> 97 #include <compat/linux/linux_util.h> 98 #include <compat/linux/linux_sysproto.h> 99 #include <compat/linux/linux_emul.h> 100 #include <compat/linux/linux_misc.h> 101 102 int stclohz; /* Statistics clock frequency */ 103 104 static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = { 105 
RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

/*
 * Linux struct sysinfo.  This is copied out verbatim to userspace by
 * linux_sysinfo() below, so field order and sizes must match what Linux
 * applications expect.
 */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

/* Argument block used by pselect6(); ss points at a Linux sigset. */
struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

/* Forward declarations for the utimensat() helpers defined below. */
static int	linux_utimensat_lts_to_ts(struct l_timespec *,
			struct timespec *);
#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
static int	linux_utimensat_lts64_to_ts(struct l_timespec64 *,
			struct timespec *);
#endif
static int	linux_common_utimensat(struct thread *, int,
			const char *, struct timespec *, int);

/*
 * Linux sysinfo(2): report uptime, load averages, memory and swap totals
 * to userspace in Linux's struct sysinfo layout.
 */
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	/* Round the uptime up to whole seconds. */
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files.  There is no cheap way to
	 * compute this, so just leave the field unpopulated.  Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	/* totalswap/freeswap come back from the swap pager in pages. */
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux alarm(2): arm a one-shot ITIMER_REAL timer and return the number
 * of seconds that remained on any previously armed timer.
 */
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit to avoid error from it.
	 *
	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
	 * platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	/* Round the remaining time up to whole seconds, as Linux does. */
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

/*
 * Linux brk(2): never fails.  On success return the new break; on any
 * failure return the current (old) break so the caller can detect it.
 */
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#if defined(__i386__)
/* XXX: what about amd64/linux32? */

/*
 * Linux uselib(2): map an a.out-format shared library (ZMAGIC or QMAGIC)
 * into the calling process's address space.  i386 only.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	bool locked, opened, textset;

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	if (!LUSECONVPATH(td)) {
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_USERSPACE, args->library, td);
		error = namei(&ni);
	} else {
		LCONVPATHEXIST(td, args->library, &library);
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, library, td);
		error = namei(&ni);
		LFREEPATH(library);
	}
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/*
		 * EACCESS is what exec(2) returns.
		 * NOTE(review): ENOEXEC is what is actually returned here;
		 * confirm which errno is intended before changing either.
		 */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misaligned file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		/* A short read means a truncated image; reject it. */
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		/* The mapping now holds the text reference on the vnode. */
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	/* Unwind in reverse order of acquisition; see the flags above. */
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		if (!locked) {
			locked
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		/* Linux select() writes the remaining time back to the user. */
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

/*
 * Linux mremap(2), restricted: only in-place shrinking of a mapping is
 * supported; growing a mapping returns ENOMEM.  On success td_retval[0]
 * holds the (unchanged) mapping address, on failure it holds 0.
 */
int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		/* Shrink by unmapping the tail of the region. */
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ?
	    0 : (uintptr_t)args->addr;
	return (error);
}

#define LINUX_MS_ASYNC       0x0001
#define LINUX_MS_INVALIDATE  0x0002
#define LINUX_MS_SYNC        0x0004

/*
 * Linux msync(2).  The Linux MS_SYNC bit is stripped before handing the
 * flags to kern_msync(); the remaining bits are passed through as-is.
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux time(2): return seconds since the Epoch, optionally also storing
 * the value through the user-supplied pointer.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define CLK_TCK 100

#define CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

/* Pick the conversion matching the emulated kernel version (needs 'td'). */
#define CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?	\
	    CONVNTCK(r) : CONVOTCK(r))

/*
 * Linux times(2): report user/system/children CPU times in clock ticks
 * and return the uptime in ticks.
 */
int
linux_times(struct thread *td, struct linux_times_args *args)
{
	struct timeval tv, utime, stime, cutime, cstime;
	struct l_times_argv tms;
	struct proc *p;
	int error;

	if (args->buf != NULL) {
		p = td->td_proc;
		PROC_LOCK(p);
		PROC_STATLOCK(p);
		calcru(p, &utime, &stime);
		PROC_STATUNLOCK(p);
		calccru(p, &cutime, &cstime);
		PROC_UNLOCK(p);

		tms.tms_utime = CONVTCK(utime);
		tms.tms_stime = CONVTCK(stime);

		tms.tms_cutime = CONVTCK(cutime);
		tms.tms_cstime = CONVTCK(cstime);

		if ((error = copyout(&tms, args->buf, sizeof(tms))))
			return (error);
	}

	microuptime(&tv);
	td->td_retval[0] = (int)CONVTCK(tv);
	return (0);
}

/*
 * Linux uname(2): fill in the new-style utsname structure from the
 * emulated OS name/release and the real host/domain names.
 */
int
linux_newuname(struct thread *td, struct linux_newuname_args *args)
{
	struct l_new_utsname utsname;
	char osname[LINUX_MAX_UTSNAME];
	char osrelease[LINUX_MAX_UTSNAME];
	char *p;

	linux_get_osname(td, osname);
	linux_get_osrelease(td, osrelease);

	bzero(&utsname, sizeof(utsname));
	strlcpy(utsname.sysname, osname, LINUX_MAX_UTSNAME);
	getcredhostname(td->td_ucred, utsname.nodename, LINUX_MAX_UTSNAME);
	getcreddomainname(td->td_ucred, utsname.domainname, LINUX_MAX_UTSNAME);
	strlcpy(utsname.release, osrelease, LINUX_MAX_UTSNAME);
	strlcpy(utsname.version, version, LINUX_MAX_UTSNAME);
	/* Truncate the version string at the first newline. */
	for (p = utsname.version; *p != '\0'; ++p)
		if (*p == '\n') {
			*p = '\0';
			break;
		}
#if defined(__amd64__)
	/*
	 * On amd64, Linux uname(2) needs to return "x86_64"
	 * for both 64-bit and 32-bit applications.  On 32-bit,
	 * the string returned by getauxval(AT_PLATFORM) needs
	 * to remain "i686", though.
	 */
	strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME);
#else
	strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME);
#endif

	return (copyout(&utsname, args->buf, sizeof(utsname)));
}

/* Linux struct utimbuf: access and modification times, in seconds. */
struct l_utimbuf {
	l_time_t l_actime;
	l_time_t l_modtime;
};

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utime(2): set a file's access/modification times from a
 * struct utimbuf (whole seconds), or to the current time if NULL.
 */
int
linux_utime(struct thread *td, struct linux_utime_args *args)
{
	struct timeval tv[2], *tvp;
	struct l_utimbuf lut;
	char *fname;
	int error;

	if (args->times) {
		if ((error = copyin(args->times, &lut, sizeof lut)) != 0)
			return (error);
		tv[0].tv_sec = lut.l_actime;
		tv[0].tv_usec = 0;
		tv[1].tv_sec = lut.l_modtime;
		tv[1].tv_usec = 0;
		tvp = tv;
	} else
		tvp = NULL;

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(td, args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp,
		    UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux utimes(2): like utime(2) but takes an array of two timevals
 * with microsecond resolution.
 */
int
linux_utimes(struct thread *td, struct linux_utimes_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error;

	if (args->tptr != NULL) {
		if ((error = copyin(args->tptr, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST(td, args->fname, &fname);
		error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

/*
 * Convert a Linux timespec to a native one, translating the special
 * LINUX_UTIME_OMIT/LINUX_UTIME_NOW nanosecond values and rejecting
 * out-of-range nanoseconds with EINVAL.
 */
static int
linux_utimensat_lts_to_ts(struct l_timespec *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

/*
 * Common back end for utimensat() and utimensat_time64(): validate the
 * Linux AT_* flags, translate them to native flags, and dispatch to
 * kern_utimensat()/kern_futimens().
 */
static int
linux_common_utimensat(struct thread *td, int ldfd, const char *pathname,
    struct timespec *timesp, int lflags)
{
	char *path = NULL;
	int error, dfd, flags = 0;

	dfd = (ldfd == LINUX_AT_FDCWD) ? AT_FDCWD : ldfd;

	if (lflags & ~(LINUX_AT_SYMLINK_NOFOLLOW | LINUX_AT_EMPTY_PATH))
		return (EINVAL);

	if (timesp != NULL) {
		/* This breaks POSIX, but is what the Linux kernel does
		 * _on purpose_ (documented in the man page for utimensat(2)),
		 * so we must follow that behaviour.
		 */
		if (timesp[0].tv_nsec == UTIME_OMIT &&
		    timesp[1].tv_nsec == UTIME_OMIT)
			return (0);
	}

	if (lflags & LINUX_AT_SYMLINK_NOFOLLOW)
		flags |= AT_SYMLINK_NOFOLLOW;
	if (lflags & LINUX_AT_EMPTY_PATH)
		flags |= AT_EMPTY_PATH;

	if (!LUSECONVPATH(td)) {
		if (pathname != NULL) {
			return (kern_utimensat(td, dfd, pathname,
			    UIO_USERSPACE, timesp, UIO_SYSSPACE, flags));
		}
	}

	if (pathname != NULL)
		LCONVPATHEXIST_AT(td, pathname, &path, dfd);
	else if (lflags != 0)
		/* A NULL path (futimens form) accepts no flags. */
		return (EINVAL);

	if (path == NULL)
		error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE);
	else {
		error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp,
		    UIO_SYSSPACE, flags);
		LFREEPATH(path);
	}

	return (error);
}

/*
 * Linux utimensat(2): nanosecond-resolution file time updates with
 * UTIME_NOW/UTIME_OMIT support.
 */
int
linux_utimensat(struct thread *td, struct linux_utimensat_args *args)
{
	struct l_timespec l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times != NULL) {
		error = copyin(args->times, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * 64-bit-time variant of linux_utimensat_lts_to_ts(); same validation
 * and UTIME_OMIT/UTIME_NOW translation, from struct l_timespec64.
 */
static int
linux_utimensat_lts64_to_ts(struct l_timespec64 *l_times, struct timespec *times)
{

	if (l_times->tv_nsec != LINUX_UTIME_OMIT &&
	    l_times->tv_nsec != LINUX_UTIME_NOW &&
	    (l_times->tv_nsec < 0 || l_times->tv_nsec > 999999999))
		return (EINVAL);

	times->tv_sec = l_times->tv_sec;
	switch (l_times->tv_nsec)
	{
	case LINUX_UTIME_OMIT:
		times->tv_nsec = UTIME_OMIT;
		break;
	case LINUX_UTIME_NOW:
		times->tv_nsec = UTIME_NOW;
		break;
	default:
		times->tv_nsec = l_times->tv_nsec;
	}

	return (0);
}

/*
 * Linux utimensat_time64(2): utimensat() with 64-bit time_t, used by
 * 32-bit applications past the 2038 boundary.
 */
int
linux_utimensat_time64(struct thread *td, struct linux_utimensat_time64_args *args)
{
	struct l_timespec64 l_times[2];
	struct timespec times[2], *timesp;
	int error;

	if (args->times64 != NULL) {
		error = copyin(args->times64, l_times, sizeof(l_times));
		if (error != 0)
			return (error);

		error = linux_utimensat_lts64_to_ts(&l_times[0], &times[0]);
		if (error != 0)
			return (error);
		error = linux_utimensat_lts64_to_ts(&l_times[1], &times[1]);
		if (error != 0)
			return (error);
		timesp = times;
	} else
		timesp = NULL;

	return (linux_common_utimensat(td, args->dfd, args->pathname,
	    timesp, args->flags));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux futimesat(2): utimes(2) relative to a directory file descriptor.
 */
int
linux_futimesat(struct thread *td, struct linux_futimesat_args *args)
{
	l_timeval ltv[2];
	struct timeval tv[2], *tvp = NULL;
	char *fname;
	int error, dfd;

	dfd = (args->dfd == LINUX_AT_FDCWD) ?
	    AT_FDCWD : args->dfd;

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv)) != 0)
			return (error);
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!LUSECONVPATH(td)) {
		error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);
		error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE,
		    tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

/*
 * Common back end for waitpid()/wait4(): translate the Linux pid
 * encoding (-1 = any, <0 = process group, >0 = specific pid) into an
 * idtype/id pair, call kern_wait6(), and convert the exit status to the
 * Linux layout (signal numbers remapped) before copying it out.
 */
static int
linux_common_wait(struct thread *td, int pid, int *statusp,
    int options, struct __wrusage *wrup)
{
	siginfo_t siginfo;
	idtype_t idtype;
	id_t id;
	int error, status, tmpstat;

	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo);
	if (error)
		return (error);

	if (statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			/* Replace the termination signal number. */
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			/* Replace the stop signal number (bits 8-15). */
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux waitpid(2): thin wrapper around wait4(2) with no rusage.
 */
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Linux wait4(2): validate the Linux wait options, perform the wait via
 * linux_common_wait(), and optionally copy out the child's rusage.
 */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct __wrusage wru, *wrup;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, wrup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&wru.wru_self, args->rusage);
	return (error);
}

int
linux_waitid(struct thread *td, struct linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	/* At least one of the wait states must be requested. */
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		bzero(&lsi, sizeof(lsi));
		/* td_retval[0] != 0 means a child actually changed state. */
		if (td->td_retval[0] != 0) {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	/* waitid() returns 0 on success, not the pid. */
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux mknod(2): dispatch on the file type bits to the appropriate
 * native primitive; regular files are emulated via open(O_CREAT).
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;
	enum uio_seg seg;
	bool convpath;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = args->path;
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT(td, args->path, &path);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, seg,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, seg,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		/* No type bits means a regular file on Linux. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, AT_FDCWD, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}
#endif

/*
 * Linux mknodat(2): like linux_mknod() but relative to a directory
 * file descriptor.
 */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;
	enum uio_seg seg;
	bool convpath;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = __DECONST(char *, args->filename);
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT_AT(td, args->filename, &path, dfd);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, seg, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, seg, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		error = EPERM;
		break;

	case 0:
		/* No type bits means a regular file on Linux. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		error = kern_openat(td, dfd, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
 */
/*
 * personality(2): return the previous persona; 0xffffffff is the Linux
 * "query only" value and leaves the persona unchanged.
 */
int
linux_personality(struct thread *td, struct linux_personality_args *args)
{
	struct linux_pemuldata *pem;
	struct proc *p = td->td_proc;
	uint32_t old;

	PROC_LOCK(p);
	pem = pem_find(p);
	old = pem->persona;
	if (args->per != 0xffffffff)
		pem->persona = args->per;
	PROC_UNLOCK(p);

	td->td_retval[0] = old;
	return (0);
}

/* Linux-layout struct itimerval (l_timeval fields). */
struct l_itimerval {
	l_timeval it_interval;
	l_timeval it_value;
};

/* Field-by-field copy between native and Linux itimerval layouts. */
#define	B2L_ITIMERVAL(bip, lip)						\
	(bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec;		\
	(bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec;	\
	(bip)->it_value.tv_sec = (lip)->it_value.tv_sec;		\
	(bip)->it_value.tv_usec = (lip)->it_value.tv_usec;

/*
 * setitimer(2): a NULL new value degenerates into getitimer() on the
 * old-value pointer, matching Linux behaviour.
 */
int
linux_setitimer(struct thread *td, struct linux_setitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv, oitv;

	if (uap->itv == NULL) {
		uap->itv = uap->oitv;
		return (linux_getitimer(td, (struct linux_getitimer_args *)uap));
	}

	error = copyin(uap->itv, &ls, sizeof(ls));
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&aitv, &ls);
	error = kern_setitimer(td, uap->which, &aitv, &oitv);
	if (error != 0 || uap->oitv == NULL)
		return (error);
	B2L_ITIMERVAL(&ls, &oitv);

	return (copyout(&ls, uap->oitv, sizeof(ls)));
}

/* getitimer(2): fetch the native timer and convert to Linux layout. */
int
linux_getitimer(struct thread *td, struct linux_getitimer_args *uap)
{
	int error;
	struct l_itimerval ls;
	struct itimerval aitv;

	error = kern_getitimer(td, uap->which, &aitv);
	if (error != 0)
		return (error);
	B2L_ITIMERVAL(&ls, &aitv);
	return (copyout(&ls, uap->itv, sizeof(ls)));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/* nice(2): thin wrapper over the native priority adjustment. */
int
linux_nice(struct thread *td, struct linux_nice_args *args)
{

	return (kern_setpriority(td, PRIO_PROCESS, 0, args->inc));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * setgroups(2): install a new supplementary group set on a fresh
 * credential.  Native cr_groups[0] is the egid and is deliberately
 * preserved (Linux group sets do not include the egid).
 */
int
linux_setgroups(struct thread *td, struct linux_setgroups_args *args)
{
	struct ucred *newcred, *oldcred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int ngrp, error;
	struct proc *p;

	ngrp = args->gidsetsize;
	if (ngrp < 0 || ngrp >= ngroups_max + 1)
		return (EINVAL);
	linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK);
	error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t));
	if (error)
		goto out;
	newcred = crget();
	/* +1: slot 0 is reserved for the (preserved) egid. */
	crextend(newcred, ngrp + 1);
	p = td->td_proc;
	PROC_LOCK(p);
	oldcred = p->p_ucred;
	crcopy(newcred, oldcred);

	/*
	 * cr_groups[0] holds egid. Setting the whole set from
	 * the supplied set will cause egid to be changed too.
	 * Keep cr_groups[0] unchanged to prevent that.
	 */

	if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) {
		PROC_UNLOCK(p);
		crfree(newcred);
		goto out;
	}

	if (ngrp > 0) {
		newcred->cr_ngroups = ngrp + 1;

		/* Copy the Linux set into slots [1..ngrp]. */
		bsd_gidset = newcred->cr_groups;
		ngrp--;
		while (ngrp >= 0) {
			bsd_gidset[ngrp + 1] = linux_gidset[ngrp];
			ngrp--;
		}
	} else
		newcred->cr_ngroups = 1;

	setsugid(p);
	proc_set_cred(p, newcred);
	PROC_UNLOCK(p);
	crfree(oldcred);
	error = 0;
out:
	free(linux_gidset, M_LINUX);
	return (error);
}

/*
 * getgroups(2): report the supplementary groups, excluding the egid
 * stored in cr_groups[0].  A zero gidsetsize queries the count only.
 */
int
linux_getgroups(struct thread *td, struct linux_getgroups_args *args)
{
	struct ucred *cred;
	l_gid_t *linux_gidset;
	gid_t *bsd_gidset;
	int bsd_gidsetsz, ngrp, error;

	cred = td->td_ucred;
	bsd_gidset = cred->cr_groups;
	bsd_gidsetsz = cred->cr_ngroups - 1;

	/*
	 * cr_groups[0] holds egid. Returning the whole set
	 * here will cause a duplicate.
	 * Exclude cr_groups[0] to prevent that.
	 */

	if ((ngrp = args->gidsetsize) == 0) {
		/* Query mode: just report how many entries there are. */
		td->td_retval[0] = bsd_gidsetsz;
		return (0);
	}

	if (ngrp < bsd_gidsetsz)
		return (EINVAL);

	ngrp = 0;
	linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset),
	    M_LINUX, M_WAITOK);
	/* Skip cr_groups[0] (egid) while converting to l_gid_t. */
	while (ngrp < bsd_gidsetsz) {
		linux_gidset[ngrp] = bsd_gidset[ngrp + 1];
		ngrp++;
	}

	error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t));
	free(linux_gidset, M_LINUX);
	if (error)
		return (error);

	td->td_retval[0] = ngrp;
	return (0);
}

/*
 * Provide canned values for Linux-only resource limits we do not
 * implement, when the linux_dummy_rlimits knob is enabled.
 * Returns true when *rlim was filled in.
 */
static bool
linux_get_dummy_limit(l_uint resource, struct rlimit *rlim)
{

	if (linux_dummy_rlimits == 0)
		return (false);

	switch (resource) {
	case LINUX_RLIMIT_LOCKS:
	case LINUX_RLIMIT_SIGPENDING:
	case LINUX_RLIMIT_MSGQUEUE:
	case LINUX_RLIMIT_RTTIME:
		rlim->rlim_cur = LINUX_RLIM_INFINITY;
		rlim->rlim_max = LINUX_RLIM_INFINITY;
		return (true);
	case LINUX_RLIMIT_NICE:
	case LINUX_RLIMIT_RTPRIO:
		rlim->rlim_cur = 0;
		rlim->rlim_max = 0;
		return (true);
	default:
		return (false);
	}
}

/*
 * setrlimit(2): map the Linux resource index to the native one and
 * hand off to kern_setrlimit().
 */
int
linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args)
{
	struct rlimit bsd_rlim;
	struct l_rlimit rlim;
	u_int which;
	int error;

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	/* -1 in the table marks resources with no native equivalent. */
	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	error = copyin(args->rlim, &rlim, sizeof(rlim));
	if (error)
		return (error);

	bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur;
	bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max;
	return (kern_setrlimit(td, which, &bsd_rlim));
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Legacy getrlimit(2): like linux_getrlimit() but clamps the all-ones
 * "infinity" encoding down to the signed maximum, as old Linux did.
 */
int
linux_old_getrlimit(struct thread *td, struct linux_old_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

#ifdef COMPAT_LINUX32
	rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == UINT_MAX)
		rlim.rlim_cur = INT_MAX;
	rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max;
	if (rlim.rlim_max == UINT_MAX)
		rlim.rlim_max = INT_MAX;
#else
	rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur;
	if (rlim.rlim_cur == ULONG_MAX)
		rlim.rlim_cur = LONG_MAX;
	rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max;
	if (rlim.rlim_max == ULONG_MAX)
		rlim.rlim_max = LONG_MAX;
#endif
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * getrlimit(2): translate the resource index and copy the native limit
 * out in Linux layout.
 */
int
linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args)
{
	struct l_rlimit rlim;
	struct rlimit bsd_rlim;
	u_int which;

	if (linux_get_dummy_limit(args->resource, &bsd_rlim)) {
		rlim.rlim_cur = bsd_rlim.rlim_cur;
		rlim.rlim_max = bsd_rlim.rlim_max;
		return (copyout(&rlim, args->rlim, sizeof(rlim)));
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	lim_rlimit(td, which, &bsd_rlim);

	rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur;
	rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max;
	return (copyout(&rlim, args->rlim, sizeof(rlim)));
}

int
/*
 * sched_setscheduler(2): map the Linux policy to the native one and,
 * when linux_map_sched_prio is set, rescale the Linux priority range
 * onto the native realtime range before applying it.
 */
linux_sched_setscheduler(struct thread *td,
    struct linux_sched_setscheduler_args *args)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}

	error = copyin(args->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	if (linux_map_sched_prio) {
		switch (policy) {
		case SCHED_OTHER:
			/* Linux requires priority 0 for SCHED_OTHER. */
			if (sched_param.sched_priority != 0)
				return (EINVAL);

			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO)
				return (EINVAL);

			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	/* linux_tdfind() returns with the target proc locked. */
	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_setscheduler(td, tdt, policy, &sched_param);
	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

/*
 * sched_getscheduler(2): fetch the native policy for the target thread
 * and translate it back to the Linux policy constant.
 */
int
linux_sched_getscheduler(struct thread *td,
    struct linux_sched_getscheduler_args *args)
{
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getscheduler(td, tdt, &policy);
	PROC_UNLOCK(tdt->td_proc);

	switch (policy) {
	case SCHED_OTHER:
		td->td_retval[0] = LINUX_SCHED_OTHER;
		break;
	case SCHED_FIFO:
		td->td_retval[0] = LINUX_SCHED_FIFO;
		break;
	case SCHED_RR:
		td->td_retval[0] = LINUX_SCHED_RR;
		break;
	}
	return (error);
}

/*
 * sched_get_priority_max(2): report Linux priority bounds when mapping
 * is enabled, otherwise defer to the native syscall.
 */
int
linux_sched_get_priority_max(struct thread *td,
    struct linux_sched_get_priority_max_args *args)
{
	struct sched_get_priority_max_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = LINUX_MAX_RT_PRIO - 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_max(td, &bsd));
}

/*
 * sched_get_priority_min(2): counterpart of the function above.
 */
int
linux_sched_get_priority_min(struct thread *td,
    struct linux_sched_get_priority_min_args *args)
{
	struct sched_get_priority_min_args bsd;

	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			/* Linux realtime priorities start at 1. */
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

/* Linux reboot(2) magic numbers and command values. */
#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998

/*
 * reboot(2): validate the Linux magic cookie pair, then translate the
 * command into native reboot flags.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		/* Ctrl-Alt-Del toggling: only the privilege check applies. */
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

/* getpid(2). */
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

/* gettid(2): the Linux thread id kept in the emulator data. */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

/* getppid(2). */
int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

/* getgid(2): real group id. */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

/* getuid(2): real user id. */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

/* getsid(2). */
int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

/* Placeholder for unimplemented syscalls. */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

/*
 * getpriority(2): Linux returns the nice value biased as (20 - nice)
 * instead of using the errno/-1 convention.
 */
int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

/* sethostname(2): implemented via the kern.hostname sysctl. */
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

/* setdomainname(2): implemented via the kern.domainname sysctl. */
int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

/*
 * exit_group(2): terminate the whole process.
 */
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

	LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid,
	    args->error_code);

	/*
	 * XXX: we should send a signal to the parent if
	 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?)
	 * as it doesnt occur often.
	 */
	exit1(td, args->error_code, 0);
	/* NOTREACHED */
}

/* Linux capability API versions (see capget(2)). */
#define	_LINUX_CAPABILITY_VERSION_1	0x19980330
#define	_LINUX_CAPABILITY_VERSION_2	0x20071026
#define	_LINUX_CAPABILITY_VERSION_3	0x20080522

struct l_user_cap_header {
	l_int	version;
	l_int	pid;
};

struct l_user_cap_data {
	l_int	effective;
	l_int	permitted;
	l_int	inheritable;
};

/*
 * capget(2): stub implementation.  Validates the header version (one or
 * two 32-bit data words) and reports an empty capability set.
 */
int
linux_capget(struct thread *td, struct linux_capget_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, u32s;

	if (uap->hdrp == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		/*
		 * Unknown version: write back the version we do support,
		 * as Linux does, then fail.
		 */
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	/* Only the current process is supported. */
	if (luch.pid)
		return (EPERM);

	if (uap->datap) {
		/*
		 * The current implementation doesn't support setting
		 * a capability (it's essentially a stub) so indicate
		 * that no capabilities are currently set or available
		 * to request.
		 */
		memset(&lucd, 0, u32s * sizeof(lucd[0]));
		error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0]));
	}

	return (error);
}

/*
 * capset(2): stub implementation.  Accepts only requests that set no
 * capabilities at all; anything else is rejected with EPERM.
 */
int
linux_capset(struct thread *td, struct linux_capset_args *uap)
{
	struct l_user_cap_header luch;
	struct l_user_cap_data lucd[2];
	int error, i, u32s;

	if (uap->hdrp == NULL || uap->datap == NULL)
		return (EFAULT);

	error = copyin(uap->hdrp, &luch, sizeof(luch));
	if (error != 0)
		return (error);

	switch (luch.version) {
	case _LINUX_CAPABILITY_VERSION_1:
		u32s = 1;
		break;
	case _LINUX_CAPABILITY_VERSION_2:
	case _LINUX_CAPABILITY_VERSION_3:
		u32s = 2;
		break;
	default:
		luch.version = _LINUX_CAPABILITY_VERSION_1;
		error = copyout(&luch, uap->hdrp, sizeof(luch));
		if (error)
			return (error);
		return (EINVAL);
	}

	/* Only the current process is supported. */
	if (luch.pid)
		return (EPERM);

	error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0]));
	if (error != 0)
		return (error);

	/* We currently don't support setting any capabilities. */
	for (i = 0; i < u32s; i++) {
		if (lucd[i].effective || lucd[i].permitted ||
		    lucd[i].inheritable) {
			linux_msg(td,
			    "capset[%d] effective=0x%x, permitted=0x%x, "
			    "inheritable=0x%x is not implemented", i,
			    (int)lucd[i].effective, (int)lucd[i].permitted,
			    (int)lucd[i].inheritable);
			return (EPERM);
		}
	}

	return (0);
}

/*
 * prctl(2): per-option dispatch; options without a native analogue are
 * either silently accepted, stubbed, or rejected with EINVAL.
 */
int
linux_prctl(struct thread *td, struct linux_prctl_args *args)
{
	int error = 0, max_size;
	struct proc *p = td->td_proc;
	char comm[LINUX_MAX_COMM_LEN];
	int pdeath_signal, trace_state;

	switch (args->option) {
	case LINUX_PR_SET_PDEATHSIG:
		if (!LINUX_SIG_VALID(args->arg2))
			return (EINVAL);
		pdeath_signal = linux_to_bsd_signal(args->arg2);
		return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL,
		    &pdeath_signal));
	case LINUX_PR_GET_PDEATHSIG:
		error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS,
		    &pdeath_signal);
		if (error != 0)
			return (error);
		pdeath_signal = bsd_to_linux_signal(pdeath_signal);
		return (copyout(&pdeath_signal,
		    (void *)(register_t)args->arg2,
		    sizeof(pdeath_signal)));
	/*
	 * In Linux, this flag controls if set[gu]id processes can coredump.
	 * There are additional semantics imposed on processes that cannot
	 * coredump:
	 * - Such processes can not be ptraced.
	 * - There are some semantics around ownership of process-related files
	 *   in the /proc namespace.
	 *
	 * In FreeBSD, we can (and by default, do) disable setuid coredump
	 * system-wide with 'sugid_coredump.'  We control tracability on a
	 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag).
	 * By happy coincidence, P2_NOTRACE also prevents coredumping.  So the
	 * procctl is roughly analogous to Linux's DUMPABLE.
	 *
	 * So, proxy these knobs to the corresponding PROC_TRACE setting.
	 */
	case LINUX_PR_GET_DUMPABLE:
		error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS,
		    &trace_state);
		if (error != 0)
			return (error);
		/* -1 == tracing disabled == not dumpable. */
		td->td_retval[0] = (trace_state != -1);
		return (0);
	case LINUX_PR_SET_DUMPABLE:
		/*
		 * It is only valid for userspace to set one of these two
		 * flags, and only one at a time.
		 */
		switch (args->arg2) {
		case LINUX_SUID_DUMP_DISABLE:
			trace_state = PROC_TRACE_CTL_DISABLE_EXEC;
			break;
		case LINUX_SUID_DUMP_USER:
			trace_state = PROC_TRACE_CTL_ENABLE;
			break;
		default:
			return (EINVAL);
		}
		return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL,
		    &trace_state));
	case LINUX_PR_GET_KEEPCAPS:
		/*
		 * Indicate that we always clear the effective and
		 * permitted capability sets when the user id becomes
		 * non-zero (actually the capability sets are simply
		 * always zero in the current implementation).
		 */
		td->td_retval[0] = 0;
		break;
	case LINUX_PR_SET_KEEPCAPS:
		/*
		 * Ignore requests to keep the effective and permitted
		 * capability sets when the user id becomes non-zero.
		 */
		break;
	case LINUX_PR_SET_NAME:
		/*
		 * To be on the safe side we need to make sure to not
		 * overflow the size a Linux program expects. We already
		 * do this here in the copyin, so that we don't need to
		 * check on copyout.
		 */
		max_size = MIN(sizeof(comm), sizeof(p->p_comm));
		error = copyinstr((void *)(register_t)args->arg2, comm,
		    max_size, NULL);

		/* Linux silently truncates the name if it is too long. */
		if (error == ENAMETOOLONG) {
			/*
			 * XXX: copyinstr() isn't documented to populate the
			 * array completely, so do a copyin() to be on the
			 * safe side. This should be changed in case
			 * copyinstr() is changed to guarantee this.
			 */
			error = copyin((void *)(register_t)args->arg2, comm,
			    max_size - 1);
			comm[max_size - 1] = '\0';
		}
		if (error)
			return (error);

		PROC_LOCK(p);
		strlcpy(p->p_comm, comm, sizeof(p->p_comm));
		PROC_UNLOCK(p);
		break;
	case LINUX_PR_GET_NAME:
		PROC_LOCK(p);
		strlcpy(comm, p->p_comm, sizeof(comm));
		PROC_UNLOCK(p);
		error = copyout(comm, (void *)(register_t)args->arg2,
		    strlen(comm) + 1);
		break;
	case LINUX_PR_GET_SECCOMP:
	case LINUX_PR_SET_SECCOMP:
		/*
		 * Same as returned by Linux without CONFIG_SECCOMP enabled.
		 */
		error = EINVAL;
		break;
	case LINUX_PR_CAPBSET_READ:
#if 0
		/*
		 * This makes too much noise with Ubuntu Focal.
		 */
		linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d",
		    (int)args->arg2);
#endif
		error = EINVAL;
		break;
	case LINUX_PR_SET_NO_NEW_PRIVS:
		linux_msg(td, "unsupported prctl PR_SET_NO_NEW_PRIVS");
		error = EINVAL;
		break;
	case LINUX_PR_SET_PTRACER:
		linux_msg(td, "unsupported prctl PR_SET_PTRACER");
		error = EINVAL;
		break;
	default:
		linux_msg(td, "unsupported prctl option %d", args->option);
		error = EINVAL;
		break;
	}

	return (error);
}

/*
 * sched_setparam(2): set scheduling parameters for a thread, remapping
 * the Linux priority range when linux_map_sched_prio is enabled.
 */
int
linux_sched_setparam(struct thread *td,
    struct linux_sched_setparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	error = copyin(uap->param, &sched_param, sizeof(sched_param));
	if (error)
		return (error);

	/* linux_tdfind() returns with the target proc locked. */
	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		if (error)
			goto out;

		switch (policy) {
		case SCHED_OTHER:
			if (sched_param.sched_priority != 0) {
				error = EINVAL;
				goto out;
			}
			sched_param.sched_priority =
			    PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			if (sched_param.sched_priority < 1 ||
			    sched_param.sched_priority >= LINUX_MAX_RT_PRIO) {
				error = EINVAL;
				goto out;
			}
			/*
			 * Map [1, LINUX_MAX_RT_PRIO - 1] to
			 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority - 1) *
			    (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) /
			    (LINUX_MAX_RT_PRIO - 1);
			break;
		}
	}

	error = kern_sched_setparam(td, tdt, &sched_param);
out:	PROC_UNLOCK(tdt->td_proc);
	return (error);
}

/*
 * sched_getparam(2): fetch scheduling parameters for a thread and,
 * when mapping is enabled, rescale the priority back into the Linux
 * range.
 */
int
linux_sched_getparam(struct thread *td,
    struct linux_sched_getparam_args *uap)
{
	struct sched_param sched_param;
	struct thread *tdt;
	int error, policy;

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_getparam(td, tdt, &sched_param);
	if (error) {
		PROC_UNLOCK(tdt->td_proc);
		return (error);
	}

	if (linux_map_sched_prio) {
		error = kern_sched_getscheduler(td, tdt, &policy);
		PROC_UNLOCK(tdt->td_proc);
		if (error)
			return (error);

		switch (policy) {
		case SCHED_OTHER:
			sched_param.sched_priority = 0;
			break;
		case SCHED_FIFO:
		case SCHED_RR:
			/*
			 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to
			 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up).
			 */
			sched_param.sched_priority =
			    (sched_param.sched_priority *
			    (LINUX_MAX_RT_PRIO - 1) +
			    (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) /
			    (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1;
			break;
		}
	} else
		PROC_UNLOCK(tdt->td_proc);

	error = copyout(&sched_param, uap->param, sizeof(sched_param));
	return (error);
}

/*
 * Get affinity of a process.
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	int error;
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	/*
	 * NOTE(review): tdt->td_tid is read after the proc lock was
	 * dropped; the thread could conceivably exit in between —
	 * confirm this window is acceptable here.
	 */
	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
	if (error == 0)
		td->td_retval[0] = sizeof(cpuset_t);

	return (error);
}

/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;

	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	PROC_UNLOCK(tdt->td_proc);

	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
}

/* Linux-layout rlimit: both fields are unsigned 64-bit. */
struct linux_rlimit64 {
	uint64_t	rlim_cur;
	uint64_t	rlim_max;
};

/*
 * prlimit64(2): get and/or set a resource limit of an arbitrary process,
 * with the appropriate pget() visibility/debug checks.
 */
int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error;

	/* Pure query of a dummy (unimplemented) resource: answer directly. */
	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note.
		 * Unlike FreeBSD where rlim is signed 64-bit Linux
		 * rlim is unsigned 64-bit. FreeBSD treats negative limits
		 * as INFINITY so we do not need a conversion even.
		 */
		error = copyin(args->new, &nrlim, sizeof(nrlim));
		if (error != 0)
			return (error);
	}

	/* Setting a limit needs debug rights; reading only visibility. */
	flags = PGET_HOLD | PGET_NOTWEXIT;
	if (args->new != NULL)
		flags |= PGET_CANDEBUG;
	else
		flags |= PGET_CANSEE;
	if (args->pid == 0) {
		p = td->td_proc;
		PHOLD(p);
	} else {
		error = pget(args->pid, flags, &p);
		if (error != 0)
			return (error);
	}
	if (args->old != NULL) {
		PROC_LOCK(p);
		lim_rlimit_proc(p, which, &rlim);
		PROC_UNLOCK(p);
		if (rlim.rlim_cur == RLIM_INFINITY)
			lrlim.rlim_cur = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_cur = rlim.rlim_cur;
		if (rlim.rlim_max == RLIM_INFINITY)
			lrlim.rlim_max = LINUX_RLIM_INFINITY;
		else
			lrlim.rlim_max = rlim.rlim_max;
		error = copyout(&lrlim, args->old, sizeof(lrlim));
		if (error != 0)
			goto out;
	}

	if (args->new != NULL)
		error = kern_proc_setrlimit(td, p, which, &nrlim);

 out:
	PRELE(p);
	return (error);
}

/*
 * pselect6(2): convert the optional sigset and timeout, run the native
 * pselect, and write back the remaining time as Linux does.
 */
int
linux_pselect6(struct thread *td, struct linux_pselect6_args *args)
{
	struct timeval utv, tv0, tv1, *tvp;
	struct l_pselect6arg lpse6;
	struct l_timespec lts;
	struct timespec uts;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	ssp = NULL;
	if (args->sig != NULL) {
		/* The sixth argument is a (sigset ptr, size) pair. */
		error = copyin(args->sig, &lpse6, sizeof(lpse6));
		if (error != 0)
			return (error);
		if (lpse6.ss_len != sizeof(l_ss))
			return (EINVAL);
		if (lpse6.ss != 0) {
			error = copyin(PTRIN(lpse6.ss), &l_ss,
			    sizeof(l_ss));
			if (error != 0)
				return (error);
			linux_to_bsd_sigset(&l_ss, &ss);
			ssp = &ss;
		}
	}

	/*
	 * Currently glibc changes nanosecond number to microsecond.
	 * This mean losing precision but for now it is hardly seen.
	 */
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error != 0)
			return (error);
		error = linux_to_native_timespec(&uts, &lts);
		if (error != 0)
			return (error);

		TIMESPEC_TO_TIMEVAL(&utv, &uts);
		if (itimerfix(&utv))
			return (EINVAL);

		/* Remember the start time so we can report time left. */
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_pselect(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, ssp, LINUX_NFDBITS);

	if (error == 0 && args->tsp != NULL) {
		if (td->td_retval[0] != 0) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */

			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);

		TIMEVAL_TO_TIMESPEC(&utv, &uts);

		error = native_to_linux_timespec(&lts, &uts);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}

	return (error);
}

/*
 * ppoll(2): same pattern as pselect6 — convert sigset and timeout,
 * call kern_poll(), and write the remaining timeout back.
 */
int
linux_ppoll(struct thread *td, struct linux_ppoll_args *args)
{
	struct timespec ts0, ts1;
	struct l_timespec lts;
	struct timespec uts, *tsp;
	l_sigset_t l_ss;
	sigset_t *ssp;
	sigset_t ss;
	int error;

	if (args->sset != NULL) {
		if (args->ssize != sizeof(l_ss))
			return (EINVAL);
		error = copyin(args->sset, &l_ss, sizeof(l_ss));
		if (error)
			return (error);
		linux_to_bsd_sigset(&l_ss, &ss);
		ssp = &ss;
	} else
		ssp = NULL;
	if (args->tsp != NULL) {
		error = copyin(args->tsp, &lts, sizeof(lts));
		if (error)
			return (error);
		error = linux_to_native_timespec(&uts, &lts);
		if (error != 0)
			return (error);

		/* Remember the start time so we can report time left. */
		nanotime(&ts0);
		tsp = &uts;
	} else
		tsp = NULL;

	error = kern_poll(td, args->fds, args->nfds, tsp, ssp);

	if (error == 0 && args->tsp != NULL) {
		if (td->td_retval[0]) {
			/* Report the unslept portion of the timeout. */
			nanotime(&ts1);
			timespecsub(&ts1, &ts0, &ts1);
			timespecsub(&uts, &ts1, &uts);
			if (uts.tv_sec < 0)
				timespecclear(&uts);
		} else
			timespecclear(&uts);

		error = native_to_linux_timespec(&lts, &uts);
		if (error == 0)
			error = copyout(&lts, args->tsp, sizeof(lts));
	}

	return (error);
}

/*
 * sched_rr_get_interval(2): fetch the round-robin quantum for a thread
 * and convert it to a Linux timespec.
 */
int
linux_sched_rr_get_interval(struct thread *td,
    struct linux_sched_rr_get_interval_args *uap)
{
	struct timespec ts;
	struct l_timespec lts;
	struct thread *tdt;
	int error;

	/*
	 * According to man in case the invalid pid specified
	 * EINVAL should be returned.
	 */
	if (uap->pid < 0)
		return (EINVAL);

	tdt = linux_tdfind(td, uap->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	error = kern_sched_rr_get_interval_td(td, tdt, &ts);
	PROC_UNLOCK(tdt->td_proc);
	if (error != 0)
		return (error);
	error = native_to_linux_timespec(&lts, &ts);
	if (error != 0)
		return (error);
	return (copyout(&lts, uap->interval, sizeof(lts)));
}

/*
 * In case when the Linux thread is the initial thread in
 * the thread group thread id is equal to the process id.
 * Glibc depends on this magic (assert in pthread_getattr_np.c).
 *
 * On success the target thread's process is returned LOCKED;
 * the caller must PROC_UNLOCK() it.
 */
struct thread *
linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid)
{
	struct linux_emuldata *em;
	struct thread *tdt;
	struct proc *p;

	tdt = NULL;
	if (tid == 0 || tid == td->td_tid) {
		/* 0 (or our own tid) means the calling thread. */
		tdt = td;
		PROC_LOCK(tdt->td_proc);
	} else if (tid > PID_MAX)
		tdt = tdfind(tid, pid);
	else {
		/*
		 * Initial thread where the tid equal to the pid.
		 */
		p = pfind(tid);
		if (p != NULL) {
			if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
				/*
				 * p is not a Linuxulator process.
				 */
				PROC_UNLOCK(p);
				return (NULL);
			}
			FOREACH_THREAD_IN_PROC(p, tdt) {
				em = em_find(tdt);
				if (tid == em->em_tid)
					return (tdt);
			}
			PROC_UNLOCK(p);
		}
		return (NULL);
	}

	return (tdt);
}

/* Translate Linux wait*() option bits to native ones (OR-ed into *bsdopts). */
void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

/*
 * getrandom(2): serviced from the kernel random device via a uio;
 * partial reads are reflected in the returned byte count.
 */
int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
		return (EINVAL);
	/* Linux silently caps the request size. */
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

/* mincore(2): validate alignment and defer to the native service. */
int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}

/* Linux kernel log level prefix prepended to syslog output. */
#define	SYSLOG_TAG	"<6>"

int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128],
*src, *dst; 2619 u_int seq; 2620 int buflen, error; 2621 2622 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2623 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2624 return (EINVAL); 2625 } 2626 2627 if (args->len < 6) { 2628 td->td_retval[0] = 0; 2629 return (0); 2630 } 2631 2632 error = priv_check(td, PRIV_MSGBUF); 2633 if (error) 2634 return (error); 2635 2636 mtx_lock(&msgbuf_lock); 2637 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2638 mtx_unlock(&msgbuf_lock); 2639 2640 dst = args->buf; 2641 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2642 /* The -1 is to skip the trailing '\0'. */ 2643 dst += sizeof(SYSLOG_TAG) - 1; 2644 2645 while (error == 0) { 2646 mtx_lock(&msgbuf_lock); 2647 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2648 mtx_unlock(&msgbuf_lock); 2649 2650 if (buflen == 0) 2651 break; 2652 2653 for (src = buf; src < buf + buflen && error == 0; src++) { 2654 if (*src == '\0') 2655 continue; 2656 2657 if (dst >= args->buf + args->len) 2658 goto out; 2659 2660 error = copyout(src, dst, 1); 2661 dst++; 2662 2663 if (*src == '\n' && *(src + 1) != '<' && 2664 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2665 error = copyout(&SYSLOG_TAG, 2666 dst, sizeof(SYSLOG_TAG)); 2667 dst += sizeof(SYSLOG_TAG) - 1; 2668 } 2669 } 2670 } 2671 out: 2672 td->td_retval[0] = dst - args->buf; 2673 return (error); 2674 } 2675 2676 int 2677 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2678 { 2679 int cpu, error, node; 2680 2681 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2682 error = 0; 2683 node = cpuid_to_pcpu[cpu]->pc_domain; 2684 2685 if (args->cpu != NULL) 2686 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2687 if (args->node != NULL) 2688 error = copyout(&node, args->node, sizeof(l_int)); 2689 return (error); 2690 } 2691