1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 2002 Doug Rabson 5 * Copyright (c) 1994-1995 Søren Schmidt 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer 13 * in this position and unchanged. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 3. The name of the author may not be used to endorse or promote products 18 * derived from this software without specific prior written permission 19 * 20 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 21 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 22 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 23 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 24 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 25 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 29 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_compat.h"

#include <sys/param.h>
#include <sys/blist.h>
#include <sys/fcntl.h>
#if defined(__i386__)
#include <sys/imgact_aout.h>
#endif
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/msgbuf.h>
#include <sys/mutex.h>
#include <sys/namei.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/procctl.h>
#include <sys/reboot.h>
#include <sys/racct.h>
#include <sys/random.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/signalvar.h>
#include <sys/stat.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/wait.h>
#include <sys/cpuset.h>
#include <sys/uio.h>

#include <security/mac/mac_framework.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_kern.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/swap_pager.h>

#ifdef COMPAT_LINUX32
#include <machine/../linux32/linux.h>
#include <machine/../linux32/linux32_proto.h>
#else
#include <machine/../linux/linux.h>
#include <machine/../linux/linux_proto.h>
#endif

#include <compat/linux/linux_dtrace.h>
#include <compat/linux/linux_file.h>
#include <compat/linux/linux_mib.h>
#include <compat/linux/linux_signal.h>
#include <compat/linux/linux_timer.h>
#include <compat/linux/linux_util.h>
#include <compat/linux/linux_sysproto.h>
#include <compat/linux/linux_emul.h>
#include <compat/linux/linux_misc.h>

int stclohz;				/* Statistics clock frequency */

/*
 * Map Linux RLIMIT_* numbers to their FreeBSD counterparts; indexed by
 * the Linux resource number (see linux_setrlimit()/linux_old_getrlimit()).
 */
static unsigned int linux_to_bsd_resource[LINUX_RLIM_NLIMITS] = {
	RLIMIT_CPU, RLIMIT_FSIZE, RLIMIT_DATA, RLIMIT_STACK,
	RLIMIT_CORE, RLIMIT_RSS, RLIMIT_NPROC, RLIMIT_NOFILE,
	RLIMIT_MEMLOCK, RLIMIT_AS
};

/* User-visible layout of the Linux sysinfo(2) result buffer. */
struct l_sysinfo {
	l_long		uptime;		/* Seconds since boot */
	l_ulong		loads[3];	/* 1, 5, and 15 minute load averages */
#define LINUX_SYSINFO_LOADS_SCALE 65536
	l_ulong		totalram;	/* Total usable main memory size */
	l_ulong		freeram;	/* Available memory size */
	l_ulong		sharedram;	/* Amount of shared memory */
	l_ulong		bufferram;	/* Memory used by buffers */
	l_ulong		totalswap;	/* Total swap space size */
	l_ulong		freeswap;	/* swap space still available */
	l_ushort	procs;		/* Number of current processes */
	l_ushort	pads;
	l_ulong		totalhigh;
	l_ulong		freehigh;
	l_uint		mem_unit;
	char		_f[20-2*sizeof(l_long)-sizeof(l_int)];	/* padding */
};

/* Sixth argument of Linux pselect6(2): pointer + length of a sigset. */
struct l_pselect6arg {
	l_uintptr_t	ss;
	l_size_t	ss_len;
};

static int	linux_utimensat_nsec_valid(l_long);

/*
 * Linux sysinfo(2): fill in a struct l_sysinfo with system statistics
 * and copy it out to args->info.
 */
int
linux_sysinfo(struct thread *td, struct linux_sysinfo_args *args)
{
	struct l_sysinfo sysinfo;
	int i, j;
	struct timespec ts;

	bzero(&sysinfo, sizeof(sysinfo));
	getnanouptime(&ts);
	/* Round the uptime up to whole seconds. */
	if (ts.tv_nsec != 0)
		ts.tv_sec++;
	sysinfo.uptime = ts.tv_sec;

	/* Use the information from the mib to get our load averages */
	for (i = 0; i < 3; i++)
		sysinfo.loads[i] = averunnable.ldavg[i] *
		    LINUX_SYSINFO_LOADS_SCALE / averunnable.fscale;

	sysinfo.totalram = physmem * PAGE_SIZE;
	sysinfo.freeram = (u_long)vm_free_count() * PAGE_SIZE;

	/*
	 * sharedram counts pages allocated to named, swap-backed objects such
	 * as shared memory segments and tmpfs files. There is no cheap way to
	 * compute this, so just leave the field unpopulated. Linux itself only
	 * started setting this field in the 3.x timeframe.
	 */
	sysinfo.sharedram = 0;
	sysinfo.bufferram = 0;

	/* i = total swap pages, j = swap pages in use. */
	swap_pager_status(&i, &j);
	sysinfo.totalswap = i * PAGE_SIZE;
	sysinfo.freeswap = (i - j) * PAGE_SIZE;

	sysinfo.procs = nprocs;

	/*
	 * Platforms supported by the emulation layer do not have a notion of
	 * high memory.
	 */
	sysinfo.totalhigh = 0;
	sysinfo.freehigh = 0;

	sysinfo.mem_unit = 1;

	return (copyout(&sysinfo, args->info, sizeof(sysinfo)));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux alarm(2): arm a one-shot ITIMER_REAL timer for args->secs seconds
 * and return the whole seconds that remained on any previously armed timer.
 * Linux alarm() never fails, so errors from kern_setitimer() are asserted
 * away rather than returned.
 */
int
linux_alarm(struct thread *td, struct linux_alarm_args *args)
{
	struct itimerval it, old_it;
	u_int secs;
	int error;

	secs = args->secs;
	/*
	 * Linux alarm() is always successful. Limit secs to INT32_MAX / 2
	 * to match kern_setitimer()'s limit to avoid error from it.
	 *
	 * XXX. Linux limit secs to INT_MAX on 32 and does not limit on 64-bit
	 * platforms.
	 */
	if (secs > INT32_MAX / 2)
		secs = INT32_MAX / 2;

	it.it_value.tv_sec = secs;
	it.it_value.tv_usec = 0;
	timevalclear(&it.it_interval);
	error = kern_setitimer(td, ITIMER_REAL, &it, &old_it);
	KASSERT(error == 0, ("kern_setitimer returns %d", error));

	/* Round the remaining time to the nearest whole second. */
	if ((old_it.it_value.tv_sec == 0 && old_it.it_value.tv_usec > 0) ||
	    old_it.it_value.tv_usec >= 500000)
		old_it.it_value.tv_sec++;
	td->td_retval[0] = old_it.it_value.tv_sec;
	return (0);
}
#endif

/*
 * Linux brk(2): try to move the data-segment break to args->dsend and
 * return the resulting break address; on failure (or a request below the
 * data segment start) return the current break unchanged.  Never returns
 * an error, matching Linux semantics.
 */
int
linux_brk(struct thread *td, struct linux_brk_args *args)
{
	struct vmspace *vm = td->td_proc->p_vmspace;
	uintptr_t new, old;

	old = (uintptr_t)vm->vm_daddr + ctob(vm->vm_dsize);
	new = (uintptr_t)args->dsend;
	if ((caddr_t)new > vm->vm_daddr && !kern_break(td, &new))
		td->td_retval[0] = (register_t)new;
	else
		td->td_retval[0] = (register_t)old;

	return (0);
}

#if defined(__i386__)
/* XXX: what about amd64/linux32?
 */

/*
 * Linux uselib(2): map an a.out-format shared library into the calling
 * process's address space.  i386 only.  The cleanup label unwinds, in
 * order: the vnode open, the "text busy" flag, the vnode lock, and the
 * temporary exec_map header mapping, depending on how far we got.
 */
int
linux_uselib(struct thread *td, struct linux_uselib_args *args)
{
	struct nameidata ni;
	struct vnode *vp;
	struct exec *a_out;
	vm_map_t map;
	vm_map_entry_t entry;
	struct vattr attr;
	vm_offset_t vmaddr;
	unsigned long file_offset;
	unsigned long bss_size;
	char *library;
	ssize_t aresid;
	int error;
	/* State flags consulted by the cleanup path below. */
	bool locked, opened, textset;

	a_out = NULL;
	vp = NULL;
	locked = false;
	textset = false;
	opened = false;

	if (!LUSECONVPATH(td)) {
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_USERSPACE, args->library, td);
		error = namei(&ni);
	} else {
		/* Translate the path into the Linux emulation tree first. */
		LCONVPATHEXIST(td, args->library, &library);
		NDINIT(&ni, LOOKUP, ISOPEN | FOLLOW | LOCKLEAF | AUDITVNODE1,
		    UIO_SYSSPACE, library, td);
		error = namei(&ni);
		LFREEPATH(library);
	}
	if (error)
		goto cleanup;

	vp = ni.ni_vp;
	NDFREE(&ni, NDF_ONLY_PNBUF);

	/*
	 * From here on down, we have a locked vnode that must be unlocked.
	 * XXX: The code below largely duplicates exec_check_permissions().
	 */
	locked = true;

	/* Executable? */
	error = VOP_GETATTR(vp, &attr, td->td_ucred);
	if (error)
		goto cleanup;

	if ((vp->v_mount->mnt_flag & MNT_NOEXEC) ||
	    ((attr.va_mode & 0111) == 0) || (attr.va_type != VREG)) {
		/* EACCESS is what exec(2) returns. */
		error = ENOEXEC;
		goto cleanup;
	}

	/* Sensible size? */
	if (attr.va_size == 0) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* Can we access it? */
	error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td);
	if (error)
		goto cleanup;

	/*
	 * XXX: This should use vn_open() so that it is properly authorized,
	 * and to reduce code redundancy all over the place here.
	 * XXX: Not really, it duplicates far more of exec_check_permissions()
	 * than vn_open().
	 */
#ifdef MAC
	error = mac_vnode_check_open(td->td_ucred, vp, VREAD);
	if (error)
		goto cleanup;
#endif
	error = VOP_OPEN(vp, FREAD, td->td_ucred, td, NULL);
	if (error)
		goto cleanup;
	opened = true;

	/* Pull in executable header into exec_map */
	error = vm_mmap(exec_map, (vm_offset_t *)&a_out, PAGE_SIZE,
	    VM_PROT_READ, VM_PROT_READ, 0, OBJT_VNODE, vp, 0);
	if (error)
		goto cleanup;

	/* Is it a Linux binary ? */
	if (((a_out->a_magic >> 16) & 0xff) != 0x64) {
		error = ENOEXEC;
		goto cleanup;
	}

	/*
	 * While we are here, we should REALLY do some more checks
	 */

	/* Set file/virtual offset based on a.out variant. */
	switch ((int)(a_out->a_magic & 0xffff)) {
	case 0413:	/* ZMAGIC */
		file_offset = 1024;
		break;
	case 0314:	/* QMAGIC */
		file_offset = 0;
		break;
	default:
		error = ENOEXEC;
		goto cleanup;
	}

	bss_size = round_page(a_out->a_bss);

	/* Check various fields in header for validity/bounds. */
	if (a_out->a_text & PAGE_MASK || a_out->a_data & PAGE_MASK) {
		error = ENOEXEC;
		goto cleanup;
	}

	/* text + data can't exceed file size */
	if (a_out->a_data + a_out->a_text > attr.va_size) {
		error = EFAULT;
		goto cleanup;
	}

	/*
	 * text/data/bss must not exceed limits
	 * XXX - this is not complete. it should check current usage PLUS
	 * the resources needed by this library.
	 */
	PROC_LOCK(td->td_proc);
	if (a_out->a_text > maxtsiz ||
	    a_out->a_data + bss_size > lim_cur_proc(td->td_proc, RLIMIT_DATA) ||
	    racct_set(td->td_proc, RACCT_DATA, a_out->a_data +
	    bss_size) != 0) {
		PROC_UNLOCK(td->td_proc);
		error = ENOMEM;
		goto cleanup;
	}
	PROC_UNLOCK(td->td_proc);

	/*
	 * Prevent more writers.
	 */
	error = VOP_SET_TEXT(vp);
	if (error != 0)
		goto cleanup;
	textset = true;

	/*
	 * Lock no longer needed
	 */
	locked = false;
	VOP_UNLOCK(vp);

	/*
	 * Check if file_offset page aligned. Currently we cannot handle
	 * misalinged file offsets, and so we read in the entire image
	 * (what a waste).
	 */
	if (file_offset & PAGE_MASK) {
		/* Map text+data read/write/execute */

		/* a_entry is the load address and is page aligned */
		vmaddr = trunc_page(a_out->a_entry);

		/* get anon user mapping, read+write+execute */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, a_out->a_text + a_out->a_data, 0, VMFS_NO_SPACE,
		    VM_PROT_ALL, VM_PROT_ALL, 0);
		if (error)
			goto cleanup;

		error = vn_rdwr(UIO_READ, vp, (void *)vmaddr, file_offset,
		    a_out->a_text + a_out->a_data, UIO_USERSPACE, 0,
		    td->td_ucred, NOCRED, &aresid, td);
		if (error != 0)
			goto cleanup;
		/* A short read means a truncated image: reject it. */
		if (aresid != 0) {
			error = ENOEXEC;
			goto cleanup;
		}
	} else {
		/*
		 * for QMAGIC, a_entry is 20 bytes beyond the load address
		 * to skip the executable header
		 */
		vmaddr = trunc_page(a_out->a_entry);

		/*
		 * Map it all into the process's space as a single
		 * copy-on-write "data" segment.
		 */
		map = &td->td_proc->p_vmspace->vm_map;
		error = vm_mmap(map, &vmaddr,
		    a_out->a_text + a_out->a_data, VM_PROT_ALL, VM_PROT_ALL,
		    MAP_PRIVATE | MAP_FIXED, OBJT_VNODE, vp, file_offset);
		if (error)
			goto cleanup;
		vm_map_lock(map);
		if (!vm_map_lookup_entry(map, vmaddr, &entry)) {
			vm_map_unlock(map);
			error = EDOOFUS;
			goto cleanup;
		}
		/*
		 * The mapping now holds the text reference; the cleanup
		 * path must no longer drop it by hand.
		 */
		entry->eflags |= MAP_ENTRY_VN_EXEC;
		vm_map_unlock(map);
		textset = false;
	}

	if (bss_size != 0) {
		/* Calculate BSS start address */
		vmaddr = trunc_page(a_out->a_entry) + a_out->a_text +
		    a_out->a_data;

		/* allocate some 'anon' space */
		error = vm_map_find(&td->td_proc->p_vmspace->vm_map, NULL, 0,
		    &vmaddr, bss_size, 0, VMFS_NO_SPACE, VM_PROT_ALL,
		    VM_PROT_ALL, 0);
		if (error)
			goto cleanup;
	}

cleanup:
	if (opened) {
		if (locked)
			VOP_UNLOCK(vp);
		locked = false;
		VOP_CLOSE(vp, FREAD, td->td_ucred, td);
	}
	if (textset) {
		if (!locked) {
			locked = true;
			VOP_LOCK(vp, LK_SHARED | LK_RETRY);
		}
		VOP_UNSET_TEXT_CHECKED(vp);
	}
	if (locked)
		VOP_UNLOCK(vp);

	/* Release the temporary mapping. */
	if (a_out)
		kmap_free_wakeup(exec_map, (vm_offset_t)a_out, PAGE_SIZE);

	return (error);
}

#endif	/* __i386__ */

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux select(2): like select(2) but, when a timeout was supplied, the
 * remaining time is written back to the user's timeval on return.
 */
int
linux_select(struct thread *td, struct linux_select_args *args)
{
	l_timeval ltv;
	struct timeval tv0, tv1, utv, *tvp;
	int error;

	/*
	 * Store current time for computation of the amount of
	 * time left.
	 */
	if (args->timeout) {
		if ((error = copyin(args->timeout, &ltv, sizeof(ltv))))
			goto select_out;
		utv.tv_sec = ltv.tv_sec;
		utv.tv_usec = ltv.tv_usec;

		if (itimerfix(&utv)) {
			/*
			 * The timeval was invalid. Convert it to something
			 * valid that will act as it does under Linux.
			 */
			utv.tv_sec += utv.tv_usec / 1000000;
			utv.tv_usec %= 1000000;
			if (utv.tv_usec < 0) {
				utv.tv_sec -= 1;
				utv.tv_usec += 1000000;
			}
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		}
		microtime(&tv0);
		tvp = &utv;
	} else
		tvp = NULL;

	error = kern_select(td, args->nfds, args->readfds, args->writefds,
	    args->exceptfds, tvp, LINUX_NFDBITS);
	if (error)
		goto select_out;

	if (args->timeout) {
		if (td->td_retval[0]) {
			/*
			 * Compute how much time was left of the timeout,
			 * by subtracting the current time and the time
			 * before we started the call, and subtracting
			 * that result from the user-supplied value.
			 */
			microtime(&tv1);
			timevalsub(&tv1, &tv0);
			timevalsub(&utv, &tv1);
			if (utv.tv_sec < 0)
				timevalclear(&utv);
		} else
			timevalclear(&utv);
		ltv.tv_sec = utv.tv_sec;
		ltv.tv_usec = utv.tv_usec;
		if ((error = copyout(&ltv, args->timeout, sizeof(ltv))))
			goto select_out;
	}

select_out:
	return (error);
}
#endif

/*
 * Linux mremap(2): only shrinking a mapping in place is supported; a
 * request to grow fails with ENOMEM.  Returns the (old) address on
 * success, 0 on error, via td_retval.
 */
int
linux_mremap(struct thread *td, struct linux_mremap_args *args)
{
	uintptr_t addr;
	size_t len;
	int error = 0;

	if (args->flags & ~(LINUX_MREMAP_FIXED | LINUX_MREMAP_MAYMOVE)) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	/*
	 * Check for the page alignment.
	 * Linux defines PAGE_MASK to be FreeBSD ~PAGE_MASK.
	 */
	if (args->addr & PAGE_MASK) {
		td->td_retval[0] = 0;
		return (EINVAL);
	}

	args->new_len = round_page(args->new_len);
	args->old_len = round_page(args->old_len);

	if (args->new_len > args->old_len) {
		td->td_retval[0] = 0;
		return (ENOMEM);
	}

	if (args->new_len < args->old_len) {
		/* Shrink: unmap the tail of the region. */
		addr = args->addr + args->new_len;
		len = args->old_len - args->new_len;
		error = kern_munmap(td, addr, len);
	}

	td->td_retval[0] = error ?
	    0 : (uintptr_t)args->addr;
	return (error);
}

#define	LINUX_MS_ASYNC		0x0001
#define	LINUX_MS_INVALIDATE	0x0002
#define	LINUX_MS_SYNC		0x0004

/*
 * Linux msync(2): forward to kern_msync().  Linux MS_SYNC does not map
 * to a FreeBSD flag bit, so it is masked off before the call.
 */
int
linux_msync(struct thread *td, struct linux_msync_args *args)
{

	return (kern_msync(td, args->addr, args->len,
	    args->fl & ~LINUX_MS_SYNC));
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux time(2): return the current time in seconds since the Epoch,
 * optionally also storing it at args->tm.
 */
int
linux_time(struct thread *td, struct linux_time_args *args)
{
	struct timeval tv;
	l_time_t tm;
	int error;

	microtime(&tv);
	tm = tv.tv_sec;
	if (args->tm && (error = copyout(&tm, args->tm, sizeof(tm))))
		return (error);
	td->td_retval[0] = tm;
	return (0);
}
#endif

/* User-visible layout of the Linux times(2) result buffer. */
struct l_times_argv {
	l_clock_t	tms_utime;
	l_clock_t	tms_stime;
	l_clock_t	tms_cutime;
	l_clock_t	tms_cstime;
};

/*
 * Glibc versions prior to 2.2.1 always use hard-coded CLK_TCK value.
 * Since 2.2.1 Glibc uses value exported from kernel via AT_CLKTCK
 * auxiliary vector entry.
 */
#define	CLK_TCK		100

/* Convert a timeval to clock ticks: old (fixed CLK_TCK) and new (stclohz). */
#define	CONVOTCK(r)	(r.tv_sec * CLK_TCK + r.tv_usec / (1000000 / CLK_TCK))
#define	CONVNTCK(r)	(r.tv_sec * stclohz + r.tv_usec / (1000000 / stclohz))

/* Pick the conversion matching the emulated kernel version. */
#define	CONVTCK(r)	(linux_kernver(td) >= LINUX_KERNVER_2004000 ?
703 */ 704 strlcpy(utsname.machine, "x86_64", LINUX_MAX_UTSNAME); 705 #else 706 strlcpy(utsname.machine, linux_kplatform, LINUX_MAX_UTSNAME); 707 #endif 708 709 return (copyout(&utsname, args->buf, sizeof(utsname))); 710 } 711 712 struct l_utimbuf { 713 l_time_t l_actime; 714 l_time_t l_modtime; 715 }; 716 717 #ifdef LINUX_LEGACY_SYSCALLS 718 int 719 linux_utime(struct thread *td, struct linux_utime_args *args) 720 { 721 struct timeval tv[2], *tvp; 722 struct l_utimbuf lut; 723 char *fname; 724 int error; 725 bool convpath; 726 727 convpath = LUSECONVPATH(td); 728 if (convpath) 729 LCONVPATHEXIST(td, args->fname, &fname); 730 731 if (args->times) { 732 if ((error = copyin(args->times, &lut, sizeof lut))) { 733 if (convpath) 734 LFREEPATH(fname); 735 return (error); 736 } 737 tv[0].tv_sec = lut.l_actime; 738 tv[0].tv_usec = 0; 739 tv[1].tv_sec = lut.l_modtime; 740 tv[1].tv_usec = 0; 741 tvp = tv; 742 } else 743 tvp = NULL; 744 745 if (!convpath) { 746 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 747 tvp, UIO_SYSSPACE); 748 } else { 749 error = kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, tvp, 750 UIO_SYSSPACE); 751 LFREEPATH(fname); 752 } 753 return (error); 754 } 755 #endif 756 757 #ifdef LINUX_LEGACY_SYSCALLS 758 int 759 linux_utimes(struct thread *td, struct linux_utimes_args *args) 760 { 761 l_timeval ltv[2]; 762 struct timeval tv[2], *tvp = NULL; 763 char *fname; 764 int error; 765 bool convpath; 766 767 convpath = LUSECONVPATH(td); 768 if (convpath) 769 LCONVPATHEXIST(td, args->fname, &fname); 770 771 if (args->tptr != NULL) { 772 if ((error = copyin(args->tptr, ltv, sizeof ltv))) { 773 LFREEPATH(fname); 774 return (error); 775 } 776 tv[0].tv_sec = ltv[0].tv_sec; 777 tv[0].tv_usec = ltv[0].tv_usec; 778 tv[1].tv_sec = ltv[1].tv_sec; 779 tv[1].tv_usec = ltv[1].tv_usec; 780 tvp = tv; 781 } 782 783 if (!convpath) { 784 error = kern_utimesat(td, AT_FDCWD, args->fname, UIO_USERSPACE, 785 tvp, UIO_SYSSPACE); 786 } else { 787 error = 
kern_utimesat(td, AT_FDCWD, fname, UIO_SYSSPACE, 788 tvp, UIO_SYSSPACE); 789 LFREEPATH(fname); 790 } 791 return (error); 792 } 793 #endif 794 795 static int 796 linux_utimensat_nsec_valid(l_long nsec) 797 { 798 799 if (nsec == LINUX_UTIME_OMIT || nsec == LINUX_UTIME_NOW) 800 return (0); 801 if (nsec >= 0 && nsec <= 999999999) 802 return (0); 803 return (1); 804 } 805 806 int 807 linux_utimensat(struct thread *td, struct linux_utimensat_args *args) 808 { 809 struct l_timespec l_times[2]; 810 struct timespec times[2], *timesp = NULL; 811 char *path = NULL; 812 int error, dfd, flags = 0; 813 814 dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd; 815 816 if (args->flags & ~LINUX_AT_SYMLINK_NOFOLLOW) 817 return (EINVAL); 818 819 if (args->times != NULL) { 820 error = copyin(args->times, l_times, sizeof(l_times)); 821 if (error != 0) 822 return (error); 823 824 if (linux_utimensat_nsec_valid(l_times[0].tv_nsec) != 0 || 825 linux_utimensat_nsec_valid(l_times[1].tv_nsec) != 0) 826 return (EINVAL); 827 828 times[0].tv_sec = l_times[0].tv_sec; 829 switch (l_times[0].tv_nsec) 830 { 831 case LINUX_UTIME_OMIT: 832 times[0].tv_nsec = UTIME_OMIT; 833 break; 834 case LINUX_UTIME_NOW: 835 times[0].tv_nsec = UTIME_NOW; 836 break; 837 default: 838 times[0].tv_nsec = l_times[0].tv_nsec; 839 } 840 841 times[1].tv_sec = l_times[1].tv_sec; 842 switch (l_times[1].tv_nsec) 843 { 844 case LINUX_UTIME_OMIT: 845 times[1].tv_nsec = UTIME_OMIT; 846 break; 847 case LINUX_UTIME_NOW: 848 times[1].tv_nsec = UTIME_NOW; 849 break; 850 default: 851 times[1].tv_nsec = l_times[1].tv_nsec; 852 break; 853 } 854 timesp = times; 855 856 /* This breaks POSIX, but is what the Linux kernel does 857 * _on purpose_ (documented in the man page for utimensat(2)), 858 * so we must follow that behaviour. 
*/ 859 if (times[0].tv_nsec == UTIME_OMIT && 860 times[1].tv_nsec == UTIME_OMIT) 861 return (0); 862 } 863 864 if (!LUSECONVPATH(td)) { 865 if (args->pathname != NULL) { 866 return (kern_utimensat(td, dfd, args->pathname, 867 UIO_USERSPACE, timesp, UIO_SYSSPACE, flags)); 868 } 869 } 870 871 if (args->pathname != NULL) 872 LCONVPATHEXIST_AT(td, args->pathname, &path, dfd); 873 else if (args->flags != 0) 874 return (EINVAL); 875 876 if (args->flags & LINUX_AT_SYMLINK_NOFOLLOW) 877 flags |= AT_SYMLINK_NOFOLLOW; 878 879 if (path == NULL) 880 error = kern_futimens(td, dfd, timesp, UIO_SYSSPACE); 881 else { 882 error = kern_utimensat(td, dfd, path, UIO_SYSSPACE, timesp, 883 UIO_SYSSPACE, flags); 884 LFREEPATH(path); 885 } 886 887 return (error); 888 } 889 890 #ifdef LINUX_LEGACY_SYSCALLS 891 int 892 linux_futimesat(struct thread *td, struct linux_futimesat_args *args) 893 { 894 l_timeval ltv[2]; 895 struct timeval tv[2], *tvp = NULL; 896 char *fname; 897 int error, dfd; 898 bool convpath; 899 900 convpath = LUSECONVPATH(td); 901 dfd = (args->dfd == LINUX_AT_FDCWD) ? 
	    AT_FDCWD : args->dfd;
	if (convpath)
		LCONVPATHEXIST_AT(td, args->filename, &fname, dfd);

	if (args->utimes != NULL) {
		if ((error = copyin(args->utimes, ltv, sizeof ltv))) {
			/* fname was only allocated on the convpath path. */
			if (convpath)
				LFREEPATH(fname);
			return (error);
		}
		tv[0].tv_sec = ltv[0].tv_sec;
		tv[0].tv_usec = ltv[0].tv_usec;
		tv[1].tv_sec = ltv[1].tv_sec;
		tv[1].tv_usec = ltv[1].tv_usec;
		tvp = tv;
	}

	if (!convpath) {
		error = kern_utimesat(td, dfd, args->filename, UIO_USERSPACE,
		    tvp, UIO_SYSSPACE);
	} else {
		error = kern_utimesat(td, dfd, fname, UIO_SYSSPACE, tvp, UIO_SYSSPACE);
		LFREEPATH(fname);
	}
	return (error);
}
#endif

/*
 * Common back end for the Linux wait family: translate the Linux pid
 * convention (-1 = any child, <-1 = process group, >0 = specific pid)
 * into a kern_wait6() idtype/id pair, and convert the BSD wait status
 * into Linux encoding before copying it out to statusp.
 */
static int
linux_common_wait(struct thread *td, int pid, int *statusp,
    int options, struct __wrusage *wrup)
{
	siginfo_t siginfo;
	idtype_t idtype;
	id_t id;
	int error, status, tmpstat;

	if (pid == WAIT_ANY) {
		idtype = P_ALL;
		id = 0;
	} else if (pid < 0) {
		idtype = P_PGID;
		id = (id_t)-pid;
	} else {
		idtype = P_PID;
		id = (id_t)pid;
	}

	/*
	 * For backward compatibility we implicitly add flags WEXITED
	 * and WTRAPPED here.
	 */
	options |= WEXITED | WTRAPPED;
	error = kern_wait6(td, idtype, id, &status, options, wrup, &siginfo);
	if (error)
		return (error);

	if (statusp) {
		tmpstat = status & 0xffff;
		if (WIFSIGNALED(tmpstat)) {
			/* Replace the terminating signal number. */
			tmpstat = (tmpstat & 0xffffff80) |
			    bsd_to_linux_signal(WTERMSIG(tmpstat));
		} else if (WIFSTOPPED(tmpstat)) {
			/* Replace the stop signal (bits 8-15). */
			tmpstat = (tmpstat & 0xffff00ff) |
			    (bsd_to_linux_signal(WSTOPSIG(tmpstat)) << 8);
#if defined(__amd64__) && !defined(COMPAT_LINUX32)
			if (WSTOPSIG(status) == SIGTRAP) {
				tmpstat = linux_ptrace_status(td,
				    siginfo.si_pid, tmpstat);
			}
#endif
		} else if (WIFCONTINUED(tmpstat)) {
			/* Linux encodes "continued" as 0xffff. */
			tmpstat = 0xffff;
		}
		error = copyout(&tmpstat, statusp, sizeof(int));
	}

	return (error);
}

#if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32))
/*
 * Linux waitpid(2): thin wrapper around linux_wait4() with no rusage.
 */
int
linux_waitpid(struct thread *td, struct linux_waitpid_args *args)
{
	struct linux_wait4_args wait4_args;

	wait4_args.pid = args->pid;
	wait4_args.status = args->status;
	wait4_args.options = args->options;
	wait4_args.rusage = NULL;

	return (linux_wait4(td, &wait4_args));
}
#endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */

/*
 * Linux wait4(2): wait for a child and optionally copy out its rusage.
 */
int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
	int error, options;
	struct __wrusage wru, *wrup;

	if (args->options & ~(LINUX_WUNTRACED | LINUX_WNOHANG |
	    LINUX_WCONTINUED | __WCLONE | __WNOTHREAD | __WALL))
		return (EINVAL);

	options = WEXITED;
	linux_to_bsd_waitopts(args->options, &options);

	if (args->rusage != NULL)
		wrup = &wru;
	else
		wrup = NULL;
	error = linux_common_wait(td, args->pid, args->status, options, wrup);
	if (error != 0)
		return (error);
	if (args->rusage != NULL)
		error = linux_copyout_rusage(&wru.wru_self, args->rusage);
	return (error);
}

/*
 * Linux waitid(2): wait for a state change of a child selected by
 * idtype/id, reporting the result as an l_siginfo_t.
 */
int
linux_waitid(struct thread *td, struct
    linux_waitid_args *args)
{
	int status, options, sig;
	struct __wrusage wru;
	siginfo_t siginfo;
	l_siginfo_t lsi;
	idtype_t idtype;
	struct proc *p;
	int error;

	options = 0;
	linux_to_bsd_waitopts(args->options, &options);

	if (options & ~(WNOHANG | WNOWAIT | WEXITED | WUNTRACED | WCONTINUED))
		return (EINVAL);
	if (!(options & (WEXITED | WUNTRACED | WCONTINUED)))
		return (EINVAL);

	/* Translate the Linux idtype into a kern_wait6() idtype. */
	switch (args->idtype) {
	case LINUX_P_ALL:
		idtype = P_ALL;
		break;
	case LINUX_P_PID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PID;
		break;
	case LINUX_P_PGID:
		if (args->id <= 0)
			return (EINVAL);
		idtype = P_PGID;
		break;
	default:
		return (EINVAL);
	}

	error = kern_wait6(td, idtype, args->id, &status, options,
	    &wru, &siginfo);
	if (error != 0)
		return (error);
	if (args->rusage != NULL) {
		error = linux_copyout_rusage(&wru.wru_children,
		    args->rusage);
		if (error != 0)
			return (error);
	}
	if (args->info != NULL) {
		p = td->td_proc;
		bzero(&lsi, sizeof(lsi));
		/* td_retval[0] == 0 means no child changed state (WNOHANG). */
		if (td->td_retval[0] != 0) {
			sig = bsd_to_linux_signal(siginfo.si_signo);
			siginfo_to_lsiginfo(&siginfo, &lsi, sig);
		}
		error = copyout(&lsi, args->info, sizeof(lsi));
	}
	/* waitid(2) returns 0 on success, unlike wait4(2). */
	td->td_retval[0] = 0;

	return (error);
}

#ifdef LINUX_LEGACY_SYSCALLS
/*
 * Linux mknod(2): create a fifo, socket, device node, or regular file
 * depending on the S_IFMT bits of args->mode.
 */
int
linux_mknod(struct thread *td, struct linux_mknod_args *args)
{
	char *path;
	int error;
	enum uio_seg seg;
	bool convpath;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = args->path;
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT(td, args->path, &path);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, AT_FDCWD, path, seg,
		    args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, AT_FDCWD, path, seg,
		    args->mode, args->dev);
		break;

	case S_IFDIR:
		/* Directories cannot be created via mknod(2). */
		error = EPERM;
		break;

	case 0:
		/* Zero type means a regular file on Linux. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		/* Create the file via open+close. */
		error = kern_openat(td, AT_FDCWD, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}
#endif

/*
 * Linux mknodat(2): as linux_mknod() but relative to a directory fd.
 */
int
linux_mknodat(struct thread *td, struct linux_mknodat_args *args)
{
	char *path;
	int error, dfd;
	enum uio_seg seg;
	bool convpath;

	dfd = (args->dfd == LINUX_AT_FDCWD) ? AT_FDCWD : args->dfd;

	convpath = LUSECONVPATH(td);
	if (!convpath) {
		path = __DECONST(char *, args->filename);
		seg = UIO_USERSPACE;
	} else {
		LCONVPATHCREAT_AT(td, args->filename, &path, dfd);
		seg = UIO_SYSSPACE;
	}

	switch (args->mode & S_IFMT) {
	case S_IFIFO:
	case S_IFSOCK:
		error = kern_mkfifoat(td, dfd, path, seg, args->mode);
		break;

	case S_IFCHR:
	case S_IFBLK:
		error = kern_mknodat(td, dfd, path, seg, args->mode,
		    args->dev);
		break;

	case S_IFDIR:
		/* Directories cannot be created via mknod(2). */
		error = EPERM;
		break;

	case 0:
		/* Zero type means a regular file on Linux. */
		args->mode |= S_IFREG;
		/* FALLTHROUGH */
	case S_IFREG:
		/* Create the file via open+close. */
		error = kern_openat(td, dfd, path, seg,
		    O_WRONLY | O_CREAT | O_TRUNC, args->mode);
		if (error == 0)
			kern_close(td, td->td_retval[0]);
		break;

	default:
		error = EINVAL;
		break;
	}
	if (convpath)
		LFREEPATH(path);
	return (error);
}

/*
 * UGH! This is just about the dumbest idea I've ever heard!!
1193 */ 1194 int 1195 linux_personality(struct thread *td, struct linux_personality_args *args) 1196 { 1197 struct linux_pemuldata *pem; 1198 struct proc *p = td->td_proc; 1199 uint32_t old; 1200 1201 PROC_LOCK(p); 1202 pem = pem_find(p); 1203 old = pem->persona; 1204 if (args->per != 0xffffffff) 1205 pem->persona = args->per; 1206 PROC_UNLOCK(p); 1207 1208 td->td_retval[0] = old; 1209 return (0); 1210 } 1211 1212 struct l_itimerval { 1213 l_timeval it_interval; 1214 l_timeval it_value; 1215 }; 1216 1217 #define B2L_ITIMERVAL(bip, lip) \ 1218 (bip)->it_interval.tv_sec = (lip)->it_interval.tv_sec; \ 1219 (bip)->it_interval.tv_usec = (lip)->it_interval.tv_usec; \ 1220 (bip)->it_value.tv_sec = (lip)->it_value.tv_sec; \ 1221 (bip)->it_value.tv_usec = (lip)->it_value.tv_usec; 1222 1223 int 1224 linux_setitimer(struct thread *td, struct linux_setitimer_args *uap) 1225 { 1226 int error; 1227 struct l_itimerval ls; 1228 struct itimerval aitv, oitv; 1229 1230 if (uap->itv == NULL) { 1231 uap->itv = uap->oitv; 1232 return (linux_getitimer(td, (struct linux_getitimer_args *)uap)); 1233 } 1234 1235 error = copyin(uap->itv, &ls, sizeof(ls)); 1236 if (error != 0) 1237 return (error); 1238 B2L_ITIMERVAL(&aitv, &ls); 1239 error = kern_setitimer(td, uap->which, &aitv, &oitv); 1240 if (error != 0 || uap->oitv == NULL) 1241 return (error); 1242 B2L_ITIMERVAL(&ls, &oitv); 1243 1244 return (copyout(&ls, uap->oitv, sizeof(ls))); 1245 } 1246 1247 int 1248 linux_getitimer(struct thread *td, struct linux_getitimer_args *uap) 1249 { 1250 int error; 1251 struct l_itimerval ls; 1252 struct itimerval aitv; 1253 1254 error = kern_getitimer(td, uap->which, &aitv); 1255 if (error != 0) 1256 return (error); 1257 B2L_ITIMERVAL(&ls, &aitv); 1258 return (copyout(&ls, uap->itv, sizeof(ls))); 1259 } 1260 1261 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1262 int 1263 linux_nice(struct thread *td, struct linux_nice_args *args) 1264 { 1265 1266 return (kern_setpriority(td, 
PRIO_PROCESS, 0, args->inc)); 1267 } 1268 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1269 1270 int 1271 linux_setgroups(struct thread *td, struct linux_setgroups_args *args) 1272 { 1273 struct ucred *newcred, *oldcred; 1274 l_gid_t *linux_gidset; 1275 gid_t *bsd_gidset; 1276 int ngrp, error; 1277 struct proc *p; 1278 1279 ngrp = args->gidsetsize; 1280 if (ngrp < 0 || ngrp >= ngroups_max + 1) 1281 return (EINVAL); 1282 linux_gidset = malloc(ngrp * sizeof(*linux_gidset), M_LINUX, M_WAITOK); 1283 error = copyin(args->grouplist, linux_gidset, ngrp * sizeof(l_gid_t)); 1284 if (error) 1285 goto out; 1286 newcred = crget(); 1287 crextend(newcred, ngrp + 1); 1288 p = td->td_proc; 1289 PROC_LOCK(p); 1290 oldcred = p->p_ucred; 1291 crcopy(newcred, oldcred); 1292 1293 /* 1294 * cr_groups[0] holds egid. Setting the whole set from 1295 * the supplied set will cause egid to be changed too. 1296 * Keep cr_groups[0] unchanged to prevent that. 1297 */ 1298 1299 if ((error = priv_check_cred(oldcred, PRIV_CRED_SETGROUPS)) != 0) { 1300 PROC_UNLOCK(p); 1301 crfree(newcred); 1302 goto out; 1303 } 1304 1305 if (ngrp > 0) { 1306 newcred->cr_ngroups = ngrp + 1; 1307 1308 bsd_gidset = newcred->cr_groups; 1309 ngrp--; 1310 while (ngrp >= 0) { 1311 bsd_gidset[ngrp + 1] = linux_gidset[ngrp]; 1312 ngrp--; 1313 } 1314 } else 1315 newcred->cr_ngroups = 1; 1316 1317 setsugid(p); 1318 proc_set_cred(p, newcred); 1319 PROC_UNLOCK(p); 1320 crfree(oldcred); 1321 error = 0; 1322 out: 1323 free(linux_gidset, M_LINUX); 1324 return (error); 1325 } 1326 1327 int 1328 linux_getgroups(struct thread *td, struct linux_getgroups_args *args) 1329 { 1330 struct ucred *cred; 1331 l_gid_t *linux_gidset; 1332 gid_t *bsd_gidset; 1333 int bsd_gidsetsz, ngrp, error; 1334 1335 cred = td->td_ucred; 1336 bsd_gidset = cred->cr_groups; 1337 bsd_gidsetsz = cred->cr_ngroups - 1; 1338 1339 /* 1340 * cr_groups[0] holds egid. Returning the whole set 1341 * here will cause a duplicate. 
Exclude cr_groups[0] 1342 * to prevent that. 1343 */ 1344 1345 if ((ngrp = args->gidsetsize) == 0) { 1346 td->td_retval[0] = bsd_gidsetsz; 1347 return (0); 1348 } 1349 1350 if (ngrp < bsd_gidsetsz) 1351 return (EINVAL); 1352 1353 ngrp = 0; 1354 linux_gidset = malloc(bsd_gidsetsz * sizeof(*linux_gidset), 1355 M_LINUX, M_WAITOK); 1356 while (ngrp < bsd_gidsetsz) { 1357 linux_gidset[ngrp] = bsd_gidset[ngrp + 1]; 1358 ngrp++; 1359 } 1360 1361 error = copyout(linux_gidset, args->grouplist, ngrp * sizeof(l_gid_t)); 1362 free(linux_gidset, M_LINUX); 1363 if (error) 1364 return (error); 1365 1366 td->td_retval[0] = ngrp; 1367 return (0); 1368 } 1369 1370 static bool 1371 linux_get_dummy_limit(l_uint resource, struct rlimit *rlim) 1372 { 1373 1374 if (linux_dummy_rlimits == 0) 1375 return (false); 1376 1377 switch (resource) { 1378 case LINUX_RLIMIT_LOCKS: 1379 case LINUX_RLIMIT_SIGPENDING: 1380 case LINUX_RLIMIT_MSGQUEUE: 1381 case LINUX_RLIMIT_RTTIME: 1382 rlim->rlim_cur = LINUX_RLIM_INFINITY; 1383 rlim->rlim_max = LINUX_RLIM_INFINITY; 1384 return (true); 1385 case LINUX_RLIMIT_NICE: 1386 case LINUX_RLIMIT_RTPRIO: 1387 rlim->rlim_cur = 0; 1388 rlim->rlim_max = 0; 1389 return (true); 1390 default: 1391 return (false); 1392 } 1393 } 1394 1395 int 1396 linux_setrlimit(struct thread *td, struct linux_setrlimit_args *args) 1397 { 1398 struct rlimit bsd_rlim; 1399 struct l_rlimit rlim; 1400 u_int which; 1401 int error; 1402 1403 if (args->resource >= LINUX_RLIM_NLIMITS) 1404 return (EINVAL); 1405 1406 which = linux_to_bsd_resource[args->resource]; 1407 if (which == -1) 1408 return (EINVAL); 1409 1410 error = copyin(args->rlim, &rlim, sizeof(rlim)); 1411 if (error) 1412 return (error); 1413 1414 bsd_rlim.rlim_cur = (rlim_t)rlim.rlim_cur; 1415 bsd_rlim.rlim_max = (rlim_t)rlim.rlim_max; 1416 return (kern_setrlimit(td, which, &bsd_rlim)); 1417 } 1418 1419 #if defined(__i386__) || (defined(__amd64__) && defined(COMPAT_LINUX32)) 1420 int 1421 linux_old_getrlimit(struct thread *td, 
struct linux_old_getrlimit_args *args) 1422 { 1423 struct l_rlimit rlim; 1424 struct rlimit bsd_rlim; 1425 u_int which; 1426 1427 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1428 rlim.rlim_cur = bsd_rlim.rlim_cur; 1429 rlim.rlim_max = bsd_rlim.rlim_max; 1430 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1431 } 1432 1433 if (args->resource >= LINUX_RLIM_NLIMITS) 1434 return (EINVAL); 1435 1436 which = linux_to_bsd_resource[args->resource]; 1437 if (which == -1) 1438 return (EINVAL); 1439 1440 lim_rlimit(td, which, &bsd_rlim); 1441 1442 #ifdef COMPAT_LINUX32 1443 rlim.rlim_cur = (unsigned int)bsd_rlim.rlim_cur; 1444 if (rlim.rlim_cur == UINT_MAX) 1445 rlim.rlim_cur = INT_MAX; 1446 rlim.rlim_max = (unsigned int)bsd_rlim.rlim_max; 1447 if (rlim.rlim_max == UINT_MAX) 1448 rlim.rlim_max = INT_MAX; 1449 #else 1450 rlim.rlim_cur = (unsigned long)bsd_rlim.rlim_cur; 1451 if (rlim.rlim_cur == ULONG_MAX) 1452 rlim.rlim_cur = LONG_MAX; 1453 rlim.rlim_max = (unsigned long)bsd_rlim.rlim_max; 1454 if (rlim.rlim_max == ULONG_MAX) 1455 rlim.rlim_max = LONG_MAX; 1456 #endif 1457 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1458 } 1459 #endif /* __i386__ || (__amd64__ && COMPAT_LINUX32) */ 1460 1461 int 1462 linux_getrlimit(struct thread *td, struct linux_getrlimit_args *args) 1463 { 1464 struct l_rlimit rlim; 1465 struct rlimit bsd_rlim; 1466 u_int which; 1467 1468 if (linux_get_dummy_limit(args->resource, &bsd_rlim)) { 1469 rlim.rlim_cur = bsd_rlim.rlim_cur; 1470 rlim.rlim_max = bsd_rlim.rlim_max; 1471 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1472 } 1473 1474 if (args->resource >= LINUX_RLIM_NLIMITS) 1475 return (EINVAL); 1476 1477 which = linux_to_bsd_resource[args->resource]; 1478 if (which == -1) 1479 return (EINVAL); 1480 1481 lim_rlimit(td, which, &bsd_rlim); 1482 1483 rlim.rlim_cur = (l_ulong)bsd_rlim.rlim_cur; 1484 rlim.rlim_max = (l_ulong)bsd_rlim.rlim_max; 1485 return (copyout(&rlim, args->rlim, sizeof(rlim))); 1486 } 1487 1488 int 1489 
linux_sched_setscheduler(struct thread *td, 1490 struct linux_sched_setscheduler_args *args) 1491 { 1492 struct sched_param sched_param; 1493 struct thread *tdt; 1494 int error, policy; 1495 1496 switch (args->policy) { 1497 case LINUX_SCHED_OTHER: 1498 policy = SCHED_OTHER; 1499 break; 1500 case LINUX_SCHED_FIFO: 1501 policy = SCHED_FIFO; 1502 break; 1503 case LINUX_SCHED_RR: 1504 policy = SCHED_RR; 1505 break; 1506 default: 1507 return (EINVAL); 1508 } 1509 1510 error = copyin(args->param, &sched_param, sizeof(sched_param)); 1511 if (error) 1512 return (error); 1513 1514 if (linux_map_sched_prio) { 1515 switch (policy) { 1516 case SCHED_OTHER: 1517 if (sched_param.sched_priority != 0) 1518 return (EINVAL); 1519 1520 sched_param.sched_priority = 1521 PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 1522 break; 1523 case SCHED_FIFO: 1524 case SCHED_RR: 1525 if (sched_param.sched_priority < 1 || 1526 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) 1527 return (EINVAL); 1528 1529 /* 1530 * Map [1, LINUX_MAX_RT_PRIO - 1] to 1531 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 
1532 */ 1533 sched_param.sched_priority = 1534 (sched_param.sched_priority - 1) * 1535 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 1536 (LINUX_MAX_RT_PRIO - 1); 1537 break; 1538 } 1539 } 1540 1541 tdt = linux_tdfind(td, args->pid, -1); 1542 if (tdt == NULL) 1543 return (ESRCH); 1544 1545 error = kern_sched_setscheduler(td, tdt, policy, &sched_param); 1546 PROC_UNLOCK(tdt->td_proc); 1547 return (error); 1548 } 1549 1550 int 1551 linux_sched_getscheduler(struct thread *td, 1552 struct linux_sched_getscheduler_args *args) 1553 { 1554 struct thread *tdt; 1555 int error, policy; 1556 1557 tdt = linux_tdfind(td, args->pid, -1); 1558 if (tdt == NULL) 1559 return (ESRCH); 1560 1561 error = kern_sched_getscheduler(td, tdt, &policy); 1562 PROC_UNLOCK(tdt->td_proc); 1563 1564 switch (policy) { 1565 case SCHED_OTHER: 1566 td->td_retval[0] = LINUX_SCHED_OTHER; 1567 break; 1568 case SCHED_FIFO: 1569 td->td_retval[0] = LINUX_SCHED_FIFO; 1570 break; 1571 case SCHED_RR: 1572 td->td_retval[0] = LINUX_SCHED_RR; 1573 break; 1574 } 1575 return (error); 1576 } 1577 1578 int 1579 linux_sched_get_priority_max(struct thread *td, 1580 struct linux_sched_get_priority_max_args *args) 1581 { 1582 struct sched_get_priority_max_args bsd; 1583 1584 if (linux_map_sched_prio) { 1585 switch (args->policy) { 1586 case LINUX_SCHED_OTHER: 1587 td->td_retval[0] = 0; 1588 return (0); 1589 case LINUX_SCHED_FIFO: 1590 case LINUX_SCHED_RR: 1591 td->td_retval[0] = LINUX_MAX_RT_PRIO - 1; 1592 return (0); 1593 default: 1594 return (EINVAL); 1595 } 1596 } 1597 1598 switch (args->policy) { 1599 case LINUX_SCHED_OTHER: 1600 bsd.policy = SCHED_OTHER; 1601 break; 1602 case LINUX_SCHED_FIFO: 1603 bsd.policy = SCHED_FIFO; 1604 break; 1605 case LINUX_SCHED_RR: 1606 bsd.policy = SCHED_RR; 1607 break; 1608 default: 1609 return (EINVAL); 1610 } 1611 return (sys_sched_get_priority_max(td, &bsd)); 1612 } 1613 1614 int 1615 linux_sched_get_priority_min(struct thread *td, 1616 struct linux_sched_get_priority_min_args *args) 1617 { 
	struct sched_get_priority_min_args bsd;

	/*
	 * With priority mapping enabled, report the Linux-visible minima
	 * directly: 0 for SCHED_OTHER, 1 for the realtime classes.
	 */
	if (linux_map_sched_prio) {
		switch (args->policy) {
		case LINUX_SCHED_OTHER:
			td->td_retval[0] = 0;
			return (0);
		case LINUX_SCHED_FIFO:
		case LINUX_SCHED_RR:
			td->td_retval[0] = 1;
			return (0);
		default:
			return (EINVAL);
		}
	}

	/* Otherwise translate the policy and ask the native syscall. */
	switch (args->policy) {
	case LINUX_SCHED_OTHER:
		bsd.policy = SCHED_OTHER;
		break;
	case LINUX_SCHED_FIFO:
		bsd.policy = SCHED_FIFO;
		break;
	case LINUX_SCHED_RR:
		bsd.policy = SCHED_RR;
		break;
	default:
		return (EINVAL);
	}
	return (sys_sched_get_priority_min(td, &bsd));
}

/* Linux reboot(2) magic command and cookie values. */
#define	REBOOT_CAD_ON	0x89abcdef
#define	REBOOT_CAD_OFF	0
#define	REBOOT_HALT	0xcdef0123
#define	REBOOT_RESTART	0x01234567
#define	REBOOT_RESTART2	0xA1B2C3D4
#define	REBOOT_POWEROFF	0x4321FEDC
#define	REBOOT_MAGIC1	0xfee1dead
#define	REBOOT_MAGIC2	0x28121969
#define	REBOOT_MAGIC2A	0x05121996
#define	REBOOT_MAGIC2B	0x16041998

/*
 * Linux reboot(2): validate the two magic cookies, then map the command
 * onto native reboot flags.  The Ctrl-Alt-Del toggles only perform the
 * privilege check and otherwise do nothing.
 */
int
linux_reboot(struct thread *td, struct linux_reboot_args *args)
{
	struct reboot_args bsd_args;

	if (args->magic1 != REBOOT_MAGIC1)
		return (EINVAL);

	switch (args->magic2) {
	case REBOOT_MAGIC2:
	case REBOOT_MAGIC2A:
	case REBOOT_MAGIC2B:
		break;
	default:
		return (EINVAL);
	}

	switch (args->cmd) {
	case REBOOT_CAD_ON:
	case REBOOT_CAD_OFF:
		return (priv_check(td, PRIV_REBOOT));
	case REBOOT_HALT:
		bsd_args.opt = RB_HALT;
		break;
	case REBOOT_RESTART:
	case REBOOT_RESTART2:
		bsd_args.opt = 0;
		break;
	case REBOOT_POWEROFF:
		bsd_args.opt = RB_POWEROFF;
		break;
	default:
		return (EINVAL);
	}
	return (sys_reboot(td, &bsd_args));
}

/* Linux getpid(2): the process id, not the per-thread tid. */
int
linux_getpid(struct thread *td, struct linux_getpid_args *args)
{

	td->td_retval[0] = td->td_proc->p_pid;

	return (0);
}

/* Linux gettid(2): the Linux tid kept in the thread's emulator data. */
int
linux_gettid(struct thread *td, struct linux_gettid_args *args)
{
	struct linux_emuldata *em;

	em = em_find(td);
	KASSERT(em != NULL, ("gettid: emuldata not found.\n"));

	td->td_retval[0] = em->em_tid;

	return (0);
}

/* Linux getppid(2). */
int
linux_getppid(struct thread *td, struct linux_getppid_args *args)
{

	td->td_retval[0] = kern_getppid(td);
	return (0);
}

/* Linux getgid(2): the real group id. */
int
linux_getgid(struct thread *td, struct linux_getgid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_rgid;
	return (0);
}

/* Linux getuid(2): the real user id. */
int
linux_getuid(struct thread *td, struct linux_getuid_args *args)
{

	td->td_retval[0] = td->td_ucred->cr_ruid;
	return (0);
}

/* Linux getsid(2). */
int
linux_getsid(struct thread *td, struct linux_getsid_args *args)
{

	return (kern_getsid(td, args->pid));
}

/* Placeholder for unimplemented Linux syscalls. */
int
linux_nosys(struct thread *td, struct nosys_args *ignore)
{

	return (ENOSYS);
}

/*
 * Linux getpriority(2).  Linux reports the nice value as 20 - nice so
 * that the syscall return is never negative; convert accordingly.
 */
int
linux_getpriority(struct thread *td, struct linux_getpriority_args *args)
{
	int error;

	error = kern_getpriority(td, args->which, args->who);
	td->td_retval[0] = 20 - td->td_retval[0];
	return (error);
}

/* Linux sethostname(2): write kern.hostname via sysctl. */
int
linux_sethostname(struct thread *td, struct linux_sethostname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_HOSTNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->hostname,
	    args->len, 0, 0));
}

/* Linux setdomainname(2): write kern.nisdomainname via sysctl. */
int
linux_setdomainname(struct thread *td, struct linux_setdomainname_args *args)
{
	int name[2];

	name[0] = CTL_KERN;
	name[1] = KERN_NISDOMAINNAME;
	return (userland_sysctl(td, name, 2, 0, 0, 0, args->name,
	    args->len, 0, 0));
}

/* Linux exit_group(2): terminate the whole process. */
int
linux_exit_group(struct thread *td, struct linux_exit_group_args *args)
{

LINUX_CTR2(exit_group, "thread(%d) (%d)", td->td_tid, 1795 args->error_code); 1796 1797 /* 1798 * XXX: we should send a signal to the parent if 1799 * SIGNAL_EXIT_GROUP is set. We ignore that (temporarily?) 1800 * as it doesnt occur often. 1801 */ 1802 exit1(td, args->error_code, 0); 1803 /* NOTREACHED */ 1804 } 1805 1806 #define _LINUX_CAPABILITY_VERSION_1 0x19980330 1807 #define _LINUX_CAPABILITY_VERSION_2 0x20071026 1808 #define _LINUX_CAPABILITY_VERSION_3 0x20080522 1809 1810 struct l_user_cap_header { 1811 l_int version; 1812 l_int pid; 1813 }; 1814 1815 struct l_user_cap_data { 1816 l_int effective; 1817 l_int permitted; 1818 l_int inheritable; 1819 }; 1820 1821 int 1822 linux_capget(struct thread *td, struct linux_capget_args *uap) 1823 { 1824 struct l_user_cap_header luch; 1825 struct l_user_cap_data lucd[2]; 1826 int error, u32s; 1827 1828 if (uap->hdrp == NULL) 1829 return (EFAULT); 1830 1831 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1832 if (error != 0) 1833 return (error); 1834 1835 switch (luch.version) { 1836 case _LINUX_CAPABILITY_VERSION_1: 1837 u32s = 1; 1838 break; 1839 case _LINUX_CAPABILITY_VERSION_2: 1840 case _LINUX_CAPABILITY_VERSION_3: 1841 u32s = 2; 1842 break; 1843 default: 1844 luch.version = _LINUX_CAPABILITY_VERSION_1; 1845 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1846 if (error) 1847 return (error); 1848 return (EINVAL); 1849 } 1850 1851 if (luch.pid) 1852 return (EPERM); 1853 1854 if (uap->datap) { 1855 /* 1856 * The current implementation doesn't support setting 1857 * a capability (it's essentially a stub) so indicate 1858 * that no capabilities are currently set or available 1859 * to request. 
1860 */ 1861 memset(&lucd, 0, u32s * sizeof(lucd[0])); 1862 error = copyout(&lucd, uap->datap, u32s * sizeof(lucd[0])); 1863 } 1864 1865 return (error); 1866 } 1867 1868 int 1869 linux_capset(struct thread *td, struct linux_capset_args *uap) 1870 { 1871 struct l_user_cap_header luch; 1872 struct l_user_cap_data lucd[2]; 1873 int error, i, u32s; 1874 1875 if (uap->hdrp == NULL || uap->datap == NULL) 1876 return (EFAULT); 1877 1878 error = copyin(uap->hdrp, &luch, sizeof(luch)); 1879 if (error != 0) 1880 return (error); 1881 1882 switch (luch.version) { 1883 case _LINUX_CAPABILITY_VERSION_1: 1884 u32s = 1; 1885 break; 1886 case _LINUX_CAPABILITY_VERSION_2: 1887 case _LINUX_CAPABILITY_VERSION_3: 1888 u32s = 2; 1889 break; 1890 default: 1891 luch.version = _LINUX_CAPABILITY_VERSION_1; 1892 error = copyout(&luch, uap->hdrp, sizeof(luch)); 1893 if (error) 1894 return (error); 1895 return (EINVAL); 1896 } 1897 1898 if (luch.pid) 1899 return (EPERM); 1900 1901 error = copyin(uap->datap, &lucd, u32s * sizeof(lucd[0])); 1902 if (error != 0) 1903 return (error); 1904 1905 /* We currently don't support setting any capabilities. 
*/ 1906 for (i = 0; i < u32s; i++) { 1907 if (lucd[i].effective || lucd[i].permitted || 1908 lucd[i].inheritable) { 1909 linux_msg(td, 1910 "capset[%d] effective=0x%x, permitted=0x%x, " 1911 "inheritable=0x%x is not implemented", i, 1912 (int)lucd[i].effective, (int)lucd[i].permitted, 1913 (int)lucd[i].inheritable); 1914 return (EPERM); 1915 } 1916 } 1917 1918 return (0); 1919 } 1920 1921 int 1922 linux_prctl(struct thread *td, struct linux_prctl_args *args) 1923 { 1924 int error = 0, max_size; 1925 struct proc *p = td->td_proc; 1926 char comm[LINUX_MAX_COMM_LEN]; 1927 int pdeath_signal, trace_state; 1928 1929 switch (args->option) { 1930 case LINUX_PR_SET_PDEATHSIG: 1931 if (!LINUX_SIG_VALID(args->arg2)) 1932 return (EINVAL); 1933 pdeath_signal = linux_to_bsd_signal(args->arg2); 1934 return (kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_CTL, 1935 &pdeath_signal)); 1936 case LINUX_PR_GET_PDEATHSIG: 1937 error = kern_procctl(td, P_PID, 0, PROC_PDEATHSIG_STATUS, 1938 &pdeath_signal); 1939 if (error != 0) 1940 return (error); 1941 pdeath_signal = bsd_to_linux_signal(pdeath_signal); 1942 return (copyout(&pdeath_signal, 1943 (void *)(register_t)args->arg2, 1944 sizeof(pdeath_signal))); 1945 /* 1946 * In Linux, this flag controls if set[gu]id processes can coredump. 1947 * There are additional semantics imposed on processes that cannot 1948 * coredump: 1949 * - Such processes can not be ptraced. 1950 * - There are some semantics around ownership of process-related files 1951 * in the /proc namespace. 1952 * 1953 * In FreeBSD, we can (and by default, do) disable setuid coredump 1954 * system-wide with 'sugid_coredump.' We control tracability on a 1955 * per-process basis with the procctl PROC_TRACE (=> P2_NOTRACE flag). 1956 * By happy coincidence, P2_NOTRACE also prevents coredumping. So the 1957 * procctl is roughly analogous to Linux's DUMPABLE. 1958 * 1959 * So, proxy these knobs to the corresponding PROC_TRACE setting. 
1960 */ 1961 case LINUX_PR_GET_DUMPABLE: 1962 error = kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_STATUS, 1963 &trace_state); 1964 if (error != 0) 1965 return (error); 1966 td->td_retval[0] = (trace_state != -1); 1967 return (0); 1968 case LINUX_PR_SET_DUMPABLE: 1969 /* 1970 * It is only valid for userspace to set one of these two 1971 * flags, and only one at a time. 1972 */ 1973 switch (args->arg2) { 1974 case LINUX_SUID_DUMP_DISABLE: 1975 trace_state = PROC_TRACE_CTL_DISABLE_EXEC; 1976 break; 1977 case LINUX_SUID_DUMP_USER: 1978 trace_state = PROC_TRACE_CTL_ENABLE; 1979 break; 1980 default: 1981 return (EINVAL); 1982 } 1983 return (kern_procctl(td, P_PID, p->p_pid, PROC_TRACE_CTL, 1984 &trace_state)); 1985 case LINUX_PR_GET_KEEPCAPS: 1986 /* 1987 * Indicate that we always clear the effective and 1988 * permitted capability sets when the user id becomes 1989 * non-zero (actually the capability sets are simply 1990 * always zero in the current implementation). 1991 */ 1992 td->td_retval[0] = 0; 1993 break; 1994 case LINUX_PR_SET_KEEPCAPS: 1995 /* 1996 * Ignore requests to keep the effective and permitted 1997 * capability sets when the user id becomes non-zero. 1998 */ 1999 break; 2000 case LINUX_PR_SET_NAME: 2001 /* 2002 * To be on the safe side we need to make sure to not 2003 * overflow the size a Linux program expects. We already 2004 * do this here in the copyin, so that we don't need to 2005 * check on copyout. 2006 */ 2007 max_size = MIN(sizeof(comm), sizeof(p->p_comm)); 2008 error = copyinstr((void *)(register_t)args->arg2, comm, 2009 max_size, NULL); 2010 2011 /* Linux silently truncates the name if it is too long. */ 2012 if (error == ENAMETOOLONG) { 2013 /* 2014 * XXX: copyinstr() isn't documented to populate the 2015 * array completely, so do a copyin() to be on the 2016 * safe side. This should be changed in case 2017 * copyinstr() is changed to guarantee this. 
2018 */ 2019 error = copyin((void *)(register_t)args->arg2, comm, 2020 max_size - 1); 2021 comm[max_size - 1] = '\0'; 2022 } 2023 if (error) 2024 return (error); 2025 2026 PROC_LOCK(p); 2027 strlcpy(p->p_comm, comm, sizeof(p->p_comm)); 2028 PROC_UNLOCK(p); 2029 break; 2030 case LINUX_PR_GET_NAME: 2031 PROC_LOCK(p); 2032 strlcpy(comm, p->p_comm, sizeof(comm)); 2033 PROC_UNLOCK(p); 2034 error = copyout(comm, (void *)(register_t)args->arg2, 2035 strlen(comm) + 1); 2036 break; 2037 case LINUX_PR_GET_SECCOMP: 2038 case LINUX_PR_SET_SECCOMP: 2039 /* 2040 * Same as returned by Linux without CONFIG_SECCOMP enabled. 2041 */ 2042 error = EINVAL; 2043 break; 2044 case LINUX_PR_CAPBSET_READ: 2045 #if 0 2046 /* 2047 * This makes too much noise with Ubuntu Focal. 2048 */ 2049 linux_msg(td, "unsupported prctl PR_CAPBSET_READ %d", 2050 (int)args->arg2); 2051 #endif 2052 error = EINVAL; 2053 break; 2054 case LINUX_PR_SET_NO_NEW_PRIVS: 2055 linux_msg(td, "unsupported prctl PR_SET_NO_NEW_PRIVS"); 2056 error = EINVAL; 2057 break; 2058 case LINUX_PR_SET_PTRACER: 2059 linux_msg(td, "unsupported prctl PR_SET_PTRACER"); 2060 error = EINVAL; 2061 break; 2062 default: 2063 linux_msg(td, "unsupported prctl option %d", args->option); 2064 error = EINVAL; 2065 break; 2066 } 2067 2068 return (error); 2069 } 2070 2071 int 2072 linux_sched_setparam(struct thread *td, 2073 struct linux_sched_setparam_args *uap) 2074 { 2075 struct sched_param sched_param; 2076 struct thread *tdt; 2077 int error, policy; 2078 2079 error = copyin(uap->param, &sched_param, sizeof(sched_param)); 2080 if (error) 2081 return (error); 2082 2083 tdt = linux_tdfind(td, uap->pid, -1); 2084 if (tdt == NULL) 2085 return (ESRCH); 2086 2087 if (linux_map_sched_prio) { 2088 error = kern_sched_getscheduler(td, tdt, &policy); 2089 if (error) 2090 goto out; 2091 2092 switch (policy) { 2093 case SCHED_OTHER: 2094 if (sched_param.sched_priority != 0) { 2095 error = EINVAL; 2096 goto out; 2097 } 2098 sched_param.sched_priority = 2099 
PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE; 2100 break; 2101 case SCHED_FIFO: 2102 case SCHED_RR: 2103 if (sched_param.sched_priority < 1 || 2104 sched_param.sched_priority >= LINUX_MAX_RT_PRIO) { 2105 error = EINVAL; 2106 goto out; 2107 } 2108 /* 2109 * Map [1, LINUX_MAX_RT_PRIO - 1] to 2110 * [0, RTP_PRIO_MAX - RTP_PRIO_MIN] (rounding down). 2111 */ 2112 sched_param.sched_priority = 2113 (sched_param.sched_priority - 1) * 2114 (RTP_PRIO_MAX - RTP_PRIO_MIN + 1) / 2115 (LINUX_MAX_RT_PRIO - 1); 2116 break; 2117 } 2118 } 2119 2120 error = kern_sched_setparam(td, tdt, &sched_param); 2121 out: PROC_UNLOCK(tdt->td_proc); 2122 return (error); 2123 } 2124 2125 int 2126 linux_sched_getparam(struct thread *td, 2127 struct linux_sched_getparam_args *uap) 2128 { 2129 struct sched_param sched_param; 2130 struct thread *tdt; 2131 int error, policy; 2132 2133 tdt = linux_tdfind(td, uap->pid, -1); 2134 if (tdt == NULL) 2135 return (ESRCH); 2136 2137 error = kern_sched_getparam(td, tdt, &sched_param); 2138 if (error) { 2139 PROC_UNLOCK(tdt->td_proc); 2140 return (error); 2141 } 2142 2143 if (linux_map_sched_prio) { 2144 error = kern_sched_getscheduler(td, tdt, &policy); 2145 PROC_UNLOCK(tdt->td_proc); 2146 if (error) 2147 return (error); 2148 2149 switch (policy) { 2150 case SCHED_OTHER: 2151 sched_param.sched_priority = 0; 2152 break; 2153 case SCHED_FIFO: 2154 case SCHED_RR: 2155 /* 2156 * Map [0, RTP_PRIO_MAX - RTP_PRIO_MIN] to 2157 * [1, LINUX_MAX_RT_PRIO - 1] (rounding up). 2158 */ 2159 sched_param.sched_priority = 2160 (sched_param.sched_priority * 2161 (LINUX_MAX_RT_PRIO - 1) + 2162 (RTP_PRIO_MAX - RTP_PRIO_MIN - 1)) / 2163 (RTP_PRIO_MAX - RTP_PRIO_MIN) + 1; 2164 break; 2165 } 2166 } else 2167 PROC_UNLOCK(tdt->td_proc); 2168 2169 error = copyout(&sched_param, uap->param, sizeof(sched_param)); 2170 return (error); 2171 } 2172 2173 /* 2174 * Get affinity of a process. 
 */
int
linux_sched_getaffinity(struct thread *td,
    struct linux_sched_getaffinity_args *args)
{
	int error;
	struct thread *tdt;

	/*
	 * args->len is the user buffer size in bytes.  Buffers smaller
	 * than the native cpuset_t are rejected outright.
	 * NOTE(review): Linux accepts any buffer large enough for the
	 * configured number of CPUs — confirm whether small masks need
	 * to be supported here.
	 */
	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	/* linux_tdfind() returned with the target proc locked. */
	PROC_UNLOCK(tdt->td_proc);

	error = kern_cpuset_getaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *)args->user_mask_ptr);
	if (error == 0)
		/* Linux returns the number of bytes written to the mask. */
		td->td_retval[0] = sizeof(cpuset_t);

	return (error);
}

/*
 * Set affinity of a process.
 */
int
linux_sched_setaffinity(struct thread *td,
    struct linux_sched_setaffinity_args *args)
{
	struct thread *tdt;

	/* Same byte-count semantics as linux_sched_getaffinity() above. */
	if (args->len < sizeof(cpuset_t))
		return (EINVAL);

	tdt = linux_tdfind(td, args->pid, -1);
	if (tdt == NULL)
		return (ESRCH);

	/* linux_tdfind() returned with the target proc locked. */
	PROC_UNLOCK(tdt->td_proc);

	return (kern_cpuset_setaffinity(td, CPU_LEVEL_WHICH, CPU_WHICH_TID,
	    tdt->td_tid, sizeof(cpuset_t), (cpuset_t *) args->user_mask_ptr));
}

/* Linux rlimit64 layout: both limits are unsigned 64-bit. */
struct linux_rlimit64 {
	uint64_t rlim_cur;
	uint64_t rlim_max;
};

/*
 * Linux prlimit64(2): get and/or set a resource limit of an arbitrary
 * process.  A pid of 0 means the calling process.
 */
int
linux_prlimit64(struct thread *td, struct linux_prlimit64_args *args)
{
	struct rlimit rlim, nrlim;
	struct linux_rlimit64 lrlim;
	struct proc *p;
	u_int which;
	int flags;
	int error;

	/*
	 * Pure queries of resources we only emulate with dummy values
	 * are answered before the resource-number translation below.
	 */
	if (args->new == NULL && args->old != NULL) {
		if (linux_get_dummy_limit(args->resource, &rlim)) {
			lrlim.rlim_cur = rlim.rlim_cur;
			lrlim.rlim_max = rlim.rlim_max;
			return (copyout(&lrlim, args->old, sizeof(lrlim)));
		}
	}

	if (args->resource >= LINUX_RLIM_NLIMITS)
		return (EINVAL);

	which = linux_to_bsd_resource[args->resource];
	if (which == -1)
		return (EINVAL);

	if (args->new != NULL) {
		/*
		 * Note.
Unlike FreeBSD where rlim is signed 64-bit Linux 2255 * rlim is unsigned 64-bit. FreeBSD treats negative limits 2256 * as INFINITY so we do not need a conversion even. 2257 */ 2258 error = copyin(args->new, &nrlim, sizeof(nrlim)); 2259 if (error != 0) 2260 return (error); 2261 } 2262 2263 flags = PGET_HOLD | PGET_NOTWEXIT; 2264 if (args->new != NULL) 2265 flags |= PGET_CANDEBUG; 2266 else 2267 flags |= PGET_CANSEE; 2268 if (args->pid == 0) { 2269 p = td->td_proc; 2270 PHOLD(p); 2271 } else { 2272 error = pget(args->pid, flags, &p); 2273 if (error != 0) 2274 return (error); 2275 } 2276 if (args->old != NULL) { 2277 PROC_LOCK(p); 2278 lim_rlimit_proc(p, which, &rlim); 2279 PROC_UNLOCK(p); 2280 if (rlim.rlim_cur == RLIM_INFINITY) 2281 lrlim.rlim_cur = LINUX_RLIM_INFINITY; 2282 else 2283 lrlim.rlim_cur = rlim.rlim_cur; 2284 if (rlim.rlim_max == RLIM_INFINITY) 2285 lrlim.rlim_max = LINUX_RLIM_INFINITY; 2286 else 2287 lrlim.rlim_max = rlim.rlim_max; 2288 error = copyout(&lrlim, args->old, sizeof(lrlim)); 2289 if (error != 0) 2290 goto out; 2291 } 2292 2293 if (args->new != NULL) 2294 error = kern_proc_setrlimit(td, p, which, &nrlim); 2295 2296 out: 2297 PRELE(p); 2298 return (error); 2299 } 2300 2301 int 2302 linux_pselect6(struct thread *td, struct linux_pselect6_args *args) 2303 { 2304 struct timeval utv, tv0, tv1, *tvp; 2305 struct l_pselect6arg lpse6; 2306 struct l_timespec lts; 2307 struct timespec uts; 2308 l_sigset_t l_ss; 2309 sigset_t *ssp; 2310 sigset_t ss; 2311 int error; 2312 2313 ssp = NULL; 2314 if (args->sig != NULL) { 2315 error = copyin(args->sig, &lpse6, sizeof(lpse6)); 2316 if (error != 0) 2317 return (error); 2318 if (lpse6.ss_len != sizeof(l_ss)) 2319 return (EINVAL); 2320 if (lpse6.ss != 0) { 2321 error = copyin(PTRIN(lpse6.ss), &l_ss, 2322 sizeof(l_ss)); 2323 if (error != 0) 2324 return (error); 2325 linux_to_bsd_sigset(&l_ss, &ss); 2326 ssp = &ss; 2327 } 2328 } 2329 2330 /* 2331 * Currently glibc changes nanosecond number to microsecond. 
2332 * This mean losing precision but for now it is hardly seen. 2333 */ 2334 if (args->tsp != NULL) { 2335 error = copyin(args->tsp, <s, sizeof(lts)); 2336 if (error != 0) 2337 return (error); 2338 error = linux_to_native_timespec(&uts, <s); 2339 if (error != 0) 2340 return (error); 2341 2342 TIMESPEC_TO_TIMEVAL(&utv, &uts); 2343 if (itimerfix(&utv)) 2344 return (EINVAL); 2345 2346 microtime(&tv0); 2347 tvp = &utv; 2348 } else 2349 tvp = NULL; 2350 2351 error = kern_pselect(td, args->nfds, args->readfds, args->writefds, 2352 args->exceptfds, tvp, ssp, LINUX_NFDBITS); 2353 2354 if (error == 0 && args->tsp != NULL) { 2355 if (td->td_retval[0] != 0) { 2356 /* 2357 * Compute how much time was left of the timeout, 2358 * by subtracting the current time and the time 2359 * before we started the call, and subtracting 2360 * that result from the user-supplied value. 2361 */ 2362 2363 microtime(&tv1); 2364 timevalsub(&tv1, &tv0); 2365 timevalsub(&utv, &tv1); 2366 if (utv.tv_sec < 0) 2367 timevalclear(&utv); 2368 } else 2369 timevalclear(&utv); 2370 2371 TIMEVAL_TO_TIMESPEC(&utv, &uts); 2372 2373 error = native_to_linux_timespec(<s, &uts); 2374 if (error == 0) 2375 error = copyout(<s, args->tsp, sizeof(lts)); 2376 } 2377 2378 return (error); 2379 } 2380 2381 int 2382 linux_ppoll(struct thread *td, struct linux_ppoll_args *args) 2383 { 2384 struct timespec ts0, ts1; 2385 struct l_timespec lts; 2386 struct timespec uts, *tsp; 2387 l_sigset_t l_ss; 2388 sigset_t *ssp; 2389 sigset_t ss; 2390 int error; 2391 2392 if (args->sset != NULL) { 2393 if (args->ssize != sizeof(l_ss)) 2394 return (EINVAL); 2395 error = copyin(args->sset, &l_ss, sizeof(l_ss)); 2396 if (error) 2397 return (error); 2398 linux_to_bsd_sigset(&l_ss, &ss); 2399 ssp = &ss; 2400 } else 2401 ssp = NULL; 2402 if (args->tsp != NULL) { 2403 error = copyin(args->tsp, <s, sizeof(lts)); 2404 if (error) 2405 return (error); 2406 error = linux_to_native_timespec(&uts, <s); 2407 if (error != 0) 2408 return (error); 2409 
2410 nanotime(&ts0); 2411 tsp = &uts; 2412 } else 2413 tsp = NULL; 2414 2415 error = kern_poll(td, args->fds, args->nfds, tsp, ssp); 2416 2417 if (error == 0 && args->tsp != NULL) { 2418 if (td->td_retval[0]) { 2419 nanotime(&ts1); 2420 timespecsub(&ts1, &ts0, &ts1); 2421 timespecsub(&uts, &ts1, &uts); 2422 if (uts.tv_sec < 0) 2423 timespecclear(&uts); 2424 } else 2425 timespecclear(&uts); 2426 2427 error = native_to_linux_timespec(<s, &uts); 2428 if (error == 0) 2429 error = copyout(<s, args->tsp, sizeof(lts)); 2430 } 2431 2432 return (error); 2433 } 2434 2435 int 2436 linux_sched_rr_get_interval(struct thread *td, 2437 struct linux_sched_rr_get_interval_args *uap) 2438 { 2439 struct timespec ts; 2440 struct l_timespec lts; 2441 struct thread *tdt; 2442 int error; 2443 2444 /* 2445 * According to man in case the invalid pid specified 2446 * EINVAL should be returned. 2447 */ 2448 if (uap->pid < 0) 2449 return (EINVAL); 2450 2451 tdt = linux_tdfind(td, uap->pid, -1); 2452 if (tdt == NULL) 2453 return (ESRCH); 2454 2455 error = kern_sched_rr_get_interval_td(td, tdt, &ts); 2456 PROC_UNLOCK(tdt->td_proc); 2457 if (error != 0) 2458 return (error); 2459 error = native_to_linux_timespec(<s, &ts); 2460 if (error != 0) 2461 return (error); 2462 return (copyout(<s, uap->interval, sizeof(lts))); 2463 } 2464 2465 /* 2466 * In case when the Linux thread is the initial thread in 2467 * the thread group thread id is equal to the process id. 2468 * Glibc depends on this magic (assert in pthread_getattr_np.c). 2469 */ 2470 struct thread * 2471 linux_tdfind(struct thread *td, lwpid_t tid, pid_t pid) 2472 { 2473 struct linux_emuldata *em; 2474 struct thread *tdt; 2475 struct proc *p; 2476 2477 tdt = NULL; 2478 if (tid == 0 || tid == td->td_tid) { 2479 tdt = td; 2480 PROC_LOCK(tdt->td_proc); 2481 } else if (tid > PID_MAX) 2482 tdt = tdfind(tid, pid); 2483 else { 2484 /* 2485 * Initial thread where the tid equal to the pid. 
		 */
		p = pfind(tid);
		if (p != NULL) {
			if (SV_PROC_ABI(p) != SV_ABI_LINUX) {
				/*
				 * p is not a Linuxulator process.
				 */
				PROC_UNLOCK(p);
				return (NULL);
			}
			/*
			 * Scan the process for the thread whose emulator
			 * data carries the requested Linux tid.
			 * NOTE(review): em_find(tdt) is dereferenced
			 * without a NULL check — presumably every thread
			 * of a Linux-ABI process has emuldata; confirm.
			 * On success the proc lock is intentionally left
			 * held for the caller.
			 */
			FOREACH_THREAD_IN_PROC(p, tdt) {
				em = em_find(tdt);
				if (tid == em->em_tid)
					return (tdt);
			}
			PROC_UNLOCK(p);
		}
		return (NULL);
	}

	return (tdt);
}

/*
 * Translate Linux wait*(2) option flags into their native equivalents;
 * bits are OR-ed into *bsdopts, which the caller must initialize.
 */
void
linux_to_bsd_waitopts(int options, int *bsdopts)
{

	if (options & LINUX_WNOHANG)
		*bsdopts |= WNOHANG;
	if (options & LINUX_WUNTRACED)
		*bsdopts |= WUNTRACED;
	if (options & LINUX_WEXITED)
		*bsdopts |= WEXITED;
	if (options & LINUX_WCONTINUED)
		*bsdopts |= WCONTINUED;
	if (options & LINUX_WNOWAIT)
		*bsdopts |= WNOWAIT;

	if (options & __WCLONE)
		*bsdopts |= WLINUXCLONE;
}

/*
 * Linux getrandom(2): fill the user buffer from the kernel RNG.
 * Only GRND_NONBLOCK and GRND_RANDOM are accepted; GRND_RANDOM is
 * accepted but not forwarded — both flavors read the same source.
 * Requests larger than INT_MAX are silently truncated, and the number
 * of bytes actually delivered is returned.
 */
int
linux_getrandom(struct thread *td, struct linux_getrandom_args *args)
{
	struct uio uio;
	struct iovec iov;
	int error;

	if (args->flags & ~(LINUX_GRND_NONBLOCK|LINUX_GRND_RANDOM))
		return (EINVAL);
	if (args->count > INT_MAX)
		args->count = INT_MAX;

	iov.iov_base = args->buf;
	iov.iov_len = args->count;

	uio.uio_iov = &iov;
	uio.uio_iovcnt = 1;
	uio.uio_resid = iov.iov_len;
	uio.uio_segflg = UIO_USERSPACE;
	uio.uio_rw = UIO_READ;
	uio.uio_td = td;

	error = read_random_uio(&uio, args->flags & LINUX_GRND_NONBLOCK);
	if (error == 0)
		td->td_retval[0] = args->count - uio.uio_resid;
	return (error);
}

/* Linux mincore(2): thin wrapper; the start address must be page-aligned. */
int
linux_mincore(struct thread *td, struct linux_mincore_args *args)
{

	/* Needs to be page-aligned */
	if (args->start & PAGE_MASK)
		return (EINVAL);
	return (kern_mincore(td, args->start, args->len, args->vec));
}

/* Priority tag prepended to each message line (KERN_INFO). */
#define	SYSLOG_TAG	"<6>"

/*
 * Linux syslog(2) (not syslog(3)): only SYSLOG_ACTION_READ_ALL is
 * emulated, by copying the kernel message buffer out with a "<6>"
 * priority tag inserted at the start of each line.
 */
int
linux_syslog(struct thread *td, struct linux_syslog_args *args)
{
	char buf[128],
*src, *dst; 2572 u_int seq; 2573 int buflen, error; 2574 2575 if (args->type != LINUX_SYSLOG_ACTION_READ_ALL) { 2576 linux_msg(td, "syslog unsupported type 0x%x", args->type); 2577 return (EINVAL); 2578 } 2579 2580 if (args->len < 6) { 2581 td->td_retval[0] = 0; 2582 return (0); 2583 } 2584 2585 error = priv_check(td, PRIV_MSGBUF); 2586 if (error) 2587 return (error); 2588 2589 mtx_lock(&msgbuf_lock); 2590 msgbuf_peekbytes(msgbufp, NULL, 0, &seq); 2591 mtx_unlock(&msgbuf_lock); 2592 2593 dst = args->buf; 2594 error = copyout(&SYSLOG_TAG, dst, sizeof(SYSLOG_TAG)); 2595 /* The -1 is to skip the trailing '\0'. */ 2596 dst += sizeof(SYSLOG_TAG) - 1; 2597 2598 while (error == 0) { 2599 mtx_lock(&msgbuf_lock); 2600 buflen = msgbuf_peekbytes(msgbufp, buf, sizeof(buf), &seq); 2601 mtx_unlock(&msgbuf_lock); 2602 2603 if (buflen == 0) 2604 break; 2605 2606 for (src = buf; src < buf + buflen && error == 0; src++) { 2607 if (*src == '\0') 2608 continue; 2609 2610 if (dst >= args->buf + args->len) 2611 goto out; 2612 2613 error = copyout(src, dst, 1); 2614 dst++; 2615 2616 if (*src == '\n' && *(src + 1) != '<' && 2617 dst + sizeof(SYSLOG_TAG) < args->buf + args->len) { 2618 error = copyout(&SYSLOG_TAG, 2619 dst, sizeof(SYSLOG_TAG)); 2620 dst += sizeof(SYSLOG_TAG) - 1; 2621 } 2622 } 2623 } 2624 out: 2625 td->td_retval[0] = dst - args->buf; 2626 return (error); 2627 } 2628 2629 int 2630 linux_getcpu(struct thread *td, struct linux_getcpu_args *args) 2631 { 2632 int cpu, error, node; 2633 2634 cpu = td->td_oncpu; /* Make sure it doesn't change during copyout(9) */ 2635 error = 0; 2636 node = cpuid_to_pcpu[cpu]->pc_domain; 2637 2638 if (args->cpu != NULL) 2639 error = copyout(&cpu, args->cpu, sizeof(l_int)); 2640 if (args->node != NULL) 2641 error = copyout(&node, args->node, sizeof(l_int)); 2642 return (error); 2643 } 2644