1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #ifdef COMPAT_FREEBSD11 48 #include <sys/abi_compat.h> 49 #endif 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/capsicum.h> 53 #include <sys/disk.h> 54 #include <sys/sysent.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mutex.h> 58 #include <sys/sysproto.h> 59 #include <sys/namei.h> 60 #include <sys/filedesc.h> 61 #include <sys/kernel.h> 62 #include <sys/fcntl.h> 63 #include <sys/file.h> 64 #include <sys/filio.h> 65 #include <sys/limits.h> 66 #include <sys/linker.h> 67 #include <sys/rwlock.h> 68 #include <sys/sdt.h> 69 #include <sys/stat.h> 70 #include <sys/sx.h> 71 #include <sys/unistd.h> 72 #include <sys/vnode.h> 73 #include <sys/priv.h> 74 #include <sys/proc.h> 75 #include <sys/dirent.h> 76 #include <sys/jail.h> 77 #include <sys/syscallsubr.h> 78 #include <sys/sysctl.h> 79 #ifdef KTRACE 80 #include <sys/ktrace.h> 81 #endif 82 83 #include <machine/stdarg.h> 84 85 #include <security/audit/audit.h> 86 #include <security/mac/mac_framework.h> 87 88 #include <vm/vm.h> 89 #include <vm/vm_object.h> 90 #include <vm/vm_page.h> 91 #include <vm/uma.h> 92 93 #include <fs/devfs/devfs.h> 94 95 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 
106 struct thread *td); 107 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 108 enum uio_seg pathseg, fhandle_t *fhp); 109 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 110 size_t count, struct thread *td); 111 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 112 const char *path, enum uio_seg segflag); 113 114 static uint64_t 115 at2cnpflags(u_int at_flags, u_int mask) 116 { 117 u_int64_t res; 118 119 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 120 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 121 122 res = 0; 123 at_flags &= mask; 124 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 125 res |= RBENEATH; 126 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 127 res |= FOLLOW; 128 /* NOFOLLOW is pseudo flag */ 129 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 130 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 131 FOLLOW; 132 } 133 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 134 res |= EMPTYPATH; 135 return (res); 136 } 137 138 int 139 kern_sync(struct thread *td) 140 { 141 struct mount *mp, *nmp; 142 int save; 143 144 mtx_lock(&mountlist_mtx); 145 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 146 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 continue; 149 } 150 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 151 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 152 save = curthread_pflags_set(TDP_SYNCIO); 153 vfs_periodic(mp, MNT_NOWAIT); 154 VFS_SYNC(mp, MNT_NOWAIT); 155 curthread_pflags_restore(save); 156 vn_finished_write(mp); 157 } 158 mtx_lock(&mountlist_mtx); 159 nmp = TAILQ_NEXT(mp, mnt_list); 160 vfs_unbusy(mp); 161 } 162 mtx_unlock(&mountlist_mtx); 163 return (0); 164 } 165 166 /* 167 * Sync each mounted filesystem. 
168 */ 169 #ifndef _SYS_SYSPROTO_H_ 170 struct sync_args { 171 int dummy; 172 }; 173 #endif 174 /* ARGSUSED */ 175 int 176 sys_sync(struct thread *td, struct sync_args *uap) 177 { 178 179 return (kern_sync(td)); 180 } 181 182 /* 183 * Change filesystem quotas. 184 */ 185 #ifndef _SYS_SYSPROTO_H_ 186 struct quotactl_args { 187 char *path; 188 int cmd; 189 int uid; 190 caddr_t arg; 191 }; 192 #endif 193 int 194 sys_quotactl(struct thread *td, struct quotactl_args *uap) 195 { 196 struct mount *mp; 197 struct nameidata nd; 198 int error; 199 bool mp_busy; 200 201 AUDIT_ARG_CMD(uap->cmd); 202 AUDIT_ARG_UID(uap->uid); 203 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 204 return (EPERM); 205 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 206 uap->path, td); 207 if ((error = namei(&nd)) != 0) 208 return (error); 209 NDFREE(&nd, NDF_ONLY_PNBUF); 210 mp = nd.ni_vp->v_mount; 211 vfs_ref(mp); 212 vput(nd.ni_vp); 213 error = vfs_busy(mp, 0); 214 if (error != 0) { 215 vfs_rel(mp); 216 return (error); 217 } 218 mp_busy = true; 219 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 220 221 /* 222 * Since quota on/off operations typically need to open quota 223 * files, the implementation may need to unbusy the mount point 224 * before calling into namei. Otherwise, unmount might be 225 * started between two vfs_busy() invocations (first is ours, 226 * second is from mount point cross-walk code in lookup()), 227 * causing deadlock. 228 * 229 * Avoid unbusying mp if the implementation indicates it has 230 * already done so. 231 */ 232 if (mp_busy) 233 vfs_unbusy(mp); 234 vfs_rel(mp); 235 return (error); 236 } 237 238 /* 239 * Used by statfs conversion routines to scale the block size up if 240 * necessary so that all of the block counts are <= 'max_size'. Note 241 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 242 * value of 'n'. 
243 */ 244 void 245 statfs_scale_blocks(struct statfs *sf, long max_size) 246 { 247 uint64_t count; 248 int shift; 249 250 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 251 252 /* 253 * Attempt to scale the block counts to give a more accurate 254 * overview to userland of the ratio of free space to used 255 * space. To do this, find the largest block count and compute 256 * a divisor that lets it fit into a signed integer <= max_size. 257 */ 258 if (sf->f_bavail < 0) 259 count = -sf->f_bavail; 260 else 261 count = sf->f_bavail; 262 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 263 if (count <= max_size) 264 return; 265 266 count >>= flsl(max_size); 267 shift = 0; 268 while (count > 0) { 269 shift++; 270 count >>=1; 271 } 272 273 sf->f_bsize <<= shift; 274 sf->f_blocks >>= shift; 275 sf->f_bfree >>= shift; 276 sf->f_bavail >>= shift; 277 } 278 279 static int 280 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 281 { 282 int error; 283 284 if (mp == NULL) 285 return (EBADF); 286 error = vfs_busy(mp, 0); 287 vfs_rel(mp); 288 if (error != 0) 289 return (error); 290 #ifdef MAC 291 error = mac_mount_check_stat(td->td_ucred, mp); 292 if (error != 0) 293 goto out; 294 #endif 295 error = VFS_STATFS(mp, buf); 296 if (error != 0) 297 goto out; 298 if (priv_check_cred_vfs_generation(td->td_ucred)) { 299 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 300 prison_enforce_statfs(td->td_ucred, mp, buf); 301 } 302 out: 303 vfs_unbusy(mp); 304 return (error); 305 } 306 307 /* 308 * Get filesystem statistics. 
309 */ 310 #ifndef _SYS_SYSPROTO_H_ 311 struct statfs_args { 312 char *path; 313 struct statfs *buf; 314 }; 315 #endif 316 int 317 sys_statfs(struct thread *td, struct statfs_args *uap) 318 { 319 struct statfs *sfp; 320 int error; 321 322 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 323 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 324 if (error == 0) 325 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 326 free(sfp, M_STATFS); 327 return (error); 328 } 329 330 int 331 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 332 struct statfs *buf) 333 { 334 struct mount *mp; 335 struct nameidata nd; 336 int error; 337 338 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 339 error = namei(&nd); 340 if (error != 0) 341 return (error); 342 mp = vfs_ref_from_vp(nd.ni_vp); 343 NDFREE_NOTHING(&nd); 344 vrele(nd.ni_vp); 345 return (kern_do_statfs(td, mp, buf)); 346 } 347 348 /* 349 * Get filesystem statistics. 350 */ 351 #ifndef _SYS_SYSPROTO_H_ 352 struct fstatfs_args { 353 int fd; 354 struct statfs *buf; 355 }; 356 #endif 357 int 358 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 359 { 360 struct statfs *sfp; 361 int error; 362 363 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 364 error = kern_fstatfs(td, uap->fd, sfp); 365 if (error == 0) 366 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 367 free(sfp, M_STATFS); 368 return (error); 369 } 370 371 int 372 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 373 { 374 struct file *fp; 375 struct mount *mp; 376 struct vnode *vp; 377 int error; 378 379 AUDIT_ARG_FD(fd); 380 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 381 if (error != 0) 382 return (error); 383 vp = fp->f_vnode; 384 #ifdef AUDIT 385 if (AUDITING_TD(td)) { 386 vn_lock(vp, LK_SHARED | LK_RETRY); 387 AUDIT_ARG_VNODE1(vp); 388 VOP_UNLOCK(vp); 389 } 390 #endif 391 mp = vfs_ref_from_vp(vp); 392 fdrop(fp, td); 393 return (kern_do_statfs(td, mp, buf)); 394 } 395 
396 /* 397 * Get statistics on all filesystems. 398 */ 399 #ifndef _SYS_SYSPROTO_H_ 400 struct getfsstat_args { 401 struct statfs *buf; 402 long bufsize; 403 int mode; 404 }; 405 #endif 406 int 407 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 408 { 409 size_t count; 410 int error; 411 412 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 413 return (EINVAL); 414 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 415 UIO_USERSPACE, uap->mode); 416 if (error == 0) 417 td->td_retval[0] = count; 418 return (error); 419 } 420 421 /* 422 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 423 * The caller is responsible for freeing memory which will be allocated 424 * in '*buf'. 425 */ 426 int 427 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 428 size_t *countp, enum uio_seg bufseg, int mode) 429 { 430 struct mount *mp, *nmp; 431 struct statfs *sfsp, *sp, *sptmp, *tofree; 432 size_t count, maxcount; 433 int error; 434 435 switch (mode) { 436 case MNT_WAIT: 437 case MNT_NOWAIT: 438 break; 439 default: 440 if (bufseg == UIO_SYSSPACE) 441 *buf = NULL; 442 return (EINVAL); 443 } 444 restart: 445 maxcount = bufsize / sizeof(struct statfs); 446 if (bufsize == 0) { 447 sfsp = NULL; 448 tofree = NULL; 449 } else if (bufseg == UIO_USERSPACE) { 450 sfsp = *buf; 451 tofree = NULL; 452 } else /* if (bufseg == UIO_SYSSPACE) */ { 453 count = 0; 454 mtx_lock(&mountlist_mtx); 455 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 456 count++; 457 } 458 mtx_unlock(&mountlist_mtx); 459 if (maxcount > count) 460 maxcount = count; 461 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 462 M_STATFS, M_WAITOK); 463 } 464 465 count = 0; 466 467 /* 468 * If there is no target buffer they only want the count. 469 * 470 * This could be TAILQ_FOREACH but it is open-coded to match the original 471 * code below. 
472 */ 473 if (sfsp == NULL) { 474 mtx_lock(&mountlist_mtx); 475 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 476 if (prison_canseemount(td->td_ucred, mp) != 0) { 477 nmp = TAILQ_NEXT(mp, mnt_list); 478 continue; 479 } 480 #ifdef MAC 481 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #endif 486 count++; 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 } 489 mtx_unlock(&mountlist_mtx); 490 *countp = count; 491 return (0); 492 } 493 494 /* 495 * They want the entire thing. 496 * 497 * Short-circuit the corner case of no room for anything, avoids 498 * relocking below. 499 */ 500 if (maxcount < 1) { 501 goto out; 502 } 503 504 mtx_lock(&mountlist_mtx); 505 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 506 if (prison_canseemount(td->td_ucred, mp) != 0) { 507 nmp = TAILQ_NEXT(mp, mnt_list); 508 continue; 509 } 510 #ifdef MAC 511 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 512 nmp = TAILQ_NEXT(mp, mnt_list); 513 continue; 514 } 515 #endif 516 if (mode == MNT_WAIT) { 517 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 518 /* 519 * If vfs_busy() failed, and MBF_NOWAIT 520 * wasn't passed, then the mp is gone. 521 * Furthermore, because of MBF_MNTLSTLOCK, 522 * the mountlist_mtx was dropped. We have 523 * no other choice than to start over. 524 */ 525 mtx_unlock(&mountlist_mtx); 526 free(tofree, M_STATFS); 527 goto restart; 528 } 529 } else { 530 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 531 nmp = TAILQ_NEXT(mp, mnt_list); 532 continue; 533 } 534 } 535 sp = &mp->mnt_stat; 536 /* 537 * If MNT_NOWAIT is specified, do not refresh 538 * the fsstat cache. 
539 */ 540 if (mode != MNT_NOWAIT) { 541 error = VFS_STATFS(mp, sp); 542 if (error != 0) { 543 mtx_lock(&mountlist_mtx); 544 nmp = TAILQ_NEXT(mp, mnt_list); 545 vfs_unbusy(mp); 546 continue; 547 } 548 } 549 if (priv_check_cred_vfs_generation(td->td_ucred)) { 550 sptmp = malloc(sizeof(struct statfs), M_STATFS, 551 M_WAITOK); 552 *sptmp = *sp; 553 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 554 prison_enforce_statfs(td->td_ucred, mp, sptmp); 555 sp = sptmp; 556 } else 557 sptmp = NULL; 558 if (bufseg == UIO_SYSSPACE) { 559 bcopy(sp, sfsp, sizeof(*sp)); 560 free(sptmp, M_STATFS); 561 } else /* if (bufseg == UIO_USERSPACE) */ { 562 error = copyout(sp, sfsp, sizeof(*sp)); 563 free(sptmp, M_STATFS); 564 if (error != 0) { 565 vfs_unbusy(mp); 566 return (error); 567 } 568 } 569 sfsp++; 570 count++; 571 572 if (count == maxcount) { 573 vfs_unbusy(mp); 574 goto out; 575 } 576 577 mtx_lock(&mountlist_mtx); 578 nmp = TAILQ_NEXT(mp, mnt_list); 579 vfs_unbusy(mp); 580 } 581 mtx_unlock(&mountlist_mtx); 582 out: 583 *countp = count; 584 return (0); 585 } 586 587 #ifdef COMPAT_FREEBSD4 588 /* 589 * Get old format filesystem statistics. 590 */ 591 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 592 593 #ifndef _SYS_SYSPROTO_H_ 594 struct freebsd4_statfs_args { 595 char *path; 596 struct ostatfs *buf; 597 }; 598 #endif 599 int 600 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 601 { 602 struct ostatfs osb; 603 struct statfs *sfp; 604 int error; 605 606 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 607 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 608 if (error == 0) { 609 freebsd4_cvtstatfs(sfp, &osb); 610 error = copyout(&osb, uap->buf, sizeof(osb)); 611 } 612 free(sfp, M_STATFS); 613 return (error); 614 } 615 616 /* 617 * Get filesystem statistics. 
618 */ 619 #ifndef _SYS_SYSPROTO_H_ 620 struct freebsd4_fstatfs_args { 621 int fd; 622 struct ostatfs *buf; 623 }; 624 #endif 625 int 626 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 627 { 628 struct ostatfs osb; 629 struct statfs *sfp; 630 int error; 631 632 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 633 error = kern_fstatfs(td, uap->fd, sfp); 634 if (error == 0) { 635 freebsd4_cvtstatfs(sfp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 } 638 free(sfp, M_STATFS); 639 return (error); 640 } 641 642 /* 643 * Get statistics on all filesystems. 644 */ 645 #ifndef _SYS_SYSPROTO_H_ 646 struct freebsd4_getfsstat_args { 647 struct ostatfs *buf; 648 long bufsize; 649 int mode; 650 }; 651 #endif 652 int 653 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 654 { 655 struct statfs *buf, *sp; 656 struct ostatfs osb; 657 size_t count, size; 658 int error; 659 660 if (uap->bufsize < 0) 661 return (EINVAL); 662 count = uap->bufsize / sizeof(struct ostatfs); 663 if (count > SIZE_MAX / sizeof(struct statfs)) 664 return (EINVAL); 665 size = count * sizeof(struct statfs); 666 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 667 uap->mode); 668 if (error == 0) 669 td->td_retval[0] = count; 670 if (size != 0) { 671 sp = buf; 672 while (count != 0 && error == 0) { 673 freebsd4_cvtstatfs(sp, &osb); 674 error = copyout(&osb, uap->buf, sizeof(osb)); 675 sp++; 676 uap->buf++; 677 count--; 678 } 679 free(buf, M_STATFS); 680 } 681 return (error); 682 } 683 684 /* 685 * Implement fstatfs() for (NFS) file handles. 
686 */ 687 #ifndef _SYS_SYSPROTO_H_ 688 struct freebsd4_fhstatfs_args { 689 struct fhandle *u_fhp; 690 struct ostatfs *buf; 691 }; 692 #endif 693 int 694 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 695 { 696 struct ostatfs osb; 697 struct statfs *sfp; 698 fhandle_t fh; 699 int error; 700 701 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 702 if (error != 0) 703 return (error); 704 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 705 error = kern_fhstatfs(td, fh, sfp); 706 if (error == 0) { 707 freebsd4_cvtstatfs(sfp, &osb); 708 error = copyout(&osb, uap->buf, sizeof(osb)); 709 } 710 free(sfp, M_STATFS); 711 return (error); 712 } 713 714 /* 715 * Convert a new format statfs structure to an old format statfs structure. 716 */ 717 static void 718 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 719 { 720 721 statfs_scale_blocks(nsp, LONG_MAX); 722 bzero(osp, sizeof(*osp)); 723 osp->f_bsize = nsp->f_bsize; 724 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 725 osp->f_blocks = nsp->f_blocks; 726 osp->f_bfree = nsp->f_bfree; 727 osp->f_bavail = nsp->f_bavail; 728 osp->f_files = MIN(nsp->f_files, LONG_MAX); 729 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 730 osp->f_owner = nsp->f_owner; 731 osp->f_type = nsp->f_type; 732 osp->f_flags = nsp->f_flags; 733 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 734 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 735 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 736 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 737 strlcpy(osp->f_fstypename, nsp->f_fstypename, 738 MIN(MFSNAMELEN, OMFSNAMELEN)); 739 strlcpy(osp->f_mntonname, nsp->f_mntonname, 740 MIN(MNAMELEN, OMNAMELEN)); 741 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 742 MIN(MNAMELEN, OMNAMELEN)); 743 osp->f_fsid = nsp->f_fsid; 744 } 745 #endif /* COMPAT_FREEBSD4 */ 746 747 #if defined(COMPAT_FREEBSD11) 748 /* 749 * Get old format filesystem statistics. 
750 */ 751 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 752 753 int 754 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 755 { 756 struct freebsd11_statfs osb; 757 struct statfs *sfp; 758 int error; 759 760 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 761 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 762 if (error == 0) { 763 freebsd11_cvtstatfs(sfp, &osb); 764 error = copyout(&osb, uap->buf, sizeof(osb)); 765 } 766 free(sfp, M_STATFS); 767 return (error); 768 } 769 770 /* 771 * Get filesystem statistics. 772 */ 773 int 774 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 775 { 776 struct freebsd11_statfs osb; 777 struct statfs *sfp; 778 int error; 779 780 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 781 error = kern_fstatfs(td, uap->fd, sfp); 782 if (error == 0) { 783 freebsd11_cvtstatfs(sfp, &osb); 784 error = copyout(&osb, uap->buf, sizeof(osb)); 785 } 786 free(sfp, M_STATFS); 787 return (error); 788 } 789 790 /* 791 * Get statistics on all filesystems. 
792 */ 793 int 794 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 795 { 796 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 797 } 798 799 int 800 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 801 long bufsize, int mode) 802 { 803 struct freebsd11_statfs osb; 804 struct statfs *buf, *sp; 805 size_t count, size; 806 int error; 807 808 if (bufsize < 0) 809 return (EINVAL); 810 811 count = bufsize / sizeof(struct ostatfs); 812 size = count * sizeof(struct statfs); 813 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 814 if (error == 0) 815 td->td_retval[0] = count; 816 if (size > 0) { 817 sp = buf; 818 while (count > 0 && error == 0) { 819 freebsd11_cvtstatfs(sp, &osb); 820 error = copyout(&osb, ubuf, sizeof(osb)); 821 sp++; 822 ubuf++; 823 count--; 824 } 825 free(buf, M_STATFS); 826 } 827 return (error); 828 } 829 830 /* 831 * Implement fstatfs() for (NFS) file handles. 832 */ 833 int 834 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 835 { 836 struct freebsd11_statfs osb; 837 struct statfs *sfp; 838 fhandle_t fh; 839 int error; 840 841 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 842 if (error) 843 return (error); 844 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 845 error = kern_fhstatfs(td, fh, sfp); 846 if (error == 0) { 847 freebsd11_cvtstatfs(sfp, &osb); 848 error = copyout(&osb, uap->buf, sizeof(osb)); 849 } 850 free(sfp, M_STATFS); 851 return (error); 852 } 853 854 /* 855 * Convert a new format statfs structure to an old format statfs structure. 
856 */ 857 static void 858 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 859 { 860 861 bzero(osp, sizeof(*osp)); 862 osp->f_version = FREEBSD11_STATFS_VERSION; 863 osp->f_type = nsp->f_type; 864 osp->f_flags = nsp->f_flags; 865 osp->f_bsize = nsp->f_bsize; 866 osp->f_iosize = nsp->f_iosize; 867 osp->f_blocks = nsp->f_blocks; 868 osp->f_bfree = nsp->f_bfree; 869 osp->f_bavail = nsp->f_bavail; 870 osp->f_files = nsp->f_files; 871 osp->f_ffree = nsp->f_ffree; 872 osp->f_syncwrites = nsp->f_syncwrites; 873 osp->f_asyncwrites = nsp->f_asyncwrites; 874 osp->f_syncreads = nsp->f_syncreads; 875 osp->f_asyncreads = nsp->f_asyncreads; 876 osp->f_namemax = nsp->f_namemax; 877 osp->f_owner = nsp->f_owner; 878 osp->f_fsid = nsp->f_fsid; 879 strlcpy(osp->f_fstypename, nsp->f_fstypename, 880 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 881 strlcpy(osp->f_mntonname, nsp->f_mntonname, 882 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 883 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 884 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 885 } 886 #endif /* COMPAT_FREEBSD11 */ 887 888 /* 889 * Change current working directory to a given file descriptor. 
890 */ 891 #ifndef _SYS_SYSPROTO_H_ 892 struct fchdir_args { 893 int fd; 894 }; 895 #endif 896 int 897 sys_fchdir(struct thread *td, struct fchdir_args *uap) 898 { 899 struct vnode *vp, *tdp; 900 struct mount *mp; 901 struct file *fp; 902 int error; 903 904 AUDIT_ARG_FD(uap->fd); 905 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 906 &fp); 907 if (error != 0) 908 return (error); 909 vp = fp->f_vnode; 910 vref(vp); 911 fdrop(fp, td); 912 vn_lock(vp, LK_SHARED | LK_RETRY); 913 AUDIT_ARG_VNODE1(vp); 914 error = change_dir(vp, td); 915 while (!error && (mp = vp->v_mountedhere) != NULL) { 916 if (vfs_busy(mp, 0)) 917 continue; 918 error = VFS_ROOT(mp, LK_SHARED, &tdp); 919 vfs_unbusy(mp); 920 if (error != 0) 921 break; 922 vput(vp); 923 vp = tdp; 924 } 925 if (error != 0) { 926 vput(vp); 927 return (error); 928 } 929 VOP_UNLOCK(vp); 930 pwd_chdir(td, vp); 931 return (0); 932 } 933 934 /* 935 * Change current working directory (``.''). 936 */ 937 #ifndef _SYS_SYSPROTO_H_ 938 struct chdir_args { 939 char *path; 940 }; 941 #endif 942 int 943 sys_chdir(struct thread *td, struct chdir_args *uap) 944 { 945 946 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 947 } 948 949 int 950 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 951 { 952 struct nameidata nd; 953 int error; 954 955 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 956 pathseg, path, td); 957 if ((error = namei(&nd)) != 0) 958 return (error); 959 if ((error = change_dir(nd.ni_vp, td)) != 0) { 960 vput(nd.ni_vp); 961 NDFREE_NOTHING(&nd); 962 return (error); 963 } 964 VOP_UNLOCK(nd.ni_vp); 965 NDFREE_NOTHING(&nd); 966 pwd_chdir(td, nd.ni_vp); 967 return (0); 968 } 969 970 static int unprivileged_chroot = 0; 971 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 972 &unprivileged_chroot, 0, 973 "Unprivileged processes can use chroot(2)"); 974 /* 975 * Change notion of root (``/'') directory. 
976 */ 977 #ifndef _SYS_SYSPROTO_H_ 978 struct chroot_args { 979 char *path; 980 }; 981 #endif 982 int 983 sys_chroot(struct thread *td, struct chroot_args *uap) 984 { 985 struct nameidata nd; 986 struct proc *p; 987 int error; 988 989 error = priv_check(td, PRIV_VFS_CHROOT); 990 if (error != 0) { 991 p = td->td_proc; 992 PROC_LOCK(p); 993 if (unprivileged_chroot == 0 || 994 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 995 PROC_UNLOCK(p); 996 return (error); 997 } 998 PROC_UNLOCK(p); 999 } 1000 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1001 UIO_USERSPACE, uap->path, td); 1002 error = namei(&nd); 1003 if (error != 0) 1004 goto error; 1005 error = change_dir(nd.ni_vp, td); 1006 if (error != 0) 1007 goto e_vunlock; 1008 #ifdef MAC 1009 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 1010 if (error != 0) 1011 goto e_vunlock; 1012 #endif 1013 VOP_UNLOCK(nd.ni_vp); 1014 error = pwd_chroot(td, nd.ni_vp); 1015 vrele(nd.ni_vp); 1016 NDFREE_NOTHING(&nd); 1017 return (error); 1018 e_vunlock: 1019 vput(nd.ni_vp); 1020 error: 1021 NDFREE_NOTHING(&nd); 1022 return (error); 1023 } 1024 1025 /* 1026 * Common routine for chroot and chdir. Callers must provide a locked vnode 1027 * instance. 
1028 */ 1029 int 1030 change_dir(struct vnode *vp, struct thread *td) 1031 { 1032 #ifdef MAC 1033 int error; 1034 #endif 1035 1036 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1037 if (vp->v_type != VDIR) 1038 return (ENOTDIR); 1039 #ifdef MAC 1040 error = mac_vnode_check_chdir(td->td_ucred, vp); 1041 if (error != 0) 1042 return (error); 1043 #endif 1044 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1045 } 1046 1047 static __inline void 1048 flags_to_rights(int flags, cap_rights_t *rightsp) 1049 { 1050 if (flags & O_EXEC) { 1051 cap_rights_set_one(rightsp, CAP_FEXECVE); 1052 if (flags & O_PATH) 1053 return; 1054 } else { 1055 switch ((flags & O_ACCMODE)) { 1056 case O_RDONLY: 1057 cap_rights_set_one(rightsp, CAP_READ); 1058 break; 1059 case O_RDWR: 1060 cap_rights_set_one(rightsp, CAP_READ); 1061 /* FALLTHROUGH */ 1062 case O_WRONLY: 1063 cap_rights_set_one(rightsp, CAP_WRITE); 1064 if (!(flags & (O_APPEND | O_TRUNC))) 1065 cap_rights_set_one(rightsp, CAP_SEEK); 1066 break; 1067 } 1068 } 1069 1070 if (flags & O_CREAT) 1071 cap_rights_set_one(rightsp, CAP_CREATE); 1072 1073 if (flags & O_TRUNC) 1074 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1075 1076 if (flags & (O_SYNC | O_FSYNC)) 1077 cap_rights_set_one(rightsp, CAP_FSYNC); 1078 1079 if (flags & (O_EXLOCK | O_SHLOCK)) 1080 cap_rights_set_one(rightsp, CAP_FLOCK); 1081 } 1082 1083 /* 1084 * Check permissions, allocate an open file structure, and call the device 1085 * open routine if any. 
1086 */ 1087 #ifndef _SYS_SYSPROTO_H_ 1088 struct open_args { 1089 char *path; 1090 int flags; 1091 int mode; 1092 }; 1093 #endif 1094 int 1095 sys_open(struct thread *td, struct open_args *uap) 1096 { 1097 1098 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1099 uap->flags, uap->mode)); 1100 } 1101 1102 #ifndef _SYS_SYSPROTO_H_ 1103 struct openat_args { 1104 int fd; 1105 char *path; 1106 int flag; 1107 int mode; 1108 }; 1109 #endif 1110 int 1111 sys_openat(struct thread *td, struct openat_args *uap) 1112 { 1113 1114 AUDIT_ARG_FD(uap->fd); 1115 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1116 uap->mode)); 1117 } 1118 1119 int 1120 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1121 int flags, int mode) 1122 { 1123 struct proc *p = td->td_proc; 1124 struct filedesc *fdp; 1125 struct pwddesc *pdp; 1126 struct file *fp; 1127 struct vnode *vp; 1128 struct nameidata nd; 1129 cap_rights_t rights; 1130 int cmode, error, indx; 1131 1132 indx = -1; 1133 fdp = p->p_fd; 1134 pdp = p->p_pd; 1135 1136 AUDIT_ARG_FFLAGS(flags); 1137 AUDIT_ARG_MODE(mode); 1138 cap_rights_init_one(&rights, CAP_LOOKUP); 1139 flags_to_rights(flags, &rights); 1140 1141 /* 1142 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1143 * may be specified. On the other hand, for O_PATH any mode 1144 * except O_EXEC is ignored. 1145 */ 1146 if ((flags & O_PATH) != 0) { 1147 flags &= ~(O_CREAT | O_ACCMODE); 1148 } else if ((flags & O_EXEC) != 0) { 1149 if (flags & O_ACCMODE) 1150 return (EINVAL); 1151 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1152 return (EINVAL); 1153 } else { 1154 flags = FFLAGS(flags); 1155 } 1156 1157 /* 1158 * Allocate a file structure. The descriptor to reference it 1159 * is allocated and used by finstall_refed() below. 1160 */ 1161 error = falloc_noinstall(td, &fp); 1162 if (error != 0) 1163 return (error); 1164 /* Set the flags early so the finit in devfs can pick them up. 
*/ 1165 fp->f_flag = flags & FMASK; 1166 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1167 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1168 &rights, td); 1169 td->td_dupfd = -1; /* XXX check for fdopen */ 1170 error = vn_open(&nd, &flags, cmode, fp); 1171 if (error != 0) { 1172 /* 1173 * If the vn_open replaced the method vector, something 1174 * wonderous happened deep below and we just pass it up 1175 * pretending we know what we do. 1176 */ 1177 if (error == ENXIO && fp->f_ops != &badfileops) { 1178 MPASS((flags & O_PATH) == 0); 1179 goto success; 1180 } 1181 1182 /* 1183 * Handle special fdopen() case. bleh. 1184 * 1185 * Don't do this for relative (capability) lookups; we don't 1186 * understand exactly what would happen, and we don't think 1187 * that it ever should. 1188 */ 1189 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1190 (error == ENODEV || error == ENXIO) && 1191 td->td_dupfd >= 0) { 1192 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1193 &indx); 1194 if (error == 0) 1195 goto success; 1196 } 1197 1198 goto bad; 1199 } 1200 td->td_dupfd = 0; 1201 NDFREE(&nd, NDF_ONLY_PNBUF); 1202 vp = nd.ni_vp; 1203 1204 /* 1205 * Store the vnode, for any f_type. Typically, the vnode use 1206 * count is decremented by direct call to vn_closefile() for 1207 * files that switched type in the cdevsw fdopen() method. 1208 */ 1209 fp->f_vnode = vp; 1210 1211 /* 1212 * If the file wasn't claimed by devfs bind it to the normal 1213 * vnode operations here. 
1214 */ 1215 if (fp->f_ops == &badfileops) { 1216 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1217 ("Unexpected fifo fp %p vp %p", fp, vp)); 1218 if ((flags & O_PATH) != 0) { 1219 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1220 DTYPE_VNODE, NULL, &path_fileops); 1221 vhold(vp); 1222 vunref(vp); 1223 } else { 1224 finit_vnode(fp, flags, NULL, &vnops); 1225 } 1226 } 1227 1228 VOP_UNLOCK(vp); 1229 if (flags & O_TRUNC) { 1230 error = fo_truncate(fp, 0, td->td_ucred, td); 1231 if (error != 0) 1232 goto bad; 1233 } 1234 success: 1235 /* 1236 * If we haven't already installed the FD (for dupfdopen), do so now. 1237 */ 1238 if (indx == -1) { 1239 struct filecaps *fcaps; 1240 1241 #ifdef CAPABILITIES 1242 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1243 fcaps = &nd.ni_filecaps; 1244 else 1245 #endif 1246 fcaps = NULL; 1247 error = finstall_refed(td, fp, &indx, flags, fcaps); 1248 /* On success finstall_refed() consumes fcaps. */ 1249 if (error != 0) { 1250 filecaps_free(&nd.ni_filecaps); 1251 goto bad; 1252 } 1253 } else { 1254 filecaps_free(&nd.ni_filecaps); 1255 falloc_abort(td, fp); 1256 } 1257 1258 td->td_retval[0] = indx; 1259 return (0); 1260 bad: 1261 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1262 falloc_abort(td, fp); 1263 return (error); 1264 } 1265 1266 #ifdef COMPAT_43 1267 /* 1268 * Create a file. 1269 */ 1270 #ifndef _SYS_SYSPROTO_H_ 1271 struct ocreat_args { 1272 char *path; 1273 int mode; 1274 }; 1275 #endif 1276 int 1277 ocreat(struct thread *td, struct ocreat_args *uap) 1278 { 1279 1280 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1281 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1282 } 1283 #endif /* COMPAT_43 */ 1284 1285 /* 1286 * Create a special file. 
1287 */ 1288 #ifndef _SYS_SYSPROTO_H_ 1289 struct mknodat_args { 1290 int fd; 1291 char *path; 1292 mode_t mode; 1293 dev_t dev; 1294 }; 1295 #endif 1296 int 1297 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1298 { 1299 1300 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1301 uap->dev)); 1302 } 1303 1304 #if defined(COMPAT_FREEBSD11) 1305 int 1306 freebsd11_mknod(struct thread *td, 1307 struct freebsd11_mknod_args *uap) 1308 { 1309 1310 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1311 uap->mode, uap->dev)); 1312 } 1313 1314 int 1315 freebsd11_mknodat(struct thread *td, 1316 struct freebsd11_mknodat_args *uap) 1317 { 1318 1319 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1320 uap->dev)); 1321 } 1322 #endif /* COMPAT_FREEBSD11 */ 1323 1324 int 1325 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1326 int mode, dev_t dev) 1327 { 1328 struct vnode *vp; 1329 struct mount *mp; 1330 struct vattr vattr; 1331 struct nameidata nd; 1332 int error, whiteout = 0; 1333 1334 AUDIT_ARG_MODE(mode); 1335 AUDIT_ARG_DEV(dev); 1336 switch (mode & S_IFMT) { 1337 case S_IFCHR: 1338 case S_IFBLK: 1339 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1340 if (error == 0 && dev == VNOVAL) 1341 error = EINVAL; 1342 break; 1343 case S_IFWHT: 1344 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1345 break; 1346 case S_IFIFO: 1347 if (dev == 0) 1348 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1349 /* FALLTHROUGH */ 1350 default: 1351 error = EINVAL; 1352 break; 1353 } 1354 if (error != 0) 1355 return (error); 1356 NDPREINIT(&nd); 1357 restart: 1358 bwillwrite(); 1359 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1360 NOCACHE, pathseg, path, fd, &cap_mknodat_rights, 1361 td); 1362 if ((error = namei(&nd)) != 0) 1363 return (error); 1364 vp = nd.ni_vp; 1365 if (vp != NULL) { 1366 NDFREE(&nd, NDF_ONLY_PNBUF); 1367 if (vp == nd.ni_dvp) 1368 vrele(nd.ni_dvp); 1369 else 
1370 vput(nd.ni_dvp); 1371 vrele(vp); 1372 return (EEXIST); 1373 } else { 1374 VATTR_NULL(&vattr); 1375 vattr.va_mode = (mode & ALLPERMS) & 1376 ~td->td_proc->p_pd->pd_cmask; 1377 vattr.va_rdev = dev; 1378 whiteout = 0; 1379 1380 switch (mode & S_IFMT) { 1381 case S_IFCHR: 1382 vattr.va_type = VCHR; 1383 break; 1384 case S_IFBLK: 1385 vattr.va_type = VBLK; 1386 break; 1387 case S_IFWHT: 1388 whiteout = 1; 1389 break; 1390 default: 1391 panic("kern_mknod: invalid mode"); 1392 } 1393 } 1394 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1395 NDFREE(&nd, NDF_ONLY_PNBUF); 1396 vput(nd.ni_dvp); 1397 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1398 return (error); 1399 goto restart; 1400 } 1401 #ifdef MAC 1402 if (error == 0 && !whiteout) 1403 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1404 &nd.ni_cnd, &vattr); 1405 #endif 1406 if (error == 0) { 1407 if (whiteout) 1408 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1409 else { 1410 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1411 &nd.ni_cnd, &vattr); 1412 } 1413 } 1414 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1415 true); 1416 vn_finished_write(mp); 1417 NDFREE(&nd, NDF_ONLY_PNBUF); 1418 if (error == ERELOOKUP) 1419 goto restart; 1420 return (error); 1421 } 1422 1423 /* 1424 * Create a named pipe. 
1425 */ 1426 #ifndef _SYS_SYSPROTO_H_ 1427 struct mkfifo_args { 1428 char *path; 1429 int mode; 1430 }; 1431 #endif 1432 int 1433 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1434 { 1435 1436 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1437 uap->mode)); 1438 } 1439 1440 #ifndef _SYS_SYSPROTO_H_ 1441 struct mkfifoat_args { 1442 int fd; 1443 char *path; 1444 mode_t mode; 1445 }; 1446 #endif 1447 int 1448 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1449 { 1450 1451 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1452 uap->mode)); 1453 } 1454 1455 int 1456 kern_mkfifoat(struct thread *td, int fd, const char *path, 1457 enum uio_seg pathseg, int mode) 1458 { 1459 struct mount *mp; 1460 struct vattr vattr; 1461 struct nameidata nd; 1462 int error; 1463 1464 AUDIT_ARG_MODE(mode); 1465 NDPREINIT(&nd); 1466 restart: 1467 bwillwrite(); 1468 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1469 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, 1470 td); 1471 if ((error = namei(&nd)) != 0) 1472 return (error); 1473 if (nd.ni_vp != NULL) { 1474 NDFREE(&nd, NDF_ONLY_PNBUF); 1475 if (nd.ni_vp == nd.ni_dvp) 1476 vrele(nd.ni_dvp); 1477 else 1478 vput(nd.ni_dvp); 1479 vrele(nd.ni_vp); 1480 return (EEXIST); 1481 } 1482 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1483 NDFREE(&nd, NDF_ONLY_PNBUF); 1484 vput(nd.ni_dvp); 1485 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1486 return (error); 1487 goto restart; 1488 } 1489 VATTR_NULL(&vattr); 1490 vattr.va_type = VFIFO; 1491 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1492 #ifdef MAC 1493 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1494 &vattr); 1495 if (error != 0) 1496 goto out; 1497 #endif 1498 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1499 #ifdef MAC 1500 out: 1501 #endif 1502 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 1503 vn_finished_write(mp); 1504 NDFREE(&nd, NDF_ONLY_PNBUF); 1505 if (error == ERELOOKUP) 1506 goto restart; 1507 return (error); 1508 } 1509 1510 /* 1511 * Make a hard file link. 1512 */ 1513 #ifndef _SYS_SYSPROTO_H_ 1514 struct link_args { 1515 char *path; 1516 char *link; 1517 }; 1518 #endif 1519 int 1520 sys_link(struct thread *td, struct link_args *uap) 1521 { 1522 1523 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1524 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1525 } 1526 1527 #ifndef _SYS_SYSPROTO_H_ 1528 struct linkat_args { 1529 int fd1; 1530 char *path1; 1531 int fd2; 1532 char *path2; 1533 int flag; 1534 }; 1535 #endif 1536 int 1537 sys_linkat(struct thread *td, struct linkat_args *uap) 1538 { 1539 1540 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1541 UIO_USERSPACE, uap->flag)); 1542 } 1543 1544 int hardlink_check_uid = 0; 1545 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1546 &hardlink_check_uid, 0, 1547 "Unprivileged processes cannot create hard links to files owned by other " 1548 "users"); 1549 static int hardlink_check_gid = 0; 1550 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1551 &hardlink_check_gid, 0, 1552 "Unprivileged processes cannot create hard links to files owned by other " 1553 "groups"); 1554 1555 static int 1556 can_hardlink(struct vnode *vp, struct ucred *cred) 1557 { 1558 struct vattr va; 1559 int error; 1560 1561 if (!hardlink_check_uid && !hardlink_check_gid) 1562 return (0); 1563 1564 error = VOP_GETATTR(vp, &va, cred); 1565 if (error != 0) 1566 return (error); 1567 1568 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1569 error = priv_check_cred(cred, PRIV_VFS_LINK); 1570 if (error != 0) 1571 return (error); 1572 } 1573 1574 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1575 error = priv_check_cred(cred, PRIV_VFS_LINK); 1576 if (error != 0) 1577 return (error); 1578 } 1579 1580 return (0); 1581 } 1582 
1583 int 1584 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1585 const char *path2, enum uio_seg segflag, int flag) 1586 { 1587 struct nameidata nd; 1588 int error; 1589 1590 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1591 AT_EMPTY_PATH)) != 0) 1592 return (EINVAL); 1593 1594 NDPREINIT(&nd); 1595 do { 1596 bwillwrite(); 1597 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1598 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1599 segflag, path1, fd1, &cap_linkat_source_rights, td); 1600 if ((error = namei(&nd)) != 0) 1601 return (error); 1602 NDFREE(&nd, NDF_ONLY_PNBUF); 1603 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1604 error = priv_check(td, PRIV_VFS_FHOPEN); 1605 if (error != 0) { 1606 vrele(nd.ni_vp); 1607 return (error); 1608 } 1609 } 1610 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1611 } while (error == EAGAIN || error == ERELOOKUP); 1612 return (error); 1613 } 1614 1615 static int 1616 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1617 enum uio_seg segflag) 1618 { 1619 struct nameidata nd; 1620 struct mount *mp; 1621 int error; 1622 1623 if (vp->v_type == VDIR) { 1624 vrele(vp); 1625 return (EPERM); /* POSIX */ 1626 } 1627 NDINIT_ATRIGHTS(&nd, CREATE, 1628 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1629 &cap_linkat_target_rights, td); 1630 if ((error = namei(&nd)) == 0) { 1631 if (nd.ni_vp != NULL) { 1632 NDFREE(&nd, NDF_ONLY_PNBUF); 1633 if (nd.ni_dvp == nd.ni_vp) 1634 vrele(nd.ni_dvp); 1635 else 1636 vput(nd.ni_dvp); 1637 vrele(nd.ni_vp); 1638 vrele(vp); 1639 return (EEXIST); 1640 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1641 /* 1642 * Cross-device link. No need to recheck 1643 * vp->v_type, since it cannot change, except 1644 * to VBAD. 
1645 */ 1646 NDFREE(&nd, NDF_ONLY_PNBUF); 1647 vput(nd.ni_dvp); 1648 vrele(vp); 1649 return (EXDEV); 1650 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1651 error = can_hardlink(vp, td->td_ucred); 1652 #ifdef MAC 1653 if (error == 0) 1654 error = mac_vnode_check_link(td->td_ucred, 1655 nd.ni_dvp, vp, &nd.ni_cnd); 1656 #endif 1657 if (error != 0) { 1658 vput(vp); 1659 vput(nd.ni_dvp); 1660 NDFREE(&nd, NDF_ONLY_PNBUF); 1661 return (error); 1662 } 1663 error = vn_start_write(vp, &mp, V_NOWAIT); 1664 if (error != 0) { 1665 vput(vp); 1666 vput(nd.ni_dvp); 1667 NDFREE(&nd, NDF_ONLY_PNBUF); 1668 error = vn_start_write(NULL, &mp, 1669 V_XSLEEP | PCATCH); 1670 if (error != 0) 1671 return (error); 1672 return (EAGAIN); 1673 } 1674 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1675 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1676 vn_finished_write(mp); 1677 NDFREE(&nd, NDF_ONLY_PNBUF); 1678 vp = NULL; 1679 } else { 1680 vput(nd.ni_dvp); 1681 NDFREE(&nd, NDF_ONLY_PNBUF); 1682 vrele(vp); 1683 return (EAGAIN); 1684 } 1685 } 1686 if (vp != NULL) 1687 vrele(vp); 1688 return (error); 1689 } 1690 1691 /* 1692 * Make a symbolic link. 
1693 */ 1694 #ifndef _SYS_SYSPROTO_H_ 1695 struct symlink_args { 1696 char *path; 1697 char *link; 1698 }; 1699 #endif 1700 int 1701 sys_symlink(struct thread *td, struct symlink_args *uap) 1702 { 1703 1704 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1705 UIO_USERSPACE)); 1706 } 1707 1708 #ifndef _SYS_SYSPROTO_H_ 1709 struct symlinkat_args { 1710 char *path; 1711 int fd; 1712 char *path2; 1713 }; 1714 #endif 1715 int 1716 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1717 { 1718 1719 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1720 UIO_USERSPACE)); 1721 } 1722 1723 int 1724 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1725 enum uio_seg segflg) 1726 { 1727 struct mount *mp; 1728 struct vattr vattr; 1729 const char *syspath; 1730 char *tmppath; 1731 struct nameidata nd; 1732 int error; 1733 1734 if (segflg == UIO_SYSSPACE) { 1735 syspath = path1; 1736 } else { 1737 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1738 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1739 goto out; 1740 syspath = tmppath; 1741 } 1742 AUDIT_ARG_TEXT(syspath); 1743 NDPREINIT(&nd); 1744 restart: 1745 bwillwrite(); 1746 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1747 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1748 td); 1749 if ((error = namei(&nd)) != 0) 1750 goto out; 1751 if (nd.ni_vp) { 1752 NDFREE(&nd, NDF_ONLY_PNBUF); 1753 if (nd.ni_vp == nd.ni_dvp) 1754 vrele(nd.ni_dvp); 1755 else 1756 vput(nd.ni_dvp); 1757 vrele(nd.ni_vp); 1758 nd.ni_vp = NULL; 1759 error = EEXIST; 1760 goto out; 1761 } 1762 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1763 NDFREE(&nd, NDF_ONLY_PNBUF); 1764 vput(nd.ni_dvp); 1765 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1766 goto out; 1767 goto restart; 1768 } 1769 VATTR_NULL(&vattr); 1770 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1771 #ifdef MAC 1772 vattr.va_type = VLNK; 1773 error = 
mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1774 &vattr); 1775 if (error != 0) 1776 goto out2; 1777 #endif 1778 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1779 #ifdef MAC 1780 out2: 1781 #endif 1782 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1783 vn_finished_write(mp); 1784 NDFREE(&nd, NDF_ONLY_PNBUF); 1785 if (error == ERELOOKUP) 1786 goto restart; 1787 out: 1788 if (segflg != UIO_SYSSPACE) 1789 uma_zfree(namei_zone, tmppath); 1790 return (error); 1791 } 1792 1793 /* 1794 * Delete a whiteout from the filesystem. 1795 */ 1796 #ifndef _SYS_SYSPROTO_H_ 1797 struct undelete_args { 1798 char *path; 1799 }; 1800 #endif 1801 int 1802 sys_undelete(struct thread *td, struct undelete_args *uap) 1803 { 1804 struct mount *mp; 1805 struct nameidata nd; 1806 int error; 1807 1808 NDPREINIT(&nd); 1809 restart: 1810 bwillwrite(); 1811 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1812 UIO_USERSPACE, uap->path, td); 1813 error = namei(&nd); 1814 if (error != 0) 1815 return (error); 1816 1817 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1818 NDFREE(&nd, NDF_ONLY_PNBUF); 1819 if (nd.ni_vp == nd.ni_dvp) 1820 vrele(nd.ni_dvp); 1821 else 1822 vput(nd.ni_dvp); 1823 if (nd.ni_vp) 1824 vrele(nd.ni_vp); 1825 return (EEXIST); 1826 } 1827 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1828 NDFREE(&nd, NDF_ONLY_PNBUF); 1829 vput(nd.ni_dvp); 1830 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1831 return (error); 1832 goto restart; 1833 } 1834 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1835 NDFREE(&nd, NDF_ONLY_PNBUF); 1836 vput(nd.ni_dvp); 1837 vn_finished_write(mp); 1838 if (error == ERELOOKUP) 1839 goto restart; 1840 return (error); 1841 } 1842 1843 /* 1844 * Delete a name from the filesystem. 
1845 */ 1846 #ifndef _SYS_SYSPROTO_H_ 1847 struct unlink_args { 1848 char *path; 1849 }; 1850 #endif 1851 int 1852 sys_unlink(struct thread *td, struct unlink_args *uap) 1853 { 1854 1855 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1856 0, 0)); 1857 } 1858 1859 static int 1860 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1861 int flag, enum uio_seg pathseg, ino_t oldinum) 1862 { 1863 1864 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1865 return (EINVAL); 1866 1867 if ((flag & AT_REMOVEDIR) != 0) 1868 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1869 1870 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1871 } 1872 1873 #ifndef _SYS_SYSPROTO_H_ 1874 struct unlinkat_args { 1875 int fd; 1876 char *path; 1877 int flag; 1878 }; 1879 #endif 1880 int 1881 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1882 { 1883 1884 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1885 UIO_USERSPACE, 0)); 1886 } 1887 1888 #ifndef _SYS_SYSPROTO_H_ 1889 struct funlinkat_args { 1890 int dfd; 1891 const char *path; 1892 int fd; 1893 int flag; 1894 }; 1895 #endif 1896 int 1897 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1898 { 1899 1900 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1901 UIO_USERSPACE, 0)); 1902 } 1903 1904 int 1905 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1906 enum uio_seg pathseg, int flag, ino_t oldinum) 1907 { 1908 struct mount *mp; 1909 struct file *fp; 1910 struct vnode *vp; 1911 struct nameidata nd; 1912 struct stat sb; 1913 int error; 1914 1915 fp = NULL; 1916 if (fd != FD_NONE) { 1917 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1918 if (error != 0) 1919 return (error); 1920 } 1921 1922 NDPREINIT(&nd); 1923 restart: 1924 bwillwrite(); 1925 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1926 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1927 pathseg, 
path, dfd, &cap_unlinkat_rights, td); 1928 if ((error = namei(&nd)) != 0) { 1929 if (error == EINVAL) 1930 error = EPERM; 1931 goto fdout; 1932 } 1933 vp = nd.ni_vp; 1934 if (vp->v_type == VDIR && oldinum == 0) { 1935 error = EPERM; /* POSIX */ 1936 } else if (oldinum != 0 && 1937 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 1938 sb.st_ino != oldinum) { 1939 error = EIDRM; /* Identifier removed */ 1940 } else if (fp != NULL && fp->f_vnode != vp) { 1941 if (VN_IS_DOOMED(fp->f_vnode)) 1942 error = EBADF; 1943 else 1944 error = EDEADLK; 1945 } else { 1946 /* 1947 * The root of a mounted filesystem cannot be deleted. 1948 * 1949 * XXX: can this only be a VDIR case? 1950 */ 1951 if (vp->v_vflag & VV_ROOT) 1952 error = EBUSY; 1953 } 1954 if (error == 0) { 1955 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1956 NDFREE(&nd, NDF_ONLY_PNBUF); 1957 vput(nd.ni_dvp); 1958 if (vp == nd.ni_dvp) 1959 vrele(vp); 1960 else 1961 vput(vp); 1962 if ((error = vn_start_write(NULL, &mp, 1963 V_XSLEEP | PCATCH)) != 0) { 1964 goto fdout; 1965 } 1966 goto restart; 1967 } 1968 #ifdef MAC 1969 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1970 &nd.ni_cnd); 1971 if (error != 0) 1972 goto out; 1973 #endif 1974 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1975 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1976 #ifdef MAC 1977 out: 1978 #endif 1979 vn_finished_write(mp); 1980 } 1981 NDFREE(&nd, NDF_ONLY_PNBUF); 1982 vput(nd.ni_dvp); 1983 if (vp == nd.ni_dvp) 1984 vrele(vp); 1985 else 1986 vput(vp); 1987 if (error == ERELOOKUP) 1988 goto restart; 1989 fdout: 1990 if (fp != NULL) 1991 fdrop(fp, td); 1992 return (error); 1993 } 1994 1995 /* 1996 * Reposition read/write file offset. 
1997 */ 1998 #ifndef _SYS_SYSPROTO_H_ 1999 struct lseek_args { 2000 int fd; 2001 int pad; 2002 off_t offset; 2003 int whence; 2004 }; 2005 #endif 2006 int 2007 sys_lseek(struct thread *td, struct lseek_args *uap) 2008 { 2009 2010 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2011 } 2012 2013 int 2014 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2015 { 2016 struct file *fp; 2017 int error; 2018 2019 AUDIT_ARG_FD(fd); 2020 error = fget(td, fd, &cap_seek_rights, &fp); 2021 if (error != 0) 2022 return (error); 2023 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2024 fo_seek(fp, offset, whence, td) : ESPIPE; 2025 fdrop(fp, td); 2026 return (error); 2027 } 2028 2029 #if defined(COMPAT_43) 2030 /* 2031 * Reposition read/write file offset. 2032 */ 2033 #ifndef _SYS_SYSPROTO_H_ 2034 struct olseek_args { 2035 int fd; 2036 long offset; 2037 int whence; 2038 }; 2039 #endif 2040 int 2041 olseek(struct thread *td, struct olseek_args *uap) 2042 { 2043 2044 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2045 } 2046 #endif /* COMPAT_43 */ 2047 2048 #if defined(COMPAT_FREEBSD6) 2049 /* Version with the 'pad' argument */ 2050 int 2051 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2052 { 2053 2054 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2055 } 2056 #endif 2057 2058 /* 2059 * Check access permissions using passed credentials. 2060 */ 2061 static int 2062 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2063 struct thread *td) 2064 { 2065 accmode_t accmode; 2066 int error; 2067 2068 /* Flags == 0 means only check for existence. 
*/ 2069 if (user_flags == 0) 2070 return (0); 2071 2072 accmode = 0; 2073 if (user_flags & R_OK) 2074 accmode |= VREAD; 2075 if (user_flags & W_OK) 2076 accmode |= VWRITE; 2077 if (user_flags & X_OK) 2078 accmode |= VEXEC; 2079 #ifdef MAC 2080 error = mac_vnode_check_access(cred, vp, accmode); 2081 if (error != 0) 2082 return (error); 2083 #endif 2084 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2085 error = VOP_ACCESS(vp, accmode, cred, td); 2086 return (error); 2087 } 2088 2089 /* 2090 * Check access permissions using "real" credentials. 2091 */ 2092 #ifndef _SYS_SYSPROTO_H_ 2093 struct access_args { 2094 char *path; 2095 int amode; 2096 }; 2097 #endif 2098 int 2099 sys_access(struct thread *td, struct access_args *uap) 2100 { 2101 2102 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2103 0, uap->amode)); 2104 } 2105 2106 #ifndef _SYS_SYSPROTO_H_ 2107 struct faccessat_args { 2108 int dirfd; 2109 char *path; 2110 int amode; 2111 int flag; 2112 } 2113 #endif 2114 int 2115 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2116 { 2117 2118 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2119 uap->amode)); 2120 } 2121 2122 int 2123 kern_accessat(struct thread *td, int fd, const char *path, 2124 enum uio_seg pathseg, int flag, int amode) 2125 { 2126 struct ucred *cred, *usecred; 2127 struct vnode *vp; 2128 struct nameidata nd; 2129 int error; 2130 2131 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0) 2132 return (EINVAL); 2133 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2134 return (EINVAL); 2135 2136 /* 2137 * Create and modify a temporary credential instead of one that 2138 * is potentially shared (if we need one). 
2139 */ 2140 cred = td->td_ucred; 2141 if ((flag & AT_EACCESS) == 0 && 2142 ((cred->cr_uid != cred->cr_ruid || 2143 cred->cr_rgid != cred->cr_groups[0]))) { 2144 usecred = crdup(cred); 2145 usecred->cr_uid = cred->cr_ruid; 2146 usecred->cr_groups[0] = cred->cr_rgid; 2147 td->td_ucred = usecred; 2148 } else 2149 usecred = cred; 2150 AUDIT_ARG_VALUE(amode); 2151 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2152 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | 2153 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights, td); 2154 if ((error = namei(&nd)) != 0) 2155 goto out; 2156 vp = nd.ni_vp; 2157 2158 error = vn_access(vp, amode, usecred, td); 2159 NDFREE_NOTHING(&nd); 2160 vput(vp); 2161 out: 2162 if (usecred != cred) { 2163 td->td_ucred = cred; 2164 crfree(usecred); 2165 } 2166 return (error); 2167 } 2168 2169 /* 2170 * Check access permissions using "effective" credentials. 2171 */ 2172 #ifndef _SYS_SYSPROTO_H_ 2173 struct eaccess_args { 2174 char *path; 2175 int amode; 2176 }; 2177 #endif 2178 int 2179 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2180 { 2181 2182 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2183 AT_EACCESS, uap->amode)); 2184 } 2185 2186 #if defined(COMPAT_43) 2187 /* 2188 * Get file status; this version follows links. 2189 */ 2190 #ifndef _SYS_SYSPROTO_H_ 2191 struct ostat_args { 2192 char *path; 2193 struct ostat *ub; 2194 }; 2195 #endif 2196 int 2197 ostat(struct thread *td, struct ostat_args *uap) 2198 { 2199 struct stat sb; 2200 struct ostat osb; 2201 int error; 2202 2203 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2204 &sb, NULL); 2205 if (error != 0) 2206 return (error); 2207 cvtstat(&sb, &osb); 2208 return (copyout(&osb, uap->ub, sizeof (osb))); 2209 } 2210 2211 /* 2212 * Get file status; this version does not follow links. 
2213 */ 2214 #ifndef _SYS_SYSPROTO_H_ 2215 struct olstat_args { 2216 char *path; 2217 struct ostat *ub; 2218 }; 2219 #endif 2220 int 2221 olstat(struct thread *td, struct olstat_args *uap) 2222 { 2223 struct stat sb; 2224 struct ostat osb; 2225 int error; 2226 2227 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2228 UIO_USERSPACE, &sb, NULL); 2229 if (error != 0) 2230 return (error); 2231 cvtstat(&sb, &osb); 2232 return (copyout(&osb, uap->ub, sizeof (osb))); 2233 } 2234 2235 /* 2236 * Convert from an old to a new stat structure. 2237 * XXX: many values are blindly truncated. 2238 */ 2239 void 2240 cvtstat(struct stat *st, struct ostat *ost) 2241 { 2242 2243 bzero(ost, sizeof(*ost)); 2244 ost->st_dev = st->st_dev; 2245 ost->st_ino = st->st_ino; 2246 ost->st_mode = st->st_mode; 2247 ost->st_nlink = st->st_nlink; 2248 ost->st_uid = st->st_uid; 2249 ost->st_gid = st->st_gid; 2250 ost->st_rdev = st->st_rdev; 2251 ost->st_size = MIN(st->st_size, INT32_MAX); 2252 ost->st_atim = st->st_atim; 2253 ost->st_mtim = st->st_mtim; 2254 ost->st_ctim = st->st_ctim; 2255 ost->st_blksize = st->st_blksize; 2256 ost->st_blocks = st->st_blocks; 2257 ost->st_flags = st->st_flags; 2258 ost->st_gen = st->st_gen; 2259 } 2260 #endif /* COMPAT_43 */ 2261 2262 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2263 int ino64_trunc_error; 2264 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2265 &ino64_trunc_error, 0, 2266 "Error on truncation of device, file or inode number, or link count"); 2267 2268 int 2269 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2270 { 2271 2272 ost->st_dev = st->st_dev; 2273 if (ost->st_dev != st->st_dev) { 2274 switch (ino64_trunc_error) { 2275 default: 2276 /* 2277 * Since dev_t is almost raw, don't clamp to the 2278 * maximum for case 2, but ignore the error. 
2279 */ 2280 break; 2281 case 1: 2282 return (EOVERFLOW); 2283 } 2284 } 2285 ost->st_ino = st->st_ino; 2286 if (ost->st_ino != st->st_ino) { 2287 switch (ino64_trunc_error) { 2288 default: 2289 case 0: 2290 break; 2291 case 1: 2292 return (EOVERFLOW); 2293 case 2: 2294 ost->st_ino = UINT32_MAX; 2295 break; 2296 } 2297 } 2298 ost->st_mode = st->st_mode; 2299 ost->st_nlink = st->st_nlink; 2300 if (ost->st_nlink != st->st_nlink) { 2301 switch (ino64_trunc_error) { 2302 default: 2303 case 0: 2304 break; 2305 case 1: 2306 return (EOVERFLOW); 2307 case 2: 2308 ost->st_nlink = UINT16_MAX; 2309 break; 2310 } 2311 } 2312 ost->st_uid = st->st_uid; 2313 ost->st_gid = st->st_gid; 2314 ost->st_rdev = st->st_rdev; 2315 if (ost->st_rdev != st->st_rdev) { 2316 switch (ino64_trunc_error) { 2317 default: 2318 break; 2319 case 1: 2320 return (EOVERFLOW); 2321 } 2322 } 2323 ost->st_atim = st->st_atim; 2324 ost->st_mtim = st->st_mtim; 2325 ost->st_ctim = st->st_ctim; 2326 ost->st_size = st->st_size; 2327 ost->st_blocks = st->st_blocks; 2328 ost->st_blksize = st->st_blksize; 2329 ost->st_flags = st->st_flags; 2330 ost->st_gen = st->st_gen; 2331 ost->st_lspare = 0; 2332 ost->st_birthtim = st->st_birthtim; 2333 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2334 sizeof(*ost) - offsetof(struct freebsd11_stat, 2335 st_birthtim) - sizeof(ost->st_birthtim)); 2336 return (0); 2337 } 2338 2339 int 2340 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2341 { 2342 struct stat sb; 2343 struct freebsd11_stat osb; 2344 int error; 2345 2346 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2347 &sb, NULL); 2348 if (error != 0) 2349 return (error); 2350 error = freebsd11_cvtstat(&sb, &osb); 2351 if (error == 0) 2352 error = copyout(&osb, uap->ub, sizeof(osb)); 2353 return (error); 2354 } 2355 2356 int 2357 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2358 { 2359 struct stat sb; 2360 struct freebsd11_stat osb; 2361 int error; 2362 2363 
error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2364 UIO_USERSPACE, &sb, NULL); 2365 if (error != 0) 2366 return (error); 2367 error = freebsd11_cvtstat(&sb, &osb); 2368 if (error == 0) 2369 error = copyout(&osb, uap->ub, sizeof(osb)); 2370 return (error); 2371 } 2372 2373 int 2374 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2375 { 2376 struct fhandle fh; 2377 struct stat sb; 2378 struct freebsd11_stat osb; 2379 int error; 2380 2381 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2382 if (error != 0) 2383 return (error); 2384 error = kern_fhstat(td, fh, &sb); 2385 if (error != 0) 2386 return (error); 2387 error = freebsd11_cvtstat(&sb, &osb); 2388 if (error == 0) 2389 error = copyout(&osb, uap->sb, sizeof(osb)); 2390 return (error); 2391 } 2392 2393 int 2394 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2395 { 2396 struct stat sb; 2397 struct freebsd11_stat osb; 2398 int error; 2399 2400 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2401 UIO_USERSPACE, &sb, NULL); 2402 if (error != 0) 2403 return (error); 2404 error = freebsd11_cvtstat(&sb, &osb); 2405 if (error == 0) 2406 error = copyout(&osb, uap->buf, sizeof(osb)); 2407 return (error); 2408 } 2409 #endif /* COMPAT_FREEBSD11 */ 2410 2411 /* 2412 * Get file status 2413 */ 2414 #ifndef _SYS_SYSPROTO_H_ 2415 struct fstatat_args { 2416 int fd; 2417 char *path; 2418 struct stat *buf; 2419 int flag; 2420 } 2421 #endif 2422 int 2423 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2424 { 2425 struct stat sb; 2426 int error; 2427 2428 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2429 UIO_USERSPACE, &sb, NULL); 2430 if (error == 0) 2431 error = copyout(&sb, uap->buf, sizeof (sb)); 2432 return (error); 2433 } 2434 2435 int 2436 kern_statat(struct thread *td, int flag, int fd, const char *path, 2437 enum uio_seg pathseg, struct stat *sbp, 2438 void (*hook)(struct vnode *vp, struct stat *sbp)) 2439 { 2440 struct nameidata 
nd; 2441 int error; 2442 2443 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2444 AT_EMPTY_PATH)) != 0) 2445 return (EINVAL); 2446 2447 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2448 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2449 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights, td); 2450 2451 if ((error = namei(&nd)) != 0) { 2452 if (error == ENOTDIR && 2453 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2454 error = kern_fstat(td, fd, sbp); 2455 return (error); 2456 } 2457 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2458 if (error == 0) { 2459 if (__predict_false(hook != NULL)) 2460 hook(nd.ni_vp, sbp); 2461 } 2462 NDFREE_NOTHING(&nd); 2463 vput(nd.ni_vp); 2464 #ifdef __STAT_TIME_T_EXT 2465 sbp->st_atim_ext = 0; 2466 sbp->st_mtim_ext = 0; 2467 sbp->st_ctim_ext = 0; 2468 sbp->st_btim_ext = 0; 2469 #endif 2470 #ifdef KTRACE 2471 if (KTRPOINT(td, KTR_STRUCT)) 2472 ktrstat_error(sbp, error); 2473 #endif 2474 return (error); 2475 } 2476 2477 #if defined(COMPAT_FREEBSD11) 2478 /* 2479 * Implementation of the NetBSD [l]stat() functions. 
2480 */ 2481 int 2482 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2483 { 2484 struct freebsd11_stat sb11; 2485 int error; 2486 2487 error = freebsd11_cvtstat(sb, &sb11); 2488 if (error != 0) 2489 return (error); 2490 2491 bzero(nsb, sizeof(*nsb)); 2492 CP(sb11, *nsb, st_dev); 2493 CP(sb11, *nsb, st_ino); 2494 CP(sb11, *nsb, st_mode); 2495 CP(sb11, *nsb, st_nlink); 2496 CP(sb11, *nsb, st_uid); 2497 CP(sb11, *nsb, st_gid); 2498 CP(sb11, *nsb, st_rdev); 2499 CP(sb11, *nsb, st_atim); 2500 CP(sb11, *nsb, st_mtim); 2501 CP(sb11, *nsb, st_ctim); 2502 CP(sb11, *nsb, st_size); 2503 CP(sb11, *nsb, st_blocks); 2504 CP(sb11, *nsb, st_blksize); 2505 CP(sb11, *nsb, st_flags); 2506 CP(sb11, *nsb, st_gen); 2507 CP(sb11, *nsb, st_birthtim); 2508 return (0); 2509 } 2510 2511 #ifndef _SYS_SYSPROTO_H_ 2512 struct freebsd11_nstat_args { 2513 char *path; 2514 struct nstat *ub; 2515 }; 2516 #endif 2517 int 2518 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2519 { 2520 struct stat sb; 2521 struct nstat nsb; 2522 int error; 2523 2524 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2525 &sb, NULL); 2526 if (error != 0) 2527 return (error); 2528 error = freebsd11_cvtnstat(&sb, &nsb); 2529 if (error == 0) 2530 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2531 return (error); 2532 } 2533 2534 /* 2535 * NetBSD lstat. Get file status; this version does not follow links. 
2536 */ 2537 #ifndef _SYS_SYSPROTO_H_ 2538 struct freebsd11_nlstat_args { 2539 char *path; 2540 struct nstat *ub; 2541 }; 2542 #endif 2543 int 2544 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2545 { 2546 struct stat sb; 2547 struct nstat nsb; 2548 int error; 2549 2550 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2551 UIO_USERSPACE, &sb, NULL); 2552 if (error != 0) 2553 return (error); 2554 error = freebsd11_cvtnstat(&sb, &nsb); 2555 if (error == 0) 2556 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2557 return (error); 2558 } 2559 #endif /* COMPAT_FREEBSD11 */ 2560 2561 /* 2562 * Get configurable pathname variables. 2563 */ 2564 #ifndef _SYS_SYSPROTO_H_ 2565 struct pathconf_args { 2566 char *path; 2567 int name; 2568 }; 2569 #endif 2570 int 2571 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2572 { 2573 long value; 2574 int error; 2575 2576 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2577 &value); 2578 if (error == 0) 2579 td->td_retval[0] = value; 2580 return (error); 2581 } 2582 2583 #ifndef _SYS_SYSPROTO_H_ 2584 struct lpathconf_args { 2585 char *path; 2586 int name; 2587 }; 2588 #endif 2589 int 2590 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2591 { 2592 long value; 2593 int error; 2594 2595 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2596 NOFOLLOW, &value); 2597 if (error == 0) 2598 td->td_retval[0] = value; 2599 return (error); 2600 } 2601 2602 int 2603 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2604 int name, u_long flags, long *valuep) 2605 { 2606 struct nameidata nd; 2607 int error; 2608 2609 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2610 pathseg, path, td); 2611 if ((error = namei(&nd)) != 0) 2612 return (error); 2613 NDFREE_NOTHING(&nd); 2614 2615 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2616 vput(nd.ni_vp); 2617 return (error); 2618 } 2619 2620 /* 2621 * Return target name of 
a symbolic link. 2622 */ 2623 #ifndef _SYS_SYSPROTO_H_ 2624 struct readlink_args { 2625 char *path; 2626 char *buf; 2627 size_t count; 2628 }; 2629 #endif 2630 int 2631 sys_readlink(struct thread *td, struct readlink_args *uap) 2632 { 2633 2634 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2635 uap->buf, UIO_USERSPACE, uap->count)); 2636 } 2637 #ifndef _SYS_SYSPROTO_H_ 2638 struct readlinkat_args { 2639 int fd; 2640 char *path; 2641 char *buf; 2642 size_t bufsize; 2643 }; 2644 #endif 2645 int 2646 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2647 { 2648 2649 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2650 uap->buf, UIO_USERSPACE, uap->bufsize)); 2651 } 2652 2653 int 2654 kern_readlinkat(struct thread *td, int fd, const char *path, 2655 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2656 { 2657 struct vnode *vp; 2658 struct nameidata nd; 2659 int error; 2660 2661 if (count > IOSIZE_MAX) 2662 return (EINVAL); 2663 2664 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2665 EMPTYPATH, pathseg, path, fd, td); 2666 2667 if ((error = namei(&nd)) != 0) 2668 return (error); 2669 NDFREE_NOTHING(&nd); 2670 vp = nd.ni_vp; 2671 2672 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2673 vput(vp); 2674 2675 return (error); 2676 } 2677 2678 /* 2679 * Helper function to readlink from a vnode 2680 */ 2681 static int 2682 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2683 struct thread *td) 2684 { 2685 struct iovec aiov; 2686 struct uio auio; 2687 int error; 2688 2689 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2690 #ifdef MAC 2691 error = mac_vnode_check_readlink(td->td_ucred, vp); 2692 if (error != 0) 2693 return (error); 2694 #endif 2695 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2696 return (EINVAL); 2697 2698 aiov.iov_base = buf; 2699 aiov.iov_len = count; 2700 auio.uio_iov = &aiov; 2701 auio.uio_iovcnt = 1; 
2702 auio.uio_offset = 0; 2703 auio.uio_rw = UIO_READ; 2704 auio.uio_segflg = bufseg; 2705 auio.uio_td = td; 2706 auio.uio_resid = count; 2707 error = VOP_READLINK(vp, &auio, td->td_ucred); 2708 td->td_retval[0] = count - auio.uio_resid; 2709 return (error); 2710 } 2711 2712 /* 2713 * Common implementation code for chflags() and fchflags(). 2714 */ 2715 static int 2716 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2717 { 2718 struct mount *mp; 2719 struct vattr vattr; 2720 int error; 2721 2722 /* We can't support the value matching VNOVAL. */ 2723 if (flags == VNOVAL) 2724 return (EOPNOTSUPP); 2725 2726 /* 2727 * Prevent non-root users from setting flags on devices. When 2728 * a device is reused, users can retain ownership of the device 2729 * if they are allowed to set flags and programs assume that 2730 * chown can't fail when done as root. 2731 */ 2732 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2733 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2734 if (error != 0) 2735 return (error); 2736 } 2737 2738 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2739 return (error); 2740 VATTR_NULL(&vattr); 2741 vattr.va_flags = flags; 2742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2743 #ifdef MAC 2744 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2745 if (error == 0) 2746 #endif 2747 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2748 VOP_UNLOCK(vp); 2749 vn_finished_write(mp); 2750 return (error); 2751 } 2752 2753 /* 2754 * Change flags of a file given a path name. 
2755 */ 2756 #ifndef _SYS_SYSPROTO_H_ 2757 struct chflags_args { 2758 const char *path; 2759 u_long flags; 2760 }; 2761 #endif 2762 int 2763 sys_chflags(struct thread *td, struct chflags_args *uap) 2764 { 2765 2766 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2767 uap->flags, 0)); 2768 } 2769 2770 #ifndef _SYS_SYSPROTO_H_ 2771 struct chflagsat_args { 2772 int fd; 2773 const char *path; 2774 u_long flags; 2775 int atflag; 2776 } 2777 #endif 2778 int 2779 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2780 { 2781 2782 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2783 uap->flags, uap->atflag)); 2784 } 2785 2786 /* 2787 * Same as chflags() but doesn't follow symlinks. 2788 */ 2789 #ifndef _SYS_SYSPROTO_H_ 2790 struct lchflags_args { 2791 const char *path; 2792 u_long flags; 2793 }; 2794 #endif 2795 int 2796 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2797 { 2798 2799 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2800 uap->flags, AT_SYMLINK_NOFOLLOW)); 2801 } 2802 2803 static int 2804 kern_chflagsat(struct thread *td, int fd, const char *path, 2805 enum uio_seg pathseg, u_long flags, int atflag) 2806 { 2807 struct nameidata nd; 2808 int error; 2809 2810 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2811 AT_EMPTY_PATH)) != 0) 2812 return (EINVAL); 2813 2814 AUDIT_ARG_FFLAGS(flags); 2815 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2816 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2817 fd, &cap_fchflags_rights, td); 2818 if ((error = namei(&nd)) != 0) 2819 return (error); 2820 NDFREE_NOTHING(&nd); 2821 error = setfflags(td, nd.ni_vp, flags); 2822 vrele(nd.ni_vp); 2823 return (error); 2824 } 2825 2826 /* 2827 * Change flags of a file given a file descriptor. 
2828 */ 2829 #ifndef _SYS_SYSPROTO_H_ 2830 struct fchflags_args { 2831 int fd; 2832 u_long flags; 2833 }; 2834 #endif 2835 int 2836 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2837 { 2838 struct file *fp; 2839 int error; 2840 2841 AUDIT_ARG_FD(uap->fd); 2842 AUDIT_ARG_FFLAGS(uap->flags); 2843 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2844 &fp); 2845 if (error != 0) 2846 return (error); 2847 #ifdef AUDIT 2848 if (AUDITING_TD(td)) { 2849 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2850 AUDIT_ARG_VNODE1(fp->f_vnode); 2851 VOP_UNLOCK(fp->f_vnode); 2852 } 2853 #endif 2854 error = setfflags(td, fp->f_vnode, uap->flags); 2855 fdrop(fp, td); 2856 return (error); 2857 } 2858 2859 /* 2860 * Common implementation code for chmod(), lchmod() and fchmod(). 2861 */ 2862 int 2863 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2864 { 2865 struct mount *mp; 2866 struct vattr vattr; 2867 int error; 2868 2869 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2870 return (error); 2871 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2872 VATTR_NULL(&vattr); 2873 vattr.va_mode = mode & ALLPERMS; 2874 #ifdef MAC 2875 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2876 if (error == 0) 2877 #endif 2878 error = VOP_SETATTR(vp, &vattr, cred); 2879 VOP_UNLOCK(vp); 2880 vn_finished_write(mp); 2881 return (error); 2882 } 2883 2884 /* 2885 * Change mode of a file given path name. 
2886 */ 2887 #ifndef _SYS_SYSPROTO_H_ 2888 struct chmod_args { 2889 char *path; 2890 int mode; 2891 }; 2892 #endif 2893 int 2894 sys_chmod(struct thread *td, struct chmod_args *uap) 2895 { 2896 2897 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2898 uap->mode, 0)); 2899 } 2900 2901 #ifndef _SYS_SYSPROTO_H_ 2902 struct fchmodat_args { 2903 int dirfd; 2904 char *path; 2905 mode_t mode; 2906 int flag; 2907 } 2908 #endif 2909 int 2910 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2911 { 2912 2913 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2914 uap->mode, uap->flag)); 2915 } 2916 2917 /* 2918 * Change mode of a file given path name (don't follow links.) 2919 */ 2920 #ifndef _SYS_SYSPROTO_H_ 2921 struct lchmod_args { 2922 char *path; 2923 int mode; 2924 }; 2925 #endif 2926 int 2927 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2928 { 2929 2930 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2931 uap->mode, AT_SYMLINK_NOFOLLOW)); 2932 } 2933 2934 int 2935 kern_fchmodat(struct thread *td, int fd, const char *path, 2936 enum uio_seg pathseg, mode_t mode, int flag) 2937 { 2938 struct nameidata nd; 2939 int error; 2940 2941 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2942 AT_EMPTY_PATH)) != 0) 2943 return (EINVAL); 2944 2945 AUDIT_ARG_MODE(mode); 2946 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2947 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2948 fd, &cap_fchmod_rights, td); 2949 if ((error = namei(&nd)) != 0) 2950 return (error); 2951 NDFREE_NOTHING(&nd); 2952 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2953 vrele(nd.ni_vp); 2954 return (error); 2955 } 2956 2957 /* 2958 * Change mode of a file given a file descriptor. 
2959 */ 2960 #ifndef _SYS_SYSPROTO_H_ 2961 struct fchmod_args { 2962 int fd; 2963 int mode; 2964 }; 2965 #endif 2966 int 2967 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2968 { 2969 struct file *fp; 2970 int error; 2971 2972 AUDIT_ARG_FD(uap->fd); 2973 AUDIT_ARG_MODE(uap->mode); 2974 2975 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2976 if (error != 0) 2977 return (error); 2978 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2979 fdrop(fp, td); 2980 return (error); 2981 } 2982 2983 /* 2984 * Common implementation for chown(), lchown(), and fchown() 2985 */ 2986 int 2987 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2988 gid_t gid) 2989 { 2990 struct mount *mp; 2991 struct vattr vattr; 2992 int error; 2993 2994 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2995 return (error); 2996 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2997 VATTR_NULL(&vattr); 2998 vattr.va_uid = uid; 2999 vattr.va_gid = gid; 3000 #ifdef MAC 3001 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3002 vattr.va_gid); 3003 if (error == 0) 3004 #endif 3005 error = VOP_SETATTR(vp, &vattr, cred); 3006 VOP_UNLOCK(vp); 3007 vn_finished_write(mp); 3008 return (error); 3009 } 3010 3011 /* 3012 * Set ownership given a path name. 
3013 */ 3014 #ifndef _SYS_SYSPROTO_H_ 3015 struct chown_args { 3016 char *path; 3017 int uid; 3018 int gid; 3019 }; 3020 #endif 3021 int 3022 sys_chown(struct thread *td, struct chown_args *uap) 3023 { 3024 3025 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3026 uap->gid, 0)); 3027 } 3028 3029 #ifndef _SYS_SYSPROTO_H_ 3030 struct fchownat_args { 3031 int fd; 3032 const char * path; 3033 uid_t uid; 3034 gid_t gid; 3035 int flag; 3036 }; 3037 #endif 3038 int 3039 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3040 { 3041 3042 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3043 uap->gid, uap->flag)); 3044 } 3045 3046 int 3047 kern_fchownat(struct thread *td, int fd, const char *path, 3048 enum uio_seg pathseg, int uid, int gid, int flag) 3049 { 3050 struct nameidata nd; 3051 int error; 3052 3053 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3054 AT_EMPTY_PATH)) != 0) 3055 return (EINVAL); 3056 3057 AUDIT_ARG_OWNER(uid, gid); 3058 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3059 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3060 fd, &cap_fchown_rights, td); 3061 3062 if ((error = namei(&nd)) != 0) 3063 return (error); 3064 NDFREE_NOTHING(&nd); 3065 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3066 vrele(nd.ni_vp); 3067 return (error); 3068 } 3069 3070 /* 3071 * Set ownership given a path name, do not cross symlinks. 3072 */ 3073 #ifndef _SYS_SYSPROTO_H_ 3074 struct lchown_args { 3075 char *path; 3076 int uid; 3077 int gid; 3078 }; 3079 #endif 3080 int 3081 sys_lchown(struct thread *td, struct lchown_args *uap) 3082 { 3083 3084 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3085 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3086 } 3087 3088 /* 3089 * Set ownership given a file descriptor. 
3090 */ 3091 #ifndef _SYS_SYSPROTO_H_ 3092 struct fchown_args { 3093 int fd; 3094 int uid; 3095 int gid; 3096 }; 3097 #endif 3098 int 3099 sys_fchown(struct thread *td, struct fchown_args *uap) 3100 { 3101 struct file *fp; 3102 int error; 3103 3104 AUDIT_ARG_FD(uap->fd); 3105 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3106 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3107 if (error != 0) 3108 return (error); 3109 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3110 fdrop(fp, td); 3111 return (error); 3112 } 3113 3114 /* 3115 * Common implementation code for utimes(), lutimes(), and futimes(). 3116 */ 3117 static int 3118 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3119 struct timespec *tsp) 3120 { 3121 struct timeval tv[2]; 3122 const struct timeval *tvp; 3123 int error; 3124 3125 if (usrtvp == NULL) { 3126 vfs_timestamp(&tsp[0]); 3127 tsp[1] = tsp[0]; 3128 } else { 3129 if (tvpseg == UIO_SYSSPACE) { 3130 tvp = usrtvp; 3131 } else { 3132 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3133 return (error); 3134 tvp = tv; 3135 } 3136 3137 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3138 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3139 return (EINVAL); 3140 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3141 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3142 } 3143 return (0); 3144 } 3145 3146 /* 3147 * Common implementation code for futimens(), utimensat(). 
3148 */ 3149 #define UTIMENS_NULL 0x1 3150 #define UTIMENS_EXIT 0x2 3151 static int 3152 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3153 struct timespec *tsp, int *retflags) 3154 { 3155 struct timespec tsnow; 3156 int error; 3157 3158 vfs_timestamp(&tsnow); 3159 *retflags = 0; 3160 if (usrtsp == NULL) { 3161 tsp[0] = tsnow; 3162 tsp[1] = tsnow; 3163 *retflags |= UTIMENS_NULL; 3164 return (0); 3165 } 3166 if (tspseg == UIO_SYSSPACE) { 3167 tsp[0] = usrtsp[0]; 3168 tsp[1] = usrtsp[1]; 3169 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3170 return (error); 3171 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3172 *retflags |= UTIMENS_EXIT; 3173 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3174 *retflags |= UTIMENS_NULL; 3175 if (tsp[0].tv_nsec == UTIME_OMIT) 3176 tsp[0].tv_sec = VNOVAL; 3177 else if (tsp[0].tv_nsec == UTIME_NOW) 3178 tsp[0] = tsnow; 3179 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3180 return (EINVAL); 3181 if (tsp[1].tv_nsec == UTIME_OMIT) 3182 tsp[1].tv_sec = VNOVAL; 3183 else if (tsp[1].tv_nsec == UTIME_NOW) 3184 tsp[1] = tsnow; 3185 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3186 return (EINVAL); 3187 3188 return (0); 3189 } 3190 3191 /* 3192 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3193 * and utimensat(). 
3194 */ 3195 static int 3196 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3197 int numtimes, int nullflag) 3198 { 3199 struct mount *mp; 3200 struct vattr vattr; 3201 int error; 3202 bool setbirthtime; 3203 3204 setbirthtime = false; 3205 vattr.va_birthtime.tv_sec = VNOVAL; 3206 vattr.va_birthtime.tv_nsec = 0; 3207 3208 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3209 return (error); 3210 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3211 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3212 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3213 setbirthtime = true; 3214 VATTR_NULL(&vattr); 3215 vattr.va_atime = ts[0]; 3216 vattr.va_mtime = ts[1]; 3217 if (setbirthtime) 3218 vattr.va_birthtime = ts[1]; 3219 if (numtimes > 2) 3220 vattr.va_birthtime = ts[2]; 3221 if (nullflag) 3222 vattr.va_vaflags |= VA_UTIMES_NULL; 3223 #ifdef MAC 3224 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3225 vattr.va_mtime); 3226 #endif 3227 if (error == 0) 3228 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3229 VOP_UNLOCK(vp); 3230 vn_finished_write(mp); 3231 return (error); 3232 } 3233 3234 /* 3235 * Set the access and modification times of a file. 
3236 */ 3237 #ifndef _SYS_SYSPROTO_H_ 3238 struct utimes_args { 3239 char *path; 3240 struct timeval *tptr; 3241 }; 3242 #endif 3243 int 3244 sys_utimes(struct thread *td, struct utimes_args *uap) 3245 { 3246 3247 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3248 uap->tptr, UIO_USERSPACE)); 3249 } 3250 3251 #ifndef _SYS_SYSPROTO_H_ 3252 struct futimesat_args { 3253 int fd; 3254 const char * path; 3255 const struct timeval * times; 3256 }; 3257 #endif 3258 int 3259 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3260 { 3261 3262 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3263 uap->times, UIO_USERSPACE)); 3264 } 3265 3266 int 3267 kern_utimesat(struct thread *td, int fd, const char *path, 3268 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3269 { 3270 struct nameidata nd; 3271 struct timespec ts[2]; 3272 int error; 3273 3274 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3275 return (error); 3276 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3277 &cap_futimes_rights, td); 3278 3279 if ((error = namei(&nd)) != 0) 3280 return (error); 3281 NDFREE_NOTHING(&nd); 3282 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3283 vrele(nd.ni_vp); 3284 return (error); 3285 } 3286 3287 /* 3288 * Set the access and modification times of a file. 
3289 */ 3290 #ifndef _SYS_SYSPROTO_H_ 3291 struct lutimes_args { 3292 char *path; 3293 struct timeval *tptr; 3294 }; 3295 #endif 3296 int 3297 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3298 { 3299 3300 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3301 UIO_USERSPACE)); 3302 } 3303 3304 int 3305 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3306 const struct timeval *tptr, enum uio_seg tptrseg) 3307 { 3308 struct timespec ts[2]; 3309 struct nameidata nd; 3310 int error; 3311 3312 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3313 return (error); 3314 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3315 if ((error = namei(&nd)) != 0) 3316 return (error); 3317 NDFREE_NOTHING(&nd); 3318 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3319 vrele(nd.ni_vp); 3320 return (error); 3321 } 3322 3323 /* 3324 * Set the access and modification times of a file. 3325 */ 3326 #ifndef _SYS_SYSPROTO_H_ 3327 struct futimes_args { 3328 int fd; 3329 struct timeval *tptr; 3330 }; 3331 #endif 3332 int 3333 sys_futimes(struct thread *td, struct futimes_args *uap) 3334 { 3335 3336 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3337 } 3338 3339 int 3340 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3341 enum uio_seg tptrseg) 3342 { 3343 struct timespec ts[2]; 3344 struct file *fp; 3345 int error; 3346 3347 AUDIT_ARG_FD(fd); 3348 error = getutimes(tptr, tptrseg, ts); 3349 if (error != 0) 3350 return (error); 3351 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3352 if (error != 0) 3353 return (error); 3354 #ifdef AUDIT 3355 if (AUDITING_TD(td)) { 3356 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3357 AUDIT_ARG_VNODE1(fp->f_vnode); 3358 VOP_UNLOCK(fp->f_vnode); 3359 } 3360 #endif 3361 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3362 fdrop(fp, td); 3363 return (error); 3364 } 3365 3366 int 3367 sys_futimens(struct thread *td, struct futimens_args *uap) 3368 { 
3369 3370 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3371 } 3372 3373 int 3374 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3375 enum uio_seg tptrseg) 3376 { 3377 struct timespec ts[2]; 3378 struct file *fp; 3379 int error, flags; 3380 3381 AUDIT_ARG_FD(fd); 3382 error = getutimens(tptr, tptrseg, ts, &flags); 3383 if (error != 0) 3384 return (error); 3385 if (flags & UTIMENS_EXIT) 3386 return (0); 3387 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3388 if (error != 0) 3389 return (error); 3390 #ifdef AUDIT 3391 if (AUDITING_TD(td)) { 3392 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3393 AUDIT_ARG_VNODE1(fp->f_vnode); 3394 VOP_UNLOCK(fp->f_vnode); 3395 } 3396 #endif 3397 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3398 fdrop(fp, td); 3399 return (error); 3400 } 3401 3402 int 3403 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3404 { 3405 3406 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3407 uap->times, UIO_USERSPACE, uap->flag)); 3408 } 3409 3410 int 3411 kern_utimensat(struct thread *td, int fd, const char *path, 3412 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3413 int flag) 3414 { 3415 struct nameidata nd; 3416 struct timespec ts[2]; 3417 int error, flags; 3418 3419 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3420 AT_EMPTY_PATH)) != 0) 3421 return (EINVAL); 3422 3423 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3424 return (error); 3425 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3426 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3427 pathseg, path, fd, &cap_futimes_rights, td); 3428 if ((error = namei(&nd)) != 0) 3429 return (error); 3430 /* 3431 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3432 * POSIX states: 3433 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3434 * "Search permission is denied by a component of the path prefix." 
3435 */ 3436 NDFREE_NOTHING(&nd); 3437 if ((flags & UTIMENS_EXIT) == 0) 3438 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3439 vrele(nd.ni_vp); 3440 return (error); 3441 } 3442 3443 /* 3444 * Truncate a file given its path name. 3445 */ 3446 #ifndef _SYS_SYSPROTO_H_ 3447 struct truncate_args { 3448 char *path; 3449 int pad; 3450 off_t length; 3451 }; 3452 #endif 3453 int 3454 sys_truncate(struct thread *td, struct truncate_args *uap) 3455 { 3456 3457 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3458 } 3459 3460 int 3461 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3462 off_t length) 3463 { 3464 struct mount *mp; 3465 struct vnode *vp; 3466 void *rl_cookie; 3467 struct vattr vattr; 3468 struct nameidata nd; 3469 int error; 3470 3471 if (length < 0) 3472 return (EINVAL); 3473 NDPREINIT(&nd); 3474 retry: 3475 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3476 if ((error = namei(&nd)) != 0) 3477 return (error); 3478 vp = nd.ni_vp; 3479 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3480 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3481 vn_rangelock_unlock(vp, rl_cookie); 3482 vrele(vp); 3483 return (error); 3484 } 3485 NDFREE(&nd, NDF_ONLY_PNBUF); 3486 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3487 if (vp->v_type == VDIR) 3488 error = EISDIR; 3489 #ifdef MAC 3490 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3491 } 3492 #endif 3493 else if ((error = vn_writechk(vp)) == 0 && 3494 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3495 VATTR_NULL(&vattr); 3496 vattr.va_size = length; 3497 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3498 } 3499 VOP_UNLOCK(vp); 3500 vn_finished_write(mp); 3501 vn_rangelock_unlock(vp, rl_cookie); 3502 vrele(vp); 3503 if (error == ERELOOKUP) 3504 goto retry; 3505 return (error); 3506 } 3507 3508 #if defined(COMPAT_43) 3509 /* 3510 * Truncate a file given its path name. 
3511 */ 3512 #ifndef _SYS_SYSPROTO_H_ 3513 struct otruncate_args { 3514 char *path; 3515 long length; 3516 }; 3517 #endif 3518 int 3519 otruncate(struct thread *td, struct otruncate_args *uap) 3520 { 3521 3522 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3523 } 3524 #endif /* COMPAT_43 */ 3525 3526 #if defined(COMPAT_FREEBSD6) 3527 /* Versions with the pad argument */ 3528 int 3529 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3530 { 3531 3532 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3533 } 3534 3535 int 3536 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3537 { 3538 3539 return (kern_ftruncate(td, uap->fd, uap->length)); 3540 } 3541 #endif 3542 3543 int 3544 kern_fsync(struct thread *td, int fd, bool fullsync) 3545 { 3546 struct vnode *vp; 3547 struct mount *mp; 3548 struct file *fp; 3549 int error; 3550 3551 AUDIT_ARG_FD(fd); 3552 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3553 if (error != 0) 3554 return (error); 3555 vp = fp->f_vnode; 3556 #if 0 3557 if (!fullsync) 3558 /* XXXKIB: compete outstanding aio writes */; 3559 #endif 3560 retry: 3561 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3562 if (error != 0) 3563 goto drop; 3564 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3565 AUDIT_ARG_VNODE1(vp); 3566 if (vp->v_object != NULL) { 3567 VM_OBJECT_WLOCK(vp->v_object); 3568 vm_object_page_clean(vp->v_object, 0, 0, 0); 3569 VM_OBJECT_WUNLOCK(vp->v_object); 3570 } 3571 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3572 VOP_UNLOCK(vp); 3573 vn_finished_write(mp); 3574 if (error == ERELOOKUP) 3575 goto retry; 3576 drop: 3577 fdrop(fp, td); 3578 return (error); 3579 } 3580 3581 /* 3582 * Sync an open file. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fsync_args {
	int	fd;
};
#endif
int
sys_fsync(struct thread *td, struct fsync_args *uap)
{

	return (kern_fsync(td, uap->fd, true));
}

/*
 * Same as fsync(), but kern_fsync() is called with fullsync == false.
 */
int
sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
{

	return (kern_fsync(td, uap->fd, false));
}

/*
 * Rename files.  Source and destination must either both be directories, or
 * both not be directories.  If target is a directory, it must be empty.
 */
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
	char	*from;
	char	*to;
};
#endif
int
sys_rename(struct thread *td, struct rename_args *uap)
{

	return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
	    uap->to, UIO_USERSPACE));
}

#ifndef _SYS_SYSPROTO_H_
struct renameat_args {
	int	oldfd;
	char	*old;
	int	newfd;
	char	*new;
};
#endif
int
sys_renameat(struct thread *td, struct renameat_args *uap)
{

	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
	    UIO_USERSPACE));
}

#ifdef MAC
/*
 * Source-side lookup for kern_renameat() when the MAC rename-from check
 * is enabled.  The source is looked up with parent and leaf locked so the
 * check can run; both are unlocked again afterwards.  On success the
 * caller owns the references held in *fromnd; on a failed check they are
 * all released here.
 */
static int
kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd,
    const char *new, enum uio_seg pathseg, struct nameidata *fromnd)
{
	int error;

	NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
	    AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td);
	if ((error = namei(fromnd)) != 0)
		return (error);
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp,
	    fromnd->ni_vp, &fromnd->ni_cnd);
	VOP_UNLOCK(fromnd->ni_dvp);
	/* Parent and leaf may be the same vnode; unlock it only once. */
	if (fromnd->ni_dvp != fromnd->ni_vp)
		VOP_UNLOCK(fromnd->ni_vp);
	if (error != 0) {
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd->ni_dvp);
		vrele(fromnd->ni_vp);
		if (fromnd->ni_startdir)
			vrele(fromnd->ni_startdir);
	}
	return (error);
}
#endif

/*
 * Common implementation of rename() and renameat().
 *
 * Looks up the source ('old' relative to 'oldfd') and the target ('new'
 * relative to 'newfd'), checks that the two are compatible and hands the
 * pair to VOP_RENAME().  Returns 0 or an errno.  Internally ERESTART
 * marks "source and target are the same vnode" and is turned into a
 * successful return; ERELOOKUP restarts the whole operation.
 */
int
kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
    const char *new, enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	u_int64_t tondflags;
	int error;

again:
	bwillwrite();
#ifdef MAC
	if (mac_vnode_check_rename_from_enabled()) {
		error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg,
		    &fromnd);
		if (error != 0)
			return (error);
	} else {
#endif
	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
	    pathseg, old, oldfd, &cap_renameat_source_rights, td);
	if ((error = namei(&fromnd)) != 0)
		return (error);
#ifdef MAC
	}
#endif
	fvp = fromnd.ni_vp;
	tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2;
	if (fromnd.ni_vp->v_type == VDIR)
		tondflags |= WILLBEDIR;
	NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd,
	    &cap_renameat_target_rights, td);
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	error = vn_start_write(fvp, &mp, V_NOWAIT);
	if (error != 0) {
		/*
		 * The write could not be started without waiting.  Release
		 * everything obtained by the two lookups, wait with
		 * V_XSLEEP, and redo the operation from the top.
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp != NULL)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vrele(tond.ni_startdir);
		if (fromnd.ni_startdir != NULL)
			vrele(fromnd.ni_startdir);
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error != 0)
			return (error);
		goto again;
	}
	if (tvp != NULL) {
		/* An existing target must be of the same kind as the source. */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
#ifdef CAPABILITIES
		if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
			/*
			 * If the target already exists we require CAP_UNLINKAT
			 * from 'newfd', when newfd was used for the lookup.
			 */
			error = cap_check(&tond.ni_filecaps.fc_rights,
			    &cap_unlinkat_rights);
			if (error != 0)
				goto out;
		}
#endif
	}
	/* The source must not be the directory that holds the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = ERESTART;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	if (error == 0) {
		/* No vnode is released here after VOP_RENAME() is called. */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		/* VOP_RENAME() was not called: release everything here. */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp != NULL)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	/* ERESTART is the internal "nothing to do" marker set above. */
	if (error == ERESTART)
		return (0);
	if (error == ERELOOKUP)
		goto again;
	return (error);
}

/*
 * Make a directory file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkdir_args {
	char	*path;
	int	mode;
};
#endif
int
sys_mkdir(struct thread *td, struct mkdir_args *uap)
{

	return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkdirat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
int
sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
{

	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
}

/*
 * Common implementation of mkdir() and mkdirat().
 *
 * Creates the directory named by 'path' relative to 'fd'.  The new
 * directory's mode is 'mode' restricted to ACCESSPERMS with the bits in
 * the process's pd_cmask cleared.  The operation is restarted from the
 * lookup when the write cannot be started without waiting, and when it
 * ends with ERELOOKUP.
 */
int
kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
    int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR,
	    segflg, path, fd, &cap_mkdirat_rights, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Drop the locked parent before sleeping, then look up again. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* nd.ni_vp is handed over only if the directory was created. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
	vn_finished_write(mp);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Remove a directory file.
 */
#ifndef _SYS_SYSPROTO_H_
struct rmdir_args {
	char	*path;
};
#endif
int
sys_rmdir(struct thread *td, struct rmdir_args *uap)
{

	return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE,
	    0));
}

int
kern_frmdirat(struct thread *td, int dfd, const char *path, int fd,
    enum uio_seg pathseg, int flag)
{
	struct mount *mp;
	struct vnode *vp;
	struct file *fp;
	struct nameidata nd;
	cap_rights_t rights;
	int error;

	fp = NULL;
	if (fd != FD_NONE) {
		error = getvnode(td, fd, cap_rights_init_one(&rights,
		    CAP_LOOKUP), &fp);
		if (error != 0)
			return (error);
	}

	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 |
	    at2cnpflags(flag, AT_RESOLVE_BENEATH),
	    pathseg, path, dfd, &cap_unlinkat_rights, td);
	if ((error = namei(&nd)) != 0)
		goto fdout;
	vp = nd.ni_vp;
	if (vp->v_type != VDIR) {
3916 error = ENOTDIR; 3917 goto out; 3918 } 3919 /* 3920 * No rmdir "." please. 3921 */ 3922 if (nd.ni_dvp == vp) { 3923 error = EINVAL; 3924 goto out; 3925 } 3926 /* 3927 * The root of a mounted filesystem cannot be deleted. 3928 */ 3929 if (vp->v_vflag & VV_ROOT) { 3930 error = EBUSY; 3931 goto out; 3932 } 3933 3934 if (fp != NULL && fp->f_vnode != vp) { 3935 if (VN_IS_DOOMED(fp->f_vnode)) 3936 error = EBADF; 3937 else 3938 error = EDEADLK; 3939 goto out; 3940 } 3941 3942 #ifdef MAC 3943 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3944 &nd.ni_cnd); 3945 if (error != 0) 3946 goto out; 3947 #endif 3948 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3949 NDFREE(&nd, NDF_ONLY_PNBUF); 3950 vput(vp); 3951 if (nd.ni_dvp == vp) 3952 vrele(nd.ni_dvp); 3953 else 3954 vput(nd.ni_dvp); 3955 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3956 goto fdout; 3957 goto restart; 3958 } 3959 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3960 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3961 vn_finished_write(mp); 3962 out: 3963 NDFREE(&nd, NDF_ONLY_PNBUF); 3964 vput(vp); 3965 if (nd.ni_dvp == vp) 3966 vrele(nd.ni_dvp); 3967 else 3968 vput(nd.ni_dvp); 3969 if (error == ERELOOKUP) 3970 goto restart; 3971 fdout: 3972 if (fp != NULL) 3973 fdrop(fp, td); 3974 return (error); 3975 } 3976 3977 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3978 int 3979 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3980 long *basep, void (*func)(struct freebsd11_dirent *)) 3981 { 3982 struct freebsd11_dirent dstdp; 3983 struct dirent *dp, *edp; 3984 char *dirbuf; 3985 off_t base; 3986 ssize_t resid, ucount; 3987 int error; 3988 3989 /* XXX arbitrary sanity limit on `count'. 
*/ 3990 count = min(count, 64 * 1024); 3991 3992 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3993 3994 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3995 UIO_SYSSPACE); 3996 if (error != 0) 3997 goto done; 3998 if (basep != NULL) 3999 *basep = base; 4000 4001 ucount = 0; 4002 for (dp = (struct dirent *)dirbuf, 4003 edp = (struct dirent *)&dirbuf[count - resid]; 4004 ucount < count && dp < edp; ) { 4005 if (dp->d_reclen == 0) 4006 break; 4007 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4008 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4009 continue; 4010 dstdp.d_type = dp->d_type; 4011 dstdp.d_namlen = dp->d_namlen; 4012 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4013 if (dstdp.d_fileno != dp->d_fileno) { 4014 switch (ino64_trunc_error) { 4015 default: 4016 case 0: 4017 break; 4018 case 1: 4019 error = EOVERFLOW; 4020 goto done; 4021 case 2: 4022 dstdp.d_fileno = UINT32_MAX; 4023 break; 4024 } 4025 } 4026 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4027 ((dp->d_namlen + 1 + 3) &~ 3); 4028 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4029 bzero(dstdp.d_name + dstdp.d_namlen, 4030 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4031 dstdp.d_namlen); 4032 MPASS(dstdp.d_reclen <= dp->d_reclen); 4033 MPASS(ucount + dstdp.d_reclen <= count); 4034 if (func != NULL) 4035 func(&dstdp); 4036 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4037 if (error != 0) 4038 break; 4039 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4040 ucount += dstdp.d_reclen; 4041 } 4042 4043 done: 4044 free(dirbuf, M_TEMP); 4045 if (error == 0) 4046 td->td_retval[0] = ucount; 4047 return (error); 4048 } 4049 #endif /* COMPAT */ 4050 4051 #ifdef COMPAT_43 4052 static void 4053 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4054 { 4055 #if (BYTE_ORDER == LITTLE_ENDIAN) 4056 /* 4057 * The expected low byte of dp->d_namlen is our dp->d_type. 4058 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4059 */ 4060 dp->d_type = dp->d_namlen; 4061 dp->d_namlen = 0; 4062 #else 4063 /* 4064 * The dp->d_type is the high byte of the expected dp->d_namlen, 4065 * so must be zero'ed. 4066 */ 4067 dp->d_type = 0; 4068 #endif 4069 } 4070 4071 /* 4072 * Read a block of directory entries in a filesystem independent format. 4073 */ 4074 #ifndef _SYS_SYSPROTO_H_ 4075 struct ogetdirentries_args { 4076 int fd; 4077 char *buf; 4078 u_int count; 4079 long *basep; 4080 }; 4081 #endif 4082 int 4083 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4084 { 4085 long loff; 4086 int error; 4087 4088 error = kern_ogetdirentries(td, uap, &loff); 4089 if (error == 0) 4090 error = copyout(&loff, uap->basep, sizeof(long)); 4091 return (error); 4092 } 4093 4094 int 4095 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4096 long *ploff) 4097 { 4098 long base; 4099 int error; 4100 4101 /* XXX arbitrary sanity limit on `count'. */ 4102 if (uap->count > 64 * 1024) 4103 return (EINVAL); 4104 4105 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4106 &base, ogetdirentries_cvt); 4107 4108 if (error == 0 && uap->basep != NULL) 4109 error = copyout(&base, uap->basep, sizeof(long)); 4110 4111 return (error); 4112 } 4113 #endif /* COMPAT_43 */ 4114 4115 #if defined(COMPAT_FREEBSD11) 4116 #ifndef _SYS_SYSPROTO_H_ 4117 struct freebsd11_getdirentries_args { 4118 int fd; 4119 char *buf; 4120 u_int count; 4121 long *basep; 4122 }; 4123 #endif 4124 int 4125 freebsd11_getdirentries(struct thread *td, 4126 struct freebsd11_getdirentries_args *uap) 4127 { 4128 long base; 4129 int error; 4130 4131 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4132 &base, NULL); 4133 4134 if (error == 0 && uap->basep != NULL) 4135 error = copyout(&base, uap->basep, sizeof(long)); 4136 return (error); 4137 } 4138 4139 int 4140 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4141 { 4142 struct 
freebsd11_getdirentries_args ap; 4143 4144 ap.fd = uap->fd; 4145 ap.buf = uap->buf; 4146 ap.count = uap->count; 4147 ap.basep = NULL; 4148 return (freebsd11_getdirentries(td, &ap)); 4149 } 4150 #endif /* COMPAT_FREEBSD11 */ 4151 4152 /* 4153 * Read a block of directory entries in a filesystem independent format. 4154 */ 4155 int 4156 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4157 { 4158 off_t base; 4159 int error; 4160 4161 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4162 NULL, UIO_USERSPACE); 4163 if (error != 0) 4164 return (error); 4165 if (uap->basep != NULL) 4166 error = copyout(&base, uap->basep, sizeof(off_t)); 4167 return (error); 4168 } 4169 4170 int 4171 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4172 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4173 { 4174 struct vnode *vp; 4175 struct file *fp; 4176 struct uio auio; 4177 struct iovec aiov; 4178 off_t loff; 4179 int error, eofflag; 4180 off_t foffset; 4181 4182 AUDIT_ARG_FD(fd); 4183 if (count > IOSIZE_MAX) 4184 return (EINVAL); 4185 auio.uio_resid = count; 4186 error = getvnode(td, fd, &cap_read_rights, &fp); 4187 if (error != 0) 4188 return (error); 4189 if ((fp->f_flag & FREAD) == 0) { 4190 fdrop(fp, td); 4191 return (EBADF); 4192 } 4193 vp = fp->f_vnode; 4194 foffset = foffset_lock(fp, 0); 4195 unionread: 4196 if (vp->v_type != VDIR) { 4197 error = EINVAL; 4198 goto fail; 4199 } 4200 aiov.iov_base = buf; 4201 aiov.iov_len = count; 4202 auio.uio_iov = &aiov; 4203 auio.uio_iovcnt = 1; 4204 auio.uio_rw = UIO_READ; 4205 auio.uio_segflg = bufseg; 4206 auio.uio_td = td; 4207 vn_lock(vp, LK_SHARED | LK_RETRY); 4208 AUDIT_ARG_VNODE1(vp); 4209 loff = auio.uio_offset = foffset; 4210 #ifdef MAC 4211 error = mac_vnode_check_readdir(td->td_ucred, vp); 4212 if (error == 0) 4213 #endif 4214 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4215 NULL); 4216 foffset = auio.uio_offset; 4217 if (error != 0) { 4218 
VOP_UNLOCK(vp); 4219 goto fail; 4220 } 4221 if (count == auio.uio_resid && 4222 (vp->v_vflag & VV_ROOT) && 4223 (vp->v_mount->mnt_flag & MNT_UNION)) { 4224 struct vnode *tvp = vp; 4225 4226 vp = vp->v_mount->mnt_vnodecovered; 4227 VREF(vp); 4228 fp->f_vnode = vp; 4229 foffset = 0; 4230 vput(tvp); 4231 goto unionread; 4232 } 4233 VOP_UNLOCK(vp); 4234 *basep = loff; 4235 if (residp != NULL) 4236 *residp = auio.uio_resid; 4237 td->td_retval[0] = count - auio.uio_resid; 4238 fail: 4239 foffset_unlock(fp, foffset, 0); 4240 fdrop(fp, td); 4241 return (error); 4242 } 4243 4244 /* 4245 * Set the mode mask for creation of filesystem nodes. 4246 */ 4247 #ifndef _SYS_SYSPROTO_H_ 4248 struct umask_args { 4249 int newmask; 4250 }; 4251 #endif 4252 int 4253 sys_umask(struct thread *td, struct umask_args *uap) 4254 { 4255 struct pwddesc *pdp; 4256 4257 pdp = td->td_proc->p_pd; 4258 PWDDESC_XLOCK(pdp); 4259 td->td_retval[0] = pdp->pd_cmask; 4260 pdp->pd_cmask = uap->newmask & ALLPERMS; 4261 PWDDESC_XUNLOCK(pdp); 4262 return (0); 4263 } 4264 4265 /* 4266 * Void all references to file by ripping underlying filesystem away from 4267 * vnode. 
4268 */ 4269 #ifndef _SYS_SYSPROTO_H_ 4270 struct revoke_args { 4271 char *path; 4272 }; 4273 #endif 4274 int 4275 sys_revoke(struct thread *td, struct revoke_args *uap) 4276 { 4277 struct vnode *vp; 4278 struct vattr vattr; 4279 struct nameidata nd; 4280 int error; 4281 4282 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4283 uap->path, td); 4284 if ((error = namei(&nd)) != 0) 4285 return (error); 4286 vp = nd.ni_vp; 4287 NDFREE_NOTHING(&nd); 4288 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4289 error = EINVAL; 4290 goto out; 4291 } 4292 #ifdef MAC 4293 error = mac_vnode_check_revoke(td->td_ucred, vp); 4294 if (error != 0) 4295 goto out; 4296 #endif 4297 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4298 if (error != 0) 4299 goto out; 4300 if (td->td_ucred->cr_uid != vattr.va_uid) { 4301 error = priv_check(td, PRIV_VFS_ADMIN); 4302 if (error != 0) 4303 goto out; 4304 } 4305 if (devfs_usecount(vp) > 0) 4306 VOP_REVOKE(vp, REVOKEALL); 4307 out: 4308 vput(vp); 4309 return (error); 4310 } 4311 4312 /* 4313 * This variant of getvnode() allows O_PATH files. Caller should 4314 * ensure that returned file and vnode are only used for compatible 4315 * semantics. 4316 */ 4317 int 4318 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4319 struct file **fpp) 4320 { 4321 struct file *fp; 4322 int error; 4323 4324 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); 4325 if (error != 0) 4326 return (error); 4327 4328 /* 4329 * The file could be not of the vnode type, or it may be not 4330 * yet fully initialized, in which case the f_vnode pointer 4331 * may be set, but f_ops is still badfileops. E.g., 4332 * devfs_open() transiently create such situation to 4333 * facilitate csw d_fdopen(). 4334 * 4335 * Dupfdopen() handling in kern_openat() installs the 4336 * half-baked file into the process descriptor table, allowing 4337 * other thread to dereference it. Guard against the race by 4338 * checking f_ops. 
4339 */ 4340 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4341 fdrop(fp, td); 4342 return (EINVAL); 4343 } 4344 4345 *fpp = fp; 4346 return (0); 4347 } 4348 4349 /* 4350 * Convert a user file descriptor to a kernel file entry and check 4351 * that, if it is a capability, the correct rights are present. 4352 * A reference on the file entry is held upon returning. 4353 */ 4354 int 4355 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4356 { 4357 int error; 4358 4359 error = getvnode_path(td, fd, rightsp, fpp); 4360 if (__predict_false(error != 0)) 4361 return (error); 4362 4363 /* 4364 * Filter out O_PATH file descriptors, most getvnode() callers 4365 * do not call fo_ methods. 4366 */ 4367 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4368 fdrop(*fpp, td); 4369 error = EBADF; 4370 } 4371 4372 return (error); 4373 } 4374 4375 /* 4376 * Get an (NFS) file handle. 4377 */ 4378 #ifndef _SYS_SYSPROTO_H_ 4379 struct lgetfh_args { 4380 char *fname; 4381 fhandle_t *fhp; 4382 }; 4383 #endif 4384 int 4385 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4386 { 4387 4388 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4389 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4390 } 4391 4392 #ifndef _SYS_SYSPROTO_H_ 4393 struct getfh_args { 4394 char *fname; 4395 fhandle_t *fhp; 4396 }; 4397 #endif 4398 int 4399 sys_getfh(struct thread *td, struct getfh_args *uap) 4400 { 4401 4402 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4403 uap->fhp, UIO_USERSPACE)); 4404 } 4405 4406 /* 4407 * syscall for the rpc.lockd to use to translate an open descriptor into 4408 * a NFS file handle. 4409 * 4410 * warning: do not remove the priv_check() call or this becomes one giant 4411 * security hole. 
4412 */ 4413 #ifndef _SYS_SYSPROTO_H_ 4414 struct getfhat_args { 4415 int fd; 4416 char *path; 4417 fhandle_t *fhp; 4418 int flags; 4419 }; 4420 #endif 4421 int 4422 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4423 { 4424 4425 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4426 uap->fhp, UIO_USERSPACE)); 4427 } 4428 4429 int 4430 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4431 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4432 { 4433 struct nameidata nd; 4434 fhandle_t fh; 4435 struct vnode *vp; 4436 int error; 4437 4438 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4439 return (EINVAL); 4440 error = priv_check(td, PRIV_VFS_GETFH); 4441 if (error != 0) 4442 return (error); 4443 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4444 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4445 fd, td); 4446 error = namei(&nd); 4447 if (error != 0) 4448 return (error); 4449 NDFREE_NOTHING(&nd); 4450 vp = nd.ni_vp; 4451 bzero(&fh, sizeof(fh)); 4452 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4453 error = VOP_VPTOFH(vp, &fh.fh_fid); 4454 vput(vp); 4455 if (error == 0) { 4456 if (fhseg == UIO_USERSPACE) 4457 error = copyout(&fh, fhp, sizeof (fh)); 4458 else 4459 memcpy(fhp, &fh, sizeof(fh)); 4460 } 4461 return (error); 4462 } 4463 4464 #ifndef _SYS_SYSPROTO_H_ 4465 struct fhlink_args { 4466 fhandle_t *fhp; 4467 const char *to; 4468 }; 4469 #endif 4470 int 4471 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4472 { 4473 4474 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4475 } 4476 4477 #ifndef _SYS_SYSPROTO_H_ 4478 struct fhlinkat_args { 4479 fhandle_t *fhp; 4480 int tofd; 4481 const char *to; 4482 }; 4483 #endif 4484 int 4485 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4486 { 4487 4488 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4489 } 4490 4491 static int 4492 kern_fhlinkat(struct 
thread *td, int fd, const char *path, 4493 enum uio_seg pathseg, fhandle_t *fhp) 4494 { 4495 fhandle_t fh; 4496 struct mount *mp; 4497 struct vnode *vp; 4498 int error; 4499 4500 error = priv_check(td, PRIV_VFS_GETFH); 4501 if (error != 0) 4502 return (error); 4503 error = copyin(fhp, &fh, sizeof(fh)); 4504 if (error != 0) 4505 return (error); 4506 do { 4507 bwillwrite(); 4508 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4509 return (ESTALE); 4510 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4511 vfs_unbusy(mp); 4512 if (error != 0) 4513 return (error); 4514 VOP_UNLOCK(vp); 4515 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4516 } while (error == EAGAIN || error == ERELOOKUP); 4517 return (error); 4518 } 4519 4520 #ifndef _SYS_SYSPROTO_H_ 4521 struct fhreadlink_args { 4522 fhandle_t *fhp; 4523 char *buf; 4524 size_t bufsize; 4525 }; 4526 #endif 4527 int 4528 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4529 { 4530 fhandle_t fh; 4531 struct mount *mp; 4532 struct vnode *vp; 4533 int error; 4534 4535 error = priv_check(td, PRIV_VFS_GETFH); 4536 if (error != 0) 4537 return (error); 4538 if (uap->bufsize > IOSIZE_MAX) 4539 return (EINVAL); 4540 error = copyin(uap->fhp, &fh, sizeof(fh)); 4541 if (error != 0) 4542 return (error); 4543 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4544 return (ESTALE); 4545 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4546 vfs_unbusy(mp); 4547 if (error != 0) 4548 return (error); 4549 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4550 vput(vp); 4551 return (error); 4552 } 4553 4554 /* 4555 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4556 * open descriptor. 4557 * 4558 * warning: do not remove the priv_check() call or this becomes one giant 4559 * security hole. 
4560 */ 4561 #ifndef _SYS_SYSPROTO_H_ 4562 struct fhopen_args { 4563 const struct fhandle *u_fhp; 4564 int flags; 4565 }; 4566 #endif 4567 int 4568 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4569 { 4570 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4571 } 4572 4573 int 4574 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4575 { 4576 struct mount *mp; 4577 struct vnode *vp; 4578 struct fhandle fhp; 4579 struct file *fp; 4580 int fmode, error; 4581 int indx; 4582 4583 error = priv_check(td, PRIV_VFS_FHOPEN); 4584 if (error != 0) 4585 return (error); 4586 indx = -1; 4587 fmode = FFLAGS(flags); 4588 /* why not allow a non-read/write open for our lockd? */ 4589 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4590 return (EINVAL); 4591 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4592 if (error != 0) 4593 return(error); 4594 /* find the mount point */ 4595 mp = vfs_busyfs(&fhp.fh_fsid); 4596 if (mp == NULL) 4597 return (ESTALE); 4598 /* now give me my vnode, it gets returned to me locked */ 4599 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4600 vfs_unbusy(mp); 4601 if (error != 0) 4602 return (error); 4603 4604 error = falloc_noinstall(td, &fp); 4605 if (error != 0) { 4606 vput(vp); 4607 return (error); 4608 } 4609 /* 4610 * An extra reference on `fp' has been held for us by 4611 * falloc_noinstall(). 
4612 */ 4613 4614 #ifdef INVARIANTS 4615 td->td_dupfd = -1; 4616 #endif 4617 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4618 if (error != 0) { 4619 KASSERT(fp->f_ops == &badfileops, 4620 ("VOP_OPEN in fhopen() set f_ops")); 4621 KASSERT(td->td_dupfd < 0, 4622 ("fhopen() encountered fdopen()")); 4623 4624 vput(vp); 4625 goto bad; 4626 } 4627 #ifdef INVARIANTS 4628 td->td_dupfd = 0; 4629 #endif 4630 fp->f_vnode = vp; 4631 finit_vnode(fp, fmode, NULL, &vnops); 4632 VOP_UNLOCK(vp); 4633 if ((fmode & O_TRUNC) != 0) { 4634 error = fo_truncate(fp, 0, td->td_ucred, td); 4635 if (error != 0) 4636 goto bad; 4637 } 4638 4639 error = finstall(td, fp, &indx, fmode, NULL); 4640 bad: 4641 fdrop(fp, td); 4642 td->td_retval[0] = indx; 4643 return (error); 4644 } 4645 4646 /* 4647 * Stat an (NFS) file handle. 4648 */ 4649 #ifndef _SYS_SYSPROTO_H_ 4650 struct fhstat_args { 4651 struct fhandle *u_fhp; 4652 struct stat *sb; 4653 }; 4654 #endif 4655 int 4656 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4657 { 4658 struct stat sb; 4659 struct fhandle fh; 4660 int error; 4661 4662 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4663 if (error != 0) 4664 return (error); 4665 error = kern_fhstat(td, fh, &sb); 4666 if (error == 0) 4667 error = copyout(&sb, uap->sb, sizeof(sb)); 4668 return (error); 4669 } 4670 4671 int 4672 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4673 { 4674 struct mount *mp; 4675 struct vnode *vp; 4676 int error; 4677 4678 error = priv_check(td, PRIV_VFS_FHSTAT); 4679 if (error != 0) 4680 return (error); 4681 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4682 return (ESTALE); 4683 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4684 vfs_unbusy(mp); 4685 if (error != 0) 4686 return (error); 4687 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4688 vput(vp); 4689 return (error); 4690 } 4691 4692 /* 4693 * Implement fstatfs() for (NFS) file handles. 
4694 */ 4695 #ifndef _SYS_SYSPROTO_H_ 4696 struct fhstatfs_args { 4697 struct fhandle *u_fhp; 4698 struct statfs *buf; 4699 }; 4700 #endif 4701 int 4702 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4703 { 4704 struct statfs *sfp; 4705 fhandle_t fh; 4706 int error; 4707 4708 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4709 if (error != 0) 4710 return (error); 4711 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4712 error = kern_fhstatfs(td, fh, sfp); 4713 if (error == 0) 4714 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4715 free(sfp, M_STATFS); 4716 return (error); 4717 } 4718 4719 int 4720 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4721 { 4722 struct mount *mp; 4723 struct vnode *vp; 4724 int error; 4725 4726 error = priv_check(td, PRIV_VFS_FHSTATFS); 4727 if (error != 0) 4728 return (error); 4729 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4730 return (ESTALE); 4731 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4732 if (error != 0) { 4733 vfs_unbusy(mp); 4734 return (error); 4735 } 4736 vput(vp); 4737 error = prison_canseemount(td->td_ucred, mp); 4738 if (error != 0) 4739 goto out; 4740 #ifdef MAC 4741 error = mac_mount_check_stat(td->td_ucred, mp); 4742 if (error != 0) 4743 goto out; 4744 #endif 4745 error = VFS_STATFS(mp, buf); 4746 out: 4747 vfs_unbusy(mp); 4748 return (error); 4749 } 4750 4751 /* 4752 * Unlike madvise(2), we do not make a best effort to remember every 4753 * possible caching hint. Instead, we remember the last setting with 4754 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4755 * region of any current setting. 
4756 */ 4757 int 4758 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4759 int advice) 4760 { 4761 struct fadvise_info *fa, *new; 4762 struct file *fp; 4763 struct vnode *vp; 4764 off_t end; 4765 int error; 4766 4767 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4768 return (EINVAL); 4769 AUDIT_ARG_VALUE(advice); 4770 switch (advice) { 4771 case POSIX_FADV_SEQUENTIAL: 4772 case POSIX_FADV_RANDOM: 4773 case POSIX_FADV_NOREUSE: 4774 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4775 break; 4776 case POSIX_FADV_NORMAL: 4777 case POSIX_FADV_WILLNEED: 4778 case POSIX_FADV_DONTNEED: 4779 new = NULL; 4780 break; 4781 default: 4782 return (EINVAL); 4783 } 4784 /* XXX: CAP_POSIX_FADVISE? */ 4785 AUDIT_ARG_FD(fd); 4786 error = fget(td, fd, &cap_no_rights, &fp); 4787 if (error != 0) 4788 goto out; 4789 AUDIT_ARG_FILE(td->td_proc, fp); 4790 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4791 error = ESPIPE; 4792 goto out; 4793 } 4794 if (fp->f_type != DTYPE_VNODE) { 4795 error = ENODEV; 4796 goto out; 4797 } 4798 vp = fp->f_vnode; 4799 if (vp->v_type != VREG) { 4800 error = ENODEV; 4801 goto out; 4802 } 4803 if (len == 0) 4804 end = OFF_MAX; 4805 else 4806 end = offset + len - 1; 4807 switch (advice) { 4808 case POSIX_FADV_SEQUENTIAL: 4809 case POSIX_FADV_RANDOM: 4810 case POSIX_FADV_NOREUSE: 4811 /* 4812 * Try to merge any existing non-standard region with 4813 * this new region if possible, otherwise create a new 4814 * non-standard region for this request. 
4815 */ 4816 mtx_pool_lock(mtxpool_sleep, fp); 4817 fa = fp->f_advice; 4818 if (fa != NULL && fa->fa_advice == advice && 4819 ((fa->fa_start <= end && fa->fa_end >= offset) || 4820 (end != OFF_MAX && fa->fa_start == end + 1) || 4821 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4822 if (offset < fa->fa_start) 4823 fa->fa_start = offset; 4824 if (end > fa->fa_end) 4825 fa->fa_end = end; 4826 } else { 4827 new->fa_advice = advice; 4828 new->fa_start = offset; 4829 new->fa_end = end; 4830 fp->f_advice = new; 4831 new = fa; 4832 } 4833 mtx_pool_unlock(mtxpool_sleep, fp); 4834 break; 4835 case POSIX_FADV_NORMAL: 4836 /* 4837 * If a the "normal" region overlaps with an existing 4838 * non-standard region, trim or remove the 4839 * non-standard region. 4840 */ 4841 mtx_pool_lock(mtxpool_sleep, fp); 4842 fa = fp->f_advice; 4843 if (fa != NULL) { 4844 if (offset <= fa->fa_start && end >= fa->fa_end) { 4845 new = fa; 4846 fp->f_advice = NULL; 4847 } else if (offset <= fa->fa_start && 4848 end >= fa->fa_start) 4849 fa->fa_start = end + 1; 4850 else if (offset <= fa->fa_end && end >= fa->fa_end) 4851 fa->fa_end = offset - 1; 4852 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4853 /* 4854 * If the "normal" region is a middle 4855 * portion of the existing 4856 * non-standard region, just remove 4857 * the whole thing rather than picking 4858 * one side or the other to 4859 * preserve. 
4860 */ 4861 new = fa; 4862 fp->f_advice = NULL; 4863 } 4864 } 4865 mtx_pool_unlock(mtxpool_sleep, fp); 4866 break; 4867 case POSIX_FADV_WILLNEED: 4868 case POSIX_FADV_DONTNEED: 4869 error = VOP_ADVISE(vp, offset, end, advice); 4870 break; 4871 } 4872 out: 4873 if (fp != NULL) 4874 fdrop(fp, td); 4875 free(new, M_FADVISE); 4876 return (error); 4877 } 4878 4879 int 4880 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4881 { 4882 int error; 4883 4884 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4885 uap->advice); 4886 return (kern_posix_error(td, error)); 4887 } 4888 4889 int 4890 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4891 off_t *outoffp, size_t len, unsigned int flags) 4892 { 4893 struct file *infp, *outfp; 4894 struct vnode *invp, *outvp; 4895 int error; 4896 size_t retlen; 4897 void *rl_rcookie, *rl_wcookie; 4898 off_t savinoff, savoutoff; 4899 4900 infp = outfp = NULL; 4901 rl_rcookie = rl_wcookie = NULL; 4902 savinoff = -1; 4903 error = 0; 4904 retlen = 0; 4905 4906 if (flags != 0) { 4907 error = EINVAL; 4908 goto out; 4909 } 4910 if (len > SSIZE_MAX) 4911 /* 4912 * Although the len argument is size_t, the return argument 4913 * is ssize_t (which is signed). Therefore a size that won't 4914 * fit in ssize_t can't be returned. 4915 */ 4916 len = SSIZE_MAX; 4917 4918 /* Get the file structures for the file descriptors. */ 4919 error = fget_read(td, infd, &cap_read_rights, &infp); 4920 if (error != 0) 4921 goto out; 4922 if (infp->f_ops == &badfileops) { 4923 error = EBADF; 4924 goto out; 4925 } 4926 if (infp->f_vnode == NULL) { 4927 error = EINVAL; 4928 goto out; 4929 } 4930 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4931 if (error != 0) 4932 goto out; 4933 if (outfp->f_ops == &badfileops) { 4934 error = EBADF; 4935 goto out; 4936 } 4937 if (outfp->f_vnode == NULL) { 4938 error = EINVAL; 4939 goto out; 4940 } 4941 4942 /* Set the offset pointers to the correct place. 
*/ 4943 if (inoffp == NULL) 4944 inoffp = &infp->f_offset; 4945 if (outoffp == NULL) 4946 outoffp = &outfp->f_offset; 4947 savinoff = *inoffp; 4948 savoutoff = *outoffp; 4949 4950 invp = infp->f_vnode; 4951 outvp = outfp->f_vnode; 4952 /* Sanity check the f_flag bits. */ 4953 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4954 (infp->f_flag & FREAD) == 0) { 4955 error = EBADF; 4956 goto out; 4957 } 4958 4959 /* If len == 0, just return 0. */ 4960 if (len == 0) 4961 goto out; 4962 4963 /* 4964 * If infp and outfp refer to the same file, the byte ranges cannot 4965 * overlap. 4966 */ 4967 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4968 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4969 savinoff))) { 4970 error = EINVAL; 4971 goto out; 4972 } 4973 4974 /* Range lock the byte ranges for both invp and outvp. */ 4975 for (;;) { 4976 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4977 len); 4978 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4979 len); 4980 if (rl_rcookie != NULL) 4981 break; 4982 vn_rangelock_unlock(outvp, rl_wcookie); 4983 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4984 vn_rangelock_unlock(invp, rl_rcookie); 4985 } 4986 4987 retlen = len; 4988 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4989 flags, infp->f_cred, outfp->f_cred, td); 4990 out: 4991 if (rl_rcookie != NULL) 4992 vn_rangelock_unlock(invp, rl_rcookie); 4993 if (rl_wcookie != NULL) 4994 vn_rangelock_unlock(outvp, rl_wcookie); 4995 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4996 *inoffp = savinoff; 4997 *outoffp = savoutoff; 4998 } 4999 if (outfp != NULL) 5000 fdrop(outfp, td); 5001 if (infp != NULL) 5002 fdrop(infp, td); 5003 td->td_retval[0] = retlen; 5004 return (error); 5005 } 5006 5007 int 5008 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5009 { 5010 off_t inoff, outoff, *inoffp, *outoffp; 5011 int error; 5012 5013 inoffp = outoffp = 
NULL; 5014 if (uap->inoffp != NULL) { 5015 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5016 if (error != 0) 5017 return (error); 5018 inoffp = &inoff; 5019 } 5020 if (uap->outoffp != NULL) { 5021 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5022 if (error != 0) 5023 return (error); 5024 outoffp = &outoff; 5025 } 5026 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5027 outoffp, uap->len, uap->flags); 5028 if (error == 0 && uap->inoffp != NULL) 5029 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5030 if (error == 0 && uap->outoffp != NULL) 5031 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5032 return (error); 5033 } 5034