1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #define EXTERR_CATEGORY EXTERR_CAT_VFSSYSCALL 41 #include <sys/systm.h> 42 #ifdef COMPAT_FREEBSD11 43 #include <sys/abi_compat.h> 44 #endif 45 #include <sys/bio.h> 46 #include <sys/buf.h> 47 #include <sys/capsicum.h> 48 #include <sys/disk.h> 49 #include <sys/dirent.h> 50 #include <sys/exterrvar.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/filio.h> 55 #include <sys/jail.h> 56 #include <sys/kernel.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/malloc.h> 63 #include <sys/mount.h> 64 #include <sys/mutex.h> 65 #include <sys/namei.h> 66 #include <sys/priv.h> 67 #include <sys/proc.h> 68 #include <sys/rwlock.h> 69 #include <sys/sdt.h> 70 #include <sys/stat.h> 71 #include <sys/stdarg.h> 72 #include <sys/sx.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #include <sys/sysproto.h> 76 #include <sys/unistd.h> 77 #include <sys/vnode.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vnode_pager.h> 86 #include <vm/uma.h> 87 88 #include <fs/devfs/devfs.h> 89 90 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 91 92 static int kern_chflagsat(struct thread *td, int fd, const char *path, 93 enum uio_seg pathseg, u_long flags, int atflag); 94 static int setfflags(struct thread *td, struct vnode *, u_long); 95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 96 static int getutimens(const struct timespec *, enum uio_seg, 97 struct timespec *, int *); 98 static int setutimes(struct thread *td, struct vnode *, 99 const struct timespec *, int, int); 100 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 101 struct thread *td); 102 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 103 enum uio_seg pathseg, fhandle_t *fhp); 104 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 105 size_t count, struct thread *td); 106 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 107 const char *path, enum uio_seg segflag); 108 109 uint64_t 110 at2cnpflags(u_int at_flags, u_int mask) 111 { 112 uint64_t res; 113 114 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 115 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 116 117 res = 0; 118 at_flags &= mask; 119 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 120 res |= RBENEATH; 121 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 122 res |= FOLLOW; 123 /* NOFOLLOW is pseudo flag */ 124 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 125 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 126 FOLLOW; 127 } 128 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 129 res |= EMPTYPATH; 130 return (res); 131 } 132 133 int 134 kern_sync(struct thread *td) 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_periodic(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Sync each mounted filesystem. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct sync_args { 166 int dummy; 167 }; 168 #endif 169 /* ARGSUSED */ 170 int 171 sys_sync(struct thread *td, struct sync_args *uap) 172 { 173 174 return (kern_sync(td)); 175 } 176 177 /* 178 * Change filesystem quotas. 179 */ 180 #ifndef _SYS_SYSPROTO_H_ 181 struct quotactl_args { 182 char *path; 183 int cmd; 184 int uid; 185 caddr_t arg; 186 }; 187 #endif 188 int 189 sys_quotactl(struct thread *td, struct quotactl_args *uap) 190 { 191 struct mount *mp; 192 struct nameidata nd; 193 int error; 194 bool mp_busy; 195 196 AUDIT_ARG_CMD(uap->cmd); 197 AUDIT_ARG_UID(uap->uid); 198 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 199 return (EPERM); 200 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 201 uap->path); 202 if ((error = namei(&nd)) != 0) 203 return (error); 204 NDFREE_PNBUF(&nd); 205 mp = nd.ni_vp->v_mount; 206 vfs_ref(mp); 207 vput(nd.ni_vp); 208 error = vfs_busy(mp, 0); 209 if (error != 0) { 210 vfs_rel(mp); 211 return (error); 212 } 213 mp_busy = true; 214 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 215 216 /* 217 * Since quota on/off operations typically need to open quota 218 * files, the implementation may need to unbusy the mount point 219 * before calling into namei. Otherwise, unmount might be 220 * started between two vfs_busy() invocations (first is ours, 221 * second is from mount point cross-walk code in lookup()), 222 * causing deadlock. 223 * 224 * Avoid unbusying mp if the implementation indicates it has 225 * already done so. 226 */ 227 if (mp_busy) 228 vfs_unbusy(mp); 229 vfs_rel(mp); 230 return (error); 231 } 232 233 /* 234 * Used by statfs conversion routines to scale the block size up if 235 * necessary so that all of the block counts are <= 'max_size'. Note 236 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 237 * value of 'n'. 238 */ 239 void 240 statfs_scale_blocks(struct statfs *sf, long max_size) 241 { 242 uint64_t count; 243 int shift; 244 245 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 246 247 /* 248 * Attempt to scale the block counts to give a more accurate 249 * overview to userland of the ratio of free space to used 250 * space. To do this, find the largest block count and compute 251 * a divisor that lets it fit into a signed integer <= max_size. 252 */ 253 if (sf->f_bavail < 0) 254 count = -sf->f_bavail; 255 else 256 count = sf->f_bavail; 257 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 258 if (count <= max_size) 259 return; 260 261 count >>= flsl(max_size); 262 shift = 0; 263 while (count > 0) { 264 shift++; 265 count >>=1; 266 } 267 268 sf->f_bsize <<= shift; 269 sf->f_blocks >>= shift; 270 sf->f_bfree >>= shift; 271 sf->f_bavail >>= shift; 272 } 273 274 static int 275 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 276 { 277 int error; 278 279 if (mp == NULL) 280 return (EBADF); 281 error = vfs_busy(mp, 0); 282 vfs_rel(mp); 283 if (error != 0) 284 return (error); 285 #ifdef MAC 286 error = mac_mount_check_stat(td->td_ucred, mp); 287 if (error != 0) 288 goto out; 289 #endif 290 error = VFS_STATFS(mp, buf); 291 if (error != 0) 292 goto out; 293 if (priv_check_cred_vfs_generation(td->td_ucred)) { 294 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 295 prison_enforce_statfs(td->td_ucred, mp, buf); 296 } 297 out: 298 vfs_unbusy(mp); 299 return (error); 300 } 301 302 /* 303 * Get filesystem statistics. 304 */ 305 #ifndef _SYS_SYSPROTO_H_ 306 struct statfs_args { 307 char *path; 308 struct statfs *buf; 309 }; 310 #endif 311 int 312 sys_statfs(struct thread *td, struct statfs_args *uap) 313 { 314 struct statfs *sfp; 315 int error; 316 317 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 318 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 319 if (error == 0) 320 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 321 free(sfp, M_STATFS); 322 return (error); 323 } 324 325 int 326 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 327 struct statfs *buf) 328 { 329 struct mount *mp; 330 struct nameidata nd; 331 int error; 332 333 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 334 error = namei(&nd); 335 if (error != 0) 336 return (error); 337 NDFREE_PNBUF(&nd); 338 mp = vfs_ref_from_vp(nd.ni_vp); 339 vrele(nd.ni_vp); 340 return (kern_do_statfs(td, mp, buf)); 341 } 342 343 /* 344 * Get filesystem statistics. 345 */ 346 #ifndef _SYS_SYSPROTO_H_ 347 struct fstatfs_args { 348 int fd; 349 struct statfs *buf; 350 }; 351 #endif 352 int 353 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 354 { 355 struct statfs *sfp; 356 int error; 357 358 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 359 error = kern_fstatfs(td, uap->fd, sfp); 360 if (error == 0) 361 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 362 free(sfp, M_STATFS); 363 return (error); 364 } 365 366 int 367 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 368 { 369 struct file *fp; 370 struct mount *mp; 371 struct vnode *vp; 372 int error; 373 374 AUDIT_ARG_FD(fd); 375 error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp); 376 if (error != 0) 377 return (error); 378 vp = fp->f_vnode; 379 #ifdef AUDIT 380 if (AUDITING_TD(td)) { 381 vn_lock(vp, LK_SHARED | LK_RETRY); 382 AUDIT_ARG_VNODE1(vp); 383 VOP_UNLOCK(vp); 384 } 385 #endif 386 mp = vfs_ref_from_vp(vp); 387 fdrop(fp, td); 388 return (kern_do_statfs(td, mp, buf)); 389 } 390 391 /* 392 * Get statistics on all filesystems. 393 */ 394 #ifndef _SYS_SYSPROTO_H_ 395 struct getfsstat_args { 396 struct statfs *buf; 397 long bufsize; 398 int mode; 399 }; 400 #endif 401 int 402 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 403 { 404 size_t count; 405 int error; 406 407 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 408 return (EINVAL); 409 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 410 UIO_USERSPACE, uap->mode); 411 if (error == 0) 412 td->td_retval[0] = count; 413 return (error); 414 } 415 416 /* 417 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 418 * The caller is responsible for freeing memory which will be allocated 419 * in '*buf'. 420 */ 421 int 422 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 423 size_t *countp, enum uio_seg bufseg, int mode) 424 { 425 struct mount *mp, *nmp; 426 struct statfs *sfsp, *sp, *sptmp, *tofree; 427 size_t count, maxcount; 428 int error; 429 430 switch (mode) { 431 case MNT_WAIT: 432 case MNT_NOWAIT: 433 break; 434 default: 435 if (bufseg == UIO_SYSSPACE) 436 *buf = NULL; 437 return (EINVAL); 438 } 439 restart: 440 maxcount = bufsize / sizeof(struct statfs); 441 if (bufsize == 0) { 442 sfsp = NULL; 443 tofree = NULL; 444 } else if (bufseg == UIO_USERSPACE) { 445 sfsp = *buf; 446 tofree = NULL; 447 } else /* if (bufseg == UIO_SYSSPACE) */ { 448 count = 0; 449 mtx_lock(&mountlist_mtx); 450 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 451 count++; 452 } 453 mtx_unlock(&mountlist_mtx); 454 if (maxcount > count) 455 maxcount = count; 456 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 457 M_STATFS, M_WAITOK); 458 } 459 460 count = 0; 461 462 /* 463 * If there is no target buffer they only want the count. 464 * 465 * This could be TAILQ_FOREACH but it is open-coded to match the original 466 * code below. 467 */ 468 if (sfsp == NULL) { 469 mtx_lock(&mountlist_mtx); 470 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 471 if (prison_canseemount(td->td_ucred, mp) != 0) { 472 nmp = TAILQ_NEXT(mp, mnt_list); 473 continue; 474 } 475 #ifdef MAC 476 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 477 nmp = TAILQ_NEXT(mp, mnt_list); 478 continue; 479 } 480 #endif 481 count++; 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 } 484 mtx_unlock(&mountlist_mtx); 485 *countp = count; 486 return (0); 487 } 488 489 /* 490 * They want the entire thing. 491 * 492 * Short-circuit the corner case of no room for anything, avoids 493 * relocking below. 494 */ 495 if (maxcount < 1) { 496 goto out; 497 } 498 499 mtx_lock(&mountlist_mtx); 500 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 501 if (prison_canseemount(td->td_ucred, mp) != 0) { 502 nmp = TAILQ_NEXT(mp, mnt_list); 503 continue; 504 } 505 #ifdef MAC 506 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 507 nmp = TAILQ_NEXT(mp, mnt_list); 508 continue; 509 } 510 #endif 511 if (mode == MNT_WAIT) { 512 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 513 /* 514 * If vfs_busy() failed, and MBF_NOWAIT 515 * wasn't passed, then the mp is gone. 516 * Furthermore, because of MBF_MNTLSTLOCK, 517 * the mountlist_mtx was dropped. We have 518 * no other choice than to start over. 519 */ 520 mtx_unlock(&mountlist_mtx); 521 free(tofree, M_STATFS); 522 goto restart; 523 } 524 } else { 525 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 526 nmp = TAILQ_NEXT(mp, mnt_list); 527 continue; 528 } 529 } 530 sp = &mp->mnt_stat; 531 /* 532 * If MNT_NOWAIT is specified, do not refresh 533 * the fsstat cache. 534 */ 535 if (mode != MNT_NOWAIT) { 536 error = VFS_STATFS(mp, sp); 537 if (error != 0) { 538 mtx_lock(&mountlist_mtx); 539 nmp = TAILQ_NEXT(mp, mnt_list); 540 vfs_unbusy(mp); 541 continue; 542 } 543 } 544 if (priv_check_cred_vfs_generation(td->td_ucred)) { 545 sptmp = malloc(sizeof(struct statfs), M_STATFS, 546 M_WAITOK); 547 *sptmp = *sp; 548 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 549 prison_enforce_statfs(td->td_ucred, mp, sptmp); 550 sp = sptmp; 551 } else 552 sptmp = NULL; 553 if (bufseg == UIO_SYSSPACE) { 554 bcopy(sp, sfsp, sizeof(*sp)); 555 free(sptmp, M_STATFS); 556 } else /* if (bufseg == UIO_USERSPACE) */ { 557 error = copyout(sp, sfsp, sizeof(*sp)); 558 free(sptmp, M_STATFS); 559 if (error != 0) { 560 vfs_unbusy(mp); 561 return (error); 562 } 563 } 564 sfsp++; 565 count++; 566 567 if (count == maxcount) { 568 vfs_unbusy(mp); 569 goto out; 570 } 571 572 mtx_lock(&mountlist_mtx); 573 nmp = TAILQ_NEXT(mp, mnt_list); 574 vfs_unbusy(mp); 575 } 576 mtx_unlock(&mountlist_mtx); 577 out: 578 *countp = count; 579 return (0); 580 } 581 582 #ifdef COMPAT_FREEBSD4 583 /* 584 * Get old format filesystem statistics. 585 */ 586 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 587 588 #ifndef _SYS_SYSPROTO_H_ 589 struct freebsd4_statfs_args { 590 char *path; 591 struct ostatfs *buf; 592 }; 593 #endif 594 int 595 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 596 { 597 struct ostatfs osb; 598 struct statfs *sfp; 599 int error; 600 601 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 602 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 603 if (error == 0) { 604 freebsd4_cvtstatfs(sfp, &osb); 605 error = copyout(&osb, uap->buf, sizeof(osb)); 606 } 607 free(sfp, M_STATFS); 608 return (error); 609 } 610 611 /* 612 * Get filesystem statistics. 613 */ 614 #ifndef _SYS_SYSPROTO_H_ 615 struct freebsd4_fstatfs_args { 616 int fd; 617 struct ostatfs *buf; 618 }; 619 #endif 620 int 621 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 622 { 623 struct ostatfs osb; 624 struct statfs *sfp; 625 int error; 626 627 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 628 error = kern_fstatfs(td, uap->fd, sfp); 629 if (error == 0) { 630 freebsd4_cvtstatfs(sfp, &osb); 631 error = copyout(&osb, uap->buf, sizeof(osb)); 632 } 633 free(sfp, M_STATFS); 634 return (error); 635 } 636 637 /* 638 * Get statistics on all filesystems. 639 */ 640 #ifndef _SYS_SYSPROTO_H_ 641 struct freebsd4_getfsstat_args { 642 struct ostatfs *buf; 643 long bufsize; 644 int mode; 645 }; 646 #endif 647 int 648 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 649 { 650 struct statfs *buf, *sp; 651 struct ostatfs osb; 652 size_t count, size; 653 int error; 654 655 if (uap->bufsize < 0) 656 return (EINVAL); 657 count = uap->bufsize / sizeof(struct ostatfs); 658 if (count > SIZE_MAX / sizeof(struct statfs)) 659 return (EINVAL); 660 size = count * sizeof(struct statfs); 661 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 662 uap->mode); 663 if (error == 0) 664 td->td_retval[0] = count; 665 if (size != 0) { 666 sp = buf; 667 while (count != 0 && error == 0) { 668 freebsd4_cvtstatfs(sp, &osb); 669 error = copyout(&osb, uap->buf, sizeof(osb)); 670 sp++; 671 uap->buf++; 672 count--; 673 } 674 free(buf, M_STATFS); 675 } 676 return (error); 677 } 678 679 /* 680 * Implement fstatfs() for (NFS) file handles. 681 */ 682 #ifndef _SYS_SYSPROTO_H_ 683 struct freebsd4_fhstatfs_args { 684 struct fhandle *u_fhp; 685 struct ostatfs *buf; 686 }; 687 #endif 688 int 689 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 690 { 691 struct ostatfs osb; 692 struct statfs *sfp; 693 fhandle_t fh; 694 int error; 695 696 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 697 if (error != 0) 698 return (error); 699 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 700 error = kern_fhstatfs(td, fh, sfp); 701 if (error == 0) { 702 freebsd4_cvtstatfs(sfp, &osb); 703 error = copyout(&osb, uap->buf, sizeof(osb)); 704 } 705 free(sfp, M_STATFS); 706 return (error); 707 } 708 709 /* 710 * Convert a new format statfs structure to an old format statfs structure. 711 */ 712 static void 713 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 714 { 715 716 statfs_scale_blocks(nsp, LONG_MAX); 717 bzero(osp, sizeof(*osp)); 718 osp->f_bsize = nsp->f_bsize; 719 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 720 osp->f_blocks = nsp->f_blocks; 721 osp->f_bfree = nsp->f_bfree; 722 osp->f_bavail = nsp->f_bavail; 723 osp->f_files = MIN(nsp->f_files, LONG_MAX); 724 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 725 osp->f_owner = nsp->f_owner; 726 osp->f_type = nsp->f_type; 727 osp->f_flags = nsp->f_flags; 728 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 729 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 730 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 731 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 732 strlcpy(osp->f_fstypename, nsp->f_fstypename, 733 MIN(MFSNAMELEN, OMFSNAMELEN)); 734 strlcpy(osp->f_mntonname, nsp->f_mntonname, 735 MIN(MNAMELEN, OMNAMELEN)); 736 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 737 MIN(MNAMELEN, OMNAMELEN)); 738 osp->f_fsid = nsp->f_fsid; 739 } 740 #endif /* COMPAT_FREEBSD4 */ 741 742 #if defined(COMPAT_FREEBSD11) 743 /* 744 * Get old format filesystem statistics. 745 */ 746 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 747 748 int 749 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 750 { 751 struct freebsd11_statfs osb; 752 struct statfs *sfp; 753 int error; 754 755 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 756 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 757 if (error == 0) { 758 freebsd11_cvtstatfs(sfp, &osb); 759 error = copyout(&osb, uap->buf, sizeof(osb)); 760 } 761 free(sfp, M_STATFS); 762 return (error); 763 } 764 765 /* 766 * Get filesystem statistics. 767 */ 768 int 769 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 770 { 771 struct freebsd11_statfs osb; 772 struct statfs *sfp; 773 int error; 774 775 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 776 error = kern_fstatfs(td, uap->fd, sfp); 777 if (error == 0) { 778 freebsd11_cvtstatfs(sfp, &osb); 779 error = copyout(&osb, uap->buf, sizeof(osb)); 780 } 781 free(sfp, M_STATFS); 782 return (error); 783 } 784 785 /* 786 * Get statistics on all filesystems. 787 */ 788 int 789 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 790 { 791 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 792 } 793 794 int 795 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 796 long bufsize, int mode) 797 { 798 struct freebsd11_statfs osb; 799 struct statfs *buf, *sp; 800 size_t count, size; 801 int error; 802 803 if (bufsize < 0) 804 return (EINVAL); 805 806 count = bufsize / sizeof(struct ostatfs); 807 size = count * sizeof(struct statfs); 808 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 809 if (error == 0) 810 td->td_retval[0] = count; 811 if (size > 0) { 812 sp = buf; 813 while (count > 0 && error == 0) { 814 freebsd11_cvtstatfs(sp, &osb); 815 error = copyout(&osb, ubuf, sizeof(osb)); 816 sp++; 817 ubuf++; 818 count--; 819 } 820 free(buf, M_STATFS); 821 } 822 return (error); 823 } 824 825 /* 826 * Implement fstatfs() for (NFS) file handles. 827 */ 828 int 829 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 830 { 831 struct freebsd11_statfs osb; 832 struct statfs *sfp; 833 fhandle_t fh; 834 int error; 835 836 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 837 if (error) 838 return (error); 839 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 840 error = kern_fhstatfs(td, fh, sfp); 841 if (error == 0) { 842 freebsd11_cvtstatfs(sfp, &osb); 843 error = copyout(&osb, uap->buf, sizeof(osb)); 844 } 845 free(sfp, M_STATFS); 846 return (error); 847 } 848 849 /* 850 * Convert a new format statfs structure to an old format statfs structure. 851 */ 852 static void 853 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 854 { 855 856 bzero(osp, sizeof(*osp)); 857 osp->f_version = FREEBSD11_STATFS_VERSION; 858 osp->f_type = nsp->f_type; 859 osp->f_flags = nsp->f_flags; 860 osp->f_bsize = nsp->f_bsize; 861 osp->f_iosize = nsp->f_iosize; 862 osp->f_blocks = nsp->f_blocks; 863 osp->f_bfree = nsp->f_bfree; 864 osp->f_bavail = nsp->f_bavail; 865 osp->f_files = nsp->f_files; 866 osp->f_ffree = nsp->f_ffree; 867 osp->f_syncwrites = nsp->f_syncwrites; 868 osp->f_asyncwrites = nsp->f_asyncwrites; 869 osp->f_syncreads = nsp->f_syncreads; 870 osp->f_asyncreads = nsp->f_asyncreads; 871 osp->f_namemax = nsp->f_namemax; 872 osp->f_owner = nsp->f_owner; 873 osp->f_fsid = nsp->f_fsid; 874 strlcpy(osp->f_fstypename, nsp->f_fstypename, 875 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 876 strlcpy(osp->f_mntonname, nsp->f_mntonname, 877 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 878 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 879 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 880 } 881 #endif /* COMPAT_FREEBSD11 */ 882 883 /* 884 * Change current working directory to a given file descriptor. 885 */ 886 #ifndef _SYS_SYSPROTO_H_ 887 struct fchdir_args { 888 int fd; 889 }; 890 #endif 891 int 892 sys_fchdir(struct thread *td, struct fchdir_args *uap) 893 { 894 struct vnode *vp, *tdp; 895 struct mount *mp; 896 struct file *fp; 897 int error; 898 uint8_t fdflags; 899 900 AUDIT_ARG_FD(uap->fd); 901 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags, 902 &fp); 903 if (error != 0) 904 return (error); 905 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 906 fdrop(fp, td); 907 return (ENOTCAPABLE); 908 } 909 vp = fp->f_vnode; 910 vrefact(vp); 911 fdrop(fp, td); 912 vn_lock(vp, LK_SHARED | LK_RETRY); 913 AUDIT_ARG_VNODE1(vp); 914 error = change_dir(vp, td); 915 while (!error && (mp = vp->v_mountedhere) != NULL) { 916 if (vfs_busy(mp, 0)) 917 continue; 918 error = VFS_ROOT(mp, LK_SHARED, &tdp); 919 vfs_unbusy(mp); 920 if (error != 0) 921 break; 922 vput(vp); 923 vp = tdp; 924 } 925 if (error != 0) { 926 vput(vp); 927 return (error); 928 } 929 VOP_UNLOCK(vp); 930 pwd_chdir(td, vp); 931 return (0); 932 } 933 934 /* 935 * Change current working directory (``.''). 936 */ 937 #ifndef _SYS_SYSPROTO_H_ 938 struct chdir_args { 939 char *path; 940 }; 941 #endif 942 int 943 sys_chdir(struct thread *td, struct chdir_args *uap) 944 { 945 946 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 947 } 948 949 int 950 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 951 { 952 struct nameidata nd; 953 int error; 954 955 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 956 pathseg, path); 957 if ((error = namei(&nd)) != 0) 958 return (error); 959 if ((error = change_dir(nd.ni_vp, td)) != 0) { 960 vput(nd.ni_vp); 961 NDFREE_PNBUF(&nd); 962 return (error); 963 } 964 VOP_UNLOCK(nd.ni_vp); 965 NDFREE_PNBUF(&nd); 966 pwd_chdir(td, nd.ni_vp); 967 return (0); 968 } 969 970 static int unprivileged_chroot = 0; 971 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 972 &unprivileged_chroot, 0, 973 "Unprivileged processes can use chroot(2)"); 974 975 /* 976 * Takes locked vnode, unlocks it before returning. 977 */ 978 static int 979 kern_chroot(struct thread *td, struct vnode *vp) 980 { 981 struct proc *p; 982 int error; 983 984 error = priv_check(td, PRIV_VFS_CHROOT); 985 if (error != 0) { 986 p = td->td_proc; 987 if (unprivileged_chroot == 0) { 988 error = EXTERROR(EPERM, 989 "security.bsd.unprivileged_chroot sysctl not enabled"); 990 goto e_vunlock; 991 } 992 if ((p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 993 error = EXTERROR(EPERM, 994 "PROC_NO_NEW_PRIVS not enabled"); 995 goto e_vunlock; 996 } 997 } 998 999 error = change_dir(vp, td); 1000 if (error != 0) 1001 goto e_vunlock; 1002 #ifdef MAC 1003 error = mac_vnode_check_chroot(td->td_ucred, vp); 1004 if (error != 0) 1005 goto e_vunlock; 1006 #endif 1007 VOP_UNLOCK(vp); 1008 error = pwd_chroot(td, vp); 1009 vrele(vp); 1010 return (error); 1011 e_vunlock: 1012 vput(vp); 1013 return (error); 1014 } 1015 1016 /* 1017 * Change notion of root (``/'') directory. 1018 */ 1019 #ifndef _SYS_SYSPROTO_H_ 1020 struct chroot_args { 1021 char *path; 1022 }; 1023 #endif 1024 int 1025 sys_chroot(struct thread *td, struct chroot_args *uap) 1026 { 1027 struct nameidata nd; 1028 int error; 1029 1030 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1031 UIO_USERSPACE, uap->path); 1032 error = namei(&nd); 1033 if (error != 0) 1034 return (error); 1035 NDFREE_PNBUF(&nd); 1036 error = kern_chroot(td, nd.ni_vp); 1037 return (error); 1038 } 1039 1040 /* 1041 * Change notion of root directory to a given file descriptor. 1042 */ 1043 #ifndef _SYS_SYSPROTO_H_ 1044 struct fchroot_args { 1045 int fd; 1046 }; 1047 #endif 1048 int 1049 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1050 { 1051 struct vnode *vp; 1052 struct file *fp; 1053 int error; 1054 uint8_t fdflags; 1055 1056 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp); 1057 if (error != 0) 1058 return (error); 1059 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 1060 fdrop(fp, td); 1061 return (ENOTCAPABLE); 1062 } 1063 vp = fp->f_vnode; 1064 vrefact(vp); 1065 fdrop(fp, td); 1066 vn_lock(vp, LK_SHARED | LK_RETRY); 1067 error = kern_chroot(td, vp); 1068 return (error); 1069 } 1070 1071 /* 1072 * Common routine for chroot and chdir. Callers must provide a locked vnode 1073 * instance. 1074 */ 1075 int 1076 change_dir(struct vnode *vp, struct thread *td) 1077 { 1078 #ifdef MAC 1079 int error; 1080 #endif 1081 1082 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1083 if (vp->v_type != VDIR) 1084 return (ENOTDIR); 1085 #ifdef MAC 1086 error = mac_vnode_check_chdir(td->td_ucred, vp); 1087 if (error != 0) 1088 return (error); 1089 #endif 1090 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1091 } 1092 1093 static __inline void 1094 flags_to_rights(int flags, cap_rights_t *rightsp) 1095 { 1096 if (flags & O_EXEC) { 1097 cap_rights_set_one(rightsp, CAP_FEXECVE); 1098 if (flags & O_PATH) 1099 return; 1100 } else { 1101 switch ((flags & O_ACCMODE)) { 1102 case O_RDONLY: 1103 cap_rights_set_one(rightsp, CAP_READ); 1104 break; 1105 case O_RDWR: 1106 cap_rights_set_one(rightsp, CAP_READ); 1107 /* FALLTHROUGH */ 1108 case O_WRONLY: 1109 cap_rights_set_one(rightsp, CAP_WRITE); 1110 if (!(flags & (O_APPEND | O_TRUNC))) 1111 cap_rights_set_one(rightsp, CAP_SEEK); 1112 break; 1113 } 1114 } 1115 1116 if (flags & O_CREAT) 1117 cap_rights_set_one(rightsp, CAP_CREATE); 1118 1119 if (flags & O_TRUNC) 1120 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1121 1122 if (flags & (O_SYNC | O_FSYNC)) 1123 cap_rights_set_one(rightsp, CAP_FSYNC); 1124 1125 if (flags & (O_EXLOCK | O_SHLOCK)) 1126 cap_rights_set_one(rightsp, CAP_FLOCK); 1127 } 1128 1129 /* 1130 * Check permissions, allocate an open file structure, and call the device 1131 * open routine if any. 1132 */ 1133 #ifndef _SYS_SYSPROTO_H_ 1134 struct open_args { 1135 char *path; 1136 int flags; 1137 int mode; 1138 }; 1139 #endif 1140 int 1141 sys_open(struct thread *td, struct open_args *uap) 1142 { 1143 1144 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1145 uap->flags, uap->mode)); 1146 } 1147 1148 #ifndef _SYS_SYSPROTO_H_ 1149 struct openat_args { 1150 int fd; 1151 char *path; 1152 int flag; 1153 int mode; 1154 }; 1155 #endif 1156 int 1157 sys_openat(struct thread *td, struct openat_args *uap) 1158 { 1159 1160 AUDIT_ARG_FD(uap->fd); 1161 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1162 uap->mode)); 1163 } 1164 1165 /* 1166 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their 1167 * in-kernel representations (FREAD etc.). 1168 */ 1169 static int 1170 openflags(int *flagsp) 1171 { 1172 int flags; 1173 1174 /* 1175 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1176 * may be specified. On the other hand, for O_PATH any mode 1177 * except O_EXEC is ignored. 1178 */ 1179 flags = *flagsp; 1180 if ((flags & O_PATH) != 0) { 1181 flags &= ~O_ACCMODE; 1182 } else if ((flags & O_EXEC) != 0) { 1183 if ((flags & O_ACCMODE) != 0) 1184 return (EINVAL); 1185 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1186 return (EINVAL); 1187 } else { 1188 flags = FFLAGS(flags); 1189 } 1190 *flagsp = flags; 1191 return (0); 1192 } 1193 1194 static void 1195 finit_open(struct file *fp, struct vnode *vp, int flags) 1196 { 1197 /* 1198 * Store the vnode, for any f_type. Typically, the vnode use count is 1199 * decremented by a direct call to vnops.fo_close() for files that 1200 * switched type. 1201 */ 1202 fp->f_vnode = vp; 1203 1204 /* 1205 * If the file wasn't claimed by devfs or fifofs, bind it to the normal 1206 * vnode operations here. 1207 */ 1208 if (fp->f_ops == &badfileops) { 1209 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1210 ("Unexpected fifo fp %p vp %p", fp, vp)); 1211 if ((flags & O_PATH) != 0) { 1212 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1213 DTYPE_VNODE, NULL, &path_fileops); 1214 } else { 1215 finit_vnode(fp, flags, NULL, &vnops); 1216 } 1217 } 1218 } 1219 1220 /* 1221 * If fpp != NULL, opened file is not installed into the file 1222 * descriptor table, instead it is returned in *fpp. This is 1223 * incompatible with fdopen(), in which case we return EINVAL. 1224 */ 1225 static int 1226 openatfp(struct thread *td, int dirfd, const char *path, 1227 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1228 { 1229 struct proc *p; 1230 struct filedesc *fdp; 1231 struct pwddesc *pdp; 1232 struct file *fp; 1233 struct vnode *vp; 1234 struct filecaps *fcaps; 1235 struct nameidata nd; 1236 cap_rights_t rights; 1237 int cmode, error, indx; 1238 1239 indx = -1; 1240 p = td->td_proc; 1241 fdp = p->p_fd; 1242 pdp = p->p_pd; 1243 1244 AUDIT_ARG_FFLAGS(flags); 1245 AUDIT_ARG_MODE(mode); 1246 cap_rights_init_one(&rights, CAP_LOOKUP); 1247 flags_to_rights(flags, &rights); 1248 1249 error = openflags(&flags); 1250 if (error != 0) 1251 return (error); 1252 1253 /* 1254 * Allocate a file structure. The descriptor to reference it 1255 * is allocated and used by finstall_refed() below. 1256 */ 1257 error = falloc_noinstall(td, &fp); 1258 if (error != 0) 1259 return (error); 1260 /* Set the flags early so the finit in devfs can pick them up. */ 1261 fp->f_flag = flags & FMASK; 1262 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1263 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS, 1264 pathseg, path, dirfd, &rights); 1265 td->td_dupfd = -1; /* XXX check for fdopen */ 1266 error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS, 1267 td->td_ucred, fp); 1268 if (error != 0) { 1269 /* 1270 * If the vn_open replaced the method vector, something 1271 * wonderous happened deep below and we just pass it up 1272 * pretending we know what we do. 1273 */ 1274 if (error == ENXIO && fp->f_ops != &badfileops) { 1275 MPASS((flags & O_PATH) == 0); 1276 goto success; 1277 } 1278 1279 /* 1280 * Handle special fdopen() case. bleh. 1281 * 1282 * Don't do this for relative (capability) lookups; we don't 1283 * understand exactly what would happen, and we don't think 1284 * that it ever should. 1285 */ 1286 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1287 (error == ENODEV || error == ENXIO) && 1288 td->td_dupfd >= 0) { 1289 MPASS(fpp == NULL); 1290 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1291 &indx); 1292 if (error == 0) 1293 goto success; 1294 } 1295 1296 goto bad; 1297 } 1298 td->td_dupfd = 0; 1299 NDFREE_PNBUF(&nd); 1300 vp = nd.ni_vp; 1301 1302 finit_open(fp, vp, flags); 1303 VOP_UNLOCK(vp); 1304 if (flags & O_TRUNC) { 1305 error = fo_truncate(fp, 0, td->td_ucred, td); 1306 if (error != 0) 1307 goto bad; 1308 } 1309 success: 1310 if (fpp != NULL) { 1311 MPASS(error == 0); 1312 NDFREE_IOCTLCAPS(&nd); 1313 *fpp = fp; 1314 return (0); 1315 } 1316 1317 /* 1318 * If we haven't already installed the FD (for dupfdopen), do so now. 1319 */ 1320 if (indx == -1) { 1321 #ifdef CAPABILITIES 1322 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1323 fcaps = &nd.ni_filecaps; 1324 else 1325 #endif 1326 fcaps = NULL; 1327 if ((nd.ni_resflags & NIRES_BENEATH) != 0) 1328 flags |= O_RESOLVE_BENEATH; 1329 else 1330 flags &= ~O_RESOLVE_BENEATH; 1331 error = finstall_refed(td, fp, &indx, flags, fcaps); 1332 /* On success finstall_refed() consumes fcaps. */ 1333 if (error != 0) { 1334 goto bad; 1335 } 1336 } else { 1337 NDFREE_IOCTLCAPS(&nd); 1338 falloc_abort(td, fp); 1339 } 1340 1341 td->td_retval[0] = indx; 1342 return (0); 1343 bad: 1344 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1345 NDFREE_IOCTLCAPS(&nd); 1346 falloc_abort(td, fp); 1347 return (error); 1348 } 1349 1350 int 1351 kern_openat(struct thread *td, int dirfd, const char *path, 1352 enum uio_seg pathseg, int flags, int mode) 1353 { 1354 return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL)); 1355 } 1356 1357 int 1358 kern_openatfp(struct thread *td, int dirfd, const char *path, 1359 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1360 { 1361 int error, old_dupfd; 1362 1363 old_dupfd = td->td_dupfd; 1364 td->td_dupfd = -1; 1365 error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp); 1366 td->td_dupfd = old_dupfd; 1367 return (error); 1368 } 1369 1370 #ifdef COMPAT_43 1371 /* 1372 * Create a file. 1373 */ 1374 #ifndef _SYS_SYSPROTO_H_ 1375 struct ocreat_args { 1376 char *path; 1377 int mode; 1378 }; 1379 #endif 1380 int 1381 ocreat(struct thread *td, struct ocreat_args *uap) 1382 { 1383 1384 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1385 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1386 } 1387 #endif /* COMPAT_43 */ 1388 1389 /* 1390 * Create a special file. 1391 */ 1392 #ifndef _SYS_SYSPROTO_H_ 1393 struct mknodat_args { 1394 int fd; 1395 char *path; 1396 mode_t mode; 1397 dev_t dev; 1398 }; 1399 #endif 1400 int 1401 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1402 { 1403 1404 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1405 uap->dev)); 1406 } 1407 1408 #if defined(COMPAT_FREEBSD11) 1409 int 1410 freebsd11_mknod(struct thread *td, 1411 struct freebsd11_mknod_args *uap) 1412 { 1413 1414 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1415 uap->mode, uap->dev)); 1416 } 1417 1418 int 1419 freebsd11_mknodat(struct thread *td, 1420 struct freebsd11_mknodat_args *uap) 1421 { 1422 1423 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1424 uap->dev)); 1425 } 1426 #endif /* COMPAT_FREEBSD11 */ 1427 1428 int 1429 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1430 int mode, dev_t dev) 1431 { 1432 struct vnode *vp; 1433 struct mount *mp; 1434 struct vattr vattr; 1435 struct nameidata nd; 1436 int error, whiteout = 0; 1437 1438 AUDIT_ARG_MODE(mode); 1439 AUDIT_ARG_DEV(dev); 1440 switch (mode & S_IFMT) { 1441 case S_IFCHR: 1442 case S_IFBLK: 1443 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1444 if (error == 0 && dev == VNOVAL) 1445 error = EINVAL; 1446 break; 1447 case S_IFWHT: 1448 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1449 break; 1450 case S_IFIFO: 1451 if (dev == 0) 1452 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1453 /* FALLTHROUGH */ 1454 default: 1455 error = EINVAL; 1456 break; 1457 } 1458 if (error != 0) 1459 return (error); 1460 NDPREINIT(&nd); 1461 restart: 1462 bwillwrite(); 1463 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1464 pathseg, path, fd, &cap_mknodat_rights); 1465 if ((error = namei(&nd)) != 0) 1466 return (error); 1467 vp = nd.ni_vp; 1468 if (vp != NULL) { 1469 NDFREE_PNBUF(&nd); 1470 if (vp == nd.ni_dvp) 1471 vrele(nd.ni_dvp); 1472 else 1473 vput(nd.ni_dvp); 1474 vrele(vp); 1475 return (EEXIST); 1476 } else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1477 NDFREE_PNBUF(&nd); 1478 vput(nd.ni_dvp); 1479 return (EINVAL); 1480 } else { 1481 VATTR_NULL(&vattr); 1482 vattr.va_mode = (mode & ALLPERMS) & 1483 ~td->td_proc->p_pd->pd_cmask; 1484 vattr.va_rdev = dev; 1485 whiteout = 0; 1486 1487 switch (mode & S_IFMT) { 1488 case S_IFCHR: 1489 vattr.va_type = VCHR; 1490 break; 1491 case S_IFBLK: 1492 vattr.va_type = VBLK; 1493 break; 1494 case S_IFWHT: 1495 whiteout = 1; 1496 break; 1497 default: 1498 panic("kern_mknod: invalid mode"); 1499 } 1500 } 1501 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1502 NDFREE_PNBUF(&nd); 1503 vput(nd.ni_dvp); 1504 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1505 return (error); 1506 goto restart; 1507 } 1508 #ifdef MAC 1509 if (error == 0 && !whiteout) 1510 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1511 &nd.ni_cnd, &vattr); 1512 #endif 1513 if (error == 0) { 1514 if (whiteout) 1515 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1516 else { 1517 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1518 &nd.ni_cnd, &vattr); 1519 } 1520 } 1521 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1522 true); 1523 vn_finished_write(mp); 1524 NDFREE_PNBUF(&nd); 1525 if (error == ERELOOKUP) 1526 goto restart; 1527 return (error); 1528 } 1529 1530 /* 1531 * Create a named pipe. 1532 */ 1533 #ifndef _SYS_SYSPROTO_H_ 1534 struct mkfifo_args { 1535 char *path; 1536 int mode; 1537 }; 1538 #endif 1539 int 1540 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1541 { 1542 1543 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1544 uap->mode)); 1545 } 1546 1547 #ifndef _SYS_SYSPROTO_H_ 1548 struct mkfifoat_args { 1549 int fd; 1550 char *path; 1551 mode_t mode; 1552 }; 1553 #endif 1554 int 1555 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1556 { 1557 1558 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1559 uap->mode)); 1560 } 1561 1562 int 1563 kern_mkfifoat(struct thread *td, int fd, const char *path, 1564 enum uio_seg pathseg, int mode) 1565 { 1566 struct mount *mp; 1567 struct vattr vattr; 1568 struct nameidata nd; 1569 int error; 1570 1571 AUDIT_ARG_MODE(mode); 1572 NDPREINIT(&nd); 1573 restart: 1574 bwillwrite(); 1575 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1576 pathseg, path, fd, &cap_mkfifoat_rights); 1577 if ((error = namei(&nd)) != 0) 1578 return (error); 1579 if (nd.ni_vp != NULL) { 1580 NDFREE_PNBUF(&nd); 1581 if (nd.ni_vp == nd.ni_dvp) 1582 vrele(nd.ni_dvp); 1583 else 1584 vput(nd.ni_dvp); 1585 vrele(nd.ni_vp); 1586 return (EEXIST); 1587 } 1588 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1589 NDFREE_PNBUF(&nd); 1590 vput(nd.ni_dvp); 1591 return (EINVAL); 1592 } 1593 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1594 NDFREE_PNBUF(&nd); 1595 vput(nd.ni_dvp); 1596 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1597 return (error); 1598 goto restart; 1599 } 1600 VATTR_NULL(&vattr); 1601 vattr.va_type = VFIFO; 1602 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1603 #ifdef MAC 1604 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1605 &vattr); 1606 if (error != 0) 1607 goto out; 1608 #endif 1609 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1610 #ifdef MAC 1611 out: 1612 #endif 1613 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1614 vn_finished_write(mp); 1615 NDFREE_PNBUF(&nd); 1616 if (error == ERELOOKUP) 1617 goto restart; 1618 return (error); 1619 } 1620 1621 /* 1622 * Make a hard file link. 1623 */ 1624 #ifndef _SYS_SYSPROTO_H_ 1625 struct link_args { 1626 char *path; 1627 char *link; 1628 }; 1629 #endif 1630 int 1631 sys_link(struct thread *td, struct link_args *uap) 1632 { 1633 1634 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1635 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1636 } 1637 1638 #ifndef _SYS_SYSPROTO_H_ 1639 struct linkat_args { 1640 int fd1; 1641 char *path1; 1642 int fd2; 1643 char *path2; 1644 int flag; 1645 }; 1646 #endif 1647 int 1648 sys_linkat(struct thread *td, struct linkat_args *uap) 1649 { 1650 1651 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1652 UIO_USERSPACE, uap->flag)); 1653 } 1654 1655 int hardlink_check_uid = 0; 1656 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1657 &hardlink_check_uid, 0, 1658 "Unprivileged processes cannot create hard links to files owned by other " 1659 "users"); 1660 static int hardlink_check_gid = 0; 1661 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1662 &hardlink_check_gid, 0, 1663 "Unprivileged processes cannot create hard links to files owned by other " 1664 "groups"); 1665 1666 static int 1667 can_hardlink(struct vnode *vp, struct ucred *cred) 1668 { 1669 struct vattr va; 1670 int error; 1671 1672 if (!hardlink_check_uid && !hardlink_check_gid) 1673 return (0); 1674 1675 error = VOP_GETATTR(vp, &va, cred); 1676 if (error != 0) 1677 return (error); 1678 1679 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1680 error = priv_check_cred(cred, PRIV_VFS_LINK); 1681 if (error != 0) 1682 return (error); 1683 } 1684 1685 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1686 error = priv_check_cred(cred, PRIV_VFS_LINK); 1687 if (error != 0) 1688 return (error); 1689 } 1690 1691 return (0); 1692 } 1693 1694 int 1695 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1696 const char *path2, enum uio_seg segflag, int flag) 1697 { 1698 struct nameidata nd; 1699 int error; 1700 1701 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1702 AT_EMPTY_PATH)) != 0) 1703 return (EINVAL); 1704 1705 NDPREINIT(&nd); 1706 do { 1707 bwillwrite(); 1708 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1709 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1710 segflag, path1, fd1, &cap_linkat_source_rights); 1711 if ((error = namei(&nd)) != 0) 1712 return (error); 1713 NDFREE_PNBUF(&nd); 1714 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1715 error = priv_check(td, PRIV_VFS_FHOPEN); 1716 if (error != 0) { 1717 vrele(nd.ni_vp); 1718 return (error); 1719 } 1720 } 1721 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1722 } while (error == EAGAIN || error == ERELOOKUP); 1723 return (error); 1724 } 1725 1726 static int 1727 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1728 enum uio_seg segflag) 1729 { 1730 struct nameidata nd; 1731 struct mount *mp; 1732 int error; 1733 1734 if (vp->v_type == VDIR) { 1735 vrele(vp); 1736 return (EPERM); /* POSIX */ 1737 } 1738 if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) { 1739 vrele(vp); 1740 return (EINVAL); 1741 } 1742 NDINIT_ATRIGHTS(&nd, CREATE, 1743 LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1744 &cap_linkat_target_rights); 1745 if ((error = namei(&nd)) == 0) { 1746 if (nd.ni_vp != NULL) { 1747 NDFREE_PNBUF(&nd); 1748 if (nd.ni_dvp == nd.ni_vp) 1749 vrele(nd.ni_dvp); 1750 else 1751 vput(nd.ni_dvp); 1752 vrele(nd.ni_vp); 1753 vrele(vp); 1754 return (EEXIST); 1755 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1756 /* 1757 * Cross-device link. No need to recheck 1758 * vp->v_type, since it cannot change, except 1759 * to VBAD. 1760 */ 1761 NDFREE_PNBUF(&nd); 1762 vput(nd.ni_dvp); 1763 vrele(vp); 1764 return (EXDEV); 1765 } else if (vn_lock(vp, LK_EXCLUSIVE) == 0) { 1766 error = can_hardlink(vp, td->td_ucred); 1767 #ifdef MAC 1768 if (error == 0) 1769 error = mac_vnode_check_link(td->td_ucred, 1770 nd.ni_dvp, vp, &nd.ni_cnd); 1771 #endif 1772 if (error != 0) { 1773 vput(vp); 1774 vput(nd.ni_dvp); 1775 NDFREE_PNBUF(&nd); 1776 return (error); 1777 } 1778 error = vn_start_write(vp, &mp, V_NOWAIT); 1779 if (error != 0) { 1780 vput(vp); 1781 vput(nd.ni_dvp); 1782 NDFREE_PNBUF(&nd); 1783 error = vn_start_write(NULL, &mp, 1784 V_XSLEEP | V_PCATCH); 1785 if (error != 0) 1786 return (error); 1787 return (EAGAIN); 1788 } 1789 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1790 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1791 vn_finished_write(mp); 1792 NDFREE_PNBUF(&nd); 1793 vp = NULL; 1794 } else { 1795 vput(nd.ni_dvp); 1796 NDFREE_PNBUF(&nd); 1797 vrele(vp); 1798 return (EAGAIN); 1799 } 1800 } 1801 if (vp != NULL) 1802 vrele(vp); 1803 return (error); 1804 } 1805 1806 /* 1807 * Make a symbolic link. 1808 */ 1809 #ifndef _SYS_SYSPROTO_H_ 1810 struct symlink_args { 1811 char *path; 1812 char *link; 1813 }; 1814 #endif 1815 int 1816 sys_symlink(struct thread *td, struct symlink_args *uap) 1817 { 1818 1819 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1820 UIO_USERSPACE)); 1821 } 1822 1823 #ifndef _SYS_SYSPROTO_H_ 1824 struct symlinkat_args { 1825 char *path; 1826 int fd; 1827 char *path2; 1828 }; 1829 #endif 1830 int 1831 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1832 { 1833 1834 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1835 UIO_USERSPACE)); 1836 } 1837 1838 int 1839 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1840 enum uio_seg segflg) 1841 { 1842 struct mount *mp; 1843 struct vattr vattr; 1844 const char *syspath; 1845 char *tmppath; 1846 struct nameidata nd; 1847 int error; 1848 1849 if (segflg == UIO_SYSSPACE) { 1850 syspath = path1; 1851 } else { 1852 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1853 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1854 goto out; 1855 syspath = tmppath; 1856 } 1857 AUDIT_ARG_TEXT(syspath); 1858 NDPREINIT(&nd); 1859 restart: 1860 bwillwrite(); 1861 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1862 path2, fd, &cap_symlinkat_rights); 1863 if ((error = namei(&nd)) != 0) 1864 goto out; 1865 if (nd.ni_vp) { 1866 NDFREE_PNBUF(&nd); 1867 if (nd.ni_vp == nd.ni_dvp) 1868 vrele(nd.ni_dvp); 1869 else 1870 vput(nd.ni_dvp); 1871 vrele(nd.ni_vp); 1872 nd.ni_vp = NULL; 1873 error = EEXIST; 1874 goto out; 1875 } 1876 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1877 NDFREE_PNBUF(&nd); 1878 vput(nd.ni_dvp); 1879 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1880 goto out; 1881 goto restart; 1882 } 1883 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1884 error = EINVAL; 1885 goto out; 1886 } 1887 VATTR_NULL(&vattr); 1888 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1889 #ifdef MAC 1890 vattr.va_type = VLNK; 1891 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1892 &vattr); 1893 if (error != 0) 1894 goto out2; 1895 #endif 1896 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1897 #ifdef MAC 1898 out2: 1899 #endif 1900 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1901 vn_finished_write(mp); 1902 NDFREE_PNBUF(&nd); 1903 if (error == ERELOOKUP) 1904 goto restart; 1905 out: 1906 if (segflg != UIO_SYSSPACE) 1907 uma_zfree(namei_zone, tmppath); 1908 return (error); 1909 } 1910 1911 /* 1912 * Delete a whiteout from the filesystem. 1913 */ 1914 #ifndef _SYS_SYSPROTO_H_ 1915 struct undelete_args { 1916 char *path; 1917 }; 1918 #endif 1919 int 1920 sys_undelete(struct thread *td, struct undelete_args *uap) 1921 { 1922 struct mount *mp; 1923 struct nameidata nd; 1924 int error; 1925 1926 NDPREINIT(&nd); 1927 restart: 1928 bwillwrite(); 1929 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1930 UIO_USERSPACE, uap->path); 1931 error = namei(&nd); 1932 if (error != 0) 1933 return (error); 1934 1935 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1936 NDFREE_PNBUF(&nd); 1937 if (nd.ni_vp == nd.ni_dvp) 1938 vrele(nd.ni_dvp); 1939 else 1940 vput(nd.ni_dvp); 1941 if (nd.ni_vp) 1942 vrele(nd.ni_vp); 1943 return (EEXIST); 1944 } 1945 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1946 NDFREE_PNBUF(&nd); 1947 vput(nd.ni_dvp); 1948 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1949 return (error); 1950 goto restart; 1951 } 1952 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1953 NDFREE_PNBUF(&nd); 1954 vput(nd.ni_dvp); 1955 vn_finished_write(mp); 1956 if (error == ERELOOKUP) 1957 goto restart; 1958 return (error); 1959 } 1960 1961 /* 1962 * Delete a name from the filesystem. 1963 */ 1964 #ifndef _SYS_SYSPROTO_H_ 1965 struct unlink_args { 1966 char *path; 1967 }; 1968 #endif 1969 int 1970 sys_unlink(struct thread *td, struct unlink_args *uap) 1971 { 1972 1973 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1974 0, 0)); 1975 } 1976 1977 static int 1978 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1979 int flag, enum uio_seg pathseg, ino_t oldinum) 1980 { 1981 1982 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1983 return (EINVAL); 1984 1985 if ((flag & AT_REMOVEDIR) != 0) 1986 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1987 1988 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1989 } 1990 1991 #ifndef _SYS_SYSPROTO_H_ 1992 struct unlinkat_args { 1993 int fd; 1994 char *path; 1995 int flag; 1996 }; 1997 #endif 1998 int 1999 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 2000 { 2001 2002 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 2003 UIO_USERSPACE, 0)); 2004 } 2005 2006 #ifndef _SYS_SYSPROTO_H_ 2007 struct funlinkat_args { 2008 int dfd; 2009 const char *path; 2010 int fd; 2011 int flag; 2012 }; 2013 #endif 2014 int 2015 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 2016 { 2017 2018 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2019 UIO_USERSPACE, 0)); 2020 } 2021 2022 int 2023 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2024 enum uio_seg pathseg, int flag, ino_t oldinum) 2025 { 2026 struct mount *mp; 2027 struct file *fp; 2028 struct vnode *vp; 2029 struct nameidata nd; 2030 struct stat sb; 2031 int error; 2032 2033 fp = NULL; 2034 if (fd != FD_NONE) { 2035 error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp); 2036 if (error != 0) 2037 return (error); 2038 } 2039 2040 NDPREINIT(&nd); 2041 restart: 2042 bwillwrite(); 2043 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2044 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2045 pathseg, path, dfd, &cap_unlinkat_rights); 2046 if ((error = namei(&nd)) != 0) { 2047 if (error == EINVAL) 2048 error = EPERM; 2049 goto fdout; 2050 } 2051 vp = nd.ni_vp; 2052 if (vp->v_type == VDIR && oldinum == 0) { 2053 error = EPERM; /* POSIX */ 2054 } else if (oldinum != 0 && 2055 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2056 sb.st_ino != oldinum) { 2057 error = EIDRM; /* Identifier removed */ 2058 } else if (fp != NULL && fp->f_vnode != vp) { 2059 if (VN_IS_DOOMED(fp->f_vnode)) 2060 error = EBADF; 2061 else 2062 error = EDEADLK; 2063 } else { 2064 /* 2065 * The root of a mounted filesystem cannot be deleted. 2066 * 2067 * XXX: can this only be a VDIR case? 2068 */ 2069 if (vp->v_vflag & VV_ROOT) 2070 error = EBUSY; 2071 } 2072 if (error == 0) { 2073 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2074 NDFREE_PNBUF(&nd); 2075 vput(nd.ni_dvp); 2076 if (vp == nd.ni_dvp) 2077 vrele(vp); 2078 else 2079 vput(vp); 2080 if ((error = vn_start_write(NULL, &mp, 2081 V_XSLEEP | V_PCATCH)) != 0) { 2082 goto fdout; 2083 } 2084 goto restart; 2085 } 2086 #ifdef MAC 2087 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2088 &nd.ni_cnd); 2089 if (error != 0) 2090 goto out; 2091 #endif 2092 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2093 #ifdef MAC 2094 out: 2095 #endif 2096 vn_finished_write(mp); 2097 } 2098 NDFREE_PNBUF(&nd); 2099 vput(nd.ni_dvp); 2100 if (vp == nd.ni_dvp) 2101 vrele(vp); 2102 else 2103 vput(vp); 2104 if (error == ERELOOKUP) 2105 goto restart; 2106 fdout: 2107 if (fp != NULL) 2108 fdrop(fp, td); 2109 return (error); 2110 } 2111 2112 /* 2113 * Reposition read/write file offset. 2114 */ 2115 #ifndef _SYS_SYSPROTO_H_ 2116 struct lseek_args { 2117 int fd; 2118 int pad; 2119 off_t offset; 2120 int whence; 2121 }; 2122 #endif 2123 int 2124 sys_lseek(struct thread *td, struct lseek_args *uap) 2125 { 2126 2127 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2128 } 2129 2130 int 2131 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2132 { 2133 struct file *fp; 2134 int error; 2135 2136 AUDIT_ARG_FD(fd); 2137 error = fget(td, fd, &cap_seek_rights, &fp); 2138 if (error != 0) 2139 return (error); 2140 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2141 fo_seek(fp, offset, whence, td) : ESPIPE; 2142 fdrop(fp, td); 2143 return (error); 2144 } 2145 2146 #if defined(COMPAT_43) 2147 /* 2148 * Reposition read/write file offset. 2149 */ 2150 #ifndef _SYS_SYSPROTO_H_ 2151 struct olseek_args { 2152 int fd; 2153 long offset; 2154 int whence; 2155 }; 2156 #endif 2157 int 2158 olseek(struct thread *td, struct olseek_args *uap) 2159 { 2160 2161 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2162 } 2163 #endif /* COMPAT_43 */ 2164 2165 #if defined(COMPAT_FREEBSD6) 2166 /* Version with the 'pad' argument */ 2167 int 2168 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2169 { 2170 2171 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2172 } 2173 #endif 2174 2175 /* 2176 * Check access permissions using passed credentials. 2177 */ 2178 static int 2179 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2180 struct thread *td) 2181 { 2182 accmode_t accmode; 2183 int error; 2184 2185 /* Flags == 0 means only check for existence. */ 2186 if (user_flags == 0) 2187 return (0); 2188 2189 accmode = 0; 2190 if (user_flags & R_OK) 2191 accmode |= VREAD; 2192 if (user_flags & W_OK) 2193 accmode |= VWRITE; 2194 if (user_flags & X_OK) 2195 accmode |= VEXEC; 2196 #ifdef MAC 2197 error = mac_vnode_check_access(cred, vp, accmode); 2198 if (error != 0) 2199 return (error); 2200 #endif 2201 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2202 error = VOP_ACCESS(vp, accmode, cred, td); 2203 return (error); 2204 } 2205 2206 /* 2207 * Check access permissions using "real" credentials. 2208 */ 2209 #ifndef _SYS_SYSPROTO_H_ 2210 struct access_args { 2211 char *path; 2212 int amode; 2213 }; 2214 #endif 2215 int 2216 sys_access(struct thread *td, struct access_args *uap) 2217 { 2218 2219 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2220 0, uap->amode)); 2221 } 2222 2223 #ifndef _SYS_SYSPROTO_H_ 2224 struct faccessat_args { 2225 int dirfd; 2226 char *path; 2227 int amode; 2228 int flag; 2229 } 2230 #endif 2231 int 2232 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2233 { 2234 2235 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2236 uap->amode)); 2237 } 2238 2239 int 2240 kern_accessat(struct thread *td, int fd, const char *path, 2241 enum uio_seg pathseg, int flag, int amode) 2242 { 2243 struct ucred *cred, *usecred; 2244 struct vnode *vp; 2245 struct nameidata nd; 2246 int error; 2247 2248 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2249 AT_SYMLINK_NOFOLLOW)) != 0) 2250 return (EINVAL); 2251 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2252 return (EINVAL); 2253 2254 /* 2255 * Create and modify a temporary credential instead of one that 2256 * is potentially shared (if we need one). 2257 */ 2258 cred = td->td_ucred; 2259 if ((flag & AT_EACCESS) == 0 && 2260 ((cred->cr_uid != cred->cr_ruid || 2261 cred->cr_rgid != cred->cr_gid))) { 2262 usecred = crdup(cred); 2263 usecred->cr_uid = cred->cr_ruid; 2264 usecred->cr_gid = cred->cr_rgid; 2265 td->td_ucred = usecred; 2266 } else 2267 usecred = cred; 2268 AUDIT_ARG_VALUE(amode); 2269 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2270 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2271 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2272 if ((error = namei(&nd)) != 0) 2273 goto out; 2274 vp = nd.ni_vp; 2275 2276 error = vn_access(vp, amode, usecred, td); 2277 NDFREE_PNBUF(&nd); 2278 vput(vp); 2279 out: 2280 if (usecred != cred) { 2281 td->td_ucred = cred; 2282 crfree(usecred); 2283 } 2284 return (error); 2285 } 2286 2287 /* 2288 * Check access permissions using "effective" credentials. 2289 */ 2290 #ifndef _SYS_SYSPROTO_H_ 2291 struct eaccess_args { 2292 char *path; 2293 int amode; 2294 }; 2295 #endif 2296 int 2297 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2298 { 2299 2300 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2301 AT_EACCESS, uap->amode)); 2302 } 2303 2304 #if defined(COMPAT_43) 2305 /* 2306 * Get file status; this version follows links. 2307 */ 2308 #ifndef _SYS_SYSPROTO_H_ 2309 struct ostat_args { 2310 char *path; 2311 struct ostat *ub; 2312 }; 2313 #endif 2314 int 2315 ostat(struct thread *td, struct ostat_args *uap) 2316 { 2317 struct stat sb; 2318 struct ostat osb; 2319 int error; 2320 2321 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2322 if (error != 0) 2323 return (error); 2324 cvtstat(&sb, &osb); 2325 return (copyout(&osb, uap->ub, sizeof (osb))); 2326 } 2327 2328 /* 2329 * Get file status; this version does not follow links. 2330 */ 2331 #ifndef _SYS_SYSPROTO_H_ 2332 struct olstat_args { 2333 char *path; 2334 struct ostat *ub; 2335 }; 2336 #endif 2337 int 2338 olstat(struct thread *td, struct olstat_args *uap) 2339 { 2340 struct stat sb; 2341 struct ostat osb; 2342 int error; 2343 2344 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2345 UIO_USERSPACE, &sb); 2346 if (error != 0) 2347 return (error); 2348 cvtstat(&sb, &osb); 2349 return (copyout(&osb, uap->ub, sizeof (osb))); 2350 } 2351 2352 /* 2353 * Convert from an old to a new stat structure. 2354 * XXX: many values are blindly truncated. 2355 */ 2356 void 2357 cvtstat(struct stat *st, struct ostat *ost) 2358 { 2359 2360 bzero(ost, sizeof(*ost)); 2361 ost->st_dev = st->st_dev; 2362 ost->st_ino = st->st_ino; 2363 ost->st_mode = st->st_mode; 2364 ost->st_nlink = st->st_nlink; 2365 ost->st_uid = st->st_uid; 2366 ost->st_gid = st->st_gid; 2367 ost->st_rdev = st->st_rdev; 2368 ost->st_size = MIN(st->st_size, INT32_MAX); 2369 ost->st_atim = st->st_atim; 2370 ost->st_mtim = st->st_mtim; 2371 ost->st_ctim = st->st_ctim; 2372 ost->st_blksize = st->st_blksize; 2373 ost->st_blocks = st->st_blocks; 2374 ost->st_flags = st->st_flags; 2375 ost->st_gen = st->st_gen; 2376 } 2377 #endif /* COMPAT_43 */ 2378 2379 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2380 int ino64_trunc_error; 2381 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2382 &ino64_trunc_error, 0, 2383 "Error on truncation of device, file or inode number, or link count"); 2384 2385 int 2386 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2387 { 2388 2389 ost->st_dev = st->st_dev; 2390 if (ost->st_dev != st->st_dev) { 2391 switch (ino64_trunc_error) { 2392 default: 2393 /* 2394 * Since dev_t is almost raw, don't clamp to the 2395 * maximum for case 2, but ignore the error. 2396 */ 2397 break; 2398 case 1: 2399 return (EOVERFLOW); 2400 } 2401 } 2402 ost->st_ino = st->st_ino; 2403 if (ost->st_ino != st->st_ino) { 2404 switch (ino64_trunc_error) { 2405 default: 2406 case 0: 2407 break; 2408 case 1: 2409 return (EOVERFLOW); 2410 case 2: 2411 ost->st_ino = UINT32_MAX; 2412 break; 2413 } 2414 } 2415 ost->st_mode = st->st_mode; 2416 ost->st_nlink = st->st_nlink; 2417 if (ost->st_nlink != st->st_nlink) { 2418 switch (ino64_trunc_error) { 2419 default: 2420 case 0: 2421 break; 2422 case 1: 2423 return (EOVERFLOW); 2424 case 2: 2425 ost->st_nlink = UINT16_MAX; 2426 break; 2427 } 2428 } 2429 ost->st_uid = st->st_uid; 2430 ost->st_gid = st->st_gid; 2431 ost->st_rdev = st->st_rdev; 2432 if (ost->st_rdev != st->st_rdev) { 2433 switch (ino64_trunc_error) { 2434 default: 2435 break; 2436 case 1: 2437 return (EOVERFLOW); 2438 } 2439 } 2440 ost->st_atim = st->st_atim; 2441 ost->st_mtim = st->st_mtim; 2442 ost->st_ctim = st->st_ctim; 2443 ost->st_size = st->st_size; 2444 ost->st_blocks = st->st_blocks; 2445 ost->st_blksize = st->st_blksize; 2446 ost->st_flags = st->st_flags; 2447 ost->st_gen = st->st_gen; 2448 ost->st_lspare = 0; 2449 ost->st_birthtim = st->st_birthtim; 2450 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2451 sizeof(*ost) - offsetof(struct freebsd11_stat, 2452 st_birthtim) - sizeof(ost->st_birthtim)); 2453 return (0); 2454 } 2455 2456 int 2457 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2458 { 2459 struct stat sb; 2460 struct freebsd11_stat osb; 2461 int error; 2462 2463 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2464 if (error != 0) 2465 return (error); 2466 error = freebsd11_cvtstat(&sb, &osb); 2467 if (error == 0) 2468 error = copyout(&osb, uap->ub, sizeof(osb)); 2469 return (error); 2470 } 2471 2472 int 2473 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2474 { 2475 struct stat sb; 2476 struct freebsd11_stat osb; 2477 int error; 2478 2479 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2480 UIO_USERSPACE, &sb); 2481 if (error != 0) 2482 return (error); 2483 error = freebsd11_cvtstat(&sb, &osb); 2484 if (error == 0) 2485 error = copyout(&osb, uap->ub, sizeof(osb)); 2486 return (error); 2487 } 2488 2489 int 2490 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2491 { 2492 struct fhandle fh; 2493 struct stat sb; 2494 struct freebsd11_stat osb; 2495 int error; 2496 2497 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2498 if (error != 0) 2499 return (error); 2500 error = kern_fhstat(td, fh, &sb); 2501 if (error != 0) 2502 return (error); 2503 error = freebsd11_cvtstat(&sb, &osb); 2504 if (error == 0) 2505 error = copyout(&osb, uap->sb, sizeof(osb)); 2506 return (error); 2507 } 2508 2509 int 2510 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2511 { 2512 struct stat sb; 2513 struct freebsd11_stat osb; 2514 int error; 2515 2516 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2517 UIO_USERSPACE, &sb); 2518 if (error != 0) 2519 return (error); 2520 error = freebsd11_cvtstat(&sb, &osb); 2521 if (error == 0) 2522 error = copyout(&osb, uap->buf, sizeof(osb)); 2523 return (error); 2524 } 2525 #endif /* COMPAT_FREEBSD11 */ 2526 2527 /* 2528 * Get file status 2529 */ 2530 #ifndef _SYS_SYSPROTO_H_ 2531 struct fstatat_args { 2532 int fd; 2533 char *path; 2534 struct stat *buf; 2535 int flag; 2536 } 2537 #endif 2538 int 2539 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2540 { 2541 struct stat sb; 2542 int error; 2543 2544 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2545 UIO_USERSPACE, &sb); 2546 if (error == 0) 2547 error = copyout(&sb, uap->buf, sizeof (sb)); 2548 return (error); 2549 } 2550 2551 int 2552 kern_statat(struct thread *td, int flag, int fd, const char *path, 2553 enum uio_seg pathseg, struct stat *sbp) 2554 { 2555 struct nameidata nd; 2556 int error; 2557 2558 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2559 AT_EMPTY_PATH)) != 0) 2560 return (EINVAL); 2561 2562 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2563 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2564 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2565 2566 if ((error = namei(&nd)) != 0) { 2567 if (error == ENOTDIR && 2568 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2569 error = kern_fstat(td, fd, sbp); 2570 return (error); 2571 } 2572 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2573 NDFREE_PNBUF(&nd); 2574 vput(nd.ni_vp); 2575 #ifdef __STAT_TIME_T_EXT 2576 sbp->st_atim_ext = 0; 2577 sbp->st_mtim_ext = 0; 2578 sbp->st_ctim_ext = 0; 2579 sbp->st_btim_ext = 0; 2580 #endif 2581 #ifdef KTRACE 2582 if (KTRPOINT(td, KTR_STRUCT)) 2583 ktrstat_error(sbp, error); 2584 #endif 2585 return (error); 2586 } 2587 2588 #if defined(COMPAT_FREEBSD11) 2589 /* 2590 * Implementation of the NetBSD [l]stat() functions. 2591 */ 2592 int 2593 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2594 { 2595 struct freebsd11_stat sb11; 2596 int error; 2597 2598 error = freebsd11_cvtstat(sb, &sb11); 2599 if (error != 0) 2600 return (error); 2601 2602 bzero(nsb, sizeof(*nsb)); 2603 CP(sb11, *nsb, st_dev); 2604 CP(sb11, *nsb, st_ino); 2605 CP(sb11, *nsb, st_mode); 2606 CP(sb11, *nsb, st_nlink); 2607 CP(sb11, *nsb, st_uid); 2608 CP(sb11, *nsb, st_gid); 2609 CP(sb11, *nsb, st_rdev); 2610 CP(sb11, *nsb, st_atim); 2611 CP(sb11, *nsb, st_mtim); 2612 CP(sb11, *nsb, st_ctim); 2613 CP(sb11, *nsb, st_size); 2614 CP(sb11, *nsb, st_blocks); 2615 CP(sb11, *nsb, st_blksize); 2616 CP(sb11, *nsb, st_flags); 2617 CP(sb11, *nsb, st_gen); 2618 CP(sb11, *nsb, st_birthtim); 2619 return (0); 2620 } 2621 2622 #ifndef _SYS_SYSPROTO_H_ 2623 struct freebsd11_nstat_args { 2624 char *path; 2625 struct nstat *ub; 2626 }; 2627 #endif 2628 int 2629 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2630 { 2631 struct stat sb; 2632 struct nstat nsb; 2633 int error; 2634 2635 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2636 if (error != 0) 2637 return (error); 2638 error = freebsd11_cvtnstat(&sb, &nsb); 2639 if (error == 0) 2640 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2641 return (error); 2642 } 2643 2644 /* 2645 * NetBSD lstat. Get file status; this version does not follow links. 2646 */ 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct freebsd11_nlstat_args { 2649 char *path; 2650 struct nstat *ub; 2651 }; 2652 #endif 2653 int 2654 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2655 { 2656 struct stat sb; 2657 struct nstat nsb; 2658 int error; 2659 2660 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2661 UIO_USERSPACE, &sb); 2662 if (error != 0) 2663 return (error); 2664 error = freebsd11_cvtnstat(&sb, &nsb); 2665 if (error == 0) 2666 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2667 return (error); 2668 } 2669 #endif /* COMPAT_FREEBSD11 */ 2670 2671 /* 2672 * Get configurable pathname variables. 2673 */ 2674 #ifndef _SYS_SYSPROTO_H_ 2675 struct pathconf_args { 2676 char *path; 2677 int name; 2678 }; 2679 #endif 2680 int 2681 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2682 { 2683 long value; 2684 int error; 2685 2686 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2687 &value); 2688 if (error == 0) 2689 td->td_retval[0] = value; 2690 return (error); 2691 } 2692 2693 #ifndef _SYS_SYSPROTO_H_ 2694 struct lpathconf_args { 2695 char *path; 2696 int name; 2697 }; 2698 #endif 2699 int 2700 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2701 { 2702 long value; 2703 int error; 2704 2705 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2706 NOFOLLOW, &value); 2707 if (error == 0) 2708 td->td_retval[0] = value; 2709 return (error); 2710 } 2711 2712 int 2713 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2714 int name, u_long flags, long *valuep) 2715 { 2716 struct nameidata nd; 2717 int error; 2718 2719 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2720 pathseg, path); 2721 if ((error = namei(&nd)) != 0) 2722 return (error); 2723 NDFREE_PNBUF(&nd); 2724 2725 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2726 vput(nd.ni_vp); 2727 return (error); 2728 } 2729 2730 /* 2731 * Return target name of a symbolic link. 2732 */ 2733 #ifndef _SYS_SYSPROTO_H_ 2734 struct readlink_args { 2735 char *path; 2736 char *buf; 2737 size_t count; 2738 }; 2739 #endif 2740 int 2741 sys_readlink(struct thread *td, struct readlink_args *uap) 2742 { 2743 2744 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2745 uap->buf, UIO_USERSPACE, uap->count)); 2746 } 2747 #ifndef _SYS_SYSPROTO_H_ 2748 struct readlinkat_args { 2749 int fd; 2750 char *path; 2751 char *buf; 2752 size_t bufsize; 2753 }; 2754 #endif 2755 int 2756 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2757 { 2758 2759 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2760 uap->buf, UIO_USERSPACE, uap->bufsize)); 2761 } 2762 2763 int 2764 kern_readlinkat(struct thread *td, int fd, const char *path, 2765 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2766 { 2767 struct vnode *vp; 2768 struct nameidata nd; 2769 int error; 2770 2771 if (count > IOSIZE_MAX) 2772 return (EINVAL); 2773 2774 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2775 EMPTYPATH, pathseg, path, fd); 2776 2777 if ((error = namei(&nd)) != 0) 2778 return (error); 2779 NDFREE_PNBUF(&nd); 2780 vp = nd.ni_vp; 2781 2782 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2783 vput(vp); 2784 2785 return (error); 2786 } 2787 2788 /* 2789 * Helper function to readlink from a vnode 2790 */ 2791 static int 2792 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2793 struct thread *td) 2794 { 2795 struct iovec aiov; 2796 struct uio auio; 2797 int error; 2798 2799 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2800 #ifdef MAC 2801 error = mac_vnode_check_readlink(td->td_ucred, vp); 2802 if (error != 0) 2803 return (error); 2804 #endif 2805 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2806 return (EINVAL); 2807 2808 aiov.iov_base = buf; 2809 aiov.iov_len = count; 2810 auio.uio_iov = &aiov; 2811 auio.uio_iovcnt = 1; 2812 auio.uio_offset = 0; 2813 auio.uio_rw = UIO_READ; 2814 auio.uio_segflg = bufseg; 2815 auio.uio_td = td; 2816 auio.uio_resid = count; 2817 error = VOP_READLINK(vp, &auio, td->td_ucred); 2818 td->td_retval[0] = count - auio.uio_resid; 2819 return (error); 2820 } 2821 2822 /* 2823 * Common implementation code for chflags() and fchflags(). 2824 */ 2825 static int 2826 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2827 { 2828 struct mount *mp; 2829 struct vattr vattr; 2830 int error; 2831 2832 /* We can't support the value matching VNOVAL. */ 2833 if (flags == VNOVAL) 2834 return (EOPNOTSUPP); 2835 2836 /* 2837 * Prevent non-root users from setting flags on devices. When 2838 * a device is reused, users can retain ownership of the device 2839 * if they are allowed to set flags and programs assume that 2840 * chown can't fail when done as root. 2841 */ 2842 if (VN_ISDEV(vp)) { 2843 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2844 if (error != 0) 2845 return (error); 2846 } 2847 2848 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2849 return (error); 2850 VATTR_NULL(&vattr); 2851 vattr.va_flags = flags; 2852 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2853 #ifdef MAC 2854 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2855 if (error == 0) 2856 #endif 2857 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2858 VOP_UNLOCK(vp); 2859 vn_finished_write(mp); 2860 return (error); 2861 } 2862 2863 /* 2864 * Change flags of a file given a path name. 2865 */ 2866 #ifndef _SYS_SYSPROTO_H_ 2867 struct chflags_args { 2868 const char *path; 2869 u_long flags; 2870 }; 2871 #endif 2872 int 2873 sys_chflags(struct thread *td, struct chflags_args *uap) 2874 { 2875 2876 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2877 uap->flags, 0)); 2878 } 2879 2880 #ifndef _SYS_SYSPROTO_H_ 2881 struct chflagsat_args { 2882 int fd; 2883 const char *path; 2884 u_long flags; 2885 int atflag; 2886 } 2887 #endif 2888 int 2889 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2890 { 2891 2892 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2893 uap->flags, uap->atflag)); 2894 } 2895 2896 /* 2897 * Same as chflags() but doesn't follow symlinks. 2898 */ 2899 #ifndef _SYS_SYSPROTO_H_ 2900 struct lchflags_args { 2901 const char *path; 2902 u_long flags; 2903 }; 2904 #endif 2905 int 2906 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2907 { 2908 2909 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2910 uap->flags, AT_SYMLINK_NOFOLLOW)); 2911 } 2912 2913 static int 2914 kern_chflagsat(struct thread *td, int fd, const char *path, 2915 enum uio_seg pathseg, u_long flags, int atflag) 2916 { 2917 struct nameidata nd; 2918 int error; 2919 2920 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2921 AT_EMPTY_PATH)) != 0) 2922 return (EINVAL); 2923 2924 AUDIT_ARG_FFLAGS(flags); 2925 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2926 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2927 fd, &cap_fchflags_rights); 2928 if ((error = namei(&nd)) != 0) 2929 return (error); 2930 NDFREE_PNBUF(&nd); 2931 error = setfflags(td, nd.ni_vp, flags); 2932 vrele(nd.ni_vp); 2933 return (error); 2934 } 2935 2936 /* 2937 * Change flags of a file given a file descriptor. 2938 */ 2939 #ifndef _SYS_SYSPROTO_H_ 2940 struct fchflags_args { 2941 int fd; 2942 u_long flags; 2943 }; 2944 #endif 2945 int 2946 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2947 { 2948 struct file *fp; 2949 int error; 2950 2951 AUDIT_ARG_FD(uap->fd); 2952 AUDIT_ARG_FFLAGS(uap->flags); 2953 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2954 &fp); 2955 if (error != 0) 2956 return (error); 2957 #ifdef AUDIT 2958 if (AUDITING_TD(td)) { 2959 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2960 AUDIT_ARG_VNODE1(fp->f_vnode); 2961 VOP_UNLOCK(fp->f_vnode); 2962 } 2963 #endif 2964 error = setfflags(td, fp->f_vnode, uap->flags); 2965 fdrop(fp, td); 2966 return (error); 2967 } 2968 2969 /* 2970 * Common implementation code for chmod(), lchmod() and fchmod(). 2971 */ 2972 int 2973 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2974 { 2975 struct mount *mp; 2976 struct vattr vattr; 2977 int error; 2978 2979 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2980 return (error); 2981 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2982 VATTR_NULL(&vattr); 2983 vattr.va_mode = mode & ALLPERMS; 2984 #ifdef MAC 2985 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2986 if (error == 0) 2987 #endif 2988 error = VOP_SETATTR(vp, &vattr, cred); 2989 VOP_UNLOCK(vp); 2990 vn_finished_write(mp); 2991 return (error); 2992 } 2993 2994 /* 2995 * Change mode of a file given path name. 2996 */ 2997 #ifndef _SYS_SYSPROTO_H_ 2998 struct chmod_args { 2999 char *path; 3000 int mode; 3001 }; 3002 #endif 3003 int 3004 sys_chmod(struct thread *td, struct chmod_args *uap) 3005 { 3006 3007 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3008 uap->mode, 0)); 3009 } 3010 3011 #ifndef _SYS_SYSPROTO_H_ 3012 struct fchmodat_args { 3013 int dirfd; 3014 char *path; 3015 mode_t mode; 3016 int flag; 3017 } 3018 #endif 3019 int 3020 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3021 { 3022 3023 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3024 uap->mode, uap->flag)); 3025 } 3026 3027 /* 3028 * Change mode of a file given path name (don't follow links.) 3029 */ 3030 #ifndef _SYS_SYSPROTO_H_ 3031 struct lchmod_args { 3032 char *path; 3033 int mode; 3034 }; 3035 #endif 3036 int 3037 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3038 { 3039 3040 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3041 uap->mode, AT_SYMLINK_NOFOLLOW)); 3042 } 3043 3044 int 3045 kern_fchmodat(struct thread *td, int fd, const char *path, 3046 enum uio_seg pathseg, mode_t mode, int flag) 3047 { 3048 struct nameidata nd; 3049 int error; 3050 3051 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3052 AT_EMPTY_PATH)) != 0) 3053 return (EINVAL); 3054 3055 AUDIT_ARG_MODE(mode); 3056 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3057 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3058 fd, &cap_fchmod_rights); 3059 if ((error = namei(&nd)) != 0) 3060 return (error); 3061 NDFREE_PNBUF(&nd); 3062 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3063 vrele(nd.ni_vp); 3064 return (error); 3065 } 3066 3067 /* 3068 * Change mode of a file given a file descriptor. 3069 */ 3070 #ifndef _SYS_SYSPROTO_H_ 3071 struct fchmod_args { 3072 int fd; 3073 int mode; 3074 }; 3075 #endif 3076 int 3077 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3078 { 3079 struct file *fp; 3080 int error; 3081 3082 AUDIT_ARG_FD(uap->fd); 3083 AUDIT_ARG_MODE(uap->mode); 3084 3085 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3086 if (error != 0) 3087 return (error); 3088 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3089 fdrop(fp, td); 3090 return (error); 3091 } 3092 3093 /* 3094 * Common implementation for chown(), lchown(), and fchown() 3095 */ 3096 int 3097 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3098 gid_t gid) 3099 { 3100 struct mount *mp; 3101 struct vattr vattr; 3102 int error; 3103 3104 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3105 return (error); 3106 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3107 VATTR_NULL(&vattr); 3108 vattr.va_uid = uid; 3109 vattr.va_gid = gid; 3110 #ifdef MAC 3111 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3112 vattr.va_gid); 3113 if (error == 0) 3114 #endif 3115 error = VOP_SETATTR(vp, &vattr, cred); 3116 VOP_UNLOCK(vp); 3117 vn_finished_write(mp); 3118 return (error); 3119 } 3120 3121 /* 3122 * Set ownership given a path name. 3123 */ 3124 #ifndef _SYS_SYSPROTO_H_ 3125 struct chown_args { 3126 char *path; 3127 int uid; 3128 int gid; 3129 }; 3130 #endif 3131 int 3132 sys_chown(struct thread *td, struct chown_args *uap) 3133 { 3134 3135 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3136 uap->gid, 0)); 3137 } 3138 3139 #ifndef _SYS_SYSPROTO_H_ 3140 struct fchownat_args { 3141 int fd; 3142 const char * path; 3143 uid_t uid; 3144 gid_t gid; 3145 int flag; 3146 }; 3147 #endif 3148 int 3149 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3150 { 3151 3152 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3153 uap->gid, uap->flag)); 3154 } 3155 3156 int 3157 kern_fchownat(struct thread *td, int fd, const char *path, 3158 enum uio_seg pathseg, int uid, int gid, int flag) 3159 { 3160 struct nameidata nd; 3161 int error; 3162 3163 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3164 AT_EMPTY_PATH)) != 0) 3165 return (EINVAL); 3166 3167 AUDIT_ARG_OWNER(uid, gid); 3168 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3169 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3170 fd, &cap_fchown_rights); 3171 3172 if ((error = namei(&nd)) != 0) 3173 return (error); 3174 NDFREE_PNBUF(&nd); 3175 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3176 vrele(nd.ni_vp); 3177 return (error); 3178 } 3179 3180 /* 3181 * Set ownership given a path name, do not cross symlinks. 3182 */ 3183 #ifndef _SYS_SYSPROTO_H_ 3184 struct lchown_args { 3185 char *path; 3186 int uid; 3187 int gid; 3188 }; 3189 #endif 3190 int 3191 sys_lchown(struct thread *td, struct lchown_args *uap) 3192 { 3193 3194 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3195 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3196 } 3197 3198 /* 3199 * Set ownership given a file descriptor. 3200 */ 3201 #ifndef _SYS_SYSPROTO_H_ 3202 struct fchown_args { 3203 int fd; 3204 int uid; 3205 int gid; 3206 }; 3207 #endif 3208 int 3209 sys_fchown(struct thread *td, struct fchown_args *uap) 3210 { 3211 struct file *fp; 3212 int error; 3213 3214 AUDIT_ARG_FD(uap->fd); 3215 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3216 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3217 if (error != 0) 3218 return (error); 3219 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3220 fdrop(fp, td); 3221 return (error); 3222 } 3223 3224 /* 3225 * Common implementation code for utimes(), lutimes(), and futimes(). 3226 */ 3227 static int 3228 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3229 struct timespec *tsp) 3230 { 3231 struct timeval tv[2]; 3232 const struct timeval *tvp; 3233 int error; 3234 3235 if (usrtvp == NULL) { 3236 vfs_timestamp(&tsp[0]); 3237 tsp[1] = tsp[0]; 3238 } else { 3239 if (tvpseg == UIO_SYSSPACE) { 3240 tvp = usrtvp; 3241 } else { 3242 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3243 return (error); 3244 tvp = tv; 3245 } 3246 3247 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3248 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3249 return (EINVAL); 3250 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3251 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3252 } 3253 return (0); 3254 } 3255 3256 /* 3257 * Common implementation code for futimens(), utimensat(). 3258 */ 3259 #define UTIMENS_NULL 0x1 3260 #define UTIMENS_EXIT 0x2 3261 static int 3262 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3263 struct timespec *tsp, int *retflags) 3264 { 3265 struct timespec tsnow; 3266 int error; 3267 3268 vfs_timestamp(&tsnow); 3269 *retflags = 0; 3270 if (usrtsp == NULL) { 3271 tsp[0] = tsnow; 3272 tsp[1] = tsnow; 3273 *retflags |= UTIMENS_NULL; 3274 return (0); 3275 } 3276 if (tspseg == UIO_SYSSPACE) { 3277 tsp[0] = usrtsp[0]; 3278 tsp[1] = usrtsp[1]; 3279 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3280 return (error); 3281 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3282 *retflags |= UTIMENS_EXIT; 3283 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3284 *retflags |= UTIMENS_NULL; 3285 if (tsp[0].tv_nsec == UTIME_OMIT) 3286 tsp[0].tv_sec = VNOVAL; 3287 else if (tsp[0].tv_nsec == UTIME_NOW) 3288 tsp[0] = tsnow; 3289 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3290 return (EINVAL); 3291 if (tsp[1].tv_nsec == UTIME_OMIT) 3292 tsp[1].tv_sec = VNOVAL; 3293 else if (tsp[1].tv_nsec == UTIME_NOW) 3294 tsp[1] = tsnow; 3295 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3296 return (EINVAL); 3297 3298 return (0); 3299 } 3300 3301 /* 3302 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3303 * and utimensat(). 3304 */ 3305 static int 3306 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3307 int numtimes, int nullflag) 3308 { 3309 struct mount *mp; 3310 struct vattr vattr; 3311 int error; 3312 bool setbirthtime; 3313 3314 setbirthtime = false; 3315 vattr.va_birthtime.tv_sec = VNOVAL; 3316 vattr.va_birthtime.tv_nsec = 0; 3317 3318 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3319 return (error); 3320 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3321 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3322 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3323 setbirthtime = true; 3324 VATTR_NULL(&vattr); 3325 vattr.va_atime = ts[0]; 3326 vattr.va_mtime = ts[1]; 3327 if (setbirthtime) 3328 vattr.va_birthtime = ts[1]; 3329 if (numtimes > 2) 3330 vattr.va_birthtime = ts[2]; 3331 if (nullflag) 3332 vattr.va_vaflags |= VA_UTIMES_NULL; 3333 #ifdef MAC 3334 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3335 vattr.va_mtime); 3336 #endif 3337 if (error == 0) 3338 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3339 VOP_UNLOCK(vp); 3340 vn_finished_write(mp); 3341 return (error); 3342 } 3343 3344 /* 3345 * Set the access and modification times of a file. 3346 */ 3347 #ifndef _SYS_SYSPROTO_H_ 3348 struct utimes_args { 3349 char *path; 3350 struct timeval *tptr; 3351 }; 3352 #endif 3353 int 3354 sys_utimes(struct thread *td, struct utimes_args *uap) 3355 { 3356 3357 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3358 uap->tptr, UIO_USERSPACE)); 3359 } 3360 3361 #ifndef _SYS_SYSPROTO_H_ 3362 struct futimesat_args { 3363 int fd; 3364 const char * path; 3365 const struct timeval * times; 3366 }; 3367 #endif 3368 int 3369 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3370 { 3371 3372 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3373 uap->times, UIO_USERSPACE)); 3374 } 3375 3376 int 3377 kern_utimesat(struct thread *td, int fd, const char *path, 3378 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3379 { 3380 struct nameidata nd; 3381 struct timespec ts[2]; 3382 int error; 3383 3384 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3385 return (error); 3386 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3387 &cap_futimes_rights); 3388 3389 if ((error = namei(&nd)) != 0) 3390 return (error); 3391 NDFREE_PNBUF(&nd); 3392 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3393 vrele(nd.ni_vp); 3394 return (error); 3395 } 3396 3397 /* 3398 * Set the access and modification times of a file. 3399 */ 3400 #ifndef _SYS_SYSPROTO_H_ 3401 struct lutimes_args { 3402 char *path; 3403 struct timeval *tptr; 3404 }; 3405 #endif 3406 int 3407 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3408 { 3409 3410 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3411 UIO_USERSPACE)); 3412 } 3413 3414 int 3415 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3416 const struct timeval *tptr, enum uio_seg tptrseg) 3417 { 3418 struct timespec ts[2]; 3419 struct nameidata nd; 3420 int error; 3421 3422 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3423 return (error); 3424 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3425 if ((error = namei(&nd)) != 0) 3426 return (error); 3427 NDFREE_PNBUF(&nd); 3428 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3429 vrele(nd.ni_vp); 3430 return (error); 3431 } 3432 3433 /* 3434 * Set the access and modification times of a file. 3435 */ 3436 #ifndef _SYS_SYSPROTO_H_ 3437 struct futimes_args { 3438 int fd; 3439 struct timeval *tptr; 3440 }; 3441 #endif 3442 int 3443 sys_futimes(struct thread *td, struct futimes_args *uap) 3444 { 3445 3446 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3447 } 3448 3449 int 3450 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3451 enum uio_seg tptrseg) 3452 { 3453 struct timespec ts[2]; 3454 struct file *fp; 3455 int error; 3456 3457 AUDIT_ARG_FD(fd); 3458 error = getutimes(tptr, tptrseg, ts); 3459 if (error != 0) 3460 return (error); 3461 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3462 if (error != 0) 3463 return (error); 3464 #ifdef AUDIT 3465 if (AUDITING_TD(td)) { 3466 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3467 AUDIT_ARG_VNODE1(fp->f_vnode); 3468 VOP_UNLOCK(fp->f_vnode); 3469 } 3470 #endif 3471 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3472 fdrop(fp, td); 3473 return (error); 3474 } 3475 3476 int 3477 sys_futimens(struct thread *td, struct futimens_args *uap) 3478 { 3479 3480 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3481 } 3482 3483 int 3484 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3485 enum uio_seg tptrseg) 3486 { 3487 struct timespec ts[2]; 3488 struct file *fp; 3489 int error, flags; 3490 3491 AUDIT_ARG_FD(fd); 3492 error = getutimens(tptr, tptrseg, ts, &flags); 3493 if (error != 0) 3494 return (error); 3495 if (flags & UTIMENS_EXIT) 3496 return (0); 3497 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3498 if (error != 0) 3499 return (error); 3500 #ifdef AUDIT 3501 if (AUDITING_TD(td)) { 3502 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3503 AUDIT_ARG_VNODE1(fp->f_vnode); 3504 VOP_UNLOCK(fp->f_vnode); 3505 } 3506 #endif 3507 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3508 fdrop(fp, td); 3509 return (error); 3510 } 3511 3512 int 3513 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3514 { 3515 3516 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3517 uap->times, UIO_USERSPACE, uap->flag)); 3518 } 3519 3520 int 3521 kern_utimensat(struct thread *td, int fd, const char *path, 3522 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3523 int flag) 3524 { 3525 struct nameidata nd; 3526 struct timespec ts[2]; 3527 int error, flags; 3528 3529 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3530 AT_EMPTY_PATH)) != 0) 3531 return (EINVAL); 3532 3533 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3534 return (error); 3535 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3536 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3537 pathseg, path, fd, &cap_futimes_rights); 3538 if ((error = namei(&nd)) != 0) 3539 return (error); 3540 /* 3541 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3542 * POSIX states: 3543 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3544 * "Search permission is denied by a component of the path prefix." 3545 */ 3546 NDFREE_PNBUF(&nd); 3547 if ((flags & UTIMENS_EXIT) == 0) 3548 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3549 vrele(nd.ni_vp); 3550 return (error); 3551 } 3552 3553 /* 3554 * Truncate a file given its path name. 3555 */ 3556 #ifndef _SYS_SYSPROTO_H_ 3557 struct truncate_args { 3558 char *path; 3559 int pad; 3560 off_t length; 3561 }; 3562 #endif 3563 int 3564 sys_truncate(struct thread *td, struct truncate_args *uap) 3565 { 3566 3567 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3568 } 3569 3570 int 3571 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3572 off_t length) 3573 { 3574 struct mount *mp; 3575 struct vnode *vp; 3576 void *rl_cookie; 3577 struct nameidata nd; 3578 int error; 3579 3580 if (length < 0) 3581 return (EINVAL); 3582 NDPREINIT(&nd); 3583 retry: 3584 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3585 if ((error = namei(&nd)) != 0) 3586 return (error); 3587 vp = nd.ni_vp; 3588 NDFREE_PNBUF(&nd); 3589 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3590 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3591 vn_rangelock_unlock(vp, rl_cookie); 3592 vrele(vp); 3593 return (error); 3594 } 3595 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3596 if (vp->v_type == VDIR) { 3597 error = EISDIR; 3598 goto out; 3599 } 3600 #ifdef MAC 3601 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3602 if (error != 0) 3603 goto out; 3604 #endif 3605 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3606 if (error != 0) 3607 goto out; 3608 3609 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3610 out: 3611 VOP_UNLOCK(vp); 3612 vn_finished_write(mp); 3613 vn_rangelock_unlock(vp, rl_cookie); 3614 vrele(vp); 3615 if (error == ERELOOKUP) 3616 goto retry; 3617 return (error); 3618 } 3619 3620 #if defined(COMPAT_43) 3621 /* 3622 * Truncate a file given its path name. 3623 */ 3624 #ifndef _SYS_SYSPROTO_H_ 3625 struct otruncate_args { 3626 char *path; 3627 long length; 3628 }; 3629 #endif 3630 int 3631 otruncate(struct thread *td, struct otruncate_args *uap) 3632 { 3633 3634 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3635 } 3636 #endif /* COMPAT_43 */ 3637 3638 #if defined(COMPAT_FREEBSD6) 3639 /* Versions with the pad argument */ 3640 int 3641 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3642 { 3643 3644 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3645 } 3646 3647 int 3648 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3649 { 3650 3651 return (kern_ftruncate(td, uap->fd, uap->length)); 3652 } 3653 #endif 3654 3655 int 3656 kern_fsync(struct thread *td, int fd, bool fullsync) 3657 { 3658 struct vnode *vp; 3659 struct mount *mp; 3660 struct file *fp; 3661 int error; 3662 3663 AUDIT_ARG_FD(fd); 3664 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3665 if (error != 0) 3666 return (error); 3667 vp = fp->f_vnode; 3668 #if 0 3669 if (!fullsync) 3670 /* XXXKIB: compete outstanding aio writes */; 3671 #endif 3672 retry: 3673 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3674 if (error != 0) 3675 goto drop; 3676 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3677 AUDIT_ARG_VNODE1(vp); 3678 vnode_pager_clean_async(vp); 3679 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3680 VOP_UNLOCK(vp); 3681 vn_finished_write(mp); 3682 if (error == ERELOOKUP) 3683 goto retry; 3684 drop: 3685 fdrop(fp, td); 3686 return (error); 3687 } 3688 3689 /* 3690 * Sync an open file. 3691 */ 3692 #ifndef _SYS_SYSPROTO_H_ 3693 struct fsync_args { 3694 int fd; 3695 }; 3696 #endif 3697 int 3698 sys_fsync(struct thread *td, struct fsync_args *uap) 3699 { 3700 3701 return (kern_fsync(td, uap->fd, true)); 3702 } 3703 3704 int 3705 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3706 { 3707 3708 return (kern_fsync(td, uap->fd, false)); 3709 } 3710 3711 /* 3712 * Rename files. Source and destination must either both be directories, or 3713 * both not be directories. If target is a directory, it must be empty. 3714 */ 3715 #ifndef _SYS_SYSPROTO_H_ 3716 struct rename_args { 3717 char *from; 3718 char *to; 3719 }; 3720 #endif 3721 int 3722 sys_rename(struct thread *td, struct rename_args *uap) 3723 { 3724 3725 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3726 uap->to, UIO_USERSPACE)); 3727 } 3728 3729 #ifndef _SYS_SYSPROTO_H_ 3730 struct renameat_args { 3731 int oldfd; 3732 char *old; 3733 int newfd; 3734 char *new; 3735 }; 3736 #endif 3737 int 3738 sys_renameat(struct thread *td, struct renameat_args *uap) 3739 { 3740 3741 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3742 UIO_USERSPACE)); 3743 } 3744 3745 #ifdef MAC 3746 static int 3747 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3748 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3749 { 3750 int error; 3751 3752 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3753 pathseg, old, oldfd, &cap_renameat_source_rights); 3754 if ((error = namei(fromnd)) != 0) 3755 return (error); 3756 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3757 fromnd->ni_vp, &fromnd->ni_cnd); 3758 VOP_UNLOCK(fromnd->ni_dvp); 3759 if (fromnd->ni_dvp != fromnd->ni_vp) 3760 VOP_UNLOCK(fromnd->ni_vp); 3761 if (error != 0) { 3762 NDFREE_PNBUF(fromnd); 3763 vrele(fromnd->ni_dvp); 3764 vrele(fromnd->ni_vp); 3765 } 3766 return (error); 3767 } 3768 #endif 3769 3770 int 3771 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3772 const char *new, enum uio_seg pathseg) 3773 { 3774 struct mount *mp, *tmp; 3775 struct vnode *tvp, *fvp, *tdvp; 3776 struct nameidata fromnd, tond; 3777 uint64_t tondflags; 3778 int error; 3779 short irflag; 3780 3781 again: 3782 tmp = mp = NULL; 3783 bwillwrite(); 3784 #ifdef MAC 3785 if (mac_vnode_check_rename_from_enabled()) { 3786 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3787 &fromnd); 3788 if (error != 0) 3789 return (error); 3790 } else { 3791 #endif 3792 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3793 pathseg, old, oldfd, &cap_renameat_source_rights); 3794 if ((error = namei(&fromnd)) != 0) 3795 return (error); 3796 #ifdef MAC 3797 } 3798 #endif 3799 fvp = fromnd.ni_vp; 3800 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3801 if (fromnd.ni_vp->v_type == VDIR) 3802 tondflags |= WILLBEDIR; 3803 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3804 &cap_renameat_target_rights); 3805 if ((error = namei(&tond)) != 0) { 3806 /* Translate error code for rename("dir1", "dir2/."). */ 3807 if (error == EISDIR && fvp->v_type == VDIR) 3808 error = EINVAL; 3809 NDFREE_PNBUF(&fromnd); 3810 vrele(fromnd.ni_dvp); 3811 vrele(fvp); 3812 goto out1; 3813 } 3814 tdvp = tond.ni_dvp; 3815 tvp = tond.ni_vp; 3816 error = vn_start_write(fvp, &mp, V_NOWAIT); 3817 if (error != 0) { 3818 again1: 3819 NDFREE_PNBUF(&fromnd); 3820 NDFREE_PNBUF(&tond); 3821 if (tvp != NULL) 3822 vput(tvp); 3823 if (tdvp == tvp) 3824 vrele(tdvp); 3825 else 3826 vput(tdvp); 3827 vrele(fromnd.ni_dvp); 3828 vrele(fvp); 3829 if (tmp != NULL) { 3830 lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); 3831 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); 3832 vfs_rel(tmp); 3833 tmp = NULL; 3834 } 3835 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3836 if (error != 0) 3837 return (error); 3838 goto again; 3839 } 3840 error = VOP_GETWRITEMOUNT(tdvp, &tmp); 3841 if (error != 0 || tmp == NULL) 3842 goto again1; 3843 error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); 3844 if (error != 0) { 3845 vn_finished_write(mp); 3846 goto again1; 3847 } 3848 irflag = vn_irflag_read(fvp); 3849 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3850 (irflag & VIRF_NAMEDDIR) != 0) { 3851 error = EINVAL; 3852 goto out; 3853 } 3854 if (tvp != NULL) { 3855 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3856 error = ENOTDIR; 3857 goto out; 3858 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3859 error = EISDIR; 3860 goto out; 3861 } 3862 #ifdef CAPABILITIES 3863 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3864 /* 3865 * If the target already exists we require CAP_UNLINKAT 3866 * from 'newfd', when newfd was used for the lookup. 3867 */ 3868 error = cap_check(&tond.ni_filecaps.fc_rights, 3869 &cap_unlinkat_rights); 3870 if (error != 0) 3871 goto out; 3872 } 3873 #endif 3874 } 3875 if (fvp == tdvp) { 3876 error = EINVAL; 3877 goto out; 3878 } 3879 /* 3880 * If the source is the same as the destination (that is, if they 3881 * are links to the same vnode), then there is nothing to do. 3882 */ 3883 if (fvp == tvp) 3884 error = ERESTART; 3885 #ifdef MAC 3886 else 3887 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3888 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3889 #endif 3890 out: 3891 if (error == 0) { 3892 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3893 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3894 NDFREE_PNBUF(&fromnd); 3895 NDFREE_PNBUF(&tond); 3896 } else { 3897 NDFREE_PNBUF(&fromnd); 3898 NDFREE_PNBUF(&tond); 3899 if (tvp != NULL) 3900 vput(tvp); 3901 if (tdvp == tvp) 3902 vrele(tdvp); 3903 else 3904 vput(tdvp); 3905 vrele(fromnd.ni_dvp); 3906 vrele(fvp); 3907 } 3908 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); 3909 vfs_rel(tmp); 3910 vn_finished_write(mp); 3911 out1: 3912 if (error == ERESTART) 3913 return (0); 3914 if (error == ERELOOKUP) 3915 goto again; 3916 return (error); 3917 } 3918 3919 /* 3920 * Make a directory file. 3921 */ 3922 #ifndef _SYS_SYSPROTO_H_ 3923 struct mkdir_args { 3924 char *path; 3925 int mode; 3926 }; 3927 #endif 3928 int 3929 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3930 { 3931 3932 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3933 uap->mode)); 3934 } 3935 3936 #ifndef _SYS_SYSPROTO_H_ 3937 struct mkdirat_args { 3938 int fd; 3939 char *path; 3940 mode_t mode; 3941 }; 3942 #endif 3943 int 3944 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3945 { 3946 3947 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3948 } 3949 3950 int 3951 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3952 int mode) 3953 { 3954 struct mount *mp; 3955 struct vattr vattr; 3956 struct nameidata nd; 3957 int error; 3958 3959 AUDIT_ARG_MODE(mode); 3960 NDPREINIT(&nd); 3961 restart: 3962 bwillwrite(); 3963 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3964 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3965 segflg, path, fd, &cap_mkdirat_rights); 3966 if ((error = namei(&nd)) != 0) 3967 return (error); 3968 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3969 NDFREE_PNBUF(&nd); 3970 vput(nd.ni_dvp); 3971 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3972 return (error); 3973 goto restart; 3974 } 3975 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 3976 error = EINVAL; 3977 goto out; 3978 } 3979 VATTR_NULL(&vattr); 3980 vattr.va_type = VDIR; 3981 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3982 #ifdef MAC 3983 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3984 &vattr); 3985 if (error != 0) 3986 goto out; 3987 #endif 3988 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3989 out: 3990 NDFREE_PNBUF(&nd); 3991 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3992 vn_finished_write(mp); 3993 if (error == ERELOOKUP) 3994 goto restart; 3995 return (error); 3996 } 3997 3998 /* 3999 * Remove a directory file. 4000 */ 4001 #ifndef _SYS_SYSPROTO_H_ 4002 struct rmdir_args { 4003 char *path; 4004 }; 4005 #endif 4006 int 4007 sys_rmdir(struct thread *td, struct rmdir_args *uap) 4008 { 4009 4010 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 4011 0)); 4012 } 4013 4014 int 4015 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 4016 enum uio_seg pathseg, int flag) 4017 { 4018 struct mount *mp; 4019 struct vnode *vp; 4020 struct file *fp; 4021 struct nameidata nd; 4022 cap_rights_t rights; 4023 int error; 4024 4025 fp = NULL; 4026 if (fd != FD_NONE) { 4027 error = getvnode(td, fd, cap_rights_init_one(&rights, 4028 CAP_LOOKUP), &fp); 4029 if (error != 0) 4030 return (error); 4031 } 4032 4033 NDPREINIT(&nd); 4034 restart: 4035 bwillwrite(); 4036 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4037 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4038 pathseg, path, dfd, &cap_unlinkat_rights); 4039 if ((error = namei(&nd)) != 0) 4040 goto fdout; 4041 vp = nd.ni_vp; 4042 if (vp->v_type != VDIR) { 4043 error = ENOTDIR; 4044 goto out; 4045 } 4046 /* 4047 * No rmdir "." please. 4048 */ 4049 if (nd.ni_dvp == vp) { 4050 error = EINVAL; 4051 goto out; 4052 } 4053 /* 4054 * The root of a mounted filesystem cannot be deleted. 4055 */ 4056 if (vp->v_vflag & VV_ROOT) { 4057 error = EBUSY; 4058 goto out; 4059 } 4060 4061 if (fp != NULL && fp->f_vnode != vp) { 4062 if (VN_IS_DOOMED(fp->f_vnode)) 4063 error = EBADF; 4064 else 4065 error = EDEADLK; 4066 goto out; 4067 } 4068 4069 #ifdef MAC 4070 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4071 &nd.ni_cnd); 4072 if (error != 0) 4073 goto out; 4074 #endif 4075 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4076 NDFREE_PNBUF(&nd); 4077 vput(vp); 4078 if (nd.ni_dvp == vp) 4079 vrele(nd.ni_dvp); 4080 else 4081 vput(nd.ni_dvp); 4082 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4083 goto fdout; 4084 goto restart; 4085 } 4086 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4087 vn_finished_write(mp); 4088 out: 4089 NDFREE_PNBUF(&nd); 4090 vput(vp); 4091 if (nd.ni_dvp == vp) 4092 vrele(nd.ni_dvp); 4093 else 4094 vput(nd.ni_dvp); 4095 if (error == ERELOOKUP) 4096 goto restart; 4097 fdout: 4098 if (fp != NULL) 4099 fdrop(fp, td); 4100 return (error); 4101 } 4102 4103 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4104 int 4105 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4106 long *basep, void (*func)(struct freebsd11_dirent *)) 4107 { 4108 struct freebsd11_dirent dstdp; 4109 struct dirent *dp, *edp; 4110 char *dirbuf; 4111 off_t base; 4112 ssize_t resid, ucount; 4113 int error; 4114 4115 /* XXX arbitrary sanity limit on `count'. */ 4116 count = min(count, 64 * 1024); 4117 4118 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4119 4120 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4121 UIO_SYSSPACE); 4122 if (error != 0) 4123 goto done; 4124 if (basep != NULL) 4125 *basep = base; 4126 4127 ucount = 0; 4128 for (dp = (struct dirent *)dirbuf, 4129 edp = (struct dirent *)&dirbuf[count - resid]; 4130 ucount < count && dp < edp; ) { 4131 if (dp->d_reclen == 0) 4132 break; 4133 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4134 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4135 continue; 4136 dstdp.d_type = dp->d_type; 4137 dstdp.d_namlen = dp->d_namlen; 4138 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4139 if (dstdp.d_fileno != dp->d_fileno) { 4140 switch (ino64_trunc_error) { 4141 default: 4142 case 0: 4143 break; 4144 case 1: 4145 error = EOVERFLOW; 4146 goto done; 4147 case 2: 4148 dstdp.d_fileno = UINT32_MAX; 4149 break; 4150 } 4151 } 4152 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4153 ((dp->d_namlen + 1 + 3) &~ 3); 4154 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4155 bzero(dstdp.d_name + dstdp.d_namlen, 4156 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4157 dstdp.d_namlen); 4158 MPASS(dstdp.d_reclen <= dp->d_reclen); 4159 MPASS(ucount + dstdp.d_reclen <= count); 4160 if (func != NULL) 4161 func(&dstdp); 4162 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4163 if (error != 0) 4164 break; 4165 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4166 ucount += dstdp.d_reclen; 4167 } 4168 4169 done: 4170 free(dirbuf, M_TEMP); 4171 if (error == 0) 4172 td->td_retval[0] = ucount; 4173 return (error); 4174 } 4175 #endif /* COMPAT */ 4176 4177 #ifdef COMPAT_43 4178 static void 4179 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4180 { 4181 #if (BYTE_ORDER == LITTLE_ENDIAN) 4182 /* 4183 * The expected low byte of dp->d_namlen is our dp->d_type. 4184 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 4185 */ 4186 dp->d_type = dp->d_namlen; 4187 dp->d_namlen = 0; 4188 #else 4189 /* 4190 * The dp->d_type is the high byte of the expected dp->d_namlen, 4191 * so must be zero'ed. 4192 */ 4193 dp->d_type = 0; 4194 #endif 4195 } 4196 4197 /* 4198 * Read a block of directory entries in a filesystem independent format. 4199 */ 4200 #ifndef _SYS_SYSPROTO_H_ 4201 struct ogetdirentries_args { 4202 int fd; 4203 char *buf; 4204 u_int count; 4205 long *basep; 4206 }; 4207 #endif 4208 int 4209 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4210 { 4211 long loff; 4212 int error; 4213 4214 error = kern_ogetdirentries(td, uap, &loff); 4215 if (error == 0) 4216 error = copyout(&loff, uap->basep, sizeof(long)); 4217 return (error); 4218 } 4219 4220 int 4221 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4222 long *ploff) 4223 { 4224 long base; 4225 int error; 4226 4227 /* XXX arbitrary sanity limit on `count'. */ 4228 if (uap->count > 64 * 1024) 4229 return (EINVAL); 4230 4231 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4232 &base, ogetdirentries_cvt); 4233 4234 if (error == 0 && uap->basep != NULL) 4235 error = copyout(&base, uap->basep, sizeof(long)); 4236 4237 return (error); 4238 } 4239 #endif /* COMPAT_43 */ 4240 4241 #if defined(COMPAT_FREEBSD11) 4242 #ifndef _SYS_SYSPROTO_H_ 4243 struct freebsd11_getdirentries_args { 4244 int fd; 4245 char *buf; 4246 u_int count; 4247 long *basep; 4248 }; 4249 #endif 4250 int 4251 freebsd11_getdirentries(struct thread *td, 4252 struct freebsd11_getdirentries_args *uap) 4253 { 4254 long base; 4255 int error; 4256 4257 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4258 &base, NULL); 4259 4260 if (error == 0 && uap->basep != NULL) 4261 error = copyout(&base, uap->basep, sizeof(long)); 4262 return (error); 4263 } 4264 4265 int 4266 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4267 { 4268 struct freebsd11_getdirentries_args ap; 4269 4270 ap.fd = uap->fd; 4271 ap.buf = uap->buf; 4272 ap.count = uap->count; 4273 ap.basep = NULL; 4274 return (freebsd11_getdirentries(td, &ap)); 4275 } 4276 #endif /* COMPAT_FREEBSD11 */ 4277 4278 /* 4279 * Read a block of directory entries in a filesystem independent format. 4280 */ 4281 int 4282 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4283 { 4284 off_t base; 4285 int error; 4286 4287 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4288 NULL, UIO_USERSPACE); 4289 if (error != 0) 4290 return (error); 4291 if (uap->basep != NULL) 4292 error = copyout(&base, uap->basep, sizeof(off_t)); 4293 return (error); 4294 } 4295 4296 int 4297 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4298 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4299 { 4300 struct vnode *vp; 4301 struct file *fp; 4302 struct uio auio; 4303 struct iovec aiov; 4304 off_t loff; 4305 int error, eofflag; 4306 off_t foffset; 4307 4308 AUDIT_ARG_FD(fd); 4309 if (count > IOSIZE_MAX) 4310 return (EINVAL); 4311 auio.uio_resid = count; 4312 error = getvnode(td, fd, &cap_read_rights, &fp); 4313 if (error != 0) 4314 return (error); 4315 if ((fp->f_flag & FREAD) == 0) { 4316 fdrop(fp, td); 4317 return (EBADF); 4318 } 4319 vp = fp->f_vnode; 4320 foffset = foffset_lock(fp, 0); 4321 unionread: 4322 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4323 error = ENOENT; 4324 goto fail; 4325 } 4326 aiov.iov_base = buf; 4327 aiov.iov_len = count; 4328 auio.uio_iov = &aiov; 4329 auio.uio_iovcnt = 1; 4330 auio.uio_rw = UIO_READ; 4331 auio.uio_segflg = bufseg; 4332 auio.uio_td = td; 4333 vn_lock(vp, LK_SHARED | LK_RETRY); 4334 /* 4335 * We want to return ENOTDIR for anything that is not VDIR, but 4336 * not for VBAD, and we can't check for VBAD while the vnode is 4337 * unlocked. 4338 */ 4339 if (vp->v_type != VDIR) { 4340 if (vp->v_type == VBAD) 4341 error = EBADF; 4342 else 4343 error = ENOTDIR; 4344 VOP_UNLOCK(vp); 4345 goto fail; 4346 } 4347 AUDIT_ARG_VNODE1(vp); 4348 loff = auio.uio_offset = foffset; 4349 #ifdef MAC 4350 error = mac_vnode_check_readdir(td->td_ucred, vp); 4351 if (error == 0) 4352 #endif 4353 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4354 NULL); 4355 foffset = auio.uio_offset; 4356 if (error != 0) { 4357 VOP_UNLOCK(vp); 4358 goto fail; 4359 } 4360 if (count == auio.uio_resid && 4361 (vp->v_vflag & VV_ROOT) && 4362 (vp->v_mount->mnt_flag & MNT_UNION)) { 4363 struct vnode *tvp = vp; 4364 4365 vp = vp->v_mount->mnt_vnodecovered; 4366 VREF(vp); 4367 fp->f_vnode = vp; 4368 foffset = 0; 4369 vput(tvp); 4370 goto unionread; 4371 } 4372 VOP_UNLOCK(vp); 4373 *basep = loff; 4374 if (residp != NULL) 4375 *residp = auio.uio_resid; 4376 td->td_retval[0] = count - auio.uio_resid; 4377 fail: 4378 foffset_unlock(fp, foffset, 0); 4379 fdrop(fp, td); 4380 return (error); 4381 } 4382 4383 /* 4384 * Set the mode mask for creation of filesystem nodes. 4385 */ 4386 #ifndef _SYS_SYSPROTO_H_ 4387 struct umask_args { 4388 int newmask; 4389 }; 4390 #endif 4391 int 4392 sys_umask(struct thread *td, struct umask_args *uap) 4393 { 4394 struct pwddesc *pdp; 4395 4396 pdp = td->td_proc->p_pd; 4397 PWDDESC_XLOCK(pdp); 4398 td->td_retval[0] = pdp->pd_cmask; 4399 pdp->pd_cmask = uap->newmask & ALLPERMS; 4400 PWDDESC_XUNLOCK(pdp); 4401 return (0); 4402 } 4403 4404 /* 4405 * Void all references to file by ripping underlying filesystem away from 4406 * vnode. 4407 */ 4408 #ifndef _SYS_SYSPROTO_H_ 4409 struct revoke_args { 4410 char *path; 4411 }; 4412 #endif 4413 int 4414 sys_revoke(struct thread *td, struct revoke_args *uap) 4415 { 4416 struct vnode *vp; 4417 struct vattr vattr; 4418 struct nameidata nd; 4419 int error; 4420 4421 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4422 uap->path); 4423 if ((error = namei(&nd)) != 0) 4424 return (error); 4425 vp = nd.ni_vp; 4426 NDFREE_PNBUF(&nd); 4427 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4428 error = EINVAL; 4429 goto out; 4430 } 4431 #ifdef MAC 4432 error = mac_vnode_check_revoke(td->td_ucred, vp); 4433 if (error != 0) 4434 goto out; 4435 #endif 4436 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4437 if (error != 0) 4438 goto out; 4439 if (td->td_ucred->cr_uid != vattr.va_uid) { 4440 error = priv_check(td, PRIV_VFS_ADMIN); 4441 if (error != 0) 4442 goto out; 4443 } 4444 if (devfs_usecount(vp) > 0) 4445 VOP_REVOKE(vp, REVOKEALL); 4446 out: 4447 vput(vp); 4448 return (error); 4449 } 4450 4451 /* 4452 * This variant of getvnode() allows O_PATH files. Caller should 4453 * ensure that returned file and vnode are only used for compatible 4454 * semantics. 4455 */ 4456 int 4457 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, 4458 uint8_t *flagsp, struct file **fpp) 4459 { 4460 struct file *fp; 4461 int error; 4462 4463 error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp); 4464 if (error != 0) 4465 return (error); 4466 4467 /* 4468 * The file could be not of the vnode type, or it may be not 4469 * yet fully initialized, in which case the f_vnode pointer 4470 * may be set, but f_ops is still badfileops. E.g., 4471 * devfs_open() transiently create such situation to 4472 * facilitate csw d_fdopen(). 4473 * 4474 * Dupfdopen() handling in kern_openat() installs the 4475 * half-baked file into the process descriptor table, allowing 4476 * other thread to dereference it. Guard against the race by 4477 * checking f_ops. 4478 */ 4479 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4480 fdrop(fp, td); 4481 *fpp = NULL; 4482 return (EINVAL); 4483 } 4484 4485 *fpp = fp; 4486 return (0); 4487 } 4488 4489 /* 4490 * Convert a user file descriptor to a kernel file entry and check 4491 * that, if it is a capability, the correct rights are present. 4492 * A reference on the file entry is held upon returning. 4493 */ 4494 int 4495 getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, 4496 struct file **fpp) 4497 { 4498 int error; 4499 4500 error = getvnode_path(td, fd, rightsp, NULL, fpp); 4501 if (__predict_false(error != 0)) 4502 return (error); 4503 4504 /* 4505 * Filter out O_PATH file descriptors, most getvnode() callers 4506 * do not call fo_ methods. 4507 */ 4508 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4509 fdrop(*fpp, td); 4510 *fpp = NULL; 4511 error = EBADF; 4512 } 4513 4514 return (error); 4515 } 4516 4517 /* 4518 * Get an (NFS) file handle. 4519 */ 4520 #ifndef _SYS_SYSPROTO_H_ 4521 struct lgetfh_args { 4522 char *fname; 4523 fhandle_t *fhp; 4524 }; 4525 #endif 4526 int 4527 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4528 { 4529 4530 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4531 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4532 } 4533 4534 #ifndef _SYS_SYSPROTO_H_ 4535 struct getfh_args { 4536 char *fname; 4537 fhandle_t *fhp; 4538 }; 4539 #endif 4540 int 4541 sys_getfh(struct thread *td, struct getfh_args *uap) 4542 { 4543 4544 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4545 uap->fhp, UIO_USERSPACE)); 4546 } 4547 4548 /* 4549 * syscall for the rpc.lockd to use to translate an open descriptor into 4550 * a NFS file handle. 4551 * 4552 * warning: do not remove the priv_check() call or this becomes one giant 4553 * security hole. 4554 */ 4555 #ifndef _SYS_SYSPROTO_H_ 4556 struct getfhat_args { 4557 int fd; 4558 char *path; 4559 fhandle_t *fhp; 4560 int flags; 4561 }; 4562 #endif 4563 int 4564 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4565 { 4566 4567 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4568 uap->fhp, UIO_USERSPACE)); 4569 } 4570 4571 int 4572 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4573 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4574 { 4575 struct nameidata nd; 4576 fhandle_t fh; 4577 struct vnode *vp; 4578 int error; 4579 4580 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4581 return (EINVAL); 4582 error = priv_check(td, PRIV_VFS_GETFH); 4583 if (error != 0) 4584 return (error); 4585 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4586 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4587 fd); 4588 error = namei(&nd); 4589 if (error != 0) 4590 return (error); 4591 NDFREE_PNBUF(&nd); 4592 vp = nd.ni_vp; 4593 bzero(&fh, sizeof(fh)); 4594 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4595 error = VOP_VPTOFH(vp, &fh.fh_fid); 4596 vput(vp); 4597 if (error == 0) { 4598 if (fhseg == UIO_USERSPACE) 4599 error = copyout(&fh, fhp, sizeof (fh)); 4600 else 4601 memcpy(fhp, &fh, sizeof(fh)); 4602 } 4603 return (error); 4604 } 4605 4606 #ifndef _SYS_SYSPROTO_H_ 4607 struct fhlink_args { 4608 fhandle_t *fhp; 4609 const char *to; 4610 }; 4611 #endif 4612 int 4613 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4614 { 4615 4616 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4617 } 4618 4619 #ifndef _SYS_SYSPROTO_H_ 4620 struct fhlinkat_args { 4621 fhandle_t *fhp; 4622 int tofd; 4623 const char *to; 4624 }; 4625 #endif 4626 int 4627 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4628 { 4629 4630 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4631 } 4632 4633 static int 4634 kern_fhlinkat(struct thread *td, int fd, const char *path, 4635 enum uio_seg pathseg, fhandle_t *fhp) 4636 { 4637 fhandle_t fh; 4638 struct mount *mp; 4639 struct vnode *vp; 4640 int error; 4641 4642 error = priv_check(td, PRIV_VFS_GETFH); 4643 if (error != 0) 4644 return (error); 4645 error = copyin(fhp, &fh, sizeof(fh)); 4646 if (error != 0) 4647 return (error); 4648 do { 4649 bwillwrite(); 4650 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4651 return (ESTALE); 4652 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4653 vfs_unbusy(mp); 4654 if (error != 0) 4655 return (error); 4656 VOP_UNLOCK(vp); 4657 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4658 } while (error == EAGAIN || error == ERELOOKUP); 4659 return (error); 4660 } 4661 4662 #ifndef _SYS_SYSPROTO_H_ 4663 struct fhreadlink_args { 4664 fhandle_t *fhp; 4665 char *buf; 4666 size_t bufsize; 4667 }; 4668 #endif 4669 int 4670 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4671 { 4672 fhandle_t fh; 4673 struct mount *mp; 4674 struct vnode *vp; 4675 int error; 4676 4677 error = priv_check(td, PRIV_VFS_GETFH); 4678 if (error != 0) 4679 return (error); 4680 if (uap->bufsize > IOSIZE_MAX) 4681 return (EINVAL); 4682 error = copyin(uap->fhp, &fh, sizeof(fh)); 4683 if (error != 0) 4684 return (error); 4685 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4686 return (ESTALE); 4687 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4688 vfs_unbusy(mp); 4689 if (error != 0) 4690 return (error); 4691 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4692 vput(vp); 4693 return (error); 4694 } 4695 4696 /* 4697 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4698 * open descriptor. 4699 * 4700 * warning: do not remove the priv_check() call or this becomes one giant 4701 * security hole. 4702 */ 4703 #ifndef _SYS_SYSPROTO_H_ 4704 struct fhopen_args { 4705 const struct fhandle *u_fhp; 4706 int flags; 4707 }; 4708 #endif 4709 int 4710 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4711 { 4712 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4713 } 4714 4715 int 4716 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4717 { 4718 struct mount *mp; 4719 struct vnode *vp; 4720 struct fhandle fhp; 4721 struct file *fp; 4722 int error, indx; 4723 bool named_attr; 4724 4725 error = priv_check(td, PRIV_VFS_FHOPEN); 4726 if (error != 0) 4727 return (error); 4728 4729 indx = -1; 4730 if ((flags & O_CREAT) != 0) 4731 return (EINVAL); 4732 error = openflags(&flags); 4733 if (error != 0) 4734 return (error); 4735 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4736 if (error != 0) 4737 return (error); 4738 /* find the mount point */ 4739 mp = vfs_busyfs(&fhp.fh_fsid); 4740 if (mp == NULL) 4741 return (ESTALE); 4742 /* now give me my vnode, it gets returned to me locked */ 4743 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4744 vfs_unbusy(mp); 4745 if (error != 0) 4746 return (error); 4747 4748 /* 4749 * Check to see if the file handle refers to a named attribute 4750 * directory or attribute. If it does, the O_NAMEDATTR flag 4751 * must have been specified. 4752 */ 4753 named_attr = (vn_irflag_read(vp) & 4754 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4755 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4756 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4757 vput(vp); 4758 return (ENOATTR); 4759 } 4760 4761 error = falloc_noinstall(td, &fp); 4762 if (error != 0) { 4763 vput(vp); 4764 return (error); 4765 } 4766 /* Set the flags early so the finit in devfs can pick them up. */ 4767 fp->f_flag = flags & FMASK; 4768 4769 #ifdef INVARIANTS 4770 td->td_dupfd = -1; 4771 #endif 4772 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4773 if (error != 0) { 4774 KASSERT(fp->f_ops == &badfileops, 4775 ("VOP_OPEN in fhopen() set f_ops")); 4776 KASSERT(td->td_dupfd < 0, 4777 ("fhopen() encountered fdopen()")); 4778 4779 vput(vp); 4780 goto bad; 4781 } 4782 #ifdef INVARIANTS 4783 td->td_dupfd = 0; 4784 #endif 4785 finit_open(fp, vp, flags); 4786 VOP_UNLOCK(vp); 4787 if ((flags & O_TRUNC) != 0) { 4788 error = fo_truncate(fp, 0, td->td_ucred, td); 4789 if (error != 0) 4790 goto bad; 4791 } 4792 4793 error = finstall(td, fp, &indx, flags, NULL); 4794 bad: 4795 fdrop(fp, td); 4796 td->td_retval[0] = indx; 4797 return (error); 4798 } 4799 4800 /* 4801 * Stat an (NFS) file handle. 4802 */ 4803 #ifndef _SYS_SYSPROTO_H_ 4804 struct fhstat_args { 4805 struct fhandle *u_fhp; 4806 struct stat *sb; 4807 }; 4808 #endif 4809 int 4810 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4811 { 4812 struct stat sb; 4813 struct fhandle fh; 4814 int error; 4815 4816 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4817 if (error != 0) 4818 return (error); 4819 error = kern_fhstat(td, fh, &sb); 4820 if (error == 0) 4821 error = copyout(&sb, uap->sb, sizeof(sb)); 4822 return (error); 4823 } 4824 4825 int 4826 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4827 { 4828 struct mount *mp; 4829 struct vnode *vp; 4830 int error; 4831 4832 error = priv_check(td, PRIV_VFS_FHSTAT); 4833 if (error != 0) 4834 return (error); 4835 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4836 return (ESTALE); 4837 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4838 vfs_unbusy(mp); 4839 if (error != 0) 4840 return (error); 4841 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4842 vput(vp); 4843 return (error); 4844 } 4845 4846 /* 4847 * Implement fstatfs() for (NFS) file handles. 4848 */ 4849 #ifndef _SYS_SYSPROTO_H_ 4850 struct fhstatfs_args { 4851 struct fhandle *u_fhp; 4852 struct statfs *buf; 4853 }; 4854 #endif 4855 int 4856 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4857 { 4858 struct statfs *sfp; 4859 fhandle_t fh; 4860 int error; 4861 4862 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4863 if (error != 0) 4864 return (error); 4865 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4866 error = kern_fhstatfs(td, fh, sfp); 4867 if (error == 0) 4868 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4869 free(sfp, M_STATFS); 4870 return (error); 4871 } 4872 4873 int 4874 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4875 { 4876 struct mount *mp; 4877 struct vnode *vp; 4878 int error; 4879 4880 error = priv_check(td, PRIV_VFS_FHSTATFS); 4881 if (error != 0) 4882 return (error); 4883 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4884 return (ESTALE); 4885 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4886 if (error != 0) { 4887 vfs_unbusy(mp); 4888 return (error); 4889 } 4890 vput(vp); 4891 error = prison_canseemount(td->td_ucred, mp); 4892 if (error != 0) 4893 goto out; 4894 #ifdef MAC 4895 error = mac_mount_check_stat(td->td_ucred, mp); 4896 if (error != 0) 4897 goto out; 4898 #endif 4899 error = VFS_STATFS(mp, buf); 4900 out: 4901 vfs_unbusy(mp); 4902 return (error); 4903 } 4904 4905 /* 4906 * Unlike madvise(2), we do not make a best effort to remember every 4907 * possible caching hint. Instead, we remember the last setting with 4908 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4909 * region of any current setting. 4910 */ 4911 int 4912 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4913 int advice) 4914 { 4915 struct fadvise_info *fa, *new; 4916 struct file *fp; 4917 struct vnode *vp; 4918 off_t end; 4919 int error; 4920 4921 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4922 return (EINVAL); 4923 AUDIT_ARG_VALUE(advice); 4924 switch (advice) { 4925 case POSIX_FADV_SEQUENTIAL: 4926 case POSIX_FADV_RANDOM: 4927 case POSIX_FADV_NOREUSE: 4928 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4929 break; 4930 case POSIX_FADV_NORMAL: 4931 case POSIX_FADV_WILLNEED: 4932 case POSIX_FADV_DONTNEED: 4933 new = NULL; 4934 break; 4935 default: 4936 return (EINVAL); 4937 } 4938 /* XXX: CAP_POSIX_FADVISE? */ 4939 AUDIT_ARG_FD(fd); 4940 error = fget(td, fd, &cap_no_rights, &fp); 4941 if (error != 0) 4942 goto out; 4943 AUDIT_ARG_FILE(td->td_proc, fp); 4944 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4945 error = ESPIPE; 4946 goto out; 4947 } 4948 if (fp->f_type != DTYPE_VNODE) { 4949 error = ENODEV; 4950 goto out; 4951 } 4952 vp = fp->f_vnode; 4953 if (vp->v_type != VREG) { 4954 error = ENODEV; 4955 goto out; 4956 } 4957 if (len == 0) 4958 end = OFF_MAX; 4959 else 4960 end = offset + len - 1; 4961 switch (advice) { 4962 case POSIX_FADV_SEQUENTIAL: 4963 case POSIX_FADV_RANDOM: 4964 case POSIX_FADV_NOREUSE: 4965 /* 4966 * Try to merge any existing non-standard region with 4967 * this new region if possible, otherwise create a new 4968 * non-standard region for this request. 4969 */ 4970 mtx_pool_lock(mtxpool_sleep, fp); 4971 fa = fp->f_advice; 4972 if (fa != NULL && fa->fa_advice == advice && 4973 ((fa->fa_start <= end && fa->fa_end >= offset) || 4974 (end != OFF_MAX && fa->fa_start == end + 1) || 4975 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4976 if (offset < fa->fa_start) 4977 fa->fa_start = offset; 4978 if (end > fa->fa_end) 4979 fa->fa_end = end; 4980 } else { 4981 new->fa_advice = advice; 4982 new->fa_start = offset; 4983 new->fa_end = end; 4984 fp->f_advice = new; 4985 new = fa; 4986 } 4987 mtx_pool_unlock(mtxpool_sleep, fp); 4988 break; 4989 case POSIX_FADV_NORMAL: 4990 /* 4991 * If a the "normal" region overlaps with an existing 4992 * non-standard region, trim or remove the 4993 * non-standard region. 4994 */ 4995 mtx_pool_lock(mtxpool_sleep, fp); 4996 fa = fp->f_advice; 4997 if (fa != NULL) { 4998 if (offset <= fa->fa_start && end >= fa->fa_end) { 4999 new = fa; 5000 fp->f_advice = NULL; 5001 } else if (offset <= fa->fa_start && 5002 end >= fa->fa_start) 5003 fa->fa_start = end + 1; 5004 else if (offset <= fa->fa_end && end >= fa->fa_end) 5005 fa->fa_end = offset - 1; 5006 else if (offset >= fa->fa_start && end <= fa->fa_end) { 5007 /* 5008 * If the "normal" region is a middle 5009 * portion of the existing 5010 * non-standard region, just remove 5011 * the whole thing rather than picking 5012 * one side or the other to 5013 * preserve. 5014 */ 5015 new = fa; 5016 fp->f_advice = NULL; 5017 } 5018 } 5019 mtx_pool_unlock(mtxpool_sleep, fp); 5020 break; 5021 case POSIX_FADV_WILLNEED: 5022 case POSIX_FADV_DONTNEED: 5023 error = VOP_ADVISE(vp, offset, end, advice); 5024 break; 5025 } 5026 out: 5027 if (fp != NULL) 5028 fdrop(fp, td); 5029 free(new, M_FADVISE); 5030 return (error); 5031 } 5032 5033 int 5034 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 5035 { 5036 int error; 5037 5038 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 5039 uap->advice); 5040 return (kern_posix_error(td, error)); 5041 } 5042 5043 int 5044 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 5045 off_t *outoffp, size_t len, unsigned int flags) 5046 { 5047 struct file *infp, *infp1, *outfp, *outfp1; 5048 struct vnode *invp, *outvp; 5049 int error; 5050 size_t retlen; 5051 void *rl_rcookie, *rl_wcookie; 5052 off_t inoff, outoff, savinoff, savoutoff; 5053 bool foffsets_locked, foffsets_set; 5054 5055 infp = outfp = NULL; 5056 rl_rcookie = rl_wcookie = NULL; 5057 foffsets_locked = false; 5058 foffsets_set = false; 5059 error = 0; 5060 retlen = 0; 5061 5062 if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) { 5063 error = EINVAL; 5064 goto out; 5065 } 5066 if (len > SSIZE_MAX) 5067 /* 5068 * Although the len argument is size_t, the return argument 5069 * is ssize_t (which is signed). Therefore a size that won't 5070 * fit in ssize_t can't be returned. 5071 */ 5072 len = SSIZE_MAX; 5073 5074 /* Get the file structures for the file descriptors. */ 5075 error = fget_read(td, infd, 5076 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5077 if (error != 0) 5078 goto out; 5079 if (infp->f_ops == &badfileops) { 5080 error = EBADF; 5081 goto out; 5082 } 5083 if (infp->f_vnode == NULL) { 5084 error = EINVAL; 5085 goto out; 5086 } 5087 error = fget_write(td, outfd, 5088 outoffp != NULL ? &cap_pwrite_rights : &cap_write_rights, &outfp); 5089 if (error != 0) 5090 goto out; 5091 if (outfp->f_ops == &badfileops) { 5092 error = EBADF; 5093 goto out; 5094 } 5095 if (outfp->f_vnode == NULL) { 5096 error = EINVAL; 5097 goto out; 5098 } 5099 5100 /* 5101 * Figure out which file offsets we're reading from and writing to. 5102 * If the offsets come from the file descriptions, we need to lock them, 5103 * and locking both offsets requires a loop to avoid deadlocks. 5104 */ 5105 infp1 = outfp1 = NULL; 5106 if (inoffp != NULL) 5107 inoff = *inoffp; 5108 else 5109 infp1 = infp; 5110 if (outoffp != NULL) 5111 outoff = *outoffp; 5112 else 5113 outfp1 = outfp; 5114 if (infp1 != NULL || outfp1 != NULL) { 5115 if (infp1 == outfp1) { 5116 /* 5117 * Overlapping ranges are not allowed. A more thorough 5118 * check appears below, but we must not lock the same 5119 * offset twice. 5120 */ 5121 error = EINVAL; 5122 goto out; 5123 } 5124 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5125 foffsets_locked = true; 5126 } else { 5127 foffsets_set = true; 5128 } 5129 savinoff = inoff; 5130 savoutoff = outoff; 5131 5132 invp = infp->f_vnode; 5133 outvp = outfp->f_vnode; 5134 /* Sanity check the f_flag bits. */ 5135 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5136 (infp->f_flag & FREAD) == 0) { 5137 error = EBADF; 5138 goto out; 5139 } 5140 5141 /* If len == 0, just return 0. */ 5142 if (len == 0) 5143 goto out; 5144 5145 /* 5146 * Make sure that the ranges we check and lock below are valid. Note 5147 * that len is clamped to SSIZE_MAX above. 5148 */ 5149 if (inoff < 0 || outoff < 0) { 5150 error = EINVAL; 5151 goto out; 5152 } 5153 5154 /* 5155 * If infp and outfp refer to the same file, the byte ranges cannot 5156 * overlap. 5157 */ 5158 if (invp == outvp) { 5159 if ((inoff <= outoff && inoff + len > outoff) || 5160 (inoff > outoff && outoff + len > inoff)) { 5161 error = EINVAL; 5162 goto out; 5163 } 5164 rangelock_may_recurse(&invp->v_rl); 5165 } 5166 5167 /* Range lock the byte ranges for both invp and outvp. */ 5168 for (;;) { 5169 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5170 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5171 if (rl_rcookie != NULL) 5172 break; 5173 vn_rangelock_unlock(outvp, rl_wcookie); 5174 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5175 vn_rangelock_unlock(invp, rl_rcookie); 5176 } 5177 5178 retlen = len; 5179 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5180 flags, infp->f_cred, outfp->f_cred, td); 5181 out: 5182 if (rl_rcookie != NULL) 5183 vn_rangelock_unlock(invp, rl_rcookie); 5184 if (rl_wcookie != NULL) 5185 vn_rangelock_unlock(outvp, rl_wcookie); 5186 if ((foffsets_locked || foffsets_set) && 5187 (error == EINTR || error == ERESTART)) { 5188 inoff = savinoff; 5189 outoff = savoutoff; 5190 } 5191 if (foffsets_locked) { 5192 if (inoffp == NULL) 5193 foffset_unlock(infp, inoff, 0); 5194 else 5195 *inoffp = inoff; 5196 if (outoffp == NULL) 5197 foffset_unlock(outfp, outoff, 0); 5198 else 5199 *outoffp = outoff; 5200 } else if (foffsets_set) { 5201 *inoffp = inoff; 5202 *outoffp = outoff; 5203 } 5204 if (outfp != NULL) 5205 fdrop(outfp, td); 5206 if (infp != NULL) 5207 fdrop(infp, td); 5208 td->td_retval[0] = retlen; 5209 return (error); 5210 } 5211 5212 int 5213 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5214 { 5215 off_t inoff, outoff, *inoffp, *outoffp; 5216 int error; 5217 5218 inoffp = outoffp = NULL; 5219 if (uap->inoffp != NULL) { 5220 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5221 if (error != 0) 5222 return (error); 5223 inoffp = &inoff; 5224 } 5225 if (uap->outoffp != NULL) { 5226 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5227 if (error != 0) 5228 return (error); 5229 outoffp = &outoff; 5230 } 5231 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5232 outoffp, uap->len, uap->flags); 5233 if (error == 0 && uap->inoffp != NULL) 5234 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5235 if (error == 0 && uap->outoffp != NULL) 5236 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5237 return (error); 5238 } 5239