1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #define EXTERR_CATEGORY EXTERR_CAT_VFSSYSCALL 41 #include <sys/systm.h> 42 #ifdef COMPAT_FREEBSD11 43 #include <sys/abi_compat.h> 44 #endif 45 #include <sys/bio.h> 46 #include <sys/buf.h> 47 #include <sys/capsicum.h> 48 #include <sys/disk.h> 49 #include <sys/dirent.h> 50 #include <sys/exterrvar.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/filio.h> 55 #include <sys/jail.h> 56 #include <sys/kernel.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/malloc.h> 63 #include <sys/mount.h> 64 #include <sys/mutex.h> 65 #include <sys/namei.h> 66 #include <sys/priv.h> 67 #include <sys/proc.h> 68 #include <sys/rwlock.h> 69 #include <sys/sdt.h> 70 #include <sys/stat.h> 71 #include <sys/stdarg.h> 72 #include <sys/sx.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #include <sys/sysproto.h> 76 #include <sys/unistd.h> 77 #include <sys/vnode.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vnode_pager.h> 86 #include <vm/uma.h> 87 88 #include <fs/devfs/devfs.h> 89 90 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 91 92 static int kern_chflagsat(struct thread *td, int fd, const char *path, 93 enum uio_seg pathseg, u_long flags, int atflag); 94 static int setfflags(struct thread *td, struct vnode *, u_long); 95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 96 static int getutimens(const struct timespec *, enum uio_seg, 97 struct timespec *, int *); 98 static int setutimes(struct thread *td, struct vnode *, 99 const struct timespec *, int, int); 100 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 101 struct thread *td); 102 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 103 enum uio_seg pathseg, fhandle_t *fhp); 104 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 105 size_t count, struct thread *td); 106 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 107 const char *path, enum uio_seg segflag); 108 109 uint64_t 110 at2cnpflags(u_int at_flags, u_int mask) 111 { 112 uint64_t res; 113 114 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 115 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 116 117 res = 0; 118 at_flags &= mask; 119 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 120 res |= RBENEATH; 121 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 122 res |= FOLLOW; 123 /* NOFOLLOW is pseudo flag */ 124 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 125 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 126 FOLLOW; 127 } 128 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 129 res |= EMPTYPATH; 130 return (res); 131 } 132 133 int 134 kern_sync(struct thread *td) 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_periodic(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Sync each mounted filesystem. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct sync_args { 166 int dummy; 167 }; 168 #endif 169 /* ARGSUSED */ 170 int 171 sys_sync(struct thread *td, struct sync_args *uap) 172 { 173 174 return (kern_sync(td)); 175 } 176 177 /* 178 * Change filesystem quotas. 179 */ 180 #ifndef _SYS_SYSPROTO_H_ 181 struct quotactl_args { 182 char *path; 183 int cmd; 184 int uid; 185 caddr_t arg; 186 }; 187 #endif 188 int 189 sys_quotactl(struct thread *td, struct quotactl_args *uap) 190 { 191 struct mount *mp; 192 struct nameidata nd; 193 int error; 194 bool mp_busy; 195 196 AUDIT_ARG_CMD(uap->cmd); 197 AUDIT_ARG_UID(uap->uid); 198 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 199 return (EPERM); 200 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 201 uap->path); 202 if ((error = namei(&nd)) != 0) 203 return (error); 204 NDFREE_PNBUF(&nd); 205 mp = nd.ni_vp->v_mount; 206 vfs_ref(mp); 207 vput(nd.ni_vp); 208 error = vfs_busy(mp, 0); 209 if (error != 0) { 210 vfs_rel(mp); 211 return (error); 212 } 213 mp_busy = true; 214 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 215 216 /* 217 * Since quota on/off operations typically need to open quota 218 * files, the implementation may need to unbusy the mount point 219 * before calling into namei. Otherwise, unmount might be 220 * started between two vfs_busy() invocations (first is ours, 221 * second is from mount point cross-walk code in lookup()), 222 * causing deadlock. 223 * 224 * Avoid unbusying mp if the implementation indicates it has 225 * already done so. 226 */ 227 if (mp_busy) 228 vfs_unbusy(mp); 229 vfs_rel(mp); 230 return (error); 231 } 232 233 /* 234 * Used by statfs conversion routines to scale the block size up if 235 * necessary so that all of the block counts are <= 'max_size'. Note 236 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 237 * value of 'n'. 238 */ 239 void 240 statfs_scale_blocks(struct statfs *sf, long max_size) 241 { 242 uint64_t count; 243 int shift; 244 245 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 246 247 /* 248 * Attempt to scale the block counts to give a more accurate 249 * overview to userland of the ratio of free space to used 250 * space. To do this, find the largest block count and compute 251 * a divisor that lets it fit into a signed integer <= max_size. 252 */ 253 if (sf->f_bavail < 0) 254 count = -sf->f_bavail; 255 else 256 count = sf->f_bavail; 257 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 258 if (count <= max_size) 259 return; 260 261 count >>= flsl(max_size); 262 shift = 0; 263 while (count > 0) { 264 shift++; 265 count >>=1; 266 } 267 268 sf->f_bsize <<= shift; 269 sf->f_blocks >>= shift; 270 sf->f_bfree >>= shift; 271 sf->f_bavail >>= shift; 272 } 273 274 static int 275 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 276 { 277 int error; 278 279 if (mp == NULL) 280 return (EBADF); 281 error = vfs_busy(mp, 0); 282 vfs_rel(mp); 283 if (error != 0) 284 return (error); 285 #ifdef MAC 286 error = mac_mount_check_stat(td->td_ucred, mp); 287 if (error != 0) 288 goto out; 289 #endif 290 error = VFS_STATFS(mp, buf); 291 if (error != 0) 292 goto out; 293 if (priv_check_cred_vfs_generation(td->td_ucred)) 294 prison_enforce_statfs(td->td_ucred, mp, buf); 295 out: 296 vfs_unbusy(mp); 297 return (error); 298 } 299 300 /* 301 * Get filesystem statistics. 302 */ 303 #ifndef _SYS_SYSPROTO_H_ 304 struct statfs_args { 305 char *path; 306 struct statfs *buf; 307 }; 308 #endif 309 int 310 sys_statfs(struct thread *td, struct statfs_args *uap) 311 { 312 struct statfs *sfp; 313 int error; 314 315 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 316 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 317 if (error == 0) 318 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 319 free(sfp, M_STATFS); 320 return (error); 321 } 322 323 int 324 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 325 struct statfs *buf) 326 { 327 struct mount *mp; 328 struct nameidata nd; 329 int error; 330 331 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 332 error = namei(&nd); 333 if (error != 0) 334 return (error); 335 NDFREE_PNBUF(&nd); 336 mp = vfs_ref_from_vp(nd.ni_vp); 337 vrele(nd.ni_vp); 338 return (kern_do_statfs(td, mp, buf)); 339 } 340 341 /* 342 * Get filesystem statistics. 343 */ 344 #ifndef _SYS_SYSPROTO_H_ 345 struct fstatfs_args { 346 int fd; 347 struct statfs *buf; 348 }; 349 #endif 350 int 351 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 352 { 353 struct statfs *sfp; 354 int error; 355 356 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 357 error = kern_fstatfs(td, uap->fd, sfp); 358 if (error == 0) 359 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 360 free(sfp, M_STATFS); 361 return (error); 362 } 363 364 int 365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 366 { 367 struct file *fp; 368 struct mount *mp; 369 struct vnode *vp; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp); 374 if (error != 0) 375 return (error); 376 vp = fp->f_vnode; 377 #ifdef AUDIT 378 if (AUDITING_TD(td)) { 379 vn_lock(vp, LK_SHARED | LK_RETRY); 380 AUDIT_ARG_VNODE1(vp); 381 VOP_UNLOCK(vp); 382 } 383 #endif 384 mp = vfs_ref_from_vp(vp); 385 fdrop(fp, td); 386 return (kern_do_statfs(td, mp, buf)); 387 } 388 389 /* 390 * Get statistics on all filesystems. 391 */ 392 #ifndef _SYS_SYSPROTO_H_ 393 struct getfsstat_args { 394 struct statfs *buf; 395 long bufsize; 396 int mode; 397 }; 398 #endif 399 int 400 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 401 { 402 size_t count; 403 int error; 404 405 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 406 return (EINVAL); 407 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 408 UIO_USERSPACE, uap->mode); 409 if (error == 0) 410 td->td_retval[0] = count; 411 return (error); 412 } 413 414 /* 415 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 416 * The caller is responsible for freeing memory which will be allocated 417 * in '*buf'. 418 */ 419 int 420 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 421 size_t *countp, enum uio_seg bufseg, int mode) 422 { 423 struct mount *mp, *nmp; 424 struct statfs *sfsp, *sp, *sptmp, *tofree; 425 size_t count, maxcount; 426 int error; 427 428 switch (mode) { 429 case MNT_WAIT: 430 case MNT_NOWAIT: 431 break; 432 default: 433 if (bufseg == UIO_SYSSPACE) 434 *buf = NULL; 435 return (EINVAL); 436 } 437 restart: 438 maxcount = bufsize / sizeof(struct statfs); 439 if (bufsize == 0) { 440 sfsp = NULL; 441 tofree = NULL; 442 } else if (bufseg == UIO_USERSPACE) { 443 sfsp = *buf; 444 tofree = NULL; 445 } else /* if (bufseg == UIO_SYSSPACE) */ { 446 count = 0; 447 mtx_lock(&mountlist_mtx); 448 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 449 count++; 450 } 451 mtx_unlock(&mountlist_mtx); 452 if (maxcount > count) 453 maxcount = count; 454 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 455 M_STATFS, M_WAITOK); 456 } 457 458 count = 0; 459 460 /* 461 * If there is no target buffer they only want the count. 462 * 463 * This could be TAILQ_FOREACH but it is open-coded to match the original 464 * code below. 465 */ 466 if (sfsp == NULL) { 467 mtx_lock(&mountlist_mtx); 468 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 469 if (prison_canseemount(td->td_ucred, mp) != 0) { 470 nmp = TAILQ_NEXT(mp, mnt_list); 471 continue; 472 } 473 #ifdef MAC 474 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 475 nmp = TAILQ_NEXT(mp, mnt_list); 476 continue; 477 } 478 #endif 479 count++; 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 } 482 mtx_unlock(&mountlist_mtx); 483 *countp = count; 484 return (0); 485 } 486 487 /* 488 * They want the entire thing. 489 * 490 * Short-circuit the corner case of no room for anything, avoids 491 * relocking below. 492 */ 493 if (maxcount < 1) { 494 goto out; 495 } 496 497 mtx_lock(&mountlist_mtx); 498 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 499 if (prison_canseemount(td->td_ucred, mp) != 0) { 500 nmp = TAILQ_NEXT(mp, mnt_list); 501 continue; 502 } 503 #ifdef MAC 504 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 continue; 507 } 508 #endif 509 if (mode == MNT_WAIT) { 510 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 511 /* 512 * If vfs_busy() failed, and MBF_NOWAIT 513 * wasn't passed, then the mp is gone. 514 * Furthermore, because of MBF_MNTLSTLOCK, 515 * the mountlist_mtx was dropped. We have 516 * no other choice than to start over. 517 */ 518 mtx_unlock(&mountlist_mtx); 519 free(tofree, M_STATFS); 520 goto restart; 521 } 522 } else { 523 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 524 nmp = TAILQ_NEXT(mp, mnt_list); 525 continue; 526 } 527 } 528 sp = &mp->mnt_stat; 529 /* 530 * If MNT_NOWAIT is specified, do not refresh 531 * the fsstat cache. 532 */ 533 if (mode != MNT_NOWAIT) { 534 error = VFS_STATFS(mp, sp); 535 if (error != 0) { 536 mtx_lock(&mountlist_mtx); 537 nmp = TAILQ_NEXT(mp, mnt_list); 538 vfs_unbusy(mp); 539 continue; 540 } 541 } 542 if (priv_check_cred_vfs_generation(td->td_ucred)) { 543 sptmp = malloc(sizeof(struct statfs), M_STATFS, 544 M_WAITOK); 545 *sptmp = *sp; 546 prison_enforce_statfs(td->td_ucred, mp, sptmp); 547 sp = sptmp; 548 } else 549 sptmp = NULL; 550 if (bufseg == UIO_SYSSPACE) { 551 bcopy(sp, sfsp, sizeof(*sp)); 552 free(sptmp, M_STATFS); 553 } else /* if (bufseg == UIO_USERSPACE) */ { 554 error = copyout(sp, sfsp, sizeof(*sp)); 555 free(sptmp, M_STATFS); 556 if (error != 0) { 557 vfs_unbusy(mp); 558 return (error); 559 } 560 } 561 sfsp++; 562 count++; 563 564 if (count == maxcount) { 565 vfs_unbusy(mp); 566 goto out; 567 } 568 569 mtx_lock(&mountlist_mtx); 570 nmp = TAILQ_NEXT(mp, mnt_list); 571 vfs_unbusy(mp); 572 } 573 mtx_unlock(&mountlist_mtx); 574 out: 575 *countp = count; 576 return (0); 577 } 578 579 #ifdef COMPAT_FREEBSD4 580 /* 581 * Get old format filesystem statistics. 582 */ 583 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 584 585 #ifndef _SYS_SYSPROTO_H_ 586 struct freebsd4_statfs_args { 587 char *path; 588 struct ostatfs *buf; 589 }; 590 #endif 591 int 592 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 593 { 594 struct ostatfs osb; 595 struct statfs *sfp; 596 int error; 597 598 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 599 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 600 if (error == 0) { 601 freebsd4_cvtstatfs(sfp, &osb); 602 error = copyout(&osb, uap->buf, sizeof(osb)); 603 } 604 free(sfp, M_STATFS); 605 return (error); 606 } 607 608 /* 609 * Get filesystem statistics. 610 */ 611 #ifndef _SYS_SYSPROTO_H_ 612 struct freebsd4_fstatfs_args { 613 int fd; 614 struct ostatfs *buf; 615 }; 616 #endif 617 int 618 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 619 { 620 struct ostatfs osb; 621 struct statfs *sfp; 622 int error; 623 624 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 625 error = kern_fstatfs(td, uap->fd, sfp); 626 if (error == 0) { 627 freebsd4_cvtstatfs(sfp, &osb); 628 error = copyout(&osb, uap->buf, sizeof(osb)); 629 } 630 free(sfp, M_STATFS); 631 return (error); 632 } 633 634 /* 635 * Get statistics on all filesystems. 636 */ 637 #ifndef _SYS_SYSPROTO_H_ 638 struct freebsd4_getfsstat_args { 639 struct ostatfs *buf; 640 long bufsize; 641 int mode; 642 }; 643 #endif 644 int 645 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 646 { 647 struct statfs *buf, *sp; 648 struct ostatfs osb; 649 size_t count, size; 650 int error; 651 652 if (uap->bufsize < 0) 653 return (EINVAL); 654 count = uap->bufsize / sizeof(struct ostatfs); 655 if (count > SIZE_MAX / sizeof(struct statfs)) 656 return (EINVAL); 657 size = count * sizeof(struct statfs); 658 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 659 uap->mode); 660 if (error == 0) 661 td->td_retval[0] = count; 662 if (size != 0) { 663 sp = buf; 664 while (count != 0 && error == 0) { 665 freebsd4_cvtstatfs(sp, &osb); 666 error = copyout(&osb, uap->buf, sizeof(osb)); 667 sp++; 668 uap->buf++; 669 count--; 670 } 671 free(buf, M_STATFS); 672 } 673 return (error); 674 } 675 676 /* 677 * Implement fstatfs() for (NFS) file handles. 678 */ 679 #ifndef _SYS_SYSPROTO_H_ 680 struct freebsd4_fhstatfs_args { 681 struct fhandle *u_fhp; 682 struct ostatfs *buf; 683 }; 684 #endif 685 int 686 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 687 { 688 struct ostatfs osb; 689 struct statfs *sfp; 690 fhandle_t fh; 691 int error; 692 693 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 694 if (error != 0) 695 return (error); 696 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 697 error = kern_fhstatfs(td, fh, sfp); 698 if (error == 0) { 699 freebsd4_cvtstatfs(sfp, &osb); 700 error = copyout(&osb, uap->buf, sizeof(osb)); 701 } 702 free(sfp, M_STATFS); 703 return (error); 704 } 705 706 /* 707 * Convert a new format statfs structure to an old format statfs structure. 708 */ 709 static void 710 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 711 { 712 713 statfs_scale_blocks(nsp, LONG_MAX); 714 bzero(osp, sizeof(*osp)); 715 osp->f_bsize = nsp->f_bsize; 716 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 717 osp->f_blocks = nsp->f_blocks; 718 osp->f_bfree = nsp->f_bfree; 719 osp->f_bavail = nsp->f_bavail; 720 osp->f_files = MIN(nsp->f_files, LONG_MAX); 721 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 722 osp->f_owner = nsp->f_owner; 723 osp->f_type = nsp->f_type; 724 osp->f_flags = nsp->f_flags; 725 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 726 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 727 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 728 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 729 strlcpy(osp->f_fstypename, nsp->f_fstypename, 730 MIN(MFSNAMELEN, OMFSNAMELEN)); 731 strlcpy(osp->f_mntonname, nsp->f_mntonname, 732 MIN(MNAMELEN, OMNAMELEN)); 733 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 734 MIN(MNAMELEN, OMNAMELEN)); 735 osp->f_fsid = nsp->f_fsid; 736 } 737 #endif /* COMPAT_FREEBSD4 */ 738 739 #if defined(COMPAT_FREEBSD11) 740 /* 741 * Get old format filesystem statistics. 742 */ 743 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 744 745 int 746 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 747 { 748 struct freebsd11_statfs osb; 749 struct statfs *sfp; 750 int error; 751 752 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 753 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 754 if (error == 0) { 755 freebsd11_cvtstatfs(sfp, &osb); 756 error = copyout(&osb, uap->buf, sizeof(osb)); 757 } 758 free(sfp, M_STATFS); 759 return (error); 760 } 761 762 /* 763 * Get filesystem statistics. 764 */ 765 int 766 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 767 { 768 struct freebsd11_statfs osb; 769 struct statfs *sfp; 770 int error; 771 772 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 773 error = kern_fstatfs(td, uap->fd, sfp); 774 if (error == 0) { 775 freebsd11_cvtstatfs(sfp, &osb); 776 error = copyout(&osb, uap->buf, sizeof(osb)); 777 } 778 free(sfp, M_STATFS); 779 return (error); 780 } 781 782 /* 783 * Get statistics on all filesystems. 784 */ 785 int 786 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 787 { 788 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 789 } 790 791 int 792 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 793 long bufsize, int mode) 794 { 795 struct freebsd11_statfs osb; 796 struct statfs *buf, *sp; 797 size_t count, size; 798 int error; 799 800 if (bufsize < 0) 801 return (EINVAL); 802 803 count = bufsize / sizeof(struct ostatfs); 804 size = count * sizeof(struct statfs); 805 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 806 if (error == 0) 807 td->td_retval[0] = count; 808 if (size > 0) { 809 sp = buf; 810 while (count > 0 && error == 0) { 811 freebsd11_cvtstatfs(sp, &osb); 812 error = copyout(&osb, ubuf, sizeof(osb)); 813 sp++; 814 ubuf++; 815 count--; 816 } 817 free(buf, M_STATFS); 818 } 819 return (error); 820 } 821 822 /* 823 * Implement fstatfs() for (NFS) file handles. 824 */ 825 int 826 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 827 { 828 struct freebsd11_statfs osb; 829 struct statfs *sfp; 830 fhandle_t fh; 831 int error; 832 833 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 834 if (error) 835 return (error); 836 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 837 error = kern_fhstatfs(td, fh, sfp); 838 if (error == 0) { 839 freebsd11_cvtstatfs(sfp, &osb); 840 error = copyout(&osb, uap->buf, sizeof(osb)); 841 } 842 free(sfp, M_STATFS); 843 return (error); 844 } 845 846 /* 847 * Convert a new format statfs structure to an old format statfs structure. 848 */ 849 static void 850 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 851 { 852 853 bzero(osp, sizeof(*osp)); 854 osp->f_version = FREEBSD11_STATFS_VERSION; 855 osp->f_type = nsp->f_type; 856 osp->f_flags = nsp->f_flags; 857 osp->f_bsize = nsp->f_bsize; 858 osp->f_iosize = nsp->f_iosize; 859 osp->f_blocks = nsp->f_blocks; 860 osp->f_bfree = nsp->f_bfree; 861 osp->f_bavail = nsp->f_bavail; 862 osp->f_files = nsp->f_files; 863 osp->f_ffree = nsp->f_ffree; 864 osp->f_syncwrites = nsp->f_syncwrites; 865 osp->f_asyncwrites = nsp->f_asyncwrites; 866 osp->f_syncreads = nsp->f_syncreads; 867 osp->f_asyncreads = nsp->f_asyncreads; 868 osp->f_namemax = nsp->f_namemax; 869 osp->f_owner = nsp->f_owner; 870 osp->f_fsid = nsp->f_fsid; 871 strlcpy(osp->f_fstypename, nsp->f_fstypename, 872 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 873 strlcpy(osp->f_mntonname, nsp->f_mntonname, 874 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 875 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 876 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 877 } 878 #endif /* COMPAT_FREEBSD11 */ 879 880 /* 881 * Change current working directory to a given file descriptor. 882 */ 883 #ifndef _SYS_SYSPROTO_H_ 884 struct fchdir_args { 885 int fd; 886 }; 887 #endif 888 int 889 sys_fchdir(struct thread *td, struct fchdir_args *uap) 890 { 891 struct vnode *vp, *tdp; 892 struct mount *mp; 893 struct file *fp; 894 int error; 895 uint8_t fdflags; 896 897 AUDIT_ARG_FD(uap->fd); 898 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags, 899 &fp); 900 if (error != 0) 901 return (error); 902 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 903 fdrop(fp, td); 904 return (ENOTCAPABLE); 905 } 906 vp = fp->f_vnode; 907 vrefact(vp); 908 fdrop(fp, td); 909 vn_lock(vp, LK_SHARED | LK_RETRY); 910 AUDIT_ARG_VNODE1(vp); 911 error = change_dir(vp, td); 912 while (!error && (mp = vp->v_mountedhere) != NULL) { 913 if (vfs_busy(mp, 0)) 914 continue; 915 error = VFS_ROOT(mp, LK_SHARED, &tdp); 916 vfs_unbusy(mp); 917 if (error != 0) 918 break; 919 vput(vp); 920 vp = tdp; 921 } 922 if (error != 0) { 923 vput(vp); 924 return (error); 925 } 926 VOP_UNLOCK(vp); 927 pwd_chdir(td, vp); 928 return (0); 929 } 930 931 /* 932 * Change current working directory (``.''). 933 */ 934 #ifndef _SYS_SYSPROTO_H_ 935 struct chdir_args { 936 char *path; 937 }; 938 #endif 939 int 940 sys_chdir(struct thread *td, struct chdir_args *uap) 941 { 942 943 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 944 } 945 946 int 947 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 948 { 949 struct nameidata nd; 950 int error; 951 952 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 953 pathseg, path); 954 if ((error = namei(&nd)) != 0) 955 return (error); 956 if ((error = change_dir(nd.ni_vp, td)) != 0) { 957 vput(nd.ni_vp); 958 NDFREE_PNBUF(&nd); 959 return (error); 960 } 961 VOP_UNLOCK(nd.ni_vp); 962 NDFREE_PNBUF(&nd); 963 pwd_chdir(td, nd.ni_vp); 964 return (0); 965 } 966 967 static int unprivileged_chroot = 0; 968 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 969 &unprivileged_chroot, 0, 970 "Unprivileged processes can use chroot(2)"); 971 972 /* 973 * Takes locked vnode, unlocks it before returning. 974 */ 975 static int 976 kern_chroot(struct thread *td, struct vnode *vp) 977 { 978 struct proc *p; 979 int error; 980 981 error = priv_check(td, PRIV_VFS_CHROOT); 982 if (error != 0) { 983 p = td->td_proc; 984 if (unprivileged_chroot == 0) { 985 error = EXTERROR(EPERM, 986 "security.bsd.unprivileged_chroot sysctl not enabled"); 987 goto e_vunlock; 988 } 989 if ((p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 990 error = EXTERROR(EPERM, 991 "PROC_NO_NEW_PRIVS not enabled"); 992 goto e_vunlock; 993 } 994 } 995 996 error = change_dir(vp, td); 997 if (error != 0) 998 goto e_vunlock; 999 #ifdef MAC 1000 error = mac_vnode_check_chroot(td->td_ucred, vp); 1001 if (error != 0) 1002 goto e_vunlock; 1003 #endif 1004 VOP_UNLOCK(vp); 1005 error = pwd_chroot(td, vp); 1006 vrele(vp); 1007 return (error); 1008 e_vunlock: 1009 vput(vp); 1010 return (error); 1011 } 1012 1013 /* 1014 * Change notion of root (``/'') directory. 1015 */ 1016 #ifndef _SYS_SYSPROTO_H_ 1017 struct chroot_args { 1018 char *path; 1019 }; 1020 #endif 1021 int 1022 sys_chroot(struct thread *td, struct chroot_args *uap) 1023 { 1024 struct nameidata nd; 1025 int error; 1026 1027 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1028 UIO_USERSPACE, uap->path); 1029 error = namei(&nd); 1030 if (error != 0) 1031 return (error); 1032 NDFREE_PNBUF(&nd); 1033 error = kern_chroot(td, nd.ni_vp); 1034 return (error); 1035 } 1036 1037 /* 1038 * Change notion of root directory to a given file descriptor. 1039 */ 1040 #ifndef _SYS_SYSPROTO_H_ 1041 struct fchroot_args { 1042 int fd; 1043 }; 1044 #endif 1045 int 1046 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1047 { 1048 struct vnode *vp; 1049 struct file *fp; 1050 int error; 1051 uint8_t fdflags; 1052 1053 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp); 1054 if (error != 0) 1055 return (error); 1056 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 1057 fdrop(fp, td); 1058 return (ENOTCAPABLE); 1059 } 1060 vp = fp->f_vnode; 1061 vrefact(vp); 1062 fdrop(fp, td); 1063 vn_lock(vp, LK_SHARED | LK_RETRY); 1064 error = kern_chroot(td, vp); 1065 return (error); 1066 } 1067 1068 /* 1069 * Common routine for chroot and chdir. Callers must provide a locked vnode 1070 * instance. 1071 */ 1072 int 1073 change_dir(struct vnode *vp, struct thread *td) 1074 { 1075 #ifdef MAC 1076 int error; 1077 #endif 1078 1079 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1080 if (vp->v_type != VDIR) 1081 return (ENOTDIR); 1082 #ifdef MAC 1083 error = mac_vnode_check_chdir(td->td_ucred, vp); 1084 if (error != 0) 1085 return (error); 1086 #endif 1087 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1088 } 1089 1090 static __inline void 1091 flags_to_rights(int flags, cap_rights_t *rightsp) 1092 { 1093 if (flags & O_EXEC) { 1094 cap_rights_set_one(rightsp, CAP_FEXECVE); 1095 if (flags & O_PATH) 1096 return; 1097 } else { 1098 switch ((flags & O_ACCMODE)) { 1099 case O_RDONLY: 1100 cap_rights_set_one(rightsp, CAP_READ); 1101 break; 1102 case O_RDWR: 1103 cap_rights_set_one(rightsp, CAP_READ); 1104 /* FALLTHROUGH */ 1105 case O_WRONLY: 1106 cap_rights_set_one(rightsp, CAP_WRITE); 1107 if (!(flags & (O_APPEND | O_TRUNC))) 1108 cap_rights_set_one(rightsp, CAP_SEEK); 1109 break; 1110 } 1111 } 1112 1113 if (flags & O_CREAT) 1114 cap_rights_set_one(rightsp, CAP_CREATE); 1115 1116 if (flags & O_TRUNC) 1117 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1118 1119 if (flags & (O_SYNC | O_FSYNC | O_DSYNC)) 1120 cap_rights_set_one(rightsp, CAP_FSYNC); 1121 1122 if (flags & (O_EXLOCK | O_SHLOCK)) 1123 cap_rights_set_one(rightsp, CAP_FLOCK); 1124 } 1125 1126 /* 1127 * Check permissions, allocate an open file structure, and call the device 1128 * open routine if any. 1129 */ 1130 #ifndef _SYS_SYSPROTO_H_ 1131 struct open_args { 1132 char *path; 1133 int flags; 1134 int mode; 1135 }; 1136 #endif 1137 int 1138 sys_open(struct thread *td, struct open_args *uap) 1139 { 1140 1141 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1142 uap->flags, uap->mode)); 1143 } 1144 1145 #ifndef _SYS_SYSPROTO_H_ 1146 struct openat_args { 1147 int fd; 1148 char *path; 1149 int flag; 1150 int mode; 1151 }; 1152 #endif 1153 int 1154 sys_openat(struct thread *td, struct openat_args *uap) 1155 { 1156 1157 AUDIT_ARG_FD(uap->fd); 1158 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1159 uap->mode)); 1160 } 1161 1162 /* 1163 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their 1164 * in-kernel representations (FREAD etc.). 1165 */ 1166 static int 1167 openflags(int *flagsp) 1168 { 1169 int flags; 1170 1171 flags = *flagsp; 1172 if ((flags & ~FUSERALLOWED) != 0) 1173 return (EINVAL); 1174 1175 /* 1176 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1177 * may be specified. On the other hand, for O_PATH any mode 1178 * except O_EXEC is ignored. 1179 */ 1180 if ((flags & O_PATH) != 0) { 1181 flags &= ~O_ACCMODE; 1182 } else if ((flags & O_EXEC) != 0) { 1183 if ((flags & O_ACCMODE) != 0) 1184 return (EINVAL); 1185 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1186 return (EINVAL); 1187 } else { 1188 flags = FFLAGS(flags); 1189 } 1190 *flagsp = flags; 1191 return (0); 1192 } 1193 1194 static void 1195 finit_open(struct file *fp, struct vnode *vp, int flags) 1196 { 1197 /* 1198 * Store the vnode, for any f_type. Typically, the vnode use count is 1199 * decremented by a direct call to vnops.fo_close() for files that 1200 * switched type. 1201 */ 1202 fp->f_vnode = vp; 1203 1204 /* 1205 * If the file wasn't claimed by devfs or fifofs, bind it to the normal 1206 * vnode operations here. 1207 */ 1208 if (fp->f_ops == &badfileops) { 1209 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1210 ("Unexpected fifo fp %p vp %p", fp, vp)); 1211 if ((flags & O_PATH) != 0) { 1212 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1213 DTYPE_VNODE, NULL, &path_fileops); 1214 } else { 1215 finit_vnode(fp, flags, NULL, &vnops); 1216 } 1217 } 1218 } 1219 1220 /* 1221 * If fpp != NULL, opened file is not installed into the file 1222 * descriptor table, instead it is returned in *fpp. This is 1223 * incompatible with fdopen(), in which case we return EINVAL. 1224 */ 1225 static int 1226 openatfp(struct thread *td, int dirfd, const char *path, 1227 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1228 { 1229 struct proc *p; 1230 struct filedesc *fdp; 1231 struct pwddesc *pdp; 1232 struct file *fp; 1233 struct vnode *vp; 1234 struct filecaps *fcaps; 1235 struct nameidata nd; 1236 cap_rights_t rights; 1237 int cmode, error, indx; 1238 1239 indx = -1; 1240 p = td->td_proc; 1241 fdp = p->p_fd; 1242 pdp = p->p_pd; 1243 1244 AUDIT_ARG_FFLAGS(flags); 1245 AUDIT_ARG_MODE(mode); 1246 cap_rights_init_one(&rights, CAP_LOOKUP); 1247 flags_to_rights(flags, &rights); 1248 1249 error = openflags(&flags); 1250 if (error != 0) 1251 return (error); 1252 1253 /* 1254 * Allocate a file structure. The descriptor to reference it 1255 * is allocated and used by finstall_refed() below. 1256 */ 1257 error = falloc_noinstall(td, &fp); 1258 if (error != 0) 1259 return (error); 1260 /* Set the flags early so the finit in devfs can pick them up. */ 1261 fp->f_flag = flags & FMASK; 1262 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1263 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS, 1264 pathseg, path, dirfd, &rights); 1265 td->td_dupfd = -1; /* XXX check for fdopen */ 1266 error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS, 1267 td->td_ucred, fp); 1268 if (error != 0) { 1269 /* 1270 * If the vn_open replaced the method vector, something 1271 * wonderous happened deep below and we just pass it up 1272 * pretending we know what we do. 1273 */ 1274 if (error == ENXIO && fp->f_ops != &badfileops) { 1275 MPASS((flags & O_PATH) == 0); 1276 goto success; 1277 } 1278 1279 /* 1280 * Handle special fdopen() case. bleh. 1281 * 1282 * Don't do this for relative (capability) lookups; we don't 1283 * understand exactly what would happen, and we don't think 1284 * that it ever should. 1285 */ 1286 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1287 (error == ENODEV || error == ENXIO) && 1288 td->td_dupfd >= 0) { 1289 MPASS(fpp == NULL); 1290 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1291 &indx); 1292 if (error == 0) 1293 goto success; 1294 } 1295 1296 goto bad; 1297 } 1298 td->td_dupfd = 0; 1299 NDFREE_PNBUF(&nd); 1300 vp = nd.ni_vp; 1301 1302 finit_open(fp, vp, flags); 1303 VOP_UNLOCK(vp); 1304 if (flags & O_TRUNC) { 1305 error = fo_truncate(fp, 0, td->td_ucred, td); 1306 if (error != 0) 1307 goto bad; 1308 } 1309 success: 1310 if (fpp != NULL) { 1311 MPASS(error == 0); 1312 NDFREE_IOCTLCAPS(&nd); 1313 *fpp = fp; 1314 return (0); 1315 } 1316 1317 /* 1318 * If we haven't already installed the FD (for dupfdopen), do so now. 1319 */ 1320 if (indx == -1) { 1321 #ifdef CAPABILITIES 1322 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1323 fcaps = &nd.ni_filecaps; 1324 else 1325 #endif 1326 fcaps = NULL; 1327 if ((nd.ni_resflags & NIRES_BENEATH) != 0) 1328 flags |= O_RESOLVE_BENEATH; 1329 else 1330 flags &= ~O_RESOLVE_BENEATH; 1331 error = finstall_refed(td, fp, &indx, flags, fcaps); 1332 /* On success finstall_refed() consumes fcaps. */ 1333 if (error != 0) { 1334 goto bad; 1335 } 1336 } else { 1337 NDFREE_IOCTLCAPS(&nd); 1338 falloc_abort(td, fp); 1339 } 1340 1341 td->td_retval[0] = indx; 1342 return (0); 1343 bad: 1344 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1345 NDFREE_IOCTLCAPS(&nd); 1346 falloc_abort(td, fp); 1347 return (error); 1348 } 1349 1350 int 1351 kern_openat(struct thread *td, int dirfd, const char *path, 1352 enum uio_seg pathseg, int flags, int mode) 1353 { 1354 return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL)); 1355 } 1356 1357 int 1358 kern_openatfp(struct thread *td, int dirfd, const char *path, 1359 enum uio_seg pathseg, int flags, int mode, struct file **fpp) 1360 { 1361 int error, old_dupfd; 1362 1363 old_dupfd = td->td_dupfd; 1364 td->td_dupfd = -1; 1365 error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp); 1366 td->td_dupfd = old_dupfd; 1367 return (error); 1368 } 1369 1370 #ifdef COMPAT_43 1371 /* 1372 * Create a file. 1373 */ 1374 #ifndef _SYS_SYSPROTO_H_ 1375 struct ocreat_args { 1376 char *path; 1377 int mode; 1378 }; 1379 #endif 1380 int 1381 ocreat(struct thread *td, struct ocreat_args *uap) 1382 { 1383 1384 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1385 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1386 } 1387 #endif /* COMPAT_43 */ 1388 1389 /* 1390 * Create a special file. 1391 */ 1392 #ifndef _SYS_SYSPROTO_H_ 1393 struct mknodat_args { 1394 int fd; 1395 char *path; 1396 mode_t mode; 1397 dev_t dev; 1398 }; 1399 #endif 1400 int 1401 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1402 { 1403 1404 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1405 uap->dev)); 1406 } 1407 1408 #if defined(COMPAT_FREEBSD11) 1409 int 1410 freebsd11_mknod(struct thread *td, 1411 struct freebsd11_mknod_args *uap) 1412 { 1413 1414 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1415 uap->mode, uap->dev)); 1416 } 1417 1418 int 1419 freebsd11_mknodat(struct thread *td, 1420 struct freebsd11_mknodat_args *uap) 1421 { 1422 1423 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1424 uap->dev)); 1425 } 1426 #endif /* COMPAT_FREEBSD11 */ 1427 1428 int 1429 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1430 int mode, dev_t dev) 1431 { 1432 struct vnode *vp; 1433 struct mount *mp; 1434 struct vattr vattr; 1435 struct nameidata nd; 1436 int error, whiteout = 0; 1437 1438 AUDIT_ARG_MODE(mode); 1439 AUDIT_ARG_DEV(dev); 1440 switch (mode & S_IFMT) { 1441 case S_IFCHR: 1442 case S_IFBLK: 1443 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1444 if (error == 0 && dev == VNOVAL) 1445 error = EINVAL; 1446 break; 1447 case S_IFWHT: 1448 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1449 break; 1450 case S_IFIFO: 1451 if (dev == 0) 1452 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1453 /* FALLTHROUGH */ 1454 default: 1455 error = EINVAL; 1456 break; 1457 } 1458 if (error != 0) 1459 return (error); 1460 NDPREINIT(&nd); 1461 restart: 1462 bwillwrite(); 1463 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1464 pathseg, path, fd, &cap_mknodat_rights); 1465 if ((error = namei(&nd)) != 0) 1466 return (error); 1467 vp = nd.ni_vp; 1468 if (vp != NULL) { 1469 NDFREE_PNBUF(&nd); 1470 if (vp == nd.ni_dvp) 1471 vrele(nd.ni_dvp); 1472 else 1473 vput(nd.ni_dvp); 1474 vrele(vp); 1475 return (EEXIST); 1476 } else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1477 NDFREE_PNBUF(&nd); 1478 vput(nd.ni_dvp); 1479 return (EINVAL); 1480 } else { 1481 VATTR_NULL(&vattr); 1482 vattr.va_mode = (mode & ALLPERMS) & 1483 ~td->td_proc->p_pd->pd_cmask; 1484 vattr.va_rdev = dev; 1485 whiteout = 0; 1486 1487 switch (mode & S_IFMT) { 1488 case S_IFCHR: 1489 vattr.va_type = VCHR; 1490 break; 1491 case S_IFBLK: 1492 vattr.va_type = VBLK; 1493 break; 1494 case S_IFWHT: 1495 whiteout = 1; 1496 break; 1497 default: 1498 panic("kern_mknod: invalid mode"); 1499 } 1500 } 1501 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1502 NDFREE_PNBUF(&nd); 1503 vput(nd.ni_dvp); 1504 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1505 return (error); 1506 goto restart; 1507 } 1508 #ifdef MAC 1509 if (error == 0 && !whiteout) 1510 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1511 &nd.ni_cnd, &vattr); 1512 #endif 1513 if (error == 0) { 1514 if (whiteout) 1515 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1516 else { 1517 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1518 &nd.ni_cnd, &vattr); 1519 } 1520 } 1521 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1522 true); 1523 vn_finished_write(mp); 1524 NDFREE_PNBUF(&nd); 1525 if (error == ERELOOKUP) 1526 goto restart; 1527 return (error); 1528 } 1529 1530 /* 1531 * Create a named pipe. 1532 */ 1533 #ifndef _SYS_SYSPROTO_H_ 1534 struct mkfifo_args { 1535 char *path; 1536 int mode; 1537 }; 1538 #endif 1539 int 1540 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1541 { 1542 1543 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1544 uap->mode)); 1545 } 1546 1547 #ifndef _SYS_SYSPROTO_H_ 1548 struct mkfifoat_args { 1549 int fd; 1550 char *path; 1551 mode_t mode; 1552 }; 1553 #endif 1554 int 1555 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1556 { 1557 1558 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1559 uap->mode)); 1560 } 1561 1562 int 1563 kern_mkfifoat(struct thread *td, int fd, const char *path, 1564 enum uio_seg pathseg, int mode) 1565 { 1566 struct mount *mp; 1567 struct vattr vattr; 1568 struct nameidata nd; 1569 int error; 1570 1571 AUDIT_ARG_MODE(mode); 1572 NDPREINIT(&nd); 1573 restart: 1574 bwillwrite(); 1575 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, 1576 pathseg, path, fd, &cap_mkfifoat_rights); 1577 if ((error = namei(&nd)) != 0) 1578 return (error); 1579 if (nd.ni_vp != NULL) { 1580 NDFREE_PNBUF(&nd); 1581 if (nd.ni_vp == nd.ni_dvp) 1582 vrele(nd.ni_dvp); 1583 else 1584 vput(nd.ni_dvp); 1585 vrele(nd.ni_vp); 1586 return (EEXIST); 1587 } 1588 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1589 NDFREE_PNBUF(&nd); 1590 vput(nd.ni_dvp); 1591 return (EINVAL); 1592 } 1593 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1594 NDFREE_PNBUF(&nd); 1595 vput(nd.ni_dvp); 1596 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1597 return (error); 1598 goto restart; 1599 } 1600 VATTR_NULL(&vattr); 1601 vattr.va_type = VFIFO; 1602 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1603 #ifdef MAC 1604 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1605 &vattr); 1606 if (error != 0) 1607 goto out; 1608 #endif 1609 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1610 #ifdef MAC 1611 out: 1612 #endif 1613 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1614 vn_finished_write(mp); 1615 NDFREE_PNBUF(&nd); 1616 if (error == ERELOOKUP) 1617 goto restart; 1618 return (error); 1619 } 1620 1621 /* 1622 * Make a hard file link. 1623 */ 1624 #ifndef _SYS_SYSPROTO_H_ 1625 struct link_args { 1626 char *path; 1627 char *link; 1628 }; 1629 #endif 1630 int 1631 sys_link(struct thread *td, struct link_args *uap) 1632 { 1633 1634 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1635 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1636 } 1637 1638 #ifndef _SYS_SYSPROTO_H_ 1639 struct linkat_args { 1640 int fd1; 1641 char *path1; 1642 int fd2; 1643 char *path2; 1644 int flag; 1645 }; 1646 #endif 1647 int 1648 sys_linkat(struct thread *td, struct linkat_args *uap) 1649 { 1650 1651 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1652 UIO_USERSPACE, uap->flag)); 1653 } 1654 1655 int hardlink_check_uid = 0; 1656 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1657 &hardlink_check_uid, 0, 1658 "Unprivileged processes cannot create hard links to files owned by other " 1659 "users"); 1660 static int hardlink_check_gid = 0; 1661 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1662 &hardlink_check_gid, 0, 1663 "Unprivileged processes cannot create hard links to files owned by other " 1664 "groups"); 1665 1666 static int 1667 can_hardlink(struct vnode *vp, struct ucred *cred) 1668 { 1669 struct vattr va; 1670 int error; 1671 1672 if (!hardlink_check_uid && !hardlink_check_gid) 1673 return (0); 1674 1675 error = VOP_GETATTR(vp, &va, cred); 1676 if (error != 0) 1677 return (error); 1678 1679 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1680 error = priv_check_cred(cred, PRIV_VFS_LINK); 1681 if (error != 0) 1682 return (error); 1683 } 1684 1685 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1686 error = priv_check_cred(cred, PRIV_VFS_LINK); 1687 if (error != 0) 1688 return (error); 1689 } 1690 1691 return (0); 1692 } 1693 1694 int 1695 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1696 const char *path2, enum uio_seg segflag, int flag) 1697 { 1698 struct nameidata nd; 1699 int error; 1700 1701 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1702 AT_EMPTY_PATH)) != 0) 1703 return (EINVAL); 1704 1705 NDPREINIT(&nd); 1706 do { 1707 bwillwrite(); 1708 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1709 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1710 segflag, path1, fd1, &cap_linkat_source_rights); 1711 if ((error = namei(&nd)) != 0) 1712 return (error); 1713 NDFREE_PNBUF(&nd); 1714 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1715 error = priv_check(td, PRIV_VFS_FHOPEN); 1716 if (error != 0) { 1717 vrele(nd.ni_vp); 1718 return (error); 1719 } 1720 } 1721 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1722 } while (error == EAGAIN || error == ERELOOKUP); 1723 return (error); 1724 } 1725 1726 static int 1727 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1728 enum uio_seg segflag) 1729 { 1730 struct nameidata nd; 1731 struct mount *mp; 1732 int error; 1733 1734 if (vp->v_type == VDIR) { 1735 vrele(vp); 1736 return (EPERM); /* POSIX */ 1737 } 1738 if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) { 1739 vrele(vp); 1740 return (EINVAL); 1741 } 1742 NDINIT_ATRIGHTS(&nd, CREATE, 1743 LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1744 &cap_linkat_target_rights); 1745 if ((error = namei(&nd)) == 0) { 1746 if (nd.ni_vp != NULL) { 1747 NDFREE_PNBUF(&nd); 1748 if (nd.ni_dvp == nd.ni_vp) 1749 vrele(nd.ni_dvp); 1750 else 1751 vput(nd.ni_dvp); 1752 vrele(nd.ni_vp); 1753 vrele(vp); 1754 return (EEXIST); 1755 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1756 /* 1757 * Cross-device link. No need to recheck 1758 * vp->v_type, since it cannot change, except 1759 * to VBAD. 1760 */ 1761 NDFREE_PNBUF(&nd); 1762 vput(nd.ni_dvp); 1763 vrele(vp); 1764 return (EXDEV); 1765 } else if (vn_lock(vp, LK_EXCLUSIVE) == 0) { 1766 error = can_hardlink(vp, td->td_ucred); 1767 #ifdef MAC 1768 if (error == 0) 1769 error = mac_vnode_check_link(td->td_ucred, 1770 nd.ni_dvp, vp, &nd.ni_cnd); 1771 #endif 1772 if (error != 0) { 1773 vput(vp); 1774 vput(nd.ni_dvp); 1775 NDFREE_PNBUF(&nd); 1776 return (error); 1777 } 1778 error = vn_start_write(vp, &mp, V_NOWAIT); 1779 if (error != 0) { 1780 vput(vp); 1781 vput(nd.ni_dvp); 1782 NDFREE_PNBUF(&nd); 1783 error = vn_start_write(NULL, &mp, 1784 V_XSLEEP | V_PCATCH); 1785 if (error != 0) 1786 return (error); 1787 return (EAGAIN); 1788 } 1789 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1790 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1791 vn_finished_write(mp); 1792 NDFREE_PNBUF(&nd); 1793 vp = NULL; 1794 } else { 1795 vput(nd.ni_dvp); 1796 NDFREE_PNBUF(&nd); 1797 vrele(vp); 1798 return (EAGAIN); 1799 } 1800 } 1801 if (vp != NULL) 1802 vrele(vp); 1803 return (error); 1804 } 1805 1806 /* 1807 * Make a symbolic link. 1808 */ 1809 #ifndef _SYS_SYSPROTO_H_ 1810 struct symlink_args { 1811 char *path; 1812 char *link; 1813 }; 1814 #endif 1815 int 1816 sys_symlink(struct thread *td, struct symlink_args *uap) 1817 { 1818 1819 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1820 UIO_USERSPACE)); 1821 } 1822 1823 #ifndef _SYS_SYSPROTO_H_ 1824 struct symlinkat_args { 1825 char *path; 1826 int fd; 1827 char *path2; 1828 }; 1829 #endif 1830 int 1831 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1832 { 1833 1834 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1835 UIO_USERSPACE)); 1836 } 1837 1838 int 1839 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1840 enum uio_seg segflg) 1841 { 1842 struct mount *mp; 1843 struct vattr vattr; 1844 const char *syspath; 1845 char *tmppath; 1846 struct nameidata nd; 1847 int error; 1848 1849 if (segflg == UIO_SYSSPACE) { 1850 syspath = path1; 1851 } else { 1852 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1853 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1854 goto out; 1855 syspath = tmppath; 1856 } 1857 AUDIT_ARG_TEXT(syspath); 1858 NDPREINIT(&nd); 1859 restart: 1860 bwillwrite(); 1861 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1862 path2, fd, &cap_symlinkat_rights); 1863 if ((error = namei(&nd)) != 0) 1864 goto out; 1865 if (nd.ni_vp) { 1866 NDFREE_PNBUF(&nd); 1867 if (nd.ni_vp == nd.ni_dvp) 1868 vrele(nd.ni_dvp); 1869 else 1870 vput(nd.ni_dvp); 1871 vrele(nd.ni_vp); 1872 nd.ni_vp = NULL; 1873 error = EEXIST; 1874 goto out; 1875 } 1876 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1877 NDFREE_PNBUF(&nd); 1878 vput(nd.ni_dvp); 1879 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1880 goto out; 1881 goto restart; 1882 } 1883 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1884 error = EINVAL; 1885 goto out; 1886 } 1887 VATTR_NULL(&vattr); 1888 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1889 #ifdef MAC 1890 vattr.va_type = VLNK; 1891 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1892 &vattr); 1893 if (error != 0) 1894 goto out2; 1895 #endif 1896 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1897 #ifdef MAC 1898 out2: 1899 #endif 1900 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1901 vn_finished_write(mp); 1902 NDFREE_PNBUF(&nd); 1903 if (error == ERELOOKUP) 1904 goto restart; 1905 out: 1906 if (segflg != UIO_SYSSPACE) 1907 uma_zfree(namei_zone, tmppath); 1908 return (error); 1909 } 1910 1911 /* 1912 * Delete a whiteout from the filesystem. 1913 */ 1914 #ifndef _SYS_SYSPROTO_H_ 1915 struct undelete_args { 1916 char *path; 1917 }; 1918 #endif 1919 int 1920 sys_undelete(struct thread *td, struct undelete_args *uap) 1921 { 1922 struct mount *mp; 1923 struct nameidata nd; 1924 int error; 1925 1926 NDPREINIT(&nd); 1927 restart: 1928 bwillwrite(); 1929 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1930 UIO_USERSPACE, uap->path); 1931 error = namei(&nd); 1932 if (error != 0) 1933 return (error); 1934 1935 if (nd.ni_vp != NULL || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1936 NDFREE_PNBUF(&nd); 1937 if (nd.ni_vp == nd.ni_dvp) 1938 vrele(nd.ni_dvp); 1939 else 1940 vput(nd.ni_dvp); 1941 if (nd.ni_vp) 1942 vrele(nd.ni_vp); 1943 return (EEXIST); 1944 } 1945 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1946 NDFREE_PNBUF(&nd); 1947 vput(nd.ni_dvp); 1948 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1949 return (error); 1950 goto restart; 1951 } 1952 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1953 NDFREE_PNBUF(&nd); 1954 vput(nd.ni_dvp); 1955 vn_finished_write(mp); 1956 if (error == ERELOOKUP) 1957 goto restart; 1958 return (error); 1959 } 1960 1961 /* 1962 * Delete a name from the filesystem. 1963 */ 1964 #ifndef _SYS_SYSPROTO_H_ 1965 struct unlink_args { 1966 char *path; 1967 }; 1968 #endif 1969 int 1970 sys_unlink(struct thread *td, struct unlink_args *uap) 1971 { 1972 1973 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1974 0, 0)); 1975 } 1976 1977 static int 1978 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1979 int flag, enum uio_seg pathseg, ino_t oldinum) 1980 { 1981 1982 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1983 return (EINVAL); 1984 1985 if ((flag & AT_REMOVEDIR) != 0) 1986 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1987 1988 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1989 } 1990 1991 #ifndef _SYS_SYSPROTO_H_ 1992 struct unlinkat_args { 1993 int fd; 1994 char *path; 1995 int flag; 1996 }; 1997 #endif 1998 int 1999 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 2000 { 2001 2002 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 2003 UIO_USERSPACE, 0)); 2004 } 2005 2006 #ifndef _SYS_SYSPROTO_H_ 2007 struct funlinkat_args { 2008 int dfd; 2009 const char *path; 2010 int fd; 2011 int flag; 2012 }; 2013 #endif 2014 int 2015 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 2016 { 2017 2018 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2019 UIO_USERSPACE, 0)); 2020 } 2021 2022 int 2023 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2024 enum uio_seg pathseg, int flag, ino_t oldinum) 2025 { 2026 struct mount *mp; 2027 struct file *fp; 2028 struct vnode *vp; 2029 struct nameidata nd; 2030 struct stat sb; 2031 int error; 2032 2033 fp = NULL; 2034 if (fd != FD_NONE) { 2035 error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp); 2036 if (error != 0) 2037 return (error); 2038 } 2039 2040 NDPREINIT(&nd); 2041 restart: 2042 bwillwrite(); 2043 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2044 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2045 pathseg, path, dfd, &cap_unlinkat_rights); 2046 if ((error = namei(&nd)) != 0) { 2047 if (error == EINVAL) 2048 error = EPERM; 2049 goto fdout; 2050 } 2051 vp = nd.ni_vp; 2052 if (vp->v_type == VDIR && oldinum == 0) { 2053 error = EPERM; /* POSIX */ 2054 } else if (oldinum != 0 && 2055 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2056 sb.st_ino != oldinum) { 2057 error = EIDRM; /* Identifier removed */ 2058 } else if (fp != NULL && fp->f_vnode != vp) { 2059 if (VN_IS_DOOMED(fp->f_vnode)) 2060 error = EBADF; 2061 else 2062 error = EDEADLK; 2063 } else { 2064 /* 2065 * The root of a mounted filesystem cannot be deleted. 2066 * 2067 * XXX: can this only be a VDIR case? 2068 */ 2069 if (vp->v_vflag & VV_ROOT) 2070 error = EBUSY; 2071 } 2072 if (error == 0) { 2073 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2074 NDFREE_PNBUF(&nd); 2075 vput(nd.ni_dvp); 2076 if (vp == nd.ni_dvp) 2077 vrele(vp); 2078 else 2079 vput(vp); 2080 if ((error = vn_start_write(NULL, &mp, 2081 V_XSLEEP | V_PCATCH)) != 0) { 2082 goto fdout; 2083 } 2084 goto restart; 2085 } 2086 #ifdef MAC 2087 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2088 &nd.ni_cnd); 2089 if (error != 0) 2090 goto out; 2091 #endif 2092 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2093 #ifdef MAC 2094 out: 2095 #endif 2096 vn_finished_write(mp); 2097 } 2098 NDFREE_PNBUF(&nd); 2099 vput(nd.ni_dvp); 2100 if (vp == nd.ni_dvp) 2101 vrele(vp); 2102 else 2103 vput(vp); 2104 if (error == ERELOOKUP) 2105 goto restart; 2106 fdout: 2107 if (fp != NULL) 2108 fdrop(fp, td); 2109 return (error); 2110 } 2111 2112 /* 2113 * Reposition read/write file offset. 2114 */ 2115 #ifndef _SYS_SYSPROTO_H_ 2116 struct lseek_args { 2117 int fd; 2118 int pad; 2119 off_t offset; 2120 int whence; 2121 }; 2122 #endif 2123 int 2124 sys_lseek(struct thread *td, struct lseek_args *uap) 2125 { 2126 2127 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2128 } 2129 2130 int 2131 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2132 { 2133 struct file *fp; 2134 int error; 2135 2136 AUDIT_ARG_FD(fd); 2137 error = fget(td, fd, &cap_seek_rights, &fp); 2138 if (error != 0) 2139 return (error); 2140 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2141 fo_seek(fp, offset, whence, td) : ESPIPE; 2142 fdrop(fp, td); 2143 return (error); 2144 } 2145 2146 #if defined(COMPAT_43) 2147 /* 2148 * Reposition read/write file offset. 2149 */ 2150 #ifndef _SYS_SYSPROTO_H_ 2151 struct olseek_args { 2152 int fd; 2153 long offset; 2154 int whence; 2155 }; 2156 #endif 2157 int 2158 olseek(struct thread *td, struct olseek_args *uap) 2159 { 2160 2161 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2162 } 2163 #endif /* COMPAT_43 */ 2164 2165 #if defined(COMPAT_FREEBSD6) 2166 /* Version with the 'pad' argument */ 2167 int 2168 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2169 { 2170 2171 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2172 } 2173 #endif 2174 2175 /* 2176 * Check access permissions using passed credentials. 2177 */ 2178 static int 2179 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2180 struct thread *td) 2181 { 2182 accmode_t accmode; 2183 int error; 2184 2185 /* Flags == 0 means only check for existence. */ 2186 if (user_flags == 0) 2187 return (0); 2188 2189 accmode = 0; 2190 if (user_flags & R_OK) 2191 accmode |= VREAD; 2192 if (user_flags & W_OK) 2193 accmode |= VWRITE; 2194 if (user_flags & X_OK) 2195 accmode |= VEXEC; 2196 #ifdef MAC 2197 error = mac_vnode_check_access(cred, vp, accmode); 2198 if (error != 0) 2199 return (error); 2200 #endif 2201 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2202 error = VOP_ACCESS(vp, accmode, cred, td); 2203 return (error); 2204 } 2205 2206 /* 2207 * Check access permissions using "real" credentials. 2208 */ 2209 #ifndef _SYS_SYSPROTO_H_ 2210 struct access_args { 2211 char *path; 2212 int amode; 2213 }; 2214 #endif 2215 int 2216 sys_access(struct thread *td, struct access_args *uap) 2217 { 2218 2219 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2220 0, uap->amode)); 2221 } 2222 2223 #ifndef _SYS_SYSPROTO_H_ 2224 struct faccessat_args { 2225 int dirfd; 2226 char *path; 2227 int amode; 2228 int flag; 2229 } 2230 #endif 2231 int 2232 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2233 { 2234 2235 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2236 uap->amode)); 2237 } 2238 2239 int 2240 kern_accessat(struct thread *td, int fd, const char *path, 2241 enum uio_seg pathseg, int flag, int amode) 2242 { 2243 struct ucred *cred, *usecred; 2244 struct vnode *vp; 2245 struct nameidata nd; 2246 int error; 2247 2248 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2249 AT_SYMLINK_NOFOLLOW)) != 0) 2250 return (EINVAL); 2251 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2252 return (EINVAL); 2253 2254 /* 2255 * Create and modify a temporary credential instead of one that 2256 * is potentially shared (if we need one). 2257 */ 2258 cred = td->td_ucred; 2259 if ((flag & AT_EACCESS) == 0 && 2260 ((cred->cr_uid != cred->cr_ruid || 2261 cred->cr_rgid != cred->cr_gid))) { 2262 usecred = crdup(cred); 2263 usecred->cr_uid = cred->cr_ruid; 2264 usecred->cr_gid = cred->cr_rgid; 2265 td->td_ucred = usecred; 2266 } else 2267 usecred = cred; 2268 AUDIT_ARG_VALUE(amode); 2269 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2270 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2271 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2272 if ((error = namei(&nd)) != 0) 2273 goto out; 2274 vp = nd.ni_vp; 2275 2276 error = vn_access(vp, amode, usecred, td); 2277 NDFREE_PNBUF(&nd); 2278 vput(vp); 2279 out: 2280 if (usecred != cred) { 2281 td->td_ucred = cred; 2282 crfree(usecred); 2283 } 2284 return (error); 2285 } 2286 2287 /* 2288 * Check access permissions using "effective" credentials. 2289 */ 2290 #ifndef _SYS_SYSPROTO_H_ 2291 struct eaccess_args { 2292 char *path; 2293 int amode; 2294 }; 2295 #endif 2296 int 2297 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2298 { 2299 2300 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2301 AT_EACCESS, uap->amode)); 2302 } 2303 2304 #if defined(COMPAT_43) 2305 /* 2306 * Get file status; this version follows links. 2307 */ 2308 #ifndef _SYS_SYSPROTO_H_ 2309 struct ostat_args { 2310 char *path; 2311 struct ostat *ub; 2312 }; 2313 #endif 2314 int 2315 ostat(struct thread *td, struct ostat_args *uap) 2316 { 2317 struct stat sb; 2318 struct ostat osb; 2319 int error; 2320 2321 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2322 if (error != 0) 2323 return (error); 2324 cvtstat(&sb, &osb); 2325 return (copyout(&osb, uap->ub, sizeof (osb))); 2326 } 2327 2328 /* 2329 * Get file status; this version does not follow links. 2330 */ 2331 #ifndef _SYS_SYSPROTO_H_ 2332 struct olstat_args { 2333 char *path; 2334 struct ostat *ub; 2335 }; 2336 #endif 2337 int 2338 olstat(struct thread *td, struct olstat_args *uap) 2339 { 2340 struct stat sb; 2341 struct ostat osb; 2342 int error; 2343 2344 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2345 UIO_USERSPACE, &sb); 2346 if (error != 0) 2347 return (error); 2348 cvtstat(&sb, &osb); 2349 return (copyout(&osb, uap->ub, sizeof (osb))); 2350 } 2351 2352 /* 2353 * Convert from an old to a new stat structure. 2354 * XXX: many values are blindly truncated. 2355 */ 2356 void 2357 cvtstat(struct stat *st, struct ostat *ost) 2358 { 2359 2360 bzero(ost, sizeof(*ost)); 2361 ost->st_dev = st->st_dev; 2362 ost->st_ino = st->st_ino; 2363 ost->st_mode = st->st_mode; 2364 ost->st_nlink = st->st_nlink; 2365 ost->st_uid = st->st_uid; 2366 ost->st_gid = st->st_gid; 2367 ost->st_rdev = st->st_rdev; 2368 ost->st_size = MIN(st->st_size, INT32_MAX); 2369 ost->st_atim = st->st_atim; 2370 ost->st_mtim = st->st_mtim; 2371 ost->st_ctim = st->st_ctim; 2372 ost->st_blksize = st->st_blksize; 2373 ost->st_blocks = st->st_blocks; 2374 ost->st_flags = st->st_flags; 2375 ost->st_gen = st->st_gen; 2376 } 2377 #endif /* COMPAT_43 */ 2378 2379 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2380 int ino64_trunc_error; 2381 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2382 &ino64_trunc_error, 0, 2383 "Error on truncation of device, file or inode number, or link count"); 2384 2385 int 2386 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2387 { 2388 2389 ost->st_dev = st->st_dev; 2390 if (ost->st_dev != st->st_dev) { 2391 switch (ino64_trunc_error) { 2392 default: 2393 /* 2394 * Since dev_t is almost raw, don't clamp to the 2395 * maximum for case 2, but ignore the error. 2396 */ 2397 break; 2398 case 1: 2399 return (EOVERFLOW); 2400 } 2401 } 2402 ost->st_ino = st->st_ino; 2403 if (ost->st_ino != st->st_ino) { 2404 switch (ino64_trunc_error) { 2405 default: 2406 case 0: 2407 break; 2408 case 1: 2409 return (EOVERFLOW); 2410 case 2: 2411 ost->st_ino = UINT32_MAX; 2412 break; 2413 } 2414 } 2415 ost->st_mode = st->st_mode; 2416 ost->st_nlink = st->st_nlink; 2417 if (ost->st_nlink != st->st_nlink) { 2418 switch (ino64_trunc_error) { 2419 default: 2420 case 0: 2421 break; 2422 case 1: 2423 return (EOVERFLOW); 2424 case 2: 2425 ost->st_nlink = UINT16_MAX; 2426 break; 2427 } 2428 } 2429 ost->st_uid = st->st_uid; 2430 ost->st_gid = st->st_gid; 2431 ost->st_rdev = st->st_rdev; 2432 if (ost->st_rdev != st->st_rdev) { 2433 switch (ino64_trunc_error) { 2434 default: 2435 break; 2436 case 1: 2437 return (EOVERFLOW); 2438 } 2439 } 2440 ost->st_atim = st->st_atim; 2441 ost->st_mtim = st->st_mtim; 2442 ost->st_ctim = st->st_ctim; 2443 ost->st_size = st->st_size; 2444 ost->st_blocks = st->st_blocks; 2445 ost->st_blksize = st->st_blksize; 2446 ost->st_flags = st->st_flags; 2447 ost->st_gen = st->st_gen; 2448 ost->st_lspare = 0; 2449 ost->st_birthtim = st->st_birthtim; 2450 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2451 sizeof(*ost) - offsetof(struct freebsd11_stat, 2452 st_birthtim) - sizeof(ost->st_birthtim)); 2453 return (0); 2454 } 2455 2456 int 2457 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2458 { 2459 struct stat sb; 2460 struct freebsd11_stat osb; 2461 int error; 2462 2463 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2464 if (error != 0) 2465 return (error); 2466 error = freebsd11_cvtstat(&sb, &osb); 2467 if (error == 0) 2468 error = copyout(&osb, uap->ub, sizeof(osb)); 2469 return (error); 2470 } 2471 2472 int 2473 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2474 { 2475 struct stat sb; 2476 struct freebsd11_stat osb; 2477 int error; 2478 2479 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2480 UIO_USERSPACE, &sb); 2481 if (error != 0) 2482 return (error); 2483 error = freebsd11_cvtstat(&sb, &osb); 2484 if (error == 0) 2485 error = copyout(&osb, uap->ub, sizeof(osb)); 2486 return (error); 2487 } 2488 2489 int 2490 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2491 { 2492 struct fhandle fh; 2493 struct stat sb; 2494 struct freebsd11_stat osb; 2495 int error; 2496 2497 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2498 if (error != 0) 2499 return (error); 2500 error = kern_fhstat(td, fh, &sb); 2501 if (error != 0) 2502 return (error); 2503 error = freebsd11_cvtstat(&sb, &osb); 2504 if (error == 0) 2505 error = copyout(&osb, uap->sb, sizeof(osb)); 2506 return (error); 2507 } 2508 2509 int 2510 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2511 { 2512 struct stat sb; 2513 struct freebsd11_stat osb; 2514 int error; 2515 2516 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2517 UIO_USERSPACE, &sb); 2518 if (error != 0) 2519 return (error); 2520 error = freebsd11_cvtstat(&sb, &osb); 2521 if (error == 0) 2522 error = copyout(&osb, uap->buf, sizeof(osb)); 2523 return (error); 2524 } 2525 #endif /* COMPAT_FREEBSD11 */ 2526 2527 /* 2528 * Get file status 2529 */ 2530 #ifndef _SYS_SYSPROTO_H_ 2531 struct fstatat_args { 2532 int fd; 2533 char *path; 2534 struct stat *buf; 2535 int flag; 2536 } 2537 #endif 2538 int 2539 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2540 { 2541 struct stat sb; 2542 int error; 2543 2544 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2545 UIO_USERSPACE, &sb); 2546 if (error == 0) 2547 error = copyout(&sb, uap->buf, sizeof (sb)); 2548 return (error); 2549 } 2550 2551 int 2552 kern_statat(struct thread *td, int flag, int fd, const char *path, 2553 enum uio_seg pathseg, struct stat *sbp) 2554 { 2555 struct nameidata nd; 2556 int error; 2557 2558 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2559 AT_EMPTY_PATH)) != 0) 2560 return (EINVAL); 2561 2562 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2563 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2564 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2565 2566 if ((error = namei(&nd)) != 0) { 2567 if (error == ENOTDIR && 2568 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2569 error = kern_fstat(td, fd, sbp); 2570 return (error); 2571 } 2572 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2573 NDFREE_PNBUF(&nd); 2574 vput(nd.ni_vp); 2575 #ifdef __STAT_TIME_T_EXT 2576 sbp->st_atim_ext = 0; 2577 sbp->st_mtim_ext = 0; 2578 sbp->st_ctim_ext = 0; 2579 sbp->st_btim_ext = 0; 2580 #endif 2581 #ifdef KTRACE 2582 if (KTRPOINT(td, KTR_STRUCT)) 2583 ktrstat_error(sbp, error); 2584 #endif 2585 return (error); 2586 } 2587 2588 #if defined(COMPAT_FREEBSD11) 2589 /* 2590 * Implementation of the NetBSD [l]stat() functions. 2591 */ 2592 int 2593 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2594 { 2595 struct freebsd11_stat sb11; 2596 int error; 2597 2598 error = freebsd11_cvtstat(sb, &sb11); 2599 if (error != 0) 2600 return (error); 2601 2602 bzero(nsb, sizeof(*nsb)); 2603 CP(sb11, *nsb, st_dev); 2604 CP(sb11, *nsb, st_ino); 2605 CP(sb11, *nsb, st_mode); 2606 CP(sb11, *nsb, st_nlink); 2607 CP(sb11, *nsb, st_uid); 2608 CP(sb11, *nsb, st_gid); 2609 CP(sb11, *nsb, st_rdev); 2610 CP(sb11, *nsb, st_atim); 2611 CP(sb11, *nsb, st_mtim); 2612 CP(sb11, *nsb, st_ctim); 2613 CP(sb11, *nsb, st_size); 2614 CP(sb11, *nsb, st_blocks); 2615 CP(sb11, *nsb, st_blksize); 2616 CP(sb11, *nsb, st_flags); 2617 CP(sb11, *nsb, st_gen); 2618 CP(sb11, *nsb, st_birthtim); 2619 return (0); 2620 } 2621 2622 #ifndef _SYS_SYSPROTO_H_ 2623 struct freebsd11_nstat_args { 2624 char *path; 2625 struct nstat *ub; 2626 }; 2627 #endif 2628 int 2629 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2630 { 2631 struct stat sb; 2632 struct nstat nsb; 2633 int error; 2634 2635 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2636 if (error != 0) 2637 return (error); 2638 error = freebsd11_cvtnstat(&sb, &nsb); 2639 if (error == 0) 2640 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2641 return (error); 2642 } 2643 2644 /* 2645 * NetBSD lstat. Get file status; this version does not follow links. 2646 */ 2647 #ifndef _SYS_SYSPROTO_H_ 2648 struct freebsd11_nlstat_args { 2649 char *path; 2650 struct nstat *ub; 2651 }; 2652 #endif 2653 int 2654 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2655 { 2656 struct stat sb; 2657 struct nstat nsb; 2658 int error; 2659 2660 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2661 UIO_USERSPACE, &sb); 2662 if (error != 0) 2663 return (error); 2664 error = freebsd11_cvtnstat(&sb, &nsb); 2665 if (error == 0) 2666 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2667 return (error); 2668 } 2669 #endif /* COMPAT_FREEBSD11 */ 2670 2671 /* 2672 * Get configurable pathname variables. 2673 */ 2674 #ifndef _SYS_SYSPROTO_H_ 2675 struct pathconf_args { 2676 char *path; 2677 int name; 2678 }; 2679 #endif 2680 int 2681 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2682 { 2683 long value; 2684 int error; 2685 2686 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2687 &value); 2688 if (error == 0) 2689 td->td_retval[0] = value; 2690 return (error); 2691 } 2692 2693 #ifndef _SYS_SYSPROTO_H_ 2694 struct lpathconf_args { 2695 char *path; 2696 int name; 2697 }; 2698 #endif 2699 int 2700 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2701 { 2702 long value; 2703 int error; 2704 2705 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2706 NOFOLLOW, &value); 2707 if (error == 0) 2708 td->td_retval[0] = value; 2709 return (error); 2710 } 2711 2712 int 2713 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2714 int name, u_long flags, long *valuep) 2715 { 2716 struct nameidata nd; 2717 int error; 2718 2719 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2720 pathseg, path); 2721 if ((error = namei(&nd)) != 0) 2722 return (error); 2723 NDFREE_PNBUF(&nd); 2724 2725 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2726 vput(nd.ni_vp); 2727 return (error); 2728 } 2729 2730 /* 2731 * Return target name of a symbolic link. 2732 */ 2733 #ifndef _SYS_SYSPROTO_H_ 2734 struct readlink_args { 2735 char *path; 2736 char *buf; 2737 size_t count; 2738 }; 2739 #endif 2740 int 2741 sys_readlink(struct thread *td, struct readlink_args *uap) 2742 { 2743 2744 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2745 uap->buf, UIO_USERSPACE, uap->count)); 2746 } 2747 #ifndef _SYS_SYSPROTO_H_ 2748 struct readlinkat_args { 2749 int fd; 2750 char *path; 2751 char *buf; 2752 size_t bufsize; 2753 }; 2754 #endif 2755 int 2756 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2757 { 2758 2759 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2760 uap->buf, UIO_USERSPACE, uap->bufsize)); 2761 } 2762 2763 int 2764 kern_readlinkat(struct thread *td, int fd, const char *path, 2765 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2766 { 2767 struct vnode *vp; 2768 struct nameidata nd; 2769 int error; 2770 2771 if (count > IOSIZE_MAX) 2772 return (EINVAL); 2773 2774 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2775 EMPTYPATH, pathseg, path, fd); 2776 2777 if ((error = namei(&nd)) != 0) 2778 return (error); 2779 NDFREE_PNBUF(&nd); 2780 vp = nd.ni_vp; 2781 2782 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2783 vput(vp); 2784 2785 return (error); 2786 } 2787 2788 /* 2789 * Helper function to readlink from a vnode 2790 */ 2791 static int 2792 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2793 struct thread *td) 2794 { 2795 struct iovec aiov; 2796 struct uio auio; 2797 int error; 2798 2799 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2800 #ifdef MAC 2801 error = mac_vnode_check_readlink(td->td_ucred, vp); 2802 if (error != 0) 2803 return (error); 2804 #endif 2805 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2806 return (EINVAL); 2807 2808 aiov.iov_base = buf; 2809 aiov.iov_len = count; 2810 auio.uio_iov = &aiov; 2811 auio.uio_iovcnt = 1; 2812 auio.uio_offset = 0; 2813 auio.uio_rw = UIO_READ; 2814 auio.uio_segflg = bufseg; 2815 auio.uio_td = td; 2816 auio.uio_resid = count; 2817 error = VOP_READLINK(vp, &auio, td->td_ucred); 2818 td->td_retval[0] = count - auio.uio_resid; 2819 return (error); 2820 } 2821 2822 /* 2823 * Common implementation code for chflags() and fchflags(). 2824 */ 2825 static int 2826 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2827 { 2828 struct mount *mp; 2829 struct vattr vattr; 2830 int error; 2831 2832 /* We can't support the value matching VNOVAL. */ 2833 if (flags == VNOVAL) 2834 return (EOPNOTSUPP); 2835 2836 /* 2837 * Prevent non-root users from setting flags on devices. When 2838 * a device is reused, users can retain ownership of the device 2839 * if they are allowed to set flags and programs assume that 2840 * chown can't fail when done as root. 2841 */ 2842 if (VN_ISDEV(vp)) { 2843 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2844 if (error != 0) 2845 return (error); 2846 } 2847 2848 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2849 return (error); 2850 VATTR_NULL(&vattr); 2851 vattr.va_flags = flags; 2852 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2853 #ifdef MAC 2854 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2855 if (error == 0) 2856 #endif 2857 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2858 VOP_UNLOCK(vp); 2859 vn_finished_write(mp); 2860 return (error); 2861 } 2862 2863 /* 2864 * Change flags of a file given a path name. 2865 */ 2866 #ifndef _SYS_SYSPROTO_H_ 2867 struct chflags_args { 2868 const char *path; 2869 u_long flags; 2870 }; 2871 #endif 2872 int 2873 sys_chflags(struct thread *td, struct chflags_args *uap) 2874 { 2875 2876 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2877 uap->flags, 0)); 2878 } 2879 2880 #ifndef _SYS_SYSPROTO_H_ 2881 struct chflagsat_args { 2882 int fd; 2883 const char *path; 2884 u_long flags; 2885 int atflag; 2886 } 2887 #endif 2888 int 2889 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2890 { 2891 2892 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2893 uap->flags, uap->atflag)); 2894 } 2895 2896 /* 2897 * Same as chflags() but doesn't follow symlinks. 2898 */ 2899 #ifndef _SYS_SYSPROTO_H_ 2900 struct lchflags_args { 2901 const char *path; 2902 u_long flags; 2903 }; 2904 #endif 2905 int 2906 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2907 { 2908 2909 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2910 uap->flags, AT_SYMLINK_NOFOLLOW)); 2911 } 2912 2913 static int 2914 kern_chflagsat(struct thread *td, int fd, const char *path, 2915 enum uio_seg pathseg, u_long flags, int atflag) 2916 { 2917 struct nameidata nd; 2918 int error; 2919 2920 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2921 AT_EMPTY_PATH)) != 0) 2922 return (EINVAL); 2923 2924 AUDIT_ARG_FFLAGS(flags); 2925 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2926 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2927 fd, &cap_fchflags_rights); 2928 if ((error = namei(&nd)) != 0) 2929 return (error); 2930 NDFREE_PNBUF(&nd); 2931 error = setfflags(td, nd.ni_vp, flags); 2932 vrele(nd.ni_vp); 2933 return (error); 2934 } 2935 2936 /* 2937 * Change flags of a file given a file descriptor. 2938 */ 2939 #ifndef _SYS_SYSPROTO_H_ 2940 struct fchflags_args { 2941 int fd; 2942 u_long flags; 2943 }; 2944 #endif 2945 int 2946 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2947 { 2948 struct file *fp; 2949 int error; 2950 2951 AUDIT_ARG_FD(uap->fd); 2952 AUDIT_ARG_FFLAGS(uap->flags); 2953 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2954 &fp); 2955 if (error != 0) 2956 return (error); 2957 #ifdef AUDIT 2958 if (AUDITING_TD(td)) { 2959 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2960 AUDIT_ARG_VNODE1(fp->f_vnode); 2961 VOP_UNLOCK(fp->f_vnode); 2962 } 2963 #endif 2964 error = setfflags(td, fp->f_vnode, uap->flags); 2965 fdrop(fp, td); 2966 return (error); 2967 } 2968 2969 /* 2970 * Common implementation code for chmod(), lchmod() and fchmod(). 2971 */ 2972 int 2973 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2974 { 2975 struct mount *mp; 2976 struct vattr vattr; 2977 int error; 2978 2979 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2980 return (error); 2981 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2982 VATTR_NULL(&vattr); 2983 vattr.va_mode = mode & ALLPERMS; 2984 #ifdef MAC 2985 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2986 if (error == 0) 2987 #endif 2988 error = VOP_SETATTR(vp, &vattr, cred); 2989 VOP_UNLOCK(vp); 2990 vn_finished_write(mp); 2991 return (error); 2992 } 2993 2994 /* 2995 * Change mode of a file given path name. 2996 */ 2997 #ifndef _SYS_SYSPROTO_H_ 2998 struct chmod_args { 2999 char *path; 3000 int mode; 3001 }; 3002 #endif 3003 int 3004 sys_chmod(struct thread *td, struct chmod_args *uap) 3005 { 3006 3007 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3008 uap->mode, 0)); 3009 } 3010 3011 #ifndef _SYS_SYSPROTO_H_ 3012 struct fchmodat_args { 3013 int dirfd; 3014 char *path; 3015 mode_t mode; 3016 int flag; 3017 } 3018 #endif 3019 int 3020 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3021 { 3022 3023 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3024 uap->mode, uap->flag)); 3025 } 3026 3027 /* 3028 * Change mode of a file given path name (don't follow links.) 3029 */ 3030 #ifndef _SYS_SYSPROTO_H_ 3031 struct lchmod_args { 3032 char *path; 3033 int mode; 3034 }; 3035 #endif 3036 int 3037 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3038 { 3039 3040 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3041 uap->mode, AT_SYMLINK_NOFOLLOW)); 3042 } 3043 3044 int 3045 kern_fchmodat(struct thread *td, int fd, const char *path, 3046 enum uio_seg pathseg, mode_t mode, int flag) 3047 { 3048 struct nameidata nd; 3049 int error; 3050 3051 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3052 AT_EMPTY_PATH)) != 0) 3053 return (EINVAL); 3054 3055 AUDIT_ARG_MODE(mode); 3056 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3057 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3058 fd, &cap_fchmod_rights); 3059 if ((error = namei(&nd)) != 0) 3060 return (error); 3061 NDFREE_PNBUF(&nd); 3062 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3063 vrele(nd.ni_vp); 3064 return (error); 3065 } 3066 3067 /* 3068 * Change mode of a file given a file descriptor. 3069 */ 3070 #ifndef _SYS_SYSPROTO_H_ 3071 struct fchmod_args { 3072 int fd; 3073 int mode; 3074 }; 3075 #endif 3076 int 3077 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3078 { 3079 struct file *fp; 3080 int error; 3081 3082 AUDIT_ARG_FD(uap->fd); 3083 AUDIT_ARG_MODE(uap->mode); 3084 3085 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3086 if (error != 0) 3087 return (error); 3088 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3089 fdrop(fp, td); 3090 return (error); 3091 } 3092 3093 /* 3094 * Common implementation for chown(), lchown(), and fchown() 3095 */ 3096 int 3097 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3098 gid_t gid) 3099 { 3100 struct mount *mp; 3101 struct vattr vattr; 3102 int error; 3103 3104 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3105 return (error); 3106 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3107 VATTR_NULL(&vattr); 3108 vattr.va_uid = uid; 3109 vattr.va_gid = gid; 3110 #ifdef MAC 3111 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3112 vattr.va_gid); 3113 if (error == 0) 3114 #endif 3115 error = VOP_SETATTR(vp, &vattr, cred); 3116 VOP_UNLOCK(vp); 3117 vn_finished_write(mp); 3118 return (error); 3119 } 3120 3121 /* 3122 * Set ownership given a path name. 3123 */ 3124 #ifndef _SYS_SYSPROTO_H_ 3125 struct chown_args { 3126 char *path; 3127 int uid; 3128 int gid; 3129 }; 3130 #endif 3131 int 3132 sys_chown(struct thread *td, struct chown_args *uap) 3133 { 3134 3135 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3136 uap->gid, 0)); 3137 } 3138 3139 #ifndef _SYS_SYSPROTO_H_ 3140 struct fchownat_args { 3141 int fd; 3142 const char * path; 3143 uid_t uid; 3144 gid_t gid; 3145 int flag; 3146 }; 3147 #endif 3148 int 3149 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3150 { 3151 3152 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3153 uap->gid, uap->flag)); 3154 } 3155 3156 int 3157 kern_fchownat(struct thread *td, int fd, const char *path, 3158 enum uio_seg pathseg, int uid, int gid, int flag) 3159 { 3160 struct nameidata nd; 3161 int error; 3162 3163 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3164 AT_EMPTY_PATH)) != 0) 3165 return (EINVAL); 3166 3167 AUDIT_ARG_OWNER(uid, gid); 3168 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3169 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3170 fd, &cap_fchown_rights); 3171 3172 if ((error = namei(&nd)) != 0) 3173 return (error); 3174 NDFREE_PNBUF(&nd); 3175 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3176 vrele(nd.ni_vp); 3177 return (error); 3178 } 3179 3180 /* 3181 * Set ownership given a path name, do not cross symlinks. 3182 */ 3183 #ifndef _SYS_SYSPROTO_H_ 3184 struct lchown_args { 3185 char *path; 3186 int uid; 3187 int gid; 3188 }; 3189 #endif 3190 int 3191 sys_lchown(struct thread *td, struct lchown_args *uap) 3192 { 3193 3194 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3195 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3196 } 3197 3198 /* 3199 * Set ownership given a file descriptor. 3200 */ 3201 #ifndef _SYS_SYSPROTO_H_ 3202 struct fchown_args { 3203 int fd; 3204 int uid; 3205 int gid; 3206 }; 3207 #endif 3208 int 3209 sys_fchown(struct thread *td, struct fchown_args *uap) 3210 { 3211 struct file *fp; 3212 int error; 3213 3214 AUDIT_ARG_FD(uap->fd); 3215 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3216 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3217 if (error != 0) 3218 return (error); 3219 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3220 fdrop(fp, td); 3221 return (error); 3222 } 3223 3224 /* 3225 * Common implementation code for utimes(), lutimes(), and futimes(). 3226 */ 3227 static int 3228 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3229 struct timespec *tsp) 3230 { 3231 struct timeval tv[2]; 3232 const struct timeval *tvp; 3233 int error; 3234 3235 if (usrtvp == NULL) { 3236 vfs_timestamp(&tsp[0]); 3237 tsp[1] = tsp[0]; 3238 } else { 3239 if (tvpseg == UIO_SYSSPACE) { 3240 tvp = usrtvp; 3241 } else { 3242 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3243 return (error); 3244 tvp = tv; 3245 } 3246 3247 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3248 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3249 return (EINVAL); 3250 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3251 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3252 } 3253 return (0); 3254 } 3255 3256 /* 3257 * Common implementation code for futimens(), utimensat(). 3258 */ 3259 #define UTIMENS_NULL 0x1 3260 #define UTIMENS_EXIT 0x2 3261 static int 3262 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3263 struct timespec *tsp, int *retflags) 3264 { 3265 struct timespec tsnow; 3266 int error; 3267 3268 vfs_timestamp(&tsnow); 3269 *retflags = 0; 3270 if (usrtsp == NULL) { 3271 tsp[0] = tsnow; 3272 tsp[1] = tsnow; 3273 *retflags |= UTIMENS_NULL; 3274 return (0); 3275 } 3276 if (tspseg == UIO_SYSSPACE) { 3277 tsp[0] = usrtsp[0]; 3278 tsp[1] = usrtsp[1]; 3279 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3280 return (error); 3281 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3282 *retflags |= UTIMENS_EXIT; 3283 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3284 *retflags |= UTIMENS_NULL; 3285 if (tsp[0].tv_nsec == UTIME_OMIT) 3286 tsp[0].tv_sec = VNOVAL; 3287 else if (tsp[0].tv_nsec == UTIME_NOW) 3288 tsp[0] = tsnow; 3289 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3290 return (EINVAL); 3291 if (tsp[1].tv_nsec == UTIME_OMIT) 3292 tsp[1].tv_sec = VNOVAL; 3293 else if (tsp[1].tv_nsec == UTIME_NOW) 3294 tsp[1] = tsnow; 3295 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3296 return (EINVAL); 3297 3298 return (0); 3299 } 3300 3301 /* 3302 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3303 * and utimensat(). 3304 */ 3305 static int 3306 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3307 int numtimes, int nullflag) 3308 { 3309 struct mount *mp; 3310 struct vattr vattr; 3311 int error; 3312 bool setbirthtime; 3313 3314 setbirthtime = false; 3315 vattr.va_birthtime.tv_sec = VNOVAL; 3316 vattr.va_birthtime.tv_nsec = 0; 3317 3318 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3319 return (error); 3320 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3321 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3322 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3323 setbirthtime = true; 3324 VATTR_NULL(&vattr); 3325 vattr.va_atime = ts[0]; 3326 vattr.va_mtime = ts[1]; 3327 if (setbirthtime) 3328 vattr.va_birthtime = ts[1]; 3329 if (numtimes > 2) 3330 vattr.va_birthtime = ts[2]; 3331 if (nullflag) 3332 vattr.va_vaflags |= VA_UTIMES_NULL; 3333 #ifdef MAC 3334 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3335 vattr.va_mtime); 3336 #endif 3337 if (error == 0) 3338 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3339 VOP_UNLOCK(vp); 3340 vn_finished_write(mp); 3341 return (error); 3342 } 3343 3344 /* 3345 * Set the access and modification times of a file. 3346 */ 3347 #ifndef _SYS_SYSPROTO_H_ 3348 struct utimes_args { 3349 char *path; 3350 struct timeval *tptr; 3351 }; 3352 #endif 3353 int 3354 sys_utimes(struct thread *td, struct utimes_args *uap) 3355 { 3356 3357 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3358 uap->tptr, UIO_USERSPACE)); 3359 } 3360 3361 #ifndef _SYS_SYSPROTO_H_ 3362 struct futimesat_args { 3363 int fd; 3364 const char * path; 3365 const struct timeval * times; 3366 }; 3367 #endif 3368 int 3369 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3370 { 3371 3372 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3373 uap->times, UIO_USERSPACE)); 3374 } 3375 3376 int 3377 kern_utimesat(struct thread *td, int fd, const char *path, 3378 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3379 { 3380 struct nameidata nd; 3381 struct timespec ts[2]; 3382 int error; 3383 3384 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3385 return (error); 3386 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3387 &cap_futimes_rights); 3388 3389 if ((error = namei(&nd)) != 0) 3390 return (error); 3391 NDFREE_PNBUF(&nd); 3392 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3393 vrele(nd.ni_vp); 3394 return (error); 3395 } 3396 3397 /* 3398 * Set the access and modification times of a file. 3399 */ 3400 #ifndef _SYS_SYSPROTO_H_ 3401 struct lutimes_args { 3402 char *path; 3403 struct timeval *tptr; 3404 }; 3405 #endif 3406 int 3407 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3408 { 3409 3410 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3411 UIO_USERSPACE)); 3412 } 3413 3414 int 3415 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3416 const struct timeval *tptr, enum uio_seg tptrseg) 3417 { 3418 struct timespec ts[2]; 3419 struct nameidata nd; 3420 int error; 3421 3422 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3423 return (error); 3424 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3425 if ((error = namei(&nd)) != 0) 3426 return (error); 3427 NDFREE_PNBUF(&nd); 3428 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3429 vrele(nd.ni_vp); 3430 return (error); 3431 } 3432 3433 /* 3434 * Set the access and modification times of a file. 3435 */ 3436 #ifndef _SYS_SYSPROTO_H_ 3437 struct futimes_args { 3438 int fd; 3439 struct timeval *tptr; 3440 }; 3441 #endif 3442 int 3443 sys_futimes(struct thread *td, struct futimes_args *uap) 3444 { 3445 3446 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3447 } 3448 3449 int 3450 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3451 enum uio_seg tptrseg) 3452 { 3453 struct timespec ts[2]; 3454 struct file *fp; 3455 int error; 3456 3457 AUDIT_ARG_FD(fd); 3458 error = getutimes(tptr, tptrseg, ts); 3459 if (error != 0) 3460 return (error); 3461 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3462 if (error != 0) 3463 return (error); 3464 #ifdef AUDIT 3465 if (AUDITING_TD(td)) { 3466 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3467 AUDIT_ARG_VNODE1(fp->f_vnode); 3468 VOP_UNLOCK(fp->f_vnode); 3469 } 3470 #endif 3471 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3472 fdrop(fp, td); 3473 return (error); 3474 } 3475 3476 int 3477 sys_futimens(struct thread *td, struct futimens_args *uap) 3478 { 3479 3480 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3481 } 3482 3483 int 3484 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3485 enum uio_seg tptrseg) 3486 { 3487 struct timespec ts[2]; 3488 struct file *fp; 3489 int error, flags; 3490 3491 AUDIT_ARG_FD(fd); 3492 error = getutimens(tptr, tptrseg, ts, &flags); 3493 if (error != 0) 3494 return (error); 3495 if (flags & UTIMENS_EXIT) 3496 return (0); 3497 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3498 if (error != 0) 3499 return (error); 3500 #ifdef AUDIT 3501 if (AUDITING_TD(td)) { 3502 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3503 AUDIT_ARG_VNODE1(fp->f_vnode); 3504 VOP_UNLOCK(fp->f_vnode); 3505 } 3506 #endif 3507 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3508 fdrop(fp, td); 3509 return (error); 3510 } 3511 3512 int 3513 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3514 { 3515 3516 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3517 uap->times, UIO_USERSPACE, uap->flag)); 3518 } 3519 3520 int 3521 kern_utimensat(struct thread *td, int fd, const char *path, 3522 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3523 int flag) 3524 { 3525 struct nameidata nd; 3526 struct timespec ts[2]; 3527 int error, flags; 3528 3529 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3530 AT_EMPTY_PATH)) != 0) 3531 return (EINVAL); 3532 3533 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3534 return (error); 3535 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3536 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3537 pathseg, path, fd, &cap_futimes_rights); 3538 if ((error = namei(&nd)) != 0) 3539 return (error); 3540 /* 3541 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3542 * POSIX states: 3543 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3544 * "Search permission is denied by a component of the path prefix." 3545 */ 3546 NDFREE_PNBUF(&nd); 3547 if ((flags & UTIMENS_EXIT) == 0) 3548 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3549 vrele(nd.ni_vp); 3550 return (error); 3551 } 3552 3553 /* 3554 * Truncate a file given its path name. 3555 */ 3556 #ifndef _SYS_SYSPROTO_H_ 3557 struct truncate_args { 3558 char *path; 3559 int pad; 3560 off_t length; 3561 }; 3562 #endif 3563 int 3564 sys_truncate(struct thread *td, struct truncate_args *uap) 3565 { 3566 3567 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3568 } 3569 3570 int 3571 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3572 off_t length) 3573 { 3574 struct mount *mp; 3575 struct vnode *vp; 3576 void *rl_cookie; 3577 struct nameidata nd; 3578 int error; 3579 3580 if (length < 0) 3581 return (EINVAL); 3582 NDPREINIT(&nd); 3583 retry: 3584 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3585 if ((error = namei(&nd)) != 0) 3586 return (error); 3587 vp = nd.ni_vp; 3588 NDFREE_PNBUF(&nd); 3589 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3590 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3591 vn_rangelock_unlock(vp, rl_cookie); 3592 vrele(vp); 3593 return (error); 3594 } 3595 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3596 if (vp->v_type == VDIR) { 3597 error = EISDIR; 3598 goto out; 3599 } 3600 #ifdef MAC 3601 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3602 if (error != 0) 3603 goto out; 3604 #endif 3605 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3606 if (error != 0) 3607 goto out; 3608 3609 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3610 out: 3611 VOP_UNLOCK(vp); 3612 vn_finished_write(mp); 3613 vn_rangelock_unlock(vp, rl_cookie); 3614 vrele(vp); 3615 if (error == ERELOOKUP) 3616 goto retry; 3617 return (error); 3618 } 3619 3620 #if defined(COMPAT_43) 3621 /* 3622 * Truncate a file given its path name. 3623 */ 3624 #ifndef _SYS_SYSPROTO_H_ 3625 struct otruncate_args { 3626 char *path; 3627 long length; 3628 }; 3629 #endif 3630 int 3631 otruncate(struct thread *td, struct otruncate_args *uap) 3632 { 3633 3634 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3635 } 3636 #endif /* COMPAT_43 */ 3637 3638 #if defined(COMPAT_FREEBSD6) 3639 /* Versions with the pad argument */ 3640 int 3641 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3642 { 3643 3644 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3645 } 3646 3647 int 3648 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3649 { 3650 3651 return (kern_ftruncate(td, uap->fd, uap->length)); 3652 } 3653 #endif 3654 3655 int 3656 kern_fsync(struct thread *td, int fd, bool fullsync) 3657 { 3658 struct vnode *vp; 3659 struct mount *mp; 3660 struct file *fp; 3661 int error; 3662 3663 AUDIT_ARG_FD(fd); 3664 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3665 if (error != 0) 3666 return (error); 3667 vp = fp->f_vnode; 3668 #if 0 3669 if (!fullsync) 3670 /* XXXKIB: compete outstanding aio writes */; 3671 #endif 3672 retry: 3673 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3674 if (error != 0) 3675 goto drop; 3676 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3677 AUDIT_ARG_VNODE1(vp); 3678 vnode_pager_clean_async(vp); 3679 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3680 VOP_UNLOCK(vp); 3681 vn_finished_write(mp); 3682 if (error == ERELOOKUP) 3683 goto retry; 3684 drop: 3685 fdrop(fp, td); 3686 return (error); 3687 } 3688 3689 /* 3690 * Sync an open file. 3691 */ 3692 #ifndef _SYS_SYSPROTO_H_ 3693 struct fsync_args { 3694 int fd; 3695 }; 3696 #endif 3697 int 3698 sys_fsync(struct thread *td, struct fsync_args *uap) 3699 { 3700 3701 return (kern_fsync(td, uap->fd, true)); 3702 } 3703 3704 int 3705 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3706 { 3707 3708 return (kern_fsync(td, uap->fd, false)); 3709 } 3710 3711 /* 3712 * Rename files. Source and destination must either both be directories, or 3713 * both not be directories. If target is a directory, it must be empty. 3714 */ 3715 #ifndef _SYS_SYSPROTO_H_ 3716 struct rename_args { 3717 char *from; 3718 char *to; 3719 }; 3720 #endif 3721 int 3722 sys_rename(struct thread *td, struct rename_args *uap) 3723 { 3724 3725 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3726 uap->to, UIO_USERSPACE, 0)); 3727 } 3728 3729 #ifndef _SYS_SYSPROTO_H_ 3730 struct renameat_args { 3731 int oldfd; 3732 char *old; 3733 int newfd; 3734 char *new; 3735 }; 3736 #endif 3737 int 3738 sys_renameat(struct thread *td, struct renameat_args *uap) 3739 { 3740 3741 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3742 UIO_USERSPACE, 0)); 3743 } 3744 3745 int 3746 sys_renameat2(struct thread *td, struct renameat2_args *uap) 3747 { 3748 3749 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3750 UIO_USERSPACE, uap->flags)); 3751 } 3752 3753 #ifdef MAC 3754 static int 3755 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3756 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3757 { 3758 int error; 3759 3760 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3761 pathseg, old, oldfd, &cap_renameat_source_rights); 3762 if ((error = namei(fromnd)) != 0) 3763 return (error); 3764 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3765 fromnd->ni_vp, &fromnd->ni_cnd); 3766 VOP_UNLOCK(fromnd->ni_dvp); 3767 if (fromnd->ni_dvp != fromnd->ni_vp) 3768 VOP_UNLOCK(fromnd->ni_vp); 3769 if (error != 0) { 3770 NDFREE_PNBUF(fromnd); 3771 vrele(fromnd->ni_dvp); 3772 vrele(fromnd->ni_vp); 3773 } 3774 return (error); 3775 } 3776 #endif 3777 3778 int 3779 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3780 const char *new, enum uio_seg pathseg, u_int flags) 3781 { 3782 struct mount *mp, *tmp; 3783 struct vnode *tvp, *fvp, *tdvp; 3784 struct nameidata fromnd, tond; 3785 uint64_t tondflags; 3786 int error; 3787 short irflag; 3788 3789 if ((flags & ~(AT_RENAME_NOREPLACE)) != 0) 3790 return (EINVAL); 3791 again: 3792 tmp = mp = NULL; 3793 bwillwrite(); 3794 #ifdef MAC 3795 if (mac_vnode_check_rename_from_enabled()) { 3796 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3797 &fromnd); 3798 if (error != 0) 3799 return (error); 3800 } else { 3801 #endif 3802 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3803 pathseg, old, oldfd, &cap_renameat_source_rights); 3804 if ((error = namei(&fromnd)) != 0) 3805 return (error); 3806 #ifdef MAC 3807 } 3808 #endif 3809 fvp = fromnd.ni_vp; 3810 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3811 if (fromnd.ni_vp->v_type == VDIR) 3812 tondflags |= WILLBEDIR; 3813 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3814 &cap_renameat_target_rights); 3815 if ((error = namei(&tond)) != 0) { 3816 /* Translate error code for rename("dir1", "dir2/."). */ 3817 if (error == EISDIR && fvp->v_type == VDIR) 3818 error = EINVAL; 3819 NDFREE_PNBUF(&fromnd); 3820 vrele(fromnd.ni_dvp); 3821 vrele(fvp); 3822 goto out1; 3823 } 3824 tdvp = tond.ni_dvp; 3825 tvp = tond.ni_vp; 3826 if (tdvp == vp_crossmp) { 3827 /* 3828 * Rename of the root vnode of the mounted 3829 * filesystem. It is possible to get there with the 3830 * nullfs mount over the regular file. 3831 */ 3832 error = EBUSY; 3833 goto out; 3834 } 3835 if (tvp != NULL && (flags & AT_RENAME_NOREPLACE) != 0) { 3836 /* 3837 * Often filesystems need to relock the vnodes in 3838 * VOP_RENAME(), which opens a window for invalidation 3839 * of this check. Then, not all filesystems might 3840 * implement AT_RENAME_NOREPLACE. This leads to 3841 * situation where sometimes EOPNOTSUPP might be 3842 * returned from the VOP due to race, while most of 3843 * the time this check works. 3844 */ 3845 error = EEXIST; 3846 goto out; 3847 } 3848 error = vn_start_write(fvp, &mp, V_NOWAIT); 3849 if (error != 0) { 3850 again1: 3851 NDFREE_PNBUF(&fromnd); 3852 NDFREE_PNBUF(&tond); 3853 if (tvp != NULL) 3854 vput(tvp); 3855 if (tdvp == tvp) 3856 vrele(tdvp); 3857 else 3858 vput(tdvp); 3859 vrele(fromnd.ni_dvp); 3860 vrele(fvp); 3861 if (tmp != NULL) { 3862 lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); 3863 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); 3864 vfs_rel(tmp); 3865 tmp = NULL; 3866 } 3867 error = sig_intr(); 3868 if (error != 0) 3869 return (error); 3870 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3871 if (error != 0) 3872 return (error); 3873 goto again; 3874 } 3875 error = VOP_GETWRITEMOUNT(tdvp, &tmp); 3876 if (error != 0 || tmp == NULL) 3877 goto again1; 3878 error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); 3879 if (error != 0) { 3880 vn_finished_write(mp); 3881 goto again1; 3882 } 3883 irflag = vn_irflag_read(fvp); 3884 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3885 (irflag & VIRF_NAMEDDIR) != 0) { 3886 error = EINVAL; 3887 goto out; 3888 } 3889 if (tvp != NULL) { 3890 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3891 error = ENOTDIR; 3892 goto out; 3893 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3894 error = EISDIR; 3895 goto out; 3896 } 3897 #ifdef CAPABILITIES 3898 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3899 /* 3900 * If the target already exists we require CAP_UNLINKAT 3901 * from 'newfd', when newfd was used for the lookup. 3902 */ 3903 error = cap_check(&tond.ni_filecaps.fc_rights, 3904 &cap_unlinkat_rights); 3905 if (error != 0) 3906 goto out; 3907 } 3908 #endif 3909 } 3910 if (fvp == tdvp) { 3911 error = EINVAL; 3912 goto out; 3913 } 3914 /* 3915 * If the source is the same as the destination (that is, if they 3916 * are links to the same vnode), then there is nothing to do. 3917 */ 3918 if (fvp == tvp) 3919 error = ERESTART; 3920 #ifdef MAC 3921 else 3922 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3923 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3924 #endif 3925 out: 3926 if (error == 0) { 3927 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3928 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd, flags); 3929 NDFREE_PNBUF(&fromnd); 3930 NDFREE_PNBUF(&tond); 3931 } else { 3932 NDFREE_PNBUF(&fromnd); 3933 NDFREE_PNBUF(&tond); 3934 if (tvp != NULL) 3935 vput(tvp); 3936 if (tdvp == tvp) 3937 vrele(tdvp); 3938 else 3939 vput(tdvp); 3940 vrele(fromnd.ni_dvp); 3941 vrele(fvp); 3942 } 3943 if (tmp != NULL) { 3944 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); 3945 vfs_rel(tmp); 3946 } 3947 if (mp != NULL) 3948 vn_finished_write(mp); 3949 out1: 3950 if (error == ERESTART) 3951 return (0); 3952 if (error == ERELOOKUP) { 3953 error = sig_intr(); 3954 if (error == 0) 3955 goto again; 3956 } 3957 return (error); 3958 } 3959 3960 /* 3961 * Make a directory file. 3962 */ 3963 #ifndef _SYS_SYSPROTO_H_ 3964 struct mkdir_args { 3965 char *path; 3966 int mode; 3967 }; 3968 #endif 3969 int 3970 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3971 { 3972 3973 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3974 uap->mode)); 3975 } 3976 3977 #ifndef _SYS_SYSPROTO_H_ 3978 struct mkdirat_args { 3979 int fd; 3980 char *path; 3981 mode_t mode; 3982 }; 3983 #endif 3984 int 3985 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3986 { 3987 3988 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3989 } 3990 3991 int 3992 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3993 int mode) 3994 { 3995 struct mount *mp; 3996 struct vattr vattr; 3997 struct nameidata nd; 3998 int error; 3999 4000 AUDIT_ARG_MODE(mode); 4001 NDPREINIT(&nd); 4002 restart: 4003 bwillwrite(); 4004 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 4005 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 4006 segflg, path, fd, &cap_mkdirat_rights); 4007 if ((error = namei(&nd)) != 0) 4008 return (error); 4009 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4010 NDFREE_PNBUF(&nd); 4011 vput(nd.ni_dvp); 4012 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4013 return (error); 4014 goto restart; 4015 } 4016 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 4017 error = EINVAL; 4018 goto out; 4019 } 4020 VATTR_NULL(&vattr); 4021 vattr.va_type = VDIR; 4022 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 4023 #ifdef MAC 4024 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 4025 &vattr); 4026 if (error != 0) 4027 goto out; 4028 #endif 4029 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4030 out: 4031 NDFREE_PNBUF(&nd); 4032 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 4033 vn_finished_write(mp); 4034 if (error == ERELOOKUP) 4035 goto restart; 4036 return (error); 4037 } 4038 4039 /* 4040 * Remove a directory file. 4041 */ 4042 #ifndef _SYS_SYSPROTO_H_ 4043 struct rmdir_args { 4044 char *path; 4045 }; 4046 #endif 4047 int 4048 sys_rmdir(struct thread *td, struct rmdir_args *uap) 4049 { 4050 4051 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 4052 0)); 4053 } 4054 4055 int 4056 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 4057 enum uio_seg pathseg, int flag) 4058 { 4059 struct mount *mp; 4060 struct vnode *vp; 4061 struct file *fp; 4062 struct nameidata nd; 4063 cap_rights_t rights; 4064 int error; 4065 4066 fp = NULL; 4067 if (fd != FD_NONE) { 4068 error = getvnode(td, fd, cap_rights_init_one(&rights, 4069 CAP_LOOKUP), &fp); 4070 if (error != 0) 4071 return (error); 4072 } 4073 4074 NDPREINIT(&nd); 4075 restart: 4076 bwillwrite(); 4077 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4078 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4079 pathseg, path, dfd, &cap_unlinkat_rights); 4080 if ((error = namei(&nd)) != 0) 4081 goto fdout; 4082 vp = nd.ni_vp; 4083 if (vp->v_type != VDIR) { 4084 error = ENOTDIR; 4085 goto out; 4086 } 4087 /* 4088 * No rmdir "." please. 4089 */ 4090 if (nd.ni_dvp == vp) { 4091 error = EINVAL; 4092 goto out; 4093 } 4094 /* 4095 * The root of a mounted filesystem cannot be deleted. 4096 */ 4097 if (vp->v_vflag & VV_ROOT) { 4098 error = EBUSY; 4099 goto out; 4100 } 4101 4102 if (fp != NULL && fp->f_vnode != vp) { 4103 if (VN_IS_DOOMED(fp->f_vnode)) 4104 error = EBADF; 4105 else 4106 error = EDEADLK; 4107 goto out; 4108 } 4109 4110 #ifdef MAC 4111 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4112 &nd.ni_cnd); 4113 if (error != 0) 4114 goto out; 4115 #endif 4116 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4117 NDFREE_PNBUF(&nd); 4118 vput(vp); 4119 if (nd.ni_dvp == vp) 4120 vrele(nd.ni_dvp); 4121 else 4122 vput(nd.ni_dvp); 4123 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4124 goto fdout; 4125 goto restart; 4126 } 4127 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4128 vn_finished_write(mp); 4129 out: 4130 NDFREE_PNBUF(&nd); 4131 vput(vp); 4132 if (nd.ni_dvp == vp) 4133 vrele(nd.ni_dvp); 4134 else 4135 vput(nd.ni_dvp); 4136 if (error == ERELOOKUP) 4137 goto restart; 4138 fdout: 4139 if (fp != NULL) 4140 fdrop(fp, td); 4141 return (error); 4142 } 4143 4144 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4145 int 4146 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4147 long *basep, void (*func)(struct freebsd11_dirent *)) 4148 { 4149 struct freebsd11_dirent dstdp; 4150 struct dirent *dp, *edp; 4151 char *dirbuf; 4152 off_t base; 4153 ssize_t resid, ucount; 4154 int error; 4155 4156 /* XXX arbitrary sanity limit on `count'. */ 4157 count = min(count, 64 * 1024); 4158 4159 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4160 4161 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4162 UIO_SYSSPACE); 4163 if (error != 0) 4164 goto done; 4165 if (basep != NULL) 4166 *basep = base; 4167 4168 ucount = 0; 4169 for (dp = (struct dirent *)dirbuf, 4170 edp = (struct dirent *)&dirbuf[count - resid]; 4171 ucount < count && dp < edp; ) { 4172 if (dp->d_reclen == 0) 4173 break; 4174 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4175 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4176 continue; 4177 dstdp.d_type = dp->d_type; 4178 dstdp.d_namlen = dp->d_namlen; 4179 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4180 if (dstdp.d_fileno != dp->d_fileno) { 4181 switch (ino64_trunc_error) { 4182 default: 4183 case 0: 4184 break; 4185 case 1: 4186 error = EOVERFLOW; 4187 goto done; 4188 case 2: 4189 dstdp.d_fileno = UINT32_MAX; 4190 break; 4191 } 4192 } 4193 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4194 ((dp->d_namlen + 1 + 3) &~ 3); 4195 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4196 bzero(dstdp.d_name + dstdp.d_namlen, 4197 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4198 dstdp.d_namlen); 4199 MPASS(dstdp.d_reclen <= dp->d_reclen); 4200 MPASS(ucount + dstdp.d_reclen <= count); 4201 if (func != NULL) 4202 func(&dstdp); 4203 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4204 if (error != 0) 4205 break; 4206 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4207 ucount += dstdp.d_reclen; 4208 } 4209 4210 done: 4211 free(dirbuf, M_TEMP); 4212 if (error == 0) 4213 td->td_retval[0] = ucount; 4214 return (error); 4215 } 4216 #endif /* COMPAT */ 4217 4218 #ifdef COMPAT_43 4219 static void 4220 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4221 { 4222 #if (BYTE_ORDER == LITTLE_ENDIAN) 4223 /* 4224 * The expected low byte of dp->d_namlen is our dp->d_type. 4225 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 4226 */ 4227 dp->d_type = dp->d_namlen; 4228 dp->d_namlen = 0; 4229 #else 4230 /* 4231 * The dp->d_type is the high byte of the expected dp->d_namlen, 4232 * so must be zero'ed. 4233 */ 4234 dp->d_type = 0; 4235 #endif 4236 } 4237 4238 /* 4239 * Read a block of directory entries in a filesystem independent format. 4240 */ 4241 #ifndef _SYS_SYSPROTO_H_ 4242 struct ogetdirentries_args { 4243 int fd; 4244 char *buf; 4245 u_int count; 4246 long *basep; 4247 }; 4248 #endif 4249 int 4250 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4251 { 4252 long loff; 4253 int error; 4254 4255 error = kern_ogetdirentries(td, uap, &loff); 4256 if (error == 0) 4257 error = copyout(&loff, uap->basep, sizeof(long)); 4258 return (error); 4259 } 4260 4261 int 4262 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4263 long *ploff) 4264 { 4265 long base; 4266 int error; 4267 4268 /* XXX arbitrary sanity limit on `count'. */ 4269 if (uap->count > 64 * 1024) 4270 return (EINVAL); 4271 4272 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4273 &base, ogetdirentries_cvt); 4274 4275 if (error == 0 && uap->basep != NULL) 4276 error = copyout(&base, uap->basep, sizeof(long)); 4277 4278 return (error); 4279 } 4280 #endif /* COMPAT_43 */ 4281 4282 #if defined(COMPAT_FREEBSD11) 4283 #ifndef _SYS_SYSPROTO_H_ 4284 struct freebsd11_getdirentries_args { 4285 int fd; 4286 char *buf; 4287 u_int count; 4288 long *basep; 4289 }; 4290 #endif 4291 int 4292 freebsd11_getdirentries(struct thread *td, 4293 struct freebsd11_getdirentries_args *uap) 4294 { 4295 long base; 4296 int error; 4297 4298 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4299 &base, NULL); 4300 4301 if (error == 0 && uap->basep != NULL) 4302 error = copyout(&base, uap->basep, sizeof(long)); 4303 return (error); 4304 } 4305 4306 int 4307 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4308 { 4309 struct freebsd11_getdirentries_args ap; 4310 4311 ap.fd = uap->fd; 4312 ap.buf = uap->buf; 4313 ap.count = uap->count; 4314 ap.basep = NULL; 4315 return (freebsd11_getdirentries(td, &ap)); 4316 } 4317 #endif /* COMPAT_FREEBSD11 */ 4318 4319 /* 4320 * Read a block of directory entries in a filesystem independent format. 4321 */ 4322 int 4323 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4324 { 4325 off_t base; 4326 int error; 4327 4328 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4329 NULL, UIO_USERSPACE); 4330 if (error != 0) 4331 return (error); 4332 if (uap->basep != NULL) 4333 error = copyout(&base, uap->basep, sizeof(off_t)); 4334 return (error); 4335 } 4336 4337 int 4338 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4339 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4340 { 4341 struct vnode *vp; 4342 struct file *fp; 4343 struct uio auio; 4344 struct iovec aiov; 4345 off_t loff; 4346 int error, eofflag; 4347 off_t foffset; 4348 4349 AUDIT_ARG_FD(fd); 4350 if (count > IOSIZE_MAX) 4351 return (EINVAL); 4352 auio.uio_resid = count; 4353 error = getvnode(td, fd, &cap_read_rights, &fp); 4354 if (error != 0) 4355 return (error); 4356 if ((fp->f_flag & FREAD) == 0) { 4357 fdrop(fp, td); 4358 return (EBADF); 4359 } 4360 vp = fp->f_vnode; 4361 foffset = foffset_lock(fp, 0); 4362 unionread: 4363 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4364 error = ENOENT; 4365 goto fail; 4366 } 4367 aiov.iov_base = buf; 4368 aiov.iov_len = count; 4369 auio.uio_iov = &aiov; 4370 auio.uio_iovcnt = 1; 4371 auio.uio_rw = UIO_READ; 4372 auio.uio_segflg = bufseg; 4373 auio.uio_td = td; 4374 vn_lock(vp, LK_SHARED | LK_RETRY); 4375 /* 4376 * We want to return ENOTDIR for anything that is not VDIR, but 4377 * not for VBAD, and we can't check for VBAD while the vnode is 4378 * unlocked. 4379 */ 4380 if (vp->v_type != VDIR) { 4381 if (vp->v_type == VBAD) 4382 error = EBADF; 4383 else 4384 error = ENOTDIR; 4385 VOP_UNLOCK(vp); 4386 goto fail; 4387 } 4388 AUDIT_ARG_VNODE1(vp); 4389 loff = auio.uio_offset = foffset; 4390 #ifdef MAC 4391 error = mac_vnode_check_readdir(td->td_ucred, vp); 4392 if (error == 0) 4393 #endif 4394 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4395 NULL); 4396 foffset = auio.uio_offset; 4397 if (error != 0) { 4398 VOP_UNLOCK(vp); 4399 goto fail; 4400 } 4401 if (count == auio.uio_resid && 4402 (vp->v_vflag & VV_ROOT) && 4403 (vp->v_mount->mnt_flag & MNT_UNION)) { 4404 struct vnode *tvp = vp; 4405 4406 vp = vp->v_mount->mnt_vnodecovered; 4407 vref(vp); 4408 fp->f_vnode = vp; 4409 foffset = 0; 4410 vput(tvp); 4411 goto unionread; 4412 } 4413 VOP_UNLOCK(vp); 4414 *basep = loff; 4415 if (residp != NULL) 4416 *residp = auio.uio_resid; 4417 td->td_retval[0] = count - auio.uio_resid; 4418 fail: 4419 foffset_unlock(fp, foffset, 0); 4420 fdrop(fp, td); 4421 return (error); 4422 } 4423 4424 /* 4425 * Set the mode mask for creation of filesystem nodes. 4426 */ 4427 #ifndef _SYS_SYSPROTO_H_ 4428 struct umask_args { 4429 int newmask; 4430 }; 4431 #endif 4432 int 4433 sys_umask(struct thread *td, struct umask_args *uap) 4434 { 4435 struct pwddesc *pdp; 4436 4437 pdp = td->td_proc->p_pd; 4438 PWDDESC_XLOCK(pdp); 4439 td->td_retval[0] = pdp->pd_cmask; 4440 pdp->pd_cmask = uap->newmask & ALLPERMS; 4441 PWDDESC_XUNLOCK(pdp); 4442 return (0); 4443 } 4444 4445 /* 4446 * Void all references to file by ripping underlying filesystem away from 4447 * vnode. 4448 */ 4449 #ifndef _SYS_SYSPROTO_H_ 4450 struct revoke_args { 4451 char *path; 4452 }; 4453 #endif 4454 int 4455 sys_revoke(struct thread *td, struct revoke_args *uap) 4456 { 4457 struct vnode *vp; 4458 struct vattr vattr; 4459 struct nameidata nd; 4460 int error; 4461 4462 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4463 uap->path); 4464 if ((error = namei(&nd)) != 0) 4465 return (error); 4466 vp = nd.ni_vp; 4467 NDFREE_PNBUF(&nd); 4468 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4469 error = EINVAL; 4470 goto out; 4471 } 4472 #ifdef MAC 4473 error = mac_vnode_check_revoke(td->td_ucred, vp); 4474 if (error != 0) 4475 goto out; 4476 #endif 4477 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4478 if (error != 0) 4479 goto out; 4480 if (td->td_ucred->cr_uid != vattr.va_uid) { 4481 error = priv_check(td, PRIV_VFS_ADMIN); 4482 if (error != 0) 4483 goto out; 4484 } 4485 if (devfs_usecount(vp) > 0) 4486 VOP_REVOKE(vp, REVOKEALL); 4487 out: 4488 vput(vp); 4489 return (error); 4490 } 4491 4492 /* 4493 * This variant of getvnode() allows O_PATH files. Caller should 4494 * ensure that returned file and vnode are only used for compatible 4495 * semantics. 4496 */ 4497 int 4498 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, 4499 uint8_t *flagsp, struct file **fpp) 4500 { 4501 struct file *fp; 4502 int error; 4503 4504 error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp); 4505 if (error != 0) 4506 return (error); 4507 4508 /* 4509 * The file could be not of the vnode type, or it may be not 4510 * yet fully initialized, in which case the f_vnode pointer 4511 * may be set, but f_ops is still badfileops. E.g., 4512 * devfs_open() transiently create such situation to 4513 * facilitate csw d_fdopen(). 4514 * 4515 * Dupfdopen() handling in kern_openat() installs the 4516 * half-baked file into the process descriptor table, allowing 4517 * other thread to dereference it. Guard against the race by 4518 * checking f_ops. 4519 */ 4520 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4521 fdrop(fp, td); 4522 *fpp = NULL; 4523 return (EINVAL); 4524 } 4525 4526 *fpp = fp; 4527 return (0); 4528 } 4529 4530 /* 4531 * Convert a user file descriptor to a kernel file entry and check 4532 * that, if it is a capability, the correct rights are present. 4533 * A reference on the file entry is held upon returning. 4534 */ 4535 int 4536 getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, 4537 struct file **fpp) 4538 { 4539 int error; 4540 4541 error = getvnode_path(td, fd, rightsp, NULL, fpp); 4542 if (__predict_false(error != 0)) 4543 return (error); 4544 4545 /* 4546 * Filter out O_PATH file descriptors, most getvnode() callers 4547 * do not call fo_ methods. 4548 */ 4549 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4550 fdrop(*fpp, td); 4551 *fpp = NULL; 4552 error = EBADF; 4553 } 4554 4555 return (error); 4556 } 4557 4558 /* 4559 * Get an (NFS) file handle. 4560 */ 4561 #ifndef _SYS_SYSPROTO_H_ 4562 struct lgetfh_args { 4563 char *fname; 4564 fhandle_t *fhp; 4565 }; 4566 #endif 4567 int 4568 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4569 { 4570 4571 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4572 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4573 } 4574 4575 #ifndef _SYS_SYSPROTO_H_ 4576 struct getfh_args { 4577 char *fname; 4578 fhandle_t *fhp; 4579 }; 4580 #endif 4581 int 4582 sys_getfh(struct thread *td, struct getfh_args *uap) 4583 { 4584 4585 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4586 uap->fhp, UIO_USERSPACE)); 4587 } 4588 4589 /* 4590 * syscall for the rpc.lockd to use to translate an open descriptor into 4591 * a NFS file handle. 4592 * 4593 * warning: do not remove the priv_check() call or this becomes one giant 4594 * security hole. 4595 */ 4596 #ifndef _SYS_SYSPROTO_H_ 4597 struct getfhat_args { 4598 int fd; 4599 char *path; 4600 fhandle_t *fhp; 4601 int flags; 4602 }; 4603 #endif 4604 int 4605 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4606 { 4607 4608 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4609 uap->fhp, UIO_USERSPACE)); 4610 } 4611 4612 int 4613 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4614 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4615 { 4616 struct nameidata nd; 4617 fhandle_t fh; 4618 struct vnode *vp; 4619 int error; 4620 4621 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4622 return (EINVAL); 4623 error = priv_check(td, PRIV_VFS_GETFH); 4624 if (error != 0) 4625 return (error); 4626 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4627 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4628 fd); 4629 error = namei(&nd); 4630 if (error != 0) 4631 return (error); 4632 NDFREE_PNBUF(&nd); 4633 vp = nd.ni_vp; 4634 bzero(&fh, sizeof(fh)); 4635 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4636 error = VOP_VPTOFH(vp, &fh.fh_fid); 4637 vput(vp); 4638 if (error == 0) { 4639 if (fhseg == UIO_USERSPACE) 4640 error = copyout(&fh, fhp, sizeof (fh)); 4641 else 4642 memcpy(fhp, &fh, sizeof(fh)); 4643 } 4644 return (error); 4645 } 4646 4647 #ifndef _SYS_SYSPROTO_H_ 4648 struct fhlink_args { 4649 fhandle_t *fhp; 4650 const char *to; 4651 }; 4652 #endif 4653 int 4654 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4655 { 4656 4657 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4658 } 4659 4660 #ifndef _SYS_SYSPROTO_H_ 4661 struct fhlinkat_args { 4662 fhandle_t *fhp; 4663 int tofd; 4664 const char *to; 4665 }; 4666 #endif 4667 int 4668 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4669 { 4670 4671 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4672 } 4673 4674 static int 4675 kern_fhlinkat(struct thread *td, int fd, const char *path, 4676 enum uio_seg pathseg, fhandle_t *fhp) 4677 { 4678 fhandle_t fh; 4679 struct mount *mp; 4680 struct vnode *vp; 4681 int error; 4682 4683 error = priv_check(td, PRIV_VFS_GETFH); 4684 if (error != 0) 4685 return (error); 4686 error = copyin(fhp, &fh, sizeof(fh)); 4687 if (error != 0) 4688 return (error); 4689 do { 4690 bwillwrite(); 4691 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4692 return (ESTALE); 4693 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4694 vfs_unbusy(mp); 4695 if (error != 0) 4696 return (error); 4697 VOP_UNLOCK(vp); 4698 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4699 } while (error == EAGAIN || error == ERELOOKUP); 4700 return (error); 4701 } 4702 4703 #ifndef _SYS_SYSPROTO_H_ 4704 struct fhreadlink_args { 4705 fhandle_t *fhp; 4706 char *buf; 4707 size_t bufsize; 4708 }; 4709 #endif 4710 int 4711 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4712 { 4713 fhandle_t fh; 4714 struct mount *mp; 4715 struct vnode *vp; 4716 int error; 4717 4718 error = priv_check(td, PRIV_VFS_GETFH); 4719 if (error != 0) 4720 return (error); 4721 if (uap->bufsize > IOSIZE_MAX) 4722 return (EINVAL); 4723 error = copyin(uap->fhp, &fh, sizeof(fh)); 4724 if (error != 0) 4725 return (error); 4726 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4727 return (ESTALE); 4728 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4729 vfs_unbusy(mp); 4730 if (error != 0) 4731 return (error); 4732 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4733 vput(vp); 4734 return (error); 4735 } 4736 4737 /* 4738 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4739 * open descriptor. 4740 * 4741 * warning: do not remove the priv_check() call or this becomes one giant 4742 * security hole. 4743 */ 4744 #ifndef _SYS_SYSPROTO_H_ 4745 struct fhopen_args { 4746 const struct fhandle *u_fhp; 4747 int flags; 4748 }; 4749 #endif 4750 int 4751 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4752 { 4753 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4754 } 4755 4756 int 4757 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4758 { 4759 struct mount *mp; 4760 struct vnode *vp; 4761 struct fhandle fhp; 4762 struct file *fp; 4763 int error, indx; 4764 bool named_attr; 4765 4766 error = priv_check(td, PRIV_VFS_FHOPEN); 4767 if (error != 0) 4768 return (error); 4769 4770 indx = -1; 4771 if ((flags & O_CREAT) != 0) 4772 return (EINVAL); 4773 error = openflags(&flags); 4774 if (error != 0) 4775 return (error); 4776 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4777 if (error != 0) 4778 return (error); 4779 /* find the mount point */ 4780 mp = vfs_busyfs(&fhp.fh_fsid); 4781 if (mp == NULL) 4782 return (ESTALE); 4783 /* now give me my vnode, it gets returned to me locked */ 4784 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4785 vfs_unbusy(mp); 4786 if (error != 0) 4787 return (error); 4788 4789 /* 4790 * Check to see if the file handle refers to a named attribute 4791 * directory or attribute. If it does, the O_NAMEDATTR flag 4792 * must have been specified. 4793 */ 4794 named_attr = (vn_irflag_read(vp) & 4795 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4796 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4797 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4798 vput(vp); 4799 return (ENOATTR); 4800 } 4801 4802 error = falloc_noinstall(td, &fp); 4803 if (error != 0) { 4804 vput(vp); 4805 return (error); 4806 } 4807 /* Set the flags early so the finit in devfs can pick them up. */ 4808 fp->f_flag = flags & FMASK; 4809 4810 #ifdef INVARIANTS 4811 td->td_dupfd = -1; 4812 #endif 4813 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4814 if (error != 0) { 4815 KASSERT(fp->f_ops == &badfileops, 4816 ("VOP_OPEN in fhopen() set f_ops")); 4817 KASSERT(td->td_dupfd < 0, 4818 ("fhopen() encountered fdopen()")); 4819 4820 vput(vp); 4821 goto bad; 4822 } 4823 #ifdef INVARIANTS 4824 td->td_dupfd = 0; 4825 #endif 4826 finit_open(fp, vp, flags); 4827 VOP_UNLOCK(vp); 4828 if ((flags & O_TRUNC) != 0) { 4829 error = fo_truncate(fp, 0, td->td_ucred, td); 4830 if (error != 0) 4831 goto bad; 4832 } 4833 4834 error = finstall(td, fp, &indx, flags, NULL); 4835 bad: 4836 fdrop(fp, td); 4837 td->td_retval[0] = indx; 4838 return (error); 4839 } 4840 4841 /* 4842 * Stat an (NFS) file handle. 4843 */ 4844 #ifndef _SYS_SYSPROTO_H_ 4845 struct fhstat_args { 4846 struct fhandle *u_fhp; 4847 struct stat *sb; 4848 }; 4849 #endif 4850 int 4851 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4852 { 4853 struct stat sb; 4854 struct fhandle fh; 4855 int error; 4856 4857 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4858 if (error != 0) 4859 return (error); 4860 error = kern_fhstat(td, fh, &sb); 4861 if (error == 0) 4862 error = copyout(&sb, uap->sb, sizeof(sb)); 4863 return (error); 4864 } 4865 4866 int 4867 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4868 { 4869 struct mount *mp; 4870 struct vnode *vp; 4871 int error; 4872 4873 error = priv_check(td, PRIV_VFS_FHSTAT); 4874 if (error != 0) 4875 return (error); 4876 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4877 return (ESTALE); 4878 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4879 vfs_unbusy(mp); 4880 if (error != 0) 4881 return (error); 4882 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4883 vput(vp); 4884 return (error); 4885 } 4886 4887 /* 4888 * Implement fstatfs() for (NFS) file handles. 4889 */ 4890 #ifndef _SYS_SYSPROTO_H_ 4891 struct fhstatfs_args { 4892 struct fhandle *u_fhp; 4893 struct statfs *buf; 4894 }; 4895 #endif 4896 int 4897 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4898 { 4899 struct statfs *sfp; 4900 fhandle_t fh; 4901 int error; 4902 4903 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4904 if (error != 0) 4905 return (error); 4906 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4907 error = kern_fhstatfs(td, fh, sfp); 4908 if (error == 0) 4909 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4910 free(sfp, M_STATFS); 4911 return (error); 4912 } 4913 4914 int 4915 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4916 { 4917 struct mount *mp; 4918 struct vnode *vp; 4919 int error; 4920 4921 error = priv_check(td, PRIV_VFS_FHSTATFS); 4922 if (error != 0) 4923 return (error); 4924 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4925 return (ESTALE); 4926 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4927 if (error != 0) { 4928 vfs_unbusy(mp); 4929 return (error); 4930 } 4931 vput(vp); 4932 error = prison_canseemount(td->td_ucred, mp); 4933 if (error != 0) 4934 goto out; 4935 #ifdef MAC 4936 error = mac_mount_check_stat(td->td_ucred, mp); 4937 if (error != 0) 4938 goto out; 4939 #endif 4940 error = VFS_STATFS(mp, buf); 4941 out: 4942 vfs_unbusy(mp); 4943 return (error); 4944 } 4945 4946 /* 4947 * Unlike madvise(2), we do not make a best effort to remember every 4948 * possible caching hint. Instead, we remember the last setting with 4949 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4950 * region of any current setting. 4951 */ 4952 int 4953 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4954 int advice) 4955 { 4956 struct fadvise_info *fa, *new; 4957 struct file *fp; 4958 struct vnode *vp; 4959 off_t end; 4960 int error; 4961 4962 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4963 return (EINVAL); 4964 AUDIT_ARG_VALUE(advice); 4965 switch (advice) { 4966 case POSIX_FADV_SEQUENTIAL: 4967 case POSIX_FADV_RANDOM: 4968 case POSIX_FADV_NOREUSE: 4969 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4970 break; 4971 case POSIX_FADV_NORMAL: 4972 case POSIX_FADV_WILLNEED: 4973 case POSIX_FADV_DONTNEED: 4974 new = NULL; 4975 break; 4976 default: 4977 return (EINVAL); 4978 } 4979 /* XXX: CAP_POSIX_FADVISE? */ 4980 AUDIT_ARG_FD(fd); 4981 error = fget(td, fd, &cap_no_rights, &fp); 4982 if (error != 0) 4983 goto out; 4984 AUDIT_ARG_FILE(td->td_proc, fp); 4985 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4986 error = ESPIPE; 4987 goto out; 4988 } 4989 if (fp->f_type != DTYPE_VNODE) { 4990 error = ENODEV; 4991 goto out; 4992 } 4993 vp = fp->f_vnode; 4994 if (vp->v_type != VREG) { 4995 error = ENODEV; 4996 goto out; 4997 } 4998 if (len == 0) 4999 end = OFF_MAX; 5000 else 5001 end = offset + len - 1; 5002 switch (advice) { 5003 case POSIX_FADV_SEQUENTIAL: 5004 case POSIX_FADV_RANDOM: 5005 case POSIX_FADV_NOREUSE: 5006 /* 5007 * Try to merge any existing non-standard region with 5008 * this new region if possible, otherwise create a new 5009 * non-standard region for this request. 5010 */ 5011 mtx_pool_lock(mtxpool_sleep, fp); 5012 fa = fp->f_advice; 5013 if (fa != NULL && fa->fa_advice == advice && 5014 ((fa->fa_start <= end && fa->fa_end >= offset) || 5015 (end != OFF_MAX && fa->fa_start == end + 1) || 5016 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 5017 if (offset < fa->fa_start) 5018 fa->fa_start = offset; 5019 if (end > fa->fa_end) 5020 fa->fa_end = end; 5021 } else { 5022 new->fa_advice = advice; 5023 new->fa_start = offset; 5024 new->fa_end = end; 5025 fp->f_advice = new; 5026 new = fa; 5027 } 5028 mtx_pool_unlock(mtxpool_sleep, fp); 5029 break; 5030 case POSIX_FADV_NORMAL: 5031 /* 5032 * If a the "normal" region overlaps with an existing 5033 * non-standard region, trim or remove the 5034 * non-standard region. 5035 */ 5036 mtx_pool_lock(mtxpool_sleep, fp); 5037 fa = fp->f_advice; 5038 if (fa != NULL) { 5039 if (offset <= fa->fa_start && end >= fa->fa_end) { 5040 new = fa; 5041 fp->f_advice = NULL; 5042 } else if (offset <= fa->fa_start && 5043 end >= fa->fa_start) 5044 fa->fa_start = end + 1; 5045 else if (offset <= fa->fa_end && end >= fa->fa_end) 5046 fa->fa_end = offset - 1; 5047 else if (offset >= fa->fa_start && end <= fa->fa_end) { 5048 /* 5049 * If the "normal" region is a middle 5050 * portion of the existing 5051 * non-standard region, just remove 5052 * the whole thing rather than picking 5053 * one side or the other to 5054 * preserve. 5055 */ 5056 new = fa; 5057 fp->f_advice = NULL; 5058 } 5059 } 5060 mtx_pool_unlock(mtxpool_sleep, fp); 5061 break; 5062 case POSIX_FADV_WILLNEED: 5063 case POSIX_FADV_DONTNEED: 5064 error = VOP_ADVISE(vp, offset, end, advice); 5065 break; 5066 } 5067 out: 5068 if (fp != NULL) 5069 fdrop(fp, td); 5070 free(new, M_FADVISE); 5071 return (error); 5072 } 5073 5074 int 5075 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 5076 { 5077 int error; 5078 5079 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 5080 uap->advice); 5081 return (kern_posix_error(td, error)); 5082 } 5083 5084 int 5085 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 5086 off_t *outoffp, size_t len, unsigned int flags) 5087 { 5088 struct file *infp, *infp1, *outfp, *outfp1; 5089 struct vnode *invp, *outvp; 5090 int error; 5091 size_t retlen; 5092 void *rl_rcookie, *rl_wcookie; 5093 off_t inoff, outoff, savinoff, savoutoff; 5094 bool foffsets_locked, foffsets_set; 5095 5096 infp = outfp = NULL; 5097 rl_rcookie = rl_wcookie = NULL; 5098 foffsets_locked = false; 5099 foffsets_set = false; 5100 error = 0; 5101 retlen = 0; 5102 5103 if ((flags & ~COPY_FILE_RANGE_USERFLAGS) != 0) { 5104 error = EINVAL; 5105 goto out; 5106 } 5107 if (len > SSIZE_MAX) 5108 /* 5109 * Although the len argument is size_t, the return argument 5110 * is ssize_t (which is signed). Therefore a size that won't 5111 * fit in ssize_t can't be returned. 5112 */ 5113 len = SSIZE_MAX; 5114 5115 /* Get the file structures for the file descriptors. */ 5116 error = fget_read(td, infd, 5117 inoffp != NULL ? &cap_pread_rights : &cap_read_rights, &infp); 5118 if (error != 0) 5119 goto out; 5120 if (infp->f_ops == &badfileops) { 5121 error = EBADF; 5122 goto out; 5123 } 5124 if (infp->f_vnode == NULL) { 5125 error = EINVAL; 5126 goto out; 5127 } 5128 error = fget_write(td, outfd, 5129 outoffp != NULL ? &cap_pwrite_rights : &cap_write_rights, &outfp); 5130 if (error != 0) 5131 goto out; 5132 if (outfp->f_ops == &badfileops) { 5133 error = EBADF; 5134 goto out; 5135 } 5136 if (outfp->f_vnode == NULL) { 5137 error = EINVAL; 5138 goto out; 5139 } 5140 5141 /* 5142 * Figure out which file offsets we're reading from and writing to. 5143 * If the offsets come from the file descriptions, we need to lock them, 5144 * and locking both offsets requires a loop to avoid deadlocks. 5145 */ 5146 infp1 = outfp1 = NULL; 5147 if (inoffp != NULL) 5148 inoff = *inoffp; 5149 else 5150 infp1 = infp; 5151 if (outoffp != NULL) 5152 outoff = *outoffp; 5153 else 5154 outfp1 = outfp; 5155 if (infp1 != NULL || outfp1 != NULL) { 5156 if (infp1 == outfp1) { 5157 /* 5158 * Overlapping ranges are not allowed. A more thorough 5159 * check appears below, but we must not lock the same 5160 * offset twice. 5161 */ 5162 error = EINVAL; 5163 goto out; 5164 } 5165 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5166 foffsets_locked = true; 5167 } else { 5168 foffsets_set = true; 5169 } 5170 savinoff = inoff; 5171 savoutoff = outoff; 5172 5173 invp = infp->f_vnode; 5174 outvp = outfp->f_vnode; 5175 /* Sanity check the f_flag bits. */ 5176 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5177 (infp->f_flag & FREAD) == 0) { 5178 error = EBADF; 5179 goto out; 5180 } 5181 5182 /* If len == 0, just return 0. */ 5183 if (len == 0) 5184 goto out; 5185 5186 /* 5187 * Make sure that the ranges we check and lock below are valid. Note 5188 * that len is clamped to SSIZE_MAX above. 5189 */ 5190 if (inoff < 0 || outoff < 0) { 5191 error = EINVAL; 5192 goto out; 5193 } 5194 5195 /* 5196 * If infp and outfp refer to the same file, the byte ranges cannot 5197 * overlap. 5198 */ 5199 if (invp == outvp) { 5200 if ((inoff <= outoff && inoff + len > outoff) || 5201 (inoff > outoff && outoff + len > inoff)) { 5202 error = EINVAL; 5203 goto out; 5204 } 5205 rangelock_may_recurse(&invp->v_rl); 5206 } 5207 5208 /* Range lock the byte ranges for both invp and outvp. */ 5209 for (;;) { 5210 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5211 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5212 if (rl_rcookie != NULL) 5213 break; 5214 vn_rangelock_unlock(outvp, rl_wcookie); 5215 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5216 vn_rangelock_unlock(invp, rl_rcookie); 5217 } 5218 5219 retlen = len; 5220 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5221 flags, infp->f_cred, outfp->f_cred, td); 5222 out: 5223 if (rl_rcookie != NULL) 5224 vn_rangelock_unlock(invp, rl_rcookie); 5225 if (rl_wcookie != NULL) 5226 vn_rangelock_unlock(outvp, rl_wcookie); 5227 if ((foffsets_locked || foffsets_set) && 5228 (error == EINTR || error == ERESTART)) { 5229 inoff = savinoff; 5230 outoff = savoutoff; 5231 } 5232 if (foffsets_locked) { 5233 if (inoffp == NULL) 5234 foffset_unlock(infp, inoff, 0); 5235 else 5236 *inoffp = inoff; 5237 if (outoffp == NULL) 5238 foffset_unlock(outfp, outoff, 0); 5239 else 5240 *outoffp = outoff; 5241 } else if (foffsets_set) { 5242 *inoffp = inoff; 5243 *outoffp = outoff; 5244 } 5245 if (outfp != NULL) 5246 fdrop(outfp, td); 5247 if (infp != NULL) 5248 fdrop(infp, td); 5249 td->td_retval[0] = retlen; 5250 return (error); 5251 } 5252 5253 int 5254 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5255 { 5256 off_t inoff, outoff, *inoffp, *outoffp; 5257 int error; 5258 5259 inoffp = outoffp = NULL; 5260 if (uap->inoffp != NULL) { 5261 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5262 if (error != 0) 5263 return (error); 5264 inoffp = &inoff; 5265 } 5266 if (uap->outoffp != NULL) { 5267 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5268 if (error != 0) 5269 return (error); 5270 outoffp = &outoff; 5271 } 5272 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5273 outoffp, uap->len, uap->flags); 5274 if (error == 0 && uap->inoffp != NULL) 5275 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5276 if (error == 0 && uap->outoffp != NULL) 5277 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5278 return (error); 5279 } 5280