1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 */ 36 37 #include "opt_capsicum.h" 38 #include "opt_ktrace.h" 39 40 #define EXTERR_CATEGORY EXTERR_CAT_VFSSYSCALL 41 #include <sys/systm.h> 42 #ifdef COMPAT_FREEBSD11 43 #include <sys/abi_compat.h> 44 #endif 45 #include <sys/bio.h> 46 #include <sys/buf.h> 47 #include <sys/capsicum.h> 48 #include <sys/disk.h> 49 #include <sys/dirent.h> 50 #include <sys/exterrvar.h> 51 #include <sys/fcntl.h> 52 #include <sys/file.h> 53 #include <sys/filedesc.h> 54 #include <sys/filio.h> 55 #include <sys/jail.h> 56 #include <sys/kernel.h> 57 #ifdef KTRACE 58 #include <sys/ktrace.h> 59 #endif 60 #include <sys/limits.h> 61 #include <sys/linker.h> 62 #include <sys/malloc.h> 63 #include <sys/mount.h> 64 #include <sys/mutex.h> 65 #include <sys/namei.h> 66 #include <sys/priv.h> 67 #include <sys/proc.h> 68 #include <sys/rwlock.h> 69 #include <sys/sdt.h> 70 #include <sys/stat.h> 71 #include <sys/stdarg.h> 72 #include <sys/sx.h> 73 #include <sys/syscallsubr.h> 74 #include <sys/sysctl.h> 75 #include <sys/sysproto.h> 76 #include <sys/unistd.h> 77 #include <sys/vnode.h> 78 79 #include <security/audit/audit.h> 80 #include <security/mac/mac_framework.h> 81 82 #include <vm/vm.h> 83 #include <vm/vm_object.h> 84 #include <vm/vm_page.h> 85 #include <vm/vnode_pager.h> 86 #include <vm/uma.h> 87 88 #include <fs/devfs/devfs.h> 89 90 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 91 92 static int kern_chflagsat(struct thread *td, int fd, const char *path, 93 enum uio_seg pathseg, u_long flags, int atflag); 94 static int setfflags(struct thread *td, struct vnode *, u_long); 95 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 96 static int getutimens(const struct timespec *, enum uio_seg, 97 struct timespec *, int *); 98 static int setutimes(struct thread *td, struct vnode *, 99 const struct timespec *, int, int); 100 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 101 struct thread *td); 102 static int kern_fhlinkat(struct 
thread *td, int fd, const char *path, 103 enum uio_seg pathseg, fhandle_t *fhp); 104 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 105 size_t count, struct thread *td); 106 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 107 const char *path, enum uio_seg segflag); 108 109 uint64_t 110 at2cnpflags(u_int at_flags, u_int mask) 111 { 112 uint64_t res; 113 114 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 115 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 116 117 res = 0; 118 at_flags &= mask; 119 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 120 res |= RBENEATH; 121 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 122 res |= FOLLOW; 123 /* NOFOLLOW is pseudo flag */ 124 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 125 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 126 FOLLOW; 127 } 128 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 129 res |= EMPTYPATH; 130 return (res); 131 } 132 133 int 134 kern_sync(struct thread *td) 135 { 136 struct mount *mp, *nmp; 137 int save; 138 139 mtx_lock(&mountlist_mtx); 140 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 141 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 142 nmp = TAILQ_NEXT(mp, mnt_list); 143 continue; 144 } 145 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 146 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 147 save = curthread_pflags_set(TDP_SYNCIO); 148 vfs_periodic(mp, MNT_NOWAIT); 149 VFS_SYNC(mp, MNT_NOWAIT); 150 curthread_pflags_restore(save); 151 vn_finished_write(mp); 152 } 153 mtx_lock(&mountlist_mtx); 154 nmp = TAILQ_NEXT(mp, mnt_list); 155 vfs_unbusy(mp); 156 } 157 mtx_unlock(&mountlist_mtx); 158 return (0); 159 } 160 161 /* 162 * Sync each mounted filesystem. 163 */ 164 #ifndef _SYS_SYSPROTO_H_ 165 struct sync_args { 166 int dummy; 167 }; 168 #endif 169 /* ARGSUSED */ 170 int 171 sys_sync(struct thread *td, struct sync_args *uap) 172 { 173 174 return (kern_sync(td)); 175 } 176 177 /* 178 * Change filesystem quotas. 
179 */ 180 #ifndef _SYS_SYSPROTO_H_ 181 struct quotactl_args { 182 char *path; 183 int cmd; 184 int uid; 185 caddr_t arg; 186 }; 187 #endif 188 int 189 sys_quotactl(struct thread *td, struct quotactl_args *uap) 190 { 191 struct mount *mp; 192 struct nameidata nd; 193 int error; 194 bool mp_busy; 195 196 AUDIT_ARG_CMD(uap->cmd); 197 AUDIT_ARG_UID(uap->uid); 198 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 199 return (EPERM); 200 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 201 uap->path); 202 if ((error = namei(&nd)) != 0) 203 return (error); 204 NDFREE_PNBUF(&nd); 205 mp = nd.ni_vp->v_mount; 206 vfs_ref(mp); 207 vput(nd.ni_vp); 208 error = vfs_busy(mp, 0); 209 if (error != 0) { 210 vfs_rel(mp); 211 return (error); 212 } 213 mp_busy = true; 214 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 215 216 /* 217 * Since quota on/off operations typically need to open quota 218 * files, the implementation may need to unbusy the mount point 219 * before calling into namei. Otherwise, unmount might be 220 * started between two vfs_busy() invocations (first is ours, 221 * second is from mount point cross-walk code in lookup()), 222 * causing deadlock. 223 * 224 * Avoid unbusying mp if the implementation indicates it has 225 * already done so. 226 */ 227 if (mp_busy) 228 vfs_unbusy(mp); 229 vfs_rel(mp); 230 return (error); 231 } 232 233 /* 234 * Used by statfs conversion routines to scale the block size up if 235 * necessary so that all of the block counts are <= 'max_size'. Note 236 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 237 * value of 'n'. 238 */ 239 void 240 statfs_scale_blocks(struct statfs *sf, long max_size) 241 { 242 uint64_t count; 243 int shift; 244 245 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 246 247 /* 248 * Attempt to scale the block counts to give a more accurate 249 * overview to userland of the ratio of free space to used 250 * space. 
To do this, find the largest block count and compute 251 * a divisor that lets it fit into a signed integer <= max_size. 252 */ 253 if (sf->f_bavail < 0) 254 count = -sf->f_bavail; 255 else 256 count = sf->f_bavail; 257 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 258 if (count <= max_size) 259 return; 260 261 count >>= flsl(max_size); 262 shift = 0; 263 while (count > 0) { 264 shift++; 265 count >>=1; 266 } 267 268 sf->f_bsize <<= shift; 269 sf->f_blocks >>= shift; 270 sf->f_bfree >>= shift; 271 sf->f_bavail >>= shift; 272 } 273 274 static int 275 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 276 { 277 int error; 278 279 if (mp == NULL) 280 return (EBADF); 281 error = vfs_busy(mp, 0); 282 vfs_rel(mp); 283 if (error != 0) 284 return (error); 285 #ifdef MAC 286 error = mac_mount_check_stat(td->td_ucred, mp); 287 if (error != 0) 288 goto out; 289 #endif 290 error = VFS_STATFS(mp, buf); 291 if (error != 0) 292 goto out; 293 if (priv_check_cred_vfs_generation(td->td_ucred)) 294 prison_enforce_statfs(td->td_ucred, mp, buf); 295 out: 296 vfs_unbusy(mp); 297 return (error); 298 } 299 300 /* 301 * Get filesystem statistics. 
302 */ 303 #ifndef _SYS_SYSPROTO_H_ 304 struct statfs_args { 305 char *path; 306 struct statfs *buf; 307 }; 308 #endif 309 int 310 sys_statfs(struct thread *td, struct statfs_args *uap) 311 { 312 struct statfs *sfp; 313 int error; 314 315 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 316 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 317 if (error == 0) 318 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 319 free(sfp, M_STATFS); 320 return (error); 321 } 322 323 int 324 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 325 struct statfs *buf) 326 { 327 struct mount *mp; 328 struct nameidata nd; 329 int error; 330 331 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 332 error = namei(&nd); 333 if (error != 0) 334 return (error); 335 NDFREE_PNBUF(&nd); 336 mp = vfs_ref_from_vp(nd.ni_vp); 337 vrele(nd.ni_vp); 338 return (kern_do_statfs(td, mp, buf)); 339 } 340 341 /* 342 * Get filesystem statistics. 343 */ 344 #ifndef _SYS_SYSPROTO_H_ 345 struct fstatfs_args { 346 int fd; 347 struct statfs *buf; 348 }; 349 #endif 350 int 351 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 352 { 353 struct statfs *sfp; 354 int error; 355 356 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 357 error = kern_fstatfs(td, uap->fd, sfp); 358 if (error == 0) 359 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 360 free(sfp, M_STATFS); 361 return (error); 362 } 363 364 int 365 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 366 { 367 struct file *fp; 368 struct mount *mp; 369 struct vnode *vp; 370 int error; 371 372 AUDIT_ARG_FD(fd); 373 error = getvnode_path(td, fd, &cap_fstatfs_rights, NULL, &fp); 374 if (error != 0) 375 return (error); 376 vp = fp->f_vnode; 377 #ifdef AUDIT 378 if (AUDITING_TD(td)) { 379 vn_lock(vp, LK_SHARED | LK_RETRY); 380 AUDIT_ARG_VNODE1(vp); 381 VOP_UNLOCK(vp); 382 } 383 #endif 384 mp = vfs_ref_from_vp(vp); 385 fdrop(fp, td); 386 return (kern_do_statfs(td, mp, buf)); 387 } 388 
389 /* 390 * Get statistics on all filesystems. 391 */ 392 #ifndef _SYS_SYSPROTO_H_ 393 struct getfsstat_args { 394 struct statfs *buf; 395 long bufsize; 396 int mode; 397 }; 398 #endif 399 int 400 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 401 { 402 size_t count; 403 int error; 404 405 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 406 return (EINVAL); 407 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 408 UIO_USERSPACE, uap->mode); 409 if (error == 0) 410 td->td_retval[0] = count; 411 return (error); 412 } 413 414 /* 415 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 416 * The caller is responsible for freeing memory which will be allocated 417 * in '*buf'. 418 */ 419 int 420 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 421 size_t *countp, enum uio_seg bufseg, int mode) 422 { 423 struct mount *mp, *nmp; 424 struct statfs *sfsp, *sp, *sptmp, *tofree; 425 size_t count, maxcount; 426 int error; 427 428 switch (mode) { 429 case MNT_WAIT: 430 case MNT_NOWAIT: 431 break; 432 default: 433 if (bufseg == UIO_SYSSPACE) 434 *buf = NULL; 435 return (EINVAL); 436 } 437 restart: 438 maxcount = bufsize / sizeof(struct statfs); 439 if (bufsize == 0) { 440 sfsp = NULL; 441 tofree = NULL; 442 } else if (bufseg == UIO_USERSPACE) { 443 sfsp = *buf; 444 tofree = NULL; 445 } else /* if (bufseg == UIO_SYSSPACE) */ { 446 count = 0; 447 mtx_lock(&mountlist_mtx); 448 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 449 count++; 450 } 451 mtx_unlock(&mountlist_mtx); 452 if (maxcount > count) 453 maxcount = count; 454 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 455 M_STATFS, M_WAITOK); 456 } 457 458 count = 0; 459 460 /* 461 * If there is no target buffer they only want the count. 462 * 463 * This could be TAILQ_FOREACH but it is open-coded to match the original 464 * code below. 
465 */ 466 if (sfsp == NULL) { 467 mtx_lock(&mountlist_mtx); 468 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 469 if (prison_canseemount(td->td_ucred, mp) != 0) { 470 nmp = TAILQ_NEXT(mp, mnt_list); 471 continue; 472 } 473 #ifdef MAC 474 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 475 nmp = TAILQ_NEXT(mp, mnt_list); 476 continue; 477 } 478 #endif 479 count++; 480 nmp = TAILQ_NEXT(mp, mnt_list); 481 } 482 mtx_unlock(&mountlist_mtx); 483 *countp = count; 484 return (0); 485 } 486 487 /* 488 * They want the entire thing. 489 * 490 * Short-circuit the corner case of no room for anything, avoids 491 * relocking below. 492 */ 493 if (maxcount < 1) { 494 goto out; 495 } 496 497 mtx_lock(&mountlist_mtx); 498 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 499 if (prison_canseemount(td->td_ucred, mp) != 0) { 500 nmp = TAILQ_NEXT(mp, mnt_list); 501 continue; 502 } 503 #ifdef MAC 504 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 505 nmp = TAILQ_NEXT(mp, mnt_list); 506 continue; 507 } 508 #endif 509 if (mode == MNT_WAIT) { 510 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 511 /* 512 * If vfs_busy() failed, and MBF_NOWAIT 513 * wasn't passed, then the mp is gone. 514 * Furthermore, because of MBF_MNTLSTLOCK, 515 * the mountlist_mtx was dropped. We have 516 * no other choice than to start over. 517 */ 518 mtx_unlock(&mountlist_mtx); 519 free(tofree, M_STATFS); 520 goto restart; 521 } 522 } else { 523 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 524 nmp = TAILQ_NEXT(mp, mnt_list); 525 continue; 526 } 527 } 528 sp = &mp->mnt_stat; 529 /* 530 * If MNT_NOWAIT is specified, do not refresh 531 * the fsstat cache. 
532 */ 533 if (mode != MNT_NOWAIT) { 534 error = VFS_STATFS(mp, sp); 535 if (error != 0) { 536 mtx_lock(&mountlist_mtx); 537 nmp = TAILQ_NEXT(mp, mnt_list); 538 vfs_unbusy(mp); 539 continue; 540 } 541 } 542 if (priv_check_cred_vfs_generation(td->td_ucred)) { 543 sptmp = malloc(sizeof(struct statfs), M_STATFS, 544 M_WAITOK); 545 *sptmp = *sp; 546 prison_enforce_statfs(td->td_ucred, mp, sptmp); 547 sp = sptmp; 548 } else 549 sptmp = NULL; 550 if (bufseg == UIO_SYSSPACE) { 551 bcopy(sp, sfsp, sizeof(*sp)); 552 free(sptmp, M_STATFS); 553 } else /* if (bufseg == UIO_USERSPACE) */ { 554 error = copyout(sp, sfsp, sizeof(*sp)); 555 free(sptmp, M_STATFS); 556 if (error != 0) { 557 vfs_unbusy(mp); 558 return (error); 559 } 560 } 561 sfsp++; 562 count++; 563 564 if (count == maxcount) { 565 vfs_unbusy(mp); 566 goto out; 567 } 568 569 mtx_lock(&mountlist_mtx); 570 nmp = TAILQ_NEXT(mp, mnt_list); 571 vfs_unbusy(mp); 572 } 573 mtx_unlock(&mountlist_mtx); 574 out: 575 *countp = count; 576 return (0); 577 } 578 579 #ifdef COMPAT_FREEBSD4 580 /* 581 * Get old format filesystem statistics. 582 */ 583 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 584 585 #ifndef _SYS_SYSPROTO_H_ 586 struct freebsd4_statfs_args { 587 char *path; 588 struct ostatfs *buf; 589 }; 590 #endif 591 int 592 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 593 { 594 struct ostatfs osb; 595 struct statfs *sfp; 596 int error; 597 598 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 599 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 600 if (error == 0) { 601 freebsd4_cvtstatfs(sfp, &osb); 602 error = copyout(&osb, uap->buf, sizeof(osb)); 603 } 604 free(sfp, M_STATFS); 605 return (error); 606 } 607 608 /* 609 * Get filesystem statistics. 
610 */ 611 #ifndef _SYS_SYSPROTO_H_ 612 struct freebsd4_fstatfs_args { 613 int fd; 614 struct ostatfs *buf; 615 }; 616 #endif 617 int 618 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 619 { 620 struct ostatfs osb; 621 struct statfs *sfp; 622 int error; 623 624 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 625 error = kern_fstatfs(td, uap->fd, sfp); 626 if (error == 0) { 627 freebsd4_cvtstatfs(sfp, &osb); 628 error = copyout(&osb, uap->buf, sizeof(osb)); 629 } 630 free(sfp, M_STATFS); 631 return (error); 632 } 633 634 /* 635 * Get statistics on all filesystems. 636 */ 637 #ifndef _SYS_SYSPROTO_H_ 638 struct freebsd4_getfsstat_args { 639 struct ostatfs *buf; 640 long bufsize; 641 int mode; 642 }; 643 #endif 644 int 645 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 646 { 647 struct statfs *buf, *sp; 648 struct ostatfs osb; 649 size_t count, size; 650 int error; 651 652 if (uap->bufsize < 0) 653 return (EINVAL); 654 count = uap->bufsize / sizeof(struct ostatfs); 655 if (count > SIZE_MAX / sizeof(struct statfs)) 656 return (EINVAL); 657 size = count * sizeof(struct statfs); 658 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 659 uap->mode); 660 if (error == 0) 661 td->td_retval[0] = count; 662 if (size != 0) { 663 sp = buf; 664 while (count != 0 && error == 0) { 665 freebsd4_cvtstatfs(sp, &osb); 666 error = copyout(&osb, uap->buf, sizeof(osb)); 667 sp++; 668 uap->buf++; 669 count--; 670 } 671 free(buf, M_STATFS); 672 } 673 return (error); 674 } 675 676 /* 677 * Implement fstatfs() for (NFS) file handles. 
678 */ 679 #ifndef _SYS_SYSPROTO_H_ 680 struct freebsd4_fhstatfs_args { 681 struct fhandle *u_fhp; 682 struct ostatfs *buf; 683 }; 684 #endif 685 int 686 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 687 { 688 struct ostatfs osb; 689 struct statfs *sfp; 690 fhandle_t fh; 691 int error; 692 693 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 694 if (error != 0) 695 return (error); 696 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 697 error = kern_fhstatfs(td, fh, sfp); 698 if (error == 0) { 699 freebsd4_cvtstatfs(sfp, &osb); 700 error = copyout(&osb, uap->buf, sizeof(osb)); 701 } 702 free(sfp, M_STATFS); 703 return (error); 704 } 705 706 /* 707 * Convert a new format statfs structure to an old format statfs structure. 708 */ 709 static void 710 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 711 { 712 713 statfs_scale_blocks(nsp, LONG_MAX); 714 bzero(osp, sizeof(*osp)); 715 osp->f_bsize = nsp->f_bsize; 716 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 717 osp->f_blocks = nsp->f_blocks; 718 osp->f_bfree = nsp->f_bfree; 719 osp->f_bavail = nsp->f_bavail; 720 osp->f_files = MIN(nsp->f_files, LONG_MAX); 721 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 722 osp->f_owner = nsp->f_owner; 723 osp->f_type = nsp->f_type; 724 osp->f_flags = nsp->f_flags; 725 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 726 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 727 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 728 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 729 strlcpy(osp->f_fstypename, nsp->f_fstypename, 730 MIN(MFSNAMELEN, OMFSNAMELEN)); 731 strlcpy(osp->f_mntonname, nsp->f_mntonname, 732 MIN(MNAMELEN, OMNAMELEN)); 733 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 734 MIN(MNAMELEN, OMNAMELEN)); 735 osp->f_fsid = nsp->f_fsid; 736 } 737 #endif /* COMPAT_FREEBSD4 */ 738 739 #if defined(COMPAT_FREEBSD11) 740 /* 741 * Get old format filesystem statistics. 
742 */ 743 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 744 745 int 746 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 747 { 748 struct freebsd11_statfs osb; 749 struct statfs *sfp; 750 int error; 751 752 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 753 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 754 if (error == 0) { 755 freebsd11_cvtstatfs(sfp, &osb); 756 error = copyout(&osb, uap->buf, sizeof(osb)); 757 } 758 free(sfp, M_STATFS); 759 return (error); 760 } 761 762 /* 763 * Get filesystem statistics. 764 */ 765 int 766 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 767 { 768 struct freebsd11_statfs osb; 769 struct statfs *sfp; 770 int error; 771 772 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 773 error = kern_fstatfs(td, uap->fd, sfp); 774 if (error == 0) { 775 freebsd11_cvtstatfs(sfp, &osb); 776 error = copyout(&osb, uap->buf, sizeof(osb)); 777 } 778 free(sfp, M_STATFS); 779 return (error); 780 } 781 782 /* 783 * Get statistics on all filesystems. 
784 */ 785 int 786 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 787 { 788 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 789 } 790 791 int 792 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 793 long bufsize, int mode) 794 { 795 struct freebsd11_statfs osb; 796 struct statfs *buf, *sp; 797 size_t count, size; 798 int error; 799 800 if (bufsize < 0) 801 return (EINVAL); 802 803 count = bufsize / sizeof(struct ostatfs); 804 size = count * sizeof(struct statfs); 805 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 806 if (error == 0) 807 td->td_retval[0] = count; 808 if (size > 0) { 809 sp = buf; 810 while (count > 0 && error == 0) { 811 freebsd11_cvtstatfs(sp, &osb); 812 error = copyout(&osb, ubuf, sizeof(osb)); 813 sp++; 814 ubuf++; 815 count--; 816 } 817 free(buf, M_STATFS); 818 } 819 return (error); 820 } 821 822 /* 823 * Implement fstatfs() for (NFS) file handles. 824 */ 825 int 826 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 827 { 828 struct freebsd11_statfs osb; 829 struct statfs *sfp; 830 fhandle_t fh; 831 int error; 832 833 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 834 if (error) 835 return (error); 836 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 837 error = kern_fhstatfs(td, fh, sfp); 838 if (error == 0) { 839 freebsd11_cvtstatfs(sfp, &osb); 840 error = copyout(&osb, uap->buf, sizeof(osb)); 841 } 842 free(sfp, M_STATFS); 843 return (error); 844 } 845 846 /* 847 * Convert a new format statfs structure to an old format statfs structure. 
848 */ 849 static void 850 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 851 { 852 853 bzero(osp, sizeof(*osp)); 854 osp->f_version = FREEBSD11_STATFS_VERSION; 855 osp->f_type = nsp->f_type; 856 osp->f_flags = nsp->f_flags; 857 osp->f_bsize = nsp->f_bsize; 858 osp->f_iosize = nsp->f_iosize; 859 osp->f_blocks = nsp->f_blocks; 860 osp->f_bfree = nsp->f_bfree; 861 osp->f_bavail = nsp->f_bavail; 862 osp->f_files = nsp->f_files; 863 osp->f_ffree = nsp->f_ffree; 864 osp->f_syncwrites = nsp->f_syncwrites; 865 osp->f_asyncwrites = nsp->f_asyncwrites; 866 osp->f_syncreads = nsp->f_syncreads; 867 osp->f_asyncreads = nsp->f_asyncreads; 868 osp->f_namemax = nsp->f_namemax; 869 osp->f_owner = nsp->f_owner; 870 osp->f_fsid = nsp->f_fsid; 871 strlcpy(osp->f_fstypename, nsp->f_fstypename, 872 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 873 strlcpy(osp->f_mntonname, nsp->f_mntonname, 874 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 875 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 876 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 877 } 878 #endif /* COMPAT_FREEBSD11 */ 879 880 /* 881 * Change current working directory to a given file descriptor. 
882 */ 883 #ifndef _SYS_SYSPROTO_H_ 884 struct fchdir_args { 885 int fd; 886 }; 887 #endif 888 int 889 sys_fchdir(struct thread *td, struct fchdir_args *uap) 890 { 891 struct vnode *vp, *tdp; 892 struct mount *mp; 893 struct file *fp; 894 int error; 895 uint8_t fdflags; 896 897 AUDIT_ARG_FD(uap->fd); 898 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, &fdflags, 899 &fp); 900 if (error != 0) 901 return (error); 902 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 903 fdrop(fp, td); 904 return (ENOTCAPABLE); 905 } 906 vp = fp->f_vnode; 907 vrefact(vp); 908 fdrop(fp, td); 909 vn_lock(vp, LK_SHARED | LK_RETRY); 910 AUDIT_ARG_VNODE1(vp); 911 error = change_dir(vp, td); 912 while (!error && (mp = vp->v_mountedhere) != NULL) { 913 if (vfs_busy(mp, 0)) 914 continue; 915 error = VFS_ROOT(mp, LK_SHARED, &tdp); 916 vfs_unbusy(mp); 917 if (error != 0) 918 break; 919 vput(vp); 920 vp = tdp; 921 } 922 if (error != 0) { 923 vput(vp); 924 return (error); 925 } 926 VOP_UNLOCK(vp); 927 pwd_chdir(td, vp); 928 return (0); 929 } 930 931 /* 932 * Change current working directory (``.''). 
933 */ 934 #ifndef _SYS_SYSPROTO_H_ 935 struct chdir_args { 936 char *path; 937 }; 938 #endif 939 int 940 sys_chdir(struct thread *td, struct chdir_args *uap) 941 { 942 943 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 944 } 945 946 int 947 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 948 { 949 struct nameidata nd; 950 int error; 951 952 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 953 pathseg, path); 954 if ((error = namei(&nd)) != 0) 955 return (error); 956 if ((error = change_dir(nd.ni_vp, td)) != 0) { 957 vput(nd.ni_vp); 958 NDFREE_PNBUF(&nd); 959 return (error); 960 } 961 VOP_UNLOCK(nd.ni_vp); 962 NDFREE_PNBUF(&nd); 963 pwd_chdir(td, nd.ni_vp); 964 return (0); 965 } 966 967 static int unprivileged_chroot = 0; 968 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 969 &unprivileged_chroot, 0, 970 "Unprivileged processes can use chroot(2)"); 971 972 /* 973 * Takes locked vnode, unlocks it before returning. 974 */ 975 static int 976 kern_chroot(struct thread *td, struct vnode *vp) 977 { 978 struct proc *p; 979 int error; 980 981 error = priv_check(td, PRIV_VFS_CHROOT); 982 if (error != 0) { 983 p = td->td_proc; 984 if (unprivileged_chroot == 0) { 985 error = EXTERROR(EPERM, 986 "security.bsd.unprivileged_chroot sysctl not enabled"); 987 goto e_vunlock; 988 } 989 if ((p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 990 error = EXTERROR(EPERM, 991 "PROC_NO_NEW_PRIVS not enabled"); 992 goto e_vunlock; 993 } 994 } 995 996 error = change_dir(vp, td); 997 if (error != 0) 998 goto e_vunlock; 999 #ifdef MAC 1000 error = mac_vnode_check_chroot(td->td_ucred, vp); 1001 if (error != 0) 1002 goto e_vunlock; 1003 #endif 1004 VOP_UNLOCK(vp); 1005 error = pwd_chroot(td, vp); 1006 vrele(vp); 1007 return (error); 1008 e_vunlock: 1009 vput(vp); 1010 return (error); 1011 } 1012 1013 /* 1014 * Change notion of root (``/'') directory. 
1015 */ 1016 #ifndef _SYS_SYSPROTO_H_ 1017 struct chroot_args { 1018 char *path; 1019 }; 1020 #endif 1021 int 1022 sys_chroot(struct thread *td, struct chroot_args *uap) 1023 { 1024 struct nameidata nd; 1025 int error; 1026 1027 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1028 UIO_USERSPACE, uap->path); 1029 error = namei(&nd); 1030 if (error != 0) 1031 return (error); 1032 NDFREE_PNBUF(&nd); 1033 error = kern_chroot(td, nd.ni_vp); 1034 return (error); 1035 } 1036 1037 /* 1038 * Change notion of root directory to a given file descriptor. 1039 */ 1040 #ifndef _SYS_SYSPROTO_H_ 1041 struct fchroot_args { 1042 int fd; 1043 }; 1044 #endif 1045 int 1046 sys_fchroot(struct thread *td, struct fchroot_args *uap) 1047 { 1048 struct vnode *vp; 1049 struct file *fp; 1050 int error; 1051 uint8_t fdflags; 1052 1053 error = getvnode_path(td, uap->fd, &cap_fchroot_rights, &fdflags, &fp); 1054 if (error != 0) 1055 return (error); 1056 if ((fdflags & UF_RESOLVE_BENEATH) != 0) { 1057 fdrop(fp, td); 1058 return (ENOTCAPABLE); 1059 } 1060 vp = fp->f_vnode; 1061 vrefact(vp); 1062 fdrop(fp, td); 1063 vn_lock(vp, LK_SHARED | LK_RETRY); 1064 error = kern_chroot(td, vp); 1065 return (error); 1066 } 1067 1068 /* 1069 * Common routine for chroot and chdir. Callers must provide a locked vnode 1070 * instance. 
1071 */ 1072 int 1073 change_dir(struct vnode *vp, struct thread *td) 1074 { 1075 #ifdef MAC 1076 int error; 1077 #endif 1078 1079 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1080 if (vp->v_type != VDIR) 1081 return (ENOTDIR); 1082 #ifdef MAC 1083 error = mac_vnode_check_chdir(td->td_ucred, vp); 1084 if (error != 0) 1085 return (error); 1086 #endif 1087 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1088 } 1089 1090 static __inline void 1091 flags_to_rights(int flags, cap_rights_t *rightsp) 1092 { 1093 if (flags & O_EXEC) { 1094 cap_rights_set_one(rightsp, CAP_FEXECVE); 1095 if (flags & O_PATH) 1096 return; 1097 } else { 1098 switch ((flags & O_ACCMODE)) { 1099 case O_RDONLY: 1100 cap_rights_set_one(rightsp, CAP_READ); 1101 break; 1102 case O_RDWR: 1103 cap_rights_set_one(rightsp, CAP_READ); 1104 /* FALLTHROUGH */ 1105 case O_WRONLY: 1106 cap_rights_set_one(rightsp, CAP_WRITE); 1107 if (!(flags & (O_APPEND | O_TRUNC))) 1108 cap_rights_set_one(rightsp, CAP_SEEK); 1109 break; 1110 } 1111 } 1112 1113 if (flags & O_CREAT) 1114 cap_rights_set_one(rightsp, CAP_CREATE); 1115 1116 if (flags & O_TRUNC) 1117 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1118 1119 if (flags & (O_SYNC | O_FSYNC | O_DSYNC)) 1120 cap_rights_set_one(rightsp, CAP_FSYNC); 1121 1122 if (flags & (O_EXLOCK | O_SHLOCK)) 1123 cap_rights_set_one(rightsp, CAP_FLOCK); 1124 } 1125 1126 /* 1127 * Check permissions, allocate an open file structure, and call the device 1128 * open routine if any. 
*/
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char *path;
	int flags;
	int mode;
};
#endif
/* open(2): openat relative to AT_FDCWD. */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int fd;
	char *path;
	int flag;
	int mode;
};
#endif
/* openat(2): records the directory descriptor for audit, then opens. */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Validate open(2) flags and convert access mode flags (O_RDONLY etc.) to their
 * in-kernel representations (FREAD etc.).
 *
 * '*flagsp' is updated in place only on success; EINVAL is returned for
 * an invalid access mode combination and leaves '*flagsp' untouched.
 */
static int
openflags(int *flagsp)
{
	int flags;

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	flags = *flagsp;
	if ((flags & O_PATH) != 0) {
		/* O_PATH: drop the access mode bits. */
		flags &= ~O_ACCMODE;
	} else if ((flags & O_EXEC) != 0) {
		/* O_EXEC may not be combined with any access mode bit. */
		if ((flags & O_ACCMODE) != 0)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		/* All access mode bits set is not a valid mode. */
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}
	*flagsp = flags;
	return (0);
}

/*
 * Attach the opened vnode 'vp' to 'fp' and, if no other layer has
 * installed file operations yet, initialise 'fp' for vnode (or, with
 * O_PATH, path-only) access.
 */
static void
finit_open(struct file *fp, struct vnode *vp, int flags)
{
	/*
	 * Store the vnode, for any f_type.  Typically, the vnode use count is
	 * decremented by a direct call to vnops.fo_close() for files that
	 * switched type.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs or fifofs, bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			/* Preserve an FKQALLOWED bit already set on the file. */
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}
}

/*
 * If fpp != NULL, opened file is not installed into the file
 * descriptor table, instead it is returned in *fpp.  This is
 * incompatible with fdopen(), in which case we return EINVAL.
 *
 * Otherwise the new descriptor number is stored in td_retval[0].
 * On failure the allocated file is released via falloc_abort().
 */
static int
openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	struct proc *p;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct filecaps *fcaps;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see the checks below. */
	indx = -1;
	p = td->td_proc;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Rights are derived from the caller's flags before conversion. */
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	error = openflags(&flags);
	if (error != 0)
		return (error);

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Creation mode: apply the process mask, keep permission bits only. */
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, dirfd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			MPASS(fpp == NULL);
			/* On success dupfdopen() fills in indx. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	finit_open(fp, vp, flags);
	VOP_UNLOCK(vp);
	/* Truncation happens through the file, after the vnode is unlocked. */
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	if (fpp != NULL) {
		/* Caller wants the file itself rather than a descriptor. */
		MPASS(error == 0);
		NDFREE_IOCTLCAPS(&nd);
		*fpp = fp;
		return (0);
	}

	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		/* Make O_RESOLVE_BENEATH reflect how the lookup resolved. */
		if ((nd.ni_resflags & NIRES_BENEATH) != 0)
			flags |= O_RESOLVE_BENEATH;
		else
			flags &= ~O_RESOLVE_BENEATH;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/*
		 * dupfdopen() already provided the descriptor, so the file
		 * allocated above is not needed.
		 */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

/*
 * Open 'path' relative to 'dirfd' and install the result in the
 * descriptor table; the descriptor is returned in td_retval[0].
 */
int
kern_openat(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode)
{
	return (openatfp(td, dirfd, path, pathseg, flags, mode, NULL));
}

/*
 * Open 'path' relative to 'dirfd' and return the file in '*fpp' instead
 * of installing a descriptor.  openatfp() overwrites td_dupfd, so the
 * caller's value is saved and restored around the call.
 */
int
kern_openatfp(struct thread *td, int dirfd, const char *path,
    enum uio_seg pathseg, int flags, int mode, struct file **fpp)
{
	int error, old_dupfd;

	old_dupfd = td->td_dupfd;
	td->td_dupfd = -1;
	error = openatfp(td, dirfd, path, pathseg, flags, mode, fpp);
	td->td_dupfd = old_dupfd;
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char *path;
	int mode;
};
#endif
/* Old creat(): an open for writing with O_CREAT | O_TRUNC. */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/*
 * mknodat(2) entry point; forwards to kern_mknodat() with a user-space path.
 */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

#if defined(COMPAT_FREEBSD11)
/*
 * FreeBSD 11 compatible mknod(2); the path is resolved relative to AT_FDCWD.
 */
int
freebsd11_mknod(struct thread *td,
    struct freebsd11_mknod_args *uap)
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

/*
 * FreeBSD 11 compatible mknodat(2).
 */
int
freebsd11_mknodat(struct thread *td,
    struct freebsd11_mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Create a device node or a whiteout at "path", resolved relative to "fd".
 *
 * The file type bits of "mode" select the operation:
 *  - S_IFCHR / S_IFBLK: requires PRIV_VFS_MKNOD_DEV; dev == VNOVAL is
 *    rejected with EINVAL.
 *  - S_IFWHT: requires PRIV_VFS_MKNOD_WHT; a whiteout is created with
 *    VOP_WHITEOUT() instead of VOP_MKNOD().
 *  - S_IFIFO with dev == 0: handed over to kern_mkfifoat().
 *  - anything else: EINVAL.
 *
 * Returns 0 on success or an errno value: EEXIST if the name already
 * exists, EINVAL if the parent directory has VIRF_NAMEDDIR set.  The
 * lookup is redone from "restart" if the write could not be started
 * without sleeping or if the operation returns ERELOOKUP.
 */
int
kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int mode, dev_t dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mknodat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/* The name already exists: release both vnodes and fail. */
		NDFREE_PNBUF(&nd);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	} else {
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_pd->pd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Other types were filtered out by the first switch. */
			panic("kern_mknod: invalid mode");
		}
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Could not start the write without sleeping: drop the
		 * directory, wait, and redo the lookup.
		 */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
		}
	}
	/* nd.ni_vp is only passed along when VOP_MKNOD() succeeded. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL,
	    true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Create a named pipe.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point; the path is resolved relative to AT_FDCWD.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO node at "path", resolved relative to "fd".
 *
 * Returns 0 on success or an errno value: EEXIST if the name already
 * exists, EINVAL if the parent directory has VIRF_NAMEDDIR set.  The
 * lookup is redone from "restart" if the write could not be started
 * without sleeping or if VOP_MKNOD() returns ERELOOKUP.
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
	NDPREINIT(&nd);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE,
	    pathseg, path, fd, &cap_mkfifoat_rights);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* The name already exists: release both vnodes and fail. */
		NDFREE_PNBUF(&nd);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) {
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		return (EINVAL);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/* Drop the directory, wait, and redo the lookup. */
		NDFREE_PNBUF(&nd);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	/* nd.ni_vp is only passed along when the node was created. */
	VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true);
	vn_finished_write(mp);
	NDFREE_PNBUF(&nd);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point: both paths are resolved relative to AT_FDCWD and
 * AT_SYMLINK_FOLLOW is passed for the source.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, AT_SYMLINK_FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, uap->flag));
}

/*
 * Tunables consulted by can_hardlink(); both default to 0 (no extra check).
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Decide whether "cred" may create a hard link to "vp".
 *
 * With both tunables off this always returns 0 without looking at the
 * vnode.  Otherwise the vnode attributes are fetched; if the enabled
 * uid check finds a different owner, or the enabled gid check finds
 * that cred is not a member of the file's group, PRIV_VFS_LINK is
 * required.  Returns 0 if the link is allowed, otherwise the error from
 * VOP_GETATTR() or priv_check_cred().
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create a hard link named path2 (relative to fd2) to the file named by
 * path1 (relative to fd1).
 *
 * Flags other than AT_SYMLINK_FOLLOW, AT_RESOLVE_BENEATH and
 * AT_EMPTY_PATH yield EINVAL.  When the source lookup reports
 * NIRES_EMPTYPATH, PRIV_VFS_FHOPEN is required.  The source lookup and
 * the link attempt are repeated for as long as kern_linkat_vp() returns
 * EAGAIN or ERELOOKUP.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int flag)
{
	struct nameidata nd;
	int error;

	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH |
	    AT_EMPTY_PATH)) != 0)
		return (EINVAL);

	NDPREINIT(&nd);
	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag,
		    AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH),
		    segflag, path1, fd1, &cap_linkat_source_rights);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE_PNBUF(&nd);
		if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) {
			error = priv_check(td, PRIV_VFS_FHOPEN);
			if (error != 0) {
				vrele(nd.ni_vp);
				return (error);
			}
		}
		/* kern_linkat_vp() releases nd.ni_vp on every path. */
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN || error == ERELOOKUP);
	return (error);
}

/*
 * Link the referenced vnode "vp" under the name "path" (relative to "fd").
 *
 * Every exit path drops the caller's reference on vp, either directly
 * with vrele()/vput() or by handing it to VOP_VPUT_PAIR().
 *
 * Returns 0 on success or an errno value, in particular:
 *  - EPERM   vp is a directory;
 *  - EINVAL  vp has VIRF_NAMEDDIR or VIRF_NAMEDATTR set;
 *  - EEXIST  the target name already exists;
 *  - EXDEV   target directory and vp are on different mounts;
 *  - EAGAIN  vp could not be locked, or the write had to be waited for;
 *            kern_linkat() retries on this value.
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	if ((vn_irflag_read(vp) & (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0) {
		vrele(vp);
		return (EINVAL);
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			NDFREE_PNBUF(&nd);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE_PNBUF(&nd);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if (vn_lock(vp, LK_EXCLUSIVE) == 0) {
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Release everything, wait for the write to
				 * become possible, and ask the caller to
				 * retry with EAGAIN.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE_PNBUF(&nd);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | V_PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			VOP_VPUT_PAIR(nd.ni_dvp, &vp, true);
			vn_finished_write(mp);
			NDFREE_PNBUF(&nd);
			/* vp was handed to VOP_VPUT_PAIR(); skip the vrele below. */
			vp = NULL;
		} else {
			vput(nd.ni_dvp);
			NDFREE_PNBUF(&nd);
			vrele(vp);
			return (EAGAIN);
		}
	}
	if (vp != NULL)
		vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1805 */ 1806 #ifndef _SYS_SYSPROTO_H_ 1807 struct symlink_args { 1808 char *path; 1809 char *link; 1810 }; 1811 #endif 1812 int 1813 sys_symlink(struct thread *td, struct symlink_args *uap) 1814 { 1815 1816 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1817 UIO_USERSPACE)); 1818 } 1819 1820 #ifndef _SYS_SYSPROTO_H_ 1821 struct symlinkat_args { 1822 char *path; 1823 int fd; 1824 char *path2; 1825 }; 1826 #endif 1827 int 1828 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1829 { 1830 1831 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1832 UIO_USERSPACE)); 1833 } 1834 1835 int 1836 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1837 enum uio_seg segflg) 1838 { 1839 struct mount *mp; 1840 struct vattr vattr; 1841 const char *syspath; 1842 char *tmppath; 1843 struct nameidata nd; 1844 int error; 1845 1846 if (segflg == UIO_SYSSPACE) { 1847 syspath = path1; 1848 } else { 1849 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1850 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1851 goto out; 1852 syspath = tmppath; 1853 } 1854 AUDIT_ARG_TEXT(syspath); 1855 NDPREINIT(&nd); 1856 restart: 1857 bwillwrite(); 1858 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | NOCACHE, segflg, 1859 path2, fd, &cap_symlinkat_rights); 1860 if ((error = namei(&nd)) != 0) 1861 goto out; 1862 if (nd.ni_vp) { 1863 NDFREE_PNBUF(&nd); 1864 if (nd.ni_vp == nd.ni_dvp) 1865 vrele(nd.ni_dvp); 1866 else 1867 vput(nd.ni_dvp); 1868 vrele(nd.ni_vp); 1869 nd.ni_vp = NULL; 1870 error = EEXIST; 1871 goto out; 1872 } 1873 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1874 NDFREE_PNBUF(&nd); 1875 vput(nd.ni_dvp); 1876 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1877 goto out; 1878 goto restart; 1879 } 1880 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 1881 error = EINVAL; 1882 goto out; 1883 } 1884 VATTR_NULL(&vattr); 1885 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 
1886 #ifdef MAC 1887 vattr.va_type = VLNK; 1888 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1889 &vattr); 1890 if (error != 0) 1891 goto out2; 1892 #endif 1893 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1894 #ifdef MAC 1895 out2: 1896 #endif 1897 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1898 vn_finished_write(mp); 1899 NDFREE_PNBUF(&nd); 1900 if (error == ERELOOKUP) 1901 goto restart; 1902 out: 1903 if (segflg != UIO_SYSSPACE) 1904 uma_zfree(namei_zone, tmppath); 1905 return (error); 1906 } 1907 1908 /* 1909 * Delete a whiteout from the filesystem. 1910 */ 1911 #ifndef _SYS_SYSPROTO_H_ 1912 struct undelete_args { 1913 char *path; 1914 }; 1915 #endif 1916 int 1917 sys_undelete(struct thread *td, struct undelete_args *uap) 1918 { 1919 struct mount *mp; 1920 struct nameidata nd; 1921 int error; 1922 1923 NDPREINIT(&nd); 1924 restart: 1925 bwillwrite(); 1926 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1927 UIO_USERSPACE, uap->path); 1928 error = namei(&nd); 1929 if (error != 0) 1930 return (error); 1931 1932 if (nd.ni_vp != NULL || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1933 NDFREE_PNBUF(&nd); 1934 if (nd.ni_vp == nd.ni_dvp) 1935 vrele(nd.ni_dvp); 1936 else 1937 vput(nd.ni_dvp); 1938 if (nd.ni_vp) 1939 vrele(nd.ni_vp); 1940 return (EEXIST); 1941 } 1942 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1943 NDFREE_PNBUF(&nd); 1944 vput(nd.ni_dvp); 1945 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 1946 return (error); 1947 goto restart; 1948 } 1949 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1950 NDFREE_PNBUF(&nd); 1951 vput(nd.ni_dvp); 1952 vn_finished_write(mp); 1953 if (error == ERELOOKUP) 1954 goto restart; 1955 return (error); 1956 } 1957 1958 /* 1959 * Delete a name from the filesystem. 
1960 */ 1961 #ifndef _SYS_SYSPROTO_H_ 1962 struct unlink_args { 1963 char *path; 1964 }; 1965 #endif 1966 int 1967 sys_unlink(struct thread *td, struct unlink_args *uap) 1968 { 1969 1970 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1971 0, 0)); 1972 } 1973 1974 static int 1975 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1976 int flag, enum uio_seg pathseg, ino_t oldinum) 1977 { 1978 1979 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1980 return (EINVAL); 1981 1982 if ((flag & AT_REMOVEDIR) != 0) 1983 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1984 1985 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1986 } 1987 1988 #ifndef _SYS_SYSPROTO_H_ 1989 struct unlinkat_args { 1990 int fd; 1991 char *path; 1992 int flag; 1993 }; 1994 #endif 1995 int 1996 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1997 { 1998 1999 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 2000 UIO_USERSPACE, 0)); 2001 } 2002 2003 #ifndef _SYS_SYSPROTO_H_ 2004 struct funlinkat_args { 2005 int dfd; 2006 const char *path; 2007 int fd; 2008 int flag; 2009 }; 2010 #endif 2011 int 2012 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 2013 { 2014 2015 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 2016 UIO_USERSPACE, 0)); 2017 } 2018 2019 int 2020 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 2021 enum uio_seg pathseg, int flag, ino_t oldinum) 2022 { 2023 struct mount *mp; 2024 struct file *fp; 2025 struct vnode *vp; 2026 struct nameidata nd; 2027 struct stat sb; 2028 int error; 2029 2030 fp = NULL; 2031 if (fd != FD_NONE) { 2032 error = getvnode_path(td, fd, &cap_no_rights, NULL, &fp); 2033 if (error != 0) 2034 return (error); 2035 } 2036 2037 NDPREINIT(&nd); 2038 restart: 2039 bwillwrite(); 2040 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 2041 at2cnpflags(flag, AT_RESOLVE_BENEATH), 2042 
pathseg, path, dfd, &cap_unlinkat_rights); 2043 if ((error = namei(&nd)) != 0) { 2044 if (error == EINVAL) 2045 error = EPERM; 2046 goto fdout; 2047 } 2048 vp = nd.ni_vp; 2049 if (vp->v_type == VDIR && oldinum == 0) { 2050 error = EPERM; /* POSIX */ 2051 } else if (oldinum != 0 && 2052 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 2053 sb.st_ino != oldinum) { 2054 error = EIDRM; /* Identifier removed */ 2055 } else if (fp != NULL && fp->f_vnode != vp) { 2056 if (VN_IS_DOOMED(fp->f_vnode)) 2057 error = EBADF; 2058 else 2059 error = EDEADLK; 2060 } else { 2061 /* 2062 * The root of a mounted filesystem cannot be deleted. 2063 * 2064 * XXX: can this only be a VDIR case? 2065 */ 2066 if (vp->v_vflag & VV_ROOT) 2067 error = EBUSY; 2068 } 2069 if (error == 0) { 2070 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 2071 NDFREE_PNBUF(&nd); 2072 vput(nd.ni_dvp); 2073 if (vp == nd.ni_dvp) 2074 vrele(vp); 2075 else 2076 vput(vp); 2077 if ((error = vn_start_write(NULL, &mp, 2078 V_XSLEEP | V_PCATCH)) != 0) { 2079 goto fdout; 2080 } 2081 goto restart; 2082 } 2083 #ifdef MAC 2084 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 2085 &nd.ni_cnd); 2086 if (error != 0) 2087 goto out; 2088 #endif 2089 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 2090 #ifdef MAC 2091 out: 2092 #endif 2093 vn_finished_write(mp); 2094 } 2095 NDFREE_PNBUF(&nd); 2096 vput(nd.ni_dvp); 2097 if (vp == nd.ni_dvp) 2098 vrele(vp); 2099 else 2100 vput(vp); 2101 if (error == ERELOOKUP) 2102 goto restart; 2103 fdout: 2104 if (fp != NULL) 2105 fdrop(fp, td); 2106 return (error); 2107 } 2108 2109 /* 2110 * Reposition read/write file offset. 
2111 */ 2112 #ifndef _SYS_SYSPROTO_H_ 2113 struct lseek_args { 2114 int fd; 2115 int pad; 2116 off_t offset; 2117 int whence; 2118 }; 2119 #endif 2120 int 2121 sys_lseek(struct thread *td, struct lseek_args *uap) 2122 { 2123 2124 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2125 } 2126 2127 int 2128 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2129 { 2130 struct file *fp; 2131 int error; 2132 2133 AUDIT_ARG_FD(fd); 2134 error = fget(td, fd, &cap_seek_rights, &fp); 2135 if (error != 0) 2136 return (error); 2137 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2138 fo_seek(fp, offset, whence, td) : ESPIPE; 2139 fdrop(fp, td); 2140 return (error); 2141 } 2142 2143 #if defined(COMPAT_43) 2144 /* 2145 * Reposition read/write file offset. 2146 */ 2147 #ifndef _SYS_SYSPROTO_H_ 2148 struct olseek_args { 2149 int fd; 2150 long offset; 2151 int whence; 2152 }; 2153 #endif 2154 int 2155 olseek(struct thread *td, struct olseek_args *uap) 2156 { 2157 2158 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2159 } 2160 #endif /* COMPAT_43 */ 2161 2162 #if defined(COMPAT_FREEBSD6) 2163 /* Version with the 'pad' argument */ 2164 int 2165 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2166 { 2167 2168 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2169 } 2170 #endif 2171 2172 /* 2173 * Check access permissions using passed credentials. 2174 */ 2175 static int 2176 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2177 struct thread *td) 2178 { 2179 accmode_t accmode; 2180 int error; 2181 2182 /* Flags == 0 means only check for existence. 
*/ 2183 if (user_flags == 0) 2184 return (0); 2185 2186 accmode = 0; 2187 if (user_flags & R_OK) 2188 accmode |= VREAD; 2189 if (user_flags & W_OK) 2190 accmode |= VWRITE; 2191 if (user_flags & X_OK) 2192 accmode |= VEXEC; 2193 #ifdef MAC 2194 error = mac_vnode_check_access(cred, vp, accmode); 2195 if (error != 0) 2196 return (error); 2197 #endif 2198 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2199 error = VOP_ACCESS(vp, accmode, cred, td); 2200 return (error); 2201 } 2202 2203 /* 2204 * Check access permissions using "real" credentials. 2205 */ 2206 #ifndef _SYS_SYSPROTO_H_ 2207 struct access_args { 2208 char *path; 2209 int amode; 2210 }; 2211 #endif 2212 int 2213 sys_access(struct thread *td, struct access_args *uap) 2214 { 2215 2216 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2217 0, uap->amode)); 2218 } 2219 2220 #ifndef _SYS_SYSPROTO_H_ 2221 struct faccessat_args { 2222 int dirfd; 2223 char *path; 2224 int amode; 2225 int flag; 2226 } 2227 #endif 2228 int 2229 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2230 { 2231 2232 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2233 uap->amode)); 2234 } 2235 2236 int 2237 kern_accessat(struct thread *td, int fd, const char *path, 2238 enum uio_seg pathseg, int flag, int amode) 2239 { 2240 struct ucred *cred, *usecred; 2241 struct vnode *vp; 2242 struct nameidata nd; 2243 int error; 2244 2245 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH | 2246 AT_SYMLINK_NOFOLLOW)) != 0) 2247 return (EINVAL); 2248 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2249 return (EINVAL); 2250 2251 /* 2252 * Create and modify a temporary credential instead of one that 2253 * is potentially shared (if we need one). 
2254 */ 2255 cred = td->td_ucred; 2256 if ((flag & AT_EACCESS) == 0 && 2257 ((cred->cr_uid != cred->cr_ruid || 2258 cred->cr_rgid != cred->cr_gid))) { 2259 usecred = crdup(cred); 2260 usecred->cr_uid = cred->cr_ruid; 2261 usecred->cr_gid = cred->cr_rgid; 2262 td->td_ucred = usecred; 2263 } else 2264 usecred = cred; 2265 AUDIT_ARG_VALUE(amode); 2266 NDINIT_ATRIGHTS(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | 2267 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW | 2268 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2269 if ((error = namei(&nd)) != 0) 2270 goto out; 2271 vp = nd.ni_vp; 2272 2273 error = vn_access(vp, amode, usecred, td); 2274 NDFREE_PNBUF(&nd); 2275 vput(vp); 2276 out: 2277 if (usecred != cred) { 2278 td->td_ucred = cred; 2279 crfree(usecred); 2280 } 2281 return (error); 2282 } 2283 2284 /* 2285 * Check access permissions using "effective" credentials. 2286 */ 2287 #ifndef _SYS_SYSPROTO_H_ 2288 struct eaccess_args { 2289 char *path; 2290 int amode; 2291 }; 2292 #endif 2293 int 2294 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2295 { 2296 2297 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2298 AT_EACCESS, uap->amode)); 2299 } 2300 2301 #if defined(COMPAT_43) 2302 /* 2303 * Get file status; this version follows links. 2304 */ 2305 #ifndef _SYS_SYSPROTO_H_ 2306 struct ostat_args { 2307 char *path; 2308 struct ostat *ub; 2309 }; 2310 #endif 2311 int 2312 ostat(struct thread *td, struct ostat_args *uap) 2313 { 2314 struct stat sb; 2315 struct ostat osb; 2316 int error; 2317 2318 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2319 if (error != 0) 2320 return (error); 2321 cvtstat(&sb, &osb); 2322 return (copyout(&osb, uap->ub, sizeof (osb))); 2323 } 2324 2325 /* 2326 * Get file status; this version does not follow links. 
2327 */ 2328 #ifndef _SYS_SYSPROTO_H_ 2329 struct olstat_args { 2330 char *path; 2331 struct ostat *ub; 2332 }; 2333 #endif 2334 int 2335 olstat(struct thread *td, struct olstat_args *uap) 2336 { 2337 struct stat sb; 2338 struct ostat osb; 2339 int error; 2340 2341 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2342 UIO_USERSPACE, &sb); 2343 if (error != 0) 2344 return (error); 2345 cvtstat(&sb, &osb); 2346 return (copyout(&osb, uap->ub, sizeof (osb))); 2347 } 2348 2349 /* 2350 * Convert from an old to a new stat structure. 2351 * XXX: many values are blindly truncated. 2352 */ 2353 void 2354 cvtstat(struct stat *st, struct ostat *ost) 2355 { 2356 2357 bzero(ost, sizeof(*ost)); 2358 ost->st_dev = st->st_dev; 2359 ost->st_ino = st->st_ino; 2360 ost->st_mode = st->st_mode; 2361 ost->st_nlink = st->st_nlink; 2362 ost->st_uid = st->st_uid; 2363 ost->st_gid = st->st_gid; 2364 ost->st_rdev = st->st_rdev; 2365 ost->st_size = MIN(st->st_size, INT32_MAX); 2366 ost->st_atim = st->st_atim; 2367 ost->st_mtim = st->st_mtim; 2368 ost->st_ctim = st->st_ctim; 2369 ost->st_blksize = st->st_blksize; 2370 ost->st_blocks = st->st_blocks; 2371 ost->st_flags = st->st_flags; 2372 ost->st_gen = st->st_gen; 2373 } 2374 #endif /* COMPAT_43 */ 2375 2376 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2377 int ino64_trunc_error; 2378 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2379 &ino64_trunc_error, 0, 2380 "Error on truncation of device, file or inode number, or link count"); 2381 2382 int 2383 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2384 { 2385 2386 ost->st_dev = st->st_dev; 2387 if (ost->st_dev != st->st_dev) { 2388 switch (ino64_trunc_error) { 2389 default: 2390 /* 2391 * Since dev_t is almost raw, don't clamp to the 2392 * maximum for case 2, but ignore the error. 
2393 */ 2394 break; 2395 case 1: 2396 return (EOVERFLOW); 2397 } 2398 } 2399 ost->st_ino = st->st_ino; 2400 if (ost->st_ino != st->st_ino) { 2401 switch (ino64_trunc_error) { 2402 default: 2403 case 0: 2404 break; 2405 case 1: 2406 return (EOVERFLOW); 2407 case 2: 2408 ost->st_ino = UINT32_MAX; 2409 break; 2410 } 2411 } 2412 ost->st_mode = st->st_mode; 2413 ost->st_nlink = st->st_nlink; 2414 if (ost->st_nlink != st->st_nlink) { 2415 switch (ino64_trunc_error) { 2416 default: 2417 case 0: 2418 break; 2419 case 1: 2420 return (EOVERFLOW); 2421 case 2: 2422 ost->st_nlink = UINT16_MAX; 2423 break; 2424 } 2425 } 2426 ost->st_uid = st->st_uid; 2427 ost->st_gid = st->st_gid; 2428 ost->st_rdev = st->st_rdev; 2429 if (ost->st_rdev != st->st_rdev) { 2430 switch (ino64_trunc_error) { 2431 default: 2432 break; 2433 case 1: 2434 return (EOVERFLOW); 2435 } 2436 } 2437 ost->st_atim = st->st_atim; 2438 ost->st_mtim = st->st_mtim; 2439 ost->st_ctim = st->st_ctim; 2440 ost->st_size = st->st_size; 2441 ost->st_blocks = st->st_blocks; 2442 ost->st_blksize = st->st_blksize; 2443 ost->st_flags = st->st_flags; 2444 ost->st_gen = st->st_gen; 2445 ost->st_lspare = 0; 2446 ost->st_birthtim = st->st_birthtim; 2447 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2448 sizeof(*ost) - offsetof(struct freebsd11_stat, 2449 st_birthtim) - sizeof(ost->st_birthtim)); 2450 return (0); 2451 } 2452 2453 int 2454 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2455 { 2456 struct stat sb; 2457 struct freebsd11_stat osb; 2458 int error; 2459 2460 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2461 if (error != 0) 2462 return (error); 2463 error = freebsd11_cvtstat(&sb, &osb); 2464 if (error == 0) 2465 error = copyout(&osb, uap->ub, sizeof(osb)); 2466 return (error); 2467 } 2468 2469 int 2470 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2471 { 2472 struct stat sb; 2473 struct freebsd11_stat osb; 2474 int error; 2475 2476 error = 
kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2477 UIO_USERSPACE, &sb); 2478 if (error != 0) 2479 return (error); 2480 error = freebsd11_cvtstat(&sb, &osb); 2481 if (error == 0) 2482 error = copyout(&osb, uap->ub, sizeof(osb)); 2483 return (error); 2484 } 2485 2486 int 2487 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2488 { 2489 struct fhandle fh; 2490 struct stat sb; 2491 struct freebsd11_stat osb; 2492 int error; 2493 2494 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2495 if (error != 0) 2496 return (error); 2497 error = kern_fhstat(td, fh, &sb); 2498 if (error != 0) 2499 return (error); 2500 error = freebsd11_cvtstat(&sb, &osb); 2501 if (error == 0) 2502 error = copyout(&osb, uap->sb, sizeof(osb)); 2503 return (error); 2504 } 2505 2506 int 2507 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2508 { 2509 struct stat sb; 2510 struct freebsd11_stat osb; 2511 int error; 2512 2513 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2514 UIO_USERSPACE, &sb); 2515 if (error != 0) 2516 return (error); 2517 error = freebsd11_cvtstat(&sb, &osb); 2518 if (error == 0) 2519 error = copyout(&osb, uap->buf, sizeof(osb)); 2520 return (error); 2521 } 2522 #endif /* COMPAT_FREEBSD11 */ 2523 2524 /* 2525 * Get file status 2526 */ 2527 #ifndef _SYS_SYSPROTO_H_ 2528 struct fstatat_args { 2529 int fd; 2530 char *path; 2531 struct stat *buf; 2532 int flag; 2533 } 2534 #endif 2535 int 2536 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2537 { 2538 struct stat sb; 2539 int error; 2540 2541 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2542 UIO_USERSPACE, &sb); 2543 if (error == 0) 2544 error = copyout(&sb, uap->buf, sizeof (sb)); 2545 return (error); 2546 } 2547 2548 int 2549 kern_statat(struct thread *td, int flag, int fd, const char *path, 2550 enum uio_seg pathseg, struct stat *sbp) 2551 { 2552 struct nameidata nd; 2553 int error; 2554 2555 if ((flag & ~(AT_SYMLINK_NOFOLLOW | 
AT_RESOLVE_BENEATH | 2556 AT_EMPTY_PATH)) != 0) 2557 return (EINVAL); 2558 2559 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2560 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2561 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2562 2563 if ((error = namei(&nd)) != 0) { 2564 if (error == ENOTDIR && 2565 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2566 error = kern_fstat(td, fd, sbp); 2567 return (error); 2568 } 2569 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2570 NDFREE_PNBUF(&nd); 2571 vput(nd.ni_vp); 2572 #ifdef __STAT_TIME_T_EXT 2573 sbp->st_atim_ext = 0; 2574 sbp->st_mtim_ext = 0; 2575 sbp->st_ctim_ext = 0; 2576 sbp->st_btim_ext = 0; 2577 #endif 2578 #ifdef KTRACE 2579 if (KTRPOINT(td, KTR_STRUCT)) 2580 ktrstat_error(sbp, error); 2581 #endif 2582 return (error); 2583 } 2584 2585 #if defined(COMPAT_FREEBSD11) 2586 /* 2587 * Implementation of the NetBSD [l]stat() functions. 2588 */ 2589 int 2590 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2591 { 2592 struct freebsd11_stat sb11; 2593 int error; 2594 2595 error = freebsd11_cvtstat(sb, &sb11); 2596 if (error != 0) 2597 return (error); 2598 2599 bzero(nsb, sizeof(*nsb)); 2600 CP(sb11, *nsb, st_dev); 2601 CP(sb11, *nsb, st_ino); 2602 CP(sb11, *nsb, st_mode); 2603 CP(sb11, *nsb, st_nlink); 2604 CP(sb11, *nsb, st_uid); 2605 CP(sb11, *nsb, st_gid); 2606 CP(sb11, *nsb, st_rdev); 2607 CP(sb11, *nsb, st_atim); 2608 CP(sb11, *nsb, st_mtim); 2609 CP(sb11, *nsb, st_ctim); 2610 CP(sb11, *nsb, st_size); 2611 CP(sb11, *nsb, st_blocks); 2612 CP(sb11, *nsb, st_blksize); 2613 CP(sb11, *nsb, st_flags); 2614 CP(sb11, *nsb, st_gen); 2615 CP(sb11, *nsb, st_birthtim); 2616 return (0); 2617 } 2618 2619 #ifndef _SYS_SYSPROTO_H_ 2620 struct freebsd11_nstat_args { 2621 char *path; 2622 struct nstat *ub; 2623 }; 2624 #endif 2625 int 2626 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2627 { 2628 struct stat sb; 2629 struct nstat nsb; 2630 int error; 2631 
2632 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, &sb); 2633 if (error != 0) 2634 return (error); 2635 error = freebsd11_cvtnstat(&sb, &nsb); 2636 if (error == 0) 2637 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2638 return (error); 2639 } 2640 2641 /* 2642 * NetBSD lstat. Get file status; this version does not follow links. 2643 */ 2644 #ifndef _SYS_SYSPROTO_H_ 2645 struct freebsd11_nlstat_args { 2646 char *path; 2647 struct nstat *ub; 2648 }; 2649 #endif 2650 int 2651 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2652 { 2653 struct stat sb; 2654 struct nstat nsb; 2655 int error; 2656 2657 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2658 UIO_USERSPACE, &sb); 2659 if (error != 0) 2660 return (error); 2661 error = freebsd11_cvtnstat(&sb, &nsb); 2662 if (error == 0) 2663 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2664 return (error); 2665 } 2666 #endif /* COMPAT_FREEBSD11 */ 2667 2668 /* 2669 * Get configurable pathname variables. 
2670 */ 2671 #ifndef _SYS_SYSPROTO_H_ 2672 struct pathconf_args { 2673 char *path; 2674 int name; 2675 }; 2676 #endif 2677 int 2678 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2679 { 2680 long value; 2681 int error; 2682 2683 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2684 &value); 2685 if (error == 0) 2686 td->td_retval[0] = value; 2687 return (error); 2688 } 2689 2690 #ifndef _SYS_SYSPROTO_H_ 2691 struct lpathconf_args { 2692 char *path; 2693 int name; 2694 }; 2695 #endif 2696 int 2697 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2698 { 2699 long value; 2700 int error; 2701 2702 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2703 NOFOLLOW, &value); 2704 if (error == 0) 2705 td->td_retval[0] = value; 2706 return (error); 2707 } 2708 2709 int 2710 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2711 int name, u_long flags, long *valuep) 2712 { 2713 struct nameidata nd; 2714 int error; 2715 2716 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2717 pathseg, path); 2718 if ((error = namei(&nd)) != 0) 2719 return (error); 2720 NDFREE_PNBUF(&nd); 2721 2722 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2723 vput(nd.ni_vp); 2724 return (error); 2725 } 2726 2727 /* 2728 * Return target name of a symbolic link. 
2729 */ 2730 #ifndef _SYS_SYSPROTO_H_ 2731 struct readlink_args { 2732 char *path; 2733 char *buf; 2734 size_t count; 2735 }; 2736 #endif 2737 int 2738 sys_readlink(struct thread *td, struct readlink_args *uap) 2739 { 2740 2741 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2742 uap->buf, UIO_USERSPACE, uap->count)); 2743 } 2744 #ifndef _SYS_SYSPROTO_H_ 2745 struct readlinkat_args { 2746 int fd; 2747 char *path; 2748 char *buf; 2749 size_t bufsize; 2750 }; 2751 #endif 2752 int 2753 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2754 { 2755 2756 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2757 uap->buf, UIO_USERSPACE, uap->bufsize)); 2758 } 2759 2760 int 2761 kern_readlinkat(struct thread *td, int fd, const char *path, 2762 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2763 { 2764 struct vnode *vp; 2765 struct nameidata nd; 2766 int error; 2767 2768 if (count > IOSIZE_MAX) 2769 return (EINVAL); 2770 2771 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2772 EMPTYPATH, pathseg, path, fd); 2773 2774 if ((error = namei(&nd)) != 0) 2775 return (error); 2776 NDFREE_PNBUF(&nd); 2777 vp = nd.ni_vp; 2778 2779 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2780 vput(vp); 2781 2782 return (error); 2783 } 2784 2785 /* 2786 * Helper function to readlink from a vnode 2787 */ 2788 static int 2789 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2790 struct thread *td) 2791 { 2792 struct iovec aiov; 2793 struct uio auio; 2794 int error; 2795 2796 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2797 #ifdef MAC 2798 error = mac_vnode_check_readlink(td->td_ucred, vp); 2799 if (error != 0) 2800 return (error); 2801 #endif 2802 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2803 return (EINVAL); 2804 2805 aiov.iov_base = buf; 2806 aiov.iov_len = count; 2807 auio.uio_iov = &aiov; 2808 auio.uio_iovcnt = 1; 2809 auio.uio_offset = 
0; 2810 auio.uio_rw = UIO_READ; 2811 auio.uio_segflg = bufseg; 2812 auio.uio_td = td; 2813 auio.uio_resid = count; 2814 error = VOP_READLINK(vp, &auio, td->td_ucred); 2815 td->td_retval[0] = count - auio.uio_resid; 2816 return (error); 2817 } 2818 2819 /* 2820 * Common implementation code for chflags() and fchflags(). 2821 */ 2822 static int 2823 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2824 { 2825 struct mount *mp; 2826 struct vattr vattr; 2827 int error; 2828 2829 /* We can't support the value matching VNOVAL. */ 2830 if (flags == VNOVAL) 2831 return (EOPNOTSUPP); 2832 2833 /* 2834 * Prevent non-root users from setting flags on devices. When 2835 * a device is reused, users can retain ownership of the device 2836 * if they are allowed to set flags and programs assume that 2837 * chown can't fail when done as root. 2838 */ 2839 if (VN_ISDEV(vp)) { 2840 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2841 if (error != 0) 2842 return (error); 2843 } 2844 2845 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2846 return (error); 2847 VATTR_NULL(&vattr); 2848 vattr.va_flags = flags; 2849 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2850 #ifdef MAC 2851 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2852 if (error == 0) 2853 #endif 2854 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2855 VOP_UNLOCK(vp); 2856 vn_finished_write(mp); 2857 return (error); 2858 } 2859 2860 /* 2861 * Change flags of a file given a path name. 
2862 */ 2863 #ifndef _SYS_SYSPROTO_H_ 2864 struct chflags_args { 2865 const char *path; 2866 u_long flags; 2867 }; 2868 #endif 2869 int 2870 sys_chflags(struct thread *td, struct chflags_args *uap) 2871 { 2872 2873 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2874 uap->flags, 0)); 2875 } 2876 2877 #ifndef _SYS_SYSPROTO_H_ 2878 struct chflagsat_args { 2879 int fd; 2880 const char *path; 2881 u_long flags; 2882 int atflag; 2883 } 2884 #endif 2885 int 2886 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2887 { 2888 2889 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2890 uap->flags, uap->atflag)); 2891 } 2892 2893 /* 2894 * Same as chflags() but doesn't follow symlinks. 2895 */ 2896 #ifndef _SYS_SYSPROTO_H_ 2897 struct lchflags_args { 2898 const char *path; 2899 u_long flags; 2900 }; 2901 #endif 2902 int 2903 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2904 { 2905 2906 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2907 uap->flags, AT_SYMLINK_NOFOLLOW)); 2908 } 2909 2910 static int 2911 kern_chflagsat(struct thread *td, int fd, const char *path, 2912 enum uio_seg pathseg, u_long flags, int atflag) 2913 { 2914 struct nameidata nd; 2915 int error; 2916 2917 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2918 AT_EMPTY_PATH)) != 0) 2919 return (EINVAL); 2920 2921 AUDIT_ARG_FFLAGS(flags); 2922 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2923 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2924 fd, &cap_fchflags_rights); 2925 if ((error = namei(&nd)) != 0) 2926 return (error); 2927 NDFREE_PNBUF(&nd); 2928 error = setfflags(td, nd.ni_vp, flags); 2929 vrele(nd.ni_vp); 2930 return (error); 2931 } 2932 2933 /* 2934 * Change flags of a file given a file descriptor. 
2935 */ 2936 #ifndef _SYS_SYSPROTO_H_ 2937 struct fchflags_args { 2938 int fd; 2939 u_long flags; 2940 }; 2941 #endif 2942 int 2943 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2944 { 2945 struct file *fp; 2946 int error; 2947 2948 AUDIT_ARG_FD(uap->fd); 2949 AUDIT_ARG_FFLAGS(uap->flags); 2950 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2951 &fp); 2952 if (error != 0) 2953 return (error); 2954 #ifdef AUDIT 2955 if (AUDITING_TD(td)) { 2956 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2957 AUDIT_ARG_VNODE1(fp->f_vnode); 2958 VOP_UNLOCK(fp->f_vnode); 2959 } 2960 #endif 2961 error = setfflags(td, fp->f_vnode, uap->flags); 2962 fdrop(fp, td); 2963 return (error); 2964 } 2965 2966 /* 2967 * Common implementation code for chmod(), lchmod() and fchmod(). 2968 */ 2969 int 2970 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2971 { 2972 struct mount *mp; 2973 struct vattr vattr; 2974 int error; 2975 2976 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 2977 return (error); 2978 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2979 VATTR_NULL(&vattr); 2980 vattr.va_mode = mode & ALLPERMS; 2981 #ifdef MAC 2982 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2983 if (error == 0) 2984 #endif 2985 error = VOP_SETATTR(vp, &vattr, cred); 2986 VOP_UNLOCK(vp); 2987 vn_finished_write(mp); 2988 return (error); 2989 } 2990 2991 /* 2992 * Change mode of a file given path name. 
2993 */ 2994 #ifndef _SYS_SYSPROTO_H_ 2995 struct chmod_args { 2996 char *path; 2997 int mode; 2998 }; 2999 #endif 3000 int 3001 sys_chmod(struct thread *td, struct chmod_args *uap) 3002 { 3003 3004 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3005 uap->mode, 0)); 3006 } 3007 3008 #ifndef _SYS_SYSPROTO_H_ 3009 struct fchmodat_args { 3010 int dirfd; 3011 char *path; 3012 mode_t mode; 3013 int flag; 3014 } 3015 #endif 3016 int 3017 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 3018 { 3019 3020 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 3021 uap->mode, uap->flag)); 3022 } 3023 3024 /* 3025 * Change mode of a file given path name (don't follow links.) 3026 */ 3027 #ifndef _SYS_SYSPROTO_H_ 3028 struct lchmod_args { 3029 char *path; 3030 int mode; 3031 }; 3032 #endif 3033 int 3034 sys_lchmod(struct thread *td, struct lchmod_args *uap) 3035 { 3036 3037 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3038 uap->mode, AT_SYMLINK_NOFOLLOW)); 3039 } 3040 3041 int 3042 kern_fchmodat(struct thread *td, int fd, const char *path, 3043 enum uio_seg pathseg, mode_t mode, int flag) 3044 { 3045 struct nameidata nd; 3046 int error; 3047 3048 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3049 AT_EMPTY_PATH)) != 0) 3050 return (EINVAL); 3051 3052 AUDIT_ARG_MODE(mode); 3053 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3054 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3055 fd, &cap_fchmod_rights); 3056 if ((error = namei(&nd)) != 0) 3057 return (error); 3058 NDFREE_PNBUF(&nd); 3059 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 3060 vrele(nd.ni_vp); 3061 return (error); 3062 } 3063 3064 /* 3065 * Change mode of a file given a file descriptor. 
3066 */ 3067 #ifndef _SYS_SYSPROTO_H_ 3068 struct fchmod_args { 3069 int fd; 3070 int mode; 3071 }; 3072 #endif 3073 int 3074 sys_fchmod(struct thread *td, struct fchmod_args *uap) 3075 { 3076 struct file *fp; 3077 int error; 3078 3079 AUDIT_ARG_FD(uap->fd); 3080 AUDIT_ARG_MODE(uap->mode); 3081 3082 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 3083 if (error != 0) 3084 return (error); 3085 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 3086 fdrop(fp, td); 3087 return (error); 3088 } 3089 3090 /* 3091 * Common implementation for chown(), lchown(), and fchown() 3092 */ 3093 int 3094 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 3095 gid_t gid) 3096 { 3097 struct mount *mp; 3098 struct vattr vattr; 3099 int error; 3100 3101 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3102 return (error); 3103 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3104 VATTR_NULL(&vattr); 3105 vattr.va_uid = uid; 3106 vattr.va_gid = gid; 3107 #ifdef MAC 3108 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3109 vattr.va_gid); 3110 if (error == 0) 3111 #endif 3112 error = VOP_SETATTR(vp, &vattr, cred); 3113 VOP_UNLOCK(vp); 3114 vn_finished_write(mp); 3115 return (error); 3116 } 3117 3118 /* 3119 * Set ownership given a path name. 
3120 */ 3121 #ifndef _SYS_SYSPROTO_H_ 3122 struct chown_args { 3123 char *path; 3124 int uid; 3125 int gid; 3126 }; 3127 #endif 3128 int 3129 sys_chown(struct thread *td, struct chown_args *uap) 3130 { 3131 3132 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3133 uap->gid, 0)); 3134 } 3135 3136 #ifndef _SYS_SYSPROTO_H_ 3137 struct fchownat_args { 3138 int fd; 3139 const char * path; 3140 uid_t uid; 3141 gid_t gid; 3142 int flag; 3143 }; 3144 #endif 3145 int 3146 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3147 { 3148 3149 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3150 uap->gid, uap->flag)); 3151 } 3152 3153 int 3154 kern_fchownat(struct thread *td, int fd, const char *path, 3155 enum uio_seg pathseg, int uid, int gid, int flag) 3156 { 3157 struct nameidata nd; 3158 int error; 3159 3160 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3161 AT_EMPTY_PATH)) != 0) 3162 return (EINVAL); 3163 3164 AUDIT_ARG_OWNER(uid, gid); 3165 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3166 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3167 fd, &cap_fchown_rights); 3168 3169 if ((error = namei(&nd)) != 0) 3170 return (error); 3171 NDFREE_PNBUF(&nd); 3172 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3173 vrele(nd.ni_vp); 3174 return (error); 3175 } 3176 3177 /* 3178 * Set ownership given a path name, do not cross symlinks. 3179 */ 3180 #ifndef _SYS_SYSPROTO_H_ 3181 struct lchown_args { 3182 char *path; 3183 int uid; 3184 int gid; 3185 }; 3186 #endif 3187 int 3188 sys_lchown(struct thread *td, struct lchown_args *uap) 3189 { 3190 3191 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3192 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3193 } 3194 3195 /* 3196 * Set ownership given a file descriptor. 
3197 */ 3198 #ifndef _SYS_SYSPROTO_H_ 3199 struct fchown_args { 3200 int fd; 3201 int uid; 3202 int gid; 3203 }; 3204 #endif 3205 int 3206 sys_fchown(struct thread *td, struct fchown_args *uap) 3207 { 3208 struct file *fp; 3209 int error; 3210 3211 AUDIT_ARG_FD(uap->fd); 3212 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3213 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3214 if (error != 0) 3215 return (error); 3216 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3217 fdrop(fp, td); 3218 return (error); 3219 } 3220 3221 /* 3222 * Common implementation code for utimes(), lutimes(), and futimes(). 3223 */ 3224 static int 3225 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3226 struct timespec *tsp) 3227 { 3228 struct timeval tv[2]; 3229 const struct timeval *tvp; 3230 int error; 3231 3232 if (usrtvp == NULL) { 3233 vfs_timestamp(&tsp[0]); 3234 tsp[1] = tsp[0]; 3235 } else { 3236 if (tvpseg == UIO_SYSSPACE) { 3237 tvp = usrtvp; 3238 } else { 3239 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3240 return (error); 3241 tvp = tv; 3242 } 3243 3244 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3245 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3246 return (EINVAL); 3247 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3248 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3249 } 3250 return (0); 3251 } 3252 3253 /* 3254 * Common implementation code for futimens(), utimensat(). 
3255 */ 3256 #define UTIMENS_NULL 0x1 3257 #define UTIMENS_EXIT 0x2 3258 static int 3259 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3260 struct timespec *tsp, int *retflags) 3261 { 3262 struct timespec tsnow; 3263 int error; 3264 3265 vfs_timestamp(&tsnow); 3266 *retflags = 0; 3267 if (usrtsp == NULL) { 3268 tsp[0] = tsnow; 3269 tsp[1] = tsnow; 3270 *retflags |= UTIMENS_NULL; 3271 return (0); 3272 } 3273 if (tspseg == UIO_SYSSPACE) { 3274 tsp[0] = usrtsp[0]; 3275 tsp[1] = usrtsp[1]; 3276 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3277 return (error); 3278 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3279 *retflags |= UTIMENS_EXIT; 3280 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3281 *retflags |= UTIMENS_NULL; 3282 if (tsp[0].tv_nsec == UTIME_OMIT) 3283 tsp[0].tv_sec = VNOVAL; 3284 else if (tsp[0].tv_nsec == UTIME_NOW) 3285 tsp[0] = tsnow; 3286 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3287 return (EINVAL); 3288 if (tsp[1].tv_nsec == UTIME_OMIT) 3289 tsp[1].tv_sec = VNOVAL; 3290 else if (tsp[1].tv_nsec == UTIME_NOW) 3291 tsp[1] = tsnow; 3292 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3293 return (EINVAL); 3294 3295 return (0); 3296 } 3297 3298 /* 3299 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3300 * and utimensat(). 
3301 */ 3302 static int 3303 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3304 int numtimes, int nullflag) 3305 { 3306 struct mount *mp; 3307 struct vattr vattr; 3308 int error; 3309 bool setbirthtime; 3310 3311 setbirthtime = false; 3312 vattr.va_birthtime.tv_sec = VNOVAL; 3313 vattr.va_birthtime.tv_nsec = 0; 3314 3315 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) 3316 return (error); 3317 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3318 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3319 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3320 setbirthtime = true; 3321 VATTR_NULL(&vattr); 3322 vattr.va_atime = ts[0]; 3323 vattr.va_mtime = ts[1]; 3324 if (setbirthtime) 3325 vattr.va_birthtime = ts[1]; 3326 if (numtimes > 2) 3327 vattr.va_birthtime = ts[2]; 3328 if (nullflag) 3329 vattr.va_vaflags |= VA_UTIMES_NULL; 3330 #ifdef MAC 3331 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3332 vattr.va_mtime); 3333 #endif 3334 if (error == 0) 3335 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3336 VOP_UNLOCK(vp); 3337 vn_finished_write(mp); 3338 return (error); 3339 } 3340 3341 /* 3342 * Set the access and modification times of a file. 
3343 */ 3344 #ifndef _SYS_SYSPROTO_H_ 3345 struct utimes_args { 3346 char *path; 3347 struct timeval *tptr; 3348 }; 3349 #endif 3350 int 3351 sys_utimes(struct thread *td, struct utimes_args *uap) 3352 { 3353 3354 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3355 uap->tptr, UIO_USERSPACE)); 3356 } 3357 3358 #ifndef _SYS_SYSPROTO_H_ 3359 struct futimesat_args { 3360 int fd; 3361 const char * path; 3362 const struct timeval * times; 3363 }; 3364 #endif 3365 int 3366 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3367 { 3368 3369 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3370 uap->times, UIO_USERSPACE)); 3371 } 3372 3373 int 3374 kern_utimesat(struct thread *td, int fd, const char *path, 3375 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3376 { 3377 struct nameidata nd; 3378 struct timespec ts[2]; 3379 int error; 3380 3381 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3382 return (error); 3383 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3384 &cap_futimes_rights); 3385 3386 if ((error = namei(&nd)) != 0) 3387 return (error); 3388 NDFREE_PNBUF(&nd); 3389 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3390 vrele(nd.ni_vp); 3391 return (error); 3392 } 3393 3394 /* 3395 * Set the access and modification times of a file. 
3396 */ 3397 #ifndef _SYS_SYSPROTO_H_ 3398 struct lutimes_args { 3399 char *path; 3400 struct timeval *tptr; 3401 }; 3402 #endif 3403 int 3404 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3405 { 3406 3407 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3408 UIO_USERSPACE)); 3409 } 3410 3411 int 3412 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3413 const struct timeval *tptr, enum uio_seg tptrseg) 3414 { 3415 struct timespec ts[2]; 3416 struct nameidata nd; 3417 int error; 3418 3419 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3420 return (error); 3421 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3422 if ((error = namei(&nd)) != 0) 3423 return (error); 3424 NDFREE_PNBUF(&nd); 3425 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3426 vrele(nd.ni_vp); 3427 return (error); 3428 } 3429 3430 /* 3431 * Set the access and modification times of a file. 3432 */ 3433 #ifndef _SYS_SYSPROTO_H_ 3434 struct futimes_args { 3435 int fd; 3436 struct timeval *tptr; 3437 }; 3438 #endif 3439 int 3440 sys_futimes(struct thread *td, struct futimes_args *uap) 3441 { 3442 3443 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3444 } 3445 3446 int 3447 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3448 enum uio_seg tptrseg) 3449 { 3450 struct timespec ts[2]; 3451 struct file *fp; 3452 int error; 3453 3454 AUDIT_ARG_FD(fd); 3455 error = getutimes(tptr, tptrseg, ts); 3456 if (error != 0) 3457 return (error); 3458 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3459 if (error != 0) 3460 return (error); 3461 #ifdef AUDIT 3462 if (AUDITING_TD(td)) { 3463 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3464 AUDIT_ARG_VNODE1(fp->f_vnode); 3465 VOP_UNLOCK(fp->f_vnode); 3466 } 3467 #endif 3468 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3469 fdrop(fp, td); 3470 return (error); 3471 } 3472 3473 int 3474 sys_futimens(struct thread *td, struct futimens_args *uap) 3475 { 3476 
3477 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3478 } 3479 3480 int 3481 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3482 enum uio_seg tptrseg) 3483 { 3484 struct timespec ts[2]; 3485 struct file *fp; 3486 int error, flags; 3487 3488 AUDIT_ARG_FD(fd); 3489 error = getutimens(tptr, tptrseg, ts, &flags); 3490 if (error != 0) 3491 return (error); 3492 if (flags & UTIMENS_EXIT) 3493 return (0); 3494 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3495 if (error != 0) 3496 return (error); 3497 #ifdef AUDIT 3498 if (AUDITING_TD(td)) { 3499 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3500 AUDIT_ARG_VNODE1(fp->f_vnode); 3501 VOP_UNLOCK(fp->f_vnode); 3502 } 3503 #endif 3504 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3505 fdrop(fp, td); 3506 return (error); 3507 } 3508 3509 int 3510 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3511 { 3512 3513 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3514 uap->times, UIO_USERSPACE, uap->flag)); 3515 } 3516 3517 int 3518 kern_utimensat(struct thread *td, int fd, const char *path, 3519 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3520 int flag) 3521 { 3522 struct nameidata nd; 3523 struct timespec ts[2]; 3524 int error, flags; 3525 3526 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3527 AT_EMPTY_PATH)) != 0) 3528 return (EINVAL); 3529 3530 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3531 return (error); 3532 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3533 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3534 pathseg, path, fd, &cap_futimes_rights); 3535 if ((error = namei(&nd)) != 0) 3536 return (error); 3537 /* 3538 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3539 * POSIX states: 3540 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3541 * "Search permission is denied by a component of the path prefix." 
3542 */ 3543 NDFREE_PNBUF(&nd); 3544 if ((flags & UTIMENS_EXIT) == 0) 3545 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3546 vrele(nd.ni_vp); 3547 return (error); 3548 } 3549 3550 /* 3551 * Truncate a file given its path name. 3552 */ 3553 #ifndef _SYS_SYSPROTO_H_ 3554 struct truncate_args { 3555 char *path; 3556 int pad; 3557 off_t length; 3558 }; 3559 #endif 3560 int 3561 sys_truncate(struct thread *td, struct truncate_args *uap) 3562 { 3563 3564 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3565 } 3566 3567 int 3568 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3569 off_t length) 3570 { 3571 struct mount *mp; 3572 struct vnode *vp; 3573 void *rl_cookie; 3574 struct nameidata nd; 3575 int error; 3576 3577 if (length < 0) 3578 return (EINVAL); 3579 NDPREINIT(&nd); 3580 retry: 3581 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3582 if ((error = namei(&nd)) != 0) 3583 return (error); 3584 vp = nd.ni_vp; 3585 NDFREE_PNBUF(&nd); 3586 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3587 if ((error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH)) != 0) { 3588 vn_rangelock_unlock(vp, rl_cookie); 3589 vrele(vp); 3590 return (error); 3591 } 3592 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3593 if (vp->v_type == VDIR) { 3594 error = EISDIR; 3595 goto out; 3596 } 3597 #ifdef MAC 3598 error = mac_vnode_check_write(td->td_ucred, NOCRED, vp); 3599 if (error != 0) 3600 goto out; 3601 #endif 3602 error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td); 3603 if (error != 0) 3604 goto out; 3605 3606 error = vn_truncate_locked(vp, length, false, td->td_ucred); 3607 out: 3608 VOP_UNLOCK(vp); 3609 vn_finished_write(mp); 3610 vn_rangelock_unlock(vp, rl_cookie); 3611 vrele(vp); 3612 if (error == ERELOOKUP) 3613 goto retry; 3614 return (error); 3615 } 3616 3617 #if defined(COMPAT_43) 3618 /* 3619 * Truncate a file given its path name. 
3620 */ 3621 #ifndef _SYS_SYSPROTO_H_ 3622 struct otruncate_args { 3623 char *path; 3624 long length; 3625 }; 3626 #endif 3627 int 3628 otruncate(struct thread *td, struct otruncate_args *uap) 3629 { 3630 3631 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3632 } 3633 #endif /* COMPAT_43 */ 3634 3635 #if defined(COMPAT_FREEBSD6) 3636 /* Versions with the pad argument */ 3637 int 3638 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3639 { 3640 3641 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3642 } 3643 3644 int 3645 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3646 { 3647 3648 return (kern_ftruncate(td, uap->fd, uap->length)); 3649 } 3650 #endif 3651 3652 int 3653 kern_fsync(struct thread *td, int fd, bool fullsync) 3654 { 3655 struct vnode *vp; 3656 struct mount *mp; 3657 struct file *fp; 3658 int error; 3659 3660 AUDIT_ARG_FD(fd); 3661 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3662 if (error != 0) 3663 return (error); 3664 vp = fp->f_vnode; 3665 #if 0 3666 if (!fullsync) 3667 /* XXXKIB: compete outstanding aio writes */; 3668 #endif 3669 retry: 3670 error = vn_start_write(vp, &mp, V_WAIT | V_PCATCH); 3671 if (error != 0) 3672 goto drop; 3673 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3674 AUDIT_ARG_VNODE1(vp); 3675 vnode_pager_clean_async(vp); 3676 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3677 VOP_UNLOCK(vp); 3678 vn_finished_write(mp); 3679 if (error == ERELOOKUP) 3680 goto retry; 3681 drop: 3682 fdrop(fp, td); 3683 return (error); 3684 } 3685 3686 /* 3687 * Sync an open file. 
3688 */ 3689 #ifndef _SYS_SYSPROTO_H_ 3690 struct fsync_args { 3691 int fd; 3692 }; 3693 #endif 3694 int 3695 sys_fsync(struct thread *td, struct fsync_args *uap) 3696 { 3697 3698 return (kern_fsync(td, uap->fd, true)); 3699 } 3700 3701 int 3702 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3703 { 3704 3705 return (kern_fsync(td, uap->fd, false)); 3706 } 3707 3708 /* 3709 * Rename files. Source and destination must either both be directories, or 3710 * both not be directories. If target is a directory, it must be empty. 3711 */ 3712 #ifndef _SYS_SYSPROTO_H_ 3713 struct rename_args { 3714 char *from; 3715 char *to; 3716 }; 3717 #endif 3718 int 3719 sys_rename(struct thread *td, struct rename_args *uap) 3720 { 3721 3722 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3723 uap->to, UIO_USERSPACE, 0)); 3724 } 3725 3726 #ifndef _SYS_SYSPROTO_H_ 3727 struct renameat_args { 3728 int oldfd; 3729 char *old; 3730 int newfd; 3731 char *new; 3732 }; 3733 #endif 3734 int 3735 sys_renameat(struct thread *td, struct renameat_args *uap) 3736 { 3737 3738 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3739 UIO_USERSPACE, 0)); 3740 } 3741 3742 int 3743 sys_renameat2(struct thread *td, struct renameat2_args *uap) 3744 { 3745 3746 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3747 UIO_USERSPACE, uap->flags)); 3748 } 3749 3750 #ifdef MAC 3751 static int 3752 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3753 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3754 { 3755 int error; 3756 3757 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1, 3758 pathseg, old, oldfd, &cap_renameat_source_rights); 3759 if ((error = namei(fromnd)) != 0) 3760 return (error); 3761 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3762 fromnd->ni_vp, &fromnd->ni_cnd); 3763 VOP_UNLOCK(fromnd->ni_dvp); 3764 if (fromnd->ni_dvp != fromnd->ni_vp) 3765 
VOP_UNLOCK(fromnd->ni_vp); 3766 if (error != 0) { 3767 NDFREE_PNBUF(fromnd); 3768 vrele(fromnd->ni_dvp); 3769 vrele(fromnd->ni_vp); 3770 } 3771 return (error); 3772 } 3773 #endif 3774 3775 int 3776 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3777 const char *new, enum uio_seg pathseg, u_int flags) 3778 { 3779 struct mount *mp, *tmp; 3780 struct vnode *tvp, *fvp, *tdvp; 3781 struct nameidata fromnd, tond; 3782 uint64_t tondflags; 3783 int error; 3784 short irflag; 3785 3786 if ((flags & ~(AT_RENAME_NOREPLACE)) != 0) 3787 return (EINVAL); 3788 again: 3789 tmp = mp = NULL; 3790 bwillwrite(); 3791 #ifdef MAC 3792 if (mac_vnode_check_rename_from_enabled()) { 3793 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3794 &fromnd); 3795 if (error != 0) 3796 return (error); 3797 } else { 3798 #endif 3799 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | AUDITVNODE1, 3800 pathseg, old, oldfd, &cap_renameat_source_rights); 3801 if ((error = namei(&fromnd)) != 0) 3802 return (error); 3803 #ifdef MAC 3804 } 3805 #endif 3806 fvp = fromnd.ni_vp; 3807 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | AUDITVNODE2; 3808 if (fromnd.ni_vp->v_type == VDIR) 3809 tondflags |= WILLBEDIR; 3810 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3811 &cap_renameat_target_rights); 3812 if ((error = namei(&tond)) != 0) { 3813 /* Translate error code for rename("dir1", "dir2/."). */ 3814 if (error == EISDIR && fvp->v_type == VDIR) 3815 error = EINVAL; 3816 NDFREE_PNBUF(&fromnd); 3817 vrele(fromnd.ni_dvp); 3818 vrele(fvp); 3819 goto out1; 3820 } 3821 tdvp = tond.ni_dvp; 3822 tvp = tond.ni_vp; 3823 if (tvp != NULL && (flags & AT_RENAME_NOREPLACE) != 0) { 3824 /* 3825 * Often filesystems need to relock the vnodes in 3826 * VOP_RENAME(), which opens a window for invalidation 3827 * of this check. Then, not all filesystems might 3828 * implement AT_RENAME_NOREPLACE. 
This leads to 3829 * situation where sometimes EOPNOTSUPP might be 3830 * returned from the VOP due to race, while most of 3831 * the time this check works. 3832 */ 3833 error = EEXIST; 3834 goto out; 3835 } 3836 error = vn_start_write(fvp, &mp, V_NOWAIT); 3837 if (error != 0) { 3838 again1: 3839 NDFREE_PNBUF(&fromnd); 3840 NDFREE_PNBUF(&tond); 3841 if (tvp != NULL) 3842 vput(tvp); 3843 if (tdvp == tvp) 3844 vrele(tdvp); 3845 else 3846 vput(tdvp); 3847 vrele(fromnd.ni_dvp); 3848 vrele(fvp); 3849 if (tmp != NULL) { 3850 lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE, NULL); 3851 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, NULL); 3852 vfs_rel(tmp); 3853 tmp = NULL; 3854 } 3855 error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH); 3856 if (error != 0) 3857 return (error); 3858 goto again; 3859 } 3860 error = VOP_GETWRITEMOUNT(tdvp, &tmp); 3861 if (error != 0 || tmp == NULL) 3862 goto again1; 3863 error = lockmgr(&tmp->mnt_renamelock, LK_EXCLUSIVE | LK_NOWAIT, NULL); 3864 if (error != 0) { 3865 vn_finished_write(mp); 3866 goto again1; 3867 } 3868 irflag = vn_irflag_read(fvp); 3869 if (((irflag & VIRF_NAMEDATTR) != 0 && tdvp != fromnd.ni_dvp) || 3870 (irflag & VIRF_NAMEDDIR) != 0) { 3871 error = EINVAL; 3872 goto out; 3873 } 3874 if (tvp != NULL) { 3875 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3876 error = ENOTDIR; 3877 goto out; 3878 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3879 error = EISDIR; 3880 goto out; 3881 } 3882 #ifdef CAPABILITIES 3883 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3884 /* 3885 * If the target already exists we require CAP_UNLINKAT 3886 * from 'newfd', when newfd was used for the lookup. 
3887 */ 3888 error = cap_check(&tond.ni_filecaps.fc_rights, 3889 &cap_unlinkat_rights); 3890 if (error != 0) 3891 goto out; 3892 } 3893 #endif 3894 } 3895 if (fvp == tdvp) { 3896 error = EINVAL; 3897 goto out; 3898 } 3899 /* 3900 * If the source is the same as the destination (that is, if they 3901 * are links to the same vnode), then there is nothing to do. 3902 */ 3903 if (fvp == tvp) 3904 error = ERESTART; 3905 #ifdef MAC 3906 else 3907 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3908 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3909 #endif 3910 out: 3911 if (error == 0) { 3912 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3913 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd, flags); 3914 NDFREE_PNBUF(&fromnd); 3915 NDFREE_PNBUF(&tond); 3916 } else { 3917 NDFREE_PNBUF(&fromnd); 3918 NDFREE_PNBUF(&tond); 3919 if (tvp != NULL) 3920 vput(tvp); 3921 if (tdvp == tvp) 3922 vrele(tdvp); 3923 else 3924 vput(tdvp); 3925 vrele(fromnd.ni_dvp); 3926 vrele(fvp); 3927 } 3928 if (tmp != NULL) { 3929 lockmgr(&tmp->mnt_renamelock, LK_RELEASE, 0); 3930 vfs_rel(tmp); 3931 } 3932 if (mp != NULL) 3933 vn_finished_write(mp); 3934 out1: 3935 if (error == ERESTART) 3936 return (0); 3937 if (error == ERELOOKUP) 3938 goto again; 3939 return (error); 3940 } 3941 3942 /* 3943 * Make a directory file. 
3944 */ 3945 #ifndef _SYS_SYSPROTO_H_ 3946 struct mkdir_args { 3947 char *path; 3948 int mode; 3949 }; 3950 #endif 3951 int 3952 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3953 { 3954 3955 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3956 uap->mode)); 3957 } 3958 3959 #ifndef _SYS_SYSPROTO_H_ 3960 struct mkdirat_args { 3961 int fd; 3962 char *path; 3963 mode_t mode; 3964 }; 3965 #endif 3966 int 3967 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3968 { 3969 3970 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3971 } 3972 3973 int 3974 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3975 int mode) 3976 { 3977 struct mount *mp; 3978 struct vattr vattr; 3979 struct nameidata nd; 3980 int error; 3981 3982 AUDIT_ARG_MODE(mode); 3983 NDPREINIT(&nd); 3984 restart: 3985 bwillwrite(); 3986 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | AUDITVNODE1 | 3987 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3988 segflg, path, fd, &cap_mkdirat_rights); 3989 if ((error = namei(&nd)) != 0) 3990 return (error); 3991 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3992 NDFREE_PNBUF(&nd); 3993 vput(nd.ni_dvp); 3994 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 3995 return (error); 3996 goto restart; 3997 } 3998 if ((vn_irflag_read(nd.ni_dvp) & VIRF_NAMEDDIR) != 0) { 3999 error = EINVAL; 4000 goto out; 4001 } 4002 VATTR_NULL(&vattr); 4003 vattr.va_type = VDIR; 4004 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 4005 #ifdef MAC 4006 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 4007 &vattr); 4008 if (error != 0) 4009 goto out; 4010 #endif 4011 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 4012 out: 4013 NDFREE_PNBUF(&nd); 4014 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 4015 vn_finished_write(mp); 4016 if (error == ERELOOKUP) 4017 goto restart; 4018 return (error); 4019 } 4020 4021 /* 4022 * Remove a directory file. 4023 */ 4024 #ifndef _SYS_SYSPROTO_H_ 4025 struct rmdir_args { 4026 char *path; 4027 }; 4028 #endif 4029 int 4030 sys_rmdir(struct thread *td, struct rmdir_args *uap) 4031 { 4032 4033 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 4034 0)); 4035 } 4036 4037 int 4038 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 4039 enum uio_seg pathseg, int flag) 4040 { 4041 struct mount *mp; 4042 struct vnode *vp; 4043 struct file *fp; 4044 struct nameidata nd; 4045 cap_rights_t rights; 4046 int error; 4047 4048 fp = NULL; 4049 if (fd != FD_NONE) { 4050 error = getvnode(td, fd, cap_rights_init_one(&rights, 4051 CAP_LOOKUP), &fp); 4052 if (error != 0) 4053 return (error); 4054 } 4055 4056 NDPREINIT(&nd); 4057 restart: 4058 bwillwrite(); 4059 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 4060 at2cnpflags(flag, AT_RESOLVE_BENEATH), 4061 pathseg, path, dfd, &cap_unlinkat_rights); 4062 if ((error = namei(&nd)) != 0) 4063 goto fdout; 4064 vp = nd.ni_vp; 4065 if (vp->v_type != VDIR) { 4066 error = ENOTDIR; 4067 goto out; 4068 } 4069 /* 4070 * No rmdir "." please. 4071 */ 4072 if (nd.ni_dvp == vp) { 4073 error = EINVAL; 4074 goto out; 4075 } 4076 /* 4077 * The root of a mounted filesystem cannot be deleted. 
4078 */ 4079 if (vp->v_vflag & VV_ROOT) { 4080 error = EBUSY; 4081 goto out; 4082 } 4083 4084 if (fp != NULL && fp->f_vnode != vp) { 4085 if (VN_IS_DOOMED(fp->f_vnode)) 4086 error = EBADF; 4087 else 4088 error = EDEADLK; 4089 goto out; 4090 } 4091 4092 #ifdef MAC 4093 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 4094 &nd.ni_cnd); 4095 if (error != 0) 4096 goto out; 4097 #endif 4098 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 4099 NDFREE_PNBUF(&nd); 4100 vput(vp); 4101 if (nd.ni_dvp == vp) 4102 vrele(nd.ni_dvp); 4103 else 4104 vput(nd.ni_dvp); 4105 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | V_PCATCH)) != 0) 4106 goto fdout; 4107 goto restart; 4108 } 4109 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 4110 vn_finished_write(mp); 4111 out: 4112 NDFREE_PNBUF(&nd); 4113 vput(vp); 4114 if (nd.ni_dvp == vp) 4115 vrele(nd.ni_dvp); 4116 else 4117 vput(nd.ni_dvp); 4118 if (error == ERELOOKUP) 4119 goto restart; 4120 fdout: 4121 if (fp != NULL) 4122 fdrop(fp, td); 4123 return (error); 4124 } 4125 4126 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 4127 int 4128 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 4129 long *basep, void (*func)(struct freebsd11_dirent *)) 4130 { 4131 struct freebsd11_dirent dstdp; 4132 struct dirent *dp, *edp; 4133 char *dirbuf; 4134 off_t base; 4135 ssize_t resid, ucount; 4136 int error; 4137 4138 /* XXX arbitrary sanity limit on `count'. 
*/ 4139 count = min(count, 64 * 1024); 4140 4141 dirbuf = malloc(count, M_TEMP, M_WAITOK); 4142 4143 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 4144 UIO_SYSSPACE); 4145 if (error != 0) 4146 goto done; 4147 if (basep != NULL) 4148 *basep = base; 4149 4150 ucount = 0; 4151 for (dp = (struct dirent *)dirbuf, 4152 edp = (struct dirent *)&dirbuf[count - resid]; 4153 ucount < count && dp < edp; ) { 4154 if (dp->d_reclen == 0) 4155 break; 4156 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4157 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4158 continue; 4159 dstdp.d_type = dp->d_type; 4160 dstdp.d_namlen = dp->d_namlen; 4161 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4162 if (dstdp.d_fileno != dp->d_fileno) { 4163 switch (ino64_trunc_error) { 4164 default: 4165 case 0: 4166 break; 4167 case 1: 4168 error = EOVERFLOW; 4169 goto done; 4170 case 2: 4171 dstdp.d_fileno = UINT32_MAX; 4172 break; 4173 } 4174 } 4175 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4176 ((dp->d_namlen + 1 + 3) &~ 3); 4177 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4178 bzero(dstdp.d_name + dstdp.d_namlen, 4179 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4180 dstdp.d_namlen); 4181 MPASS(dstdp.d_reclen <= dp->d_reclen); 4182 MPASS(ucount + dstdp.d_reclen <= count); 4183 if (func != NULL) 4184 func(&dstdp); 4185 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4186 if (error != 0) 4187 break; 4188 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4189 ucount += dstdp.d_reclen; 4190 } 4191 4192 done: 4193 free(dirbuf, M_TEMP); 4194 if (error == 0) 4195 td->td_retval[0] = ucount; 4196 return (error); 4197 } 4198 #endif /* COMPAT */ 4199 4200 #ifdef COMPAT_43 4201 static void 4202 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4203 { 4204 #if (BYTE_ORDER == LITTLE_ENDIAN) 4205 /* 4206 * The expected low byte of dp->d_namlen is our dp->d_type. 4207 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4208 */ 4209 dp->d_type = dp->d_namlen; 4210 dp->d_namlen = 0; 4211 #else 4212 /* 4213 * The dp->d_type is the high byte of the expected dp->d_namlen, 4214 * so must be zero'ed. 4215 */ 4216 dp->d_type = 0; 4217 #endif 4218 } 4219 4220 /* 4221 * Read a block of directory entries in a filesystem independent format. 4222 */ 4223 #ifndef _SYS_SYSPROTO_H_ 4224 struct ogetdirentries_args { 4225 int fd; 4226 char *buf; 4227 u_int count; 4228 long *basep; 4229 }; 4230 #endif 4231 int 4232 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4233 { 4234 long loff; 4235 int error; 4236 4237 error = kern_ogetdirentries(td, uap, &loff); 4238 if (error == 0) 4239 error = copyout(&loff, uap->basep, sizeof(long)); 4240 return (error); 4241 } 4242 4243 int 4244 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4245 long *ploff) 4246 { 4247 long base; 4248 int error; 4249 4250 /* XXX arbitrary sanity limit on `count'. */ 4251 if (uap->count > 64 * 1024) 4252 return (EINVAL); 4253 4254 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4255 &base, ogetdirentries_cvt); 4256 4257 if (error == 0 && uap->basep != NULL) 4258 error = copyout(&base, uap->basep, sizeof(long)); 4259 4260 return (error); 4261 } 4262 #endif /* COMPAT_43 */ 4263 4264 #if defined(COMPAT_FREEBSD11) 4265 #ifndef _SYS_SYSPROTO_H_ 4266 struct freebsd11_getdirentries_args { 4267 int fd; 4268 char *buf; 4269 u_int count; 4270 long *basep; 4271 }; 4272 #endif 4273 int 4274 freebsd11_getdirentries(struct thread *td, 4275 struct freebsd11_getdirentries_args *uap) 4276 { 4277 long base; 4278 int error; 4279 4280 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4281 &base, NULL); 4282 4283 if (error == 0 && uap->basep != NULL) 4284 error = copyout(&base, uap->basep, sizeof(long)); 4285 return (error); 4286 } 4287 4288 int 4289 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4290 { 4291 struct 
freebsd11_getdirentries_args ap; 4292 4293 ap.fd = uap->fd; 4294 ap.buf = uap->buf; 4295 ap.count = uap->count; 4296 ap.basep = NULL; 4297 return (freebsd11_getdirentries(td, &ap)); 4298 } 4299 #endif /* COMPAT_FREEBSD11 */ 4300 4301 /* 4302 * Read a block of directory entries in a filesystem independent format. 4303 */ 4304 int 4305 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4306 { 4307 off_t base; 4308 int error; 4309 4310 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4311 NULL, UIO_USERSPACE); 4312 if (error != 0) 4313 return (error); 4314 if (uap->basep != NULL) 4315 error = copyout(&base, uap->basep, sizeof(off_t)); 4316 return (error); 4317 } 4318 4319 int 4320 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4321 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4322 { 4323 struct vnode *vp; 4324 struct file *fp; 4325 struct uio auio; 4326 struct iovec aiov; 4327 off_t loff; 4328 int error, eofflag; 4329 off_t foffset; 4330 4331 AUDIT_ARG_FD(fd); 4332 if (count > IOSIZE_MAX) 4333 return (EINVAL); 4334 auio.uio_resid = count; 4335 error = getvnode(td, fd, &cap_read_rights, &fp); 4336 if (error != 0) 4337 return (error); 4338 if ((fp->f_flag & FREAD) == 0) { 4339 fdrop(fp, td); 4340 return (EBADF); 4341 } 4342 vp = fp->f_vnode; 4343 foffset = foffset_lock(fp, 0); 4344 unionread: 4345 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4346 error = ENOENT; 4347 goto fail; 4348 } 4349 aiov.iov_base = buf; 4350 aiov.iov_len = count; 4351 auio.uio_iov = &aiov; 4352 auio.uio_iovcnt = 1; 4353 auio.uio_rw = UIO_READ; 4354 auio.uio_segflg = bufseg; 4355 auio.uio_td = td; 4356 vn_lock(vp, LK_SHARED | LK_RETRY); 4357 /* 4358 * We want to return ENOTDIR for anything that is not VDIR, but 4359 * not for VBAD, and we can't check for VBAD while the vnode is 4360 * unlocked. 
4361 */ 4362 if (vp->v_type != VDIR) { 4363 if (vp->v_type == VBAD) 4364 error = EBADF; 4365 else 4366 error = ENOTDIR; 4367 VOP_UNLOCK(vp); 4368 goto fail; 4369 } 4370 AUDIT_ARG_VNODE1(vp); 4371 loff = auio.uio_offset = foffset; 4372 #ifdef MAC 4373 error = mac_vnode_check_readdir(td->td_ucred, vp); 4374 if (error == 0) 4375 #endif 4376 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4377 NULL); 4378 foffset = auio.uio_offset; 4379 if (error != 0) { 4380 VOP_UNLOCK(vp); 4381 goto fail; 4382 } 4383 if (count == auio.uio_resid && 4384 (vp->v_vflag & VV_ROOT) && 4385 (vp->v_mount->mnt_flag & MNT_UNION)) { 4386 struct vnode *tvp = vp; 4387 4388 vp = vp->v_mount->mnt_vnodecovered; 4389 vref(vp); 4390 fp->f_vnode = vp; 4391 foffset = 0; 4392 vput(tvp); 4393 goto unionread; 4394 } 4395 VOP_UNLOCK(vp); 4396 *basep = loff; 4397 if (residp != NULL) 4398 *residp = auio.uio_resid; 4399 td->td_retval[0] = count - auio.uio_resid; 4400 fail: 4401 foffset_unlock(fp, foffset, 0); 4402 fdrop(fp, td); 4403 return (error); 4404 } 4405 4406 /* 4407 * Set the mode mask for creation of filesystem nodes. 4408 */ 4409 #ifndef _SYS_SYSPROTO_H_ 4410 struct umask_args { 4411 int newmask; 4412 }; 4413 #endif 4414 int 4415 sys_umask(struct thread *td, struct umask_args *uap) 4416 { 4417 struct pwddesc *pdp; 4418 4419 pdp = td->td_proc->p_pd; 4420 PWDDESC_XLOCK(pdp); 4421 td->td_retval[0] = pdp->pd_cmask; 4422 pdp->pd_cmask = uap->newmask & ALLPERMS; 4423 PWDDESC_XUNLOCK(pdp); 4424 return (0); 4425 } 4426 4427 /* 4428 * Void all references to file by ripping underlying filesystem away from 4429 * vnode. 
4430 */ 4431 #ifndef _SYS_SYSPROTO_H_ 4432 struct revoke_args { 4433 char *path; 4434 }; 4435 #endif 4436 int 4437 sys_revoke(struct thread *td, struct revoke_args *uap) 4438 { 4439 struct vnode *vp; 4440 struct vattr vattr; 4441 struct nameidata nd; 4442 int error; 4443 4444 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4445 uap->path); 4446 if ((error = namei(&nd)) != 0) 4447 return (error); 4448 vp = nd.ni_vp; 4449 NDFREE_PNBUF(&nd); 4450 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4451 error = EINVAL; 4452 goto out; 4453 } 4454 #ifdef MAC 4455 error = mac_vnode_check_revoke(td->td_ucred, vp); 4456 if (error != 0) 4457 goto out; 4458 #endif 4459 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4460 if (error != 0) 4461 goto out; 4462 if (td->td_ucred->cr_uid != vattr.va_uid) { 4463 error = priv_check(td, PRIV_VFS_ADMIN); 4464 if (error != 0) 4465 goto out; 4466 } 4467 if (devfs_usecount(vp) > 0) 4468 VOP_REVOKE(vp, REVOKEALL); 4469 out: 4470 vput(vp); 4471 return (error); 4472 } 4473 4474 /* 4475 * This variant of getvnode() allows O_PATH files. Caller should 4476 * ensure that returned file and vnode are only used for compatible 4477 * semantics. 4478 */ 4479 int 4480 getvnode_path(struct thread *td, int fd, const cap_rights_t *rightsp, 4481 uint8_t *flagsp, struct file **fpp) 4482 { 4483 struct file *fp; 4484 int error; 4485 4486 error = fget_unlocked_flags(td, fd, rightsp, flagsp, &fp); 4487 if (error != 0) 4488 return (error); 4489 4490 /* 4491 * The file could be not of the vnode type, or it may be not 4492 * yet fully initialized, in which case the f_vnode pointer 4493 * may be set, but f_ops is still badfileops. E.g., 4494 * devfs_open() transiently create such situation to 4495 * facilitate csw d_fdopen(). 4496 * 4497 * Dupfdopen() handling in kern_openat() installs the 4498 * half-baked file into the process descriptor table, allowing 4499 * other thread to dereference it. Guard against the race by 4500 * checking f_ops. 
4501 */ 4502 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4503 fdrop(fp, td); 4504 *fpp = NULL; 4505 return (EINVAL); 4506 } 4507 4508 *fpp = fp; 4509 return (0); 4510 } 4511 4512 /* 4513 * Convert a user file descriptor to a kernel file entry and check 4514 * that, if it is a capability, the correct rights are present. 4515 * A reference on the file entry is held upon returning. 4516 */ 4517 int 4518 getvnode(struct thread *td, int fd, const cap_rights_t *rightsp, 4519 struct file **fpp) 4520 { 4521 int error; 4522 4523 error = getvnode_path(td, fd, rightsp, NULL, fpp); 4524 if (__predict_false(error != 0)) 4525 return (error); 4526 4527 /* 4528 * Filter out O_PATH file descriptors, most getvnode() callers 4529 * do not call fo_ methods. 4530 */ 4531 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4532 fdrop(*fpp, td); 4533 *fpp = NULL; 4534 error = EBADF; 4535 } 4536 4537 return (error); 4538 } 4539 4540 /* 4541 * Get an (NFS) file handle. 4542 */ 4543 #ifndef _SYS_SYSPROTO_H_ 4544 struct lgetfh_args { 4545 char *fname; 4546 fhandle_t *fhp; 4547 }; 4548 #endif 4549 int 4550 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4551 { 4552 4553 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4554 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4555 } 4556 4557 #ifndef _SYS_SYSPROTO_H_ 4558 struct getfh_args { 4559 char *fname; 4560 fhandle_t *fhp; 4561 }; 4562 #endif 4563 int 4564 sys_getfh(struct thread *td, struct getfh_args *uap) 4565 { 4566 4567 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4568 uap->fhp, UIO_USERSPACE)); 4569 } 4570 4571 /* 4572 * syscall for the rpc.lockd to use to translate an open descriptor into 4573 * a NFS file handle. 4574 * 4575 * warning: do not remove the priv_check() call or this becomes one giant 4576 * security hole. 
4577 */ 4578 #ifndef _SYS_SYSPROTO_H_ 4579 struct getfhat_args { 4580 int fd; 4581 char *path; 4582 fhandle_t *fhp; 4583 int flags; 4584 }; 4585 #endif 4586 int 4587 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4588 { 4589 4590 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4591 uap->fhp, UIO_USERSPACE)); 4592 } 4593 4594 int 4595 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4596 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4597 { 4598 struct nameidata nd; 4599 fhandle_t fh; 4600 struct vnode *vp; 4601 int error; 4602 4603 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4604 return (EINVAL); 4605 error = priv_check(td, PRIV_VFS_GETFH); 4606 if (error != 0) 4607 return (error); 4608 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4609 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4610 fd); 4611 error = namei(&nd); 4612 if (error != 0) 4613 return (error); 4614 NDFREE_PNBUF(&nd); 4615 vp = nd.ni_vp; 4616 bzero(&fh, sizeof(fh)); 4617 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4618 error = VOP_VPTOFH(vp, &fh.fh_fid); 4619 vput(vp); 4620 if (error == 0) { 4621 if (fhseg == UIO_USERSPACE) 4622 error = copyout(&fh, fhp, sizeof (fh)); 4623 else 4624 memcpy(fhp, &fh, sizeof(fh)); 4625 } 4626 return (error); 4627 } 4628 4629 #ifndef _SYS_SYSPROTO_H_ 4630 struct fhlink_args { 4631 fhandle_t *fhp; 4632 const char *to; 4633 }; 4634 #endif 4635 int 4636 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4637 { 4638 4639 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4640 } 4641 4642 #ifndef _SYS_SYSPROTO_H_ 4643 struct fhlinkat_args { 4644 fhandle_t *fhp; 4645 int tofd; 4646 const char *to; 4647 }; 4648 #endif 4649 int 4650 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4651 { 4652 4653 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4654 } 4655 4656 static int 4657 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4658 enum uio_seg pathseg, fhandle_t *fhp) 4659 { 4660 fhandle_t fh; 4661 struct mount *mp; 4662 struct vnode *vp; 4663 int error; 4664 4665 error = priv_check(td, PRIV_VFS_GETFH); 4666 if (error != 0) 4667 return (error); 4668 error = copyin(fhp, &fh, sizeof(fh)); 4669 if (error != 0) 4670 return (error); 4671 do { 4672 bwillwrite(); 4673 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4674 return (ESTALE); 4675 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4676 vfs_unbusy(mp); 4677 if (error != 0) 4678 return (error); 4679 VOP_UNLOCK(vp); 4680 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4681 } while (error == EAGAIN || error == ERELOOKUP); 4682 return (error); 4683 } 4684 4685 #ifndef _SYS_SYSPROTO_H_ 4686 struct fhreadlink_args { 4687 fhandle_t *fhp; 4688 char *buf; 4689 size_t bufsize; 4690 }; 4691 #endif 4692 int 4693 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4694 { 4695 fhandle_t fh; 4696 struct mount *mp; 4697 struct vnode *vp; 4698 int error; 4699 4700 error = priv_check(td, PRIV_VFS_GETFH); 4701 if (error != 0) 4702 return (error); 4703 if (uap->bufsize > IOSIZE_MAX) 4704 return (EINVAL); 4705 error = copyin(uap->fhp, &fh, sizeof(fh)); 4706 if (error != 0) 4707 return (error); 4708 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4709 return (ESTALE); 4710 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4711 vfs_unbusy(mp); 4712 if (error != 0) 4713 return (error); 4714 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4715 vput(vp); 4716 return (error); 4717 } 4718 4719 /* 4720 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4721 * open descriptor. 4722 * 4723 * warning: do not remove the priv_check() call or this becomes one giant 4724 * security hole. 
4725 */ 4726 #ifndef _SYS_SYSPROTO_H_ 4727 struct fhopen_args { 4728 const struct fhandle *u_fhp; 4729 int flags; 4730 }; 4731 #endif 4732 int 4733 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4734 { 4735 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4736 } 4737 4738 int 4739 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4740 { 4741 struct mount *mp; 4742 struct vnode *vp; 4743 struct fhandle fhp; 4744 struct file *fp; 4745 int error, indx; 4746 bool named_attr; 4747 4748 error = priv_check(td, PRIV_VFS_FHOPEN); 4749 if (error != 0) 4750 return (error); 4751 4752 indx = -1; 4753 if ((flags & O_CREAT) != 0) 4754 return (EINVAL); 4755 error = openflags(&flags); 4756 if (error != 0) 4757 return (error); 4758 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4759 if (error != 0) 4760 return (error); 4761 /* find the mount point */ 4762 mp = vfs_busyfs(&fhp.fh_fsid); 4763 if (mp == NULL) 4764 return (ESTALE); 4765 /* now give me my vnode, it gets returned to me locked */ 4766 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4767 vfs_unbusy(mp); 4768 if (error != 0) 4769 return (error); 4770 4771 /* 4772 * Check to see if the file handle refers to a named attribute 4773 * directory or attribute. If it does, the O_NAMEDATTR flag 4774 * must have been specified. 4775 */ 4776 named_attr = (vn_irflag_read(vp) & 4777 (VIRF_NAMEDDIR | VIRF_NAMEDATTR)) != 0; 4778 if ((named_attr && (flags & O_NAMEDATTR) == 0) || 4779 (!named_attr && (flags & O_NAMEDATTR) != 0)) { 4780 vput(vp); 4781 return (ENOATTR); 4782 } 4783 4784 error = falloc_noinstall(td, &fp); 4785 if (error != 0) { 4786 vput(vp); 4787 return (error); 4788 } 4789 /* Set the flags early so the finit in devfs can pick them up. 
*/ 4790 fp->f_flag = flags & FMASK; 4791 4792 #ifdef INVARIANTS 4793 td->td_dupfd = -1; 4794 #endif 4795 error = vn_open_vnode(vp, flags, td->td_ucred, td, fp); 4796 if (error != 0) { 4797 KASSERT(fp->f_ops == &badfileops, 4798 ("VOP_OPEN in fhopen() set f_ops")); 4799 KASSERT(td->td_dupfd < 0, 4800 ("fhopen() encountered fdopen()")); 4801 4802 vput(vp); 4803 goto bad; 4804 } 4805 #ifdef INVARIANTS 4806 td->td_dupfd = 0; 4807 #endif 4808 finit_open(fp, vp, flags); 4809 VOP_UNLOCK(vp); 4810 if ((flags & O_TRUNC) != 0) { 4811 error = fo_truncate(fp, 0, td->td_ucred, td); 4812 if (error != 0) 4813 goto bad; 4814 } 4815 4816 error = finstall(td, fp, &indx, flags, NULL); 4817 bad: 4818 fdrop(fp, td); 4819 td->td_retval[0] = indx; 4820 return (error); 4821 } 4822 4823 /* 4824 * Stat an (NFS) file handle. 4825 */ 4826 #ifndef _SYS_SYSPROTO_H_ 4827 struct fhstat_args { 4828 struct fhandle *u_fhp; 4829 struct stat *sb; 4830 }; 4831 #endif 4832 int 4833 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4834 { 4835 struct stat sb; 4836 struct fhandle fh; 4837 int error; 4838 4839 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4840 if (error != 0) 4841 return (error); 4842 error = kern_fhstat(td, fh, &sb); 4843 if (error == 0) 4844 error = copyout(&sb, uap->sb, sizeof(sb)); 4845 return (error); 4846 } 4847 4848 int 4849 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4850 { 4851 struct mount *mp; 4852 struct vnode *vp; 4853 int error; 4854 4855 error = priv_check(td, PRIV_VFS_FHSTAT); 4856 if (error != 0) 4857 return (error); 4858 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4859 return (ESTALE); 4860 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4861 vfs_unbusy(mp); 4862 if (error != 0) 4863 return (error); 4864 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4865 vput(vp); 4866 return (error); 4867 } 4868 4869 /* 4870 * Implement fstatfs() for (NFS) file handles. 
4871 */ 4872 #ifndef _SYS_SYSPROTO_H_ 4873 struct fhstatfs_args { 4874 struct fhandle *u_fhp; 4875 struct statfs *buf; 4876 }; 4877 #endif 4878 int 4879 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4880 { 4881 struct statfs *sfp; 4882 fhandle_t fh; 4883 int error; 4884 4885 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4886 if (error != 0) 4887 return (error); 4888 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4889 error = kern_fhstatfs(td, fh, sfp); 4890 if (error == 0) 4891 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4892 free(sfp, M_STATFS); 4893 return (error); 4894 } 4895 4896 int 4897 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4898 { 4899 struct mount *mp; 4900 struct vnode *vp; 4901 int error; 4902 4903 error = priv_check(td, PRIV_VFS_FHSTATFS); 4904 if (error != 0) 4905 return (error); 4906 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4907 return (ESTALE); 4908 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4909 if (error != 0) { 4910 vfs_unbusy(mp); 4911 return (error); 4912 } 4913 vput(vp); 4914 error = prison_canseemount(td->td_ucred, mp); 4915 if (error != 0) 4916 goto out; 4917 #ifdef MAC 4918 error = mac_mount_check_stat(td->td_ucred, mp); 4919 if (error != 0) 4920 goto out; 4921 #endif 4922 error = VFS_STATFS(mp, buf); 4923 out: 4924 vfs_unbusy(mp); 4925 return (error); 4926 } 4927 4928 /* 4929 * Unlike madvise(2), we do not make a best effort to remember every 4930 * possible caching hint. Instead, we remember the last setting with 4931 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4932 * region of any current setting. 
/*
 * Back end of posix_fadvise(2): record or apply access-pattern 'advice'
 * for the byte range [offset, offset + len) of descriptor 'fd'.  A 'len'
 * of zero extends the range to OFF_MAX.
 *
 * Returns 0 or an errno value: EINVAL for a negative or overflowing range
 * or an unknown advice, ESPIPE when the file's operations are not marked
 * seekable, ENODEV when the descriptor is not a regular-file vnode, or the
 * error from fget() or VOP_ADVISE().
 */
int
kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len,
    int advice)
{
	struct fadvise_info *fa, *new;
	struct file *fp;
	struct vnode *vp;
	off_t end;
	int error;

	/* The last clause rejects ranges whose end would exceed OFF_MAX. */
	if (offset < 0 || len < 0 || offset > OFF_MAX - len)
		return (EINVAL);
	AUDIT_ARG_VALUE(advice);
	switch (advice) {
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_NOREUSE:
		/*
		 * These advices may need a fresh record; allocate it here,
		 * before the pool mutex is taken further down.
		 */
		new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK);
		break;
	case POSIX_FADV_NORMAL:
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		new = NULL;
		break;
	default:
		return (EINVAL);
	}
	/* XXX: CAP_POSIX_FADVISE? */
	AUDIT_ARG_FD(fd);
	error = fget(td, fd, &cap_no_rights, &fp);
	if (error != 0)
		goto out;
	AUDIT_ARG_FILE(td->td_proc, fp);
	if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) {
		error = ESPIPE;
		goto out;
	}
	if (fp->f_type != DTYPE_VNODE) {
		error = ENODEV;
		goto out;
	}
	vp = fp->f_vnode;
	if (vp->v_type != VREG) {
		error = ENODEV;
		goto out;
	}
	/* 'end' is the inclusive last byte of the advised range. */
	if (len == 0)
		end = OFF_MAX;
	else
		end = offset + len - 1;
	switch (advice) {
	case POSIX_FADV_SEQUENTIAL:
	case POSIX_FADV_RANDOM:
	case POSIX_FADV_NOREUSE:
		/*
		 * Try to merge any existing non-standard region with
		 * this new region if possible, otherwise create a new
		 * non-standard region for this request.
		 */
		mtx_pool_lock(mtxpool_sleep, fp);
		fa = fp->f_advice;
		/*
		 * Merge when the advice matches and the two ranges overlap
		 * or are directly adjacent; the OFF_MAX tests keep the
		 * "+ 1" adjacency checks from overflowing.
		 */
		if (fa != NULL && fa->fa_advice == advice &&
		    ((fa->fa_start <= end && fa->fa_end >= offset) ||
		    (end != OFF_MAX && fa->fa_start == end + 1) ||
		    (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) {
			if (offset < fa->fa_start)
				fa->fa_start = offset;
			if (end > fa->fa_end)
				fa->fa_end = end;
		} else {
			new->fa_advice = advice;
			new->fa_start = offset;
			new->fa_end = end;
			fp->f_advice = new;
			/*
			 * Swap: 'new' now names the replaced record (or
			 * NULL) so that it is the one freed at "out".
			 */
			new = fa;
		}
		mtx_pool_unlock(mtxpool_sleep, fp);
		break;
	case POSIX_FADV_NORMAL:
		/*
		 * If the "normal" region overlaps with an existing
		 * non-standard region, trim or remove the
		 * non-standard region.
		 */
		mtx_pool_lock(mtxpool_sleep, fp);
		fa = fp->f_advice;
		if (fa != NULL) {
			if (offset <= fa->fa_start && end >= fa->fa_end) {
				/* Fully covered: detach it, free at "out". */
				new = fa;
				fp->f_advice = NULL;
			} else if (offset <= fa->fa_start &&
			    end >= fa->fa_start)
				/* Overlaps the front: trim the start. */
				fa->fa_start = end + 1;
			else if (offset <= fa->fa_end && end >= fa->fa_end)
				/* Overlaps the tail: trim the end. */
				fa->fa_end = offset - 1;
			else if (offset >= fa->fa_start && end <= fa->fa_end) {
				/*
				 * If the "normal" region is a middle
				 * portion of the existing
				 * non-standard region, just remove
				 * the whole thing rather than picking
				 * one side or the other to
				 * preserve.
				 */
				new = fa;
				fp->f_advice = NULL;
			}
		}
		mtx_pool_unlock(mtxpool_sleep, fp);
		break;
	case POSIX_FADV_WILLNEED:
	case POSIX_FADV_DONTNEED:
		/* Nothing is recorded; the request goes to the filesystem. */
		error = VOP_ADVISE(vp, offset, end, advice);
		break;
	}
out:
	/*
	 * NOTE(review): the fget() failure path reaches this test, so it
	 * relies on fget() leaving fp NULL on error - confirm.
	 */
	if (fp != NULL)
		fdrop(fp, td);
	/* Frees the unused preallocation or a replaced/detached record. */
	free(new, M_FADVISE);
	return (error);
}
&cap_pwrite_rights : &cap_write_rights, &outfp); 5112 if (error != 0) 5113 goto out; 5114 if (outfp->f_ops == &badfileops) { 5115 error = EBADF; 5116 goto out; 5117 } 5118 if (outfp->f_vnode == NULL) { 5119 error = EINVAL; 5120 goto out; 5121 } 5122 5123 /* 5124 * Figure out which file offsets we're reading from and writing to. 5125 * If the offsets come from the file descriptions, we need to lock them, 5126 * and locking both offsets requires a loop to avoid deadlocks. 5127 */ 5128 infp1 = outfp1 = NULL; 5129 if (inoffp != NULL) 5130 inoff = *inoffp; 5131 else 5132 infp1 = infp; 5133 if (outoffp != NULL) 5134 outoff = *outoffp; 5135 else 5136 outfp1 = outfp; 5137 if (infp1 != NULL || outfp1 != NULL) { 5138 if (infp1 == outfp1) { 5139 /* 5140 * Overlapping ranges are not allowed. A more thorough 5141 * check appears below, but we must not lock the same 5142 * offset twice. 5143 */ 5144 error = EINVAL; 5145 goto out; 5146 } 5147 foffset_lock_pair(infp1, &inoff, outfp1, &outoff, 0); 5148 foffsets_locked = true; 5149 } else { 5150 foffsets_set = true; 5151 } 5152 savinoff = inoff; 5153 savoutoff = outoff; 5154 5155 invp = infp->f_vnode; 5156 outvp = outfp->f_vnode; 5157 /* Sanity check the f_flag bits. */ 5158 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 5159 (infp->f_flag & FREAD) == 0) { 5160 error = EBADF; 5161 goto out; 5162 } 5163 5164 /* If len == 0, just return 0. */ 5165 if (len == 0) 5166 goto out; 5167 5168 /* 5169 * Make sure that the ranges we check and lock below are valid. Note 5170 * that len is clamped to SSIZE_MAX above. 5171 */ 5172 if (inoff < 0 || outoff < 0) { 5173 error = EINVAL; 5174 goto out; 5175 } 5176 5177 /* 5178 * If infp and outfp refer to the same file, the byte ranges cannot 5179 * overlap. 
5180 */ 5181 if (invp == outvp) { 5182 if ((inoff <= outoff && inoff + len > outoff) || 5183 (inoff > outoff && outoff + len > inoff)) { 5184 error = EINVAL; 5185 goto out; 5186 } 5187 rangelock_may_recurse(&invp->v_rl); 5188 } 5189 5190 /* Range lock the byte ranges for both invp and outvp. */ 5191 for (;;) { 5192 rl_wcookie = vn_rangelock_wlock(outvp, outoff, outoff + len); 5193 rl_rcookie = vn_rangelock_tryrlock(invp, inoff, inoff + len); 5194 if (rl_rcookie != NULL) 5195 break; 5196 vn_rangelock_unlock(outvp, rl_wcookie); 5197 rl_rcookie = vn_rangelock_rlock(invp, inoff, inoff + len); 5198 vn_rangelock_unlock(invp, rl_rcookie); 5199 } 5200 5201 retlen = len; 5202 error = vn_copy_file_range(invp, &inoff, outvp, &outoff, &retlen, 5203 flags, infp->f_cred, outfp->f_cred, td); 5204 out: 5205 if (rl_rcookie != NULL) 5206 vn_rangelock_unlock(invp, rl_rcookie); 5207 if (rl_wcookie != NULL) 5208 vn_rangelock_unlock(outvp, rl_wcookie); 5209 if ((foffsets_locked || foffsets_set) && 5210 (error == EINTR || error == ERESTART)) { 5211 inoff = savinoff; 5212 outoff = savoutoff; 5213 } 5214 if (foffsets_locked) { 5215 if (inoffp == NULL) 5216 foffset_unlock(infp, inoff, 0); 5217 else 5218 *inoffp = inoff; 5219 if (outoffp == NULL) 5220 foffset_unlock(outfp, outoff, 0); 5221 else 5222 *outoffp = outoff; 5223 } else if (foffsets_set) { 5224 *inoffp = inoff; 5225 *outoffp = outoff; 5226 } 5227 if (outfp != NULL) 5228 fdrop(outfp, td); 5229 if (infp != NULL) 5230 fdrop(infp, td); 5231 td->td_retval[0] = retlen; 5232 return (error); 5233 } 5234 5235 int 5236 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5237 { 5238 off_t inoff, outoff, *inoffp, *outoffp; 5239 int error; 5240 5241 inoffp = outoffp = NULL; 5242 if (uap->inoffp != NULL) { 5243 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5244 if (error != 0) 5245 return (error); 5246 inoffp = &inoff; 5247 } 5248 if (uap->outoffp != NULL) { 5249 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 
5250 if (error != 0) 5251 return (error); 5252 outoffp = &outoff; 5253 } 5254 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5255 outoffp, uap->len, uap->flags); 5256 if (error == 0 && uap->inoffp != NULL) 5257 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5258 if (error == 0 && uap->outoffp != NULL) 5259 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5260 return (error); 5261 } 5262