1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <fs/devfs/devfs.h> 91 92 #include <ufs/ufs/quota.h> 93 94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 95 96 static int kern_chflagsat(struct thread *td, int fd, const char *path, 97 enum uio_seg pathseg, u_long flags, int atflag); 98 static int setfflags(struct thread *td, struct vnode *, u_long); 99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100 static int getutimens(const struct timespec *, enum uio_seg, 101 struct timespec *, int *); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 static 
int kern_fhlinkat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, fhandle_t *fhp);
static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg,
    size_t count, struct thread *td);
static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd,
    const char *path, enum uio_seg segflag);

/*
 * Translate AT_* flags into the corresponding namei flags.
 *
 * Only the bits of 'at_flags' that are also set in 'mask' are considered.
 * AT_RESOLVE_BENEATH maps to RBENEATH and AT_SYMLINK_FOLLOW maps to FOLLOW.
 * When the caller lists AT_SYMLINK_NOFOLLOW in 'mask', the result always
 * carries exactly one of NOFOLLOW (flag given) or FOLLOW (flag absent).
 *
 * Passing both AT_SYMLINK_FOLLOW and AT_SYMLINK_NOFOLLOW is a caller bug
 * and trips the MPASS() below.
 */
static uint64_t
at2cnpflags(u_int at_flags, u_int mask)
{
	u_int64_t res;

	MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) !=
	    (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW));

	res = 0;
	at_flags &= mask;
	if ((at_flags & AT_RESOLVE_BENEATH) != 0)
		res |= RBENEATH;
	if ((at_flags & AT_SYMLINK_FOLLOW) != 0)
		res |= FOLLOW;
	/* NOFOLLOW is pseudo flag */
	if ((mask & AT_SYMLINK_NOFOLLOW) != 0) {
		res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW :
		    FOLLOW;
	}
	return (res);
}

/*
 * Kick off a sync of every mounted filesystem without waiting.
 *
 * Walks the global mount list.  A mount is skipped when it cannot be
 * busied without sleeping, when it is read-only, or when a write cannot
 * be started on it with V_NOWAIT.  For every other mount vfs_periodic()
 * and VFS_SYNC() are called with MNT_NOWAIT while TDP_SYNCIO is set on
 * the current thread.
 *
 * 'td' is not used.  The function always returns 0; the result of
 * VFS_SYNC() is ignored.
 */
int
kern_sync(struct thread *td)
{
	struct mount *mp, *nmp;
	int save;

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) {
			/* Could not busy the mount without waiting; skip it. */
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
		if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
		    vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
			save = curthread_pflags_set(TDP_SYNCIO);
			vfs_periodic(mp, MNT_NOWAIT);
			VFS_SYNC(mp, MNT_NOWAIT);
			curthread_pflags_restore(save);
			vn_finished_write(mp);
		}
		/*
		 * Retake the list mutex before following the list link and
		 * only then drop the busy reference.
		 * NOTE(review): this relies on a successful
		 * vfs_busy(MBF_MNTLSTLOCK) having returned with
		 * mountlist_mtx released -- confirm against vfs_busy().
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
	return (0);
}

/*
 * Sync each mounted filesystem.
 */
#ifndef _SYS_SYSPROTO_H_
struct sync_args {
	int dummy;
};
#endif
/* ARGSUSED */
int
sys_sync(struct thread *td, struct sync_args *uap)
{

	/* The system call takes no arguments; all work is in kern_sync(). */
	return (kern_sync(td));
}

/*
 * Change filesystem quotas.
 *
 * The mount to operate on is the one holding the vnode that uap->path
 * resolves to.  Fails with EPERM when the caller's prison does not have
 * PR_ALLOW_QUOTAS; otherwise returns the namei(), vfs_busy() or
 * VFS_QUOTACTL() error, or 0.
 */
#ifndef _SYS_SYSPROTO_H_
struct quotactl_args {
	char *path;
	int cmd;
	int uid;
	caddr_t arg;
};
#endif
int
sys_quotactl(struct thread *td, struct quotactl_args *uap)
{
	struct mount *mp;
	struct nameidata nd;
	int error;

	AUDIT_ARG_CMD(uap->cmd);
	AUDIT_ARG_UID(uap->uid);
	if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
		return (EPERM);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE,
	    uap->path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/*
	 * Take a reference on the mount before the vnode is released, so
	 * that 'mp' stays usable for the vfs_busy() call below.
	 */
	mp = nd.ni_vp->v_mount;
	vfs_ref(mp);
	vput(nd.ni_vp);
	error = vfs_busy(mp, 0);
	if (error != 0) {
		vfs_rel(mp);
		return (error);
	}
	error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg);

	/*
	 * Since quota on operation typically needs to open quota
	 * file, the Q_QUOTAON handler needs to unbusy the mount point
	 * before calling into namei.  Otherwise, unmount might be
	 * started between two vfs_busy() invocations (first is our,
	 * second is from mount point cross-walk code in lookup()),
	 * causing deadlock.
	 *
	 * Require that Q_QUOTAON handles the vfs_busy() reference on
	 * its own, always returning with unbusied mount point.
	 *
	 * The check below applies the same rule to Q_QUOTAOFF: for both
	 * subcommands the mount is not unbusied here.
	 */
	if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON &&
	    (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF)
		vfs_unbusy(mp);
	vfs_rel(mp);
	return (error);
}

/*
 * Used by statfs conversion routines to scale the block size up if
 * necessary so that all of the block counts are <= 'max_size'.  Note
 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero
 * value of 'n'.
239 */ 240 void 241 statfs_scale_blocks(struct statfs *sf, long max_size) 242 { 243 uint64_t count; 244 int shift; 245 246 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 247 248 /* 249 * Attempt to scale the block counts to give a more accurate 250 * overview to userland of the ratio of free space to used 251 * space. To do this, find the largest block count and compute 252 * a divisor that lets it fit into a signed integer <= max_size. 253 */ 254 if (sf->f_bavail < 0) 255 count = -sf->f_bavail; 256 else 257 count = sf->f_bavail; 258 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 259 if (count <= max_size) 260 return; 261 262 count >>= flsl(max_size); 263 shift = 0; 264 while (count > 0) { 265 shift++; 266 count >>=1; 267 } 268 269 sf->f_bsize <<= shift; 270 sf->f_blocks >>= shift; 271 sf->f_bfree >>= shift; 272 sf->f_bavail >>= shift; 273 } 274 275 static int 276 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 277 { 278 int error; 279 280 if (mp == NULL) 281 return (EBADF); 282 error = vfs_busy(mp, 0); 283 vfs_rel(mp); 284 if (error != 0) 285 return (error); 286 #ifdef MAC 287 error = mac_mount_check_stat(td->td_ucred, mp); 288 if (error != 0) 289 goto out; 290 #endif 291 error = VFS_STATFS(mp, buf); 292 if (error != 0) 293 goto out; 294 if (priv_check_cred_vfs_generation(td->td_ucred)) { 295 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 296 prison_enforce_statfs(td->td_ucred, mp, buf); 297 } 298 out: 299 vfs_unbusy(mp); 300 return (error); 301 } 302 303 /* 304 * Get filesystem statistics. 
305 */ 306 #ifndef _SYS_SYSPROTO_H_ 307 struct statfs_args { 308 char *path; 309 struct statfs *buf; 310 }; 311 #endif 312 int 313 sys_statfs(struct thread *td, struct statfs_args *uap) 314 { 315 struct statfs *sfp; 316 int error; 317 318 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 319 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 320 if (error == 0) 321 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 322 free(sfp, M_STATFS); 323 return (error); 324 } 325 326 int 327 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 328 struct statfs *buf) 329 { 330 struct mount *mp; 331 struct nameidata nd; 332 int error; 333 334 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 335 error = namei(&nd); 336 if (error != 0) 337 return (error); 338 mp = vfs_ref_from_vp(nd.ni_vp); 339 NDFREE_NOTHING(&nd); 340 vrele(nd.ni_vp); 341 return (kern_do_statfs(td, mp, buf)); 342 } 343 344 /* 345 * Get filesystem statistics. 346 */ 347 #ifndef _SYS_SYSPROTO_H_ 348 struct fstatfs_args { 349 int fd; 350 struct statfs *buf; 351 }; 352 #endif 353 int 354 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 355 { 356 struct statfs *sfp; 357 int error; 358 359 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 360 error = kern_fstatfs(td, uap->fd, sfp); 361 if (error == 0) 362 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 363 free(sfp, M_STATFS); 364 return (error); 365 } 366 367 int 368 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 369 { 370 struct file *fp; 371 struct mount *mp; 372 struct vnode *vp; 373 int error; 374 375 AUDIT_ARG_FD(fd); 376 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 377 if (error != 0) 378 return (error); 379 vp = fp->f_vnode; 380 #ifdef AUDIT 381 if (AUDITING_TD(td)) { 382 vn_lock(vp, LK_SHARED | LK_RETRY); 383 AUDIT_ARG_VNODE1(vp); 384 VOP_UNLOCK(vp); 385 } 386 #endif 387 mp = vfs_ref_from_vp(vp); 388 fdrop(fp, td); 389 return (kern_do_statfs(td, mp, buf)); 390 } 391 392 
/*
 * Get statistics on all filesystems.
 */
#ifndef _SYS_SYSPROTO_H_
struct getfsstat_args {
	struct statfs *buf;
	long bufsize;
	int mode;
};
#endif
int
sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
{
	size_t count;
	int error;

	/* Reject sizes that cannot be represented as the size_t passed on. */
	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
		return (EINVAL);
	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
	    UIO_USERSPACE, uap->mode);
	if (error == 0)
		td->td_retval[0] = count;
	return (error);
}

/*
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 *	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 *
 * Collect statistics for the mounted filesystems visible to 'td'.
 *
 * 'mode' must be MNT_WAIT or MNT_NOWAIT; anything else fails with EINVAL
 * (and stores NULL in '*buf' for UIO_SYSSPACE callers).
 *
 * bufsize == 0: nothing is written; '*countp' receives the number of
 *	mounts that pass prison_canseemount() (and the MAC check, if
 *	compiled in).
 * bufseg == UIO_USERSPACE: '*buf' is the destination and each entry is
 *	written with copyout(); a copyout() failure is returned at once and
 *	'*countp' is left untouched.
 * bufseg == UIO_SYSSPACE: an M_STATFS array is allocated, sized for the
 *	smaller of bufsize / sizeof(struct statfs) and the current length
 *	of the mount list, and stored in '*buf'.
 *
 * On success '*countp' holds the number of entries produced, which is at
 * most bufsize / sizeof(struct statfs).
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int mode)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, *sptmp, *tofree;
	size_t count, maxcount;
	int error;

	switch (mode) {
	case MNT_WAIT:
	case MNT_NOWAIT:
		break;
	default:
		if (bufseg == UIO_SYSSPACE)
			*buf = NULL;
		return (EINVAL);
	}
restart:
	/*
	 * Everything from here on is redone after a restart, including the
	 * UIO_SYSSPACE allocation, which the restart path frees first.
	 */
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0) {
		sfsp = NULL;
		tofree = NULL;
	} else if (bufseg == UIO_USERSPACE) {
		sfsp = *buf;
		tofree = NULL;
	} else /* if (bufseg == UIO_SYSSPACE) */ {
		/* Do not allocate more entries than there are mounts. */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
		    M_STATFS, M_WAITOK);
	}

	count = 0;

	/*
	 * If there is no target buffer they only want the count.
	 *
	 * This could be TAILQ_FOREACH but it is open-coded to match the original
	 * code below.
	 */
	if (sfsp == NULL) {
		mtx_lock(&mountlist_mtx);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (prison_canseemount(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#ifdef MAC
			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#endif
			count++;
			nmp = TAILQ_NEXT(mp, mnt_list);
		}
		mtx_unlock(&mountlist_mtx);
		*countp = count;
		return (0);
	}

	/*
	 * They want the entire thing.
	 *
	 * Short-circuit the corner case of no room for anything, avoids
	 * relocking below.
	 */
	if (maxcount < 1) {
		goto out;
	}

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (mode == MNT_WAIT) {
			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
				/*
				 * If vfs_busy() failed, and MBF_NOWAIT
				 * wasn't passed, then the mp is gone.
				 * Furthermore, because of MBF_MNTLSTLOCK,
				 * the mountlist_mtx was dropped.  We have
				 * no other choice than to start over.
				 */
				mtx_unlock(&mountlist_mtx);
				free(tofree, M_STATFS);
				goto restart;
			}
		} else {
			/* MNT_NOWAIT: skip mounts that cannot be busied now. */
			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
		}
		sp = &mp->mnt_stat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh
		 * the fsstat cache.
		 */
		if (mode != MNT_NOWAIT) {
			error = VFS_STATFS(mp, sp);
			if (error != 0) {
				/*
				 * A filesystem that fails to report is
				 * left out of the result, not treated as
				 * an error for the whole call.
				 */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
		}
		if (priv_check_cred_vfs_generation(td->td_ucred)) {
			/*
			 * Work on a private copy so that the values adjusted
			 * for this caller do not end up in mp->mnt_stat.
			 */
			sptmp = malloc(sizeof(struct statfs), M_STATFS,
			    M_WAITOK);
			*sptmp = *sp;
			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
			prison_enforce_statfs(td->td_ucred, mp, sptmp);
			sp = sptmp;
		} else
			sptmp = NULL;
		if (bufseg == UIO_SYSSPACE) {
			bcopy(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
		} else /* if (bufseg == UIO_USERSPACE) */ {
			error = copyout(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
			if (error != 0) {
				/* mountlist_mtx is not held at this point. */
				vfs_unbusy(mp);
				return (error);
			}
		}
		sfsp++;
		count++;

		if (count == maxcount) {
			/* Buffer is full; leave without retaking the mutex. */
			vfs_unbusy(mp);
			goto out;
		}

		/* Retake the list mutex before following the list link. */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
out:
	*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
int
freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
{
	struct ostatfs osb;
	struct statfs *sfp;
	int error;

	/* Fetch in the current format, then convert and copy out. */
	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
	if (error == 0) {
		freebsd4_cvtstatfs(sfp, &osb);
		error = copyout(&osb, uap->buf, sizeof(osb));
	}
	free(sfp, M_STATFS);
	return (error);
}

/*
 * Get filesystem statistics.
614 */ 615 #ifndef _SYS_SYSPROTO_H_ 616 struct freebsd4_fstatfs_args { 617 int fd; 618 struct ostatfs *buf; 619 }; 620 #endif 621 int 622 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 623 { 624 struct ostatfs osb; 625 struct statfs *sfp; 626 int error; 627 628 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 629 error = kern_fstatfs(td, uap->fd, sfp); 630 if (error == 0) { 631 freebsd4_cvtstatfs(sfp, &osb); 632 error = copyout(&osb, uap->buf, sizeof(osb)); 633 } 634 free(sfp, M_STATFS); 635 return (error); 636 } 637 638 /* 639 * Get statistics on all filesystems. 640 */ 641 #ifndef _SYS_SYSPROTO_H_ 642 struct freebsd4_getfsstat_args { 643 struct ostatfs *buf; 644 long bufsize; 645 int mode; 646 }; 647 #endif 648 int 649 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 650 { 651 struct statfs *buf, *sp; 652 struct ostatfs osb; 653 size_t count, size; 654 int error; 655 656 if (uap->bufsize < 0) 657 return (EINVAL); 658 count = uap->bufsize / sizeof(struct ostatfs); 659 if (count > SIZE_MAX / sizeof(struct statfs)) 660 return (EINVAL); 661 size = count * sizeof(struct statfs); 662 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 663 uap->mode); 664 if (error == 0) 665 td->td_retval[0] = count; 666 if (size != 0) { 667 sp = buf; 668 while (count != 0 && error == 0) { 669 freebsd4_cvtstatfs(sp, &osb); 670 error = copyout(&osb, uap->buf, sizeof(osb)); 671 sp++; 672 uap->buf++; 673 count--; 674 } 675 free(buf, M_STATFS); 676 } 677 return (error); 678 } 679 680 /* 681 * Implement fstatfs() for (NFS) file handles. 
682 */ 683 #ifndef _SYS_SYSPROTO_H_ 684 struct freebsd4_fhstatfs_args { 685 struct fhandle *u_fhp; 686 struct ostatfs *buf; 687 }; 688 #endif 689 int 690 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 691 { 692 struct ostatfs osb; 693 struct statfs *sfp; 694 fhandle_t fh; 695 int error; 696 697 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 698 if (error != 0) 699 return (error); 700 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 701 error = kern_fhstatfs(td, fh, sfp); 702 if (error == 0) { 703 freebsd4_cvtstatfs(sfp, &osb); 704 error = copyout(&osb, uap->buf, sizeof(osb)); 705 } 706 free(sfp, M_STATFS); 707 return (error); 708 } 709 710 /* 711 * Convert a new format statfs structure to an old format statfs structure. 712 */ 713 static void 714 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 715 { 716 717 statfs_scale_blocks(nsp, LONG_MAX); 718 bzero(osp, sizeof(*osp)); 719 osp->f_bsize = nsp->f_bsize; 720 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 721 osp->f_blocks = nsp->f_blocks; 722 osp->f_bfree = nsp->f_bfree; 723 osp->f_bavail = nsp->f_bavail; 724 osp->f_files = MIN(nsp->f_files, LONG_MAX); 725 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 726 osp->f_owner = nsp->f_owner; 727 osp->f_type = nsp->f_type; 728 osp->f_flags = nsp->f_flags; 729 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 730 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 731 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 732 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 733 strlcpy(osp->f_fstypename, nsp->f_fstypename, 734 MIN(MFSNAMELEN, OMFSNAMELEN)); 735 strlcpy(osp->f_mntonname, nsp->f_mntonname, 736 MIN(MNAMELEN, OMNAMELEN)); 737 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 738 MIN(MNAMELEN, OMNAMELEN)); 739 osp->f_fsid = nsp->f_fsid; 740 } 741 #endif /* COMPAT_FREEBSD4 */ 742 743 #if defined(COMPAT_FREEBSD11) 744 /* 745 * Get old format filesystem statistics. 
746 */ 747 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 748 749 int 750 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 751 { 752 struct freebsd11_statfs osb; 753 struct statfs *sfp; 754 int error; 755 756 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 757 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 758 if (error == 0) { 759 freebsd11_cvtstatfs(sfp, &osb); 760 error = copyout(&osb, uap->buf, sizeof(osb)); 761 } 762 free(sfp, M_STATFS); 763 return (error); 764 } 765 766 /* 767 * Get filesystem statistics. 768 */ 769 int 770 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 771 { 772 struct freebsd11_statfs osb; 773 struct statfs *sfp; 774 int error; 775 776 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 777 error = kern_fstatfs(td, uap->fd, sfp); 778 if (error == 0) { 779 freebsd11_cvtstatfs(sfp, &osb); 780 error = copyout(&osb, uap->buf, sizeof(osb)); 781 } 782 free(sfp, M_STATFS); 783 return (error); 784 } 785 786 /* 787 * Get statistics on all filesystems. 788 */ 789 int 790 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 791 { 792 struct freebsd11_statfs osb; 793 struct statfs *buf, *sp; 794 size_t count, size; 795 int error; 796 797 count = uap->bufsize / sizeof(struct ostatfs); 798 size = count * sizeof(struct statfs); 799 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 800 uap->mode); 801 if (error == 0) 802 td->td_retval[0] = count; 803 if (size > 0) { 804 sp = buf; 805 while (count > 0 && error == 0) { 806 freebsd11_cvtstatfs(sp, &osb); 807 error = copyout(&osb, uap->buf, sizeof(osb)); 808 sp++; 809 uap->buf++; 810 count--; 811 } 812 free(buf, M_STATFS); 813 } 814 return (error); 815 } 816 817 /* 818 * Implement fstatfs() for (NFS) file handles. 
819 */ 820 int 821 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 822 { 823 struct freebsd11_statfs osb; 824 struct statfs *sfp; 825 fhandle_t fh; 826 int error; 827 828 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 829 if (error) 830 return (error); 831 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 832 error = kern_fhstatfs(td, fh, sfp); 833 if (error == 0) { 834 freebsd11_cvtstatfs(sfp, &osb); 835 error = copyout(&osb, uap->buf, sizeof(osb)); 836 } 837 free(sfp, M_STATFS); 838 return (error); 839 } 840 841 /* 842 * Convert a new format statfs structure to an old format statfs structure. 843 */ 844 static void 845 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 846 { 847 848 bzero(osp, sizeof(*osp)); 849 osp->f_version = FREEBSD11_STATFS_VERSION; 850 osp->f_type = nsp->f_type; 851 osp->f_flags = nsp->f_flags; 852 osp->f_bsize = nsp->f_bsize; 853 osp->f_iosize = nsp->f_iosize; 854 osp->f_blocks = nsp->f_blocks; 855 osp->f_bfree = nsp->f_bfree; 856 osp->f_bavail = nsp->f_bavail; 857 osp->f_files = nsp->f_files; 858 osp->f_ffree = nsp->f_ffree; 859 osp->f_syncwrites = nsp->f_syncwrites; 860 osp->f_asyncwrites = nsp->f_asyncwrites; 861 osp->f_syncreads = nsp->f_syncreads; 862 osp->f_asyncreads = nsp->f_asyncreads; 863 osp->f_namemax = nsp->f_namemax; 864 osp->f_owner = nsp->f_owner; 865 osp->f_fsid = nsp->f_fsid; 866 strlcpy(osp->f_fstypename, nsp->f_fstypename, 867 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 868 strlcpy(osp->f_mntonname, nsp->f_mntonname, 869 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 870 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 871 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 872 } 873 #endif /* COMPAT_FREEBSD11 */ 874 875 /* 876 * Change current working directory to a given file descriptor. 
 *
 * If the directory has a filesystem mounted on it, the root of that
 * filesystem becomes the working directory instead; this is repeated for
 * as long as the vnode reached is itself mounted on.
 */
#ifndef _SYS_SYSPROTO_H_
struct fchdir_args {
	int fd;
};
#endif
int
sys_fchdir(struct thread *td, struct fchdir_args *uap)
{
	struct vnode *vp, *tdp;
	struct mount *mp;
	struct file *fp;
	int error;

	AUDIT_ARG_FD(uap->fd);
	error = getvnode(td, uap->fd, &cap_fchdir_rights,
	    &fp);
	if (error != 0)
		return (error);
	vp = fp->f_vnode;
	/* Take our own vnode reference before dropping the file. */
	vrefact(vp);
	fdrop(fp, td);
	vn_lock(vp, LK_SHARED | LK_RETRY);
	AUDIT_ARG_VNODE1(vp);
	error = change_dir(vp, td);
	while (!error && (mp = vp->v_mountedhere) != NULL) {
		/*
		 * If the mount cannot be busied, go around again and
		 * re-read v_mountedhere.
		 */
		if (vfs_busy(mp, 0))
			continue;
		error = VFS_ROOT(mp, LK_SHARED, &tdp);
		vfs_unbusy(mp);
		if (error != 0)
			break;
		/* Swap the covered vnode for the root of the mount. */
		vput(vp);
		vp = tdp;
	}
	if (error != 0) {
		vput(vp);
		return (error);
	}
	/*
	 * Unlock but keep the reference when handing the vnode to
	 * pwd_chdir().
	 */
	VOP_UNLOCK(vp);
	pwd_chdir(td, vp);
	return (0);
}

/*
 * Change current working directory (``.'').
 */
#ifndef _SYS_SYSPROTO_H_
struct chdir_args {
	char *path;
};
#endif
int
sys_chdir(struct thread *td, struct chdir_args *uap)
{

	return (kern_chdir(td, uap->path, UIO_USERSPACE));
}

/*
 * Make the directory named by 'path' the current working directory of
 * the calling process.  'pathseg' says which address space 'path' lives
 * in.  Returns the namei() or change_dir() error, or 0.
 */
int
kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg)
{
	struct nameidata nd;
	int error;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
	    pathseg, path, td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if ((error = change_dir(nd.ni_vp, td)) != 0) {
		vput(nd.ni_vp);
		NDFREE_NOTHING(&nd);
		return (error);
	}
	/*
	 * Unlock but keep the reference when handing the vnode to
	 * pwd_chdir().
	 */
	VOP_UNLOCK(nd.ni_vp);
	NDFREE_NOTHING(&nd);
	pwd_chdir(td, nd.ni_vp);
	return (0);
}

/*
 * Change notion of root (``/'') directory.
 *
 * Requires PRIV_VFS_CHROOT.  The target must pass the same checks as for
 * chdir (see change_dir()) plus, when MAC is compiled in, the MAC chroot
 * check.
 */
#ifndef _SYS_SYSPROTO_H_
struct chroot_args {
	char *path;
};
#endif
int
sys_chroot(struct thread *td, struct chroot_args *uap)
{
	struct nameidata nd;
	int error;

	error = priv_check(td, PRIV_VFS_CHROOT);
	if (error != 0)
		return (error);
	NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1,
	    UIO_USERSPACE, uap->path, td);
	error = namei(&nd);
	if (error != 0)
		goto error;
	error = change_dir(nd.ni_vp, td);
	if (error != 0)
		goto e_vunlock;
#ifdef MAC
	error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp);
	if (error != 0)
		goto e_vunlock;
#endif
	/*
	 * pwd_chroot() is called with the vnode unlocked; our reference is
	 * dropped afterwards whether or not it succeeded.
	 */
	VOP_UNLOCK(nd.ni_vp);
	error = pwd_chroot(td, nd.ni_vp);
	vrele(nd.ni_vp);
	NDFREE_NOTHING(&nd);
	return (error);
e_vunlock:
	/* The vnode is still locked here, so unlock and release together. */
	vput(nd.ni_vp);
error:
	NDFREE_NOTHING(&nd);
	return (error);
}

/*
 * Common routine for chroot and chdir.  Callers must provide a locked vnode
 * instance.
 *
 * Returns ENOTDIR if 'vp' is not a directory, otherwise the result of the
 * MAC chdir check (when compiled in) followed by a VEXEC access check
 * against the thread's credentials.  The vnode's lock and reference are
 * left untouched.
 */
int
change_dir(struct vnode *vp, struct thread *td)
{
#ifdef MAC
	int error;
#endif

	ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked");
	if (vp->v_type != VDIR)
		return (ENOTDIR);
#ifdef MAC
	error = mac_vnode_check_chdir(td->td_ucred, vp);
	if (error != 0)
		return (error);
#endif
	return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td));
}

/*
 * Add to '*rightsp' the capability rights implied by the open(2) 'flags'.
 *
 * O_EXEC takes precedence over the access mode and yields CAP_FEXECVE.
 * Otherwise the access mode selects CAP_READ and/or CAP_WRITE; write access
 * also adds CAP_SEEK unless O_APPEND or O_TRUNC is given.  O_CREAT,
 * O_TRUNC, O_SYNC/O_FSYNC and O_EXLOCK/O_SHLOCK each add their own right.
 * Rights already present in '*rightsp' are kept.
 */
static __inline void
flags_to_rights(int flags, cap_rights_t *rightsp)
{

	if (flags & O_EXEC) {
		cap_rights_set_one(rightsp, CAP_FEXECVE);
	} else {
		switch ((flags & O_ACCMODE)) {
		case O_RDONLY:
			cap_rights_set_one(rightsp, CAP_READ);
			break;
		case O_RDWR:
			cap_rights_set_one(rightsp, CAP_READ);
			/* FALLTHROUGH */
		case O_WRONLY:
			cap_rights_set_one(rightsp, CAP_WRITE);
			if (!(flags & (O_APPEND | O_TRUNC)))
				cap_rights_set_one(rightsp, CAP_SEEK);
			break;
		}
	}

	if (flags & O_CREAT)
		cap_rights_set_one(rightsp, CAP_CREATE);

	if (flags & O_TRUNC)
		cap_rights_set_one(rightsp, CAP_FTRUNCATE);

	if (flags & (O_SYNC | O_FSYNC))
		cap_rights_set_one(rightsp, CAP_FSYNC);

	if (flags & (O_EXLOCK | O_SHLOCK))
		cap_rights_set_one(rightsp, CAP_FLOCK);
}

/*
 * Check permissions, allocate an open file structure, and call the device
 * open routine if any.
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char *path;
	int flags;
	int mode;
};
#endif
int
sys_open(struct thread *td, struct open_args *uap)
{

	/* open(2) is openat(2) relative to the current working directory. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int fd;
	char *path;
	int flag;
	int mode;
};
#endif
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Open (and possibly create) the file named by 'path', looked up relative
 * to descriptor 'fd', and install a new descriptor for it.
 *
 * 'flags' are the open(2) flags as given by the caller and 'mode' is the
 * creation mode, which is filtered through the process' file creation
 * mask.  On success the new descriptor number is stored in
 * td->td_retval[0] and 0 is returned.  EINVAL is returned when O_EXEC is
 * combined with any access mode bit, or when all access mode bits are
 * set.
 */
int
kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see 'success' and 'bad'. */
	indx = -1;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    &rights, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		finit_vnode(fp, flags, NULL, &vnops);
	}

	/* Truncation is requested through the file, after the vnode unlock. */
	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		/*
		 * dupfdopen() already produced the descriptor, so the file
		 * structure allocated above is not needed.
		 */
		filecaps_free(&nd.ni_filecaps);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	falloc_abort(td, fp);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char *path;
	int mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	/* creat(path, mode) is open(path, O_WRONLY | O_CREAT | O_TRUNC, mode). */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1246 */ 1247 #ifndef _SYS_SYSPROTO_H_ 1248 struct mknodat_args { 1249 int fd; 1250 char *path; 1251 mode_t mode; 1252 dev_t dev; 1253 }; 1254 #endif 1255 int 1256 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1257 { 1258 1259 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1260 uap->dev)); 1261 } 1262 1263 #if defined(COMPAT_FREEBSD11) 1264 int 1265 freebsd11_mknod(struct thread *td, 1266 struct freebsd11_mknod_args *uap) 1267 { 1268 1269 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1270 uap->mode, uap->dev)); 1271 } 1272 1273 int 1274 freebsd11_mknodat(struct thread *td, 1275 struct freebsd11_mknodat_args *uap) 1276 { 1277 1278 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1279 uap->dev)); 1280 } 1281 #endif /* COMPAT_FREEBSD11 */ 1282 1283 int 1284 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1285 int mode, dev_t dev) 1286 { 1287 struct vnode *vp; 1288 struct mount *mp; 1289 struct vattr vattr; 1290 struct nameidata nd; 1291 int error, whiteout = 0; 1292 1293 AUDIT_ARG_MODE(mode); 1294 AUDIT_ARG_DEV(dev); 1295 switch (mode & S_IFMT) { 1296 case S_IFCHR: 1297 case S_IFBLK: 1298 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1299 if (error == 0 && dev == VNOVAL) 1300 error = EINVAL; 1301 break; 1302 case S_IFWHT: 1303 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1304 break; 1305 case S_IFIFO: 1306 if (dev == 0) 1307 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1308 /* FALLTHROUGH */ 1309 default: 1310 error = EINVAL; 1311 break; 1312 } 1313 if (error != 0) 1314 return (error); 1315 restart: 1316 bwillwrite(); 1317 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1318 NOCACHE, pathseg, path, fd, &cap_mknodat_rights, 1319 td); 1320 if ((error = namei(&nd)) != 0) 1321 return (error); 1322 vp = nd.ni_vp; 1323 if (vp != NULL) { 1324 NDFREE(&nd, NDF_ONLY_PNBUF); 1325 if (vp == nd.ni_dvp) 1326 vrele(nd.ni_dvp); 1327 else 1328 vput(nd.ni_dvp); 
1329 vrele(vp); 1330 return (EEXIST); 1331 } else { 1332 VATTR_NULL(&vattr); 1333 vattr.va_mode = (mode & ALLPERMS) & 1334 ~td->td_proc->p_pd->pd_cmask; 1335 vattr.va_rdev = dev; 1336 whiteout = 0; 1337 1338 switch (mode & S_IFMT) { 1339 case S_IFCHR: 1340 vattr.va_type = VCHR; 1341 break; 1342 case S_IFBLK: 1343 vattr.va_type = VBLK; 1344 break; 1345 case S_IFWHT: 1346 whiteout = 1; 1347 break; 1348 default: 1349 panic("kern_mknod: invalid mode"); 1350 } 1351 } 1352 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1353 NDFREE(&nd, NDF_ONLY_PNBUF); 1354 vput(nd.ni_dvp); 1355 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1356 return (error); 1357 goto restart; 1358 } 1359 #ifdef MAC 1360 if (error == 0 && !whiteout) 1361 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1362 &nd.ni_cnd, &vattr); 1363 #endif 1364 if (error == 0) { 1365 if (whiteout) 1366 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1367 else { 1368 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1369 &nd.ni_cnd, &vattr); 1370 } 1371 } 1372 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1373 true); 1374 vn_finished_write(mp); 1375 NDFREE(&nd, NDF_ONLY_PNBUF); 1376 if (error == ERELOOKUP) 1377 goto restart; 1378 return (error); 1379 } 1380 1381 /* 1382 * Create a named pipe. 
1383 */ 1384 #ifndef _SYS_SYSPROTO_H_ 1385 struct mkfifo_args { 1386 char *path; 1387 int mode; 1388 }; 1389 #endif 1390 int 1391 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1392 { 1393 1394 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1395 uap->mode)); 1396 } 1397 1398 #ifndef _SYS_SYSPROTO_H_ 1399 struct mkfifoat_args { 1400 int fd; 1401 char *path; 1402 mode_t mode; 1403 }; 1404 #endif 1405 int 1406 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1407 { 1408 1409 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1410 uap->mode)); 1411 } 1412 1413 int 1414 kern_mkfifoat(struct thread *td, int fd, const char *path, 1415 enum uio_seg pathseg, int mode) 1416 { 1417 struct mount *mp; 1418 struct vattr vattr; 1419 struct nameidata nd; 1420 int error; 1421 1422 AUDIT_ARG_MODE(mode); 1423 restart: 1424 bwillwrite(); 1425 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1426 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, 1427 td); 1428 if ((error = namei(&nd)) != 0) 1429 return (error); 1430 if (nd.ni_vp != NULL) { 1431 NDFREE(&nd, NDF_ONLY_PNBUF); 1432 if (nd.ni_vp == nd.ni_dvp) 1433 vrele(nd.ni_dvp); 1434 else 1435 vput(nd.ni_dvp); 1436 vrele(nd.ni_vp); 1437 return (EEXIST); 1438 } 1439 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1440 NDFREE(&nd, NDF_ONLY_PNBUF); 1441 vput(nd.ni_dvp); 1442 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1443 return (error); 1444 goto restart; 1445 } 1446 VATTR_NULL(&vattr); 1447 vattr.va_type = VFIFO; 1448 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1449 #ifdef MAC 1450 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1451 &vattr); 1452 if (error != 0) 1453 goto out; 1454 #endif 1455 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1456 #ifdef MAC 1457 out: 1458 #endif 1459 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 1460 vn_finished_write(mp); 1461 NDFREE(&nd, NDF_ONLY_PNBUF); 1462 if (error == ERELOOKUP) 1463 goto restart; 1464 return (error); 1465 } 1466 1467 /* 1468 * Make a hard file link. 1469 */ 1470 #ifndef _SYS_SYSPROTO_H_ 1471 struct link_args { 1472 char *path; 1473 char *link; 1474 }; 1475 #endif 1476 int 1477 sys_link(struct thread *td, struct link_args *uap) 1478 { 1479 1480 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1481 UIO_USERSPACE, FOLLOW)); 1482 } 1483 1484 #ifndef _SYS_SYSPROTO_H_ 1485 struct linkat_args { 1486 int fd1; 1487 char *path1; 1488 int fd2; 1489 char *path2; 1490 int flag; 1491 }; 1492 #endif 1493 int 1494 sys_linkat(struct thread *td, struct linkat_args *uap) 1495 { 1496 int flag; 1497 1498 flag = uap->flag; 1499 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH)) != 0) 1500 return (EINVAL); 1501 1502 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1503 UIO_USERSPACE, at2cnpflags(flag, AT_SYMLINK_FOLLOW | 1504 AT_RESOLVE_BENEATH))); 1505 } 1506 1507 int hardlink_check_uid = 0; 1508 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1509 &hardlink_check_uid, 0, 1510 "Unprivileged processes cannot create hard links to files owned by other " 1511 "users"); 1512 static int hardlink_check_gid = 0; 1513 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1514 &hardlink_check_gid, 0, 1515 "Unprivileged processes cannot create hard links to files owned by other " 1516 "groups"); 1517 1518 static int 1519 can_hardlink(struct vnode *vp, struct ucred *cred) 1520 { 1521 struct vattr va; 1522 int error; 1523 1524 if (!hardlink_check_uid && !hardlink_check_gid) 1525 return (0); 1526 1527 error = VOP_GETATTR(vp, &va, cred); 1528 if (error != 0) 1529 return (error); 1530 1531 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1532 error = priv_check_cred(cred, PRIV_VFS_LINK); 1533 if (error != 0) 1534 return (error); 1535 } 1536 1537 if 
(hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1538 error = priv_check_cred(cred, PRIV_VFS_LINK); 1539 if (error != 0) 1540 return (error); 1541 } 1542 1543 return (0); 1544 } 1545 1546 int 1547 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1548 const char *path2, enum uio_seg segflag, int follow) 1549 { 1550 struct nameidata nd; 1551 int error; 1552 1553 do { 1554 bwillwrite(); 1555 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag, 1556 path1, fd1, &cap_linkat_source_rights, td); 1557 if ((error = namei(&nd)) != 0) 1558 return (error); 1559 NDFREE(&nd, NDF_ONLY_PNBUF); 1560 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1561 } while (error == EAGAIN || error == ERELOOKUP); 1562 return (error); 1563 } 1564 1565 static int 1566 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1567 enum uio_seg segflag) 1568 { 1569 struct nameidata nd; 1570 struct mount *mp; 1571 int error; 1572 1573 if (vp->v_type == VDIR) { 1574 vrele(vp); 1575 return (EPERM); /* POSIX */ 1576 } 1577 NDINIT_ATRIGHTS(&nd, CREATE, 1578 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1579 &cap_linkat_target_rights, td); 1580 if ((error = namei(&nd)) == 0) { 1581 if (nd.ni_vp != NULL) { 1582 NDFREE(&nd, NDF_ONLY_PNBUF); 1583 if (nd.ni_dvp == nd.ni_vp) 1584 vrele(nd.ni_dvp); 1585 else 1586 vput(nd.ni_dvp); 1587 vrele(nd.ni_vp); 1588 vrele(vp); 1589 return (EEXIST); 1590 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1591 /* 1592 * Cross-device link. No need to recheck 1593 * vp->v_type, since it cannot change, except 1594 * to VBAD. 
1595 */ 1596 NDFREE(&nd, NDF_ONLY_PNBUF); 1597 vput(nd.ni_dvp); 1598 vrele(vp); 1599 return (EXDEV); 1600 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1601 error = can_hardlink(vp, td->td_ucred); 1602 #ifdef MAC 1603 if (error == 0) 1604 error = mac_vnode_check_link(td->td_ucred, 1605 nd.ni_dvp, vp, &nd.ni_cnd); 1606 #endif 1607 if (error != 0) { 1608 vput(vp); 1609 vput(nd.ni_dvp); 1610 NDFREE(&nd, NDF_ONLY_PNBUF); 1611 return (error); 1612 } 1613 error = vn_start_write(vp, &mp, V_NOWAIT); 1614 if (error != 0) { 1615 vput(vp); 1616 vput(nd.ni_dvp); 1617 NDFREE(&nd, NDF_ONLY_PNBUF); 1618 error = vn_start_write(NULL, &mp, 1619 V_XSLEEP | PCATCH); 1620 if (error != 0) 1621 return (error); 1622 return (EAGAIN); 1623 } 1624 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1625 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1626 vn_finished_write(mp); 1627 NDFREE(&nd, NDF_ONLY_PNBUF); 1628 vp = NULL; 1629 } else { 1630 vput(nd.ni_dvp); 1631 NDFREE(&nd, NDF_ONLY_PNBUF); 1632 vrele(vp); 1633 return (EAGAIN); 1634 } 1635 } 1636 if (vp != NULL) 1637 vrele(vp); 1638 return (error); 1639 } 1640 1641 /* 1642 * Make a symbolic link. 
1643 */ 1644 #ifndef _SYS_SYSPROTO_H_ 1645 struct symlink_args { 1646 char *path; 1647 char *link; 1648 }; 1649 #endif 1650 int 1651 sys_symlink(struct thread *td, struct symlink_args *uap) 1652 { 1653 1654 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1655 UIO_USERSPACE)); 1656 } 1657 1658 #ifndef _SYS_SYSPROTO_H_ 1659 struct symlinkat_args { 1660 char *path; 1661 int fd; 1662 char *path2; 1663 }; 1664 #endif 1665 int 1666 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1667 { 1668 1669 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1670 UIO_USERSPACE)); 1671 } 1672 1673 int 1674 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1675 enum uio_seg segflg) 1676 { 1677 struct mount *mp; 1678 struct vattr vattr; 1679 const char *syspath; 1680 char *tmppath; 1681 struct nameidata nd; 1682 int error; 1683 1684 if (segflg == UIO_SYSSPACE) { 1685 syspath = path1; 1686 } else { 1687 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1688 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1689 goto out; 1690 syspath = tmppath; 1691 } 1692 AUDIT_ARG_TEXT(syspath); 1693 restart: 1694 bwillwrite(); 1695 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1696 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1697 td); 1698 if ((error = namei(&nd)) != 0) 1699 goto out; 1700 if (nd.ni_vp) { 1701 NDFREE(&nd, NDF_ONLY_PNBUF); 1702 if (nd.ni_vp == nd.ni_dvp) 1703 vrele(nd.ni_dvp); 1704 else 1705 vput(nd.ni_dvp); 1706 vrele(nd.ni_vp); 1707 nd.ni_vp = NULL; 1708 error = EEXIST; 1709 goto out; 1710 } 1711 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1712 NDFREE(&nd, NDF_ONLY_PNBUF); 1713 vput(nd.ni_dvp); 1714 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1715 goto out; 1716 goto restart; 1717 } 1718 VATTR_NULL(&vattr); 1719 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1720 #ifdef MAC 1721 vattr.va_type = VLNK; 1722 error = 
mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1723 &vattr); 1724 if (error != 0) 1725 goto out2; 1726 #endif 1727 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1728 #ifdef MAC 1729 out2: 1730 #endif 1731 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1732 vn_finished_write(mp); 1733 NDFREE(&nd, NDF_ONLY_PNBUF); 1734 if (error == ERELOOKUP) 1735 goto restart; 1736 out: 1737 if (segflg != UIO_SYSSPACE) 1738 uma_zfree(namei_zone, tmppath); 1739 return (error); 1740 } 1741 1742 /* 1743 * Delete a whiteout from the filesystem. 1744 */ 1745 #ifndef _SYS_SYSPROTO_H_ 1746 struct undelete_args { 1747 char *path; 1748 }; 1749 #endif 1750 int 1751 sys_undelete(struct thread *td, struct undelete_args *uap) 1752 { 1753 struct mount *mp; 1754 struct nameidata nd; 1755 int error; 1756 1757 restart: 1758 bwillwrite(); 1759 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1760 UIO_USERSPACE, uap->path, td); 1761 error = namei(&nd); 1762 if (error != 0) 1763 return (error); 1764 1765 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1766 NDFREE(&nd, NDF_ONLY_PNBUF); 1767 if (nd.ni_vp == nd.ni_dvp) 1768 vrele(nd.ni_dvp); 1769 else 1770 vput(nd.ni_dvp); 1771 if (nd.ni_vp) 1772 vrele(nd.ni_vp); 1773 return (EEXIST); 1774 } 1775 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1776 NDFREE(&nd, NDF_ONLY_PNBUF); 1777 vput(nd.ni_dvp); 1778 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1779 return (error); 1780 goto restart; 1781 } 1782 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1783 NDFREE(&nd, NDF_ONLY_PNBUF); 1784 vput(nd.ni_dvp); 1785 vn_finished_write(mp); 1786 if (error == ERELOOKUP) 1787 goto restart; 1788 return (error); 1789 } 1790 1791 /* 1792 * Delete a name from the filesystem. 
1793 */ 1794 #ifndef _SYS_SYSPROTO_H_ 1795 struct unlink_args { 1796 char *path; 1797 }; 1798 #endif 1799 int 1800 sys_unlink(struct thread *td, struct unlink_args *uap) 1801 { 1802 1803 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1804 0, 0)); 1805 } 1806 1807 static int 1808 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1809 int flag, enum uio_seg pathseg, ino_t oldinum) 1810 { 1811 1812 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1813 return (EINVAL); 1814 1815 if ((flag & AT_REMOVEDIR) != 0) 1816 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1817 1818 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1819 } 1820 1821 #ifndef _SYS_SYSPROTO_H_ 1822 struct unlinkat_args { 1823 int fd; 1824 char *path; 1825 int flag; 1826 }; 1827 #endif 1828 int 1829 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1830 { 1831 1832 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1833 UIO_USERSPACE, 0)); 1834 } 1835 1836 #ifndef _SYS_SYSPROTO_H_ 1837 struct funlinkat_args { 1838 int dfd; 1839 const char *path; 1840 int fd; 1841 int flag; 1842 }; 1843 #endif 1844 int 1845 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1846 { 1847 1848 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1849 UIO_USERSPACE, 0)); 1850 } 1851 1852 int 1853 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1854 enum uio_seg pathseg, int flag, ino_t oldinum) 1855 { 1856 struct mount *mp; 1857 struct file *fp; 1858 struct vnode *vp; 1859 struct nameidata nd; 1860 struct stat sb; 1861 int error; 1862 1863 fp = NULL; 1864 if (fd != FD_NONE) { 1865 error = getvnode(td, fd, &cap_no_rights, &fp); 1866 if (error != 0) 1867 return (error); 1868 } 1869 1870 restart: 1871 bwillwrite(); 1872 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1873 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1874 pathseg, path, dfd, 
&cap_unlinkat_rights, td); 1875 if ((error = namei(&nd)) != 0) { 1876 if (error == EINVAL) 1877 error = EPERM; 1878 goto fdout; 1879 } 1880 vp = nd.ni_vp; 1881 if (vp->v_type == VDIR && oldinum == 0) { 1882 error = EPERM; /* POSIX */ 1883 } else if (oldinum != 0 && 1884 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1885 sb.st_ino != oldinum) { 1886 error = EIDRM; /* Identifier removed */ 1887 } else if (fp != NULL && fp->f_vnode != vp) { 1888 if (VN_IS_DOOMED(fp->f_vnode)) 1889 error = EBADF; 1890 else 1891 error = EDEADLK; 1892 } else { 1893 /* 1894 * The root of a mounted filesystem cannot be deleted. 1895 * 1896 * XXX: can this only be a VDIR case? 1897 */ 1898 if (vp->v_vflag & VV_ROOT) 1899 error = EBUSY; 1900 } 1901 if (error == 0) { 1902 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1903 NDFREE(&nd, NDF_ONLY_PNBUF); 1904 vput(nd.ni_dvp); 1905 if (vp == nd.ni_dvp) 1906 vrele(vp); 1907 else 1908 vput(vp); 1909 if ((error = vn_start_write(NULL, &mp, 1910 V_XSLEEP | PCATCH)) != 0) { 1911 goto fdout; 1912 } 1913 goto restart; 1914 } 1915 #ifdef MAC 1916 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1917 &nd.ni_cnd); 1918 if (error != 0) 1919 goto out; 1920 #endif 1921 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1922 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1923 #ifdef MAC 1924 out: 1925 #endif 1926 vn_finished_write(mp); 1927 } 1928 NDFREE(&nd, NDF_ONLY_PNBUF); 1929 vput(nd.ni_dvp); 1930 if (vp == nd.ni_dvp) 1931 vrele(vp); 1932 else 1933 vput(vp); 1934 if (error == ERELOOKUP) 1935 goto restart; 1936 fdout: 1937 if (fp != NULL) 1938 fdrop(fp, td); 1939 return (error); 1940 } 1941 1942 /* 1943 * Reposition read/write file offset. 
1944 */ 1945 #ifndef _SYS_SYSPROTO_H_ 1946 struct lseek_args { 1947 int fd; 1948 int pad; 1949 off_t offset; 1950 int whence; 1951 }; 1952 #endif 1953 int 1954 sys_lseek(struct thread *td, struct lseek_args *uap) 1955 { 1956 1957 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1958 } 1959 1960 int 1961 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1962 { 1963 struct file *fp; 1964 int error; 1965 1966 AUDIT_ARG_FD(fd); 1967 error = fget(td, fd, &cap_seek_rights, &fp); 1968 if (error != 0) 1969 return (error); 1970 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1971 fo_seek(fp, offset, whence, td) : ESPIPE; 1972 fdrop(fp, td); 1973 return (error); 1974 } 1975 1976 #if defined(COMPAT_43) 1977 /* 1978 * Reposition read/write file offset. 1979 */ 1980 #ifndef _SYS_SYSPROTO_H_ 1981 struct olseek_args { 1982 int fd; 1983 long offset; 1984 int whence; 1985 }; 1986 #endif 1987 int 1988 olseek(struct thread *td, struct olseek_args *uap) 1989 { 1990 1991 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1992 } 1993 #endif /* COMPAT_43 */ 1994 1995 #if defined(COMPAT_FREEBSD6) 1996 /* Version with the 'pad' argument */ 1997 int 1998 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1999 { 2000 2001 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2002 } 2003 #endif 2004 2005 /* 2006 * Check access permissions using passed credentials. 2007 */ 2008 static int 2009 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2010 struct thread *td) 2011 { 2012 accmode_t accmode; 2013 int error; 2014 2015 /* Flags == 0 means only check for existence. 
*/ 2016 if (user_flags == 0) 2017 return (0); 2018 2019 accmode = 0; 2020 if (user_flags & R_OK) 2021 accmode |= VREAD; 2022 if (user_flags & W_OK) 2023 accmode |= VWRITE; 2024 if (user_flags & X_OK) 2025 accmode |= VEXEC; 2026 #ifdef MAC 2027 error = mac_vnode_check_access(cred, vp, accmode); 2028 if (error != 0) 2029 return (error); 2030 #endif 2031 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2032 error = VOP_ACCESS(vp, accmode, cred, td); 2033 return (error); 2034 } 2035 2036 /* 2037 * Check access permissions using "real" credentials. 2038 */ 2039 #ifndef _SYS_SYSPROTO_H_ 2040 struct access_args { 2041 char *path; 2042 int amode; 2043 }; 2044 #endif 2045 int 2046 sys_access(struct thread *td, struct access_args *uap) 2047 { 2048 2049 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2050 0, uap->amode)); 2051 } 2052 2053 #ifndef _SYS_SYSPROTO_H_ 2054 struct faccessat_args { 2055 int dirfd; 2056 char *path; 2057 int amode; 2058 int flag; 2059 } 2060 #endif 2061 int 2062 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2063 { 2064 2065 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2066 uap->amode)); 2067 } 2068 2069 int 2070 kern_accessat(struct thread *td, int fd, const char *path, 2071 enum uio_seg pathseg, int flag, int amode) 2072 { 2073 struct ucred *cred, *usecred; 2074 struct vnode *vp; 2075 struct nameidata nd; 2076 int error; 2077 2078 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH)) != 0) 2079 return (EINVAL); 2080 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2081 return (EINVAL); 2082 2083 /* 2084 * Create and modify a temporary credential instead of one that 2085 * is potentially shared (if we need one). 
2086 */ 2087 cred = td->td_ucred; 2088 if ((flag & AT_EACCESS) == 0 && 2089 ((cred->cr_uid != cred->cr_ruid || 2090 cred->cr_rgid != cred->cr_groups[0]))) { 2091 usecred = crdup(cred); 2092 usecred->cr_uid = cred->cr_ruid; 2093 usecred->cr_groups[0] = cred->cr_rgid; 2094 td->td_ucred = usecred; 2095 } else 2096 usecred = cred; 2097 AUDIT_ARG_VALUE(amode); 2098 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2099 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH), 2100 pathseg, path, fd, &cap_fstat_rights, td); 2101 if ((error = namei(&nd)) != 0) 2102 goto out; 2103 vp = nd.ni_vp; 2104 2105 error = vn_access(vp, amode, usecred, td); 2106 NDFREE_NOTHING(&nd); 2107 vput(vp); 2108 out: 2109 if (usecred != cred) { 2110 td->td_ucred = cred; 2111 crfree(usecred); 2112 } 2113 return (error); 2114 } 2115 2116 /* 2117 * Check access permissions using "effective" credentials. 2118 */ 2119 #ifndef _SYS_SYSPROTO_H_ 2120 struct eaccess_args { 2121 char *path; 2122 int amode; 2123 }; 2124 #endif 2125 int 2126 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2127 { 2128 2129 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2130 AT_EACCESS, uap->amode)); 2131 } 2132 2133 #if defined(COMPAT_43) 2134 /* 2135 * Get file status; this version follows links. 2136 */ 2137 #ifndef _SYS_SYSPROTO_H_ 2138 struct ostat_args { 2139 char *path; 2140 struct ostat *ub; 2141 }; 2142 #endif 2143 int 2144 ostat(struct thread *td, struct ostat_args *uap) 2145 { 2146 struct stat sb; 2147 struct ostat osb; 2148 int error; 2149 2150 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2151 &sb, NULL); 2152 if (error != 0) 2153 return (error); 2154 cvtstat(&sb, &osb); 2155 return (copyout(&osb, uap->ub, sizeof (osb))); 2156 } 2157 2158 /* 2159 * Get file status; this version does not follow links. 
2160 */ 2161 #ifndef _SYS_SYSPROTO_H_ 2162 struct olstat_args { 2163 char *path; 2164 struct ostat *ub; 2165 }; 2166 #endif 2167 int 2168 olstat(struct thread *td, struct olstat_args *uap) 2169 { 2170 struct stat sb; 2171 struct ostat osb; 2172 int error; 2173 2174 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2175 UIO_USERSPACE, &sb, NULL); 2176 if (error != 0) 2177 return (error); 2178 cvtstat(&sb, &osb); 2179 return (copyout(&osb, uap->ub, sizeof (osb))); 2180 } 2181 2182 /* 2183 * Convert from an old to a new stat structure. 2184 * XXX: many values are blindly truncated. 2185 */ 2186 void 2187 cvtstat(struct stat *st, struct ostat *ost) 2188 { 2189 2190 bzero(ost, sizeof(*ost)); 2191 ost->st_dev = st->st_dev; 2192 ost->st_ino = st->st_ino; 2193 ost->st_mode = st->st_mode; 2194 ost->st_nlink = st->st_nlink; 2195 ost->st_uid = st->st_uid; 2196 ost->st_gid = st->st_gid; 2197 ost->st_rdev = st->st_rdev; 2198 ost->st_size = MIN(st->st_size, INT32_MAX); 2199 ost->st_atim = st->st_atim; 2200 ost->st_mtim = st->st_mtim; 2201 ost->st_ctim = st->st_ctim; 2202 ost->st_blksize = st->st_blksize; 2203 ost->st_blocks = st->st_blocks; 2204 ost->st_flags = st->st_flags; 2205 ost->st_gen = st->st_gen; 2206 } 2207 #endif /* COMPAT_43 */ 2208 2209 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2210 int ino64_trunc_error; 2211 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2212 &ino64_trunc_error, 0, 2213 "Error on truncation of device, file or inode number, or link count"); 2214 2215 int 2216 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2217 { 2218 2219 ost->st_dev = st->st_dev; 2220 if (ost->st_dev != st->st_dev) { 2221 switch (ino64_trunc_error) { 2222 default: 2223 /* 2224 * Since dev_t is almost raw, don't clamp to the 2225 * maximum for case 2, but ignore the error. 
2226 */ 2227 break; 2228 case 1: 2229 return (EOVERFLOW); 2230 } 2231 } 2232 ost->st_ino = st->st_ino; 2233 if (ost->st_ino != st->st_ino) { 2234 switch (ino64_trunc_error) { 2235 default: 2236 case 0: 2237 break; 2238 case 1: 2239 return (EOVERFLOW); 2240 case 2: 2241 ost->st_ino = UINT32_MAX; 2242 break; 2243 } 2244 } 2245 ost->st_mode = st->st_mode; 2246 ost->st_nlink = st->st_nlink; 2247 if (ost->st_nlink != st->st_nlink) { 2248 switch (ino64_trunc_error) { 2249 default: 2250 case 0: 2251 break; 2252 case 1: 2253 return (EOVERFLOW); 2254 case 2: 2255 ost->st_nlink = UINT16_MAX; 2256 break; 2257 } 2258 } 2259 ost->st_uid = st->st_uid; 2260 ost->st_gid = st->st_gid; 2261 ost->st_rdev = st->st_rdev; 2262 if (ost->st_rdev != st->st_rdev) { 2263 switch (ino64_trunc_error) { 2264 default: 2265 break; 2266 case 1: 2267 return (EOVERFLOW); 2268 } 2269 } 2270 ost->st_atim = st->st_atim; 2271 ost->st_mtim = st->st_mtim; 2272 ost->st_ctim = st->st_ctim; 2273 ost->st_size = st->st_size; 2274 ost->st_blocks = st->st_blocks; 2275 ost->st_blksize = st->st_blksize; 2276 ost->st_flags = st->st_flags; 2277 ost->st_gen = st->st_gen; 2278 ost->st_lspare = 0; 2279 ost->st_birthtim = st->st_birthtim; 2280 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2281 sizeof(*ost) - offsetof(struct freebsd11_stat, 2282 st_birthtim) - sizeof(ost->st_birthtim)); 2283 return (0); 2284 } 2285 2286 int 2287 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2288 { 2289 struct stat sb; 2290 struct freebsd11_stat osb; 2291 int error; 2292 2293 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2294 &sb, NULL); 2295 if (error != 0) 2296 return (error); 2297 error = freebsd11_cvtstat(&sb, &osb); 2298 if (error == 0) 2299 error = copyout(&osb, uap->ub, sizeof(osb)); 2300 return (error); 2301 } 2302 2303 int 2304 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2305 { 2306 struct stat sb; 2307 struct freebsd11_stat osb; 2308 int error; 2309 2310 
error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2311 UIO_USERSPACE, &sb, NULL); 2312 if (error != 0) 2313 return (error); 2314 error = freebsd11_cvtstat(&sb, &osb); 2315 if (error == 0) 2316 error = copyout(&osb, uap->ub, sizeof(osb)); 2317 return (error); 2318 } 2319 2320 int 2321 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2322 { 2323 struct fhandle fh; 2324 struct stat sb; 2325 struct freebsd11_stat osb; 2326 int error; 2327 2328 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2329 if (error != 0) 2330 return (error); 2331 error = kern_fhstat(td, fh, &sb); 2332 if (error != 0) 2333 return (error); 2334 error = freebsd11_cvtstat(&sb, &osb); 2335 if (error == 0) 2336 error = copyout(&osb, uap->sb, sizeof(osb)); 2337 return (error); 2338 } 2339 2340 int 2341 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2342 { 2343 struct stat sb; 2344 struct freebsd11_stat osb; 2345 int error; 2346 2347 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2348 UIO_USERSPACE, &sb, NULL); 2349 if (error != 0) 2350 return (error); 2351 error = freebsd11_cvtstat(&sb, &osb); 2352 if (error == 0) 2353 error = copyout(&osb, uap->buf, sizeof(osb)); 2354 return (error); 2355 } 2356 #endif /* COMPAT_FREEBSD11 */ 2357 2358 /* 2359 * Get file status 2360 */ 2361 #ifndef _SYS_SYSPROTO_H_ 2362 struct fstatat_args { 2363 int fd; 2364 char *path; 2365 struct stat *buf; 2366 int flag; 2367 } 2368 #endif 2369 int 2370 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2371 { 2372 struct stat sb; 2373 int error; 2374 2375 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2376 UIO_USERSPACE, &sb, NULL); 2377 if (error == 0) 2378 error = copyout(&sb, uap->buf, sizeof (sb)); 2379 return (error); 2380 } 2381 2382 int 2383 kern_statat(struct thread *td, int flag, int fd, const char *path, 2384 enum uio_seg pathseg, struct stat *sbp, 2385 void (*hook)(struct vnode *vp, struct stat *sbp)) 2386 { 2387 struct nameidata 
nd; 2388 int error; 2389 2390 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 2391 return (EINVAL); 2392 2393 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2394 AT_SYMLINK_NOFOLLOW) | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2395 pathseg, path, fd, &cap_fstat_rights, td); 2396 2397 if ((error = namei(&nd)) != 0) 2398 return (error); 2399 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td); 2400 if (error == 0) { 2401 if (__predict_false(hook != NULL)) 2402 hook(nd.ni_vp, sbp); 2403 } 2404 NDFREE_NOTHING(&nd); 2405 vput(nd.ni_vp); 2406 #ifdef __STAT_TIME_T_EXT 2407 sbp->st_atim_ext = 0; 2408 sbp->st_mtim_ext = 0; 2409 sbp->st_ctim_ext = 0; 2410 sbp->st_btim_ext = 0; 2411 #endif 2412 #ifdef KTRACE 2413 if (KTRPOINT(td, KTR_STRUCT)) 2414 ktrstat_error(sbp, error); 2415 #endif 2416 return (error); 2417 } 2418 2419 #if defined(COMPAT_FREEBSD11) 2420 /* 2421 * Implementation of the NetBSD [l]stat() functions. 2422 */ 2423 void 2424 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2425 { 2426 2427 bzero(nsb, sizeof(*nsb)); 2428 nsb->st_dev = sb->st_dev; 2429 nsb->st_ino = sb->st_ino; 2430 nsb->st_mode = sb->st_mode; 2431 nsb->st_nlink = sb->st_nlink; 2432 nsb->st_uid = sb->st_uid; 2433 nsb->st_gid = sb->st_gid; 2434 nsb->st_rdev = sb->st_rdev; 2435 nsb->st_atim = sb->st_atim; 2436 nsb->st_mtim = sb->st_mtim; 2437 nsb->st_ctim = sb->st_ctim; 2438 nsb->st_size = sb->st_size; 2439 nsb->st_blocks = sb->st_blocks; 2440 nsb->st_blksize = sb->st_blksize; 2441 nsb->st_flags = sb->st_flags; 2442 nsb->st_gen = sb->st_gen; 2443 nsb->st_birthtim = sb->st_birthtim; 2444 } 2445 2446 #ifndef _SYS_SYSPROTO_H_ 2447 struct freebsd11_nstat_args { 2448 char *path; 2449 struct nstat *ub; 2450 }; 2451 #endif 2452 int 2453 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2454 { 2455 struct stat sb; 2456 struct nstat nsb; 2457 int error; 2458 2459 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2460 &sb, NULL); 2461 
if (error != 0) 2462 return (error); 2463 freebsd11_cvtnstat(&sb, &nsb); 2464 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2465 } 2466 2467 /* 2468 * NetBSD lstat. Get file status; this version does not follow links. 2469 */ 2470 #ifndef _SYS_SYSPROTO_H_ 2471 struct freebsd11_nlstat_args { 2472 char *path; 2473 struct nstat *ub; 2474 }; 2475 #endif 2476 int 2477 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2478 { 2479 struct stat sb; 2480 struct nstat nsb; 2481 int error; 2482 2483 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2484 UIO_USERSPACE, &sb, NULL); 2485 if (error != 0) 2486 return (error); 2487 freebsd11_cvtnstat(&sb, &nsb); 2488 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2489 } 2490 #endif /* COMPAT_FREEBSD11 */ 2491 2492 /* 2493 * Get configurable pathname variables. 2494 */ 2495 #ifndef _SYS_SYSPROTO_H_ 2496 struct pathconf_args { 2497 char *path; 2498 int name; 2499 }; 2500 #endif 2501 int 2502 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2503 { 2504 long value; 2505 int error; 2506 2507 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2508 &value); 2509 if (error == 0) 2510 td->td_retval[0] = value; 2511 return (error); 2512 } 2513 2514 #ifndef _SYS_SYSPROTO_H_ 2515 struct lpathconf_args { 2516 char *path; 2517 int name; 2518 }; 2519 #endif 2520 int 2521 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2522 { 2523 long value; 2524 int error; 2525 2526 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2527 NOFOLLOW, &value); 2528 if (error == 0) 2529 td->td_retval[0] = value; 2530 return (error); 2531 } 2532 2533 int 2534 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2535 int name, u_long flags, long *valuep) 2536 { 2537 struct nameidata nd; 2538 int error; 2539 2540 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2541 pathseg, path, td); 2542 if ((error = namei(&nd)) != 0) 2543 return (error); 
2544 NDFREE_NOTHING(&nd); 2545 2546 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2547 vput(nd.ni_vp); 2548 return (error); 2549 } 2550 2551 /* 2552 * Return target name of a symbolic link. 2553 */ 2554 #ifndef _SYS_SYSPROTO_H_ 2555 struct readlink_args { 2556 char *path; 2557 char *buf; 2558 size_t count; 2559 }; 2560 #endif 2561 int 2562 sys_readlink(struct thread *td, struct readlink_args *uap) 2563 { 2564 2565 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2566 uap->buf, UIO_USERSPACE, uap->count)); 2567 } 2568 #ifndef _SYS_SYSPROTO_H_ 2569 struct readlinkat_args { 2570 int fd; 2571 char *path; 2572 char *buf; 2573 size_t bufsize; 2574 }; 2575 #endif 2576 int 2577 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2578 { 2579 2580 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2581 uap->buf, UIO_USERSPACE, uap->bufsize)); 2582 } 2583 2584 int 2585 kern_readlinkat(struct thread *td, int fd, const char *path, 2586 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2587 { 2588 struct vnode *vp; 2589 struct nameidata nd; 2590 int error; 2591 2592 if (count > IOSIZE_MAX) 2593 return (EINVAL); 2594 2595 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2596 pathseg, path, fd, td); 2597 2598 if ((error = namei(&nd)) != 0) 2599 return (error); 2600 NDFREE_NOTHING(&nd); 2601 vp = nd.ni_vp; 2602 2603 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2604 vput(vp); 2605 2606 return (error); 2607 } 2608 2609 /* 2610 * Helper function to readlink from a vnode 2611 */ 2612 static int 2613 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2614 struct thread *td) 2615 { 2616 struct iovec aiov; 2617 struct uio auio; 2618 int error; 2619 2620 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2621 #ifdef MAC 2622 error = mac_vnode_check_readlink(td->td_ucred, vp); 2623 if (error != 0) 2624 return (error); 2625 #endif 2626 if (vp->v_type != VLNK && 
(vp->v_vflag & VV_READLINK) == 0) 2627 return (EINVAL); 2628 2629 aiov.iov_base = buf; 2630 aiov.iov_len = count; 2631 auio.uio_iov = &aiov; 2632 auio.uio_iovcnt = 1; 2633 auio.uio_offset = 0; 2634 auio.uio_rw = UIO_READ; 2635 auio.uio_segflg = bufseg; 2636 auio.uio_td = td; 2637 auio.uio_resid = count; 2638 error = VOP_READLINK(vp, &auio, td->td_ucred); 2639 td->td_retval[0] = count - auio.uio_resid; 2640 return (error); 2641 } 2642 2643 /* 2644 * Common implementation code for chflags() and fchflags(). 2645 */ 2646 static int 2647 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2648 { 2649 struct mount *mp; 2650 struct vattr vattr; 2651 int error; 2652 2653 /* We can't support the value matching VNOVAL. */ 2654 if (flags == VNOVAL) 2655 return (EOPNOTSUPP); 2656 2657 /* 2658 * Prevent non-root users from setting flags on devices. When 2659 * a device is reused, users can retain ownership of the device 2660 * if they are allowed to set flags and programs assume that 2661 * chown can't fail when done as root. 2662 */ 2663 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2664 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2665 if (error != 0) 2666 return (error); 2667 } 2668 2669 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2670 return (error); 2671 VATTR_NULL(&vattr); 2672 vattr.va_flags = flags; 2673 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2674 #ifdef MAC 2675 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2676 if (error == 0) 2677 #endif 2678 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2679 VOP_UNLOCK(vp); 2680 vn_finished_write(mp); 2681 return (error); 2682 } 2683 2684 /* 2685 * Change flags of a file given a path name. 
2686 */ 2687 #ifndef _SYS_SYSPROTO_H_ 2688 struct chflags_args { 2689 const char *path; 2690 u_long flags; 2691 }; 2692 #endif 2693 int 2694 sys_chflags(struct thread *td, struct chflags_args *uap) 2695 { 2696 2697 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2698 uap->flags, 0)); 2699 } 2700 2701 #ifndef _SYS_SYSPROTO_H_ 2702 struct chflagsat_args { 2703 int fd; 2704 const char *path; 2705 u_long flags; 2706 int atflag; 2707 } 2708 #endif 2709 int 2710 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2711 { 2712 2713 if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 2714 return (EINVAL); 2715 2716 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2717 uap->flags, uap->atflag)); 2718 } 2719 2720 /* 2721 * Same as chflags() but doesn't follow symlinks. 2722 */ 2723 #ifndef _SYS_SYSPROTO_H_ 2724 struct lchflags_args { 2725 const char *path; 2726 u_long flags; 2727 }; 2728 #endif 2729 int 2730 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2731 { 2732 2733 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2734 uap->flags, AT_SYMLINK_NOFOLLOW)); 2735 } 2736 2737 static int 2738 kern_chflagsat(struct thread *td, int fd, const char *path, 2739 enum uio_seg pathseg, u_long flags, int atflag) 2740 { 2741 struct nameidata nd; 2742 int error; 2743 2744 AUDIT_ARG_FFLAGS(flags); 2745 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2746 AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, 2747 &cap_fchflags_rights, td); 2748 if ((error = namei(&nd)) != 0) 2749 return (error); 2750 NDFREE_NOTHING(&nd); 2751 error = setfflags(td, nd.ni_vp, flags); 2752 vrele(nd.ni_vp); 2753 return (error); 2754 } 2755 2756 /* 2757 * Change flags of a file given a file descriptor. 
2758 */ 2759 #ifndef _SYS_SYSPROTO_H_ 2760 struct fchflags_args { 2761 int fd; 2762 u_long flags; 2763 }; 2764 #endif 2765 int 2766 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2767 { 2768 struct file *fp; 2769 int error; 2770 2771 AUDIT_ARG_FD(uap->fd); 2772 AUDIT_ARG_FFLAGS(uap->flags); 2773 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2774 &fp); 2775 if (error != 0) 2776 return (error); 2777 #ifdef AUDIT 2778 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2779 AUDIT_ARG_VNODE1(fp->f_vnode); 2780 VOP_UNLOCK(fp->f_vnode); 2781 #endif 2782 error = setfflags(td, fp->f_vnode, uap->flags); 2783 fdrop(fp, td); 2784 return (error); 2785 } 2786 2787 /* 2788 * Common implementation code for chmod(), lchmod() and fchmod(). 2789 */ 2790 int 2791 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2792 { 2793 struct mount *mp; 2794 struct vattr vattr; 2795 int error; 2796 2797 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2798 return (error); 2799 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2800 VATTR_NULL(&vattr); 2801 vattr.va_mode = mode & ALLPERMS; 2802 #ifdef MAC 2803 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2804 if (error == 0) 2805 #endif 2806 error = VOP_SETATTR(vp, &vattr, cred); 2807 VOP_UNLOCK(vp); 2808 vn_finished_write(mp); 2809 return (error); 2810 } 2811 2812 /* 2813 * Change mode of a file given path name. 
2814 */ 2815 #ifndef _SYS_SYSPROTO_H_ 2816 struct chmod_args { 2817 char *path; 2818 int mode; 2819 }; 2820 #endif 2821 int 2822 sys_chmod(struct thread *td, struct chmod_args *uap) 2823 { 2824 2825 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2826 uap->mode, 0)); 2827 } 2828 2829 #ifndef _SYS_SYSPROTO_H_ 2830 struct fchmodat_args { 2831 int dirfd; 2832 char *path; 2833 mode_t mode; 2834 int flag; 2835 } 2836 #endif 2837 int 2838 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2839 { 2840 2841 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 2842 return (EINVAL); 2843 2844 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2845 uap->mode, uap->flag)); 2846 } 2847 2848 /* 2849 * Change mode of a file given path name (don't follow links.) 2850 */ 2851 #ifndef _SYS_SYSPROTO_H_ 2852 struct lchmod_args { 2853 char *path; 2854 int mode; 2855 }; 2856 #endif 2857 int 2858 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2859 { 2860 2861 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2862 uap->mode, AT_SYMLINK_NOFOLLOW)); 2863 } 2864 2865 int 2866 kern_fchmodat(struct thread *td, int fd, const char *path, 2867 enum uio_seg pathseg, mode_t mode, int flag) 2868 { 2869 struct nameidata nd; 2870 int error; 2871 2872 AUDIT_ARG_MODE(mode); 2873 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2874 AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, 2875 &cap_fchmod_rights, td); 2876 if ((error = namei(&nd)) != 0) 2877 return (error); 2878 NDFREE_NOTHING(&nd); 2879 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2880 vrele(nd.ni_vp); 2881 return (error); 2882 } 2883 2884 /* 2885 * Change mode of a file given a file descriptor. 
2886 */ 2887 #ifndef _SYS_SYSPROTO_H_ 2888 struct fchmod_args { 2889 int fd; 2890 int mode; 2891 }; 2892 #endif 2893 int 2894 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2895 { 2896 struct file *fp; 2897 int error; 2898 2899 AUDIT_ARG_FD(uap->fd); 2900 AUDIT_ARG_MODE(uap->mode); 2901 2902 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2903 if (error != 0) 2904 return (error); 2905 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2906 fdrop(fp, td); 2907 return (error); 2908 } 2909 2910 /* 2911 * Common implementation for chown(), lchown(), and fchown() 2912 */ 2913 int 2914 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2915 gid_t gid) 2916 { 2917 struct mount *mp; 2918 struct vattr vattr; 2919 int error; 2920 2921 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2922 return (error); 2923 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2924 VATTR_NULL(&vattr); 2925 vattr.va_uid = uid; 2926 vattr.va_gid = gid; 2927 #ifdef MAC 2928 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2929 vattr.va_gid); 2930 if (error == 0) 2931 #endif 2932 error = VOP_SETATTR(vp, &vattr, cred); 2933 VOP_UNLOCK(vp); 2934 vn_finished_write(mp); 2935 return (error); 2936 } 2937 2938 /* 2939 * Set ownership given a path name. 
2940 */ 2941 #ifndef _SYS_SYSPROTO_H_ 2942 struct chown_args { 2943 char *path; 2944 int uid; 2945 int gid; 2946 }; 2947 #endif 2948 int 2949 sys_chown(struct thread *td, struct chown_args *uap) 2950 { 2951 2952 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2953 uap->gid, 0)); 2954 } 2955 2956 #ifndef _SYS_SYSPROTO_H_ 2957 struct fchownat_args { 2958 int fd; 2959 const char * path; 2960 uid_t uid; 2961 gid_t gid; 2962 int flag; 2963 }; 2964 #endif 2965 int 2966 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2967 { 2968 2969 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 2970 return (EINVAL); 2971 2972 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2973 uap->gid, uap->flag)); 2974 } 2975 2976 int 2977 kern_fchownat(struct thread *td, int fd, const char *path, 2978 enum uio_seg pathseg, int uid, int gid, int flag) 2979 { 2980 struct nameidata nd; 2981 int error; 2982 2983 AUDIT_ARG_OWNER(uid, gid); 2984 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2985 AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, 2986 &cap_fchown_rights, td); 2987 2988 if ((error = namei(&nd)) != 0) 2989 return (error); 2990 NDFREE_NOTHING(&nd); 2991 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2992 vrele(nd.ni_vp); 2993 return (error); 2994 } 2995 2996 /* 2997 * Set ownership given a path name, do not cross symlinks. 2998 */ 2999 #ifndef _SYS_SYSPROTO_H_ 3000 struct lchown_args { 3001 char *path; 3002 int uid; 3003 int gid; 3004 }; 3005 #endif 3006 int 3007 sys_lchown(struct thread *td, struct lchown_args *uap) 3008 { 3009 3010 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3011 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3012 } 3013 3014 /* 3015 * Set ownership given a file descriptor. 
3016 */ 3017 #ifndef _SYS_SYSPROTO_H_ 3018 struct fchown_args { 3019 int fd; 3020 int uid; 3021 int gid; 3022 }; 3023 #endif 3024 int 3025 sys_fchown(struct thread *td, struct fchown_args *uap) 3026 { 3027 struct file *fp; 3028 int error; 3029 3030 AUDIT_ARG_FD(uap->fd); 3031 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3032 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3033 if (error != 0) 3034 return (error); 3035 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3036 fdrop(fp, td); 3037 return (error); 3038 } 3039 3040 /* 3041 * Common implementation code for utimes(), lutimes(), and futimes(). 3042 */ 3043 static int 3044 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3045 struct timespec *tsp) 3046 { 3047 struct timeval tv[2]; 3048 const struct timeval *tvp; 3049 int error; 3050 3051 if (usrtvp == NULL) { 3052 vfs_timestamp(&tsp[0]); 3053 tsp[1] = tsp[0]; 3054 } else { 3055 if (tvpseg == UIO_SYSSPACE) { 3056 tvp = usrtvp; 3057 } else { 3058 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3059 return (error); 3060 tvp = tv; 3061 } 3062 3063 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3064 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3065 return (EINVAL); 3066 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3067 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3068 } 3069 return (0); 3070 } 3071 3072 /* 3073 * Common implementation code for futimens(), utimensat(). 
3074 */ 3075 #define UTIMENS_NULL 0x1 3076 #define UTIMENS_EXIT 0x2 3077 static int 3078 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3079 struct timespec *tsp, int *retflags) 3080 { 3081 struct timespec tsnow; 3082 int error; 3083 3084 vfs_timestamp(&tsnow); 3085 *retflags = 0; 3086 if (usrtsp == NULL) { 3087 tsp[0] = tsnow; 3088 tsp[1] = tsnow; 3089 *retflags |= UTIMENS_NULL; 3090 return (0); 3091 } 3092 if (tspseg == UIO_SYSSPACE) { 3093 tsp[0] = usrtsp[0]; 3094 tsp[1] = usrtsp[1]; 3095 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3096 return (error); 3097 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3098 *retflags |= UTIMENS_EXIT; 3099 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3100 *retflags |= UTIMENS_NULL; 3101 if (tsp[0].tv_nsec == UTIME_OMIT) 3102 tsp[0].tv_sec = VNOVAL; 3103 else if (tsp[0].tv_nsec == UTIME_NOW) 3104 tsp[0] = tsnow; 3105 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3106 return (EINVAL); 3107 if (tsp[1].tv_nsec == UTIME_OMIT) 3108 tsp[1].tv_sec = VNOVAL; 3109 else if (tsp[1].tv_nsec == UTIME_NOW) 3110 tsp[1] = tsnow; 3111 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3112 return (EINVAL); 3113 3114 return (0); 3115 } 3116 3117 /* 3118 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3119 * and utimensat(). 
3120 */ 3121 static int 3122 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3123 int numtimes, int nullflag) 3124 { 3125 struct mount *mp; 3126 struct vattr vattr; 3127 int error, setbirthtime; 3128 3129 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3130 return (error); 3131 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3132 setbirthtime = 0; 3133 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3134 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3135 setbirthtime = 1; 3136 VATTR_NULL(&vattr); 3137 vattr.va_atime = ts[0]; 3138 vattr.va_mtime = ts[1]; 3139 if (setbirthtime) 3140 vattr.va_birthtime = ts[1]; 3141 if (numtimes > 2) 3142 vattr.va_birthtime = ts[2]; 3143 if (nullflag) 3144 vattr.va_vaflags |= VA_UTIMES_NULL; 3145 #ifdef MAC 3146 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3147 vattr.va_mtime); 3148 #endif 3149 if (error == 0) 3150 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3151 VOP_UNLOCK(vp); 3152 vn_finished_write(mp); 3153 return (error); 3154 } 3155 3156 /* 3157 * Set the access and modification times of a file. 
3158 */ 3159 #ifndef _SYS_SYSPROTO_H_ 3160 struct utimes_args { 3161 char *path; 3162 struct timeval *tptr; 3163 }; 3164 #endif 3165 int 3166 sys_utimes(struct thread *td, struct utimes_args *uap) 3167 { 3168 3169 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3170 uap->tptr, UIO_USERSPACE)); 3171 } 3172 3173 #ifndef _SYS_SYSPROTO_H_ 3174 struct futimesat_args { 3175 int fd; 3176 const char * path; 3177 const struct timeval * times; 3178 }; 3179 #endif 3180 int 3181 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3182 { 3183 3184 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3185 uap->times, UIO_USERSPACE)); 3186 } 3187 3188 int 3189 kern_utimesat(struct thread *td, int fd, const char *path, 3190 enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) 3191 { 3192 struct nameidata nd; 3193 struct timespec ts[2]; 3194 int error; 3195 3196 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3197 return (error); 3198 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3199 &cap_futimes_rights, td); 3200 3201 if ((error = namei(&nd)) != 0) 3202 return (error); 3203 NDFREE_NOTHING(&nd); 3204 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3205 vrele(nd.ni_vp); 3206 return (error); 3207 } 3208 3209 /* 3210 * Set the access and modification times of a file. 
3211 */ 3212 #ifndef _SYS_SYSPROTO_H_ 3213 struct lutimes_args { 3214 char *path; 3215 struct timeval *tptr; 3216 }; 3217 #endif 3218 int 3219 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3220 { 3221 3222 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3223 UIO_USERSPACE)); 3224 } 3225 3226 int 3227 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3228 struct timeval *tptr, enum uio_seg tptrseg) 3229 { 3230 struct timespec ts[2]; 3231 struct nameidata nd; 3232 int error; 3233 3234 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3235 return (error); 3236 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3237 if ((error = namei(&nd)) != 0) 3238 return (error); 3239 NDFREE_NOTHING(&nd); 3240 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3241 vrele(nd.ni_vp); 3242 return (error); 3243 } 3244 3245 /* 3246 * Set the access and modification times of a file. 3247 */ 3248 #ifndef _SYS_SYSPROTO_H_ 3249 struct futimes_args { 3250 int fd; 3251 struct timeval *tptr; 3252 }; 3253 #endif 3254 int 3255 sys_futimes(struct thread *td, struct futimes_args *uap) 3256 { 3257 3258 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3259 } 3260 3261 int 3262 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3263 enum uio_seg tptrseg) 3264 { 3265 struct timespec ts[2]; 3266 struct file *fp; 3267 int error; 3268 3269 AUDIT_ARG_FD(fd); 3270 error = getutimes(tptr, tptrseg, ts); 3271 if (error != 0) 3272 return (error); 3273 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3274 if (error != 0) 3275 return (error); 3276 #ifdef AUDIT 3277 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3278 AUDIT_ARG_VNODE1(fp->f_vnode); 3279 VOP_UNLOCK(fp->f_vnode); 3280 #endif 3281 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3282 fdrop(fp, td); 3283 return (error); 3284 } 3285 3286 int 3287 sys_futimens(struct thread *td, struct futimens_args *uap) 3288 { 3289 3290 return (kern_futimens(td, uap->fd, 
uap->times, UIO_USERSPACE)); 3291 } 3292 3293 int 3294 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3295 enum uio_seg tptrseg) 3296 { 3297 struct timespec ts[2]; 3298 struct file *fp; 3299 int error, flags; 3300 3301 AUDIT_ARG_FD(fd); 3302 error = getutimens(tptr, tptrseg, ts, &flags); 3303 if (error != 0) 3304 return (error); 3305 if (flags & UTIMENS_EXIT) 3306 return (0); 3307 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3308 if (error != 0) 3309 return (error); 3310 #ifdef AUDIT 3311 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3312 AUDIT_ARG_VNODE1(fp->f_vnode); 3313 VOP_UNLOCK(fp->f_vnode); 3314 #endif 3315 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3316 fdrop(fp, td); 3317 return (error); 3318 } 3319 3320 int 3321 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3322 { 3323 3324 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3325 uap->times, UIO_USERSPACE, uap->flag)); 3326 } 3327 3328 int 3329 kern_utimensat(struct thread *td, int fd, const char *path, 3330 enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, 3331 int flag) 3332 { 3333 struct nameidata nd; 3334 struct timespec ts[2]; 3335 int error, flags; 3336 3337 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 3338 return (EINVAL); 3339 3340 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3341 return (error); 3342 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3343 AT_RESOLVE_BENEATH) | AUDITVNODE1, 3344 pathseg, path, fd, &cap_futimes_rights, td); 3345 if ((error = namei(&nd)) != 0) 3346 return (error); 3347 /* 3348 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3349 * POSIX states: 3350 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3351 * "Search permission is denied by a component of the path prefix." 
3352 */ 3353 NDFREE_NOTHING(&nd); 3354 if ((flags & UTIMENS_EXIT) == 0) 3355 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3356 vrele(nd.ni_vp); 3357 return (error); 3358 } 3359 3360 /* 3361 * Truncate a file given its path name. 3362 */ 3363 #ifndef _SYS_SYSPROTO_H_ 3364 struct truncate_args { 3365 char *path; 3366 int pad; 3367 off_t length; 3368 }; 3369 #endif 3370 int 3371 sys_truncate(struct thread *td, struct truncate_args *uap) 3372 { 3373 3374 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3375 } 3376 3377 int 3378 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3379 off_t length) 3380 { 3381 struct mount *mp; 3382 struct vnode *vp; 3383 void *rl_cookie; 3384 struct vattr vattr; 3385 struct nameidata nd; 3386 int error; 3387 3388 if (length < 0) 3389 return (EINVAL); 3390 retry: 3391 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3392 if ((error = namei(&nd)) != 0) 3393 return (error); 3394 vp = nd.ni_vp; 3395 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3396 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3397 vn_rangelock_unlock(vp, rl_cookie); 3398 vrele(vp); 3399 return (error); 3400 } 3401 NDFREE(&nd, NDF_ONLY_PNBUF); 3402 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3403 if (vp->v_type == VDIR) 3404 error = EISDIR; 3405 #ifdef MAC 3406 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3407 } 3408 #endif 3409 else if ((error = vn_writechk(vp)) == 0 && 3410 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3411 VATTR_NULL(&vattr); 3412 vattr.va_size = length; 3413 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3414 } 3415 VOP_UNLOCK(vp); 3416 vn_finished_write(mp); 3417 vn_rangelock_unlock(vp, rl_cookie); 3418 vrele(vp); 3419 if (error == ERELOOKUP) 3420 goto retry; 3421 return (error); 3422 } 3423 3424 #if defined(COMPAT_43) 3425 /* 3426 * Truncate a file given its path name. 
3427 */ 3428 #ifndef _SYS_SYSPROTO_H_ 3429 struct otruncate_args { 3430 char *path; 3431 long length; 3432 }; 3433 #endif 3434 int 3435 otruncate(struct thread *td, struct otruncate_args *uap) 3436 { 3437 3438 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3439 } 3440 #endif /* COMPAT_43 */ 3441 3442 #if defined(COMPAT_FREEBSD6) 3443 /* Versions with the pad argument */ 3444 int 3445 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3446 { 3447 3448 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3449 } 3450 3451 int 3452 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3453 { 3454 3455 return (kern_ftruncate(td, uap->fd, uap->length)); 3456 } 3457 #endif 3458 3459 int 3460 kern_fsync(struct thread *td, int fd, bool fullsync) 3461 { 3462 struct vnode *vp; 3463 struct mount *mp; 3464 struct file *fp; 3465 int error, lock_flags; 3466 3467 AUDIT_ARG_FD(fd); 3468 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3469 if (error != 0) 3470 return (error); 3471 vp = fp->f_vnode; 3472 #if 0 3473 if (!fullsync) 3474 /* XXXKIB: compete outstanding aio writes */; 3475 #endif 3476 retry: 3477 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3478 if (error != 0) 3479 goto drop; 3480 if (MNT_SHARED_WRITES(mp) || 3481 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3482 lock_flags = LK_SHARED; 3483 } else { 3484 lock_flags = LK_EXCLUSIVE; 3485 } 3486 vn_lock(vp, lock_flags | LK_RETRY); 3487 AUDIT_ARG_VNODE1(vp); 3488 if (vp->v_object != NULL) { 3489 VM_OBJECT_WLOCK(vp->v_object); 3490 vm_object_page_clean(vp->v_object, 0, 0, 0); 3491 VM_OBJECT_WUNLOCK(vp->v_object); 3492 } 3493 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3494 VOP_UNLOCK(vp); 3495 vn_finished_write(mp); 3496 if (error == ERELOOKUP) 3497 goto retry; 3498 drop: 3499 fdrop(fp, td); 3500 return (error); 3501 } 3502 3503 /* 3504 * Sync an open file. 
3505 */ 3506 #ifndef _SYS_SYSPROTO_H_ 3507 struct fsync_args { 3508 int fd; 3509 }; 3510 #endif 3511 int 3512 sys_fsync(struct thread *td, struct fsync_args *uap) 3513 { 3514 3515 return (kern_fsync(td, uap->fd, true)); 3516 } 3517 3518 int 3519 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3520 { 3521 3522 return (kern_fsync(td, uap->fd, false)); 3523 } 3524 3525 /* 3526 * Rename files. Source and destination must either both be directories, or 3527 * both not be directories. If target is a directory, it must be empty. 3528 */ 3529 #ifndef _SYS_SYSPROTO_H_ 3530 struct rename_args { 3531 char *from; 3532 char *to; 3533 }; 3534 #endif 3535 int 3536 sys_rename(struct thread *td, struct rename_args *uap) 3537 { 3538 3539 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3540 uap->to, UIO_USERSPACE)); 3541 } 3542 3543 #ifndef _SYS_SYSPROTO_H_ 3544 struct renameat_args { 3545 int oldfd; 3546 char *old; 3547 int newfd; 3548 char *new; 3549 }; 3550 #endif 3551 int 3552 sys_renameat(struct thread *td, struct renameat_args *uap) 3553 { 3554 3555 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3556 UIO_USERSPACE)); 3557 } 3558 3559 #ifdef MAC 3560 static int 3561 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3562 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3563 { 3564 int error; 3565 3566 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3567 AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td); 3568 if ((error = namei(fromnd)) != 0) 3569 return (error); 3570 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3571 fromnd->ni_vp, &fromnd->ni_cnd); 3572 VOP_UNLOCK(fromnd->ni_dvp); 3573 if (fromnd->ni_dvp != fromnd->ni_vp) 3574 VOP_UNLOCK(fromnd->ni_vp); 3575 if (error != 0) { 3576 NDFREE(fromnd, NDF_ONLY_PNBUF); 3577 vrele(fromnd->ni_dvp); 3578 vrele(fromnd->ni_vp); 3579 if (fromnd->ni_startdir) 3580 vrele(fromnd->ni_startdir); 3581 } 
3582 return (error); 3583 } 3584 #endif 3585 3586 int 3587 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3588 const char *new, enum uio_seg pathseg) 3589 { 3590 struct mount *mp = NULL; 3591 struct vnode *tvp, *fvp, *tdvp; 3592 struct nameidata fromnd, tond; 3593 u_int64_t tondflags; 3594 int error; 3595 3596 again: 3597 bwillwrite(); 3598 #ifdef MAC 3599 if (mac_vnode_check_rename_from_enabled()) { 3600 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3601 &fromnd); 3602 if (error != 0) 3603 return (error); 3604 } else { 3605 #endif 3606 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3607 pathseg, old, oldfd, &cap_renameat_source_rights, td); 3608 if ((error = namei(&fromnd)) != 0) 3609 return (error); 3610 #ifdef MAC 3611 } 3612 #endif 3613 fvp = fromnd.ni_vp; 3614 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2; 3615 if (fromnd.ni_vp->v_type == VDIR) 3616 tondflags |= WILLBEDIR; 3617 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3618 &cap_renameat_target_rights, td); 3619 if ((error = namei(&tond)) != 0) { 3620 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3621 if (error == EISDIR && fvp->v_type == VDIR) 3622 error = EINVAL; 3623 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3624 vrele(fromnd.ni_dvp); 3625 vrele(fvp); 3626 goto out1; 3627 } 3628 tdvp = tond.ni_dvp; 3629 tvp = tond.ni_vp; 3630 error = vn_start_write(fvp, &mp, V_NOWAIT); 3631 if (error != 0) { 3632 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3633 NDFREE(&tond, NDF_ONLY_PNBUF); 3634 if (tvp != NULL) 3635 vput(tvp); 3636 if (tdvp == tvp) 3637 vrele(tdvp); 3638 else 3639 vput(tdvp); 3640 vrele(fromnd.ni_dvp); 3641 vrele(fvp); 3642 vrele(tond.ni_startdir); 3643 if (fromnd.ni_startdir != NULL) 3644 vrele(fromnd.ni_startdir); 3645 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3646 if (error != 0) 3647 return (error); 3648 goto again; 3649 } 3650 if (tvp != NULL) { 3651 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3652 error = ENOTDIR; 3653 goto out; 3654 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3655 error = EISDIR; 3656 goto out; 3657 } 3658 #ifdef CAPABILITIES 3659 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3660 /* 3661 * If the target already exists we require CAP_UNLINKAT 3662 * from 'newfd', when newfd was used for the lookup. 3663 */ 3664 error = cap_check(&tond.ni_filecaps.fc_rights, 3665 &cap_unlinkat_rights); 3666 if (error != 0) 3667 goto out; 3668 } 3669 #endif 3670 } 3671 if (fvp == tdvp) { 3672 error = EINVAL; 3673 goto out; 3674 } 3675 /* 3676 * If the source is the same as the destination (that is, if they 3677 * are links to the same vnode), then there is nothing to do. 
3678 */ 3679 if (fvp == tvp) 3680 error = ERESTART; 3681 #ifdef MAC 3682 else 3683 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3684 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3685 #endif 3686 out: 3687 if (error == 0) { 3688 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3689 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3690 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3691 NDFREE(&tond, NDF_ONLY_PNBUF); 3692 } else { 3693 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3694 NDFREE(&tond, NDF_ONLY_PNBUF); 3695 if (tvp != NULL) 3696 vput(tvp); 3697 if (tdvp == tvp) 3698 vrele(tdvp); 3699 else 3700 vput(tdvp); 3701 vrele(fromnd.ni_dvp); 3702 vrele(fvp); 3703 } 3704 vrele(tond.ni_startdir); 3705 vn_finished_write(mp); 3706 out1: 3707 if (fromnd.ni_startdir) 3708 vrele(fromnd.ni_startdir); 3709 if (error == ERESTART) 3710 return (0); 3711 if (error == ERELOOKUP) 3712 goto again; 3713 return (error); 3714 } 3715 3716 /* 3717 * Make a directory file. 3718 */ 3719 #ifndef _SYS_SYSPROTO_H_ 3720 struct mkdir_args { 3721 char *path; 3722 int mode; 3723 }; 3724 #endif 3725 int 3726 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3727 { 3728 3729 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3730 uap->mode)); 3731 } 3732 3733 #ifndef _SYS_SYSPROTO_H_ 3734 struct mkdirat_args { 3735 int fd; 3736 char *path; 3737 mode_t mode; 3738 }; 3739 #endif 3740 int 3741 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3742 { 3743 3744 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3745 } 3746 3747 int 3748 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3749 int mode) 3750 { 3751 struct mount *mp; 3752 struct vattr vattr; 3753 struct nameidata nd; 3754 int error; 3755 3756 AUDIT_ARG_MODE(mode); 3757 restart: 3758 bwillwrite(); 3759 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3760 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3761 segflg, path, fd, 
&cap_mkdirat_rights, td); 3762 if ((error = namei(&nd)) != 0) 3763 return (error); 3764 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3765 NDFREE(&nd, NDF_ONLY_PNBUF); 3766 vput(nd.ni_dvp); 3767 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3768 return (error); 3769 goto restart; 3770 } 3771 VATTR_NULL(&vattr); 3772 vattr.va_type = VDIR; 3773 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3774 #ifdef MAC 3775 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3776 &vattr); 3777 if (error != 0) 3778 goto out; 3779 #endif 3780 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3781 #ifdef MAC 3782 out: 3783 #endif 3784 NDFREE(&nd, NDF_ONLY_PNBUF); 3785 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3786 vn_finished_write(mp); 3787 if (error == ERELOOKUP) 3788 goto restart; 3789 return (error); 3790 } 3791 3792 /* 3793 * Remove a directory file. 3794 */ 3795 #ifndef _SYS_SYSPROTO_H_ 3796 struct rmdir_args { 3797 char *path; 3798 }; 3799 #endif 3800 int 3801 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3802 { 3803 3804 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3805 0)); 3806 } 3807 3808 int 3809 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3810 enum uio_seg pathseg, int flag) 3811 { 3812 struct mount *mp; 3813 struct vnode *vp; 3814 struct file *fp; 3815 struct nameidata nd; 3816 cap_rights_t rights; 3817 int error; 3818 3819 fp = NULL; 3820 if (fd != FD_NONE) { 3821 error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), 3822 &fp); 3823 if (error != 0) 3824 return (error); 3825 } 3826 3827 restart: 3828 bwillwrite(); 3829 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3830 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3831 pathseg, path, dfd, &cap_unlinkat_rights, td); 3832 if ((error = namei(&nd)) != 0) 3833 goto fdout; 3834 vp = nd.ni_vp; 3835 if (vp->v_type != VDIR) { 3836 error = ENOTDIR; 
3837 goto out; 3838 } 3839 /* 3840 * No rmdir "." please. 3841 */ 3842 if (nd.ni_dvp == vp) { 3843 error = EINVAL; 3844 goto out; 3845 } 3846 /* 3847 * The root of a mounted filesystem cannot be deleted. 3848 */ 3849 if (vp->v_vflag & VV_ROOT) { 3850 error = EBUSY; 3851 goto out; 3852 } 3853 3854 if (fp != NULL && fp->f_vnode != vp) { 3855 if (VN_IS_DOOMED(fp->f_vnode)) 3856 error = EBADF; 3857 else 3858 error = EDEADLK; 3859 goto out; 3860 } 3861 3862 #ifdef MAC 3863 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3864 &nd.ni_cnd); 3865 if (error != 0) 3866 goto out; 3867 #endif 3868 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3869 NDFREE(&nd, NDF_ONLY_PNBUF); 3870 vput(vp); 3871 if (nd.ni_dvp == vp) 3872 vrele(nd.ni_dvp); 3873 else 3874 vput(nd.ni_dvp); 3875 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3876 goto fdout; 3877 goto restart; 3878 } 3879 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3880 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3881 vn_finished_write(mp); 3882 out: 3883 NDFREE(&nd, NDF_ONLY_PNBUF); 3884 vput(vp); 3885 if (nd.ni_dvp == vp) 3886 vrele(nd.ni_dvp); 3887 else 3888 vput(nd.ni_dvp); 3889 if (error == ERELOOKUP) 3890 goto restart; 3891 fdout: 3892 if (fp != NULL) 3893 fdrop(fp, td); 3894 return (error); 3895 } 3896 3897 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3898 int 3899 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3900 long *basep, void (*func)(struct freebsd11_dirent *)) 3901 { 3902 struct freebsd11_dirent dstdp; 3903 struct dirent *dp, *edp; 3904 char *dirbuf; 3905 off_t base; 3906 ssize_t resid, ucount; 3907 int error; 3908 3909 /* XXX arbitrary sanity limit on `count'. 
*/ 3910 count = min(count, 64 * 1024); 3911 3912 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3913 3914 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3915 UIO_SYSSPACE); 3916 if (error != 0) 3917 goto done; 3918 if (basep != NULL) 3919 *basep = base; 3920 3921 ucount = 0; 3922 for (dp = (struct dirent *)dirbuf, 3923 edp = (struct dirent *)&dirbuf[count - resid]; 3924 ucount < count && dp < edp; ) { 3925 if (dp->d_reclen == 0) 3926 break; 3927 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3928 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3929 continue; 3930 dstdp.d_type = dp->d_type; 3931 dstdp.d_namlen = dp->d_namlen; 3932 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3933 if (dstdp.d_fileno != dp->d_fileno) { 3934 switch (ino64_trunc_error) { 3935 default: 3936 case 0: 3937 break; 3938 case 1: 3939 error = EOVERFLOW; 3940 goto done; 3941 case 2: 3942 dstdp.d_fileno = UINT32_MAX; 3943 break; 3944 } 3945 } 3946 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3947 ((dp->d_namlen + 1 + 3) &~ 3); 3948 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3949 bzero(dstdp.d_name + dstdp.d_namlen, 3950 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3951 dstdp.d_namlen); 3952 MPASS(dstdp.d_reclen <= dp->d_reclen); 3953 MPASS(ucount + dstdp.d_reclen <= count); 3954 if (func != NULL) 3955 func(&dstdp); 3956 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3957 if (error != 0) 3958 break; 3959 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3960 ucount += dstdp.d_reclen; 3961 } 3962 3963 done: 3964 free(dirbuf, M_TEMP); 3965 if (error == 0) 3966 td->td_retval[0] = ucount; 3967 return (error); 3968 } 3969 #endif /* COMPAT */ 3970 3971 #ifdef COMPAT_43 3972 static void 3973 ogetdirentries_cvt(struct freebsd11_dirent *dp) 3974 { 3975 #if (BYTE_ORDER == LITTLE_ENDIAN) 3976 /* 3977 * The expected low byte of dp->d_namlen is our dp->d_type. 3978 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
3979 */ 3980 dp->d_type = dp->d_namlen; 3981 dp->d_namlen = 0; 3982 #else 3983 /* 3984 * The dp->d_type is the high byte of the expected dp->d_namlen, 3985 * so must be zero'ed. 3986 */ 3987 dp->d_type = 0; 3988 #endif 3989 } 3990 3991 /* 3992 * Read a block of directory entries in a filesystem independent format. 3993 */ 3994 #ifndef _SYS_SYSPROTO_H_ 3995 struct ogetdirentries_args { 3996 int fd; 3997 char *buf; 3998 u_int count; 3999 long *basep; 4000 }; 4001 #endif 4002 int 4003 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4004 { 4005 long loff; 4006 int error; 4007 4008 error = kern_ogetdirentries(td, uap, &loff); 4009 if (error == 0) 4010 error = copyout(&loff, uap->basep, sizeof(long)); 4011 return (error); 4012 } 4013 4014 int 4015 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4016 long *ploff) 4017 { 4018 long base; 4019 int error; 4020 4021 /* XXX arbitrary sanity limit on `count'. */ 4022 if (uap->count > 64 * 1024) 4023 return (EINVAL); 4024 4025 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4026 &base, ogetdirentries_cvt); 4027 4028 if (error == 0 && uap->basep != NULL) 4029 error = copyout(&base, uap->basep, sizeof(long)); 4030 4031 return (error); 4032 } 4033 #endif /* COMPAT_43 */ 4034 4035 #if defined(COMPAT_FREEBSD11) 4036 #ifndef _SYS_SYSPROTO_H_ 4037 struct freebsd11_getdirentries_args { 4038 int fd; 4039 char *buf; 4040 u_int count; 4041 long *basep; 4042 }; 4043 #endif 4044 int 4045 freebsd11_getdirentries(struct thread *td, 4046 struct freebsd11_getdirentries_args *uap) 4047 { 4048 long base; 4049 int error; 4050 4051 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4052 &base, NULL); 4053 4054 if (error == 0 && uap->basep != NULL) 4055 error = copyout(&base, uap->basep, sizeof(long)); 4056 return (error); 4057 } 4058 4059 int 4060 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4061 { 4062 struct 
freebsd11_getdirentries_args ap; 4063 4064 ap.fd = uap->fd; 4065 ap.buf = uap->buf; 4066 ap.count = uap->count; 4067 ap.basep = NULL; 4068 return (freebsd11_getdirentries(td, &ap)); 4069 } 4070 #endif /* COMPAT_FREEBSD11 */ 4071 4072 /* 4073 * Read a block of directory entries in a filesystem independent format. 4074 */ 4075 int 4076 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4077 { 4078 off_t base; 4079 int error; 4080 4081 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4082 NULL, UIO_USERSPACE); 4083 if (error != 0) 4084 return (error); 4085 if (uap->basep != NULL) 4086 error = copyout(&base, uap->basep, sizeof(off_t)); 4087 return (error); 4088 } 4089 4090 int 4091 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4092 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4093 { 4094 struct vnode *vp; 4095 struct file *fp; 4096 struct uio auio; 4097 struct iovec aiov; 4098 off_t loff; 4099 int error, eofflag; 4100 off_t foffset; 4101 4102 AUDIT_ARG_FD(fd); 4103 if (count > IOSIZE_MAX) 4104 return (EINVAL); 4105 auio.uio_resid = count; 4106 error = getvnode(td, fd, &cap_read_rights, &fp); 4107 if (error != 0) 4108 return (error); 4109 if ((fp->f_flag & FREAD) == 0) { 4110 fdrop(fp, td); 4111 return (EBADF); 4112 } 4113 vp = fp->f_vnode; 4114 foffset = foffset_lock(fp, 0); 4115 unionread: 4116 if (vp->v_type != VDIR) { 4117 error = EINVAL; 4118 goto fail; 4119 } 4120 aiov.iov_base = buf; 4121 aiov.iov_len = count; 4122 auio.uio_iov = &aiov; 4123 auio.uio_iovcnt = 1; 4124 auio.uio_rw = UIO_READ; 4125 auio.uio_segflg = bufseg; 4126 auio.uio_td = td; 4127 vn_lock(vp, LK_SHARED | LK_RETRY); 4128 AUDIT_ARG_VNODE1(vp); 4129 loff = auio.uio_offset = foffset; 4130 #ifdef MAC 4131 error = mac_vnode_check_readdir(td->td_ucred, vp); 4132 if (error == 0) 4133 #endif 4134 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4135 NULL); 4136 foffset = auio.uio_offset; 4137 if (error != 0) { 4138 
VOP_UNLOCK(vp); 4139 goto fail; 4140 } 4141 if (count == auio.uio_resid && 4142 (vp->v_vflag & VV_ROOT) && 4143 (vp->v_mount->mnt_flag & MNT_UNION)) { 4144 struct vnode *tvp = vp; 4145 4146 vp = vp->v_mount->mnt_vnodecovered; 4147 VREF(vp); 4148 fp->f_vnode = vp; 4149 foffset = 0; 4150 vput(tvp); 4151 goto unionread; 4152 } 4153 VOP_UNLOCK(vp); 4154 *basep = loff; 4155 if (residp != NULL) 4156 *residp = auio.uio_resid; 4157 td->td_retval[0] = count - auio.uio_resid; 4158 fail: 4159 foffset_unlock(fp, foffset, 0); 4160 fdrop(fp, td); 4161 return (error); 4162 } 4163 4164 /* 4165 * Set the mode mask for creation of filesystem nodes. 4166 */ 4167 #ifndef _SYS_SYSPROTO_H_ 4168 struct umask_args { 4169 int newmask; 4170 }; 4171 #endif 4172 int 4173 sys_umask(struct thread *td, struct umask_args *uap) 4174 { 4175 struct pwddesc *pdp; 4176 4177 pdp = td->td_proc->p_pd; 4178 PWDDESC_XLOCK(pdp); 4179 td->td_retval[0] = pdp->pd_cmask; 4180 pdp->pd_cmask = uap->newmask & ALLPERMS; 4181 PWDDESC_XUNLOCK(pdp); 4182 return (0); 4183 } 4184 4185 /* 4186 * Void all references to file by ripping underlying filesystem away from 4187 * vnode. 
4188 */ 4189 #ifndef _SYS_SYSPROTO_H_ 4190 struct revoke_args { 4191 char *path; 4192 }; 4193 #endif 4194 int 4195 sys_revoke(struct thread *td, struct revoke_args *uap) 4196 { 4197 struct vnode *vp; 4198 struct vattr vattr; 4199 struct nameidata nd; 4200 int error; 4201 4202 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4203 uap->path, td); 4204 if ((error = namei(&nd)) != 0) 4205 return (error); 4206 vp = nd.ni_vp; 4207 NDFREE_NOTHING(&nd); 4208 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4209 error = EINVAL; 4210 goto out; 4211 } 4212 #ifdef MAC 4213 error = mac_vnode_check_revoke(td->td_ucred, vp); 4214 if (error != 0) 4215 goto out; 4216 #endif 4217 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4218 if (error != 0) 4219 goto out; 4220 if (td->td_ucred->cr_uid != vattr.va_uid) { 4221 error = priv_check(td, PRIV_VFS_ADMIN); 4222 if (error != 0) 4223 goto out; 4224 } 4225 if (devfs_usecount(vp) > 0) 4226 VOP_REVOKE(vp, REVOKEALL); 4227 out: 4228 vput(vp); 4229 return (error); 4230 } 4231 4232 /* 4233 * Convert a user file descriptor to a kernel file entry and check that, if it 4234 * is a capability, the correct rights are present. A reference on the file 4235 * entry is held upon returning. 4236 */ 4237 int 4238 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4239 { 4240 struct file *fp; 4241 int error; 4242 4243 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); 4244 if (error != 0) 4245 return (error); 4246 4247 /* 4248 * The file could be not of the vnode type, or it may be not 4249 * yet fully initialized, in which case the f_vnode pointer 4250 * may be set, but f_ops is still badfileops. E.g., 4251 * devfs_open() transiently create such situation to 4252 * facilitate csw d_fdopen(). 4253 * 4254 * Dupfdopen() handling in kern_openat() installs the 4255 * half-baked file into the process descriptor table, allowing 4256 * other thread to dereference it. 
Guard against the race by 4257 * checking f_ops. 4258 */ 4259 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4260 fdrop(fp, td); 4261 return (EINVAL); 4262 } 4263 *fpp = fp; 4264 return (0); 4265 } 4266 4267 /* 4268 * Get an (NFS) file handle. 4269 */ 4270 #ifndef _SYS_SYSPROTO_H_ 4271 struct lgetfh_args { 4272 char *fname; 4273 fhandle_t *fhp; 4274 }; 4275 #endif 4276 int 4277 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4278 { 4279 4280 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4281 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4282 } 4283 4284 #ifndef _SYS_SYSPROTO_H_ 4285 struct getfh_args { 4286 char *fname; 4287 fhandle_t *fhp; 4288 }; 4289 #endif 4290 int 4291 sys_getfh(struct thread *td, struct getfh_args *uap) 4292 { 4293 4294 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4295 uap->fhp, UIO_USERSPACE)); 4296 } 4297 4298 /* 4299 * syscall for the rpc.lockd to use to translate an open descriptor into 4300 * a NFS file handle. 4301 * 4302 * warning: do not remove the priv_check() call or this becomes one giant 4303 * security hole. 
4304 */ 4305 #ifndef _SYS_SYSPROTO_H_ 4306 struct getfhat_args { 4307 int fd; 4308 char *path; 4309 fhandle_t *fhp; 4310 int flags; 4311 }; 4312 #endif 4313 int 4314 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4315 { 4316 4317 if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4318 return (EINVAL); 4319 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4320 uap->fhp, UIO_USERSPACE)); 4321 } 4322 4323 int 4324 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4325 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4326 { 4327 struct nameidata nd; 4328 fhandle_t fh; 4329 struct vnode *vp; 4330 int error; 4331 4332 error = priv_check(td, PRIV_VFS_GETFH); 4333 if (error != 0) 4334 return (error); 4335 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4336 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4337 fd, td); 4338 error = namei(&nd); 4339 if (error != 0) 4340 return (error); 4341 NDFREE_NOTHING(&nd); 4342 vp = nd.ni_vp; 4343 bzero(&fh, sizeof(fh)); 4344 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4345 error = VOP_VPTOFH(vp, &fh.fh_fid); 4346 vput(vp); 4347 if (error == 0) { 4348 if (fhseg == UIO_USERSPACE) 4349 error = copyout(&fh, fhp, sizeof (fh)); 4350 else 4351 memcpy(fhp, &fh, sizeof(fh)); 4352 } 4353 return (error); 4354 } 4355 4356 #ifndef _SYS_SYSPROTO_H_ 4357 struct fhlink_args { 4358 fhandle_t *fhp; 4359 const char *to; 4360 }; 4361 #endif 4362 int 4363 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4364 { 4365 4366 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4367 } 4368 4369 #ifndef _SYS_SYSPROTO_H_ 4370 struct fhlinkat_args { 4371 fhandle_t *fhp; 4372 int tofd; 4373 const char *to; 4374 }; 4375 #endif 4376 int 4377 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4378 { 4379 4380 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4381 } 4382 4383 static int 4384 
kern_fhlinkat(struct thread *td, int fd, const char *path, 4385 enum uio_seg pathseg, fhandle_t *fhp) 4386 { 4387 fhandle_t fh; 4388 struct mount *mp; 4389 struct vnode *vp; 4390 int error; 4391 4392 error = priv_check(td, PRIV_VFS_GETFH); 4393 if (error != 0) 4394 return (error); 4395 error = copyin(fhp, &fh, sizeof(fh)); 4396 if (error != 0) 4397 return (error); 4398 do { 4399 bwillwrite(); 4400 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4401 return (ESTALE); 4402 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4403 vfs_unbusy(mp); 4404 if (error != 0) 4405 return (error); 4406 VOP_UNLOCK(vp); 4407 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4408 } while (error == EAGAIN || error == ERELOOKUP); 4409 return (error); 4410 } 4411 4412 #ifndef _SYS_SYSPROTO_H_ 4413 struct fhreadlink_args { 4414 fhandle_t *fhp; 4415 char *buf; 4416 size_t bufsize; 4417 }; 4418 #endif 4419 int 4420 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4421 { 4422 fhandle_t fh; 4423 struct mount *mp; 4424 struct vnode *vp; 4425 int error; 4426 4427 error = priv_check(td, PRIV_VFS_GETFH); 4428 if (error != 0) 4429 return (error); 4430 if (uap->bufsize > IOSIZE_MAX) 4431 return (EINVAL); 4432 error = copyin(uap->fhp, &fh, sizeof(fh)); 4433 if (error != 0) 4434 return (error); 4435 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4436 return (ESTALE); 4437 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4438 vfs_unbusy(mp); 4439 if (error != 0) 4440 return (error); 4441 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4442 vput(vp); 4443 return (error); 4444 } 4445 4446 /* 4447 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4448 * open descriptor. 4449 * 4450 * warning: do not remove the priv_check() call or this becomes one giant 4451 * security hole. 
4452 */ 4453 #ifndef _SYS_SYSPROTO_H_ 4454 struct fhopen_args { 4455 const struct fhandle *u_fhp; 4456 int flags; 4457 }; 4458 #endif 4459 int 4460 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4461 { 4462 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4463 } 4464 4465 int 4466 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4467 { 4468 struct mount *mp; 4469 struct vnode *vp; 4470 struct fhandle fhp; 4471 struct file *fp; 4472 int fmode, error; 4473 int indx; 4474 4475 error = priv_check(td, PRIV_VFS_FHOPEN); 4476 if (error != 0) 4477 return (error); 4478 indx = -1; 4479 fmode = FFLAGS(flags); 4480 /* why not allow a non-read/write open for our lockd? */ 4481 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4482 return (EINVAL); 4483 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4484 if (error != 0) 4485 return(error); 4486 /* find the mount point */ 4487 mp = vfs_busyfs(&fhp.fh_fsid); 4488 if (mp == NULL) 4489 return (ESTALE); 4490 /* now give me my vnode, it gets returned to me locked */ 4491 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4492 vfs_unbusy(mp); 4493 if (error != 0) 4494 return (error); 4495 4496 error = falloc_noinstall(td, &fp); 4497 if (error != 0) { 4498 vput(vp); 4499 return (error); 4500 } 4501 /* 4502 * An extra reference on `fp' has been held for us by 4503 * falloc_noinstall(). 
4504 */ 4505 4506 #ifdef INVARIANTS 4507 td->td_dupfd = -1; 4508 #endif 4509 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4510 if (error != 0) { 4511 KASSERT(fp->f_ops == &badfileops, 4512 ("VOP_OPEN in fhopen() set f_ops")); 4513 KASSERT(td->td_dupfd < 0, 4514 ("fhopen() encountered fdopen()")); 4515 4516 vput(vp); 4517 goto bad; 4518 } 4519 #ifdef INVARIANTS 4520 td->td_dupfd = 0; 4521 #endif 4522 fp->f_vnode = vp; 4523 finit_vnode(fp, fmode, NULL, &vnops); 4524 VOP_UNLOCK(vp); 4525 if ((fmode & O_TRUNC) != 0) { 4526 error = fo_truncate(fp, 0, td->td_ucred, td); 4527 if (error != 0) 4528 goto bad; 4529 } 4530 4531 error = finstall(td, fp, &indx, fmode, NULL); 4532 bad: 4533 fdrop(fp, td); 4534 td->td_retval[0] = indx; 4535 return (error); 4536 } 4537 4538 /* 4539 * Stat an (NFS) file handle. 4540 */ 4541 #ifndef _SYS_SYSPROTO_H_ 4542 struct fhstat_args { 4543 struct fhandle *u_fhp; 4544 struct stat *sb; 4545 }; 4546 #endif 4547 int 4548 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4549 { 4550 struct stat sb; 4551 struct fhandle fh; 4552 int error; 4553 4554 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4555 if (error != 0) 4556 return (error); 4557 error = kern_fhstat(td, fh, &sb); 4558 if (error == 0) 4559 error = copyout(&sb, uap->sb, sizeof(sb)); 4560 return (error); 4561 } 4562 4563 int 4564 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4565 { 4566 struct mount *mp; 4567 struct vnode *vp; 4568 int error; 4569 4570 error = priv_check(td, PRIV_VFS_FHSTAT); 4571 if (error != 0) 4572 return (error); 4573 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4574 return (ESTALE); 4575 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4576 vfs_unbusy(mp); 4577 if (error != 0) 4578 return (error); 4579 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td); 4580 vput(vp); 4581 return (error); 4582 } 4583 4584 /* 4585 * Implement fstatfs() for (NFS) file handles. 
4586 */ 4587 #ifndef _SYS_SYSPROTO_H_ 4588 struct fhstatfs_args { 4589 struct fhandle *u_fhp; 4590 struct statfs *buf; 4591 }; 4592 #endif 4593 int 4594 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4595 { 4596 struct statfs *sfp; 4597 fhandle_t fh; 4598 int error; 4599 4600 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4601 if (error != 0) 4602 return (error); 4603 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4604 error = kern_fhstatfs(td, fh, sfp); 4605 if (error == 0) 4606 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4607 free(sfp, M_STATFS); 4608 return (error); 4609 } 4610 4611 int 4612 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4613 { 4614 struct mount *mp; 4615 struct vnode *vp; 4616 int error; 4617 4618 error = priv_check(td, PRIV_VFS_FHSTATFS); 4619 if (error != 0) 4620 return (error); 4621 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4622 return (ESTALE); 4623 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4624 if (error != 0) { 4625 vfs_unbusy(mp); 4626 return (error); 4627 } 4628 vput(vp); 4629 error = prison_canseemount(td->td_ucred, mp); 4630 if (error != 0) 4631 goto out; 4632 #ifdef MAC 4633 error = mac_mount_check_stat(td->td_ucred, mp); 4634 if (error != 0) 4635 goto out; 4636 #endif 4637 error = VFS_STATFS(mp, buf); 4638 out: 4639 vfs_unbusy(mp); 4640 return (error); 4641 } 4642 4643 /* 4644 * Unlike madvise(2), we do not make a best effort to remember every 4645 * possible caching hint. Instead, we remember the last setting with 4646 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4647 * region of any current setting. 
4648 */ 4649 int 4650 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4651 int advice) 4652 { 4653 struct fadvise_info *fa, *new; 4654 struct file *fp; 4655 struct vnode *vp; 4656 off_t end; 4657 int error; 4658 4659 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4660 return (EINVAL); 4661 AUDIT_ARG_VALUE(advice); 4662 switch (advice) { 4663 case POSIX_FADV_SEQUENTIAL: 4664 case POSIX_FADV_RANDOM: 4665 case POSIX_FADV_NOREUSE: 4666 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4667 break; 4668 case POSIX_FADV_NORMAL: 4669 case POSIX_FADV_WILLNEED: 4670 case POSIX_FADV_DONTNEED: 4671 new = NULL; 4672 break; 4673 default: 4674 return (EINVAL); 4675 } 4676 /* XXX: CAP_POSIX_FADVISE? */ 4677 AUDIT_ARG_FD(fd); 4678 error = fget(td, fd, &cap_no_rights, &fp); 4679 if (error != 0) 4680 goto out; 4681 AUDIT_ARG_FILE(td->td_proc, fp); 4682 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4683 error = ESPIPE; 4684 goto out; 4685 } 4686 if (fp->f_type != DTYPE_VNODE) { 4687 error = ENODEV; 4688 goto out; 4689 } 4690 vp = fp->f_vnode; 4691 if (vp->v_type != VREG) { 4692 error = ENODEV; 4693 goto out; 4694 } 4695 if (len == 0) 4696 end = OFF_MAX; 4697 else 4698 end = offset + len - 1; 4699 switch (advice) { 4700 case POSIX_FADV_SEQUENTIAL: 4701 case POSIX_FADV_RANDOM: 4702 case POSIX_FADV_NOREUSE: 4703 /* 4704 * Try to merge any existing non-standard region with 4705 * this new region if possible, otherwise create a new 4706 * non-standard region for this request. 
4707 */ 4708 mtx_pool_lock(mtxpool_sleep, fp); 4709 fa = fp->f_advice; 4710 if (fa != NULL && fa->fa_advice == advice && 4711 ((fa->fa_start <= end && fa->fa_end >= offset) || 4712 (end != OFF_MAX && fa->fa_start == end + 1) || 4713 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4714 if (offset < fa->fa_start) 4715 fa->fa_start = offset; 4716 if (end > fa->fa_end) 4717 fa->fa_end = end; 4718 } else { 4719 new->fa_advice = advice; 4720 new->fa_start = offset; 4721 new->fa_end = end; 4722 fp->f_advice = new; 4723 new = fa; 4724 } 4725 mtx_pool_unlock(mtxpool_sleep, fp); 4726 break; 4727 case POSIX_FADV_NORMAL: 4728 /* 4729 * If a the "normal" region overlaps with an existing 4730 * non-standard region, trim or remove the 4731 * non-standard region. 4732 */ 4733 mtx_pool_lock(mtxpool_sleep, fp); 4734 fa = fp->f_advice; 4735 if (fa != NULL) { 4736 if (offset <= fa->fa_start && end >= fa->fa_end) { 4737 new = fa; 4738 fp->f_advice = NULL; 4739 } else if (offset <= fa->fa_start && 4740 end >= fa->fa_start) 4741 fa->fa_start = end + 1; 4742 else if (offset <= fa->fa_end && end >= fa->fa_end) 4743 fa->fa_end = offset - 1; 4744 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4745 /* 4746 * If the "normal" region is a middle 4747 * portion of the existing 4748 * non-standard region, just remove 4749 * the whole thing rather than picking 4750 * one side or the other to 4751 * preserve. 
4752 */ 4753 new = fa; 4754 fp->f_advice = NULL; 4755 } 4756 } 4757 mtx_pool_unlock(mtxpool_sleep, fp); 4758 break; 4759 case POSIX_FADV_WILLNEED: 4760 case POSIX_FADV_DONTNEED: 4761 error = VOP_ADVISE(vp, offset, end, advice); 4762 break; 4763 } 4764 out: 4765 if (fp != NULL) 4766 fdrop(fp, td); 4767 free(new, M_FADVISE); 4768 return (error); 4769 } 4770 4771 int 4772 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4773 { 4774 int error; 4775 4776 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4777 uap->advice); 4778 return (kern_posix_error(td, error)); 4779 } 4780 4781 int 4782 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4783 off_t *outoffp, size_t len, unsigned int flags) 4784 { 4785 struct file *infp, *outfp; 4786 struct vnode *invp, *outvp; 4787 int error; 4788 size_t retlen; 4789 void *rl_rcookie, *rl_wcookie; 4790 off_t savinoff, savoutoff; 4791 4792 infp = outfp = NULL; 4793 rl_rcookie = rl_wcookie = NULL; 4794 savinoff = -1; 4795 error = 0; 4796 retlen = 0; 4797 4798 if (flags != 0) { 4799 error = EINVAL; 4800 goto out; 4801 } 4802 if (len > SSIZE_MAX) 4803 /* 4804 * Although the len argument is size_t, the return argument 4805 * is ssize_t (which is signed). Therefore a size that won't 4806 * fit in ssize_t can't be returned. 4807 */ 4808 len = SSIZE_MAX; 4809 4810 /* Get the file structures for the file descriptors. */ 4811 error = fget_read(td, infd, &cap_read_rights, &infp); 4812 if (error != 0) 4813 goto out; 4814 if (infp->f_ops == &badfileops) { 4815 error = EBADF; 4816 goto out; 4817 } 4818 if (infp->f_vnode == NULL) { 4819 error = EINVAL; 4820 goto out; 4821 } 4822 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4823 if (error != 0) 4824 goto out; 4825 if (outfp->f_ops == &badfileops) { 4826 error = EBADF; 4827 goto out; 4828 } 4829 if (outfp->f_vnode == NULL) { 4830 error = EINVAL; 4831 goto out; 4832 } 4833 4834 /* Set the offset pointers to the correct place. 
*/ 4835 if (inoffp == NULL) 4836 inoffp = &infp->f_offset; 4837 if (outoffp == NULL) 4838 outoffp = &outfp->f_offset; 4839 savinoff = *inoffp; 4840 savoutoff = *outoffp; 4841 4842 invp = infp->f_vnode; 4843 outvp = outfp->f_vnode; 4844 /* Sanity check the f_flag bits. */ 4845 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4846 (infp->f_flag & FREAD) == 0) { 4847 error = EBADF; 4848 goto out; 4849 } 4850 4851 /* If len == 0, just return 0. */ 4852 if (len == 0) 4853 goto out; 4854 4855 /* 4856 * If infp and outfp refer to the same file, the byte ranges cannot 4857 * overlap. 4858 */ 4859 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4860 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4861 savinoff))) { 4862 error = EINVAL; 4863 goto out; 4864 } 4865 4866 /* Range lock the byte ranges for both invp and outvp. */ 4867 for (;;) { 4868 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4869 len); 4870 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4871 len); 4872 if (rl_rcookie != NULL) 4873 break; 4874 vn_rangelock_unlock(outvp, rl_wcookie); 4875 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4876 vn_rangelock_unlock(invp, rl_rcookie); 4877 } 4878 4879 retlen = len; 4880 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4881 flags, infp->f_cred, outfp->f_cred, td); 4882 out: 4883 if (rl_rcookie != NULL) 4884 vn_rangelock_unlock(invp, rl_rcookie); 4885 if (rl_wcookie != NULL) 4886 vn_rangelock_unlock(outvp, rl_wcookie); 4887 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4888 *inoffp = savinoff; 4889 *outoffp = savoutoff; 4890 } 4891 if (outfp != NULL) 4892 fdrop(outfp, td); 4893 if (infp != NULL) 4894 fdrop(infp, td); 4895 td->td_retval[0] = retlen; 4896 return (error); 4897 } 4898 4899 int 4900 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 4901 { 4902 off_t inoff, outoff, *inoffp, *outoffp; 4903 int error; 4904 4905 inoffp = outoffp = 
NULL; 4906 if (uap->inoffp != NULL) { 4907 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 4908 if (error != 0) 4909 return (error); 4910 inoffp = &inoff; 4911 } 4912 if (uap->outoffp != NULL) { 4913 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 4914 if (error != 0) 4915 return (error); 4916 outoffp = &outoff; 4917 } 4918 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 4919 outoffp, uap->len, uap->flags); 4920 if (error == 0 && uap->inoffp != NULL) 4921 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 4922 if (error == 0 && uap->outoffp != NULL) 4923 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 4924 return (error); 4925 } 4926