1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <fs/devfs/devfs.h> 91 92 #include <ufs/ufs/quota.h> 93 94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 95 96 static int kern_chflagsat(struct thread *td, int fd, const char *path, 97 enum uio_seg pathseg, u_long flags, int atflag); 98 static int setfflags(struct thread *td, struct vnode *, u_long); 99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100 static int getutimens(const struct timespec *, enum uio_seg, 101 struct timespec *, int *); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 107 enum uio_seg pathseg, fhandle_t *fhp); 108 static int kern_getfhat(struct thread *td, int flags, int fd, 109 const char *path, enum uio_seg pathseg, fhandle_t *fhp); 110 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 111 size_t count, struct thread *td); 112 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 113 const char *path, enum uio_seg segflag); 114 115 int 116 kern_sync(struct thread *td) 117 { 118 struct mount *mp, *nmp; 119 int save; 120 121 mtx_lock(&mountlist_mtx); 122 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 123 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 124 nmp = TAILQ_NEXT(mp, mnt_list); 125 continue; 126 } 127 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 128 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 129 save = curthread_pflags_set(TDP_SYNCIO); 130 vfs_periodic(mp, MNT_NOWAIT); 131 VFS_SYNC(mp, MNT_NOWAIT); 132 curthread_pflags_restore(save); 133 vn_finished_write(mp); 134 } 135 mtx_lock(&mountlist_mtx); 136 nmp = TAILQ_NEXT(mp, mnt_list); 137 vfs_unbusy(mp); 138 } 139 mtx_unlock(&mountlist_mtx); 140 return (0); 141 } 142 143 /* 144 * Sync each mounted filesystem. 145 */ 146 #ifndef _SYS_SYSPROTO_H_ 147 struct sync_args { 148 int dummy; 149 }; 150 #endif 151 /* ARGSUSED */ 152 int 153 sys_sync(struct thread *td, struct sync_args *uap) 154 { 155 156 return (kern_sync(td)); 157 } 158 159 /* 160 * Change filesystem quotas. 161 */ 162 #ifndef _SYS_SYSPROTO_H_ 163 struct quotactl_args { 164 char *path; 165 int cmd; 166 int uid; 167 caddr_t arg; 168 }; 169 #endif 170 int 171 sys_quotactl(struct thread *td, struct quotactl_args *uap) 172 { 173 struct mount *mp; 174 struct nameidata nd; 175 int error; 176 177 AUDIT_ARG_CMD(uap->cmd); 178 AUDIT_ARG_UID(uap->uid); 179 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 180 return (EPERM); 181 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 182 uap->path, td); 183 if ((error = namei(&nd)) != 0) 184 return (error); 185 NDFREE(&nd, NDF_ONLY_PNBUF); 186 mp = nd.ni_vp->v_mount; 187 vfs_ref(mp); 188 vput(nd.ni_vp); 189 error = vfs_busy(mp, 0); 190 if (error != 0) { 191 vfs_rel(mp); 192 return (error); 193 } 194 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 195 196 /* 197 * Since quota on operation typically needs to open quota 198 * file, the Q_QUOTAON handler needs to unbusy the mount point 199 * before calling into namei. Otherwise, unmount might be 200 * started between two vfs_busy() invocations (first is our, 201 * second is from mount point cross-walk code in lookup()), 202 * causing deadlock. 203 * 204 * Require that Q_QUOTAON handles the vfs_busy() reference on 205 * its own, always returning with ubusied mount point. 206 */ 207 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 208 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 209 vfs_unbusy(mp); 210 vfs_rel(mp); 211 return (error); 212 } 213 214 /* 215 * Used by statfs conversion routines to scale the block size up if 216 * necessary so that all of the block counts are <= 'max_size'. Note 217 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 218 * value of 'n'. 219 */ 220 void 221 statfs_scale_blocks(struct statfs *sf, long max_size) 222 { 223 uint64_t count; 224 int shift; 225 226 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 227 228 /* 229 * Attempt to scale the block counts to give a more accurate 230 * overview to userland of the ratio of free space to used 231 * space. To do this, find the largest block count and compute 232 * a divisor that lets it fit into a signed integer <= max_size. 233 */ 234 if (sf->f_bavail < 0) 235 count = -sf->f_bavail; 236 else 237 count = sf->f_bavail; 238 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 239 if (count <= max_size) 240 return; 241 242 count >>= flsl(max_size); 243 shift = 0; 244 while (count > 0) { 245 shift++; 246 count >>=1; 247 } 248 249 sf->f_bsize <<= shift; 250 sf->f_blocks >>= shift; 251 sf->f_bfree >>= shift; 252 sf->f_bavail >>= shift; 253 } 254 255 static int 256 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 257 { 258 int error; 259 260 if (mp == NULL) 261 return (EBADF); 262 error = vfs_busy(mp, 0); 263 vfs_rel(mp); 264 if (error != 0) 265 return (error); 266 #ifdef MAC 267 error = mac_mount_check_stat(td->td_ucred, mp); 268 if (error != 0) 269 goto out; 270 #endif 271 error = VFS_STATFS(mp, buf); 272 if (error != 0) 273 goto out; 274 if (priv_check_cred_vfs_generation(td->td_ucred)) { 275 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 276 prison_enforce_statfs(td->td_ucred, mp, buf); 277 } 278 out: 279 vfs_unbusy(mp); 280 return (error); 281 } 282 283 /* 284 * Get filesystem statistics. 285 */ 286 #ifndef _SYS_SYSPROTO_H_ 287 struct statfs_args { 288 char *path; 289 struct statfs *buf; 290 }; 291 #endif 292 int 293 sys_statfs(struct thread *td, struct statfs_args *uap) 294 { 295 struct statfs *sfp; 296 int error; 297 298 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 299 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 300 if (error == 0) 301 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 302 free(sfp, M_STATFS); 303 return (error); 304 } 305 306 int 307 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 308 struct statfs *buf) 309 { 310 struct mount *mp; 311 struct nameidata nd; 312 int error; 313 314 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 315 pathseg, path, td); 316 error = namei(&nd); 317 if (error != 0) 318 return (error); 319 mp = nd.ni_vp->v_mount; 320 vfs_ref(mp); 321 NDFREE(&nd, NDF_ONLY_PNBUF); 322 vput(nd.ni_vp); 323 return (kern_do_statfs(td, mp, buf)); 324 } 325 326 /* 327 * Get filesystem statistics. 328 */ 329 #ifndef _SYS_SYSPROTO_H_ 330 struct fstatfs_args { 331 int fd; 332 struct statfs *buf; 333 }; 334 #endif 335 int 336 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 337 { 338 struct statfs *sfp; 339 int error; 340 341 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 342 error = kern_fstatfs(td, uap->fd, sfp); 343 if (error == 0) 344 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 345 free(sfp, M_STATFS); 346 return (error); 347 } 348 349 int 350 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 351 { 352 struct file *fp; 353 struct mount *mp; 354 struct vnode *vp; 355 int error; 356 357 AUDIT_ARG_FD(fd); 358 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 359 if (error != 0) 360 return (error); 361 vp = fp->f_vnode; 362 vn_lock(vp, LK_SHARED | LK_RETRY); 363 #ifdef AUDIT 364 AUDIT_ARG_VNODE1(vp); 365 #endif 366 mp = vp->v_mount; 367 if (mp != NULL) 368 vfs_ref(mp); 369 VOP_UNLOCK(vp); 370 fdrop(fp, td); 371 return (kern_do_statfs(td, mp, buf)); 372 } 373 374 /* 375 * Get statistics on all filesystems. 376 */ 377 #ifndef _SYS_SYSPROTO_H_ 378 struct getfsstat_args { 379 struct statfs *buf; 380 long bufsize; 381 int mode; 382 }; 383 #endif 384 int 385 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 386 { 387 size_t count; 388 int error; 389 390 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 391 return (EINVAL); 392 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 393 UIO_USERSPACE, uap->mode); 394 if (error == 0) 395 td->td_retval[0] = count; 396 return (error); 397 } 398 399 /* 400 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 401 * The caller is responsible for freeing memory which will be allocated 402 * in '*buf'. 403 */ 404 int 405 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 406 size_t *countp, enum uio_seg bufseg, int mode) 407 { 408 struct mount *mp, *nmp; 409 struct statfs *sfsp, *sp, *sptmp, *tofree; 410 size_t count, maxcount; 411 int error; 412 413 switch (mode) { 414 case MNT_WAIT: 415 case MNT_NOWAIT: 416 break; 417 default: 418 if (bufseg == UIO_SYSSPACE) 419 *buf = NULL; 420 return (EINVAL); 421 } 422 restart: 423 maxcount = bufsize / sizeof(struct statfs); 424 if (bufsize == 0) { 425 sfsp = NULL; 426 tofree = NULL; 427 } else if (bufseg == UIO_USERSPACE) { 428 sfsp = *buf; 429 tofree = NULL; 430 } else /* if (bufseg == UIO_SYSSPACE) */ { 431 count = 0; 432 mtx_lock(&mountlist_mtx); 433 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 434 count++; 435 } 436 mtx_unlock(&mountlist_mtx); 437 if (maxcount > count) 438 maxcount = count; 439 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 440 M_STATFS, M_WAITOK); 441 } 442 443 count = 0; 444 445 /* 446 * If there is no target buffer they only want the count. 447 * 448 * This could be TAILQ_FOREACH but it is open-coded to match the original 449 * code below. 450 */ 451 if (sfsp == NULL) { 452 mtx_lock(&mountlist_mtx); 453 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 454 if (prison_canseemount(td->td_ucred, mp) != 0) { 455 nmp = TAILQ_NEXT(mp, mnt_list); 456 continue; 457 } 458 #ifdef MAC 459 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 460 nmp = TAILQ_NEXT(mp, mnt_list); 461 continue; 462 } 463 #endif 464 count++; 465 nmp = TAILQ_NEXT(mp, mnt_list); 466 } 467 mtx_unlock(&mountlist_mtx); 468 *countp = count; 469 return (0); 470 } 471 472 /* 473 * They want the entire thing. 474 * 475 * Short-circuit the corner case of no room for anything, avoids 476 * relocking below. 477 */ 478 if (maxcount < 1) { 479 goto out; 480 } 481 482 mtx_lock(&mountlist_mtx); 483 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 484 if (prison_canseemount(td->td_ucred, mp) != 0) { 485 nmp = TAILQ_NEXT(mp, mnt_list); 486 continue; 487 } 488 #ifdef MAC 489 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 490 nmp = TAILQ_NEXT(mp, mnt_list); 491 continue; 492 } 493 #endif 494 if (mode == MNT_WAIT) { 495 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 496 /* 497 * If vfs_busy() failed, and MBF_NOWAIT 498 * wasn't passed, then the mp is gone. 499 * Furthermore, because of MBF_MNTLSTLOCK, 500 * the mountlist_mtx was dropped. We have 501 * no other choice than to start over. 502 */ 503 mtx_unlock(&mountlist_mtx); 504 free(tofree, M_STATFS); 505 goto restart; 506 } 507 } else { 508 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 509 nmp = TAILQ_NEXT(mp, mnt_list); 510 continue; 511 } 512 } 513 sp = &mp->mnt_stat; 514 /* 515 * If MNT_NOWAIT is specified, do not refresh 516 * the fsstat cache. 517 */ 518 if (mode != MNT_NOWAIT) { 519 error = VFS_STATFS(mp, sp); 520 if (error != 0) { 521 mtx_lock(&mountlist_mtx); 522 nmp = TAILQ_NEXT(mp, mnt_list); 523 vfs_unbusy(mp); 524 continue; 525 } 526 } 527 if (priv_check_cred_vfs_generation(td->td_ucred)) { 528 sptmp = malloc(sizeof(struct statfs), M_STATFS, 529 M_WAITOK); 530 *sptmp = *sp; 531 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 532 prison_enforce_statfs(td->td_ucred, mp, sptmp); 533 sp = sptmp; 534 } else 535 sptmp = NULL; 536 if (bufseg == UIO_SYSSPACE) { 537 bcopy(sp, sfsp, sizeof(*sp)); 538 free(sptmp, M_STATFS); 539 } else /* if (bufseg == UIO_USERSPACE) */ { 540 error = copyout(sp, sfsp, sizeof(*sp)); 541 free(sptmp, M_STATFS); 542 if (error != 0) { 543 vfs_unbusy(mp); 544 return (error); 545 } 546 } 547 sfsp++; 548 count++; 549 550 if (count == maxcount) { 551 vfs_unbusy(mp); 552 goto out; 553 } 554 555 mtx_lock(&mountlist_mtx); 556 nmp = TAILQ_NEXT(mp, mnt_list); 557 vfs_unbusy(mp); 558 } 559 mtx_unlock(&mountlist_mtx); 560 out: 561 *countp = count; 562 return (0); 563 } 564 565 #ifdef COMPAT_FREEBSD4 566 /* 567 * Get old format filesystem statistics. 568 */ 569 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 570 571 #ifndef _SYS_SYSPROTO_H_ 572 struct freebsd4_statfs_args { 573 char *path; 574 struct ostatfs *buf; 575 }; 576 #endif 577 int 578 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 579 { 580 struct ostatfs osb; 581 struct statfs *sfp; 582 int error; 583 584 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 585 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 586 if (error == 0) { 587 freebsd4_cvtstatfs(sfp, &osb); 588 error = copyout(&osb, uap->buf, sizeof(osb)); 589 } 590 free(sfp, M_STATFS); 591 return (error); 592 } 593 594 /* 595 * Get filesystem statistics. 596 */ 597 #ifndef _SYS_SYSPROTO_H_ 598 struct freebsd4_fstatfs_args { 599 int fd; 600 struct ostatfs *buf; 601 }; 602 #endif 603 int 604 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 605 { 606 struct ostatfs osb; 607 struct statfs *sfp; 608 int error; 609 610 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 611 error = kern_fstatfs(td, uap->fd, sfp); 612 if (error == 0) { 613 freebsd4_cvtstatfs(sfp, &osb); 614 error = copyout(&osb, uap->buf, sizeof(osb)); 615 } 616 free(sfp, M_STATFS); 617 return (error); 618 } 619 620 /* 621 * Get statistics on all filesystems. 622 */ 623 #ifndef _SYS_SYSPROTO_H_ 624 struct freebsd4_getfsstat_args { 625 struct ostatfs *buf; 626 long bufsize; 627 int mode; 628 }; 629 #endif 630 int 631 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 632 { 633 struct statfs *buf, *sp; 634 struct ostatfs osb; 635 size_t count, size; 636 int error; 637 638 if (uap->bufsize < 0) 639 return (EINVAL); 640 count = uap->bufsize / sizeof(struct ostatfs); 641 if (count > SIZE_MAX / sizeof(struct statfs)) 642 return (EINVAL); 643 size = count * sizeof(struct statfs); 644 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 645 uap->mode); 646 if (error == 0) 647 td->td_retval[0] = count; 648 if (size != 0) { 649 sp = buf; 650 while (count != 0 && error == 0) { 651 freebsd4_cvtstatfs(sp, &osb); 652 error = copyout(&osb, uap->buf, sizeof(osb)); 653 sp++; 654 uap->buf++; 655 count--; 656 } 657 free(buf, M_STATFS); 658 } 659 return (error); 660 } 661 662 /* 663 * Implement fstatfs() for (NFS) file handles. 664 */ 665 #ifndef _SYS_SYSPROTO_H_ 666 struct freebsd4_fhstatfs_args { 667 struct fhandle *u_fhp; 668 struct ostatfs *buf; 669 }; 670 #endif 671 int 672 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 673 { 674 struct ostatfs osb; 675 struct statfs *sfp; 676 fhandle_t fh; 677 int error; 678 679 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 680 if (error != 0) 681 return (error); 682 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 683 error = kern_fhstatfs(td, fh, sfp); 684 if (error == 0) { 685 freebsd4_cvtstatfs(sfp, &osb); 686 error = copyout(&osb, uap->buf, sizeof(osb)); 687 } 688 free(sfp, M_STATFS); 689 return (error); 690 } 691 692 /* 693 * Convert a new format statfs structure to an old format statfs structure. 694 */ 695 static void 696 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 697 { 698 699 statfs_scale_blocks(nsp, LONG_MAX); 700 bzero(osp, sizeof(*osp)); 701 osp->f_bsize = nsp->f_bsize; 702 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 703 osp->f_blocks = nsp->f_blocks; 704 osp->f_bfree = nsp->f_bfree; 705 osp->f_bavail = nsp->f_bavail; 706 osp->f_files = MIN(nsp->f_files, LONG_MAX); 707 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 708 osp->f_owner = nsp->f_owner; 709 osp->f_type = nsp->f_type; 710 osp->f_flags = nsp->f_flags; 711 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 712 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 713 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 714 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 715 strlcpy(osp->f_fstypename, nsp->f_fstypename, 716 MIN(MFSNAMELEN, OMFSNAMELEN)); 717 strlcpy(osp->f_mntonname, nsp->f_mntonname, 718 MIN(MNAMELEN, OMNAMELEN)); 719 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 720 MIN(MNAMELEN, OMNAMELEN)); 721 osp->f_fsid = nsp->f_fsid; 722 } 723 #endif /* COMPAT_FREEBSD4 */ 724 725 #if defined(COMPAT_FREEBSD11) 726 /* 727 * Get old format filesystem statistics. 728 */ 729 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 730 731 int 732 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 733 { 734 struct freebsd11_statfs osb; 735 struct statfs *sfp; 736 int error; 737 738 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 739 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 740 if (error == 0) { 741 freebsd11_cvtstatfs(sfp, &osb); 742 error = copyout(&osb, uap->buf, sizeof(osb)); 743 } 744 free(sfp, M_STATFS); 745 return (error); 746 } 747 748 /* 749 * Get filesystem statistics. 750 */ 751 int 752 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 753 { 754 struct freebsd11_statfs osb; 755 struct statfs *sfp; 756 int error; 757 758 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 759 error = kern_fstatfs(td, uap->fd, sfp); 760 if (error == 0) { 761 freebsd11_cvtstatfs(sfp, &osb); 762 error = copyout(&osb, uap->buf, sizeof(osb)); 763 } 764 free(sfp, M_STATFS); 765 return (error); 766 } 767 768 /* 769 * Get statistics on all filesystems. 770 */ 771 int 772 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 773 { 774 struct freebsd11_statfs osb; 775 struct statfs *buf, *sp; 776 size_t count, size; 777 int error; 778 779 count = uap->bufsize / sizeof(struct ostatfs); 780 size = count * sizeof(struct statfs); 781 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 782 uap->mode); 783 if (error == 0) 784 td->td_retval[0] = count; 785 if (size > 0) { 786 sp = buf; 787 while (count > 0 && error == 0) { 788 freebsd11_cvtstatfs(sp, &osb); 789 error = copyout(&osb, uap->buf, sizeof(osb)); 790 sp++; 791 uap->buf++; 792 count--; 793 } 794 free(buf, M_STATFS); 795 } 796 return (error); 797 } 798 799 /* 800 * Implement fstatfs() for (NFS) file handles. 801 */ 802 int 803 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 804 { 805 struct freebsd11_statfs osb; 806 struct statfs *sfp; 807 fhandle_t fh; 808 int error; 809 810 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 811 if (error) 812 return (error); 813 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 814 error = kern_fhstatfs(td, fh, sfp); 815 if (error == 0) { 816 freebsd11_cvtstatfs(sfp, &osb); 817 error = copyout(&osb, uap->buf, sizeof(osb)); 818 } 819 free(sfp, M_STATFS); 820 return (error); 821 } 822 823 /* 824 * Convert a new format statfs structure to an old format statfs structure. 825 */ 826 static void 827 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 828 { 829 830 bzero(osp, sizeof(*osp)); 831 osp->f_version = FREEBSD11_STATFS_VERSION; 832 osp->f_type = nsp->f_type; 833 osp->f_flags = nsp->f_flags; 834 osp->f_bsize = nsp->f_bsize; 835 osp->f_iosize = nsp->f_iosize; 836 osp->f_blocks = nsp->f_blocks; 837 osp->f_bfree = nsp->f_bfree; 838 osp->f_bavail = nsp->f_bavail; 839 osp->f_files = nsp->f_files; 840 osp->f_ffree = nsp->f_ffree; 841 osp->f_syncwrites = nsp->f_syncwrites; 842 osp->f_asyncwrites = nsp->f_asyncwrites; 843 osp->f_syncreads = nsp->f_syncreads; 844 osp->f_asyncreads = nsp->f_asyncreads; 845 osp->f_namemax = nsp->f_namemax; 846 osp->f_owner = nsp->f_owner; 847 osp->f_fsid = nsp->f_fsid; 848 strlcpy(osp->f_fstypename, nsp->f_fstypename, 849 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 850 strlcpy(osp->f_mntonname, nsp->f_mntonname, 851 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 852 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 853 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 854 } 855 #endif /* COMPAT_FREEBSD11 */ 856 857 /* 858 * Change current working directory to a given file descriptor. 859 */ 860 #ifndef _SYS_SYSPROTO_H_ 861 struct fchdir_args { 862 int fd; 863 }; 864 #endif 865 int 866 sys_fchdir(struct thread *td, struct fchdir_args *uap) 867 { 868 struct vnode *vp, *tdp; 869 struct mount *mp; 870 struct file *fp; 871 int error; 872 873 AUDIT_ARG_FD(uap->fd); 874 error = getvnode(td, uap->fd, &cap_fchdir_rights, 875 &fp); 876 if (error != 0) 877 return (error); 878 vp = fp->f_vnode; 879 vrefact(vp); 880 fdrop(fp, td); 881 vn_lock(vp, LK_SHARED | LK_RETRY); 882 AUDIT_ARG_VNODE1(vp); 883 error = change_dir(vp, td); 884 while (!error && (mp = vp->v_mountedhere) != NULL) { 885 if (vfs_busy(mp, 0)) 886 continue; 887 error = VFS_ROOT(mp, LK_SHARED, &tdp); 888 vfs_unbusy(mp); 889 if (error != 0) 890 break; 891 vput(vp); 892 vp = tdp; 893 } 894 if (error != 0) { 895 vput(vp); 896 return (error); 897 } 898 VOP_UNLOCK(vp); 899 pwd_chdir(td, vp); 900 return (0); 901 } 902 903 /* 904 * Change current working directory (``.''). 905 */ 906 #ifndef _SYS_SYSPROTO_H_ 907 struct chdir_args { 908 char *path; 909 }; 910 #endif 911 int 912 sys_chdir(struct thread *td, struct chdir_args *uap) 913 { 914 915 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 916 } 917 918 int 919 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 920 { 921 struct nameidata nd; 922 int error; 923 924 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 925 pathseg, path, td); 926 if ((error = namei(&nd)) != 0) 927 return (error); 928 if ((error = change_dir(nd.ni_vp, td)) != 0) { 929 vput(nd.ni_vp); 930 NDFREE(&nd, NDF_ONLY_PNBUF); 931 return (error); 932 } 933 VOP_UNLOCK(nd.ni_vp); 934 NDFREE(&nd, NDF_ONLY_PNBUF); 935 pwd_chdir(td, nd.ni_vp); 936 return (0); 937 } 938 939 /* 940 * Change notion of root (``/'') directory. 941 */ 942 #ifndef _SYS_SYSPROTO_H_ 943 struct chroot_args { 944 char *path; 945 }; 946 #endif 947 int 948 sys_chroot(struct thread *td, struct chroot_args *uap) 949 { 950 struct nameidata nd; 951 int error; 952 953 error = priv_check(td, PRIV_VFS_CHROOT); 954 if (error != 0) 955 return (error); 956 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 957 UIO_USERSPACE, uap->path, td); 958 error = namei(&nd); 959 if (error != 0) 960 goto error; 961 error = change_dir(nd.ni_vp, td); 962 if (error != 0) 963 goto e_vunlock; 964 #ifdef MAC 965 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 966 if (error != 0) 967 goto e_vunlock; 968 #endif 969 VOP_UNLOCK(nd.ni_vp); 970 error = pwd_chroot(td, nd.ni_vp); 971 vrele(nd.ni_vp); 972 NDFREE(&nd, NDF_ONLY_PNBUF); 973 return (error); 974 e_vunlock: 975 vput(nd.ni_vp); 976 error: 977 NDFREE(&nd, NDF_ONLY_PNBUF); 978 return (error); 979 } 980 981 /* 982 * Common routine for chroot and chdir. Callers must provide a locked vnode 983 * instance. 984 */ 985 int 986 change_dir(struct vnode *vp, struct thread *td) 987 { 988 #ifdef MAC 989 int error; 990 #endif 991 992 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 993 if (vp->v_type != VDIR) 994 return (ENOTDIR); 995 #ifdef MAC 996 error = mac_vnode_check_chdir(td->td_ucred, vp); 997 if (error != 0) 998 return (error); 999 #endif 1000 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1001 } 1002 1003 static __inline void 1004 flags_to_rights(int flags, cap_rights_t *rightsp) 1005 { 1006 1007 if (flags & O_EXEC) { 1008 cap_rights_set_one(rightsp, CAP_FEXECVE); 1009 } else { 1010 switch ((flags & O_ACCMODE)) { 1011 case O_RDONLY: 1012 cap_rights_set_one(rightsp, CAP_READ); 1013 break; 1014 case O_RDWR: 1015 cap_rights_set_one(rightsp, CAP_READ); 1016 /* FALLTHROUGH */ 1017 case O_WRONLY: 1018 cap_rights_set_one(rightsp, CAP_WRITE); 1019 if (!(flags & (O_APPEND | O_TRUNC))) 1020 cap_rights_set_one(rightsp, CAP_SEEK); 1021 break; 1022 } 1023 } 1024 1025 if (flags & O_CREAT) 1026 cap_rights_set_one(rightsp, CAP_CREATE); 1027 1028 if (flags & O_TRUNC) 1029 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1030 1031 if (flags & (O_SYNC | O_FSYNC)) 1032 cap_rights_set_one(rightsp, CAP_FSYNC); 1033 1034 if (flags & (O_EXLOCK | O_SHLOCK)) 1035 cap_rights_set_one(rightsp, CAP_FLOCK); 1036 } 1037 1038 /* 1039 * Check permissions, allocate an open file structure, and call the device 1040 * open routine if any. 1041 */ 1042 #ifndef _SYS_SYSPROTO_H_ 1043 struct open_args { 1044 char *path; 1045 int flags; 1046 int mode; 1047 }; 1048 #endif 1049 int 1050 sys_open(struct thread *td, struct open_args *uap) 1051 { 1052 1053 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1054 uap->flags, uap->mode)); 1055 } 1056 1057 #ifndef _SYS_SYSPROTO_H_ 1058 struct openat_args { 1059 int fd; 1060 char *path; 1061 int flag; 1062 int mode; 1063 }; 1064 #endif 1065 int 1066 sys_openat(struct thread *td, struct openat_args *uap) 1067 { 1068 1069 AUDIT_ARG_FD(uap->fd); 1070 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1071 uap->mode)); 1072 } 1073 1074 int 1075 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1076 int flags, int mode) 1077 { 1078 struct proc *p = td->td_proc; 1079 struct filedesc *fdp = p->p_fd; 1080 struct file *fp; 1081 struct vnode *vp; 1082 struct nameidata nd; 1083 cap_rights_t rights; 1084 int cmode, error, indx; 1085 1086 indx = -1; 1087 1088 AUDIT_ARG_FFLAGS(flags); 1089 AUDIT_ARG_MODE(mode); 1090 cap_rights_init_one(&rights, CAP_LOOKUP); 1091 flags_to_rights(flags, &rights); 1092 /* 1093 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1094 * may be specified. 1095 */ 1096 if (flags & O_EXEC) { 1097 if (flags & O_ACCMODE) 1098 return (EINVAL); 1099 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1100 return (EINVAL); 1101 } else { 1102 flags = FFLAGS(flags); 1103 } 1104 1105 /* 1106 * Allocate a file structure. The descriptor to reference it 1107 * is allocated and set by finstall() below. 1108 */ 1109 error = falloc_noinstall(td, &fp); 1110 if (error != 0) 1111 return (error); 1112 /* 1113 * An extra reference on `fp' has been held for us by 1114 * falloc_noinstall(). 1115 */ 1116 /* Set the flags early so the finit in devfs can pick them up. */ 1117 fp->f_flag = flags & FMASK; 1118 cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT; 1119 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1120 &rights, td); 1121 td->td_dupfd = -1; /* XXX check for fdopen */ 1122 error = vn_open(&nd, &flags, cmode, fp); 1123 if (error != 0) { 1124 /* 1125 * If the vn_open replaced the method vector, something 1126 * wonderous happened deep below and we just pass it up 1127 * pretending we know what we do. 1128 */ 1129 if (error == ENXIO && fp->f_ops != &badfileops) 1130 goto success; 1131 1132 /* 1133 * Handle special fdopen() case. bleh. 1134 * 1135 * Don't do this for relative (capability) lookups; we don't 1136 * understand exactly what would happen, and we don't think 1137 * that it ever should. 1138 */ 1139 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 && 1140 (error == ENODEV || error == ENXIO) && 1141 td->td_dupfd >= 0) { 1142 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1143 &indx); 1144 if (error == 0) 1145 goto success; 1146 } 1147 1148 goto bad; 1149 } 1150 td->td_dupfd = 0; 1151 NDFREE(&nd, NDF_ONLY_PNBUF); 1152 vp = nd.ni_vp; 1153 1154 /* 1155 * Store the vnode, for any f_type. Typically, the vnode use 1156 * count is decremented by direct call to vn_closefile() for 1157 * files that switched type in the cdevsw fdopen() method. 1158 */ 1159 fp->f_vnode = vp; 1160 /* 1161 * If the file wasn't claimed by devfs bind it to the normal 1162 * vnode operations here. 1163 */ 1164 if (fp->f_ops == &badfileops) { 1165 KASSERT(vp->v_type != VFIFO, ("Unexpected fifo.")); 1166 finit_vnode(fp, flags, NULL, &vnops); 1167 } 1168 1169 VOP_UNLOCK(vp); 1170 if (flags & O_TRUNC) { 1171 error = fo_truncate(fp, 0, td->td_ucred, td); 1172 if (error != 0) 1173 goto bad; 1174 } 1175 success: 1176 /* 1177 * If we haven't already installed the FD (for dupfdopen), do so now. 1178 */ 1179 if (indx == -1) { 1180 struct filecaps *fcaps; 1181 1182 #ifdef CAPABILITIES 1183 if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0) 1184 fcaps = &nd.ni_filecaps; 1185 else 1186 #endif 1187 fcaps = NULL; 1188 error = finstall(td, fp, &indx, flags, fcaps); 1189 /* On success finstall() consumes fcaps. */ 1190 if (error != 0) { 1191 filecaps_free(&nd.ni_filecaps); 1192 goto bad; 1193 } 1194 } else { 1195 filecaps_free(&nd.ni_filecaps); 1196 } 1197 1198 /* 1199 * Release our private reference, leaving the one associated with 1200 * the descriptor table intact. 1201 */ 1202 fdrop(fp, td); 1203 td->td_retval[0] = indx; 1204 return (0); 1205 bad: 1206 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1207 fdrop(fp, td); 1208 return (error); 1209 } 1210 1211 #ifdef COMPAT_43 1212 /* 1213 * Create a file. 1214 */ 1215 #ifndef _SYS_SYSPROTO_H_ 1216 struct ocreat_args { 1217 char *path; 1218 int mode; 1219 }; 1220 #endif 1221 int 1222 ocreat(struct thread *td, struct ocreat_args *uap) 1223 { 1224 1225 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1226 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1227 } 1228 #endif /* COMPAT_43 */ 1229 1230 /* 1231 * Create a special file. 1232 */ 1233 #ifndef _SYS_SYSPROTO_H_ 1234 struct mknodat_args { 1235 int fd; 1236 char *path; 1237 mode_t mode; 1238 dev_t dev; 1239 }; 1240 #endif 1241 int 1242 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1243 { 1244 1245 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1246 uap->dev)); 1247 } 1248 1249 #if defined(COMPAT_FREEBSD11) 1250 int 1251 freebsd11_mknod(struct thread *td, 1252 struct freebsd11_mknod_args *uap) 1253 { 1254 1255 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1256 uap->mode, uap->dev)); 1257 } 1258 1259 int 1260 freebsd11_mknodat(struct thread *td, 1261 struct freebsd11_mknodat_args *uap) 1262 { 1263 1264 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1265 uap->dev)); 1266 } 1267 #endif /* COMPAT_FREEBSD11 */ 1268 1269 int 1270 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1271 int mode, dev_t dev) 1272 { 1273 struct vnode *vp; 1274 struct mount *mp; 1275 struct vattr vattr; 1276 struct nameidata nd; 1277 int error, whiteout = 0; 1278 1279 AUDIT_ARG_MODE(mode); 1280 AUDIT_ARG_DEV(dev); 1281 switch (mode & S_IFMT) { 1282 case S_IFCHR: 1283 case S_IFBLK: 1284 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1285 if (error == 0 && dev == VNOVAL) 1286 error = EINVAL; 1287 break; 1288 case S_IFWHT: 1289 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1290 break; 1291 case S_IFIFO: 1292 if (dev == 0) 1293 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1294 /* FALLTHROUGH */ 1295 default: 1296 error = EINVAL; 1297 break; 1298 } 1299 if (error != 0) 1300 return (error); 1301 restart: 1302 bwillwrite(); 1303 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1304 NOCACHE, pathseg, path, fd, &cap_mknodat_rights, 1305 td); 1306 if ((error = namei(&nd)) != 0) 1307 return (error); 1308 vp = nd.ni_vp; 1309 if (vp != NULL) { 1310 NDFREE(&nd, NDF_ONLY_PNBUF); 1311 if (vp == nd.ni_dvp) 1312 vrele(nd.ni_dvp); 1313 else 1314 vput(nd.ni_dvp); 1315 vrele(vp); 1316 return (EEXIST); 1317 } else { 1318 VATTR_NULL(&vattr); 1319 vattr.va_mode = (mode & ALLPERMS) & 1320 ~td->td_proc->p_fd->fd_cmask; 1321 vattr.va_rdev = dev; 1322 whiteout = 0; 1323 1324 switch (mode & S_IFMT) { 1325 case S_IFCHR: 1326 vattr.va_type = VCHR; 1327 break; 1328 case S_IFBLK: 1329 vattr.va_type = VBLK; 1330 break; 1331 case S_IFWHT: 1332 whiteout = 1; 1333 break; 1334 default: 1335 panic("kern_mknod: invalid mode"); 1336 } 1337 } 1338 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1339 NDFREE(&nd, NDF_ONLY_PNBUF); 1340 vput(nd.ni_dvp); 1341 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1342 return (error); 1343 goto restart; 1344 } 1345 #ifdef MAC 1346 if (error == 0 && !whiteout) 1347 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1348 &nd.ni_cnd, &vattr); 1349 #endif 1350 if (error == 0) { 1351 if (whiteout) 1352 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1353 else { 1354 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1355 &nd.ni_cnd, &vattr); 1356 if (error == 0) 1357 vput(nd.ni_vp); 1358 } 1359 } 1360 NDFREE(&nd, NDF_ONLY_PNBUF); 1361 vput(nd.ni_dvp); 1362 vn_finished_write(mp); 1363 return (error); 1364 } 1365 1366 /* 1367 * Create a named pipe. 1368 */ 1369 #ifndef _SYS_SYSPROTO_H_ 1370 struct mkfifo_args { 1371 char *path; 1372 int mode; 1373 }; 1374 #endif 1375 int 1376 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1377 { 1378 1379 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1380 uap->mode)); 1381 } 1382 1383 #ifndef _SYS_SYSPROTO_H_ 1384 struct mkfifoat_args { 1385 int fd; 1386 char *path; 1387 mode_t mode; 1388 }; 1389 #endif 1390 int 1391 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1392 { 1393 1394 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1395 uap->mode)); 1396 } 1397 1398 int 1399 kern_mkfifoat(struct thread *td, int fd, const char *path, 1400 enum uio_seg pathseg, int mode) 1401 { 1402 struct mount *mp; 1403 struct vattr vattr; 1404 struct nameidata nd; 1405 int error; 1406 1407 AUDIT_ARG_MODE(mode); 1408 restart: 1409 bwillwrite(); 1410 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1411 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, 1412 td); 1413 if ((error = namei(&nd)) != 0) 1414 return (error); 1415 if (nd.ni_vp != NULL) { 1416 NDFREE(&nd, NDF_ONLY_PNBUF); 1417 if (nd.ni_vp == nd.ni_dvp) 1418 vrele(nd.ni_dvp); 1419 else 1420 vput(nd.ni_dvp); 1421 vrele(nd.ni_vp); 1422 return (EEXIST); 1423 } 1424 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1425 NDFREE(&nd, NDF_ONLY_PNBUF); 1426 vput(nd.ni_dvp); 1427 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1428 return (error); 1429 goto restart; 1430 } 1431 VATTR_NULL(&vattr); 1432 vattr.va_type = VFIFO; 1433 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1434 #ifdef MAC 1435 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1436 &vattr); 1437 if (error != 0) 1438 goto out; 1439 #endif 1440 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1441 if (error == 0) 1442 vput(nd.ni_vp); 1443 #ifdef MAC 1444 out: 1445 #endif 1446 vput(nd.ni_dvp); 1447 vn_finished_write(mp); 1448 NDFREE(&nd, NDF_ONLY_PNBUF); 1449 return (error); 1450 } 1451 1452 /* 1453 * Make a hard file link. 1454 */ 1455 #ifndef _SYS_SYSPROTO_H_ 1456 struct link_args { 1457 char *path; 1458 char *link; 1459 }; 1460 #endif 1461 int 1462 sys_link(struct thread *td, struct link_args *uap) 1463 { 1464 1465 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1466 UIO_USERSPACE, FOLLOW)); 1467 } 1468 1469 #ifndef _SYS_SYSPROTO_H_ 1470 struct linkat_args { 1471 int fd1; 1472 char *path1; 1473 int fd2; 1474 char *path2; 1475 int flag; 1476 }; 1477 #endif 1478 int 1479 sys_linkat(struct thread *td, struct linkat_args *uap) 1480 { 1481 int flag; 1482 1483 flag = uap->flag; 1484 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH)) != 0) 1485 return (EINVAL); 1486 1487 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1488 UIO_USERSPACE, ((flag & AT_SYMLINK_FOLLOW) != 0 ? FOLLOW : 1489 NOFOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0))); 1490 } 1491 1492 int hardlink_check_uid = 0; 1493 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1494 &hardlink_check_uid, 0, 1495 "Unprivileged processes cannot create hard links to files owned by other " 1496 "users"); 1497 static int hardlink_check_gid = 0; 1498 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1499 &hardlink_check_gid, 0, 1500 "Unprivileged processes cannot create hard links to files owned by other " 1501 "groups"); 1502 1503 static int 1504 can_hardlink(struct vnode *vp, struct ucred *cred) 1505 { 1506 struct vattr va; 1507 int error; 1508 1509 if (!hardlink_check_uid && !hardlink_check_gid) 1510 return (0); 1511 1512 error = VOP_GETATTR(vp, &va, cred); 1513 if (error != 0) 1514 return (error); 1515 1516 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1517 error = priv_check_cred(cred, PRIV_VFS_LINK); 1518 if (error != 0) 1519 return (error); 1520 } 1521 1522 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1523 error = priv_check_cred(cred, PRIV_VFS_LINK); 1524 if (error != 0) 1525 return (error); 1526 } 1527 1528 return (0); 1529 } 1530 1531 int 1532 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1533 const char *path2, enum uio_seg segflag, int follow) 1534 { 1535 struct nameidata nd; 1536 int error; 1537 1538 do { 1539 bwillwrite(); 1540 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag, 1541 path1, fd1, &cap_linkat_source_rights, td); 1542 if ((error = namei(&nd)) != 0) 1543 return (error); 1544 NDFREE(&nd, NDF_ONLY_PNBUF); 1545 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1546 } while (error == EAGAIN); 1547 return (error); 1548 } 1549 1550 static int 1551 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1552 enum uio_seg segflag) 1553 { 1554 struct nameidata nd; 1555 struct mount *mp; 1556 int error; 1557 1558 if (vp->v_type == VDIR) { 1559 vrele(vp); 1560 return (EPERM); /* POSIX */ 1561 } 1562 NDINIT_ATRIGHTS(&nd, CREATE, 1563 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1564 &cap_linkat_target_rights, td); 1565 if ((error = namei(&nd)) == 0) { 1566 if (nd.ni_vp != NULL) { 1567 NDFREE(&nd, NDF_ONLY_PNBUF); 1568 if (nd.ni_dvp == nd.ni_vp) 1569 vrele(nd.ni_dvp); 1570 else 1571 vput(nd.ni_dvp); 1572 vrele(nd.ni_vp); 1573 vrele(vp); 1574 return (EEXIST); 1575 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1576 /* 1577 * Cross-device link. No need to recheck 1578 * vp->v_type, since it cannot change, except 1579 * to VBAD. 1580 */ 1581 NDFREE(&nd, NDF_ONLY_PNBUF); 1582 vput(nd.ni_dvp); 1583 vrele(vp); 1584 return (EXDEV); 1585 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1586 error = can_hardlink(vp, td->td_ucred); 1587 #ifdef MAC 1588 if (error == 0) 1589 error = mac_vnode_check_link(td->td_ucred, 1590 nd.ni_dvp, vp, &nd.ni_cnd); 1591 #endif 1592 if (error != 0) { 1593 vput(vp); 1594 vput(nd.ni_dvp); 1595 NDFREE(&nd, NDF_ONLY_PNBUF); 1596 return (error); 1597 } 1598 error = vn_start_write(vp, &mp, V_NOWAIT); 1599 if (error != 0) { 1600 vput(vp); 1601 vput(nd.ni_dvp); 1602 NDFREE(&nd, NDF_ONLY_PNBUF); 1603 error = vn_start_write(NULL, &mp, 1604 V_XSLEEP | PCATCH); 1605 if (error != 0) 1606 return (error); 1607 return (EAGAIN); 1608 } 1609 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1610 VOP_UNLOCK(vp); 1611 vput(nd.ni_dvp); 1612 vn_finished_write(mp); 1613 NDFREE(&nd, NDF_ONLY_PNBUF); 1614 } else { 1615 vput(nd.ni_dvp); 1616 NDFREE(&nd, NDF_ONLY_PNBUF); 1617 vrele(vp); 1618 return (EAGAIN); 1619 } 1620 } 1621 vrele(vp); 1622 return (error); 1623 } 1624 1625 /* 1626 * Make a symbolic link. 1627 */ 1628 #ifndef _SYS_SYSPROTO_H_ 1629 struct symlink_args { 1630 char *path; 1631 char *link; 1632 }; 1633 #endif 1634 int 1635 sys_symlink(struct thread *td, struct symlink_args *uap) 1636 { 1637 1638 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1639 UIO_USERSPACE)); 1640 } 1641 1642 #ifndef _SYS_SYSPROTO_H_ 1643 struct symlinkat_args { 1644 char *path; 1645 int fd; 1646 char *path2; 1647 }; 1648 #endif 1649 int 1650 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1651 { 1652 1653 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1654 UIO_USERSPACE)); 1655 } 1656 1657 int 1658 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1659 enum uio_seg segflg) 1660 { 1661 struct mount *mp; 1662 struct vattr vattr; 1663 const char *syspath; 1664 char *tmppath; 1665 struct nameidata nd; 1666 int error; 1667 1668 if (segflg == UIO_SYSSPACE) { 1669 syspath = path1; 1670 } else { 1671 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1672 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1673 goto out; 1674 syspath = tmppath; 1675 } 1676 AUDIT_ARG_TEXT(syspath); 1677 restart: 1678 bwillwrite(); 1679 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1680 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1681 td); 1682 if ((error = namei(&nd)) != 0) 1683 goto out; 1684 if (nd.ni_vp) { 1685 NDFREE(&nd, NDF_ONLY_PNBUF); 1686 if (nd.ni_vp == nd.ni_dvp) 1687 vrele(nd.ni_dvp); 1688 else 1689 vput(nd.ni_dvp); 1690 vrele(nd.ni_vp); 1691 error = EEXIST; 1692 goto out; 1693 } 1694 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1695 NDFREE(&nd, NDF_ONLY_PNBUF); 1696 vput(nd.ni_dvp); 1697 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1698 goto out; 1699 goto restart; 1700 } 1701 VATTR_NULL(&vattr); 1702 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1703 #ifdef MAC 1704 vattr.va_type = VLNK; 1705 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1706 &vattr); 1707 if (error != 0) 1708 goto out2; 1709 #endif 1710 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1711 if (error == 0) 1712 vput(nd.ni_vp); 1713 #ifdef MAC 1714 out2: 1715 #endif 1716 NDFREE(&nd, NDF_ONLY_PNBUF); 1717 vput(nd.ni_dvp); 1718 vn_finished_write(mp); 1719 out: 1720 if (segflg != UIO_SYSSPACE) 1721 uma_zfree(namei_zone, tmppath); 1722 return (error); 1723 } 1724 1725 /* 1726 * Delete a whiteout from the filesystem. 1727 */ 1728 #ifndef _SYS_SYSPROTO_H_ 1729 struct undelete_args { 1730 char *path; 1731 }; 1732 #endif 1733 int 1734 sys_undelete(struct thread *td, struct undelete_args *uap) 1735 { 1736 struct mount *mp; 1737 struct nameidata nd; 1738 int error; 1739 1740 restart: 1741 bwillwrite(); 1742 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1743 UIO_USERSPACE, uap->path, td); 1744 error = namei(&nd); 1745 if (error != 0) 1746 return (error); 1747 1748 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1749 NDFREE(&nd, NDF_ONLY_PNBUF); 1750 if (nd.ni_vp == nd.ni_dvp) 1751 vrele(nd.ni_dvp); 1752 else 1753 vput(nd.ni_dvp); 1754 if (nd.ni_vp) 1755 vrele(nd.ni_vp); 1756 return (EEXIST); 1757 } 1758 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1759 NDFREE(&nd, NDF_ONLY_PNBUF); 1760 vput(nd.ni_dvp); 1761 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1762 return (error); 1763 goto restart; 1764 } 1765 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1766 NDFREE(&nd, NDF_ONLY_PNBUF); 1767 vput(nd.ni_dvp); 1768 vn_finished_write(mp); 1769 return (error); 1770 } 1771 1772 /* 1773 * Delete a name from the filesystem. 1774 */ 1775 #ifndef _SYS_SYSPROTO_H_ 1776 struct unlink_args { 1777 char *path; 1778 }; 1779 #endif 1780 int 1781 sys_unlink(struct thread *td, struct unlink_args *uap) 1782 { 1783 1784 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1785 0, 0)); 1786 } 1787 1788 static int 1789 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1790 int flag, enum uio_seg pathseg, ino_t oldinum) 1791 { 1792 1793 if ((flag & ~AT_REMOVEDIR) != 0) 1794 return (EINVAL); 1795 1796 if ((flag & AT_REMOVEDIR) != 0) 1797 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1798 1799 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1800 } 1801 1802 #ifndef _SYS_SYSPROTO_H_ 1803 struct unlinkat_args { 1804 int fd; 1805 char *path; 1806 int flag; 1807 }; 1808 #endif 1809 int 1810 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1811 { 1812 1813 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1814 UIO_USERSPACE, 0)); 1815 } 1816 1817 #ifndef _SYS_SYSPROTO_H_ 1818 struct funlinkat_args { 1819 int dfd; 1820 const char *path; 1821 int fd; 1822 int flag; 1823 }; 1824 #endif 1825 int 1826 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1827 { 1828 1829 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1830 UIO_USERSPACE, 0)); 1831 } 1832 1833 int 1834 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1835 enum uio_seg pathseg, int flag, ino_t oldinum) 1836 { 1837 struct mount *mp; 1838 struct file *fp; 1839 struct vnode *vp; 1840 struct nameidata nd; 1841 struct stat sb; 1842 int error; 1843 1844 fp = NULL; 1845 if (fd != FD_NONE) { 1846 error = getvnode(td, fd, &cap_no_rights, &fp); 1847 if (error != 0) 1848 return (error); 1849 } 1850 1851 restart: 1852 bwillwrite(); 1853 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1854 ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 1855 pathseg, path, dfd, &cap_unlinkat_rights, td); 1856 if ((error = namei(&nd)) != 0) { 1857 if (error == EINVAL) 1858 error = EPERM; 1859 goto fdout; 1860 } 1861 vp = nd.ni_vp; 1862 if (vp->v_type == VDIR && oldinum == 0) { 1863 error = EPERM; /* POSIX */ 1864 } else if (oldinum != 0 && 1865 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1866 sb.st_ino != oldinum) { 1867 error = EIDRM; /* Identifier removed */ 1868 } else if (fp != NULL && fp->f_vnode != vp) { 1869 if (VN_IS_DOOMED(fp->f_vnode)) 1870 error = EBADF; 1871 else 1872 error = EDEADLK; 1873 } else { 1874 /* 1875 * The root of a mounted filesystem cannot be deleted. 1876 * 1877 * XXX: can this only be a VDIR case? 1878 */ 1879 if (vp->v_vflag & VV_ROOT) 1880 error = EBUSY; 1881 } 1882 if (error == 0) { 1883 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1884 NDFREE(&nd, NDF_ONLY_PNBUF); 1885 vput(nd.ni_dvp); 1886 if (vp == nd.ni_dvp) 1887 vrele(vp); 1888 else 1889 vput(vp); 1890 if ((error = vn_start_write(NULL, &mp, 1891 V_XSLEEP | PCATCH)) != 0) { 1892 goto fdout; 1893 } 1894 goto restart; 1895 } 1896 #ifdef MAC 1897 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1898 &nd.ni_cnd); 1899 if (error != 0) 1900 goto out; 1901 #endif 1902 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1903 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1904 #ifdef MAC 1905 out: 1906 #endif 1907 vn_finished_write(mp); 1908 } 1909 NDFREE(&nd, NDF_ONLY_PNBUF); 1910 vput(nd.ni_dvp); 1911 if (vp == nd.ni_dvp) 1912 vrele(vp); 1913 else 1914 vput(vp); 1915 fdout: 1916 if (fp != NULL) 1917 fdrop(fp, td); 1918 return (error); 1919 } 1920 1921 /* 1922 * Reposition read/write file offset. 1923 */ 1924 #ifndef _SYS_SYSPROTO_H_ 1925 struct lseek_args { 1926 int fd; 1927 int pad; 1928 off_t offset; 1929 int whence; 1930 }; 1931 #endif 1932 int 1933 sys_lseek(struct thread *td, struct lseek_args *uap) 1934 { 1935 1936 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1937 } 1938 1939 int 1940 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1941 { 1942 struct file *fp; 1943 int error; 1944 1945 AUDIT_ARG_FD(fd); 1946 error = fget(td, fd, &cap_seek_rights, &fp); 1947 if (error != 0) 1948 return (error); 1949 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1950 fo_seek(fp, offset, whence, td) : ESPIPE; 1951 fdrop(fp, td); 1952 return (error); 1953 } 1954 1955 #if defined(COMPAT_43) 1956 /* 1957 * Reposition read/write file offset. 1958 */ 1959 #ifndef _SYS_SYSPROTO_H_ 1960 struct olseek_args { 1961 int fd; 1962 long offset; 1963 int whence; 1964 }; 1965 #endif 1966 int 1967 olseek(struct thread *td, struct olseek_args *uap) 1968 { 1969 1970 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1971 } 1972 #endif /* COMPAT_43 */ 1973 1974 #if defined(COMPAT_FREEBSD6) 1975 /* Version with the 'pad' argument */ 1976 int 1977 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1978 { 1979 1980 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1981 } 1982 #endif 1983 1984 /* 1985 * Check access permissions using passed credentials. 1986 */ 1987 static int 1988 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1989 struct thread *td) 1990 { 1991 accmode_t accmode; 1992 int error; 1993 1994 /* Flags == 0 means only check for existence. */ 1995 if (user_flags == 0) 1996 return (0); 1997 1998 accmode = 0; 1999 if (user_flags & R_OK) 2000 accmode |= VREAD; 2001 if (user_flags & W_OK) 2002 accmode |= VWRITE; 2003 if (user_flags & X_OK) 2004 accmode |= VEXEC; 2005 #ifdef MAC 2006 error = mac_vnode_check_access(cred, vp, accmode); 2007 if (error != 0) 2008 return (error); 2009 #endif 2010 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2011 error = VOP_ACCESS(vp, accmode, cred, td); 2012 return (error); 2013 } 2014 2015 /* 2016 * Check access permissions using "real" credentials. 2017 */ 2018 #ifndef _SYS_SYSPROTO_H_ 2019 struct access_args { 2020 char *path; 2021 int amode; 2022 }; 2023 #endif 2024 int 2025 sys_access(struct thread *td, struct access_args *uap) 2026 { 2027 2028 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2029 0, uap->amode)); 2030 } 2031 2032 #ifndef _SYS_SYSPROTO_H_ 2033 struct faccessat_args { 2034 int dirfd; 2035 char *path; 2036 int amode; 2037 int flag; 2038 } 2039 #endif 2040 int 2041 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2042 { 2043 2044 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2045 uap->amode)); 2046 } 2047 2048 int 2049 kern_accessat(struct thread *td, int fd, const char *path, 2050 enum uio_seg pathseg, int flag, int amode) 2051 { 2052 struct ucred *cred, *usecred; 2053 struct vnode *vp; 2054 struct nameidata nd; 2055 int error; 2056 2057 if ((flag & ~(AT_EACCESS | AT_BENEATH)) != 0) 2058 return (EINVAL); 2059 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2060 return (EINVAL); 2061 2062 /* 2063 * Create and modify a temporary credential instead of one that 2064 * is potentially shared (if we need one). 2065 */ 2066 cred = td->td_ucred; 2067 if ((flag & AT_EACCESS) == 0 && 2068 ((cred->cr_uid != cred->cr_ruid || 2069 cred->cr_rgid != cred->cr_groups[0]))) { 2070 usecred = crdup(cred); 2071 usecred->cr_uid = cred->cr_ruid; 2072 usecred->cr_groups[0] = cred->cr_rgid; 2073 td->td_ucred = usecred; 2074 } else 2075 usecred = cred; 2076 AUDIT_ARG_VALUE(amode); 2077 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2078 AUDITVNODE1 | ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 2079 pathseg, path, fd, &cap_fstat_rights, td); 2080 if ((error = namei(&nd)) != 0) 2081 goto out; 2082 vp = nd.ni_vp; 2083 2084 error = vn_access(vp, amode, usecred, td); 2085 NDFREE(&nd, NDF_ONLY_PNBUF); 2086 vput(vp); 2087 out: 2088 if (usecred != cred) { 2089 td->td_ucred = cred; 2090 crfree(usecred); 2091 } 2092 return (error); 2093 } 2094 2095 /* 2096 * Check access permissions using "effective" credentials. 2097 */ 2098 #ifndef _SYS_SYSPROTO_H_ 2099 struct eaccess_args { 2100 char *path; 2101 int amode; 2102 }; 2103 #endif 2104 int 2105 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2106 { 2107 2108 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2109 AT_EACCESS, uap->amode)); 2110 } 2111 2112 #if defined(COMPAT_43) 2113 /* 2114 * Get file status; this version follows links. 2115 */ 2116 #ifndef _SYS_SYSPROTO_H_ 2117 struct ostat_args { 2118 char *path; 2119 struct ostat *ub; 2120 }; 2121 #endif 2122 int 2123 ostat(struct thread *td, struct ostat_args *uap) 2124 { 2125 struct stat sb; 2126 struct ostat osb; 2127 int error; 2128 2129 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2130 &sb, NULL); 2131 if (error != 0) 2132 return (error); 2133 cvtstat(&sb, &osb); 2134 return (copyout(&osb, uap->ub, sizeof (osb))); 2135 } 2136 2137 /* 2138 * Get file status; this version does not follow links. 2139 */ 2140 #ifndef _SYS_SYSPROTO_H_ 2141 struct olstat_args { 2142 char *path; 2143 struct ostat *ub; 2144 }; 2145 #endif 2146 int 2147 olstat(struct thread *td, struct olstat_args *uap) 2148 { 2149 struct stat sb; 2150 struct ostat osb; 2151 int error; 2152 2153 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2154 UIO_USERSPACE, &sb, NULL); 2155 if (error != 0) 2156 return (error); 2157 cvtstat(&sb, &osb); 2158 return (copyout(&osb, uap->ub, sizeof (osb))); 2159 } 2160 2161 /* 2162 * Convert from an old to a new stat structure. 2163 * XXX: many values are blindly truncated. 2164 */ 2165 void 2166 cvtstat(struct stat *st, struct ostat *ost) 2167 { 2168 2169 bzero(ost, sizeof(*ost)); 2170 ost->st_dev = st->st_dev; 2171 ost->st_ino = st->st_ino; 2172 ost->st_mode = st->st_mode; 2173 ost->st_nlink = st->st_nlink; 2174 ost->st_uid = st->st_uid; 2175 ost->st_gid = st->st_gid; 2176 ost->st_rdev = st->st_rdev; 2177 ost->st_size = MIN(st->st_size, INT32_MAX); 2178 ost->st_atim = st->st_atim; 2179 ost->st_mtim = st->st_mtim; 2180 ost->st_ctim = st->st_ctim; 2181 ost->st_blksize = st->st_blksize; 2182 ost->st_blocks = st->st_blocks; 2183 ost->st_flags = st->st_flags; 2184 ost->st_gen = st->st_gen; 2185 } 2186 #endif /* COMPAT_43 */ 2187 2188 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2189 int ino64_trunc_error; 2190 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2191 &ino64_trunc_error, 0, 2192 "Error on truncation of device, file or inode number, or link count"); 2193 2194 int 2195 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2196 { 2197 2198 ost->st_dev = st->st_dev; 2199 if (ost->st_dev != st->st_dev) { 2200 switch (ino64_trunc_error) { 2201 default: 2202 /* 2203 * Since dev_t is almost raw, don't clamp to the 2204 * maximum for case 2, but ignore the error. 2205 */ 2206 break; 2207 case 1: 2208 return (EOVERFLOW); 2209 } 2210 } 2211 ost->st_ino = st->st_ino; 2212 if (ost->st_ino != st->st_ino) { 2213 switch (ino64_trunc_error) { 2214 default: 2215 case 0: 2216 break; 2217 case 1: 2218 return (EOVERFLOW); 2219 case 2: 2220 ost->st_ino = UINT32_MAX; 2221 break; 2222 } 2223 } 2224 ost->st_mode = st->st_mode; 2225 ost->st_nlink = st->st_nlink; 2226 if (ost->st_nlink != st->st_nlink) { 2227 switch (ino64_trunc_error) { 2228 default: 2229 case 0: 2230 break; 2231 case 1: 2232 return (EOVERFLOW); 2233 case 2: 2234 ost->st_nlink = UINT16_MAX; 2235 break; 2236 } 2237 } 2238 ost->st_uid = st->st_uid; 2239 ost->st_gid = st->st_gid; 2240 ost->st_rdev = st->st_rdev; 2241 if (ost->st_rdev != st->st_rdev) { 2242 switch (ino64_trunc_error) { 2243 default: 2244 break; 2245 case 1: 2246 return (EOVERFLOW); 2247 } 2248 } 2249 ost->st_atim = st->st_atim; 2250 ost->st_mtim = st->st_mtim; 2251 ost->st_ctim = st->st_ctim; 2252 ost->st_size = st->st_size; 2253 ost->st_blocks = st->st_blocks; 2254 ost->st_blksize = st->st_blksize; 2255 ost->st_flags = st->st_flags; 2256 ost->st_gen = st->st_gen; 2257 ost->st_lspare = 0; 2258 ost->st_birthtim = st->st_birthtim; 2259 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2260 sizeof(*ost) - offsetof(struct freebsd11_stat, 2261 st_birthtim) - sizeof(ost->st_birthtim)); 2262 return (0); 2263 } 2264 2265 int 2266 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2267 { 2268 struct stat sb; 2269 struct freebsd11_stat osb; 2270 int error; 2271 2272 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2273 &sb, NULL); 2274 if (error != 0) 2275 return (error); 2276 error = freebsd11_cvtstat(&sb, &osb); 2277 if (error == 0) 2278 error = copyout(&osb, uap->ub, sizeof(osb)); 2279 return (error); 2280 } 2281 2282 int 2283 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2284 { 2285 struct stat sb; 2286 struct freebsd11_stat osb; 2287 int error; 2288 2289 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2290 UIO_USERSPACE, &sb, NULL); 2291 if (error != 0) 2292 return (error); 2293 error = freebsd11_cvtstat(&sb, &osb); 2294 if (error == 0) 2295 error = copyout(&osb, uap->ub, sizeof(osb)); 2296 return (error); 2297 } 2298 2299 int 2300 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2301 { 2302 struct fhandle fh; 2303 struct stat sb; 2304 struct freebsd11_stat osb; 2305 int error; 2306 2307 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2308 if (error != 0) 2309 return (error); 2310 error = kern_fhstat(td, fh, &sb); 2311 if (error != 0) 2312 return (error); 2313 error = freebsd11_cvtstat(&sb, &osb); 2314 if (error == 0) 2315 error = copyout(&osb, uap->sb, sizeof(osb)); 2316 return (error); 2317 } 2318 2319 int 2320 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2321 { 2322 struct stat sb; 2323 struct freebsd11_stat osb; 2324 int error; 2325 2326 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2327 UIO_USERSPACE, &sb, NULL); 2328 if (error != 0) 2329 return (error); 2330 error = freebsd11_cvtstat(&sb, &osb); 2331 if (error == 0) 2332 error = copyout(&osb, uap->buf, sizeof(osb)); 2333 return (error); 2334 } 2335 #endif /* COMPAT_FREEBSD11 */ 2336 2337 /* 2338 * Get file status 2339 */ 2340 #ifndef _SYS_SYSPROTO_H_ 2341 struct fstatat_args { 2342 int fd; 2343 char *path; 2344 struct stat *buf; 2345 int flag; 2346 } 2347 #endif 2348 int 2349 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2350 { 2351 struct stat sb; 2352 int error; 2353 2354 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2355 UIO_USERSPACE, &sb, NULL); 2356 if (error == 0) 2357 error = copyout(&sb, uap->buf, sizeof (sb)); 2358 return (error); 2359 } 2360 2361 int 2362 kern_statat(struct thread *td, int flag, int fd, const char *path, 2363 enum uio_seg pathseg, struct stat *sbp, 2364 void (*hook)(struct vnode *vp, struct stat *sbp)) 2365 { 2366 struct nameidata nd; 2367 int error; 2368 2369 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2370 return (EINVAL); 2371 2372 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) != 0 ? 2373 NOFOLLOW : FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | 2374 LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2375 &cap_fstat_rights, td); 2376 2377 if ((error = namei(&nd)) != 0) 2378 return (error); 2379 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td); 2380 if (error == 0) { 2381 if (__predict_false(hook != NULL)) 2382 hook(nd.ni_vp, sbp); 2383 } 2384 NDFREE(&nd, NDF_ONLY_PNBUF); 2385 vput(nd.ni_vp); 2386 #ifdef __STAT_TIME_T_EXT 2387 sbp->st_atim_ext = 0; 2388 sbp->st_mtim_ext = 0; 2389 sbp->st_ctim_ext = 0; 2390 sbp->st_btim_ext = 0; 2391 #endif 2392 #ifdef KTRACE 2393 if (KTRPOINT(td, KTR_STRUCT)) 2394 ktrstat_error(sbp, error); 2395 #endif 2396 return (error); 2397 } 2398 2399 #if defined(COMPAT_FREEBSD11) 2400 /* 2401 * Implementation of the NetBSD [l]stat() functions. 2402 */ 2403 void 2404 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2405 { 2406 2407 bzero(nsb, sizeof(*nsb)); 2408 nsb->st_dev = sb->st_dev; 2409 nsb->st_ino = sb->st_ino; 2410 nsb->st_mode = sb->st_mode; 2411 nsb->st_nlink = sb->st_nlink; 2412 nsb->st_uid = sb->st_uid; 2413 nsb->st_gid = sb->st_gid; 2414 nsb->st_rdev = sb->st_rdev; 2415 nsb->st_atim = sb->st_atim; 2416 nsb->st_mtim = sb->st_mtim; 2417 nsb->st_ctim = sb->st_ctim; 2418 nsb->st_size = sb->st_size; 2419 nsb->st_blocks = sb->st_blocks; 2420 nsb->st_blksize = sb->st_blksize; 2421 nsb->st_flags = sb->st_flags; 2422 nsb->st_gen = sb->st_gen; 2423 nsb->st_birthtim = sb->st_birthtim; 2424 } 2425 2426 #ifndef _SYS_SYSPROTO_H_ 2427 struct freebsd11_nstat_args { 2428 char *path; 2429 struct nstat *ub; 2430 }; 2431 #endif 2432 int 2433 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2434 { 2435 struct stat sb; 2436 struct nstat nsb; 2437 int error; 2438 2439 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2440 &sb, NULL); 2441 if (error != 0) 2442 return (error); 2443 freebsd11_cvtnstat(&sb, &nsb); 2444 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2445 } 2446 2447 /* 2448 * NetBSD lstat. Get file status; this version does not follow links. 2449 */ 2450 #ifndef _SYS_SYSPROTO_H_ 2451 struct freebsd11_nlstat_args { 2452 char *path; 2453 struct nstat *ub; 2454 }; 2455 #endif 2456 int 2457 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2458 { 2459 struct stat sb; 2460 struct nstat nsb; 2461 int error; 2462 2463 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2464 UIO_USERSPACE, &sb, NULL); 2465 if (error != 0) 2466 return (error); 2467 freebsd11_cvtnstat(&sb, &nsb); 2468 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2469 } 2470 #endif /* COMPAT_FREEBSD11 */ 2471 2472 /* 2473 * Get configurable pathname variables. 2474 */ 2475 #ifndef _SYS_SYSPROTO_H_ 2476 struct pathconf_args { 2477 char *path; 2478 int name; 2479 }; 2480 #endif 2481 int 2482 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2483 { 2484 long value; 2485 int error; 2486 2487 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2488 &value); 2489 if (error == 0) 2490 td->td_retval[0] = value; 2491 return (error); 2492 } 2493 2494 #ifndef _SYS_SYSPROTO_H_ 2495 struct lpathconf_args { 2496 char *path; 2497 int name; 2498 }; 2499 #endif 2500 int 2501 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2502 { 2503 long value; 2504 int error; 2505 2506 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2507 NOFOLLOW, &value); 2508 if (error == 0) 2509 td->td_retval[0] = value; 2510 return (error); 2511 } 2512 2513 int 2514 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2515 int name, u_long flags, long *valuep) 2516 { 2517 struct nameidata nd; 2518 int error; 2519 2520 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2521 pathseg, path, td); 2522 if ((error = namei(&nd)) != 0) 2523 return (error); 2524 NDFREE(&nd, NDF_ONLY_PNBUF); 2525 2526 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2527 vput(nd.ni_vp); 2528 return (error); 2529 } 2530 2531 /* 2532 * Return target name of a symbolic link. 2533 */ 2534 #ifndef _SYS_SYSPROTO_H_ 2535 struct readlink_args { 2536 char *path; 2537 char *buf; 2538 size_t count; 2539 }; 2540 #endif 2541 int 2542 sys_readlink(struct thread *td, struct readlink_args *uap) 2543 { 2544 2545 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2546 uap->buf, UIO_USERSPACE, uap->count)); 2547 } 2548 #ifndef _SYS_SYSPROTO_H_ 2549 struct readlinkat_args { 2550 int fd; 2551 char *path; 2552 char *buf; 2553 size_t bufsize; 2554 }; 2555 #endif 2556 int 2557 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2558 { 2559 2560 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2561 uap->buf, UIO_USERSPACE, uap->bufsize)); 2562 } 2563 2564 int 2565 kern_readlinkat(struct thread *td, int fd, const char *path, 2566 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2567 { 2568 struct vnode *vp; 2569 struct nameidata nd; 2570 int error; 2571 2572 if (count > IOSIZE_MAX) 2573 return (EINVAL); 2574 2575 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2576 pathseg, path, fd, td); 2577 2578 if ((error = namei(&nd)) != 0) 2579 return (error); 2580 NDFREE(&nd, NDF_ONLY_PNBUF); 2581 vp = nd.ni_vp; 2582 2583 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2584 vput(vp); 2585 2586 return (error); 2587 } 2588 2589 /* 2590 * Helper function to readlink from a vnode 2591 */ 2592 static int 2593 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2594 struct thread *td) 2595 { 2596 struct iovec aiov; 2597 struct uio auio; 2598 int error; 2599 2600 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2601 #ifdef MAC 2602 error = mac_vnode_check_readlink(td->td_ucred, vp); 2603 if (error != 0) 2604 return (error); 2605 #endif 2606 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2607 return (EINVAL); 2608 2609 aiov.iov_base = buf; 2610 aiov.iov_len = count; 2611 auio.uio_iov = &aiov; 2612 auio.uio_iovcnt = 1; 2613 auio.uio_offset = 0; 2614 auio.uio_rw = UIO_READ; 2615 auio.uio_segflg = bufseg; 2616 auio.uio_td = td; 2617 auio.uio_resid = count; 2618 error = VOP_READLINK(vp, &auio, td->td_ucred); 2619 td->td_retval[0] = count - auio.uio_resid; 2620 return (error); 2621 } 2622 2623 /* 2624 * Common implementation code for chflags() and fchflags(). 2625 */ 2626 static int 2627 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2628 { 2629 struct mount *mp; 2630 struct vattr vattr; 2631 int error; 2632 2633 /* We can't support the value matching VNOVAL. */ 2634 if (flags == VNOVAL) 2635 return (EOPNOTSUPP); 2636 2637 /* 2638 * Prevent non-root users from setting flags on devices. When 2639 * a device is reused, users can retain ownership of the device 2640 * if they are allowed to set flags and programs assume that 2641 * chown can't fail when done as root. 2642 */ 2643 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2644 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2645 if (error != 0) 2646 return (error); 2647 } 2648 2649 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2650 return (error); 2651 VATTR_NULL(&vattr); 2652 vattr.va_flags = flags; 2653 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2654 #ifdef MAC 2655 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2656 if (error == 0) 2657 #endif 2658 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2659 VOP_UNLOCK(vp); 2660 vn_finished_write(mp); 2661 return (error); 2662 } 2663 2664 /* 2665 * Change flags of a file given a path name. 2666 */ 2667 #ifndef _SYS_SYSPROTO_H_ 2668 struct chflags_args { 2669 const char *path; 2670 u_long flags; 2671 }; 2672 #endif 2673 int 2674 sys_chflags(struct thread *td, struct chflags_args *uap) 2675 { 2676 2677 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2678 uap->flags, 0)); 2679 } 2680 2681 #ifndef _SYS_SYSPROTO_H_ 2682 struct chflagsat_args { 2683 int fd; 2684 const char *path; 2685 u_long flags; 2686 int atflag; 2687 } 2688 #endif 2689 int 2690 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2691 { 2692 2693 if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2694 return (EINVAL); 2695 2696 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2697 uap->flags, uap->atflag)); 2698 } 2699 2700 /* 2701 * Same as chflags() but doesn't follow symlinks. 2702 */ 2703 #ifndef _SYS_SYSPROTO_H_ 2704 struct lchflags_args { 2705 const char *path; 2706 u_long flags; 2707 }; 2708 #endif 2709 int 2710 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2711 { 2712 2713 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2714 uap->flags, AT_SYMLINK_NOFOLLOW)); 2715 } 2716 2717 static int 2718 kern_chflagsat(struct thread *td, int fd, const char *path, 2719 enum uio_seg pathseg, u_long flags, int atflag) 2720 { 2721 struct nameidata nd; 2722 int error, follow; 2723 2724 AUDIT_ARG_FFLAGS(flags); 2725 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2726 follow |= (atflag & AT_BENEATH) != 0 ? BENEATH : 0; 2727 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2728 &cap_fchflags_rights, td); 2729 if ((error = namei(&nd)) != 0) 2730 return (error); 2731 NDFREE(&nd, NDF_ONLY_PNBUF); 2732 error = setfflags(td, nd.ni_vp, flags); 2733 vrele(nd.ni_vp); 2734 return (error); 2735 } 2736 2737 /* 2738 * Change flags of a file given a file descriptor. 2739 */ 2740 #ifndef _SYS_SYSPROTO_H_ 2741 struct fchflags_args { 2742 int fd; 2743 u_long flags; 2744 }; 2745 #endif 2746 int 2747 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2748 { 2749 struct file *fp; 2750 int error; 2751 2752 AUDIT_ARG_FD(uap->fd); 2753 AUDIT_ARG_FFLAGS(uap->flags); 2754 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2755 &fp); 2756 if (error != 0) 2757 return (error); 2758 #ifdef AUDIT 2759 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2760 AUDIT_ARG_VNODE1(fp->f_vnode); 2761 VOP_UNLOCK(fp->f_vnode); 2762 #endif 2763 error = setfflags(td, fp->f_vnode, uap->flags); 2764 fdrop(fp, td); 2765 return (error); 2766 } 2767 2768 /* 2769 * Common implementation code for chmod(), lchmod() and fchmod(). 2770 */ 2771 int 2772 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2773 { 2774 struct mount *mp; 2775 struct vattr vattr; 2776 int error; 2777 2778 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2779 return (error); 2780 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2781 VATTR_NULL(&vattr); 2782 vattr.va_mode = mode & ALLPERMS; 2783 #ifdef MAC 2784 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2785 if (error == 0) 2786 #endif 2787 error = VOP_SETATTR(vp, &vattr, cred); 2788 VOP_UNLOCK(vp); 2789 vn_finished_write(mp); 2790 return (error); 2791 } 2792 2793 /* 2794 * Change mode of a file given path name. 2795 */ 2796 #ifndef _SYS_SYSPROTO_H_ 2797 struct chmod_args { 2798 char *path; 2799 int mode; 2800 }; 2801 #endif 2802 int 2803 sys_chmod(struct thread *td, struct chmod_args *uap) 2804 { 2805 2806 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2807 uap->mode, 0)); 2808 } 2809 2810 #ifndef _SYS_SYSPROTO_H_ 2811 struct fchmodat_args { 2812 int dirfd; 2813 char *path; 2814 mode_t mode; 2815 int flag; 2816 } 2817 #endif 2818 int 2819 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2820 { 2821 2822 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2823 return (EINVAL); 2824 2825 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2826 uap->mode, uap->flag)); 2827 } 2828 2829 /* 2830 * Change mode of a file given path name (don't follow links.) 2831 */ 2832 #ifndef _SYS_SYSPROTO_H_ 2833 struct lchmod_args { 2834 char *path; 2835 int mode; 2836 }; 2837 #endif 2838 int 2839 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2840 { 2841 2842 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2843 uap->mode, AT_SYMLINK_NOFOLLOW)); 2844 } 2845 2846 int 2847 kern_fchmodat(struct thread *td, int fd, const char *path, 2848 enum uio_seg pathseg, mode_t mode, int flag) 2849 { 2850 struct nameidata nd; 2851 int error, follow; 2852 2853 AUDIT_ARG_MODE(mode); 2854 follow = (flag & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : FOLLOW; 2855 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2856 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2857 &cap_fchmod_rights, td); 2858 if ((error = namei(&nd)) != 0) 2859 return (error); 2860 NDFREE(&nd, NDF_ONLY_PNBUF); 2861 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2862 vrele(nd.ni_vp); 2863 return (error); 2864 } 2865 2866 /* 2867 * Change mode of a file given a file descriptor. 2868 */ 2869 #ifndef _SYS_SYSPROTO_H_ 2870 struct fchmod_args { 2871 int fd; 2872 int mode; 2873 }; 2874 #endif 2875 int 2876 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2877 { 2878 struct file *fp; 2879 int error; 2880 2881 AUDIT_ARG_FD(uap->fd); 2882 AUDIT_ARG_MODE(uap->mode); 2883 2884 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2885 if (error != 0) 2886 return (error); 2887 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2888 fdrop(fp, td); 2889 return (error); 2890 } 2891 2892 /* 2893 * Common implementation for chown(), lchown(), and fchown() 2894 */ 2895 int 2896 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2897 gid_t gid) 2898 { 2899 struct mount *mp; 2900 struct vattr vattr; 2901 int error; 2902 2903 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2904 return (error); 2905 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2906 VATTR_NULL(&vattr); 2907 vattr.va_uid = uid; 2908 vattr.va_gid = gid; 2909 #ifdef MAC 2910 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2911 vattr.va_gid); 2912 if (error == 0) 2913 #endif 2914 error = VOP_SETATTR(vp, &vattr, cred); 2915 VOP_UNLOCK(vp); 2916 vn_finished_write(mp); 2917 return (error); 2918 } 2919 2920 /* 2921 * Set ownership given a path name. 2922 */ 2923 #ifndef _SYS_SYSPROTO_H_ 2924 struct chown_args { 2925 char *path; 2926 int uid; 2927 int gid; 2928 }; 2929 #endif 2930 int 2931 sys_chown(struct thread *td, struct chown_args *uap) 2932 { 2933 2934 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2935 uap->gid, 0)); 2936 } 2937 2938 #ifndef _SYS_SYSPROTO_H_ 2939 struct fchownat_args { 2940 int fd; 2941 const char * path; 2942 uid_t uid; 2943 gid_t gid; 2944 int flag; 2945 }; 2946 #endif 2947 int 2948 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2949 { 2950 2951 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2952 return (EINVAL); 2953 2954 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2955 uap->gid, uap->flag)); 2956 } 2957 2958 int 2959 kern_fchownat(struct thread *td, int fd, const char *path, 2960 enum uio_seg pathseg, int uid, int gid, int flag) 2961 { 2962 struct nameidata nd; 2963 int error, follow; 2964 2965 AUDIT_ARG_OWNER(uid, gid); 2966 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2967 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2968 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2969 &cap_fchown_rights, td); 2970 2971 if ((error = namei(&nd)) != 0) 2972 return (error); 2973 NDFREE(&nd, NDF_ONLY_PNBUF); 2974 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2975 vrele(nd.ni_vp); 2976 return (error); 2977 } 2978 2979 /* 2980 * Set ownership given a path name, do not cross symlinks. 2981 */ 2982 #ifndef _SYS_SYSPROTO_H_ 2983 struct lchown_args { 2984 char *path; 2985 int uid; 2986 int gid; 2987 }; 2988 #endif 2989 int 2990 sys_lchown(struct thread *td, struct lchown_args *uap) 2991 { 2992 2993 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2994 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2995 } 2996 2997 /* 2998 * Set ownership given a file descriptor. 2999 */ 3000 #ifndef _SYS_SYSPROTO_H_ 3001 struct fchown_args { 3002 int fd; 3003 int uid; 3004 int gid; 3005 }; 3006 #endif 3007 int 3008 sys_fchown(struct thread *td, struct fchown_args *uap) 3009 { 3010 struct file *fp; 3011 int error; 3012 3013 AUDIT_ARG_FD(uap->fd); 3014 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3015 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3016 if (error != 0) 3017 return (error); 3018 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3019 fdrop(fp, td); 3020 return (error); 3021 } 3022 3023 /* 3024 * Common implementation code for utimes(), lutimes(), and futimes(). 3025 */ 3026 static int 3027 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3028 struct timespec *tsp) 3029 { 3030 struct timeval tv[2]; 3031 const struct timeval *tvp; 3032 int error; 3033 3034 if (usrtvp == NULL) { 3035 vfs_timestamp(&tsp[0]); 3036 tsp[1] = tsp[0]; 3037 } else { 3038 if (tvpseg == UIO_SYSSPACE) { 3039 tvp = usrtvp; 3040 } else { 3041 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3042 return (error); 3043 tvp = tv; 3044 } 3045 3046 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3047 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3048 return (EINVAL); 3049 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3050 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3051 } 3052 return (0); 3053 } 3054 3055 /* 3056 * Common implementation code for futimens(), utimensat(). 3057 */ 3058 #define UTIMENS_NULL 0x1 3059 #define UTIMENS_EXIT 0x2 3060 static int 3061 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3062 struct timespec *tsp, int *retflags) 3063 { 3064 struct timespec tsnow; 3065 int error; 3066 3067 vfs_timestamp(&tsnow); 3068 *retflags = 0; 3069 if (usrtsp == NULL) { 3070 tsp[0] = tsnow; 3071 tsp[1] = tsnow; 3072 *retflags |= UTIMENS_NULL; 3073 return (0); 3074 } 3075 if (tspseg == UIO_SYSSPACE) { 3076 tsp[0] = usrtsp[0]; 3077 tsp[1] = usrtsp[1]; 3078 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3079 return (error); 3080 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3081 *retflags |= UTIMENS_EXIT; 3082 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3083 *retflags |= UTIMENS_NULL; 3084 if (tsp[0].tv_nsec == UTIME_OMIT) 3085 tsp[0].tv_sec = VNOVAL; 3086 else if (tsp[0].tv_nsec == UTIME_NOW) 3087 tsp[0] = tsnow; 3088 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3089 return (EINVAL); 3090 if (tsp[1].tv_nsec == UTIME_OMIT) 3091 tsp[1].tv_sec = VNOVAL; 3092 else if (tsp[1].tv_nsec == UTIME_NOW) 3093 tsp[1] = tsnow; 3094 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3095 return (EINVAL); 3096 3097 return (0); 3098 } 3099 3100 /* 3101 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3102 * and utimensat(). 3103 */ 3104 static int 3105 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3106 int numtimes, int nullflag) 3107 { 3108 struct mount *mp; 3109 struct vattr vattr; 3110 int error, setbirthtime; 3111 3112 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3113 return (error); 3114 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3115 setbirthtime = 0; 3116 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3117 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3118 setbirthtime = 1; 3119 VATTR_NULL(&vattr); 3120 vattr.va_atime = ts[0]; 3121 vattr.va_mtime = ts[1]; 3122 if (setbirthtime) 3123 vattr.va_birthtime = ts[1]; 3124 if (numtimes > 2) 3125 vattr.va_birthtime = ts[2]; 3126 if (nullflag) 3127 vattr.va_vaflags |= VA_UTIMES_NULL; 3128 #ifdef MAC 3129 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3130 vattr.va_mtime); 3131 #endif 3132 if (error == 0) 3133 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3134 VOP_UNLOCK(vp); 3135 vn_finished_write(mp); 3136 return (error); 3137 } 3138 3139 /* 3140 * Set the access and modification times of a file. 3141 */ 3142 #ifndef _SYS_SYSPROTO_H_ 3143 struct utimes_args { 3144 char *path; 3145 struct timeval *tptr; 3146 }; 3147 #endif 3148 int 3149 sys_utimes(struct thread *td, struct utimes_args *uap) 3150 { 3151 3152 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3153 uap->tptr, UIO_USERSPACE)); 3154 } 3155 3156 #ifndef _SYS_SYSPROTO_H_ 3157 struct futimesat_args { 3158 int fd; 3159 const char * path; 3160 const struct timeval * times; 3161 }; 3162 #endif 3163 int 3164 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3165 { 3166 3167 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3168 uap->times, UIO_USERSPACE)); 3169 } 3170 3171 int 3172 kern_utimesat(struct thread *td, int fd, const char *path, 3173 enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) 3174 { 3175 struct nameidata nd; 3176 struct timespec ts[2]; 3177 int error; 3178 3179 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3180 return (error); 3181 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3182 &cap_futimes_rights, td); 3183 3184 if ((error = namei(&nd)) != 0) 3185 return (error); 3186 NDFREE(&nd, NDF_ONLY_PNBUF); 3187 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3188 vrele(nd.ni_vp); 3189 return (error); 3190 } 3191 3192 /* 3193 * Set the access and modification times of a file. 3194 */ 3195 #ifndef _SYS_SYSPROTO_H_ 3196 struct lutimes_args { 3197 char *path; 3198 struct timeval *tptr; 3199 }; 3200 #endif 3201 int 3202 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3203 { 3204 3205 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3206 UIO_USERSPACE)); 3207 } 3208 3209 int 3210 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3211 struct timeval *tptr, enum uio_seg tptrseg) 3212 { 3213 struct timespec ts[2]; 3214 struct nameidata nd; 3215 int error; 3216 3217 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3218 return (error); 3219 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3220 if ((error = namei(&nd)) != 0) 3221 return (error); 3222 NDFREE(&nd, NDF_ONLY_PNBUF); 3223 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3224 vrele(nd.ni_vp); 3225 return (error); 3226 } 3227 3228 /* 3229 * Set the access and modification times of a file. 3230 */ 3231 #ifndef _SYS_SYSPROTO_H_ 3232 struct futimes_args { 3233 int fd; 3234 struct timeval *tptr; 3235 }; 3236 #endif 3237 int 3238 sys_futimes(struct thread *td, struct futimes_args *uap) 3239 { 3240 3241 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3242 } 3243 3244 int 3245 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3246 enum uio_seg tptrseg) 3247 { 3248 struct timespec ts[2]; 3249 struct file *fp; 3250 int error; 3251 3252 AUDIT_ARG_FD(fd); 3253 error = getutimes(tptr, tptrseg, ts); 3254 if (error != 0) 3255 return (error); 3256 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3257 if (error != 0) 3258 return (error); 3259 #ifdef AUDIT 3260 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3261 AUDIT_ARG_VNODE1(fp->f_vnode); 3262 VOP_UNLOCK(fp->f_vnode); 3263 #endif 3264 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3265 fdrop(fp, td); 3266 return (error); 3267 } 3268 3269 int 3270 sys_futimens(struct thread *td, struct futimens_args *uap) 3271 { 3272 3273 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3274 } 3275 3276 int 3277 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3278 enum uio_seg tptrseg) 3279 { 3280 struct timespec ts[2]; 3281 struct file *fp; 3282 int error, flags; 3283 3284 AUDIT_ARG_FD(fd); 3285 error = getutimens(tptr, tptrseg, ts, &flags); 3286 if (error != 0) 3287 return (error); 3288 if (flags & UTIMENS_EXIT) 3289 return (0); 3290 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3291 if (error != 0) 3292 return (error); 3293 #ifdef AUDIT 3294 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3295 AUDIT_ARG_VNODE1(fp->f_vnode); 3296 VOP_UNLOCK(fp->f_vnode); 3297 #endif 3298 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3299 fdrop(fp, td); 3300 return (error); 3301 } 3302 3303 int 3304 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3305 { 3306 3307 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3308 uap->times, UIO_USERSPACE, uap->flag)); 3309 } 3310 3311 int 3312 kern_utimensat(struct thread *td, int fd, const char *path, 3313 enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, 3314 int flag) 3315 { 3316 struct nameidata nd; 3317 struct timespec ts[2]; 3318 int error, flags; 3319 3320 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 3321 return (EINVAL); 3322 3323 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3324 return (error); 3325 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3326 FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | AUDITVNODE1, 3327 pathseg, path, fd, &cap_futimes_rights, td); 3328 if ((error = namei(&nd)) != 0) 3329 return (error); 3330 /* 3331 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3332 * POSIX states: 3333 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3334 * "Search permission is denied by a component of the path prefix." 3335 */ 3336 NDFREE(&nd, NDF_ONLY_PNBUF); 3337 if ((flags & UTIMENS_EXIT) == 0) 3338 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3339 vrele(nd.ni_vp); 3340 return (error); 3341 } 3342 3343 /* 3344 * Truncate a file given its path name. 3345 */ 3346 #ifndef _SYS_SYSPROTO_H_ 3347 struct truncate_args { 3348 char *path; 3349 int pad; 3350 off_t length; 3351 }; 3352 #endif 3353 int 3354 sys_truncate(struct thread *td, struct truncate_args *uap) 3355 { 3356 3357 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3358 } 3359 3360 int 3361 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3362 off_t length) 3363 { 3364 struct mount *mp; 3365 struct vnode *vp; 3366 void *rl_cookie; 3367 struct vattr vattr; 3368 struct nameidata nd; 3369 int error; 3370 3371 if (length < 0) 3372 return(EINVAL); 3373 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3374 if ((error = namei(&nd)) != 0) 3375 return (error); 3376 vp = nd.ni_vp; 3377 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3378 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3379 vn_rangelock_unlock(vp, rl_cookie); 3380 vrele(vp); 3381 return (error); 3382 } 3383 NDFREE(&nd, NDF_ONLY_PNBUF); 3384 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3385 if (vp->v_type == VDIR) 3386 error = EISDIR; 3387 #ifdef MAC 3388 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3389 } 3390 #endif 3391 else if ((error = vn_writechk(vp)) == 0 && 3392 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3393 VATTR_NULL(&vattr); 3394 vattr.va_size = length; 3395 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3396 } 3397 VOP_UNLOCK(vp); 3398 vn_finished_write(mp); 3399 vn_rangelock_unlock(vp, rl_cookie); 3400 vrele(vp); 3401 return (error); 3402 } 3403 3404 #if defined(COMPAT_43) 3405 /* 3406 * Truncate a file given its path name. 3407 */ 3408 #ifndef _SYS_SYSPROTO_H_ 3409 struct otruncate_args { 3410 char *path; 3411 long length; 3412 }; 3413 #endif 3414 int 3415 otruncate(struct thread *td, struct otruncate_args *uap) 3416 { 3417 3418 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3419 } 3420 #endif /* COMPAT_43 */ 3421 3422 #if defined(COMPAT_FREEBSD6) 3423 /* Versions with the pad argument */ 3424 int 3425 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3426 { 3427 3428 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3429 } 3430 3431 int 3432 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3433 { 3434 3435 return (kern_ftruncate(td, uap->fd, uap->length)); 3436 } 3437 #endif 3438 3439 int 3440 kern_fsync(struct thread *td, int fd, bool fullsync) 3441 { 3442 struct vnode *vp; 3443 struct mount *mp; 3444 struct file *fp; 3445 int error, lock_flags; 3446 3447 AUDIT_ARG_FD(fd); 3448 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3449 if (error != 0) 3450 return (error); 3451 vp = fp->f_vnode; 3452 #if 0 3453 if (!fullsync) 3454 /* XXXKIB: compete outstanding aio writes */; 3455 #endif 3456 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3457 if (error != 0) 3458 goto drop; 3459 if (MNT_SHARED_WRITES(mp) || 3460 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3461 lock_flags = LK_SHARED; 3462 } else { 3463 lock_flags = LK_EXCLUSIVE; 3464 } 3465 vn_lock(vp, lock_flags | LK_RETRY); 3466 AUDIT_ARG_VNODE1(vp); 3467 if (vp->v_object != NULL) { 3468 VM_OBJECT_WLOCK(vp->v_object); 3469 vm_object_page_clean(vp->v_object, 0, 0, 0); 3470 VM_OBJECT_WUNLOCK(vp->v_object); 3471 } 3472 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3473 VOP_UNLOCK(vp); 3474 vn_finished_write(mp); 3475 drop: 3476 fdrop(fp, td); 3477 return (error); 3478 } 3479 3480 /* 3481 * Sync an open file. 3482 */ 3483 #ifndef _SYS_SYSPROTO_H_ 3484 struct fsync_args { 3485 int fd; 3486 }; 3487 #endif 3488 int 3489 sys_fsync(struct thread *td, struct fsync_args *uap) 3490 { 3491 3492 return (kern_fsync(td, uap->fd, true)); 3493 } 3494 3495 int 3496 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3497 { 3498 3499 return (kern_fsync(td, uap->fd, false)); 3500 } 3501 3502 /* 3503 * Rename files. Source and destination must either both be directories, or 3504 * both not be directories. If target is a directory, it must be empty. 3505 */ 3506 #ifndef _SYS_SYSPROTO_H_ 3507 struct rename_args { 3508 char *from; 3509 char *to; 3510 }; 3511 #endif 3512 int 3513 sys_rename(struct thread *td, struct rename_args *uap) 3514 { 3515 3516 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3517 uap->to, UIO_USERSPACE)); 3518 } 3519 3520 #ifndef _SYS_SYSPROTO_H_ 3521 struct renameat_args { 3522 int oldfd; 3523 char *old; 3524 int newfd; 3525 char *new; 3526 }; 3527 #endif 3528 int 3529 sys_renameat(struct thread *td, struct renameat_args *uap) 3530 { 3531 3532 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3533 UIO_USERSPACE)); 3534 } 3535 3536 #ifdef MAC 3537 static int 3538 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3539 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3540 { 3541 int error; 3542 3543 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3544 AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td); 3545 if ((error = namei(fromnd)) != 0) 3546 return (error); 3547 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3548 fromnd->ni_vp, &fromnd->ni_cnd); 3549 VOP_UNLOCK(fromnd->ni_dvp); 3550 if (fromnd->ni_dvp != fromnd->ni_vp) 3551 VOP_UNLOCK(fromnd->ni_vp); 3552 if (error != 0) { 3553 NDFREE(fromnd, NDF_ONLY_PNBUF); 3554 vrele(fromnd->ni_dvp); 3555 vrele(fromnd->ni_vp); 3556 if (fromnd->ni_startdir) 3557 vrele(fromnd->ni_startdir); 3558 } 3559 return (error); 3560 } 3561 #endif 3562 3563 int 3564 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3565 const char *new, enum uio_seg pathseg) 3566 { 3567 struct mount *mp = NULL; 3568 struct vnode *tvp, *fvp, *tdvp; 3569 struct nameidata fromnd, tond; 3570 int error; 3571 3572 again: 3573 bwillwrite(); 3574 #ifdef MAC 3575 if (mac_vnode_check_rename_from_enabled()) { 3576 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3577 &fromnd); 3578 if (error != 0) 3579 return (error); 3580 } else { 3581 #endif 3582 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3583 pathseg, old, oldfd, &cap_renameat_source_rights, td); 3584 if ((error = namei(&fromnd)) != 0) 3585 return (error); 3586 #ifdef MAC 3587 } 3588 #endif 3589 fvp = fromnd.ni_vp; 3590 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3591 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3592 &cap_renameat_target_rights, td); 3593 if (fromnd.ni_vp->v_type == VDIR) 3594 tond.ni_cnd.cn_flags |= WILLBEDIR; 3595 if ((error = namei(&tond)) != 0) { 3596 /* Translate error code for rename("dir1", "dir2/."). */ 3597 if (error == EISDIR && fvp->v_type == VDIR) 3598 error = EINVAL; 3599 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3600 vrele(fromnd.ni_dvp); 3601 vrele(fvp); 3602 goto out1; 3603 } 3604 tdvp = tond.ni_dvp; 3605 tvp = tond.ni_vp; 3606 error = vn_start_write(fvp, &mp, V_NOWAIT); 3607 if (error != 0) { 3608 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3609 NDFREE(&tond, NDF_ONLY_PNBUF); 3610 if (tvp != NULL) 3611 vput(tvp); 3612 if (tdvp == tvp) 3613 vrele(tdvp); 3614 else 3615 vput(tdvp); 3616 vrele(fromnd.ni_dvp); 3617 vrele(fvp); 3618 vrele(tond.ni_startdir); 3619 if (fromnd.ni_startdir != NULL) 3620 vrele(fromnd.ni_startdir); 3621 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3622 if (error != 0) 3623 return (error); 3624 goto again; 3625 } 3626 if (tvp != NULL) { 3627 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3628 error = ENOTDIR; 3629 goto out; 3630 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3631 error = EISDIR; 3632 goto out; 3633 } 3634 #ifdef CAPABILITIES 3635 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3636 /* 3637 * If the target already exists we require CAP_UNLINKAT 3638 * from 'newfd', when newfd was used for the lookup. 3639 */ 3640 error = cap_check(&tond.ni_filecaps.fc_rights, 3641 &cap_unlinkat_rights); 3642 if (error != 0) 3643 goto out; 3644 } 3645 #endif 3646 } 3647 if (fvp == tdvp) { 3648 error = EINVAL; 3649 goto out; 3650 } 3651 /* 3652 * If the source is the same as the destination (that is, if they 3653 * are links to the same vnode), then there is nothing to do. 3654 */ 3655 if (fvp == tvp) 3656 error = -1; 3657 #ifdef MAC 3658 else 3659 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3660 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3661 #endif 3662 out: 3663 if (error == 0) { 3664 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3665 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3666 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3667 NDFREE(&tond, NDF_ONLY_PNBUF); 3668 } else { 3669 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3670 NDFREE(&tond, NDF_ONLY_PNBUF); 3671 if (tvp != NULL) 3672 vput(tvp); 3673 if (tdvp == tvp) 3674 vrele(tdvp); 3675 else 3676 vput(tdvp); 3677 vrele(fromnd.ni_dvp); 3678 vrele(fvp); 3679 } 3680 vrele(tond.ni_startdir); 3681 vn_finished_write(mp); 3682 out1: 3683 if (fromnd.ni_startdir) 3684 vrele(fromnd.ni_startdir); 3685 if (error == -1) 3686 return (0); 3687 return (error); 3688 } 3689 3690 /* 3691 * Make a directory file. 3692 */ 3693 #ifndef _SYS_SYSPROTO_H_ 3694 struct mkdir_args { 3695 char *path; 3696 int mode; 3697 }; 3698 #endif 3699 int 3700 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3701 { 3702 3703 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3704 uap->mode)); 3705 } 3706 3707 #ifndef _SYS_SYSPROTO_H_ 3708 struct mkdirat_args { 3709 int fd; 3710 char *path; 3711 mode_t mode; 3712 }; 3713 #endif 3714 int 3715 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3716 { 3717 3718 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3719 } 3720 3721 int 3722 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3723 int mode) 3724 { 3725 struct mount *mp; 3726 struct vnode *vp; 3727 struct vattr vattr; 3728 struct nameidata nd; 3729 int error; 3730 3731 AUDIT_ARG_MODE(mode); 3732 restart: 3733 bwillwrite(); 3734 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3735 NOCACHE, segflg, path, fd, &cap_mkdirat_rights, 3736 td); 3737 nd.ni_cnd.cn_flags |= WILLBEDIR; 3738 if ((error = namei(&nd)) != 0) 3739 return (error); 3740 vp = nd.ni_vp; 3741 if (vp != NULL) { 3742 NDFREE(&nd, NDF_ONLY_PNBUF); 3743 /* 3744 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3745 * the strange behaviour of leaving the vnode unlocked 3746 * if the target is the same vnode as the parent. 3747 */ 3748 if (vp == nd.ni_dvp) 3749 vrele(nd.ni_dvp); 3750 else 3751 vput(nd.ni_dvp); 3752 vrele(vp); 3753 return (EEXIST); 3754 } 3755 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3756 NDFREE(&nd, NDF_ONLY_PNBUF); 3757 vput(nd.ni_dvp); 3758 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3759 return (error); 3760 goto restart; 3761 } 3762 VATTR_NULL(&vattr); 3763 vattr.va_type = VDIR; 3764 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3765 #ifdef MAC 3766 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3767 &vattr); 3768 if (error != 0) 3769 goto out; 3770 #endif 3771 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3772 #ifdef MAC 3773 out: 3774 #endif 3775 NDFREE(&nd, NDF_ONLY_PNBUF); 3776 vput(nd.ni_dvp); 3777 if (error == 0) 3778 vput(nd.ni_vp); 3779 vn_finished_write(mp); 3780 return (error); 3781 } 3782 3783 /* 3784 * Remove a directory file. 3785 */ 3786 #ifndef _SYS_SYSPROTO_H_ 3787 struct rmdir_args { 3788 char *path; 3789 }; 3790 #endif 3791 int 3792 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3793 { 3794 3795 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3796 0)); 3797 } 3798 3799 int 3800 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3801 enum uio_seg pathseg, int flag) 3802 { 3803 struct mount *mp; 3804 struct vnode *vp; 3805 struct file *fp; 3806 struct nameidata nd; 3807 cap_rights_t rights; 3808 int error; 3809 3810 fp = NULL; 3811 if (fd != FD_NONE) { 3812 error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), 3813 &fp); 3814 if (error != 0) 3815 return (error); 3816 } 3817 3818 restart: 3819 bwillwrite(); 3820 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3821 ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 3822 pathseg, path, dfd, &cap_unlinkat_rights, td); 3823 if ((error = namei(&nd)) != 0) 3824 goto fdout; 3825 vp = nd.ni_vp; 3826 if (vp->v_type != VDIR) { 3827 error = ENOTDIR; 3828 goto out; 3829 } 3830 /* 3831 * No rmdir "." please. 3832 */ 3833 if (nd.ni_dvp == vp) { 3834 error = EINVAL; 3835 goto out; 3836 } 3837 /* 3838 * The root of a mounted filesystem cannot be deleted. 3839 */ 3840 if (vp->v_vflag & VV_ROOT) { 3841 error = EBUSY; 3842 goto out; 3843 } 3844 3845 if (fp != NULL && fp->f_vnode != vp) { 3846 if (VN_IS_DOOMED(fp->f_vnode)) 3847 error = EBADF; 3848 else 3849 error = EDEADLK; 3850 goto out; 3851 } 3852 3853 #ifdef MAC 3854 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3855 &nd.ni_cnd); 3856 if (error != 0) 3857 goto out; 3858 #endif 3859 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3860 NDFREE(&nd, NDF_ONLY_PNBUF); 3861 vput(vp); 3862 if (nd.ni_dvp == vp) 3863 vrele(nd.ni_dvp); 3864 else 3865 vput(nd.ni_dvp); 3866 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3867 goto fdout; 3868 goto restart; 3869 } 3870 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3871 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3872 vn_finished_write(mp); 3873 out: 3874 NDFREE(&nd, NDF_ONLY_PNBUF); 3875 vput(vp); 3876 if (nd.ni_dvp == vp) 3877 vrele(nd.ni_dvp); 3878 else 3879 vput(nd.ni_dvp); 3880 fdout: 3881 if (fp != NULL) 3882 fdrop(fp, td); 3883 return (error); 3884 } 3885 3886 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3887 int 3888 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3889 long *basep, void (*func)(struct freebsd11_dirent *)) 3890 { 3891 struct freebsd11_dirent dstdp; 3892 struct dirent *dp, *edp; 3893 char *dirbuf; 3894 off_t base; 3895 ssize_t resid, ucount; 3896 int error; 3897 3898 /* XXX arbitrary sanity limit on `count'. */ 3899 count = min(count, 64 * 1024); 3900 3901 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3902 3903 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3904 UIO_SYSSPACE); 3905 if (error != 0) 3906 goto done; 3907 if (basep != NULL) 3908 *basep = base; 3909 3910 ucount = 0; 3911 for (dp = (struct dirent *)dirbuf, 3912 edp = (struct dirent *)&dirbuf[count - resid]; 3913 ucount < count && dp < edp; ) { 3914 if (dp->d_reclen == 0) 3915 break; 3916 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3917 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3918 continue; 3919 dstdp.d_type = dp->d_type; 3920 dstdp.d_namlen = dp->d_namlen; 3921 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3922 if (dstdp.d_fileno != dp->d_fileno) { 3923 switch (ino64_trunc_error) { 3924 default: 3925 case 0: 3926 break; 3927 case 1: 3928 error = EOVERFLOW; 3929 goto done; 3930 case 2: 3931 dstdp.d_fileno = UINT32_MAX; 3932 break; 3933 } 3934 } 3935 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3936 ((dp->d_namlen + 1 + 3) &~ 3); 3937 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3938 bzero(dstdp.d_name + dstdp.d_namlen, 3939 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3940 dstdp.d_namlen); 3941 MPASS(dstdp.d_reclen <= dp->d_reclen); 3942 MPASS(ucount + dstdp.d_reclen <= count); 3943 if (func != NULL) 3944 func(&dstdp); 3945 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3946 if (error != 0) 3947 break; 3948 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3949 ucount += dstdp.d_reclen; 3950 } 3951 3952 done: 3953 free(dirbuf, M_TEMP); 3954 if (error == 0) 3955 td->td_retval[0] = ucount; 3956 return (error); 3957 } 3958 #endif /* COMPAT */ 3959 3960 #ifdef COMPAT_43 3961 static void 3962 ogetdirentries_cvt(struct freebsd11_dirent *dp) 3963 { 3964 #if (BYTE_ORDER == LITTLE_ENDIAN) 3965 /* 3966 * The expected low byte of dp->d_namlen is our dp->d_type. 3967 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 3968 */ 3969 dp->d_type = dp->d_namlen; 3970 dp->d_namlen = 0; 3971 #else 3972 /* 3973 * The dp->d_type is the high byte of the expected dp->d_namlen, 3974 * so must be zero'ed. 3975 */ 3976 dp->d_type = 0; 3977 #endif 3978 } 3979 3980 /* 3981 * Read a block of directory entries in a filesystem independent format. 3982 */ 3983 #ifndef _SYS_SYSPROTO_H_ 3984 struct ogetdirentries_args { 3985 int fd; 3986 char *buf; 3987 u_int count; 3988 long *basep; 3989 }; 3990 #endif 3991 int 3992 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3993 { 3994 long loff; 3995 int error; 3996 3997 error = kern_ogetdirentries(td, uap, &loff); 3998 if (error == 0) 3999 error = copyout(&loff, uap->basep, sizeof(long)); 4000 return (error); 4001 } 4002 4003 int 4004 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4005 long *ploff) 4006 { 4007 long base; 4008 int error; 4009 4010 /* XXX arbitrary sanity limit on `count'. */ 4011 if (uap->count > 64 * 1024) 4012 return (EINVAL); 4013 4014 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4015 &base, ogetdirentries_cvt); 4016 4017 if (error == 0 && uap->basep != NULL) 4018 error = copyout(&base, uap->basep, sizeof(long)); 4019 4020 return (error); 4021 } 4022 #endif /* COMPAT_43 */ 4023 4024 #if defined(COMPAT_FREEBSD11) 4025 #ifndef _SYS_SYSPROTO_H_ 4026 struct freebsd11_getdirentries_args { 4027 int fd; 4028 char *buf; 4029 u_int count; 4030 long *basep; 4031 }; 4032 #endif 4033 int 4034 freebsd11_getdirentries(struct thread *td, 4035 struct freebsd11_getdirentries_args *uap) 4036 { 4037 long base; 4038 int error; 4039 4040 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4041 &base, NULL); 4042 4043 if (error == 0 && uap->basep != NULL) 4044 error = copyout(&base, uap->basep, sizeof(long)); 4045 return (error); 4046 } 4047 4048 int 4049 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4050 { 4051 struct freebsd11_getdirentries_args ap; 4052 4053 ap.fd = uap->fd; 4054 ap.buf = uap->buf; 4055 ap.count = uap->count; 4056 ap.basep = NULL; 4057 return (freebsd11_getdirentries(td, &ap)); 4058 } 4059 #endif /* COMPAT_FREEBSD11 */ 4060 4061 /* 4062 * Read a block of directory entries in a filesystem independent format. 4063 */ 4064 int 4065 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4066 { 4067 off_t base; 4068 int error; 4069 4070 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4071 NULL, UIO_USERSPACE); 4072 if (error != 0) 4073 return (error); 4074 if (uap->basep != NULL) 4075 error = copyout(&base, uap->basep, sizeof(off_t)); 4076 return (error); 4077 } 4078 4079 int 4080 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4081 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4082 { 4083 struct vnode *vp; 4084 struct file *fp; 4085 struct uio auio; 4086 struct iovec aiov; 4087 off_t loff; 4088 int error, eofflag; 4089 off_t foffset; 4090 4091 AUDIT_ARG_FD(fd); 4092 if (count > IOSIZE_MAX) 4093 return (EINVAL); 4094 auio.uio_resid = count; 4095 error = getvnode(td, fd, &cap_read_rights, &fp); 4096 if (error != 0) 4097 return (error); 4098 if ((fp->f_flag & FREAD) == 0) { 4099 fdrop(fp, td); 4100 return (EBADF); 4101 } 4102 vp = fp->f_vnode; 4103 foffset = foffset_lock(fp, 0); 4104 unionread: 4105 if (vp->v_type != VDIR) { 4106 error = EINVAL; 4107 goto fail; 4108 } 4109 aiov.iov_base = buf; 4110 aiov.iov_len = count; 4111 auio.uio_iov = &aiov; 4112 auio.uio_iovcnt = 1; 4113 auio.uio_rw = UIO_READ; 4114 auio.uio_segflg = bufseg; 4115 auio.uio_td = td; 4116 vn_lock(vp, LK_SHARED | LK_RETRY); 4117 AUDIT_ARG_VNODE1(vp); 4118 loff = auio.uio_offset = foffset; 4119 #ifdef MAC 4120 error = mac_vnode_check_readdir(td->td_ucred, vp); 4121 if (error == 0) 4122 #endif 4123 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4124 NULL); 4125 foffset = auio.uio_offset; 4126 if (error != 0) { 4127 VOP_UNLOCK(vp); 4128 goto fail; 4129 } 4130 if (count == auio.uio_resid && 4131 (vp->v_vflag & VV_ROOT) && 4132 (vp->v_mount->mnt_flag & MNT_UNION)) { 4133 struct vnode *tvp = vp; 4134 4135 vp = vp->v_mount->mnt_vnodecovered; 4136 VREF(vp); 4137 fp->f_vnode = vp; 4138 foffset = 0; 4139 vput(tvp); 4140 goto unionread; 4141 } 4142 VOP_UNLOCK(vp); 4143 *basep = loff; 4144 if (residp != NULL) 4145 *residp = auio.uio_resid; 4146 td->td_retval[0] = count - auio.uio_resid; 4147 fail: 4148 foffset_unlock(fp, foffset, 0); 4149 fdrop(fp, td); 4150 return (error); 4151 } 4152 4153 /* 4154 * Set the mode mask for creation of filesystem nodes. 4155 */ 4156 #ifndef _SYS_SYSPROTO_H_ 4157 struct umask_args { 4158 int newmask; 4159 }; 4160 #endif 4161 int 4162 sys_umask(struct thread *td, struct umask_args *uap) 4163 { 4164 struct filedesc *fdp; 4165 4166 fdp = td->td_proc->p_fd; 4167 FILEDESC_XLOCK(fdp); 4168 td->td_retval[0] = fdp->fd_cmask; 4169 fdp->fd_cmask = uap->newmask & ALLPERMS; 4170 FILEDESC_XUNLOCK(fdp); 4171 return (0); 4172 } 4173 4174 /* 4175 * Void all references to file by ripping underlying filesystem away from 4176 * vnode. 4177 */ 4178 #ifndef _SYS_SYSPROTO_H_ 4179 struct revoke_args { 4180 char *path; 4181 }; 4182 #endif 4183 int 4184 sys_revoke(struct thread *td, struct revoke_args *uap) 4185 { 4186 struct vnode *vp; 4187 struct vattr vattr; 4188 struct nameidata nd; 4189 int error; 4190 4191 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4192 uap->path, td); 4193 if ((error = namei(&nd)) != 0) 4194 return (error); 4195 vp = nd.ni_vp; 4196 NDFREE(&nd, NDF_ONLY_PNBUF); 4197 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4198 error = EINVAL; 4199 goto out; 4200 } 4201 #ifdef MAC 4202 error = mac_vnode_check_revoke(td->td_ucred, vp); 4203 if (error != 0) 4204 goto out; 4205 #endif 4206 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4207 if (error != 0) 4208 goto out; 4209 if (td->td_ucred->cr_uid != vattr.va_uid) { 4210 error = priv_check(td, PRIV_VFS_ADMIN); 4211 if (error != 0) 4212 goto out; 4213 } 4214 if (devfs_usecount(vp) > 0) 4215 VOP_REVOKE(vp, REVOKEALL); 4216 out: 4217 vput(vp); 4218 return (error); 4219 } 4220 4221 /* 4222 * Convert a user file descriptor to a kernel file entry and check that, if it 4223 * is a capability, the correct rights are present. A reference on the file 4224 * entry is held upon returning. 4225 */ 4226 int 4227 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4228 { 4229 struct file *fp; 4230 int error; 4231 4232 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); 4233 if (error != 0) 4234 return (error); 4235 4236 /* 4237 * The file could be not of the vnode type, or it may be not 4238 * yet fully initialized, in which case the f_vnode pointer 4239 * may be set, but f_ops is still badfileops. E.g., 4240 * devfs_open() transiently create such situation to 4241 * facilitate csw d_fdopen(). 4242 * 4243 * Dupfdopen() handling in kern_openat() installs the 4244 * half-baked file into the process descriptor table, allowing 4245 * other thread to dereference it. Guard against the race by 4246 * checking f_ops. 4247 */ 4248 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4249 fdrop(fp, td); 4250 return (EINVAL); 4251 } 4252 *fpp = fp; 4253 return (0); 4254 } 4255 4256 /* 4257 * Get an (NFS) file handle. 4258 */ 4259 #ifndef _SYS_SYSPROTO_H_ 4260 struct lgetfh_args { 4261 char *fname; 4262 fhandle_t *fhp; 4263 }; 4264 #endif 4265 int 4266 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4267 { 4268 4269 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4270 UIO_USERSPACE, uap->fhp)); 4271 } 4272 4273 #ifndef _SYS_SYSPROTO_H_ 4274 struct getfh_args { 4275 char *fname; 4276 fhandle_t *fhp; 4277 }; 4278 #endif 4279 int 4280 sys_getfh(struct thread *td, struct getfh_args *uap) 4281 { 4282 4283 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4284 uap->fhp)); 4285 } 4286 4287 /* 4288 * syscall for the rpc.lockd to use to translate an open descriptor into 4289 * a NFS file handle. 4290 * 4291 * warning: do not remove the priv_check() call or this becomes one giant 4292 * security hole. 4293 */ 4294 #ifndef _SYS_SYSPROTO_H_ 4295 struct getfhat_args { 4296 int fd; 4297 char *path; 4298 fhandle_t *fhp; 4299 int flags; 4300 }; 4301 #endif 4302 int 4303 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4304 { 4305 4306 if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 4307 return (EINVAL); 4308 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4309 uap->fhp)); 4310 } 4311 4312 static int 4313 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4314 enum uio_seg pathseg, fhandle_t *fhp) 4315 { 4316 struct nameidata nd; 4317 fhandle_t fh; 4318 struct vnode *vp; 4319 int error; 4320 4321 error = priv_check(td, PRIV_VFS_GETFH); 4322 if (error != 0) 4323 return (error); 4324 NDINIT_AT(&nd, LOOKUP, ((flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 4325 FOLLOW) | ((flags & AT_BENEATH) != 0 ? BENEATH : 0) | LOCKLEAF | 4326 AUDITVNODE1, pathseg, path, fd, td); 4327 error = namei(&nd); 4328 if (error != 0) 4329 return (error); 4330 NDFREE(&nd, NDF_ONLY_PNBUF); 4331 vp = nd.ni_vp; 4332 bzero(&fh, sizeof(fh)); 4333 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4334 error = VOP_VPTOFH(vp, &fh.fh_fid); 4335 vput(vp); 4336 if (error == 0) 4337 error = copyout(&fh, fhp, sizeof (fh)); 4338 return (error); 4339 } 4340 4341 #ifndef _SYS_SYSPROTO_H_ 4342 struct fhlink_args { 4343 fhandle_t *fhp; 4344 const char *to; 4345 }; 4346 #endif 4347 int 4348 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4349 { 4350 4351 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4352 } 4353 4354 #ifndef _SYS_SYSPROTO_H_ 4355 struct fhlinkat_args { 4356 fhandle_t *fhp; 4357 int tofd; 4358 const char *to; 4359 }; 4360 #endif 4361 int 4362 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4363 { 4364 4365 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4366 } 4367 4368 static int 4369 kern_fhlinkat(struct thread *td, int fd, const char *path, 4370 enum uio_seg pathseg, fhandle_t *fhp) 4371 { 4372 fhandle_t fh; 4373 struct mount *mp; 4374 struct vnode *vp; 4375 int error; 4376 4377 error = priv_check(td, PRIV_VFS_GETFH); 4378 if (error != 0) 4379 return (error); 4380 error = copyin(fhp, &fh, sizeof(fh)); 4381 if (error != 0) 4382 return (error); 4383 do { 4384 bwillwrite(); 4385 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4386 return (ESTALE); 4387 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4388 vfs_unbusy(mp); 4389 if (error != 0) 4390 return (error); 4391 VOP_UNLOCK(vp); 4392 } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN); 4393 return (error); 4394 } 4395 4396 #ifndef _SYS_SYSPROTO_H_ 4397 struct fhreadlink_args { 4398 fhandle_t *fhp; 4399 char *buf; 4400 size_t bufsize; 4401 }; 4402 #endif 4403 int 4404 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4405 { 4406 fhandle_t fh; 4407 struct mount *mp; 4408 struct vnode *vp; 4409 int error; 4410 4411 error = priv_check(td, PRIV_VFS_GETFH); 4412 if (error != 0) 4413 return (error); 4414 if (uap->bufsize > IOSIZE_MAX) 4415 return (EINVAL); 4416 error = copyin(uap->fhp, &fh, sizeof(fh)); 4417 if (error != 0) 4418 return (error); 4419 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4420 return (ESTALE); 4421 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4422 vfs_unbusy(mp); 4423 if (error != 0) 4424 return (error); 4425 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4426 vput(vp); 4427 return (error); 4428 } 4429 4430 /* 4431 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4432 * open descriptor. 4433 * 4434 * warning: do not remove the priv_check() call or this becomes one giant 4435 * security hole. 4436 */ 4437 #ifndef _SYS_SYSPROTO_H_ 4438 struct fhopen_args { 4439 const struct fhandle *u_fhp; 4440 int flags; 4441 }; 4442 #endif 4443 int 4444 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4445 { 4446 struct mount *mp; 4447 struct vnode *vp; 4448 struct fhandle fhp; 4449 struct file *fp; 4450 int fmode, error; 4451 int indx; 4452 4453 error = priv_check(td, PRIV_VFS_FHOPEN); 4454 if (error != 0) 4455 return (error); 4456 indx = -1; 4457 fmode = FFLAGS(uap->flags); 4458 /* why not allow a non-read/write open for our lockd? */ 4459 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4460 return (EINVAL); 4461 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4462 if (error != 0) 4463 return(error); 4464 /* find the mount point */ 4465 mp = vfs_busyfs(&fhp.fh_fsid); 4466 if (mp == NULL) 4467 return (ESTALE); 4468 /* now give me my vnode, it gets returned to me locked */ 4469 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4470 vfs_unbusy(mp); 4471 if (error != 0) 4472 return (error); 4473 4474 error = falloc_noinstall(td, &fp); 4475 if (error != 0) { 4476 vput(vp); 4477 return (error); 4478 } 4479 /* 4480 * An extra reference on `fp' has been held for us by 4481 * falloc_noinstall(). 4482 */ 4483 4484 #ifdef INVARIANTS 4485 td->td_dupfd = -1; 4486 #endif 4487 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4488 if (error != 0) { 4489 KASSERT(fp->f_ops == &badfileops, 4490 ("VOP_OPEN in fhopen() set f_ops")); 4491 KASSERT(td->td_dupfd < 0, 4492 ("fhopen() encountered fdopen()")); 4493 4494 vput(vp); 4495 goto bad; 4496 } 4497 #ifdef INVARIANTS 4498 td->td_dupfd = 0; 4499 #endif 4500 fp->f_vnode = vp; 4501 finit_vnode(fp, fmode, NULL, &vnops); 4502 VOP_UNLOCK(vp); 4503 if ((fmode & O_TRUNC) != 0) { 4504 error = fo_truncate(fp, 0, td->td_ucred, td); 4505 if (error != 0) 4506 goto bad; 4507 } 4508 4509 error = finstall(td, fp, &indx, fmode, NULL); 4510 bad: 4511 fdrop(fp, td); 4512 td->td_retval[0] = indx; 4513 return (error); 4514 } 4515 4516 /* 4517 * Stat an (NFS) file handle. 4518 */ 4519 #ifndef _SYS_SYSPROTO_H_ 4520 struct fhstat_args { 4521 struct fhandle *u_fhp; 4522 struct stat *sb; 4523 }; 4524 #endif 4525 int 4526 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4527 { 4528 struct stat sb; 4529 struct fhandle fh; 4530 int error; 4531 4532 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4533 if (error != 0) 4534 return (error); 4535 error = kern_fhstat(td, fh, &sb); 4536 if (error == 0) 4537 error = copyout(&sb, uap->sb, sizeof(sb)); 4538 return (error); 4539 } 4540 4541 int 4542 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4543 { 4544 struct mount *mp; 4545 struct vnode *vp; 4546 int error; 4547 4548 error = priv_check(td, PRIV_VFS_FHSTAT); 4549 if (error != 0) 4550 return (error); 4551 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4552 return (ESTALE); 4553 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4554 vfs_unbusy(mp); 4555 if (error != 0) 4556 return (error); 4557 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td); 4558 vput(vp); 4559 return (error); 4560 } 4561 4562 /* 4563 * Implement fstatfs() for (NFS) file handles. 4564 */ 4565 #ifndef _SYS_SYSPROTO_H_ 4566 struct fhstatfs_args { 4567 struct fhandle *u_fhp; 4568 struct statfs *buf; 4569 }; 4570 #endif 4571 int 4572 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4573 { 4574 struct statfs *sfp; 4575 fhandle_t fh; 4576 int error; 4577 4578 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4579 if (error != 0) 4580 return (error); 4581 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4582 error = kern_fhstatfs(td, fh, sfp); 4583 if (error == 0) 4584 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4585 free(sfp, M_STATFS); 4586 return (error); 4587 } 4588 4589 int 4590 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4591 { 4592 struct mount *mp; 4593 struct vnode *vp; 4594 int error; 4595 4596 error = priv_check(td, PRIV_VFS_FHSTATFS); 4597 if (error != 0) 4598 return (error); 4599 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4600 return (ESTALE); 4601 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4602 if (error != 0) { 4603 vfs_unbusy(mp); 4604 return (error); 4605 } 4606 vput(vp); 4607 error = prison_canseemount(td->td_ucred, mp); 4608 if (error != 0) 4609 goto out; 4610 #ifdef MAC 4611 error = mac_mount_check_stat(td->td_ucred, mp); 4612 if (error != 0) 4613 goto out; 4614 #endif 4615 error = VFS_STATFS(mp, buf); 4616 out: 4617 vfs_unbusy(mp); 4618 return (error); 4619 } 4620 4621 /* 4622 * Unlike madvise(2), we do not make a best effort to remember every 4623 * possible caching hint. Instead, we remember the last setting with 4624 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4625 * region of any current setting. 4626 */ 4627 int 4628 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4629 int advice) 4630 { 4631 struct fadvise_info *fa, *new; 4632 struct file *fp; 4633 struct vnode *vp; 4634 off_t end; 4635 int error; 4636 4637 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4638 return (EINVAL); 4639 AUDIT_ARG_VALUE(advice); 4640 switch (advice) { 4641 case POSIX_FADV_SEQUENTIAL: 4642 case POSIX_FADV_RANDOM: 4643 case POSIX_FADV_NOREUSE: 4644 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4645 break; 4646 case POSIX_FADV_NORMAL: 4647 case POSIX_FADV_WILLNEED: 4648 case POSIX_FADV_DONTNEED: 4649 new = NULL; 4650 break; 4651 default: 4652 return (EINVAL); 4653 } 4654 /* XXX: CAP_POSIX_FADVISE? */ 4655 AUDIT_ARG_FD(fd); 4656 error = fget(td, fd, &cap_no_rights, &fp); 4657 if (error != 0) 4658 goto out; 4659 AUDIT_ARG_FILE(td->td_proc, fp); 4660 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4661 error = ESPIPE; 4662 goto out; 4663 } 4664 if (fp->f_type != DTYPE_VNODE) { 4665 error = ENODEV; 4666 goto out; 4667 } 4668 vp = fp->f_vnode; 4669 if (vp->v_type != VREG) { 4670 error = ENODEV; 4671 goto out; 4672 } 4673 if (len == 0) 4674 end = OFF_MAX; 4675 else 4676 end = offset + len - 1; 4677 switch (advice) { 4678 case POSIX_FADV_SEQUENTIAL: 4679 case POSIX_FADV_RANDOM: 4680 case POSIX_FADV_NOREUSE: 4681 /* 4682 * Try to merge any existing non-standard region with 4683 * this new region if possible, otherwise create a new 4684 * non-standard region for this request. 4685 */ 4686 mtx_pool_lock(mtxpool_sleep, fp); 4687 fa = fp->f_advice; 4688 if (fa != NULL && fa->fa_advice == advice && 4689 ((fa->fa_start <= end && fa->fa_end >= offset) || 4690 (end != OFF_MAX && fa->fa_start == end + 1) || 4691 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4692 if (offset < fa->fa_start) 4693 fa->fa_start = offset; 4694 if (end > fa->fa_end) 4695 fa->fa_end = end; 4696 } else { 4697 new->fa_advice = advice; 4698 new->fa_start = offset; 4699 new->fa_end = end; 4700 fp->f_advice = new; 4701 new = fa; 4702 } 4703 mtx_pool_unlock(mtxpool_sleep, fp); 4704 break; 4705 case POSIX_FADV_NORMAL: 4706 /* 4707 * If a the "normal" region overlaps with an existing 4708 * non-standard region, trim or remove the 4709 * non-standard region. 4710 */ 4711 mtx_pool_lock(mtxpool_sleep, fp); 4712 fa = fp->f_advice; 4713 if (fa != NULL) { 4714 if (offset <= fa->fa_start && end >= fa->fa_end) { 4715 new = fa; 4716 fp->f_advice = NULL; 4717 } else if (offset <= fa->fa_start && 4718 end >= fa->fa_start) 4719 fa->fa_start = end + 1; 4720 else if (offset <= fa->fa_end && end >= fa->fa_end) 4721 fa->fa_end = offset - 1; 4722 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4723 /* 4724 * If the "normal" region is a middle 4725 * portion of the existing 4726 * non-standard region, just remove 4727 * the whole thing rather than picking 4728 * one side or the other to 4729 * preserve. 4730 */ 4731 new = fa; 4732 fp->f_advice = NULL; 4733 } 4734 } 4735 mtx_pool_unlock(mtxpool_sleep, fp); 4736 break; 4737 case POSIX_FADV_WILLNEED: 4738 case POSIX_FADV_DONTNEED: 4739 error = VOP_ADVISE(vp, offset, end, advice); 4740 break; 4741 } 4742 out: 4743 if (fp != NULL) 4744 fdrop(fp, td); 4745 free(new, M_FADVISE); 4746 return (error); 4747 } 4748 4749 int 4750 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4751 { 4752 int error; 4753 4754 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4755 uap->advice); 4756 return (kern_posix_error(td, error)); 4757 } 4758 4759 int 4760 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4761 off_t *outoffp, size_t len, unsigned int flags) 4762 { 4763 struct file *infp, *outfp; 4764 struct vnode *invp, *outvp; 4765 int error; 4766 size_t retlen; 4767 void *rl_rcookie, *rl_wcookie; 4768 off_t savinoff, savoutoff; 4769 4770 infp = outfp = NULL; 4771 rl_rcookie = rl_wcookie = NULL; 4772 savinoff = -1; 4773 error = 0; 4774 retlen = 0; 4775 4776 if (flags != 0) { 4777 error = EINVAL; 4778 goto out; 4779 } 4780 if (len > SSIZE_MAX) 4781 /* 4782 * Although the len argument is size_t, the return argument 4783 * is ssize_t (which is signed). Therefore a size that won't 4784 * fit in ssize_t can't be returned. 4785 */ 4786 len = SSIZE_MAX; 4787 4788 /* Get the file structures for the file descriptors. */ 4789 error = fget_read(td, infd, &cap_read_rights, &infp); 4790 if (error != 0) 4791 goto out; 4792 if (infp->f_ops == &badfileops) { 4793 error = EBADF; 4794 goto out; 4795 } 4796 if (infp->f_vnode == NULL) { 4797 error = EINVAL; 4798 goto out; 4799 } 4800 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4801 if (error != 0) 4802 goto out; 4803 if (outfp->f_ops == &badfileops) { 4804 error = EBADF; 4805 goto out; 4806 } 4807 if (outfp->f_vnode == NULL) { 4808 error = EINVAL; 4809 goto out; 4810 } 4811 4812 /* Set the offset pointers to the correct place. */ 4813 if (inoffp == NULL) 4814 inoffp = &infp->f_offset; 4815 if (outoffp == NULL) 4816 outoffp = &outfp->f_offset; 4817 savinoff = *inoffp; 4818 savoutoff = *outoffp; 4819 4820 invp = infp->f_vnode; 4821 outvp = outfp->f_vnode; 4822 /* Sanity check the f_flag bits. */ 4823 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4824 (infp->f_flag & FREAD) == 0) { 4825 error = EBADF; 4826 goto out; 4827 } 4828 4829 /* If len == 0, just return 0. */ 4830 if (len == 0) 4831 goto out; 4832 4833 /* 4834 * If infp and outfp refer to the same file, the byte ranges cannot 4835 * overlap. 4836 */ 4837 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4838 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4839 savinoff))) { 4840 error = EINVAL; 4841 goto out; 4842 } 4843 4844 /* Range lock the byte ranges for both invp and outvp. */ 4845 for (;;) { 4846 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4847 len); 4848 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4849 len); 4850 if (rl_rcookie != NULL) 4851 break; 4852 vn_rangelock_unlock(outvp, rl_wcookie); 4853 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4854 vn_rangelock_unlock(invp, rl_rcookie); 4855 } 4856 4857 retlen = len; 4858 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4859 flags, infp->f_cred, outfp->f_cred, td); 4860 out: 4861 if (rl_rcookie != NULL) 4862 vn_rangelock_unlock(invp, rl_rcookie); 4863 if (rl_wcookie != NULL) 4864 vn_rangelock_unlock(outvp, rl_wcookie); 4865 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4866 *inoffp = savinoff; 4867 *outoffp = savoutoff; 4868 } 4869 if (outfp != NULL) 4870 fdrop(outfp, td); 4871 if (infp != NULL) 4872 fdrop(infp, td); 4873 td->td_retval[0] = retlen; 4874 return (error); 4875 } 4876 4877 int 4878 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 4879 { 4880 off_t inoff, outoff, *inoffp, *outoffp; 4881 int error; 4882 4883 inoffp = outoffp = NULL; 4884 if (uap->inoffp != NULL) { 4885 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 4886 if (error != 0) 4887 return (error); 4888 inoffp = &inoff; 4889 } 4890 if (uap->outoffp != NULL) { 4891 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 4892 if (error != 0) 4893 return (error); 4894 outoffp = &outoff; 4895 } 4896 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 4897 outoffp, uap->len, uap->flags); 4898 if (error == 0 && uap->inoffp != NULL) 4899 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 4900 if (error == 0 && uap->outoffp != NULL) 4901 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 4902 return (error); 4903 } 4904