1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <fs/devfs/devfs.h> 91 92 #include <ufs/ufs/quota.h> 93 94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 95 96 static int kern_chflagsat(struct thread *td, int fd, const char *path, 97 enum uio_seg pathseg, u_long flags, int atflag); 98 static int setfflags(struct thread *td, struct vnode *, u_long); 99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100 static int getutimens(const struct timespec *, enum uio_seg, 101 struct timespec *, int *); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 static 
int kern_fhlinkat(struct thread *td, int fd, const char *path, 107 enum uio_seg pathseg, fhandle_t *fhp); 108 static int kern_getfhat(struct thread *td, int flags, int fd, 109 const char *path, enum uio_seg pathseg, fhandle_t *fhp); 110 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 111 size_t count, struct thread *td); 112 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 113 const char *path, enum uio_seg segflag); 114 115 int 116 kern_sync(struct thread *td) 117 { 118 struct mount *mp, *nmp; 119 int save; 120 121 mtx_lock(&mountlist_mtx); 122 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 123 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 124 nmp = TAILQ_NEXT(mp, mnt_list); 125 continue; 126 } 127 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 128 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 129 save = curthread_pflags_set(TDP_SYNCIO); 130 vfs_periodic(mp, MNT_NOWAIT); 131 VFS_SYNC(mp, MNT_NOWAIT); 132 curthread_pflags_restore(save); 133 vn_finished_write(mp); 134 } 135 mtx_lock(&mountlist_mtx); 136 nmp = TAILQ_NEXT(mp, mnt_list); 137 vfs_unbusy(mp); 138 } 139 mtx_unlock(&mountlist_mtx); 140 return (0); 141 } 142 143 /* 144 * Sync each mounted filesystem. 145 */ 146 #ifndef _SYS_SYSPROTO_H_ 147 struct sync_args { 148 int dummy; 149 }; 150 #endif 151 /* ARGSUSED */ 152 int 153 sys_sync(struct thread *td, struct sync_args *uap) 154 { 155 156 return (kern_sync(td)); 157 } 158 159 /* 160 * Change filesystem quotas. 
161 */ 162 #ifndef _SYS_SYSPROTO_H_ 163 struct quotactl_args { 164 char *path; 165 int cmd; 166 int uid; 167 caddr_t arg; 168 }; 169 #endif 170 int 171 sys_quotactl(struct thread *td, struct quotactl_args *uap) 172 { 173 struct mount *mp; 174 struct nameidata nd; 175 int error; 176 177 AUDIT_ARG_CMD(uap->cmd); 178 AUDIT_ARG_UID(uap->uid); 179 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 180 return (EPERM); 181 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 182 uap->path, td); 183 if ((error = namei(&nd)) != 0) 184 return (error); 185 NDFREE(&nd, NDF_ONLY_PNBUF); 186 mp = nd.ni_vp->v_mount; 187 vfs_ref(mp); 188 vput(nd.ni_vp); 189 error = vfs_busy(mp, 0); 190 if (error != 0) { 191 vfs_rel(mp); 192 return (error); 193 } 194 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 195 196 /* 197 * Since quota on operation typically needs to open quota 198 * file, the Q_QUOTAON handler needs to unbusy the mount point 199 * before calling into namei. Otherwise, unmount might be 200 * started between two vfs_busy() invocations (first is our, 201 * second is from mount point cross-walk code in lookup()), 202 * causing deadlock. 203 * 204 * Require that Q_QUOTAON handles the vfs_busy() reference on 205 * its own, always returning with ubusied mount point. 206 */ 207 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 208 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 209 vfs_unbusy(mp); 210 vfs_rel(mp); 211 return (error); 212 } 213 214 /* 215 * Used by statfs conversion routines to scale the block size up if 216 * necessary so that all of the block counts are <= 'max_size'. Note 217 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 218 * value of 'n'. 
219 */ 220 void 221 statfs_scale_blocks(struct statfs *sf, long max_size) 222 { 223 uint64_t count; 224 int shift; 225 226 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 227 228 /* 229 * Attempt to scale the block counts to give a more accurate 230 * overview to userland of the ratio of free space to used 231 * space. To do this, find the largest block count and compute 232 * a divisor that lets it fit into a signed integer <= max_size. 233 */ 234 if (sf->f_bavail < 0) 235 count = -sf->f_bavail; 236 else 237 count = sf->f_bavail; 238 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 239 if (count <= max_size) 240 return; 241 242 count >>= flsl(max_size); 243 shift = 0; 244 while (count > 0) { 245 shift++; 246 count >>=1; 247 } 248 249 sf->f_bsize <<= shift; 250 sf->f_blocks >>= shift; 251 sf->f_bfree >>= shift; 252 sf->f_bavail >>= shift; 253 } 254 255 static int 256 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 257 { 258 int error; 259 260 if (mp == NULL) 261 return (EBADF); 262 error = vfs_busy(mp, 0); 263 vfs_rel(mp); 264 if (error != 0) 265 return (error); 266 #ifdef MAC 267 error = mac_mount_check_stat(td->td_ucred, mp); 268 if (error != 0) 269 goto out; 270 #endif 271 error = VFS_STATFS(mp, buf); 272 if (error != 0) 273 goto out; 274 if (priv_check_cred_vfs_generation(td->td_ucred)) { 275 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 276 prison_enforce_statfs(td->td_ucred, mp, buf); 277 } 278 out: 279 vfs_unbusy(mp); 280 return (error); 281 } 282 283 /* 284 * Get filesystem statistics. 
285 */ 286 #ifndef _SYS_SYSPROTO_H_ 287 struct statfs_args { 288 char *path; 289 struct statfs *buf; 290 }; 291 #endif 292 int 293 sys_statfs(struct thread *td, struct statfs_args *uap) 294 { 295 struct statfs *sfp; 296 int error; 297 298 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 299 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 300 if (error == 0) 301 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 302 free(sfp, M_STATFS); 303 return (error); 304 } 305 306 int 307 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 308 struct statfs *buf) 309 { 310 struct mount *mp; 311 struct nameidata nd; 312 int error; 313 314 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 315 pathseg, path, td); 316 error = namei(&nd); 317 if (error != 0) 318 return (error); 319 mp = nd.ni_vp->v_mount; 320 vfs_ref(mp); 321 NDFREE(&nd, NDF_ONLY_PNBUF); 322 vput(nd.ni_vp); 323 return (kern_do_statfs(td, mp, buf)); 324 } 325 326 /* 327 * Get filesystem statistics. 
328 */ 329 #ifndef _SYS_SYSPROTO_H_ 330 struct fstatfs_args { 331 int fd; 332 struct statfs *buf; 333 }; 334 #endif 335 int 336 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 337 { 338 struct statfs *sfp; 339 int error; 340 341 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 342 error = kern_fstatfs(td, uap->fd, sfp); 343 if (error == 0) 344 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 345 free(sfp, M_STATFS); 346 return (error); 347 } 348 349 int 350 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 351 { 352 struct file *fp; 353 struct mount *mp; 354 struct vnode *vp; 355 int error; 356 357 AUDIT_ARG_FD(fd); 358 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 359 if (error != 0) 360 return (error); 361 vp = fp->f_vnode; 362 vn_lock(vp, LK_SHARED | LK_RETRY); 363 #ifdef AUDIT 364 AUDIT_ARG_VNODE1(vp); 365 #endif 366 mp = vp->v_mount; 367 if (mp != NULL) 368 vfs_ref(mp); 369 VOP_UNLOCK(vp); 370 fdrop(fp, td); 371 return (kern_do_statfs(td, mp, buf)); 372 } 373 374 /* 375 * Get statistics on all filesystems. 376 */ 377 #ifndef _SYS_SYSPROTO_H_ 378 struct getfsstat_args { 379 struct statfs *buf; 380 long bufsize; 381 int mode; 382 }; 383 #endif 384 int 385 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 386 { 387 size_t count; 388 int error; 389 390 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 391 return (EINVAL); 392 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 393 UIO_USERSPACE, uap->mode); 394 if (error == 0) 395 td->td_retval[0] = count; 396 return (error); 397 } 398 399 /* 400 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 401 * The caller is responsible for freeing memory which will be allocated 402 * in '*buf'. 
*/
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int mode)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, *sptmp, *tofree;
	size_t count, maxcount;
	int error;

	/* Only MNT_WAIT and MNT_NOWAIT are accepted. */
	switch (mode) {
	case MNT_WAIT:
	case MNT_NOWAIT:
		break;
	default:
		/* Leave a kernel caller with a pointer that is safe to free. */
		if (bufseg == UIO_SYSSPACE)
			*buf = NULL;
		return (EINVAL);
	}
restart:
	/*
	 * Pick the destination: none (count only), the caller's user
	 * buffer, or a freshly allocated kernel array sized for at most
	 * the current number of mounts.
	 */
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0) {
		sfsp = NULL;
		tofree = NULL;
	} else if (bufseg == UIO_USERSPACE) {
		sfsp = *buf;
		tofree = NULL;
	} else /* if (bufseg == UIO_SYSSPACE) */ {
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
		    M_STATFS, M_WAITOK);
	}

	count = 0;

	/*
	 * If there is no target buffer they only want the count.
	 *
	 * This could be TAILQ_FOREACH but it is open-coded to match the original
	 * code below.
	 */
	if (sfsp == NULL) {
		mtx_lock(&mountlist_mtx);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			/* Skip mounts the credential may not see. */
			if (prison_canseemount(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#ifdef MAC
			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#endif
			count++;
			nmp = TAILQ_NEXT(mp, mnt_list);
		}
		mtx_unlock(&mountlist_mtx);
		*countp = count;
		return (0);
	}

	/*
	 * They want the entire thing.
	 *
	 * Short-circuit the corner case of no room for anything, avoids
	 * relocking below.
	 */
	if (maxcount < 1) {
		goto out;
	}

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Same visibility filters as the count-only pass above. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (mode == MNT_WAIT) {
			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
				/*
				 * If vfs_busy() failed, and MBF_NOWAIT
				 * wasn't passed, then the mp is gone.
				 * Furthermore, because of MBF_MNTLSTLOCK,
				 * the mountlist_mtx was dropped.  We have
				 * no other choice than to start over.
				 */
				mtx_unlock(&mountlist_mtx);
				/* Discard any kernel array; restart reallocates. */
				free(tofree, M_STATFS);
				goto restart;
			}
		} else {
			/* MNT_NOWAIT: skip mounts that cannot be busied now. */
			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
		}
		sp = &mp->mnt_stat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh
		 * the fsstat cache.
		 */
		if (mode != MNT_NOWAIT) {
			error = VFS_STATFS(mp, sp);
			if (error != 0) {
				/*
				 * A failed refresh skips this mount without
				 * failing the call; retake the list lock
				 * before advancing and unbusying.
				 */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
		}
		/*
		 * When priv_check_cred_vfs_generation() returns non-zero,
		 * hand out a scrubbed private copy (fsid zeroed, then
		 * prison_enforce_statfs() applied) instead of the mount's
		 * own mnt_stat.
		 */
		if (priv_check_cred_vfs_generation(td->td_ucred)) {
			sptmp = malloc(sizeof(struct statfs), M_STATFS,
			    M_WAITOK);
			*sptmp = *sp;
			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
			prison_enforce_statfs(td->td_ucred, mp, sptmp);
			sp = sptmp;
		} else
			sptmp = NULL;
		if (bufseg == UIO_SYSSPACE) {
			bcopy(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
		} else /* if (bufseg == UIO_USERSPACE) */ {
			error = copyout(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
			if (error != 0) {
				/*
				 * A copyout failure aborts the whole call;
				 * '*countp' is not written on this path.
				 */
				vfs_unbusy(mp);
				return (error);
			}
		}
		sfsp++;
		count++;

		/* Destination full: stop without retaking the list lock. */
		if (count == maxcount) {
			vfs_unbusy(mp);
			goto out;
		}

		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
out:
	*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * FreeBSD 4 compat statfs: run kern_statfs() on the user path and copy
 * the result out converted to struct ostatfs.
 */
int
freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
{
	struct ostatfs osb;
	struct statfs *sfp;
	int error;

	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
	if (error == 0) {
		freebsd4_cvtstatfs(sfp, &osb);
		error = copyout(&osb, uap->buf, sizeof(osb));
	}
	free(sfp, M_STATFS);
	return (error);
}

/*
 * Get filesystem statistics.
596 */ 597 #ifndef _SYS_SYSPROTO_H_ 598 struct freebsd4_fstatfs_args { 599 int fd; 600 struct ostatfs *buf; 601 }; 602 #endif 603 int 604 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 605 { 606 struct ostatfs osb; 607 struct statfs *sfp; 608 int error; 609 610 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 611 error = kern_fstatfs(td, uap->fd, sfp); 612 if (error == 0) { 613 freebsd4_cvtstatfs(sfp, &osb); 614 error = copyout(&osb, uap->buf, sizeof(osb)); 615 } 616 free(sfp, M_STATFS); 617 return (error); 618 } 619 620 /* 621 * Get statistics on all filesystems. 622 */ 623 #ifndef _SYS_SYSPROTO_H_ 624 struct freebsd4_getfsstat_args { 625 struct ostatfs *buf; 626 long bufsize; 627 int mode; 628 }; 629 #endif 630 int 631 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 632 { 633 struct statfs *buf, *sp; 634 struct ostatfs osb; 635 size_t count, size; 636 int error; 637 638 if (uap->bufsize < 0) 639 return (EINVAL); 640 count = uap->bufsize / sizeof(struct ostatfs); 641 if (count > SIZE_MAX / sizeof(struct statfs)) 642 return (EINVAL); 643 size = count * sizeof(struct statfs); 644 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 645 uap->mode); 646 if (error == 0) 647 td->td_retval[0] = count; 648 if (size != 0) { 649 sp = buf; 650 while (count != 0 && error == 0) { 651 freebsd4_cvtstatfs(sp, &osb); 652 error = copyout(&osb, uap->buf, sizeof(osb)); 653 sp++; 654 uap->buf++; 655 count--; 656 } 657 free(buf, M_STATFS); 658 } 659 return (error); 660 } 661 662 /* 663 * Implement fstatfs() for (NFS) file handles. 
664 */ 665 #ifndef _SYS_SYSPROTO_H_ 666 struct freebsd4_fhstatfs_args { 667 struct fhandle *u_fhp; 668 struct ostatfs *buf; 669 }; 670 #endif 671 int 672 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 673 { 674 struct ostatfs osb; 675 struct statfs *sfp; 676 fhandle_t fh; 677 int error; 678 679 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 680 if (error != 0) 681 return (error); 682 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 683 error = kern_fhstatfs(td, fh, sfp); 684 if (error == 0) { 685 freebsd4_cvtstatfs(sfp, &osb); 686 error = copyout(&osb, uap->buf, sizeof(osb)); 687 } 688 free(sfp, M_STATFS); 689 return (error); 690 } 691 692 /* 693 * Convert a new format statfs structure to an old format statfs structure. 694 */ 695 static void 696 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 697 { 698 699 statfs_scale_blocks(nsp, LONG_MAX); 700 bzero(osp, sizeof(*osp)); 701 osp->f_bsize = nsp->f_bsize; 702 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 703 osp->f_blocks = nsp->f_blocks; 704 osp->f_bfree = nsp->f_bfree; 705 osp->f_bavail = nsp->f_bavail; 706 osp->f_files = MIN(nsp->f_files, LONG_MAX); 707 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 708 osp->f_owner = nsp->f_owner; 709 osp->f_type = nsp->f_type; 710 osp->f_flags = nsp->f_flags; 711 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 712 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 713 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 714 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 715 strlcpy(osp->f_fstypename, nsp->f_fstypename, 716 MIN(MFSNAMELEN, OMFSNAMELEN)); 717 strlcpy(osp->f_mntonname, nsp->f_mntonname, 718 MIN(MNAMELEN, OMNAMELEN)); 719 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 720 MIN(MNAMELEN, OMNAMELEN)); 721 osp->f_fsid = nsp->f_fsid; 722 } 723 #endif /* COMPAT_FREEBSD4 */ 724 725 #if defined(COMPAT_FREEBSD11) 726 /* 727 * Get old format filesystem statistics. 
728 */ 729 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 730 731 int 732 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 733 { 734 struct freebsd11_statfs osb; 735 struct statfs *sfp; 736 int error; 737 738 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 739 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 740 if (error == 0) { 741 freebsd11_cvtstatfs(sfp, &osb); 742 error = copyout(&osb, uap->buf, sizeof(osb)); 743 } 744 free(sfp, M_STATFS); 745 return (error); 746 } 747 748 /* 749 * Get filesystem statistics. 750 */ 751 int 752 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 753 { 754 struct freebsd11_statfs osb; 755 struct statfs *sfp; 756 int error; 757 758 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 759 error = kern_fstatfs(td, uap->fd, sfp); 760 if (error == 0) { 761 freebsd11_cvtstatfs(sfp, &osb); 762 error = copyout(&osb, uap->buf, sizeof(osb)); 763 } 764 free(sfp, M_STATFS); 765 return (error); 766 } 767 768 /* 769 * Get statistics on all filesystems. 770 */ 771 int 772 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 773 { 774 struct freebsd11_statfs osb; 775 struct statfs *buf, *sp; 776 size_t count, size; 777 int error; 778 779 count = uap->bufsize / sizeof(struct ostatfs); 780 size = count * sizeof(struct statfs); 781 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 782 uap->mode); 783 if (error == 0) 784 td->td_retval[0] = count; 785 if (size > 0) { 786 sp = buf; 787 while (count > 0 && error == 0) { 788 freebsd11_cvtstatfs(sp, &osb); 789 error = copyout(&osb, uap->buf, sizeof(osb)); 790 sp++; 791 uap->buf++; 792 count--; 793 } 794 free(buf, M_STATFS); 795 } 796 return (error); 797 } 798 799 /* 800 * Implement fstatfs() for (NFS) file handles. 
801 */ 802 int 803 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 804 { 805 struct freebsd11_statfs osb; 806 struct statfs *sfp; 807 fhandle_t fh; 808 int error; 809 810 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 811 if (error) 812 return (error); 813 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 814 error = kern_fhstatfs(td, fh, sfp); 815 if (error == 0) { 816 freebsd11_cvtstatfs(sfp, &osb); 817 error = copyout(&osb, uap->buf, sizeof(osb)); 818 } 819 free(sfp, M_STATFS); 820 return (error); 821 } 822 823 /* 824 * Convert a new format statfs structure to an old format statfs structure. 825 */ 826 static void 827 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 828 { 829 830 bzero(osp, sizeof(*osp)); 831 osp->f_version = FREEBSD11_STATFS_VERSION; 832 osp->f_type = nsp->f_type; 833 osp->f_flags = nsp->f_flags; 834 osp->f_bsize = nsp->f_bsize; 835 osp->f_iosize = nsp->f_iosize; 836 osp->f_blocks = nsp->f_blocks; 837 osp->f_bfree = nsp->f_bfree; 838 osp->f_bavail = nsp->f_bavail; 839 osp->f_files = nsp->f_files; 840 osp->f_ffree = nsp->f_ffree; 841 osp->f_syncwrites = nsp->f_syncwrites; 842 osp->f_asyncwrites = nsp->f_asyncwrites; 843 osp->f_syncreads = nsp->f_syncreads; 844 osp->f_asyncreads = nsp->f_asyncreads; 845 osp->f_namemax = nsp->f_namemax; 846 osp->f_owner = nsp->f_owner; 847 osp->f_fsid = nsp->f_fsid; 848 strlcpy(osp->f_fstypename, nsp->f_fstypename, 849 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 850 strlcpy(osp->f_mntonname, nsp->f_mntonname, 851 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 852 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 853 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 854 } 855 #endif /* COMPAT_FREEBSD11 */ 856 857 /* 858 * Change current working directory to a given file descriptor. 
859 */ 860 #ifndef _SYS_SYSPROTO_H_ 861 struct fchdir_args { 862 int fd; 863 }; 864 #endif 865 int 866 sys_fchdir(struct thread *td, struct fchdir_args *uap) 867 { 868 struct vnode *vp, *tdp; 869 struct mount *mp; 870 struct file *fp; 871 int error; 872 873 AUDIT_ARG_FD(uap->fd); 874 error = getvnode(td, uap->fd, &cap_fchdir_rights, 875 &fp); 876 if (error != 0) 877 return (error); 878 vp = fp->f_vnode; 879 vrefact(vp); 880 fdrop(fp, td); 881 vn_lock(vp, LK_SHARED | LK_RETRY); 882 AUDIT_ARG_VNODE1(vp); 883 error = change_dir(vp, td); 884 while (!error && (mp = vp->v_mountedhere) != NULL) { 885 if (vfs_busy(mp, 0)) 886 continue; 887 error = VFS_ROOT(mp, LK_SHARED, &tdp); 888 vfs_unbusy(mp); 889 if (error != 0) 890 break; 891 vput(vp); 892 vp = tdp; 893 } 894 if (error != 0) { 895 vput(vp); 896 return (error); 897 } 898 VOP_UNLOCK(vp); 899 pwd_chdir(td, vp); 900 return (0); 901 } 902 903 /* 904 * Change current working directory (``.''). 905 */ 906 #ifndef _SYS_SYSPROTO_H_ 907 struct chdir_args { 908 char *path; 909 }; 910 #endif 911 int 912 sys_chdir(struct thread *td, struct chdir_args *uap) 913 { 914 915 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 916 } 917 918 int 919 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 920 { 921 struct nameidata nd; 922 int error; 923 924 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 925 pathseg, path, td); 926 if ((error = namei(&nd)) != 0) 927 return (error); 928 if ((error = change_dir(nd.ni_vp, td)) != 0) { 929 vput(nd.ni_vp); 930 NDFREE(&nd, NDF_ONLY_PNBUF); 931 return (error); 932 } 933 VOP_UNLOCK(nd.ni_vp); 934 NDFREE(&nd, NDF_ONLY_PNBUF); 935 pwd_chdir(td, nd.ni_vp); 936 return (0); 937 } 938 939 /* 940 * Change notion of root (``/'') directory. 
941 */ 942 #ifndef _SYS_SYSPROTO_H_ 943 struct chroot_args { 944 char *path; 945 }; 946 #endif 947 int 948 sys_chroot(struct thread *td, struct chroot_args *uap) 949 { 950 struct nameidata nd; 951 int error; 952 953 error = priv_check(td, PRIV_VFS_CHROOT); 954 if (error != 0) 955 return (error); 956 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 957 UIO_USERSPACE, uap->path, td); 958 error = namei(&nd); 959 if (error != 0) 960 goto error; 961 error = change_dir(nd.ni_vp, td); 962 if (error != 0) 963 goto e_vunlock; 964 #ifdef MAC 965 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 966 if (error != 0) 967 goto e_vunlock; 968 #endif 969 VOP_UNLOCK(nd.ni_vp); 970 error = pwd_chroot(td, nd.ni_vp); 971 vrele(nd.ni_vp); 972 NDFREE(&nd, NDF_ONLY_PNBUF); 973 return (error); 974 e_vunlock: 975 vput(nd.ni_vp); 976 error: 977 NDFREE(&nd, NDF_ONLY_PNBUF); 978 return (error); 979 } 980 981 /* 982 * Common routine for chroot and chdir. Callers must provide a locked vnode 983 * instance. 
984 */ 985 int 986 change_dir(struct vnode *vp, struct thread *td) 987 { 988 #ifdef MAC 989 int error; 990 #endif 991 992 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 993 if (vp->v_type != VDIR) 994 return (ENOTDIR); 995 #ifdef MAC 996 error = mac_vnode_check_chdir(td->td_ucred, vp); 997 if (error != 0) 998 return (error); 999 #endif 1000 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1001 } 1002 1003 static __inline void 1004 flags_to_rights(int flags, cap_rights_t *rightsp) 1005 { 1006 1007 if (flags & O_EXEC) { 1008 cap_rights_set_one(rightsp, CAP_FEXECVE); 1009 } else { 1010 switch ((flags & O_ACCMODE)) { 1011 case O_RDONLY: 1012 cap_rights_set_one(rightsp, CAP_READ); 1013 break; 1014 case O_RDWR: 1015 cap_rights_set_one(rightsp, CAP_READ); 1016 /* FALLTHROUGH */ 1017 case O_WRONLY: 1018 cap_rights_set_one(rightsp, CAP_WRITE); 1019 if (!(flags & (O_APPEND | O_TRUNC))) 1020 cap_rights_set_one(rightsp, CAP_SEEK); 1021 break; 1022 } 1023 } 1024 1025 if (flags & O_CREAT) 1026 cap_rights_set_one(rightsp, CAP_CREATE); 1027 1028 if (flags & O_TRUNC) 1029 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1030 1031 if (flags & (O_SYNC | O_FSYNC)) 1032 cap_rights_set_one(rightsp, CAP_FSYNC); 1033 1034 if (flags & (O_EXLOCK | O_SHLOCK)) 1035 cap_rights_set_one(rightsp, CAP_FLOCK); 1036 } 1037 1038 /* 1039 * Check permissions, allocate an open file structure, and call the device 1040 * open routine if any. 
*/
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char *path;
	int flags;
	int mode;
};
#endif
int
sys_open(struct thread *td, struct open_args *uap)
{

	/* open(2) is openat(2) relative to the current directory. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int fd;
	char *path;
	int flag;
	int mode;
};
#endif
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Open 'path' (resolved relative to 'fd') with open(2) style 'flags' and
 * creation 'mode'.  On success the new descriptor number is stored in
 * td->td_retval[0] and 0 is returned.  Returns EINVAL for a bad access
 * mode combination, otherwise the error from falloc_noinstall(),
 * vn_open(), dupfdopen(), fo_truncate() or finstall().
 */
int
kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see success: and bad:. */
	indx = -1;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Rights the lookup needs on 'fd': CAP_LOOKUP plus flag-derived ones. */
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and set by finstall() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the process umask and strip S_ISTXT from the creation mode. */
	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    &rights, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success dupfdopen() has filled in 'indx'. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		fp->f_seqcount[UIO_READ] = 1;
		fp->f_seqcount[UIO_WRITE] = 1;
		/* Keep FHASLOCK if it was set on fp during vn_open(). */
		finit(fp, (flags & FMASK) | (fp->f_flag & FHASLOCK),
		    DTYPE_VNODE, vp, &vnops);
	}

	VOP_UNLOCK(vp);
	/* Truncation goes through the file's own truncate method. */
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if ((nd.ni_lcf & NI_LCF_STRICTRELATIVE) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall(td, fp, &indx, flags, fcaps);
		/* On success finstall() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		filecaps_free(&nd.ni_filecaps);
	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* No descriptor was installed on this path; just drop our reference. */
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char *path;
	int mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	/* creat() is open() with O_WRONLY | O_CREAT | O_TRUNC. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1235 */ 1236 #ifndef _SYS_SYSPROTO_H_ 1237 struct mknodat_args { 1238 int fd; 1239 char *path; 1240 mode_t mode; 1241 dev_t dev; 1242 }; 1243 #endif 1244 int 1245 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1246 { 1247 1248 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1249 uap->dev)); 1250 } 1251 1252 #if defined(COMPAT_FREEBSD11) 1253 int 1254 freebsd11_mknod(struct thread *td, 1255 struct freebsd11_mknod_args *uap) 1256 { 1257 1258 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1259 uap->mode, uap->dev)); 1260 } 1261 1262 int 1263 freebsd11_mknodat(struct thread *td, 1264 struct freebsd11_mknodat_args *uap) 1265 { 1266 1267 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1268 uap->dev)); 1269 } 1270 #endif /* COMPAT_FREEBSD11 */ 1271 1272 int 1273 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1274 int mode, dev_t dev) 1275 { 1276 struct vnode *vp; 1277 struct mount *mp; 1278 struct vattr vattr; 1279 struct nameidata nd; 1280 int error, whiteout = 0; 1281 1282 AUDIT_ARG_MODE(mode); 1283 AUDIT_ARG_DEV(dev); 1284 switch (mode & S_IFMT) { 1285 case S_IFCHR: 1286 case S_IFBLK: 1287 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1288 if (error == 0 && dev == VNOVAL) 1289 error = EINVAL; 1290 break; 1291 case S_IFWHT: 1292 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1293 break; 1294 case S_IFIFO: 1295 if (dev == 0) 1296 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1297 /* FALLTHROUGH */ 1298 default: 1299 error = EINVAL; 1300 break; 1301 } 1302 if (error != 0) 1303 return (error); 1304 restart: 1305 bwillwrite(); 1306 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1307 NOCACHE, pathseg, path, fd, &cap_mknodat_rights, 1308 td); 1309 if ((error = namei(&nd)) != 0) 1310 return (error); 1311 vp = nd.ni_vp; 1312 if (vp != NULL) { 1313 NDFREE(&nd, NDF_ONLY_PNBUF); 1314 if (vp == nd.ni_dvp) 1315 vrele(nd.ni_dvp); 1316 else 1317 vput(nd.ni_dvp); 
1318 vrele(vp); 1319 return (EEXIST); 1320 } else { 1321 VATTR_NULL(&vattr); 1322 vattr.va_mode = (mode & ALLPERMS) & 1323 ~td->td_proc->p_fd->fd_cmask; 1324 vattr.va_rdev = dev; 1325 whiteout = 0; 1326 1327 switch (mode & S_IFMT) { 1328 case S_IFCHR: 1329 vattr.va_type = VCHR; 1330 break; 1331 case S_IFBLK: 1332 vattr.va_type = VBLK; 1333 break; 1334 case S_IFWHT: 1335 whiteout = 1; 1336 break; 1337 default: 1338 panic("kern_mknod: invalid mode"); 1339 } 1340 } 1341 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1342 NDFREE(&nd, NDF_ONLY_PNBUF); 1343 vput(nd.ni_dvp); 1344 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1345 return (error); 1346 goto restart; 1347 } 1348 #ifdef MAC 1349 if (error == 0 && !whiteout) 1350 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1351 &nd.ni_cnd, &vattr); 1352 #endif 1353 if (error == 0) { 1354 if (whiteout) 1355 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1356 else { 1357 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1358 &nd.ni_cnd, &vattr); 1359 if (error == 0) 1360 vput(nd.ni_vp); 1361 } 1362 } 1363 NDFREE(&nd, NDF_ONLY_PNBUF); 1364 vput(nd.ni_dvp); 1365 vn_finished_write(mp); 1366 return (error); 1367 } 1368 1369 /* 1370 * Create a named pipe. 
1371 */ 1372 #ifndef _SYS_SYSPROTO_H_ 1373 struct mkfifo_args { 1374 char *path; 1375 int mode; 1376 }; 1377 #endif 1378 int 1379 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1380 { 1381 1382 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1383 uap->mode)); 1384 } 1385 1386 #ifndef _SYS_SYSPROTO_H_ 1387 struct mkfifoat_args { 1388 int fd; 1389 char *path; 1390 mode_t mode; 1391 }; 1392 #endif 1393 int 1394 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1395 { 1396 1397 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1398 uap->mode)); 1399 } 1400 1401 int 1402 kern_mkfifoat(struct thread *td, int fd, const char *path, 1403 enum uio_seg pathseg, int mode) 1404 { 1405 struct mount *mp; 1406 struct vattr vattr; 1407 struct nameidata nd; 1408 int error; 1409 1410 AUDIT_ARG_MODE(mode); 1411 restart: 1412 bwillwrite(); 1413 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1414 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights, 1415 td); 1416 if ((error = namei(&nd)) != 0) 1417 return (error); 1418 if (nd.ni_vp != NULL) { 1419 NDFREE(&nd, NDF_ONLY_PNBUF); 1420 if (nd.ni_vp == nd.ni_dvp) 1421 vrele(nd.ni_dvp); 1422 else 1423 vput(nd.ni_dvp); 1424 vrele(nd.ni_vp); 1425 return (EEXIST); 1426 } 1427 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1428 NDFREE(&nd, NDF_ONLY_PNBUF); 1429 vput(nd.ni_dvp); 1430 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1431 return (error); 1432 goto restart; 1433 } 1434 VATTR_NULL(&vattr); 1435 vattr.va_type = VFIFO; 1436 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask; 1437 #ifdef MAC 1438 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1439 &vattr); 1440 if (error != 0) 1441 goto out; 1442 #endif 1443 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1444 if (error == 0) 1445 vput(nd.ni_vp); 1446 #ifdef MAC 1447 out: 1448 #endif 1449 vput(nd.ni_dvp); 1450 vn_finished_write(mp); 1451 NDFREE(&nd, 
NDF_ONLY_PNBUF); 1452 return (error); 1453 } 1454 1455 /* 1456 * Make a hard file link. 1457 */ 1458 #ifndef _SYS_SYSPROTO_H_ 1459 struct link_args { 1460 char *path; 1461 char *link; 1462 }; 1463 #endif 1464 int 1465 sys_link(struct thread *td, struct link_args *uap) 1466 { 1467 1468 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1469 UIO_USERSPACE, FOLLOW)); 1470 } 1471 1472 #ifndef _SYS_SYSPROTO_H_ 1473 struct linkat_args { 1474 int fd1; 1475 char *path1; 1476 int fd2; 1477 char *path2; 1478 int flag; 1479 }; 1480 #endif 1481 int 1482 sys_linkat(struct thread *td, struct linkat_args *uap) 1483 { 1484 int flag; 1485 1486 flag = uap->flag; 1487 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH)) != 0) 1488 return (EINVAL); 1489 1490 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1491 UIO_USERSPACE, ((flag & AT_SYMLINK_FOLLOW) != 0 ? FOLLOW : 1492 NOFOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0))); 1493 } 1494 1495 int hardlink_check_uid = 0; 1496 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1497 &hardlink_check_uid, 0, 1498 "Unprivileged processes cannot create hard links to files owned by other " 1499 "users"); 1500 static int hardlink_check_gid = 0; 1501 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1502 &hardlink_check_gid, 0, 1503 "Unprivileged processes cannot create hard links to files owned by other " 1504 "groups"); 1505 1506 static int 1507 can_hardlink(struct vnode *vp, struct ucred *cred) 1508 { 1509 struct vattr va; 1510 int error; 1511 1512 if (!hardlink_check_uid && !hardlink_check_gid) 1513 return (0); 1514 1515 error = VOP_GETATTR(vp, &va, cred); 1516 if (error != 0) 1517 return (error); 1518 1519 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1520 error = priv_check_cred(cred, PRIV_VFS_LINK); 1521 if (error != 0) 1522 return (error); 1523 } 1524 1525 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1526 error = priv_check_cred(cred, 
PRIV_VFS_LINK); 1527 if (error != 0) 1528 return (error); 1529 } 1530 1531 return (0); 1532 } 1533 1534 int 1535 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1536 const char *path2, enum uio_seg segflag, int follow) 1537 { 1538 struct nameidata nd; 1539 int error; 1540 1541 do { 1542 bwillwrite(); 1543 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag, 1544 path1, fd1, &cap_linkat_source_rights, td); 1545 if ((error = namei(&nd)) != 0) 1546 return (error); 1547 NDFREE(&nd, NDF_ONLY_PNBUF); 1548 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1549 } while (error == EAGAIN); 1550 return (error); 1551 } 1552 1553 static int 1554 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1555 enum uio_seg segflag) 1556 { 1557 struct nameidata nd; 1558 struct mount *mp; 1559 int error; 1560 1561 if (vp->v_type == VDIR) { 1562 vrele(vp); 1563 return (EPERM); /* POSIX */ 1564 } 1565 NDINIT_ATRIGHTS(&nd, CREATE, 1566 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1567 &cap_linkat_target_rights, td); 1568 if ((error = namei(&nd)) == 0) { 1569 if (nd.ni_vp != NULL) { 1570 NDFREE(&nd, NDF_ONLY_PNBUF); 1571 if (nd.ni_dvp == nd.ni_vp) 1572 vrele(nd.ni_dvp); 1573 else 1574 vput(nd.ni_dvp); 1575 vrele(nd.ni_vp); 1576 vrele(vp); 1577 return (EEXIST); 1578 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1579 /* 1580 * Cross-device link. No need to recheck 1581 * vp->v_type, since it cannot change, except 1582 * to VBAD. 
1583 */ 1584 NDFREE(&nd, NDF_ONLY_PNBUF); 1585 vput(nd.ni_dvp); 1586 vrele(vp); 1587 return (EXDEV); 1588 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1589 error = can_hardlink(vp, td->td_ucred); 1590 #ifdef MAC 1591 if (error == 0) 1592 error = mac_vnode_check_link(td->td_ucred, 1593 nd.ni_dvp, vp, &nd.ni_cnd); 1594 #endif 1595 if (error != 0) { 1596 vput(vp); 1597 vput(nd.ni_dvp); 1598 NDFREE(&nd, NDF_ONLY_PNBUF); 1599 return (error); 1600 } 1601 error = vn_start_write(vp, &mp, V_NOWAIT); 1602 if (error != 0) { 1603 vput(vp); 1604 vput(nd.ni_dvp); 1605 NDFREE(&nd, NDF_ONLY_PNBUF); 1606 error = vn_start_write(NULL, &mp, 1607 V_XSLEEP | PCATCH); 1608 if (error != 0) 1609 return (error); 1610 return (EAGAIN); 1611 } 1612 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1613 VOP_UNLOCK(vp); 1614 vput(nd.ni_dvp); 1615 vn_finished_write(mp); 1616 NDFREE(&nd, NDF_ONLY_PNBUF); 1617 } else { 1618 vput(nd.ni_dvp); 1619 NDFREE(&nd, NDF_ONLY_PNBUF); 1620 vrele(vp); 1621 return (EAGAIN); 1622 } 1623 } 1624 vrele(vp); 1625 return (error); 1626 } 1627 1628 /* 1629 * Make a symbolic link. 
1630 */ 1631 #ifndef _SYS_SYSPROTO_H_ 1632 struct symlink_args { 1633 char *path; 1634 char *link; 1635 }; 1636 #endif 1637 int 1638 sys_symlink(struct thread *td, struct symlink_args *uap) 1639 { 1640 1641 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1642 UIO_USERSPACE)); 1643 } 1644 1645 #ifndef _SYS_SYSPROTO_H_ 1646 struct symlinkat_args { 1647 char *path; 1648 int fd; 1649 char *path2; 1650 }; 1651 #endif 1652 int 1653 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1654 { 1655 1656 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1657 UIO_USERSPACE)); 1658 } 1659 1660 int 1661 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1662 enum uio_seg segflg) 1663 { 1664 struct mount *mp; 1665 struct vattr vattr; 1666 const char *syspath; 1667 char *tmppath; 1668 struct nameidata nd; 1669 int error; 1670 1671 if (segflg == UIO_SYSSPACE) { 1672 syspath = path1; 1673 } else { 1674 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1675 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1676 goto out; 1677 syspath = tmppath; 1678 } 1679 AUDIT_ARG_TEXT(syspath); 1680 restart: 1681 bwillwrite(); 1682 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1683 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1684 td); 1685 if ((error = namei(&nd)) != 0) 1686 goto out; 1687 if (nd.ni_vp) { 1688 NDFREE(&nd, NDF_ONLY_PNBUF); 1689 if (nd.ni_vp == nd.ni_dvp) 1690 vrele(nd.ni_dvp); 1691 else 1692 vput(nd.ni_dvp); 1693 vrele(nd.ni_vp); 1694 error = EEXIST; 1695 goto out; 1696 } 1697 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1698 NDFREE(&nd, NDF_ONLY_PNBUF); 1699 vput(nd.ni_dvp); 1700 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1701 goto out; 1702 goto restart; 1703 } 1704 VATTR_NULL(&vattr); 1705 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1706 #ifdef MAC 1707 vattr.va_type = VLNK; 1708 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 
&nd.ni_cnd, 1709 &vattr); 1710 if (error != 0) 1711 goto out2; 1712 #endif 1713 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1714 if (error == 0) 1715 vput(nd.ni_vp); 1716 #ifdef MAC 1717 out2: 1718 #endif 1719 NDFREE(&nd, NDF_ONLY_PNBUF); 1720 vput(nd.ni_dvp); 1721 vn_finished_write(mp); 1722 out: 1723 if (segflg != UIO_SYSSPACE) 1724 uma_zfree(namei_zone, tmppath); 1725 return (error); 1726 } 1727 1728 /* 1729 * Delete a whiteout from the filesystem. 1730 */ 1731 #ifndef _SYS_SYSPROTO_H_ 1732 struct undelete_args { 1733 char *path; 1734 }; 1735 #endif 1736 int 1737 sys_undelete(struct thread *td, struct undelete_args *uap) 1738 { 1739 struct mount *mp; 1740 struct nameidata nd; 1741 int error; 1742 1743 restart: 1744 bwillwrite(); 1745 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1746 UIO_USERSPACE, uap->path, td); 1747 error = namei(&nd); 1748 if (error != 0) 1749 return (error); 1750 1751 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1752 NDFREE(&nd, NDF_ONLY_PNBUF); 1753 if (nd.ni_vp == nd.ni_dvp) 1754 vrele(nd.ni_dvp); 1755 else 1756 vput(nd.ni_dvp); 1757 if (nd.ni_vp) 1758 vrele(nd.ni_vp); 1759 return (EEXIST); 1760 } 1761 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1762 NDFREE(&nd, NDF_ONLY_PNBUF); 1763 vput(nd.ni_dvp); 1764 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1765 return (error); 1766 goto restart; 1767 } 1768 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1769 NDFREE(&nd, NDF_ONLY_PNBUF); 1770 vput(nd.ni_dvp); 1771 vn_finished_write(mp); 1772 return (error); 1773 } 1774 1775 /* 1776 * Delete a name from the filesystem. 
1777 */ 1778 #ifndef _SYS_SYSPROTO_H_ 1779 struct unlink_args { 1780 char *path; 1781 }; 1782 #endif 1783 int 1784 sys_unlink(struct thread *td, struct unlink_args *uap) 1785 { 1786 1787 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1788 0, 0)); 1789 } 1790 1791 static int 1792 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1793 int flag, enum uio_seg pathseg, ino_t oldinum) 1794 { 1795 1796 if ((flag & ~AT_REMOVEDIR) != 0) 1797 return (EINVAL); 1798 1799 if ((flag & AT_REMOVEDIR) != 0) 1800 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1801 1802 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1803 } 1804 1805 #ifndef _SYS_SYSPROTO_H_ 1806 struct unlinkat_args { 1807 int fd; 1808 char *path; 1809 int flag; 1810 }; 1811 #endif 1812 int 1813 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1814 { 1815 1816 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1817 UIO_USERSPACE, 0)); 1818 } 1819 1820 #ifndef _SYS_SYSPROTO_H_ 1821 struct funlinkat_args { 1822 int dfd; 1823 const char *path; 1824 int fd; 1825 int flag; 1826 }; 1827 #endif 1828 int 1829 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1830 { 1831 1832 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1833 UIO_USERSPACE, 0)); 1834 } 1835 1836 int 1837 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1838 enum uio_seg pathseg, int flag, ino_t oldinum) 1839 { 1840 struct mount *mp; 1841 struct file *fp; 1842 struct vnode *vp; 1843 struct nameidata nd; 1844 struct stat sb; 1845 int error; 1846 1847 fp = NULL; 1848 if (fd != FD_NONE) { 1849 error = getvnode(td, fd, &cap_no_rights, &fp); 1850 if (error != 0) 1851 return (error); 1852 } 1853 1854 restart: 1855 bwillwrite(); 1856 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1857 ((flag & AT_BENEATH) != 0 ? 
BENEATH : 0), 1858 pathseg, path, dfd, &cap_unlinkat_rights, td); 1859 if ((error = namei(&nd)) != 0) { 1860 if (error == EINVAL) 1861 error = EPERM; 1862 goto fdout; 1863 } 1864 vp = nd.ni_vp; 1865 if (vp->v_type == VDIR && oldinum == 0) { 1866 error = EPERM; /* POSIX */ 1867 } else if (oldinum != 0 && 1868 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1869 sb.st_ino != oldinum) { 1870 error = EIDRM; /* Identifier removed */ 1871 } else if (fp != NULL && fp->f_vnode != vp) { 1872 if (VN_IS_DOOMED(fp->f_vnode)) 1873 error = EBADF; 1874 else 1875 error = EDEADLK; 1876 } else { 1877 /* 1878 * The root of a mounted filesystem cannot be deleted. 1879 * 1880 * XXX: can this only be a VDIR case? 1881 */ 1882 if (vp->v_vflag & VV_ROOT) 1883 error = EBUSY; 1884 } 1885 if (error == 0) { 1886 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1887 NDFREE(&nd, NDF_ONLY_PNBUF); 1888 vput(nd.ni_dvp); 1889 if (vp == nd.ni_dvp) 1890 vrele(vp); 1891 else 1892 vput(vp); 1893 if ((error = vn_start_write(NULL, &mp, 1894 V_XSLEEP | PCATCH)) != 0) { 1895 goto fdout; 1896 } 1897 goto restart; 1898 } 1899 #ifdef MAC 1900 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1901 &nd.ni_cnd); 1902 if (error != 0) 1903 goto out; 1904 #endif 1905 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1906 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1907 #ifdef MAC 1908 out: 1909 #endif 1910 vn_finished_write(mp); 1911 } 1912 NDFREE(&nd, NDF_ONLY_PNBUF); 1913 vput(nd.ni_dvp); 1914 if (vp == nd.ni_dvp) 1915 vrele(vp); 1916 else 1917 vput(vp); 1918 fdout: 1919 if (fp != NULL) 1920 fdrop(fp, td); 1921 return (error); 1922 } 1923 1924 /* 1925 * Reposition read/write file offset. 
1926 */ 1927 #ifndef _SYS_SYSPROTO_H_ 1928 struct lseek_args { 1929 int fd; 1930 int pad; 1931 off_t offset; 1932 int whence; 1933 }; 1934 #endif 1935 int 1936 sys_lseek(struct thread *td, struct lseek_args *uap) 1937 { 1938 1939 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1940 } 1941 1942 int 1943 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1944 { 1945 struct file *fp; 1946 int error; 1947 1948 AUDIT_ARG_FD(fd); 1949 error = fget(td, fd, &cap_seek_rights, &fp); 1950 if (error != 0) 1951 return (error); 1952 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1953 fo_seek(fp, offset, whence, td) : ESPIPE; 1954 fdrop(fp, td); 1955 return (error); 1956 } 1957 1958 #if defined(COMPAT_43) 1959 /* 1960 * Reposition read/write file offset. 1961 */ 1962 #ifndef _SYS_SYSPROTO_H_ 1963 struct olseek_args { 1964 int fd; 1965 long offset; 1966 int whence; 1967 }; 1968 #endif 1969 int 1970 olseek(struct thread *td, struct olseek_args *uap) 1971 { 1972 1973 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1974 } 1975 #endif /* COMPAT_43 */ 1976 1977 #if defined(COMPAT_FREEBSD6) 1978 /* Version with the 'pad' argument */ 1979 int 1980 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 1981 { 1982 1983 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1984 } 1985 #endif 1986 1987 /* 1988 * Check access permissions using passed credentials. 1989 */ 1990 static int 1991 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 1992 struct thread *td) 1993 { 1994 accmode_t accmode; 1995 int error; 1996 1997 /* Flags == 0 means only check for existence. 
*/ 1998 if (user_flags == 0) 1999 return (0); 2000 2001 accmode = 0; 2002 if (user_flags & R_OK) 2003 accmode |= VREAD; 2004 if (user_flags & W_OK) 2005 accmode |= VWRITE; 2006 if (user_flags & X_OK) 2007 accmode |= VEXEC; 2008 #ifdef MAC 2009 error = mac_vnode_check_access(cred, vp, accmode); 2010 if (error != 0) 2011 return (error); 2012 #endif 2013 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2014 error = VOP_ACCESS(vp, accmode, cred, td); 2015 return (error); 2016 } 2017 2018 /* 2019 * Check access permissions using "real" credentials. 2020 */ 2021 #ifndef _SYS_SYSPROTO_H_ 2022 struct access_args { 2023 char *path; 2024 int amode; 2025 }; 2026 #endif 2027 int 2028 sys_access(struct thread *td, struct access_args *uap) 2029 { 2030 2031 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2032 0, uap->amode)); 2033 } 2034 2035 #ifndef _SYS_SYSPROTO_H_ 2036 struct faccessat_args { 2037 int dirfd; 2038 char *path; 2039 int amode; 2040 int flag; 2041 } 2042 #endif 2043 int 2044 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2045 { 2046 2047 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2048 uap->amode)); 2049 } 2050 2051 int 2052 kern_accessat(struct thread *td, int fd, const char *path, 2053 enum uio_seg pathseg, int flag, int amode) 2054 { 2055 struct ucred *cred, *usecred; 2056 struct vnode *vp; 2057 struct nameidata nd; 2058 int error; 2059 2060 if ((flag & ~(AT_EACCESS | AT_BENEATH)) != 0) 2061 return (EINVAL); 2062 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2063 return (EINVAL); 2064 2065 /* 2066 * Create and modify a temporary credential instead of one that 2067 * is potentially shared (if we need one). 
2068 */ 2069 cred = td->td_ucred; 2070 if ((flag & AT_EACCESS) == 0 && 2071 ((cred->cr_uid != cred->cr_ruid || 2072 cred->cr_rgid != cred->cr_groups[0]))) { 2073 usecred = crdup(cred); 2074 usecred->cr_uid = cred->cr_ruid; 2075 usecred->cr_groups[0] = cred->cr_rgid; 2076 td->td_ucred = usecred; 2077 } else 2078 usecred = cred; 2079 AUDIT_ARG_VALUE(amode); 2080 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2081 AUDITVNODE1 | ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 2082 pathseg, path, fd, &cap_fstat_rights, td); 2083 if ((error = namei(&nd)) != 0) 2084 goto out; 2085 vp = nd.ni_vp; 2086 2087 error = vn_access(vp, amode, usecred, td); 2088 NDFREE(&nd, NDF_ONLY_PNBUF); 2089 vput(vp); 2090 out: 2091 if (usecred != cred) { 2092 td->td_ucred = cred; 2093 crfree(usecred); 2094 } 2095 return (error); 2096 } 2097 2098 /* 2099 * Check access permissions using "effective" credentials. 2100 */ 2101 #ifndef _SYS_SYSPROTO_H_ 2102 struct eaccess_args { 2103 char *path; 2104 int amode; 2105 }; 2106 #endif 2107 int 2108 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2109 { 2110 2111 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2112 AT_EACCESS, uap->amode)); 2113 } 2114 2115 #if defined(COMPAT_43) 2116 /* 2117 * Get file status; this version follows links. 2118 */ 2119 #ifndef _SYS_SYSPROTO_H_ 2120 struct ostat_args { 2121 char *path; 2122 struct ostat *ub; 2123 }; 2124 #endif 2125 int 2126 ostat(struct thread *td, struct ostat_args *uap) 2127 { 2128 struct stat sb; 2129 struct ostat osb; 2130 int error; 2131 2132 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2133 &sb, NULL); 2134 if (error != 0) 2135 return (error); 2136 cvtstat(&sb, &osb); 2137 return (copyout(&osb, uap->ub, sizeof (osb))); 2138 } 2139 2140 /* 2141 * Get file status; this version does not follow links. 
2142 */ 2143 #ifndef _SYS_SYSPROTO_H_ 2144 struct olstat_args { 2145 char *path; 2146 struct ostat *ub; 2147 }; 2148 #endif 2149 int 2150 olstat(struct thread *td, struct olstat_args *uap) 2151 { 2152 struct stat sb; 2153 struct ostat osb; 2154 int error; 2155 2156 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2157 UIO_USERSPACE, &sb, NULL); 2158 if (error != 0) 2159 return (error); 2160 cvtstat(&sb, &osb); 2161 return (copyout(&osb, uap->ub, sizeof (osb))); 2162 } 2163 2164 /* 2165 * Convert from an old to a new stat structure. 2166 * XXX: many values are blindly truncated. 2167 */ 2168 void 2169 cvtstat(struct stat *st, struct ostat *ost) 2170 { 2171 2172 bzero(ost, sizeof(*ost)); 2173 ost->st_dev = st->st_dev; 2174 ost->st_ino = st->st_ino; 2175 ost->st_mode = st->st_mode; 2176 ost->st_nlink = st->st_nlink; 2177 ost->st_uid = st->st_uid; 2178 ost->st_gid = st->st_gid; 2179 ost->st_rdev = st->st_rdev; 2180 ost->st_size = MIN(st->st_size, INT32_MAX); 2181 ost->st_atim = st->st_atim; 2182 ost->st_mtim = st->st_mtim; 2183 ost->st_ctim = st->st_ctim; 2184 ost->st_blksize = st->st_blksize; 2185 ost->st_blocks = st->st_blocks; 2186 ost->st_flags = st->st_flags; 2187 ost->st_gen = st->st_gen; 2188 } 2189 #endif /* COMPAT_43 */ 2190 2191 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2192 int ino64_trunc_error; 2193 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2194 &ino64_trunc_error, 0, 2195 "Error on truncation of device, file or inode number, or link count"); 2196 2197 int 2198 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2199 { 2200 2201 ost->st_dev = st->st_dev; 2202 if (ost->st_dev != st->st_dev) { 2203 switch (ino64_trunc_error) { 2204 default: 2205 /* 2206 * Since dev_t is almost raw, don't clamp to the 2207 * maximum for case 2, but ignore the error. 
2208 */ 2209 break; 2210 case 1: 2211 return (EOVERFLOW); 2212 } 2213 } 2214 ost->st_ino = st->st_ino; 2215 if (ost->st_ino != st->st_ino) { 2216 switch (ino64_trunc_error) { 2217 default: 2218 case 0: 2219 break; 2220 case 1: 2221 return (EOVERFLOW); 2222 case 2: 2223 ost->st_ino = UINT32_MAX; 2224 break; 2225 } 2226 } 2227 ost->st_mode = st->st_mode; 2228 ost->st_nlink = st->st_nlink; 2229 if (ost->st_nlink != st->st_nlink) { 2230 switch (ino64_trunc_error) { 2231 default: 2232 case 0: 2233 break; 2234 case 1: 2235 return (EOVERFLOW); 2236 case 2: 2237 ost->st_nlink = UINT16_MAX; 2238 break; 2239 } 2240 } 2241 ost->st_uid = st->st_uid; 2242 ost->st_gid = st->st_gid; 2243 ost->st_rdev = st->st_rdev; 2244 if (ost->st_rdev != st->st_rdev) { 2245 switch (ino64_trunc_error) { 2246 default: 2247 break; 2248 case 1: 2249 return (EOVERFLOW); 2250 } 2251 } 2252 ost->st_atim = st->st_atim; 2253 ost->st_mtim = st->st_mtim; 2254 ost->st_ctim = st->st_ctim; 2255 ost->st_size = st->st_size; 2256 ost->st_blocks = st->st_blocks; 2257 ost->st_blksize = st->st_blksize; 2258 ost->st_flags = st->st_flags; 2259 ost->st_gen = st->st_gen; 2260 ost->st_lspare = 0; 2261 ost->st_birthtim = st->st_birthtim; 2262 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2263 sizeof(*ost) - offsetof(struct freebsd11_stat, 2264 st_birthtim) - sizeof(ost->st_birthtim)); 2265 return (0); 2266 } 2267 2268 int 2269 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2270 { 2271 struct stat sb; 2272 struct freebsd11_stat osb; 2273 int error; 2274 2275 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2276 &sb, NULL); 2277 if (error != 0) 2278 return (error); 2279 error = freebsd11_cvtstat(&sb, &osb); 2280 if (error == 0) 2281 error = copyout(&osb, uap->ub, sizeof(osb)); 2282 return (error); 2283 } 2284 2285 int 2286 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2287 { 2288 struct stat sb; 2289 struct freebsd11_stat osb; 2290 int error; 2291 2292 
error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2293 UIO_USERSPACE, &sb, NULL); 2294 if (error != 0) 2295 return (error); 2296 error = freebsd11_cvtstat(&sb, &osb); 2297 if (error == 0) 2298 error = copyout(&osb, uap->ub, sizeof(osb)); 2299 return (error); 2300 } 2301 2302 int 2303 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2304 { 2305 struct fhandle fh; 2306 struct stat sb; 2307 struct freebsd11_stat osb; 2308 int error; 2309 2310 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2311 if (error != 0) 2312 return (error); 2313 error = kern_fhstat(td, fh, &sb); 2314 if (error != 0) 2315 return (error); 2316 error = freebsd11_cvtstat(&sb, &osb); 2317 if (error == 0) 2318 error = copyout(&osb, uap->sb, sizeof(osb)); 2319 return (error); 2320 } 2321 2322 int 2323 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2324 { 2325 struct stat sb; 2326 struct freebsd11_stat osb; 2327 int error; 2328 2329 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2330 UIO_USERSPACE, &sb, NULL); 2331 if (error != 0) 2332 return (error); 2333 error = freebsd11_cvtstat(&sb, &osb); 2334 if (error == 0) 2335 error = copyout(&osb, uap->buf, sizeof(osb)); 2336 return (error); 2337 } 2338 #endif /* COMPAT_FREEBSD11 */ 2339 2340 /* 2341 * Get file status 2342 */ 2343 #ifndef _SYS_SYSPROTO_H_ 2344 struct fstatat_args { 2345 int fd; 2346 char *path; 2347 struct stat *buf; 2348 int flag; 2349 } 2350 #endif 2351 int 2352 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2353 { 2354 struct stat sb; 2355 int error; 2356 2357 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2358 UIO_USERSPACE, &sb, NULL); 2359 if (error == 0) 2360 error = copyout(&sb, uap->buf, sizeof (sb)); 2361 return (error); 2362 } 2363 2364 int 2365 kern_statat(struct thread *td, int flag, int fd, const char *path, 2366 enum uio_seg pathseg, struct stat *sbp, 2367 void (*hook)(struct vnode *vp, struct stat *sbp)) 2368 { 2369 struct nameidata 
nd; 2370 int error; 2371 2372 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2373 return (EINVAL); 2374 2375 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) != 0 ? 2376 NOFOLLOW : FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | 2377 LOCKSHARED | LOCKLEAF | AUDITVNODE1, pathseg, path, fd, 2378 &cap_fstat_rights, td); 2379 2380 if ((error = namei(&nd)) != 0) 2381 return (error); 2382 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td); 2383 if (error == 0) { 2384 if (__predict_false(hook != NULL)) 2385 hook(nd.ni_vp, sbp); 2386 } 2387 NDFREE(&nd, NDF_ONLY_PNBUF); 2388 vput(nd.ni_vp); 2389 #ifdef __STAT_TIME_T_EXT 2390 sbp->st_atim_ext = 0; 2391 sbp->st_mtim_ext = 0; 2392 sbp->st_ctim_ext = 0; 2393 sbp->st_btim_ext = 0; 2394 #endif 2395 #ifdef KTRACE 2396 if (KTRPOINT(td, KTR_STRUCT)) 2397 ktrstat_error(sbp, error); 2398 #endif 2399 return (error); 2400 } 2401 2402 #if defined(COMPAT_FREEBSD11) 2403 /* 2404 * Implementation of the NetBSD [l]stat() functions. 2405 */ 2406 void 2407 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2408 { 2409 2410 bzero(nsb, sizeof(*nsb)); 2411 nsb->st_dev = sb->st_dev; 2412 nsb->st_ino = sb->st_ino; 2413 nsb->st_mode = sb->st_mode; 2414 nsb->st_nlink = sb->st_nlink; 2415 nsb->st_uid = sb->st_uid; 2416 nsb->st_gid = sb->st_gid; 2417 nsb->st_rdev = sb->st_rdev; 2418 nsb->st_atim = sb->st_atim; 2419 nsb->st_mtim = sb->st_mtim; 2420 nsb->st_ctim = sb->st_ctim; 2421 nsb->st_size = sb->st_size; 2422 nsb->st_blocks = sb->st_blocks; 2423 nsb->st_blksize = sb->st_blksize; 2424 nsb->st_flags = sb->st_flags; 2425 nsb->st_gen = sb->st_gen; 2426 nsb->st_birthtim = sb->st_birthtim; 2427 } 2428 2429 #ifndef _SYS_SYSPROTO_H_ 2430 struct freebsd11_nstat_args { 2431 char *path; 2432 struct nstat *ub; 2433 }; 2434 #endif 2435 int 2436 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2437 { 2438 struct stat sb; 2439 struct nstat nsb; 2440 int error; 2441 2442 error = kern_statat(td, 0, AT_FDCWD, 
uap->path, UIO_USERSPACE, 2443 &sb, NULL); 2444 if (error != 0) 2445 return (error); 2446 freebsd11_cvtnstat(&sb, &nsb); 2447 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2448 } 2449 2450 /* 2451 * NetBSD lstat. Get file status; this version does not follow links. 2452 */ 2453 #ifndef _SYS_SYSPROTO_H_ 2454 struct freebsd11_nlstat_args { 2455 char *path; 2456 struct nstat *ub; 2457 }; 2458 #endif 2459 int 2460 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2461 { 2462 struct stat sb; 2463 struct nstat nsb; 2464 int error; 2465 2466 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2467 UIO_USERSPACE, &sb, NULL); 2468 if (error != 0) 2469 return (error); 2470 freebsd11_cvtnstat(&sb, &nsb); 2471 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2472 } 2473 #endif /* COMPAT_FREEBSD11 */ 2474 2475 /* 2476 * Get configurable pathname variables. 2477 */ 2478 #ifndef _SYS_SYSPROTO_H_ 2479 struct pathconf_args { 2480 char *path; 2481 int name; 2482 }; 2483 #endif 2484 int 2485 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2486 { 2487 long value; 2488 int error; 2489 2490 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2491 &value); 2492 if (error == 0) 2493 td->td_retval[0] = value; 2494 return (error); 2495 } 2496 2497 #ifndef _SYS_SYSPROTO_H_ 2498 struct lpathconf_args { 2499 char *path; 2500 int name; 2501 }; 2502 #endif 2503 int 2504 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2505 { 2506 long value; 2507 int error; 2508 2509 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2510 NOFOLLOW, &value); 2511 if (error == 0) 2512 td->td_retval[0] = value; 2513 return (error); 2514 } 2515 2516 int 2517 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2518 int name, u_long flags, long *valuep) 2519 { 2520 struct nameidata nd; 2521 int error; 2522 2523 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2524 pathseg, path, td); 2525 if 
((error = namei(&nd)) != 0) 2526 return (error); 2527 NDFREE(&nd, NDF_ONLY_PNBUF); 2528 2529 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2530 vput(nd.ni_vp); 2531 return (error); 2532 } 2533 2534 /* 2535 * Return target name of a symbolic link. 2536 */ 2537 #ifndef _SYS_SYSPROTO_H_ 2538 struct readlink_args { 2539 char *path; 2540 char *buf; 2541 size_t count; 2542 }; 2543 #endif 2544 int 2545 sys_readlink(struct thread *td, struct readlink_args *uap) 2546 { 2547 2548 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2549 uap->buf, UIO_USERSPACE, uap->count)); 2550 } 2551 #ifndef _SYS_SYSPROTO_H_ 2552 struct readlinkat_args { 2553 int fd; 2554 char *path; 2555 char *buf; 2556 size_t bufsize; 2557 }; 2558 #endif 2559 int 2560 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2561 { 2562 2563 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2564 uap->buf, UIO_USERSPACE, uap->bufsize)); 2565 } 2566 2567 int 2568 kern_readlinkat(struct thread *td, int fd, const char *path, 2569 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2570 { 2571 struct vnode *vp; 2572 struct nameidata nd; 2573 int error; 2574 2575 if (count > IOSIZE_MAX) 2576 return (EINVAL); 2577 2578 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2579 pathseg, path, fd, td); 2580 2581 if ((error = namei(&nd)) != 0) 2582 return (error); 2583 NDFREE(&nd, NDF_ONLY_PNBUF); 2584 vp = nd.ni_vp; 2585 2586 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2587 vput(vp); 2588 2589 return (error); 2590 } 2591 2592 /* 2593 * Helper function to readlink from a vnode 2594 */ 2595 static int 2596 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2597 struct thread *td) 2598 { 2599 struct iovec aiov; 2600 struct uio auio; 2601 int error; 2602 2603 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2604 #ifdef MAC 2605 error = mac_vnode_check_readlink(td->td_ucred, vp); 2606 if (error != 0) 2607 
return (error); 2608 #endif 2609 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2610 return (EINVAL); 2611 2612 aiov.iov_base = buf; 2613 aiov.iov_len = count; 2614 auio.uio_iov = &aiov; 2615 auio.uio_iovcnt = 1; 2616 auio.uio_offset = 0; 2617 auio.uio_rw = UIO_READ; 2618 auio.uio_segflg = bufseg; 2619 auio.uio_td = td; 2620 auio.uio_resid = count; 2621 error = VOP_READLINK(vp, &auio, td->td_ucred); 2622 td->td_retval[0] = count - auio.uio_resid; 2623 return (error); 2624 } 2625 2626 /* 2627 * Common implementation code for chflags() and fchflags(). 2628 */ 2629 static int 2630 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2631 { 2632 struct mount *mp; 2633 struct vattr vattr; 2634 int error; 2635 2636 /* We can't support the value matching VNOVAL. */ 2637 if (flags == VNOVAL) 2638 return (EOPNOTSUPP); 2639 2640 /* 2641 * Prevent non-root users from setting flags on devices. When 2642 * a device is reused, users can retain ownership of the device 2643 * if they are allowed to set flags and programs assume that 2644 * chown can't fail when done as root. 2645 */ 2646 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2647 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2648 if (error != 0) 2649 return (error); 2650 } 2651 2652 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2653 return (error); 2654 VATTR_NULL(&vattr); 2655 vattr.va_flags = flags; 2656 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2657 #ifdef MAC 2658 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2659 if (error == 0) 2660 #endif 2661 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2662 VOP_UNLOCK(vp); 2663 vn_finished_write(mp); 2664 return (error); 2665 } 2666 2667 /* 2668 * Change flags of a file given a path name. 
2669 */ 2670 #ifndef _SYS_SYSPROTO_H_ 2671 struct chflags_args { 2672 const char *path; 2673 u_long flags; 2674 }; 2675 #endif 2676 int 2677 sys_chflags(struct thread *td, struct chflags_args *uap) 2678 { 2679 2680 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2681 uap->flags, 0)); 2682 } 2683 2684 #ifndef _SYS_SYSPROTO_H_ 2685 struct chflagsat_args { 2686 int fd; 2687 const char *path; 2688 u_long flags; 2689 int atflag; 2690 } 2691 #endif 2692 int 2693 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2694 { 2695 2696 if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2697 return (EINVAL); 2698 2699 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2700 uap->flags, uap->atflag)); 2701 } 2702 2703 /* 2704 * Same as chflags() but doesn't follow symlinks. 2705 */ 2706 #ifndef _SYS_SYSPROTO_H_ 2707 struct lchflags_args { 2708 const char *path; 2709 u_long flags; 2710 }; 2711 #endif 2712 int 2713 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2714 { 2715 2716 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2717 uap->flags, AT_SYMLINK_NOFOLLOW)); 2718 } 2719 2720 static int 2721 kern_chflagsat(struct thread *td, int fd, const char *path, 2722 enum uio_seg pathseg, u_long flags, int atflag) 2723 { 2724 struct nameidata nd; 2725 int error, follow; 2726 2727 AUDIT_ARG_FFLAGS(flags); 2728 follow = (atflag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2729 follow |= (atflag & AT_BENEATH) != 0 ? BENEATH : 0; 2730 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2731 &cap_fchflags_rights, td); 2732 if ((error = namei(&nd)) != 0) 2733 return (error); 2734 NDFREE(&nd, NDF_ONLY_PNBUF); 2735 error = setfflags(td, nd.ni_vp, flags); 2736 vrele(nd.ni_vp); 2737 return (error); 2738 } 2739 2740 /* 2741 * Change flags of a file given a file descriptor. 
2742 */ 2743 #ifndef _SYS_SYSPROTO_H_ 2744 struct fchflags_args { 2745 int fd; 2746 u_long flags; 2747 }; 2748 #endif 2749 int 2750 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2751 { 2752 struct file *fp; 2753 int error; 2754 2755 AUDIT_ARG_FD(uap->fd); 2756 AUDIT_ARG_FFLAGS(uap->flags); 2757 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2758 &fp); 2759 if (error != 0) 2760 return (error); 2761 #ifdef AUDIT 2762 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2763 AUDIT_ARG_VNODE1(fp->f_vnode); 2764 VOP_UNLOCK(fp->f_vnode); 2765 #endif 2766 error = setfflags(td, fp->f_vnode, uap->flags); 2767 fdrop(fp, td); 2768 return (error); 2769 } 2770 2771 /* 2772 * Common implementation code for chmod(), lchmod() and fchmod(). 2773 */ 2774 int 2775 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2776 { 2777 struct mount *mp; 2778 struct vattr vattr; 2779 int error; 2780 2781 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2782 return (error); 2783 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2784 VATTR_NULL(&vattr); 2785 vattr.va_mode = mode & ALLPERMS; 2786 #ifdef MAC 2787 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2788 if (error == 0) 2789 #endif 2790 error = VOP_SETATTR(vp, &vattr, cred); 2791 VOP_UNLOCK(vp); 2792 vn_finished_write(mp); 2793 return (error); 2794 } 2795 2796 /* 2797 * Change mode of a file given path name. 
2798 */ 2799 #ifndef _SYS_SYSPROTO_H_ 2800 struct chmod_args { 2801 char *path; 2802 int mode; 2803 }; 2804 #endif 2805 int 2806 sys_chmod(struct thread *td, struct chmod_args *uap) 2807 { 2808 2809 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2810 uap->mode, 0)); 2811 } 2812 2813 #ifndef _SYS_SYSPROTO_H_ 2814 struct fchmodat_args { 2815 int dirfd; 2816 char *path; 2817 mode_t mode; 2818 int flag; 2819 } 2820 #endif 2821 int 2822 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2823 { 2824 2825 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2826 return (EINVAL); 2827 2828 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2829 uap->mode, uap->flag)); 2830 } 2831 2832 /* 2833 * Change mode of a file given path name (don't follow links.) 2834 */ 2835 #ifndef _SYS_SYSPROTO_H_ 2836 struct lchmod_args { 2837 char *path; 2838 int mode; 2839 }; 2840 #endif 2841 int 2842 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2843 { 2844 2845 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2846 uap->mode, AT_SYMLINK_NOFOLLOW)); 2847 } 2848 2849 int 2850 kern_fchmodat(struct thread *td, int fd, const char *path, 2851 enum uio_seg pathseg, mode_t mode, int flag) 2852 { 2853 struct nameidata nd; 2854 int error, follow; 2855 2856 AUDIT_ARG_MODE(mode); 2857 follow = (flag & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : FOLLOW; 2858 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2859 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2860 &cap_fchmod_rights, td); 2861 if ((error = namei(&nd)) != 0) 2862 return (error); 2863 NDFREE(&nd, NDF_ONLY_PNBUF); 2864 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2865 vrele(nd.ni_vp); 2866 return (error); 2867 } 2868 2869 /* 2870 * Change mode of a file given a file descriptor. 
2871 */ 2872 #ifndef _SYS_SYSPROTO_H_ 2873 struct fchmod_args { 2874 int fd; 2875 int mode; 2876 }; 2877 #endif 2878 int 2879 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2880 { 2881 struct file *fp; 2882 int error; 2883 2884 AUDIT_ARG_FD(uap->fd); 2885 AUDIT_ARG_MODE(uap->mode); 2886 2887 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2888 if (error != 0) 2889 return (error); 2890 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2891 fdrop(fp, td); 2892 return (error); 2893 } 2894 2895 /* 2896 * Common implementation for chown(), lchown(), and fchown() 2897 */ 2898 int 2899 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2900 gid_t gid) 2901 { 2902 struct mount *mp; 2903 struct vattr vattr; 2904 int error; 2905 2906 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2907 return (error); 2908 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2909 VATTR_NULL(&vattr); 2910 vattr.va_uid = uid; 2911 vattr.va_gid = gid; 2912 #ifdef MAC 2913 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2914 vattr.va_gid); 2915 if (error == 0) 2916 #endif 2917 error = VOP_SETATTR(vp, &vattr, cred); 2918 VOP_UNLOCK(vp); 2919 vn_finished_write(mp); 2920 return (error); 2921 } 2922 2923 /* 2924 * Set ownership given a path name. 
2925 */ 2926 #ifndef _SYS_SYSPROTO_H_ 2927 struct chown_args { 2928 char *path; 2929 int uid; 2930 int gid; 2931 }; 2932 #endif 2933 int 2934 sys_chown(struct thread *td, struct chown_args *uap) 2935 { 2936 2937 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2938 uap->gid, 0)); 2939 } 2940 2941 #ifndef _SYS_SYSPROTO_H_ 2942 struct fchownat_args { 2943 int fd; 2944 const char * path; 2945 uid_t uid; 2946 gid_t gid; 2947 int flag; 2948 }; 2949 #endif 2950 int 2951 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2952 { 2953 2954 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 2955 return (EINVAL); 2956 2957 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2958 uap->gid, uap->flag)); 2959 } 2960 2961 int 2962 kern_fchownat(struct thread *td, int fd, const char *path, 2963 enum uio_seg pathseg, int uid, int gid, int flag) 2964 { 2965 struct nameidata nd; 2966 int error, follow; 2967 2968 AUDIT_ARG_OWNER(uid, gid); 2969 follow = (flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : FOLLOW; 2970 follow |= (flag & AT_BENEATH) != 0 ? BENEATH : 0; 2971 NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, pathseg, path, fd, 2972 &cap_fchown_rights, td); 2973 2974 if ((error = namei(&nd)) != 0) 2975 return (error); 2976 NDFREE(&nd, NDF_ONLY_PNBUF); 2977 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 2978 vrele(nd.ni_vp); 2979 return (error); 2980 } 2981 2982 /* 2983 * Set ownership given a path name, do not cross symlinks. 2984 */ 2985 #ifndef _SYS_SYSPROTO_H_ 2986 struct lchown_args { 2987 char *path; 2988 int uid; 2989 int gid; 2990 }; 2991 #endif 2992 int 2993 sys_lchown(struct thread *td, struct lchown_args *uap) 2994 { 2995 2996 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2997 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 2998 } 2999 3000 /* 3001 * Set ownership given a file descriptor. 
3002 */ 3003 #ifndef _SYS_SYSPROTO_H_ 3004 struct fchown_args { 3005 int fd; 3006 int uid; 3007 int gid; 3008 }; 3009 #endif 3010 int 3011 sys_fchown(struct thread *td, struct fchown_args *uap) 3012 { 3013 struct file *fp; 3014 int error; 3015 3016 AUDIT_ARG_FD(uap->fd); 3017 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3018 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3019 if (error != 0) 3020 return (error); 3021 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3022 fdrop(fp, td); 3023 return (error); 3024 } 3025 3026 /* 3027 * Common implementation code for utimes(), lutimes(), and futimes(). 3028 */ 3029 static int 3030 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3031 struct timespec *tsp) 3032 { 3033 struct timeval tv[2]; 3034 const struct timeval *tvp; 3035 int error; 3036 3037 if (usrtvp == NULL) { 3038 vfs_timestamp(&tsp[0]); 3039 tsp[1] = tsp[0]; 3040 } else { 3041 if (tvpseg == UIO_SYSSPACE) { 3042 tvp = usrtvp; 3043 } else { 3044 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3045 return (error); 3046 tvp = tv; 3047 } 3048 3049 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3050 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3051 return (EINVAL); 3052 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3053 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3054 } 3055 return (0); 3056 } 3057 3058 /* 3059 * Common implementation code for futimens(), utimensat(). 
3060 */ 3061 #define UTIMENS_NULL 0x1 3062 #define UTIMENS_EXIT 0x2 3063 static int 3064 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3065 struct timespec *tsp, int *retflags) 3066 { 3067 struct timespec tsnow; 3068 int error; 3069 3070 vfs_timestamp(&tsnow); 3071 *retflags = 0; 3072 if (usrtsp == NULL) { 3073 tsp[0] = tsnow; 3074 tsp[1] = tsnow; 3075 *retflags |= UTIMENS_NULL; 3076 return (0); 3077 } 3078 if (tspseg == UIO_SYSSPACE) { 3079 tsp[0] = usrtsp[0]; 3080 tsp[1] = usrtsp[1]; 3081 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3082 return (error); 3083 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3084 *retflags |= UTIMENS_EXIT; 3085 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3086 *retflags |= UTIMENS_NULL; 3087 if (tsp[0].tv_nsec == UTIME_OMIT) 3088 tsp[0].tv_sec = VNOVAL; 3089 else if (tsp[0].tv_nsec == UTIME_NOW) 3090 tsp[0] = tsnow; 3091 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3092 return (EINVAL); 3093 if (tsp[1].tv_nsec == UTIME_OMIT) 3094 tsp[1].tv_sec = VNOVAL; 3095 else if (tsp[1].tv_nsec == UTIME_NOW) 3096 tsp[1] = tsnow; 3097 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3098 return (EINVAL); 3099 3100 return (0); 3101 } 3102 3103 /* 3104 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3105 * and utimensat(). 
3106 */ 3107 static int 3108 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3109 int numtimes, int nullflag) 3110 { 3111 struct mount *mp; 3112 struct vattr vattr; 3113 int error, setbirthtime; 3114 3115 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3116 return (error); 3117 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3118 setbirthtime = 0; 3119 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3120 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3121 setbirthtime = 1; 3122 VATTR_NULL(&vattr); 3123 vattr.va_atime = ts[0]; 3124 vattr.va_mtime = ts[1]; 3125 if (setbirthtime) 3126 vattr.va_birthtime = ts[1]; 3127 if (numtimes > 2) 3128 vattr.va_birthtime = ts[2]; 3129 if (nullflag) 3130 vattr.va_vaflags |= VA_UTIMES_NULL; 3131 #ifdef MAC 3132 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3133 vattr.va_mtime); 3134 #endif 3135 if (error == 0) 3136 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3137 VOP_UNLOCK(vp); 3138 vn_finished_write(mp); 3139 return (error); 3140 } 3141 3142 /* 3143 * Set the access and modification times of a file. 
3144 */ 3145 #ifndef _SYS_SYSPROTO_H_ 3146 struct utimes_args { 3147 char *path; 3148 struct timeval *tptr; 3149 }; 3150 #endif 3151 int 3152 sys_utimes(struct thread *td, struct utimes_args *uap) 3153 { 3154 3155 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3156 uap->tptr, UIO_USERSPACE)); 3157 } 3158 3159 #ifndef _SYS_SYSPROTO_H_ 3160 struct futimesat_args { 3161 int fd; 3162 const char * path; 3163 const struct timeval * times; 3164 }; 3165 #endif 3166 int 3167 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3168 { 3169 3170 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3171 uap->times, UIO_USERSPACE)); 3172 } 3173 3174 int 3175 kern_utimesat(struct thread *td, int fd, const char *path, 3176 enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) 3177 { 3178 struct nameidata nd; 3179 struct timespec ts[2]; 3180 int error; 3181 3182 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3183 return (error); 3184 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3185 &cap_futimes_rights, td); 3186 3187 if ((error = namei(&nd)) != 0) 3188 return (error); 3189 NDFREE(&nd, NDF_ONLY_PNBUF); 3190 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3191 vrele(nd.ni_vp); 3192 return (error); 3193 } 3194 3195 /* 3196 * Set the access and modification times of a file. 
3197 */ 3198 #ifndef _SYS_SYSPROTO_H_ 3199 struct lutimes_args { 3200 char *path; 3201 struct timeval *tptr; 3202 }; 3203 #endif 3204 int 3205 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3206 { 3207 3208 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3209 UIO_USERSPACE)); 3210 } 3211 3212 int 3213 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3214 struct timeval *tptr, enum uio_seg tptrseg) 3215 { 3216 struct timespec ts[2]; 3217 struct nameidata nd; 3218 int error; 3219 3220 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3221 return (error); 3222 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3223 if ((error = namei(&nd)) != 0) 3224 return (error); 3225 NDFREE(&nd, NDF_ONLY_PNBUF); 3226 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3227 vrele(nd.ni_vp); 3228 return (error); 3229 } 3230 3231 /* 3232 * Set the access and modification times of a file. 3233 */ 3234 #ifndef _SYS_SYSPROTO_H_ 3235 struct futimes_args { 3236 int fd; 3237 struct timeval *tptr; 3238 }; 3239 #endif 3240 int 3241 sys_futimes(struct thread *td, struct futimes_args *uap) 3242 { 3243 3244 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3245 } 3246 3247 int 3248 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3249 enum uio_seg tptrseg) 3250 { 3251 struct timespec ts[2]; 3252 struct file *fp; 3253 int error; 3254 3255 AUDIT_ARG_FD(fd); 3256 error = getutimes(tptr, tptrseg, ts); 3257 if (error != 0) 3258 return (error); 3259 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3260 if (error != 0) 3261 return (error); 3262 #ifdef AUDIT 3263 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3264 AUDIT_ARG_VNODE1(fp->f_vnode); 3265 VOP_UNLOCK(fp->f_vnode); 3266 #endif 3267 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3268 fdrop(fp, td); 3269 return (error); 3270 } 3271 3272 int 3273 sys_futimens(struct thread *td, struct futimens_args *uap) 3274 { 3275 3276 return (kern_futimens(td, 
uap->fd, uap->times, UIO_USERSPACE)); 3277 } 3278 3279 int 3280 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3281 enum uio_seg tptrseg) 3282 { 3283 struct timespec ts[2]; 3284 struct file *fp; 3285 int error, flags; 3286 3287 AUDIT_ARG_FD(fd); 3288 error = getutimens(tptr, tptrseg, ts, &flags); 3289 if (error != 0) 3290 return (error); 3291 if (flags & UTIMENS_EXIT) 3292 return (0); 3293 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3294 if (error != 0) 3295 return (error); 3296 #ifdef AUDIT 3297 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3298 AUDIT_ARG_VNODE1(fp->f_vnode); 3299 VOP_UNLOCK(fp->f_vnode); 3300 #endif 3301 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3302 fdrop(fp, td); 3303 return (error); 3304 } 3305 3306 int 3307 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3308 { 3309 3310 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3311 uap->times, UIO_USERSPACE, uap->flag)); 3312 } 3313 3314 int 3315 kern_utimensat(struct thread *td, int fd, const char *path, 3316 enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, 3317 int flag) 3318 { 3319 struct nameidata nd; 3320 struct timespec ts[2]; 3321 int error, flags; 3322 3323 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 3324 return (EINVAL); 3325 3326 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3327 return (error); 3328 NDINIT_ATRIGHTS(&nd, LOOKUP, ((flag & AT_SYMLINK_NOFOLLOW) ? NOFOLLOW : 3329 FOLLOW) | ((flag & AT_BENEATH) != 0 ? BENEATH : 0) | AUDITVNODE1, 3330 pathseg, path, fd, &cap_futimes_rights, td); 3331 if ((error = namei(&nd)) != 0) 3332 return (error); 3333 /* 3334 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3335 * POSIX states: 3336 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3337 * "Search permission is denied by a component of the path prefix." 
3338 */ 3339 NDFREE(&nd, NDF_ONLY_PNBUF); 3340 if ((flags & UTIMENS_EXIT) == 0) 3341 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3342 vrele(nd.ni_vp); 3343 return (error); 3344 } 3345 3346 /* 3347 * Truncate a file given its path name. 3348 */ 3349 #ifndef _SYS_SYSPROTO_H_ 3350 struct truncate_args { 3351 char *path; 3352 int pad; 3353 off_t length; 3354 }; 3355 #endif 3356 int 3357 sys_truncate(struct thread *td, struct truncate_args *uap) 3358 { 3359 3360 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3361 } 3362 3363 int 3364 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3365 off_t length) 3366 { 3367 struct mount *mp; 3368 struct vnode *vp; 3369 void *rl_cookie; 3370 struct vattr vattr; 3371 struct nameidata nd; 3372 int error; 3373 3374 if (length < 0) 3375 return(EINVAL); 3376 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3377 if ((error = namei(&nd)) != 0) 3378 return (error); 3379 vp = nd.ni_vp; 3380 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3381 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3382 vn_rangelock_unlock(vp, rl_cookie); 3383 vrele(vp); 3384 return (error); 3385 } 3386 NDFREE(&nd, NDF_ONLY_PNBUF); 3387 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3388 if (vp->v_type == VDIR) 3389 error = EISDIR; 3390 #ifdef MAC 3391 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3392 } 3393 #endif 3394 else if ((error = vn_writechk(vp)) == 0 && 3395 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3396 VATTR_NULL(&vattr); 3397 vattr.va_size = length; 3398 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3399 } 3400 VOP_UNLOCK(vp); 3401 vn_finished_write(mp); 3402 vn_rangelock_unlock(vp, rl_cookie); 3403 vrele(vp); 3404 return (error); 3405 } 3406 3407 #if defined(COMPAT_43) 3408 /* 3409 * Truncate a file given its path name. 
3410 */ 3411 #ifndef _SYS_SYSPROTO_H_ 3412 struct otruncate_args { 3413 char *path; 3414 long length; 3415 }; 3416 #endif 3417 int 3418 otruncate(struct thread *td, struct otruncate_args *uap) 3419 { 3420 3421 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3422 } 3423 #endif /* COMPAT_43 */ 3424 3425 #if defined(COMPAT_FREEBSD6) 3426 /* Versions with the pad argument */ 3427 int 3428 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3429 { 3430 3431 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3432 } 3433 3434 int 3435 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3436 { 3437 3438 return (kern_ftruncate(td, uap->fd, uap->length)); 3439 } 3440 #endif 3441 3442 int 3443 kern_fsync(struct thread *td, int fd, bool fullsync) 3444 { 3445 struct vnode *vp; 3446 struct mount *mp; 3447 struct file *fp; 3448 int error, lock_flags; 3449 3450 AUDIT_ARG_FD(fd); 3451 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3452 if (error != 0) 3453 return (error); 3454 vp = fp->f_vnode; 3455 #if 0 3456 if (!fullsync) 3457 /* XXXKIB: compete outstanding aio writes */; 3458 #endif 3459 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3460 if (error != 0) 3461 goto drop; 3462 if (MNT_SHARED_WRITES(mp) || 3463 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3464 lock_flags = LK_SHARED; 3465 } else { 3466 lock_flags = LK_EXCLUSIVE; 3467 } 3468 vn_lock(vp, lock_flags | LK_RETRY); 3469 AUDIT_ARG_VNODE1(vp); 3470 if (vp->v_object != NULL) { 3471 VM_OBJECT_WLOCK(vp->v_object); 3472 vm_object_page_clean(vp->v_object, 0, 0, 0); 3473 VM_OBJECT_WUNLOCK(vp->v_object); 3474 } 3475 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3476 VOP_UNLOCK(vp); 3477 vn_finished_write(mp); 3478 drop: 3479 fdrop(fp, td); 3480 return (error); 3481 } 3482 3483 /* 3484 * Sync an open file. 
3485 */ 3486 #ifndef _SYS_SYSPROTO_H_ 3487 struct fsync_args { 3488 int fd; 3489 }; 3490 #endif 3491 int 3492 sys_fsync(struct thread *td, struct fsync_args *uap) 3493 { 3494 3495 return (kern_fsync(td, uap->fd, true)); 3496 } 3497 3498 int 3499 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3500 { 3501 3502 return (kern_fsync(td, uap->fd, false)); 3503 } 3504 3505 /* 3506 * Rename files. Source and destination must either both be directories, or 3507 * both not be directories. If target is a directory, it must be empty. 3508 */ 3509 #ifndef _SYS_SYSPROTO_H_ 3510 struct rename_args { 3511 char *from; 3512 char *to; 3513 }; 3514 #endif 3515 int 3516 sys_rename(struct thread *td, struct rename_args *uap) 3517 { 3518 3519 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3520 uap->to, UIO_USERSPACE)); 3521 } 3522 3523 #ifndef _SYS_SYSPROTO_H_ 3524 struct renameat_args { 3525 int oldfd; 3526 char *old; 3527 int newfd; 3528 char *new; 3529 }; 3530 #endif 3531 int 3532 sys_renameat(struct thread *td, struct renameat_args *uap) 3533 { 3534 3535 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3536 UIO_USERSPACE)); 3537 } 3538 3539 #ifdef MAC 3540 static int 3541 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3542 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3543 { 3544 int error; 3545 3546 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3547 AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td); 3548 if ((error = namei(fromnd)) != 0) 3549 return (error); 3550 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3551 fromnd->ni_vp, &fromnd->ni_cnd); 3552 VOP_UNLOCK(fromnd->ni_dvp); 3553 if (fromnd->ni_dvp != fromnd->ni_vp) 3554 VOP_UNLOCK(fromnd->ni_vp); 3555 if (error != 0) { 3556 NDFREE(fromnd, NDF_ONLY_PNBUF); 3557 vrele(fromnd->ni_dvp); 3558 vrele(fromnd->ni_vp); 3559 if (fromnd->ni_startdir) 3560 vrele(fromnd->ni_startdir); 3561 } 
3562 return (error); 3563 } 3564 #endif 3565 3566 int 3567 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3568 const char *new, enum uio_seg pathseg) 3569 { 3570 struct mount *mp = NULL; 3571 struct vnode *tvp, *fvp, *tdvp; 3572 struct nameidata fromnd, tond; 3573 int error; 3574 3575 again: 3576 bwillwrite(); 3577 #ifdef MAC 3578 if (mac_vnode_check_rename_from_enabled()) { 3579 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3580 &fromnd); 3581 if (error != 0) 3582 return (error); 3583 } else { 3584 #endif 3585 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3586 pathseg, old, oldfd, &cap_renameat_source_rights, td); 3587 if ((error = namei(&fromnd)) != 0) 3588 return (error); 3589 #ifdef MAC 3590 } 3591 #endif 3592 fvp = fromnd.ni_vp; 3593 NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | 3594 SAVESTART | AUDITVNODE2, pathseg, new, newfd, 3595 &cap_renameat_target_rights, td); 3596 if (fromnd.ni_vp->v_type == VDIR) 3597 tond.ni_cnd.cn_flags |= WILLBEDIR; 3598 if ((error = namei(&tond)) != 0) { 3599 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3600 if (error == EISDIR && fvp->v_type == VDIR) 3601 error = EINVAL; 3602 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3603 vrele(fromnd.ni_dvp); 3604 vrele(fvp); 3605 goto out1; 3606 } 3607 tdvp = tond.ni_dvp; 3608 tvp = tond.ni_vp; 3609 error = vn_start_write(fvp, &mp, V_NOWAIT); 3610 if (error != 0) { 3611 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3612 NDFREE(&tond, NDF_ONLY_PNBUF); 3613 if (tvp != NULL) 3614 vput(tvp); 3615 if (tdvp == tvp) 3616 vrele(tdvp); 3617 else 3618 vput(tdvp); 3619 vrele(fromnd.ni_dvp); 3620 vrele(fvp); 3621 vrele(tond.ni_startdir); 3622 if (fromnd.ni_startdir != NULL) 3623 vrele(fromnd.ni_startdir); 3624 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3625 if (error != 0) 3626 return (error); 3627 goto again; 3628 } 3629 if (tvp != NULL) { 3630 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3631 error = ENOTDIR; 3632 goto out; 3633 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3634 error = EISDIR; 3635 goto out; 3636 } 3637 #ifdef CAPABILITIES 3638 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3639 /* 3640 * If the target already exists we require CAP_UNLINKAT 3641 * from 'newfd', when newfd was used for the lookup. 3642 */ 3643 error = cap_check(&tond.ni_filecaps.fc_rights, 3644 &cap_unlinkat_rights); 3645 if (error != 0) 3646 goto out; 3647 } 3648 #endif 3649 } 3650 if (fvp == tdvp) { 3651 error = EINVAL; 3652 goto out; 3653 } 3654 /* 3655 * If the source is the same as the destination (that is, if they 3656 * are links to the same vnode), then there is nothing to do. 
3657 */ 3658 if (fvp == tvp) 3659 error = -1; 3660 #ifdef MAC 3661 else 3662 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3663 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3664 #endif 3665 out: 3666 if (error == 0) { 3667 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3668 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3669 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3670 NDFREE(&tond, NDF_ONLY_PNBUF); 3671 } else { 3672 NDFREE(&fromnd, NDF_ONLY_PNBUF); 3673 NDFREE(&tond, NDF_ONLY_PNBUF); 3674 if (tvp != NULL) 3675 vput(tvp); 3676 if (tdvp == tvp) 3677 vrele(tdvp); 3678 else 3679 vput(tdvp); 3680 vrele(fromnd.ni_dvp); 3681 vrele(fvp); 3682 } 3683 vrele(tond.ni_startdir); 3684 vn_finished_write(mp); 3685 out1: 3686 if (fromnd.ni_startdir) 3687 vrele(fromnd.ni_startdir); 3688 if (error == -1) 3689 return (0); 3690 return (error); 3691 } 3692 3693 /* 3694 * Make a directory file. 3695 */ 3696 #ifndef _SYS_SYSPROTO_H_ 3697 struct mkdir_args { 3698 char *path; 3699 int mode; 3700 }; 3701 #endif 3702 int 3703 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3704 { 3705 3706 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3707 uap->mode)); 3708 } 3709 3710 #ifndef _SYS_SYSPROTO_H_ 3711 struct mkdirat_args { 3712 int fd; 3713 char *path; 3714 mode_t mode; 3715 }; 3716 #endif 3717 int 3718 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3719 { 3720 3721 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3722 } 3723 3724 int 3725 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3726 int mode) 3727 { 3728 struct mount *mp; 3729 struct vnode *vp; 3730 struct vattr vattr; 3731 struct nameidata nd; 3732 int error; 3733 3734 AUDIT_ARG_MODE(mode); 3735 restart: 3736 bwillwrite(); 3737 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3738 NOCACHE, segflg, path, fd, &cap_mkdirat_rights, 3739 td); 3740 nd.ni_cnd.cn_flags |= WILLBEDIR; 3741 if ((error = namei(&nd)) != 0) 3742 
return (error); 3743 vp = nd.ni_vp; 3744 if (vp != NULL) { 3745 NDFREE(&nd, NDF_ONLY_PNBUF); 3746 /* 3747 * XXX namei called with LOCKPARENT but not LOCKLEAF has 3748 * the strange behaviour of leaving the vnode unlocked 3749 * if the target is the same vnode as the parent. 3750 */ 3751 if (vp == nd.ni_dvp) 3752 vrele(nd.ni_dvp); 3753 else 3754 vput(nd.ni_dvp); 3755 vrele(vp); 3756 return (EEXIST); 3757 } 3758 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3759 NDFREE(&nd, NDF_ONLY_PNBUF); 3760 vput(nd.ni_dvp); 3761 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3762 return (error); 3763 goto restart; 3764 } 3765 VATTR_NULL(&vattr); 3766 vattr.va_type = VDIR; 3767 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; 3768 #ifdef MAC 3769 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3770 &vattr); 3771 if (error != 0) 3772 goto out; 3773 #endif 3774 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3775 #ifdef MAC 3776 out: 3777 #endif 3778 NDFREE(&nd, NDF_ONLY_PNBUF); 3779 vput(nd.ni_dvp); 3780 if (error == 0) 3781 vput(nd.ni_vp); 3782 vn_finished_write(mp); 3783 return (error); 3784 } 3785 3786 /* 3787 * Remove a directory file. 
3788 */ 3789 #ifndef _SYS_SYSPROTO_H_ 3790 struct rmdir_args { 3791 char *path; 3792 }; 3793 #endif 3794 int 3795 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3796 { 3797 3798 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3799 0)); 3800 } 3801 3802 int 3803 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3804 enum uio_seg pathseg, int flag) 3805 { 3806 struct mount *mp; 3807 struct vnode *vp; 3808 struct file *fp; 3809 struct nameidata nd; 3810 cap_rights_t rights; 3811 int error; 3812 3813 fp = NULL; 3814 if (fd != FD_NONE) { 3815 error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), 3816 &fp); 3817 if (error != 0) 3818 return (error); 3819 } 3820 3821 restart: 3822 bwillwrite(); 3823 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3824 ((flag & AT_BENEATH) != 0 ? BENEATH : 0), 3825 pathseg, path, dfd, &cap_unlinkat_rights, td); 3826 if ((error = namei(&nd)) != 0) 3827 goto fdout; 3828 vp = nd.ni_vp; 3829 if (vp->v_type != VDIR) { 3830 error = ENOTDIR; 3831 goto out; 3832 } 3833 /* 3834 * No rmdir "." please. 3835 */ 3836 if (nd.ni_dvp == vp) { 3837 error = EINVAL; 3838 goto out; 3839 } 3840 /* 3841 * The root of a mounted filesystem cannot be deleted. 
3842 */ 3843 if (vp->v_vflag & VV_ROOT) { 3844 error = EBUSY; 3845 goto out; 3846 } 3847 3848 if (fp != NULL && fp->f_vnode != vp) { 3849 if (VN_IS_DOOMED(fp->f_vnode)) 3850 error = EBADF; 3851 else 3852 error = EDEADLK; 3853 goto out; 3854 } 3855 3856 #ifdef MAC 3857 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3858 &nd.ni_cnd); 3859 if (error != 0) 3860 goto out; 3861 #endif 3862 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3863 NDFREE(&nd, NDF_ONLY_PNBUF); 3864 vput(vp); 3865 if (nd.ni_dvp == vp) 3866 vrele(nd.ni_dvp); 3867 else 3868 vput(nd.ni_dvp); 3869 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3870 goto fdout; 3871 goto restart; 3872 } 3873 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3874 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3875 vn_finished_write(mp); 3876 out: 3877 NDFREE(&nd, NDF_ONLY_PNBUF); 3878 vput(vp); 3879 if (nd.ni_dvp == vp) 3880 vrele(nd.ni_dvp); 3881 else 3882 vput(nd.ni_dvp); 3883 fdout: 3884 if (fp != NULL) 3885 fdrop(fp, td); 3886 return (error); 3887 } 3888 3889 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3890 int 3891 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3892 long *basep, void (*func)(struct freebsd11_dirent *)) 3893 { 3894 struct freebsd11_dirent dstdp; 3895 struct dirent *dp, *edp; 3896 char *dirbuf; 3897 off_t base; 3898 ssize_t resid, ucount; 3899 int error; 3900 3901 /* XXX arbitrary sanity limit on `count'. 
*/ 3902 count = min(count, 64 * 1024); 3903 3904 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3905 3906 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3907 UIO_SYSSPACE); 3908 if (error != 0) 3909 goto done; 3910 if (basep != NULL) 3911 *basep = base; 3912 3913 ucount = 0; 3914 for (dp = (struct dirent *)dirbuf, 3915 edp = (struct dirent *)&dirbuf[count - resid]; 3916 ucount < count && dp < edp; ) { 3917 if (dp->d_reclen == 0) 3918 break; 3919 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3920 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3921 continue; 3922 dstdp.d_type = dp->d_type; 3923 dstdp.d_namlen = dp->d_namlen; 3924 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3925 if (dstdp.d_fileno != dp->d_fileno) { 3926 switch (ino64_trunc_error) { 3927 default: 3928 case 0: 3929 break; 3930 case 1: 3931 error = EOVERFLOW; 3932 goto done; 3933 case 2: 3934 dstdp.d_fileno = UINT32_MAX; 3935 break; 3936 } 3937 } 3938 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3939 ((dp->d_namlen + 1 + 3) &~ 3); 3940 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3941 bzero(dstdp.d_name + dstdp.d_namlen, 3942 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3943 dstdp.d_namlen); 3944 MPASS(dstdp.d_reclen <= dp->d_reclen); 3945 MPASS(ucount + dstdp.d_reclen <= count); 3946 if (func != NULL) 3947 func(&dstdp); 3948 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3949 if (error != 0) 3950 break; 3951 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3952 ucount += dstdp.d_reclen; 3953 } 3954 3955 done: 3956 free(dirbuf, M_TEMP); 3957 if (error == 0) 3958 td->td_retval[0] = ucount; 3959 return (error); 3960 } 3961 #endif /* COMPAT */ 3962 3963 #ifdef COMPAT_43 3964 static void 3965 ogetdirentries_cvt(struct freebsd11_dirent *dp) 3966 { 3967 #if (BYTE_ORDER == LITTLE_ENDIAN) 3968 /* 3969 * The expected low byte of dp->d_namlen is our dp->d_type. 3970 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
3971 */ 3972 dp->d_type = dp->d_namlen; 3973 dp->d_namlen = 0; 3974 #else 3975 /* 3976 * The dp->d_type is the high byte of the expected dp->d_namlen, 3977 * so must be zero'ed. 3978 */ 3979 dp->d_type = 0; 3980 #endif 3981 } 3982 3983 /* 3984 * Read a block of directory entries in a filesystem independent format. 3985 */ 3986 #ifndef _SYS_SYSPROTO_H_ 3987 struct ogetdirentries_args { 3988 int fd; 3989 char *buf; 3990 u_int count; 3991 long *basep; 3992 }; 3993 #endif 3994 int 3995 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 3996 { 3997 long loff; 3998 int error; 3999 4000 error = kern_ogetdirentries(td, uap, &loff); 4001 if (error == 0) 4002 error = copyout(&loff, uap->basep, sizeof(long)); 4003 return (error); 4004 } 4005 4006 int 4007 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4008 long *ploff) 4009 { 4010 long base; 4011 int error; 4012 4013 /* XXX arbitrary sanity limit on `count'. */ 4014 if (uap->count > 64 * 1024) 4015 return (EINVAL); 4016 4017 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4018 &base, ogetdirentries_cvt); 4019 4020 if (error == 0 && uap->basep != NULL) 4021 error = copyout(&base, uap->basep, sizeof(long)); 4022 4023 return (error); 4024 } 4025 #endif /* COMPAT_43 */ 4026 4027 #if defined(COMPAT_FREEBSD11) 4028 #ifndef _SYS_SYSPROTO_H_ 4029 struct freebsd11_getdirentries_args { 4030 int fd; 4031 char *buf; 4032 u_int count; 4033 long *basep; 4034 }; 4035 #endif 4036 int 4037 freebsd11_getdirentries(struct thread *td, 4038 struct freebsd11_getdirentries_args *uap) 4039 { 4040 long base; 4041 int error; 4042 4043 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4044 &base, NULL); 4045 4046 if (error == 0 && uap->basep != NULL) 4047 error = copyout(&base, uap->basep, sizeof(long)); 4048 return (error); 4049 } 4050 4051 int 4052 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4053 { 4054 struct 
freebsd11_getdirentries_args ap; 4055 4056 ap.fd = uap->fd; 4057 ap.buf = uap->buf; 4058 ap.count = uap->count; 4059 ap.basep = NULL; 4060 return (freebsd11_getdirentries(td, &ap)); 4061 } 4062 #endif /* COMPAT_FREEBSD11 */ 4063 4064 /* 4065 * Read a block of directory entries in a filesystem independent format. 4066 */ 4067 int 4068 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4069 { 4070 off_t base; 4071 int error; 4072 4073 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4074 NULL, UIO_USERSPACE); 4075 if (error != 0) 4076 return (error); 4077 if (uap->basep != NULL) 4078 error = copyout(&base, uap->basep, sizeof(off_t)); 4079 return (error); 4080 } 4081 4082 int 4083 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4084 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4085 { 4086 struct vnode *vp; 4087 struct file *fp; 4088 struct uio auio; 4089 struct iovec aiov; 4090 off_t loff; 4091 int error, eofflag; 4092 off_t foffset; 4093 4094 AUDIT_ARG_FD(fd); 4095 if (count > IOSIZE_MAX) 4096 return (EINVAL); 4097 auio.uio_resid = count; 4098 error = getvnode(td, fd, &cap_read_rights, &fp); 4099 if (error != 0) 4100 return (error); 4101 if ((fp->f_flag & FREAD) == 0) { 4102 fdrop(fp, td); 4103 return (EBADF); 4104 } 4105 vp = fp->f_vnode; 4106 foffset = foffset_lock(fp, 0); 4107 unionread: 4108 if (vp->v_type != VDIR) { 4109 error = EINVAL; 4110 goto fail; 4111 } 4112 aiov.iov_base = buf; 4113 aiov.iov_len = count; 4114 auio.uio_iov = &aiov; 4115 auio.uio_iovcnt = 1; 4116 auio.uio_rw = UIO_READ; 4117 auio.uio_segflg = bufseg; 4118 auio.uio_td = td; 4119 vn_lock(vp, LK_SHARED | LK_RETRY); 4120 AUDIT_ARG_VNODE1(vp); 4121 loff = auio.uio_offset = foffset; 4122 #ifdef MAC 4123 error = mac_vnode_check_readdir(td->td_ucred, vp); 4124 if (error == 0) 4125 #endif 4126 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4127 NULL); 4128 foffset = auio.uio_offset; 4129 if (error != 0) { 4130 
VOP_UNLOCK(vp); 4131 goto fail; 4132 } 4133 if (count == auio.uio_resid && 4134 (vp->v_vflag & VV_ROOT) && 4135 (vp->v_mount->mnt_flag & MNT_UNION)) { 4136 struct vnode *tvp = vp; 4137 4138 vp = vp->v_mount->mnt_vnodecovered; 4139 VREF(vp); 4140 fp->f_vnode = vp; 4141 fp->f_data = vp; 4142 foffset = 0; 4143 vput(tvp); 4144 goto unionread; 4145 } 4146 VOP_UNLOCK(vp); 4147 *basep = loff; 4148 if (residp != NULL) 4149 *residp = auio.uio_resid; 4150 td->td_retval[0] = count - auio.uio_resid; 4151 fail: 4152 foffset_unlock(fp, foffset, 0); 4153 fdrop(fp, td); 4154 return (error); 4155 } 4156 4157 /* 4158 * Set the mode mask for creation of filesystem nodes. 4159 */ 4160 #ifndef _SYS_SYSPROTO_H_ 4161 struct umask_args { 4162 int newmask; 4163 }; 4164 #endif 4165 int 4166 sys_umask(struct thread *td, struct umask_args *uap) 4167 { 4168 struct filedesc *fdp; 4169 4170 fdp = td->td_proc->p_fd; 4171 FILEDESC_XLOCK(fdp); 4172 td->td_retval[0] = fdp->fd_cmask; 4173 fdp->fd_cmask = uap->newmask & ALLPERMS; 4174 FILEDESC_XUNLOCK(fdp); 4175 return (0); 4176 } 4177 4178 /* 4179 * Void all references to file by ripping underlying filesystem away from 4180 * vnode. 
4181 */ 4182 #ifndef _SYS_SYSPROTO_H_ 4183 struct revoke_args { 4184 char *path; 4185 }; 4186 #endif 4187 int 4188 sys_revoke(struct thread *td, struct revoke_args *uap) 4189 { 4190 struct vnode *vp; 4191 struct vattr vattr; 4192 struct nameidata nd; 4193 int error; 4194 4195 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4196 uap->path, td); 4197 if ((error = namei(&nd)) != 0) 4198 return (error); 4199 vp = nd.ni_vp; 4200 NDFREE(&nd, NDF_ONLY_PNBUF); 4201 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4202 error = EINVAL; 4203 goto out; 4204 } 4205 #ifdef MAC 4206 error = mac_vnode_check_revoke(td->td_ucred, vp); 4207 if (error != 0) 4208 goto out; 4209 #endif 4210 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4211 if (error != 0) 4212 goto out; 4213 if (td->td_ucred->cr_uid != vattr.va_uid) { 4214 error = priv_check(td, PRIV_VFS_ADMIN); 4215 if (error != 0) 4216 goto out; 4217 } 4218 if (devfs_usecount(vp) > 0) 4219 VOP_REVOKE(vp, REVOKEALL); 4220 out: 4221 vput(vp); 4222 return (error); 4223 } 4224 4225 /* 4226 * Convert a user file descriptor to a kernel file entry and check that, if it 4227 * is a capability, the correct rights are present. A reference on the file 4228 * entry is held upon returning. 4229 */ 4230 int 4231 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4232 { 4233 struct file *fp; 4234 int error; 4235 4236 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); 4237 if (error != 0) 4238 return (error); 4239 4240 /* 4241 * The file could be not of the vnode type, or it may be not 4242 * yet fully initialized, in which case the f_vnode pointer 4243 * may be set, but f_ops is still badfileops. E.g., 4244 * devfs_open() transiently create such situation to 4245 * facilitate csw d_fdopen(). 4246 * 4247 * Dupfdopen() handling in kern_openat() installs the 4248 * half-baked file into the process descriptor table, allowing 4249 * other thread to dereference it. 
Guard against the race by 4250 * checking f_ops. 4251 */ 4252 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4253 fdrop(fp, td); 4254 return (EINVAL); 4255 } 4256 *fpp = fp; 4257 return (0); 4258 } 4259 4260 /* 4261 * Get an (NFS) file handle. 4262 */ 4263 #ifndef _SYS_SYSPROTO_H_ 4264 struct lgetfh_args { 4265 char *fname; 4266 fhandle_t *fhp; 4267 }; 4268 #endif 4269 int 4270 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4271 { 4272 4273 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4274 UIO_USERSPACE, uap->fhp)); 4275 } 4276 4277 #ifndef _SYS_SYSPROTO_H_ 4278 struct getfh_args { 4279 char *fname; 4280 fhandle_t *fhp; 4281 }; 4282 #endif 4283 int 4284 sys_getfh(struct thread *td, struct getfh_args *uap) 4285 { 4286 4287 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4288 uap->fhp)); 4289 } 4290 4291 /* 4292 * syscall for the rpc.lockd to use to translate an open descriptor into 4293 * a NFS file handle. 4294 * 4295 * warning: do not remove the priv_check() call or this becomes one giant 4296 * security hole. 4297 */ 4298 #ifndef _SYS_SYSPROTO_H_ 4299 struct getfhat_args { 4300 int fd; 4301 char *path; 4302 fhandle_t *fhp; 4303 int flags; 4304 }; 4305 #endif 4306 int 4307 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4308 { 4309 4310 if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH)) != 0) 4311 return (EINVAL); 4312 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4313 uap->fhp)); 4314 } 4315 4316 static int 4317 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4318 enum uio_seg pathseg, fhandle_t *fhp) 4319 { 4320 struct nameidata nd; 4321 fhandle_t fh; 4322 struct vnode *vp; 4323 int error; 4324 4325 error = priv_check(td, PRIV_VFS_GETFH); 4326 if (error != 0) 4327 return (error); 4328 NDINIT_AT(&nd, LOOKUP, ((flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 4329 FOLLOW) | ((flags & AT_BENEATH) != 0 ? 
BENEATH : 0) | LOCKLEAF | 4330 AUDITVNODE1, pathseg, path, fd, td); 4331 error = namei(&nd); 4332 if (error != 0) 4333 return (error); 4334 NDFREE(&nd, NDF_ONLY_PNBUF); 4335 vp = nd.ni_vp; 4336 bzero(&fh, sizeof(fh)); 4337 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4338 error = VOP_VPTOFH(vp, &fh.fh_fid); 4339 vput(vp); 4340 if (error == 0) 4341 error = copyout(&fh, fhp, sizeof (fh)); 4342 return (error); 4343 } 4344 4345 #ifndef _SYS_SYSPROTO_H_ 4346 struct fhlink_args { 4347 fhandle_t *fhp; 4348 const char *to; 4349 }; 4350 #endif 4351 int 4352 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4353 { 4354 4355 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4356 } 4357 4358 #ifndef _SYS_SYSPROTO_H_ 4359 struct fhlinkat_args { 4360 fhandle_t *fhp; 4361 int tofd; 4362 const char *to; 4363 }; 4364 #endif 4365 int 4366 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4367 { 4368 4369 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4370 } 4371 4372 static int 4373 kern_fhlinkat(struct thread *td, int fd, const char *path, 4374 enum uio_seg pathseg, fhandle_t *fhp) 4375 { 4376 fhandle_t fh; 4377 struct mount *mp; 4378 struct vnode *vp; 4379 int error; 4380 4381 error = priv_check(td, PRIV_VFS_GETFH); 4382 if (error != 0) 4383 return (error); 4384 error = copyin(fhp, &fh, sizeof(fh)); 4385 if (error != 0) 4386 return (error); 4387 do { 4388 bwillwrite(); 4389 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4390 return (ESTALE); 4391 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4392 vfs_unbusy(mp); 4393 if (error != 0) 4394 return (error); 4395 VOP_UNLOCK(vp); 4396 } while ((error = kern_linkat_vp(td, vp, fd, path, pathseg)) == EAGAIN); 4397 return (error); 4398 } 4399 4400 #ifndef _SYS_SYSPROTO_H_ 4401 struct fhreadlink_args { 4402 fhandle_t *fhp; 4403 char *buf; 4404 size_t bufsize; 4405 }; 4406 #endif 4407 int 4408 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4409 { 4410 fhandle_t fh; 
4411 struct mount *mp; 4412 struct vnode *vp; 4413 int error; 4414 4415 error = priv_check(td, PRIV_VFS_GETFH); 4416 if (error != 0) 4417 return (error); 4418 if (uap->bufsize > IOSIZE_MAX) 4419 return (EINVAL); 4420 error = copyin(uap->fhp, &fh, sizeof(fh)); 4421 if (error != 0) 4422 return (error); 4423 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4424 return (ESTALE); 4425 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4426 vfs_unbusy(mp); 4427 if (error != 0) 4428 return (error); 4429 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4430 vput(vp); 4431 return (error); 4432 } 4433 4434 /* 4435 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4436 * open descriptor. 4437 * 4438 * warning: do not remove the priv_check() call or this becomes one giant 4439 * security hole. 4440 */ 4441 #ifndef _SYS_SYSPROTO_H_ 4442 struct fhopen_args { 4443 const struct fhandle *u_fhp; 4444 int flags; 4445 }; 4446 #endif 4447 int 4448 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4449 { 4450 struct mount *mp; 4451 struct vnode *vp; 4452 struct fhandle fhp; 4453 struct file *fp; 4454 int fmode, error; 4455 int indx; 4456 4457 error = priv_check(td, PRIV_VFS_FHOPEN); 4458 if (error != 0) 4459 return (error); 4460 indx = -1; 4461 fmode = FFLAGS(uap->flags); 4462 /* why not allow a non-read/write open for our lockd? 
*/ 4463 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4464 return (EINVAL); 4465 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4466 if (error != 0) 4467 return(error); 4468 /* find the mount point */ 4469 mp = vfs_busyfs(&fhp.fh_fsid); 4470 if (mp == NULL) 4471 return (ESTALE); 4472 /* now give me my vnode, it gets returned to me locked */ 4473 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4474 vfs_unbusy(mp); 4475 if (error != 0) 4476 return (error); 4477 4478 error = falloc_noinstall(td, &fp); 4479 if (error != 0) { 4480 vput(vp); 4481 return (error); 4482 } 4483 /* 4484 * An extra reference on `fp' has been held for us by 4485 * falloc_noinstall(). 4486 */ 4487 4488 #ifdef INVARIANTS 4489 td->td_dupfd = -1; 4490 #endif 4491 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4492 if (error != 0) { 4493 KASSERT(fp->f_ops == &badfileops, 4494 ("VOP_OPEN in fhopen() set f_ops")); 4495 KASSERT(td->td_dupfd < 0, 4496 ("fhopen() encountered fdopen()")); 4497 4498 vput(vp); 4499 goto bad; 4500 } 4501 #ifdef INVARIANTS 4502 td->td_dupfd = 0; 4503 #endif 4504 fp->f_vnode = vp; 4505 fp->f_seqcount[UIO_READ] = 1; 4506 fp->f_seqcount[UIO_WRITE] = 1; 4507 finit(fp, (fmode & FMASK) | (fp->f_flag & FHASLOCK), DTYPE_VNODE, vp, 4508 &vnops); 4509 VOP_UNLOCK(vp); 4510 if ((fmode & O_TRUNC) != 0) { 4511 error = fo_truncate(fp, 0, td->td_ucred, td); 4512 if (error != 0) 4513 goto bad; 4514 } 4515 4516 error = finstall(td, fp, &indx, fmode, NULL); 4517 bad: 4518 fdrop(fp, td); 4519 td->td_retval[0] = indx; 4520 return (error); 4521 } 4522 4523 /* 4524 * Stat an (NFS) file handle. 
4525 */ 4526 #ifndef _SYS_SYSPROTO_H_ 4527 struct fhstat_args { 4528 struct fhandle *u_fhp; 4529 struct stat *sb; 4530 }; 4531 #endif 4532 int 4533 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4534 { 4535 struct stat sb; 4536 struct fhandle fh; 4537 int error; 4538 4539 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4540 if (error != 0) 4541 return (error); 4542 error = kern_fhstat(td, fh, &sb); 4543 if (error == 0) 4544 error = copyout(&sb, uap->sb, sizeof(sb)); 4545 return (error); 4546 } 4547 4548 int 4549 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4550 { 4551 struct mount *mp; 4552 struct vnode *vp; 4553 int error; 4554 4555 error = priv_check(td, PRIV_VFS_FHSTAT); 4556 if (error != 0) 4557 return (error); 4558 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4559 return (ESTALE); 4560 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4561 vfs_unbusy(mp); 4562 if (error != 0) 4563 return (error); 4564 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td); 4565 vput(vp); 4566 return (error); 4567 } 4568 4569 /* 4570 * Implement fstatfs() for (NFS) file handles. 
4571 */ 4572 #ifndef _SYS_SYSPROTO_H_ 4573 struct fhstatfs_args { 4574 struct fhandle *u_fhp; 4575 struct statfs *buf; 4576 }; 4577 #endif 4578 int 4579 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4580 { 4581 struct statfs *sfp; 4582 fhandle_t fh; 4583 int error; 4584 4585 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4586 if (error != 0) 4587 return (error); 4588 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4589 error = kern_fhstatfs(td, fh, sfp); 4590 if (error == 0) 4591 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4592 free(sfp, M_STATFS); 4593 return (error); 4594 } 4595 4596 int 4597 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4598 { 4599 struct mount *mp; 4600 struct vnode *vp; 4601 int error; 4602 4603 error = priv_check(td, PRIV_VFS_FHSTATFS); 4604 if (error != 0) 4605 return (error); 4606 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4607 return (ESTALE); 4608 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4609 if (error != 0) { 4610 vfs_unbusy(mp); 4611 return (error); 4612 } 4613 vput(vp); 4614 error = prison_canseemount(td->td_ucred, mp); 4615 if (error != 0) 4616 goto out; 4617 #ifdef MAC 4618 error = mac_mount_check_stat(td->td_ucred, mp); 4619 if (error != 0) 4620 goto out; 4621 #endif 4622 error = VFS_STATFS(mp, buf); 4623 out: 4624 vfs_unbusy(mp); 4625 return (error); 4626 } 4627 4628 /* 4629 * Unlike madvise(2), we do not make a best effort to remember every 4630 * possible caching hint. Instead, we remember the last setting with 4631 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4632 * region of any current setting. 
4633 */ 4634 int 4635 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4636 int advice) 4637 { 4638 struct fadvise_info *fa, *new; 4639 struct file *fp; 4640 struct vnode *vp; 4641 off_t end; 4642 int error; 4643 4644 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4645 return (EINVAL); 4646 AUDIT_ARG_VALUE(advice); 4647 switch (advice) { 4648 case POSIX_FADV_SEQUENTIAL: 4649 case POSIX_FADV_RANDOM: 4650 case POSIX_FADV_NOREUSE: 4651 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4652 break; 4653 case POSIX_FADV_NORMAL: 4654 case POSIX_FADV_WILLNEED: 4655 case POSIX_FADV_DONTNEED: 4656 new = NULL; 4657 break; 4658 default: 4659 return (EINVAL); 4660 } 4661 /* XXX: CAP_POSIX_FADVISE? */ 4662 AUDIT_ARG_FD(fd); 4663 error = fget(td, fd, &cap_no_rights, &fp); 4664 if (error != 0) 4665 goto out; 4666 AUDIT_ARG_FILE(td->td_proc, fp); 4667 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4668 error = ESPIPE; 4669 goto out; 4670 } 4671 if (fp->f_type != DTYPE_VNODE) { 4672 error = ENODEV; 4673 goto out; 4674 } 4675 vp = fp->f_vnode; 4676 if (vp->v_type != VREG) { 4677 error = ENODEV; 4678 goto out; 4679 } 4680 if (len == 0) 4681 end = OFF_MAX; 4682 else 4683 end = offset + len - 1; 4684 switch (advice) { 4685 case POSIX_FADV_SEQUENTIAL: 4686 case POSIX_FADV_RANDOM: 4687 case POSIX_FADV_NOREUSE: 4688 /* 4689 * Try to merge any existing non-standard region with 4690 * this new region if possible, otherwise create a new 4691 * non-standard region for this request. 
4692 */ 4693 mtx_pool_lock(mtxpool_sleep, fp); 4694 fa = fp->f_advice; 4695 if (fa != NULL && fa->fa_advice == advice && 4696 ((fa->fa_start <= end && fa->fa_end >= offset) || 4697 (end != OFF_MAX && fa->fa_start == end + 1) || 4698 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4699 if (offset < fa->fa_start) 4700 fa->fa_start = offset; 4701 if (end > fa->fa_end) 4702 fa->fa_end = end; 4703 } else { 4704 new->fa_advice = advice; 4705 new->fa_start = offset; 4706 new->fa_end = end; 4707 fp->f_advice = new; 4708 new = fa; 4709 } 4710 mtx_pool_unlock(mtxpool_sleep, fp); 4711 break; 4712 case POSIX_FADV_NORMAL: 4713 /* 4714 * If a the "normal" region overlaps with an existing 4715 * non-standard region, trim or remove the 4716 * non-standard region. 4717 */ 4718 mtx_pool_lock(mtxpool_sleep, fp); 4719 fa = fp->f_advice; 4720 if (fa != NULL) { 4721 if (offset <= fa->fa_start && end >= fa->fa_end) { 4722 new = fa; 4723 fp->f_advice = NULL; 4724 } else if (offset <= fa->fa_start && 4725 end >= fa->fa_start) 4726 fa->fa_start = end + 1; 4727 else if (offset <= fa->fa_end && end >= fa->fa_end) 4728 fa->fa_end = offset - 1; 4729 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4730 /* 4731 * If the "normal" region is a middle 4732 * portion of the existing 4733 * non-standard region, just remove 4734 * the whole thing rather than picking 4735 * one side or the other to 4736 * preserve. 
4737 */ 4738 new = fa; 4739 fp->f_advice = NULL; 4740 } 4741 } 4742 mtx_pool_unlock(mtxpool_sleep, fp); 4743 break; 4744 case POSIX_FADV_WILLNEED: 4745 case POSIX_FADV_DONTNEED: 4746 error = VOP_ADVISE(vp, offset, end, advice); 4747 break; 4748 } 4749 out: 4750 if (fp != NULL) 4751 fdrop(fp, td); 4752 free(new, M_FADVISE); 4753 return (error); 4754 } 4755 4756 int 4757 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4758 { 4759 int error; 4760 4761 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4762 uap->advice); 4763 return (kern_posix_error(td, error)); 4764 } 4765 4766 int 4767 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4768 off_t *outoffp, size_t len, unsigned int flags) 4769 { 4770 struct file *infp, *outfp; 4771 struct vnode *invp, *outvp; 4772 int error; 4773 size_t retlen; 4774 void *rl_rcookie, *rl_wcookie; 4775 off_t savinoff, savoutoff; 4776 4777 infp = outfp = NULL; 4778 rl_rcookie = rl_wcookie = NULL; 4779 savinoff = -1; 4780 error = 0; 4781 retlen = 0; 4782 4783 if (flags != 0) { 4784 error = EINVAL; 4785 goto out; 4786 } 4787 if (len > SSIZE_MAX) 4788 /* 4789 * Although the len argument is size_t, the return argument 4790 * is ssize_t (which is signed). Therefore a size that won't 4791 * fit in ssize_t can't be returned. 4792 */ 4793 len = SSIZE_MAX; 4794 4795 /* Get the file structures for the file descriptors. */ 4796 error = fget_read(td, infd, &cap_read_rights, &infp); 4797 if (error != 0) 4798 goto out; 4799 if (infp->f_ops == &badfileops) { 4800 error = EBADF; 4801 goto out; 4802 } 4803 if (infp->f_vnode == NULL) { 4804 error = EINVAL; 4805 goto out; 4806 } 4807 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4808 if (error != 0) 4809 goto out; 4810 if (outfp->f_ops == &badfileops) { 4811 error = EBADF; 4812 goto out; 4813 } 4814 if (outfp->f_vnode == NULL) { 4815 error = EINVAL; 4816 goto out; 4817 } 4818 4819 /* Set the offset pointers to the correct place. 
*/ 4820 if (inoffp == NULL) 4821 inoffp = &infp->f_offset; 4822 if (outoffp == NULL) 4823 outoffp = &outfp->f_offset; 4824 savinoff = *inoffp; 4825 savoutoff = *outoffp; 4826 4827 invp = infp->f_vnode; 4828 outvp = outfp->f_vnode; 4829 /* Sanity check the f_flag bits. */ 4830 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4831 (infp->f_flag & FREAD) == 0) { 4832 error = EBADF; 4833 goto out; 4834 } 4835 4836 /* If len == 0, just return 0. */ 4837 if (len == 0) 4838 goto out; 4839 4840 /* 4841 * If infp and outfp refer to the same file, the byte ranges cannot 4842 * overlap. 4843 */ 4844 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4845 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4846 savinoff))) { 4847 error = EINVAL; 4848 goto out; 4849 } 4850 4851 /* Range lock the byte ranges for both invp and outvp. */ 4852 for (;;) { 4853 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4854 len); 4855 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4856 len); 4857 if (rl_rcookie != NULL) 4858 break; 4859 vn_rangelock_unlock(outvp, rl_wcookie); 4860 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4861 vn_rangelock_unlock(invp, rl_rcookie); 4862 } 4863 4864 retlen = len; 4865 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4866 flags, infp->f_cred, outfp->f_cred, td); 4867 out: 4868 if (rl_rcookie != NULL) 4869 vn_rangelock_unlock(invp, rl_rcookie); 4870 if (rl_wcookie != NULL) 4871 vn_rangelock_unlock(outvp, rl_wcookie); 4872 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4873 *inoffp = savinoff; 4874 *outoffp = savoutoff; 4875 } 4876 if (outfp != NULL) 4877 fdrop(outfp, td); 4878 if (infp != NULL) 4879 fdrop(infp, td); 4880 td->td_retval[0] = retlen; 4881 return (error); 4882 } 4883 4884 int 4885 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 4886 { 4887 off_t inoff, outoff, *inoffp, *outoffp; 4888 int error; 4889 4890 inoffp = outoffp = 
NULL; 4891 if (uap->inoffp != NULL) { 4892 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 4893 if (error != 0) 4894 return (error); 4895 inoffp = &inoff; 4896 } 4897 if (uap->outoffp != NULL) { 4898 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 4899 if (error != 0) 4900 return (error); 4901 outoffp = &outoff; 4902 } 4903 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 4904 outoffp, uap->len, uap->flags); 4905 if (error == 0 && uap->inoffp != NULL) 4906 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 4907 if (error == 0 && uap->outoffp != NULL) 4908 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 4909 return (error); 4910 } 4911