1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #include <sys/bio.h> 48 #include <sys/buf.h> 49 #include <sys/capsicum.h> 50 #include <sys/disk.h> 51 #include <sys/sysent.h> 52 #include <sys/malloc.h> 53 #include <sys/mount.h> 54 #include <sys/mutex.h> 55 #include <sys/sysproto.h> 56 #include <sys/namei.h> 57 #include <sys/filedesc.h> 58 #include <sys/kernel.h> 59 #include <sys/fcntl.h> 60 #include <sys/file.h> 61 #include <sys/filio.h> 62 #include <sys/limits.h> 63 #include <sys/linker.h> 64 #include <sys/rwlock.h> 65 #include <sys/sdt.h> 66 #include <sys/stat.h> 67 #include <sys/sx.h> 68 #include <sys/unistd.h> 69 #include <sys/vnode.h> 70 #include <sys/priv.h> 71 #include <sys/proc.h> 72 #include <sys/dirent.h> 73 #include <sys/jail.h> 74 #include <sys/syscallsubr.h> 75 #include <sys/sysctl.h> 76 #ifdef KTRACE 77 #include <sys/ktrace.h> 78 #endif 79 80 #include <machine/stdarg.h> 81 82 #include <security/audit/audit.h> 83 #include <security/mac/mac_framework.h> 84 85 #include <vm/vm.h> 86 #include <vm/vm_object.h> 87 #include <vm/vm_page.h> 88 #include <vm/uma.h> 89 90 #include <fs/devfs/devfs.h> 91 92 #include <ufs/ufs/quota.h> 93 94 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 95 96 static int kern_chflagsat(struct thread *td, int fd, const char *path, 97 enum uio_seg pathseg, u_long flags, int atflag); 98 static int setfflags(struct thread *td, struct vnode *, u_long); 99 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 100 static int getutimens(const struct timespec *, enum uio_seg, 101 struct timespec *, int *); 102 static int setutimes(struct thread *td, struct vnode *, 103 const struct timespec *, int, int); 104 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 105 struct thread *td); 106 static 
int kern_fhlinkat(struct thread *td, int fd, const char *path, 107 enum uio_seg pathseg, fhandle_t *fhp); 108 static int kern_getfhat(struct thread *td, int flags, int fd, 109 const char *path, enum uio_seg pathseg, fhandle_t *fhp); 110 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 111 size_t count, struct thread *td); 112 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 113 const char *path, enum uio_seg segflag); 114 115 static uint64_t 116 at2cnpflags(u_int at_flags, u_int mask) 117 { 118 u_int64_t res; 119 120 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 121 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 122 123 res = 0; 124 at_flags &= mask; 125 if ((at_flags & AT_BENEATH) != 0) 126 res |= BENEATH; 127 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 128 res |= RBENEATH; 129 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 130 res |= FOLLOW; 131 /* NOFOLLOW is pseudo flag */ 132 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 133 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 134 FOLLOW; 135 } 136 return (res); 137 } 138 139 int 140 kern_sync(struct thread *td) 141 { 142 struct mount *mp, *nmp; 143 int save; 144 145 mtx_lock(&mountlist_mtx); 146 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 147 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 148 nmp = TAILQ_NEXT(mp, mnt_list); 149 continue; 150 } 151 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 152 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 153 save = curthread_pflags_set(TDP_SYNCIO); 154 vfs_periodic(mp, MNT_NOWAIT); 155 VFS_SYNC(mp, MNT_NOWAIT); 156 curthread_pflags_restore(save); 157 vn_finished_write(mp); 158 } 159 mtx_lock(&mountlist_mtx); 160 nmp = TAILQ_NEXT(mp, mnt_list); 161 vfs_unbusy(mp); 162 } 163 mtx_unlock(&mountlist_mtx); 164 return (0); 165 } 166 167 /* 168 * Sync each mounted filesystem. 
169 */ 170 #ifndef _SYS_SYSPROTO_H_ 171 struct sync_args { 172 int dummy; 173 }; 174 #endif 175 /* ARGSUSED */ 176 int 177 sys_sync(struct thread *td, struct sync_args *uap) 178 { 179 180 return (kern_sync(td)); 181 } 182 183 /* 184 * Change filesystem quotas. 185 */ 186 #ifndef _SYS_SYSPROTO_H_ 187 struct quotactl_args { 188 char *path; 189 int cmd; 190 int uid; 191 caddr_t arg; 192 }; 193 #endif 194 int 195 sys_quotactl(struct thread *td, struct quotactl_args *uap) 196 { 197 struct mount *mp; 198 struct nameidata nd; 199 int error; 200 201 AUDIT_ARG_CMD(uap->cmd); 202 AUDIT_ARG_UID(uap->uid); 203 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 204 return (EPERM); 205 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 206 uap->path, td); 207 if ((error = namei(&nd)) != 0) 208 return (error); 209 NDFREE(&nd, NDF_ONLY_PNBUF); 210 mp = nd.ni_vp->v_mount; 211 vfs_ref(mp); 212 vput(nd.ni_vp); 213 error = vfs_busy(mp, 0); 214 if (error != 0) { 215 vfs_rel(mp); 216 return (error); 217 } 218 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg); 219 220 /* 221 * Since quota on operation typically needs to open quota 222 * file, the Q_QUOTAON handler needs to unbusy the mount point 223 * before calling into namei. Otherwise, unmount might be 224 * started between two vfs_busy() invocations (first is our, 225 * second is from mount point cross-walk code in lookup()), 226 * causing deadlock. 227 * 228 * Require that Q_QUOTAON handles the vfs_busy() reference on 229 * its own, always returning with ubusied mount point. 230 */ 231 if ((uap->cmd >> SUBCMDSHIFT) != Q_QUOTAON && 232 (uap->cmd >> SUBCMDSHIFT) != Q_QUOTAOFF) 233 vfs_unbusy(mp); 234 vfs_rel(mp); 235 return (error); 236 } 237 238 /* 239 * Used by statfs conversion routines to scale the block size up if 240 * necessary so that all of the block counts are <= 'max_size'. Note 241 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 242 * value of 'n'. 
243 */ 244 void 245 statfs_scale_blocks(struct statfs *sf, long max_size) 246 { 247 uint64_t count; 248 int shift; 249 250 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 251 252 /* 253 * Attempt to scale the block counts to give a more accurate 254 * overview to userland of the ratio of free space to used 255 * space. To do this, find the largest block count and compute 256 * a divisor that lets it fit into a signed integer <= max_size. 257 */ 258 if (sf->f_bavail < 0) 259 count = -sf->f_bavail; 260 else 261 count = sf->f_bavail; 262 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 263 if (count <= max_size) 264 return; 265 266 count >>= flsl(max_size); 267 shift = 0; 268 while (count > 0) { 269 shift++; 270 count >>=1; 271 } 272 273 sf->f_bsize <<= shift; 274 sf->f_blocks >>= shift; 275 sf->f_bfree >>= shift; 276 sf->f_bavail >>= shift; 277 } 278 279 static int 280 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 281 { 282 int error; 283 284 if (mp == NULL) 285 return (EBADF); 286 error = vfs_busy(mp, 0); 287 vfs_rel(mp); 288 if (error != 0) 289 return (error); 290 #ifdef MAC 291 error = mac_mount_check_stat(td->td_ucred, mp); 292 if (error != 0) 293 goto out; 294 #endif 295 error = VFS_STATFS(mp, buf); 296 if (error != 0) 297 goto out; 298 if (priv_check_cred_vfs_generation(td->td_ucred)) { 299 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 300 prison_enforce_statfs(td->td_ucred, mp, buf); 301 } 302 out: 303 vfs_unbusy(mp); 304 return (error); 305 } 306 307 /* 308 * Get filesystem statistics. 
309 */ 310 #ifndef _SYS_SYSPROTO_H_ 311 struct statfs_args { 312 char *path; 313 struct statfs *buf; 314 }; 315 #endif 316 int 317 sys_statfs(struct thread *td, struct statfs_args *uap) 318 { 319 struct statfs *sfp; 320 int error; 321 322 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 323 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 324 if (error == 0) 325 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 326 free(sfp, M_STATFS); 327 return (error); 328 } 329 330 int 331 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 332 struct statfs *buf) 333 { 334 struct mount *mp; 335 struct nameidata nd; 336 int error; 337 338 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 339 pathseg, path, td); 340 error = namei(&nd); 341 if (error != 0) 342 return (error); 343 mp = nd.ni_vp->v_mount; 344 vfs_ref(mp); 345 NDFREE(&nd, NDF_ONLY_PNBUF); 346 vput(nd.ni_vp); 347 return (kern_do_statfs(td, mp, buf)); 348 } 349 350 /* 351 * Get filesystem statistics. 
352 */ 353 #ifndef _SYS_SYSPROTO_H_ 354 struct fstatfs_args { 355 int fd; 356 struct statfs *buf; 357 }; 358 #endif 359 int 360 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 361 { 362 struct statfs *sfp; 363 int error; 364 365 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 366 error = kern_fstatfs(td, uap->fd, sfp); 367 if (error == 0) 368 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 369 free(sfp, M_STATFS); 370 return (error); 371 } 372 373 int 374 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 375 { 376 struct file *fp; 377 struct mount *mp; 378 struct vnode *vp; 379 int error; 380 381 AUDIT_ARG_FD(fd); 382 error = getvnode(td, fd, &cap_fstatfs_rights, &fp); 383 if (error != 0) 384 return (error); 385 vp = fp->f_vnode; 386 vn_lock(vp, LK_SHARED | LK_RETRY); 387 #ifdef AUDIT 388 AUDIT_ARG_VNODE1(vp); 389 #endif 390 mp = vp->v_mount; 391 if (mp != NULL) 392 vfs_ref(mp); 393 VOP_UNLOCK(vp); 394 fdrop(fp, td); 395 return (kern_do_statfs(td, mp, buf)); 396 } 397 398 /* 399 * Get statistics on all filesystems. 400 */ 401 #ifndef _SYS_SYSPROTO_H_ 402 struct getfsstat_args { 403 struct statfs *buf; 404 long bufsize; 405 int mode; 406 }; 407 #endif 408 int 409 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 410 { 411 size_t count; 412 int error; 413 414 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 415 return (EINVAL); 416 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 417 UIO_USERSPACE, uap->mode); 418 if (error == 0) 419 td->td_retval[0] = count; 420 return (error); 421 } 422 423 /* 424 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 425 * The caller is responsible for freeing memory which will be allocated 426 * in '*buf'. 
 */
/*
 * Collect statistics for the mounted filesystems visible to 'td'.
 *
 * 'mode' must be MNT_WAIT or MNT_NOWAIT, otherwise EINVAL is returned
 * (and '*buf' is set to NULL for UIO_SYSSPACE callers).  With a zero
 * 'bufsize' only the number of visible mounts is counted.  For
 * UIO_USERSPACE, '*buf' is the user buffer and entries are copied out
 * one by one; for UIO_SYSSPACE a buffer is allocated here and returned
 * through '*buf'.  On success the number of entries is stored in
 * '*countp'.  A copyout() failure is returned directly, without updating
 * '*countp'.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int mode)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, *sptmp, *tofree;
	size_t count, maxcount;
	int error;

	switch (mode) {
	case MNT_WAIT:
	case MNT_NOWAIT:
		break;
	default:
		if (bufseg == UIO_SYSSPACE)
			*buf = NULL;
		return (EINVAL);
	}
restart:
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0) {
		sfsp = NULL;
		tofree = NULL;
	} else if (bufseg == UIO_USERSPACE) {
		sfsp = *buf;
		tofree = NULL;
	} else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer by the current number of mounts,
		 * capped by what the caller asked for.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
		    M_STATFS, M_WAITOK);
	}

	count = 0;

	/*
	 * If there is no target buffer they only want the count.
	 *
	 * This could be TAILQ_FOREACH but it is open-coded to match the original
	 * code below.
	 */
	if (sfsp == NULL) {
		mtx_lock(&mountlist_mtx);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (prison_canseemount(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#ifdef MAC
			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#endif
			count++;
			nmp = TAILQ_NEXT(mp, mnt_list);
		}
		mtx_unlock(&mountlist_mtx);
		*countp = count;
		return (0);
	}

	/*
	 * They want the entire thing.
	 *
	 * Short-circuit the corner case of no room for anything, avoids
	 * relocking below.
	 */
	if (maxcount < 1) {
		goto out;
	}

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (mode == MNT_WAIT) {
			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
				/*
				 * If vfs_busy() failed, and MBF_NOWAIT
				 * wasn't passed, then the mp is gone.
				 * Furthermore, because of MBF_MNTLSTLOCK,
				 * the mountlist_mtx was dropped.  We have
				 * no other choice than to start over.
				 */
				mtx_unlock(&mountlist_mtx);
				free(tofree, M_STATFS);
				goto restart;
			}
		} else {
			/* MNT_NOWAIT: skip mounts that are busy right now. */
			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
		}
		sp = &mp->mnt_stat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh
		 * the fsstat cache.
		 */
		if (mode != MNT_NOWAIT) {
			error = VFS_STATFS(mp, sp);
			if (error != 0) {
				/* Skip this mount; the error is not reported. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
		}
		if (priv_check_cred_vfs_generation(td->td_ucred)) {
			/*
			 * Work on a private copy so the cached mnt_stat is
			 * left untouched while the fsid is cleared and the
			 * jail policy applied.
			 */
			sptmp = malloc(sizeof(struct statfs), M_STATFS,
			    M_WAITOK);
			*sptmp = *sp;
			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
			prison_enforce_statfs(td->td_ucred, mp, sptmp);
			sp = sptmp;
		} else
			sptmp = NULL;
		if (bufseg == UIO_SYSSPACE) {
			bcopy(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
		} else /* if (bufseg == UIO_USERSPACE) */ {
			error = copyout(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
			if (error != 0) {
				vfs_unbusy(mp);
				return (error);
			}
		}
		sfsp++;
		count++;

		/* Buffer full: stop without retaking the list lock. */
		if (count == maxcount) {
			vfs_unbusy(mp);
			goto out;
		}

		/* Retake the list lock before stepping to the next mount. */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
out:
	*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char	*path;
	struct ostatfs *buf;
};
#endif
int
freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
{
	struct ostatfs osb;
	struct statfs *sfp;
	int error;

	/* Fetch in the current format, then convert to the old layout. */
	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
	if (error == 0) {
		freebsd4_cvtstatfs(sfp, &osb);
		error = copyout(&osb, uap->buf, sizeof(osb));
	}
	free(sfp, M_STATFS);
	return (error);
}

/*
 * Get filesystem statistics.
620 */ 621 #ifndef _SYS_SYSPROTO_H_ 622 struct freebsd4_fstatfs_args { 623 int fd; 624 struct ostatfs *buf; 625 }; 626 #endif 627 int 628 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 629 { 630 struct ostatfs osb; 631 struct statfs *sfp; 632 int error; 633 634 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 635 error = kern_fstatfs(td, uap->fd, sfp); 636 if (error == 0) { 637 freebsd4_cvtstatfs(sfp, &osb); 638 error = copyout(&osb, uap->buf, sizeof(osb)); 639 } 640 free(sfp, M_STATFS); 641 return (error); 642 } 643 644 /* 645 * Get statistics on all filesystems. 646 */ 647 #ifndef _SYS_SYSPROTO_H_ 648 struct freebsd4_getfsstat_args { 649 struct ostatfs *buf; 650 long bufsize; 651 int mode; 652 }; 653 #endif 654 int 655 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 656 { 657 struct statfs *buf, *sp; 658 struct ostatfs osb; 659 size_t count, size; 660 int error; 661 662 if (uap->bufsize < 0) 663 return (EINVAL); 664 count = uap->bufsize / sizeof(struct ostatfs); 665 if (count > SIZE_MAX / sizeof(struct statfs)) 666 return (EINVAL); 667 size = count * sizeof(struct statfs); 668 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 669 uap->mode); 670 if (error == 0) 671 td->td_retval[0] = count; 672 if (size != 0) { 673 sp = buf; 674 while (count != 0 && error == 0) { 675 freebsd4_cvtstatfs(sp, &osb); 676 error = copyout(&osb, uap->buf, sizeof(osb)); 677 sp++; 678 uap->buf++; 679 count--; 680 } 681 free(buf, M_STATFS); 682 } 683 return (error); 684 } 685 686 /* 687 * Implement fstatfs() for (NFS) file handles. 
688 */ 689 #ifndef _SYS_SYSPROTO_H_ 690 struct freebsd4_fhstatfs_args { 691 struct fhandle *u_fhp; 692 struct ostatfs *buf; 693 }; 694 #endif 695 int 696 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 697 { 698 struct ostatfs osb; 699 struct statfs *sfp; 700 fhandle_t fh; 701 int error; 702 703 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 704 if (error != 0) 705 return (error); 706 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 707 error = kern_fhstatfs(td, fh, sfp); 708 if (error == 0) { 709 freebsd4_cvtstatfs(sfp, &osb); 710 error = copyout(&osb, uap->buf, sizeof(osb)); 711 } 712 free(sfp, M_STATFS); 713 return (error); 714 } 715 716 /* 717 * Convert a new format statfs structure to an old format statfs structure. 718 */ 719 static void 720 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 721 { 722 723 statfs_scale_blocks(nsp, LONG_MAX); 724 bzero(osp, sizeof(*osp)); 725 osp->f_bsize = nsp->f_bsize; 726 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 727 osp->f_blocks = nsp->f_blocks; 728 osp->f_bfree = nsp->f_bfree; 729 osp->f_bavail = nsp->f_bavail; 730 osp->f_files = MIN(nsp->f_files, LONG_MAX); 731 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 732 osp->f_owner = nsp->f_owner; 733 osp->f_type = nsp->f_type; 734 osp->f_flags = nsp->f_flags; 735 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 736 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 737 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 738 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 739 strlcpy(osp->f_fstypename, nsp->f_fstypename, 740 MIN(MFSNAMELEN, OMFSNAMELEN)); 741 strlcpy(osp->f_mntonname, nsp->f_mntonname, 742 MIN(MNAMELEN, OMNAMELEN)); 743 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 744 MIN(MNAMELEN, OMNAMELEN)); 745 osp->f_fsid = nsp->f_fsid; 746 } 747 #endif /* COMPAT_FREEBSD4 */ 748 749 #if defined(COMPAT_FREEBSD11) 750 /* 751 * Get old format filesystem statistics. 
752 */ 753 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 754 755 int 756 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 757 { 758 struct freebsd11_statfs osb; 759 struct statfs *sfp; 760 int error; 761 762 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 763 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 764 if (error == 0) { 765 freebsd11_cvtstatfs(sfp, &osb); 766 error = copyout(&osb, uap->buf, sizeof(osb)); 767 } 768 free(sfp, M_STATFS); 769 return (error); 770 } 771 772 /* 773 * Get filesystem statistics. 774 */ 775 int 776 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 777 { 778 struct freebsd11_statfs osb; 779 struct statfs *sfp; 780 int error; 781 782 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 783 error = kern_fstatfs(td, uap->fd, sfp); 784 if (error == 0) { 785 freebsd11_cvtstatfs(sfp, &osb); 786 error = copyout(&osb, uap->buf, sizeof(osb)); 787 } 788 free(sfp, M_STATFS); 789 return (error); 790 } 791 792 /* 793 * Get statistics on all filesystems. 794 */ 795 int 796 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 797 { 798 struct freebsd11_statfs osb; 799 struct statfs *buf, *sp; 800 size_t count, size; 801 int error; 802 803 count = uap->bufsize / sizeof(struct ostatfs); 804 size = count * sizeof(struct statfs); 805 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 806 uap->mode); 807 if (error == 0) 808 td->td_retval[0] = count; 809 if (size > 0) { 810 sp = buf; 811 while (count > 0 && error == 0) { 812 freebsd11_cvtstatfs(sp, &osb); 813 error = copyout(&osb, uap->buf, sizeof(osb)); 814 sp++; 815 uap->buf++; 816 count--; 817 } 818 free(buf, M_STATFS); 819 } 820 return (error); 821 } 822 823 /* 824 * Implement fstatfs() for (NFS) file handles. 
825 */ 826 int 827 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 828 { 829 struct freebsd11_statfs osb; 830 struct statfs *sfp; 831 fhandle_t fh; 832 int error; 833 834 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 835 if (error) 836 return (error); 837 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 838 error = kern_fhstatfs(td, fh, sfp); 839 if (error == 0) { 840 freebsd11_cvtstatfs(sfp, &osb); 841 error = copyout(&osb, uap->buf, sizeof(osb)); 842 } 843 free(sfp, M_STATFS); 844 return (error); 845 } 846 847 /* 848 * Convert a new format statfs structure to an old format statfs structure. 849 */ 850 static void 851 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 852 { 853 854 bzero(osp, sizeof(*osp)); 855 osp->f_version = FREEBSD11_STATFS_VERSION; 856 osp->f_type = nsp->f_type; 857 osp->f_flags = nsp->f_flags; 858 osp->f_bsize = nsp->f_bsize; 859 osp->f_iosize = nsp->f_iosize; 860 osp->f_blocks = nsp->f_blocks; 861 osp->f_bfree = nsp->f_bfree; 862 osp->f_bavail = nsp->f_bavail; 863 osp->f_files = nsp->f_files; 864 osp->f_ffree = nsp->f_ffree; 865 osp->f_syncwrites = nsp->f_syncwrites; 866 osp->f_asyncwrites = nsp->f_asyncwrites; 867 osp->f_syncreads = nsp->f_syncreads; 868 osp->f_asyncreads = nsp->f_asyncreads; 869 osp->f_namemax = nsp->f_namemax; 870 osp->f_owner = nsp->f_owner; 871 osp->f_fsid = nsp->f_fsid; 872 strlcpy(osp->f_fstypename, nsp->f_fstypename, 873 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 874 strlcpy(osp->f_mntonname, nsp->f_mntonname, 875 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 876 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 877 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 878 } 879 #endif /* COMPAT_FREEBSD11 */ 880 881 /* 882 * Change current working directory to a given file descriptor. 
883 */ 884 #ifndef _SYS_SYSPROTO_H_ 885 struct fchdir_args { 886 int fd; 887 }; 888 #endif 889 int 890 sys_fchdir(struct thread *td, struct fchdir_args *uap) 891 { 892 struct vnode *vp, *tdp; 893 struct mount *mp; 894 struct file *fp; 895 int error; 896 897 AUDIT_ARG_FD(uap->fd); 898 error = getvnode(td, uap->fd, &cap_fchdir_rights, 899 &fp); 900 if (error != 0) 901 return (error); 902 vp = fp->f_vnode; 903 vrefact(vp); 904 fdrop(fp, td); 905 vn_lock(vp, LK_SHARED | LK_RETRY); 906 AUDIT_ARG_VNODE1(vp); 907 error = change_dir(vp, td); 908 while (!error && (mp = vp->v_mountedhere) != NULL) { 909 if (vfs_busy(mp, 0)) 910 continue; 911 error = VFS_ROOT(mp, LK_SHARED, &tdp); 912 vfs_unbusy(mp); 913 if (error != 0) 914 break; 915 vput(vp); 916 vp = tdp; 917 } 918 if (error != 0) { 919 vput(vp); 920 return (error); 921 } 922 VOP_UNLOCK(vp); 923 pwd_chdir(td, vp); 924 return (0); 925 } 926 927 /* 928 * Change current working directory (``.''). 929 */ 930 #ifndef _SYS_SYSPROTO_H_ 931 struct chdir_args { 932 char *path; 933 }; 934 #endif 935 int 936 sys_chdir(struct thread *td, struct chdir_args *uap) 937 { 938 939 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 940 } 941 942 int 943 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 944 { 945 struct nameidata nd; 946 int error; 947 948 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 949 pathseg, path, td); 950 if ((error = namei(&nd)) != 0) 951 return (error); 952 if ((error = change_dir(nd.ni_vp, td)) != 0) { 953 vput(nd.ni_vp); 954 NDFREE(&nd, NDF_ONLY_PNBUF); 955 return (error); 956 } 957 VOP_UNLOCK(nd.ni_vp); 958 NDFREE(&nd, NDF_ONLY_PNBUF); 959 pwd_chdir(td, nd.ni_vp); 960 return (0); 961 } 962 963 /* 964 * Change notion of root (``/'') directory. 
965 */ 966 #ifndef _SYS_SYSPROTO_H_ 967 struct chroot_args { 968 char *path; 969 }; 970 #endif 971 int 972 sys_chroot(struct thread *td, struct chroot_args *uap) 973 { 974 struct nameidata nd; 975 int error; 976 977 error = priv_check(td, PRIV_VFS_CHROOT); 978 if (error != 0) 979 return (error); 980 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 981 UIO_USERSPACE, uap->path, td); 982 error = namei(&nd); 983 if (error != 0) 984 goto error; 985 error = change_dir(nd.ni_vp, td); 986 if (error != 0) 987 goto e_vunlock; 988 #ifdef MAC 989 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 990 if (error != 0) 991 goto e_vunlock; 992 #endif 993 VOP_UNLOCK(nd.ni_vp); 994 error = pwd_chroot(td, nd.ni_vp); 995 vrele(nd.ni_vp); 996 NDFREE(&nd, NDF_ONLY_PNBUF); 997 return (error); 998 e_vunlock: 999 vput(nd.ni_vp); 1000 error: 1001 NDFREE(&nd, NDF_ONLY_PNBUF); 1002 return (error); 1003 } 1004 1005 /* 1006 * Common routine for chroot and chdir. Callers must provide a locked vnode 1007 * instance. 
1008 */ 1009 int 1010 change_dir(struct vnode *vp, struct thread *td) 1011 { 1012 #ifdef MAC 1013 int error; 1014 #endif 1015 1016 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1017 if (vp->v_type != VDIR) 1018 return (ENOTDIR); 1019 #ifdef MAC 1020 error = mac_vnode_check_chdir(td->td_ucred, vp); 1021 if (error != 0) 1022 return (error); 1023 #endif 1024 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1025 } 1026 1027 static __inline void 1028 flags_to_rights(int flags, cap_rights_t *rightsp) 1029 { 1030 1031 if (flags & O_EXEC) { 1032 cap_rights_set_one(rightsp, CAP_FEXECVE); 1033 } else { 1034 switch ((flags & O_ACCMODE)) { 1035 case O_RDONLY: 1036 cap_rights_set_one(rightsp, CAP_READ); 1037 break; 1038 case O_RDWR: 1039 cap_rights_set_one(rightsp, CAP_READ); 1040 /* FALLTHROUGH */ 1041 case O_WRONLY: 1042 cap_rights_set_one(rightsp, CAP_WRITE); 1043 if (!(flags & (O_APPEND | O_TRUNC))) 1044 cap_rights_set_one(rightsp, CAP_SEEK); 1045 break; 1046 } 1047 } 1048 1049 if (flags & O_CREAT) 1050 cap_rights_set_one(rightsp, CAP_CREATE); 1051 1052 if (flags & O_TRUNC) 1053 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1054 1055 if (flags & (O_SYNC | O_FSYNC)) 1056 cap_rights_set_one(rightsp, CAP_FSYNC); 1057 1058 if (flags & (O_EXLOCK | O_SHLOCK)) 1059 cap_rights_set_one(rightsp, CAP_FLOCK); 1060 } 1061 1062 /* 1063 * Check permissions, allocate an open file structure, and call the device 1064 * open routine if any. 
 */
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char	*path;
	int	flags;
	int	mode;
};
#endif
int
sys_open(struct thread *td, struct open_args *uap)
{

	/* open(2) is openat(2) relative to the current working directory. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int	fd;
	char	*path;
	int	flag;
	int	mode;
};
#endif
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Open (and possibly create) 'path', looked up relative to descriptor
 * 'fd', and install the resulting file in the descriptor table.
 *
 * 'flags' are the open(2) flags and 'mode' the creation mode, which is
 * filtered through the process umask.  On success the new descriptor
 * number is stored in td->td_retval[0] and 0 is returned; otherwise an
 * errno value is returned and no descriptor is installed.
 */
int
kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp = p->p_fd;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see 'success' and 'bad'. */
	indx = -1;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);
	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.
	 */
	if (flags & O_EXEC) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and set by finstall() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/*
	 * An extra reference on `fp' has been held for us by
	 * falloc_noinstall().
	 */
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	/* Apply the umask and strip the sticky bit from the creation mode. */
	cmode = ((mode & ~fdp->fd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd,
	    &rights, td);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open(&nd, &flags, cmode, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wondrous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops)
			goto success;

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;
	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO, ("Unexpected fifo."));
		finit_vnode(fp, flags, NULL, &vnops);
	}

	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall(td, fp, &indx, flags, fcaps);
		/* On success finstall() consumes fcaps. */
		if (error != 0) {
			filecaps_free(&nd.ni_filecaps);
			goto bad;
		}
	} else {
		filecaps_free(&nd.ni_filecaps);
	}

	/*
	 * Release our private reference, leaving the one associated with
	 * the descriptor table intact.
	 */
	fdrop(fp, td);
	td->td_retval[0] = indx;
	return (0);
bad:
	/* No descriptor was installed; drop the only reference to fp. */
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	fdrop(fp, td);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char	*path;
	int	mode;
};
#endif
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	/* creat(2) is open(2) with O_WRONLY | O_CREAT | O_TRUNC. */
	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
*/
#ifndef _SYS_SYSPROTO_H_
struct mknodat_args {
	int	fd;
	char	*path;
	mode_t	mode;
	dev_t	dev;
};
#endif
/*
 * mknodat(2) entry point: forwards the user-space path and the requested
 * mode/device to kern_mknodat().
 */
int
sys_mknodat(struct thread *td, struct mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}

#if defined(COMPAT_FREEBSD11)
/*
 * FreeBSD 11 compatible mknod(2): same as mknodat() relative to AT_FDCWD.
 */
int
freebsd11_mknod(struct thread *td,
    struct freebsd11_mknod_args *uap)
{

	return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode, uap->dev));
}

/*
 * FreeBSD 11 compatible mknodat(2).
 */
int
freebsd11_mknodat(struct thread *td,
    struct freebsd11_mknodat_args *uap)
{

	return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode,
	    uap->dev));
}
#endif /* COMPAT_FREEBSD11 */

/*
 * Create a character device, block device or whiteout node named by
 * (fd, path).
 *
 * The file type in 'mode' selects the operation:
 *   S_IFCHR/S_IFBLK  requires PRIV_VFS_MKNOD_DEV; dev == VNOVAL is EINVAL.
 *   S_IFWHT          requires PRIV_VFS_MKNOD_WHT; creates a whiteout.
 *   S_IFIFO          with dev == 0 is handed off to kern_mkfifoat().
 *   anything else    fails with EINVAL.
 *
 * Returns 0 on success, EEXIST if the name already exists, or the error
 * from the privilege check, lookup, MAC check or the VOP.
 */
int
kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int mode, dev_t dev)
{
	struct vnode *vp;
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error, whiteout = 0;

	AUDIT_ARG_MODE(mode);
	AUDIT_ARG_DEV(dev);
	/* Validate the requested node type before touching the filesystem. */
	switch (mode & S_IFMT) {
	case S_IFCHR:
	case S_IFBLK:
		error = priv_check(td, PRIV_VFS_MKNOD_DEV);
		if (error == 0 && dev == VNOVAL)
			error = EINVAL;
		break;
	case S_IFWHT:
		error = priv_check(td, PRIV_VFS_MKNOD_WHT);
		break;
	case S_IFIFO:
		if (dev == 0)
			return (kern_mkfifoat(td, fd, path, pathseg, mode));
		/* FALLTHROUGH */
	default:
		error = EINVAL;
		break;
	}
	if (error != 0)
		return (error);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, &cap_mknodat_rights,
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		/*
		 * The name already exists: release everything namei()
		 * returned.  When the result is the parent directory itself
		 * only its reference is dropped (vrele) instead of vput.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	} else {
		/* Build the attributes of the new node; apply the umask. */
		VATTR_NULL(&vattr);
		vattr.va_mode = (mode & ALLPERMS) &
		    ~td->td_proc->p_fd->fd_cmask;
		vattr.va_rdev = dev;
		whiteout = 0;

		switch (mode & S_IFMT) {
		case S_IFCHR:
			vattr.va_type = VCHR;
			break;
		case S_IFBLK:
			vattr.va_type = VBLK;
			break;
		case S_IFWHT:
			whiteout = 1;
			break;
		default:
			/* Other types were rejected by the first switch. */
			panic("kern_mknod: invalid mode");
		}
	}
	/*
	 * If write access cannot be started without waiting, drop the
	 * parent, wait (interruptibly) and redo the lookup from scratch.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
#ifdef MAC
	/* The MAC create check is not applied to whiteouts. */
	if (error == 0 && !whiteout)
		error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp,
		    &nd.ni_cnd, &vattr);
#endif
	if (error == 0) {
		if (whiteout)
			error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE);
		else {
			error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp,
			    &nd.ni_cnd, &vattr);
			/* The new vnode is not needed by the caller. */
			if (error == 0)
				vput(nd.ni_vp);
		}
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	/* The filesystem asked for the lookup to be repeated. */
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Create a named pipe.
*/
#ifndef _SYS_SYSPROTO_H_
struct mkfifo_args {
	char	*path;
	int	mode;
};
#endif
/*
 * mkfifo(2) entry point: mkfifoat() relative to AT_FDCWD.
 */
int
sys_mkfifo(struct thread *td, struct mkfifo_args *uap)
{

	return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkfifoat_args {
	int	fd;
	char	*path;
	mode_t	mode;
};
#endif
/*
 * mkfifoat(2) entry point.
 */
int
sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap)
{

	return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE,
	    uap->mode));
}

/*
 * Create a FIFO named by (fd, path) with permissions 'mode' masked by the
 * process umask.
 *
 * Returns 0 on success, EEXIST if the name already exists, or the error
 * from the lookup, MAC check or VOP_MKNOD().
 */
int
kern_mkfifoat(struct thread *td, int fd, const char *path,
    enum uio_seg pathseg, int mode)
{
	struct mount *mp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights,
	    td);
	if ((error = namei(&nd)) != 0)
		return (error);
	if (nd.ni_vp != NULL) {
		/* Name exists: release what namei() returned and fail. */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		if (nd.ni_vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(nd.ni_vp);
		return (EEXIST);
	}
	/*
	 * If write access cannot be started without waiting, drop the
	 * parent, wait (interruptibly) and redo the lookup.
	 */
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VFIFO;
	vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
	/* The new vnode is not needed by the caller. */
	if (error == 0)
		vput(nd.ni_vp);
#ifdef MAC
out:
#endif
	vput(nd.ni_dvp);
	vn_finished_write(mp);
	NDFREE(&nd, NDF_ONLY_PNBUF);
	/* The filesystem asked for the lookup to be repeated. */
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Make a hard file link.
 */
#ifndef _SYS_SYSPROTO_H_
struct link_args {
	char	*path;
	char	*link;
};
#endif
/*
 * link(2) entry point: both paths are resolved relative to AT_FDCWD and
 * the source path is looked up with FOLLOW.
 */
int
sys_link(struct thread *td, struct link_args *uap)
{

	return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link,
	    UIO_USERSPACE, FOLLOW));
}

#ifndef _SYS_SYSPROTO_H_
struct linkat_args {
	int	fd1;
	char	*path1;
	int	fd2;
	char	*path2;
	int	flag;
};
#endif
/*
 * linkat(2) entry point.  Only AT_SYMLINK_FOLLOW, AT_BENEATH and
 * AT_RESOLVE_BENEATH are accepted in 'flag'; anything else is EINVAL.
 * The accepted flags are translated to namei flags with at2cnpflags().
 */
int
sys_linkat(struct thread *td, struct linkat_args *uap)
{
	int flag;

	flag = uap->flag;
	if ((flag & ~(AT_SYMLINK_FOLLOW | AT_BENEATH |
	    AT_RESOLVE_BENEATH)) != 0)
		return (EINVAL);

	return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2,
	    UIO_USERSPACE, at2cnpflags(flag, AT_SYMLINK_FOLLOW | AT_BENEATH |
	    AT_RESOLVE_BENEATH)));
}

/*
 * Tunables consulted by can_hardlink().  Both default to 0 (no extra
 * ownership restriction on hard links).
 */
int hardlink_check_uid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW,
    &hardlink_check_uid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "users");
static int hardlink_check_gid = 0;
SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW,
    &hardlink_check_gid, 0,
    "Unprivileged processes cannot create hard links to files owned by other "
    "groups");

/*
 * Apply the hardlink_check_uid / hardlink_check_gid policy to 'vp' for
 * credential 'cred'.
 *
 * Returns 0 if linking is permitted, otherwise the error from
 * VOP_GETATTR() or from the PRIV_VFS_LINK privilege check.
 */
static int
can_hardlink(struct vnode *vp, struct ucred *cred)
{
	struct vattr va;
	int error;

	/* Fast path: neither policy is enabled, no attributes needed. */
	if (!hardlink_check_uid && !hardlink_check_gid)
		return (0);

	error = VOP_GETATTR(vp, &va, cred);
	if (error != 0)
		return (error);

	/* File owned by another user: PRIV_VFS_LINK is required. */
	if (hardlink_check_uid && cred->cr_uid != va.va_uid) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	/* File owned by a group the caller is not in: same privilege. */
	if (hardlink_check_gid && !groupmember(va.va_gid, cred)) {
		error = priv_check_cred(cred, PRIV_VFS_LINK);
		if (error != 0)
			return (error);
	}

	return (0);
}

/*
 * Create the hard link (fd2, path2) to the existing file (fd1, path1).
 *
 * 'follow' supplies the namei flags used when resolving the source path.
 * The source is looked up again and the operation retried for as long as
 * kern_linkat_vp() returns EAGAIN or ERELOOKUP.
 */
int
kern_linkat(struct thread *td, int fd1, int fd2, const char *path1,
    const char *path2, enum uio_seg segflag, int follow)
{
	struct nameidata nd;
	int error;

	do {
		bwillwrite();
		NDINIT_ATRIGHTS(&nd, LOOKUP, follow | AUDITVNODE1, segflag,
		    path1, fd1, &cap_linkat_source_rights, td);
		if ((error = namei(&nd)) != 0)
			return (error);
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/* kern_linkat_vp() releases nd.ni_vp on every path. */
		error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag);
	} while (error == EAGAIN || error == ERELOOKUP);
	return (error);
}

/*
 * Link the already looked-up source vnode 'vp' to the new name
 * (fd, path).  The reference on 'vp' held by the caller is released on
 * every return path.
 *
 * Returns:
 *   EPERM   'vp' is a directory.
 *   EEXIST  the target name already exists.
 *   EXDEV   target directory and 'vp' are on different mounts.
 *   EAGAIN  locking 'vp' failed, or the function had to wait for write
 *           access; the caller is expected to retry.
 *   otherwise the result of the lookup, policy checks or VOP_LINK().
 */
static int
kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path,
    enum uio_seg segflag)
{
	struct nameidata nd;
	struct mount *mp;
	int error;

	if (vp->v_type == VDIR) {
		vrele(vp);
		return (EPERM);		/* POSIX */
	}
	NDINIT_ATRIGHTS(&nd, CREATE,
	    LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd,
	    &cap_linkat_target_rights, td);
	if ((error = namei(&nd)) == 0) {
		if (nd.ni_vp != NULL) {
			/* Target exists: release everything and fail. */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			if (nd.ni_dvp == nd.ni_vp)
				vrele(nd.ni_dvp);
			else
				vput(nd.ni_dvp);
			vrele(nd.ni_vp);
			vrele(vp);
			return (EEXIST);
		} else if (nd.ni_dvp->v_mount != vp->v_mount) {
			/*
			 * Cross-device link.  No need to recheck
			 * vp->v_type, since it cannot change, except
			 * to VBAD.
			 */
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vput(nd.ni_dvp);
			vrele(vp);
			return (EXDEV);
		} else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) {
			/* vp is now locked; policy checks come first. */
			error = can_hardlink(vp, td->td_ucred);
#ifdef MAC
			if (error == 0)
				error = mac_vnode_check_link(td->td_ucred,
				    nd.ni_dvp, vp, &nd.ni_cnd);
#endif
			if (error != 0) {
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				return (error);
			}
			error = vn_start_write(vp, &mp, V_NOWAIT);
			if (error != 0) {
				/*
				 * Cannot start the write without waiting:
				 * drop everything, wait (interruptibly),
				 * then ask the caller to retry.
				 */
				vput(vp);
				vput(nd.ni_dvp);
				NDFREE(&nd, NDF_ONLY_PNBUF);
				error = vn_start_write(NULL, &mp,
				    V_XSLEEP | PCATCH);
				if (error != 0)
					return (error);
				return (EAGAIN);
			}
			error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd);
			/* Unlock only; the reference is dropped below. */
			VOP_UNLOCK(vp);
			vput(nd.ni_dvp);
			vn_finished_write(mp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
		} else {
			/* vn_lock() failed: have the caller retry. */
			vput(nd.ni_dvp);
			NDFREE(&nd, NDF_ONLY_PNBUF);
			vrele(vp);
			return (EAGAIN);
		}
	}
	vrele(vp);
	return (error);
}

/*
 * Make a symbolic link.
1656 */ 1657 #ifndef _SYS_SYSPROTO_H_ 1658 struct symlink_args { 1659 char *path; 1660 char *link; 1661 }; 1662 #endif 1663 int 1664 sys_symlink(struct thread *td, struct symlink_args *uap) 1665 { 1666 1667 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1668 UIO_USERSPACE)); 1669 } 1670 1671 #ifndef _SYS_SYSPROTO_H_ 1672 struct symlinkat_args { 1673 char *path; 1674 int fd; 1675 char *path2; 1676 }; 1677 #endif 1678 int 1679 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1680 { 1681 1682 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1683 UIO_USERSPACE)); 1684 } 1685 1686 int 1687 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1688 enum uio_seg segflg) 1689 { 1690 struct mount *mp; 1691 struct vattr vattr; 1692 const char *syspath; 1693 char *tmppath; 1694 struct nameidata nd; 1695 int error; 1696 1697 if (segflg == UIO_SYSSPACE) { 1698 syspath = path1; 1699 } else { 1700 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1701 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1702 goto out; 1703 syspath = tmppath; 1704 } 1705 AUDIT_ARG_TEXT(syspath); 1706 restart: 1707 bwillwrite(); 1708 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1709 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights, 1710 td); 1711 if ((error = namei(&nd)) != 0) 1712 goto out; 1713 if (nd.ni_vp) { 1714 NDFREE(&nd, NDF_ONLY_PNBUF); 1715 if (nd.ni_vp == nd.ni_dvp) 1716 vrele(nd.ni_dvp); 1717 else 1718 vput(nd.ni_dvp); 1719 vrele(nd.ni_vp); 1720 error = EEXIST; 1721 goto out; 1722 } 1723 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1724 NDFREE(&nd, NDF_ONLY_PNBUF); 1725 vput(nd.ni_dvp); 1726 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1727 goto out; 1728 goto restart; 1729 } 1730 VATTR_NULL(&vattr); 1731 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; 1732 #ifdef MAC 1733 vattr.va_type = VLNK; 1734 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 
&nd.ni_cnd, 1735 &vattr); 1736 if (error != 0) 1737 goto out2; 1738 #endif 1739 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1740 if (error == 0) 1741 vput(nd.ni_vp); 1742 #ifdef MAC 1743 out2: 1744 #endif 1745 NDFREE(&nd, NDF_ONLY_PNBUF); 1746 vput(nd.ni_dvp); 1747 vn_finished_write(mp); 1748 if (error == ERELOOKUP) 1749 goto restart; 1750 out: 1751 if (segflg != UIO_SYSSPACE) 1752 uma_zfree(namei_zone, tmppath); 1753 return (error); 1754 } 1755 1756 /* 1757 * Delete a whiteout from the filesystem. 1758 */ 1759 #ifndef _SYS_SYSPROTO_H_ 1760 struct undelete_args { 1761 char *path; 1762 }; 1763 #endif 1764 int 1765 sys_undelete(struct thread *td, struct undelete_args *uap) 1766 { 1767 struct mount *mp; 1768 struct nameidata nd; 1769 int error; 1770 1771 restart: 1772 bwillwrite(); 1773 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1774 UIO_USERSPACE, uap->path, td); 1775 error = namei(&nd); 1776 if (error != 0) 1777 return (error); 1778 1779 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1780 NDFREE(&nd, NDF_ONLY_PNBUF); 1781 if (nd.ni_vp == nd.ni_dvp) 1782 vrele(nd.ni_dvp); 1783 else 1784 vput(nd.ni_dvp); 1785 if (nd.ni_vp) 1786 vrele(nd.ni_vp); 1787 return (EEXIST); 1788 } 1789 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1790 NDFREE(&nd, NDF_ONLY_PNBUF); 1791 vput(nd.ni_dvp); 1792 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1793 return (error); 1794 goto restart; 1795 } 1796 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1797 NDFREE(&nd, NDF_ONLY_PNBUF); 1798 vput(nd.ni_dvp); 1799 vn_finished_write(mp); 1800 if (error == ERELOOKUP) 1801 goto restart; 1802 return (error); 1803 } 1804 1805 /* 1806 * Delete a name from the filesystem. 
1807 */ 1808 #ifndef _SYS_SYSPROTO_H_ 1809 struct unlink_args { 1810 char *path; 1811 }; 1812 #endif 1813 int 1814 sys_unlink(struct thread *td, struct unlink_args *uap) 1815 { 1816 1817 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1818 0, 0)); 1819 } 1820 1821 static int 1822 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1823 int flag, enum uio_seg pathseg, ino_t oldinum) 1824 { 1825 1826 if ((flag & ~AT_REMOVEDIR) != 0) 1827 return (EINVAL); 1828 1829 if ((flag & AT_REMOVEDIR) != 0) 1830 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1831 1832 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1833 } 1834 1835 #ifndef _SYS_SYSPROTO_H_ 1836 struct unlinkat_args { 1837 int fd; 1838 char *path; 1839 int flag; 1840 }; 1841 #endif 1842 int 1843 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1844 { 1845 1846 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1847 UIO_USERSPACE, 0)); 1848 } 1849 1850 #ifndef _SYS_SYSPROTO_H_ 1851 struct funlinkat_args { 1852 int dfd; 1853 const char *path; 1854 int fd; 1855 int flag; 1856 }; 1857 #endif 1858 int 1859 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1860 { 1861 1862 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1863 UIO_USERSPACE, 0)); 1864 } 1865 1866 int 1867 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1868 enum uio_seg pathseg, int flag, ino_t oldinum) 1869 { 1870 struct mount *mp; 1871 struct file *fp; 1872 struct vnode *vp; 1873 struct nameidata nd; 1874 struct stat sb; 1875 int error; 1876 1877 fp = NULL; 1878 if (fd != FD_NONE) { 1879 error = getvnode(td, fd, &cap_no_rights, &fp); 1880 if (error != 0) 1881 return (error); 1882 } 1883 1884 restart: 1885 bwillwrite(); 1886 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1887 at2cnpflags(flag, AT_BENEATH | AT_RESOLVE_BENEATH), 1888 pathseg, path, dfd, &cap_unlinkat_rights, td); 
1889 if ((error = namei(&nd)) != 0) { 1890 if (error == EINVAL) 1891 error = EPERM; 1892 goto fdout; 1893 } 1894 vp = nd.ni_vp; 1895 if (vp->v_type == VDIR && oldinum == 0) { 1896 error = EPERM; /* POSIX */ 1897 } else if (oldinum != 0 && 1898 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED, td)) == 0) && 1899 sb.st_ino != oldinum) { 1900 error = EIDRM; /* Identifier removed */ 1901 } else if (fp != NULL && fp->f_vnode != vp) { 1902 if (VN_IS_DOOMED(fp->f_vnode)) 1903 error = EBADF; 1904 else 1905 error = EDEADLK; 1906 } else { 1907 /* 1908 * The root of a mounted filesystem cannot be deleted. 1909 * 1910 * XXX: can this only be a VDIR case? 1911 */ 1912 if (vp->v_vflag & VV_ROOT) 1913 error = EBUSY; 1914 } 1915 if (error == 0) { 1916 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1917 NDFREE(&nd, NDF_ONLY_PNBUF); 1918 vput(nd.ni_dvp); 1919 if (vp == nd.ni_dvp) 1920 vrele(vp); 1921 else 1922 vput(vp); 1923 if ((error = vn_start_write(NULL, &mp, 1924 V_XSLEEP | PCATCH)) != 0) { 1925 goto fdout; 1926 } 1927 goto restart; 1928 } 1929 #ifdef MAC 1930 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1931 &nd.ni_cnd); 1932 if (error != 0) 1933 goto out; 1934 #endif 1935 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1936 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1937 #ifdef MAC 1938 out: 1939 #endif 1940 vn_finished_write(mp); 1941 } 1942 NDFREE(&nd, NDF_ONLY_PNBUF); 1943 vput(nd.ni_dvp); 1944 if (vp == nd.ni_dvp) 1945 vrele(vp); 1946 else 1947 vput(vp); 1948 if (error == ERELOOKUP) 1949 goto restart; 1950 fdout: 1951 if (fp != NULL) 1952 fdrop(fp, td); 1953 return (error); 1954 } 1955 1956 /* 1957 * Reposition read/write file offset. 
1958 */ 1959 #ifndef _SYS_SYSPROTO_H_ 1960 struct lseek_args { 1961 int fd; 1962 int pad; 1963 off_t offset; 1964 int whence; 1965 }; 1966 #endif 1967 int 1968 sys_lseek(struct thread *td, struct lseek_args *uap) 1969 { 1970 1971 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 1972 } 1973 1974 int 1975 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 1976 { 1977 struct file *fp; 1978 int error; 1979 1980 AUDIT_ARG_FD(fd); 1981 error = fget(td, fd, &cap_seek_rights, &fp); 1982 if (error != 0) 1983 return (error); 1984 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 1985 fo_seek(fp, offset, whence, td) : ESPIPE; 1986 fdrop(fp, td); 1987 return (error); 1988 } 1989 1990 #if defined(COMPAT_43) 1991 /* 1992 * Reposition read/write file offset. 1993 */ 1994 #ifndef _SYS_SYSPROTO_H_ 1995 struct olseek_args { 1996 int fd; 1997 long offset; 1998 int whence; 1999 }; 2000 #endif 2001 int 2002 olseek(struct thread *td, struct olseek_args *uap) 2003 { 2004 2005 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2006 } 2007 #endif /* COMPAT_43 */ 2008 2009 #if defined(COMPAT_FREEBSD6) 2010 /* Version with the 'pad' argument */ 2011 int 2012 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2013 { 2014 2015 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2016 } 2017 #endif 2018 2019 /* 2020 * Check access permissions using passed credentials. 2021 */ 2022 static int 2023 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2024 struct thread *td) 2025 { 2026 accmode_t accmode; 2027 int error; 2028 2029 /* Flags == 0 means only check for existence. 
*/ 2030 if (user_flags == 0) 2031 return (0); 2032 2033 accmode = 0; 2034 if (user_flags & R_OK) 2035 accmode |= VREAD; 2036 if (user_flags & W_OK) 2037 accmode |= VWRITE; 2038 if (user_flags & X_OK) 2039 accmode |= VEXEC; 2040 #ifdef MAC 2041 error = mac_vnode_check_access(cred, vp, accmode); 2042 if (error != 0) 2043 return (error); 2044 #endif 2045 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2046 error = VOP_ACCESS(vp, accmode, cred, td); 2047 return (error); 2048 } 2049 2050 /* 2051 * Check access permissions using "real" credentials. 2052 */ 2053 #ifndef _SYS_SYSPROTO_H_ 2054 struct access_args { 2055 char *path; 2056 int amode; 2057 }; 2058 #endif 2059 int 2060 sys_access(struct thread *td, struct access_args *uap) 2061 { 2062 2063 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2064 0, uap->amode)); 2065 } 2066 2067 #ifndef _SYS_SYSPROTO_H_ 2068 struct faccessat_args { 2069 int dirfd; 2070 char *path; 2071 int amode; 2072 int flag; 2073 } 2074 #endif 2075 int 2076 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2077 { 2078 2079 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2080 uap->amode)); 2081 } 2082 2083 int 2084 kern_accessat(struct thread *td, int fd, const char *path, 2085 enum uio_seg pathseg, int flag, int amode) 2086 { 2087 struct ucred *cred, *usecred; 2088 struct vnode *vp; 2089 struct nameidata nd; 2090 int error; 2091 2092 if ((flag & ~(AT_EACCESS | AT_BENEATH | AT_RESOLVE_BENEATH)) != 0) 2093 return (EINVAL); 2094 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2095 return (EINVAL); 2096 2097 /* 2098 * Create and modify a temporary credential instead of one that 2099 * is potentially shared (if we need one). 
2100 */ 2101 cred = td->td_ucred; 2102 if ((flag & AT_EACCESS) == 0 && 2103 ((cred->cr_uid != cred->cr_ruid || 2104 cred->cr_rgid != cred->cr_groups[0]))) { 2105 usecred = crdup(cred); 2106 usecred->cr_uid = cred->cr_ruid; 2107 usecred->cr_groups[0] = cred->cr_rgid; 2108 td->td_ucred = usecred; 2109 } else 2110 usecred = cred; 2111 AUDIT_ARG_VALUE(amode); 2112 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2113 AUDITVNODE1 | at2cnpflags(flag, AT_BENEATH | AT_RESOLVE_BENEATH), 2114 pathseg, path, fd, &cap_fstat_rights, td); 2115 if ((error = namei(&nd)) != 0) 2116 goto out; 2117 vp = nd.ni_vp; 2118 2119 error = vn_access(vp, amode, usecred, td); 2120 NDFREE(&nd, NDF_ONLY_PNBUF); 2121 vput(vp); 2122 out: 2123 if (usecred != cred) { 2124 td->td_ucred = cred; 2125 crfree(usecred); 2126 } 2127 return (error); 2128 } 2129 2130 /* 2131 * Check access permissions using "effective" credentials. 2132 */ 2133 #ifndef _SYS_SYSPROTO_H_ 2134 struct eaccess_args { 2135 char *path; 2136 int amode; 2137 }; 2138 #endif 2139 int 2140 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2141 { 2142 2143 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2144 AT_EACCESS, uap->amode)); 2145 } 2146 2147 #if defined(COMPAT_43) 2148 /* 2149 * Get file status; this version follows links. 2150 */ 2151 #ifndef _SYS_SYSPROTO_H_ 2152 struct ostat_args { 2153 char *path; 2154 struct ostat *ub; 2155 }; 2156 #endif 2157 int 2158 ostat(struct thread *td, struct ostat_args *uap) 2159 { 2160 struct stat sb; 2161 struct ostat osb; 2162 int error; 2163 2164 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2165 &sb, NULL); 2166 if (error != 0) 2167 return (error); 2168 cvtstat(&sb, &osb); 2169 return (copyout(&osb, uap->ub, sizeof (osb))); 2170 } 2171 2172 /* 2173 * Get file status; this version does not follow links. 
2174 */ 2175 #ifndef _SYS_SYSPROTO_H_ 2176 struct olstat_args { 2177 char *path; 2178 struct ostat *ub; 2179 }; 2180 #endif 2181 int 2182 olstat(struct thread *td, struct olstat_args *uap) 2183 { 2184 struct stat sb; 2185 struct ostat osb; 2186 int error; 2187 2188 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2189 UIO_USERSPACE, &sb, NULL); 2190 if (error != 0) 2191 return (error); 2192 cvtstat(&sb, &osb); 2193 return (copyout(&osb, uap->ub, sizeof (osb))); 2194 } 2195 2196 /* 2197 * Convert from an old to a new stat structure. 2198 * XXX: many values are blindly truncated. 2199 */ 2200 void 2201 cvtstat(struct stat *st, struct ostat *ost) 2202 { 2203 2204 bzero(ost, sizeof(*ost)); 2205 ost->st_dev = st->st_dev; 2206 ost->st_ino = st->st_ino; 2207 ost->st_mode = st->st_mode; 2208 ost->st_nlink = st->st_nlink; 2209 ost->st_uid = st->st_uid; 2210 ost->st_gid = st->st_gid; 2211 ost->st_rdev = st->st_rdev; 2212 ost->st_size = MIN(st->st_size, INT32_MAX); 2213 ost->st_atim = st->st_atim; 2214 ost->st_mtim = st->st_mtim; 2215 ost->st_ctim = st->st_ctim; 2216 ost->st_blksize = st->st_blksize; 2217 ost->st_blocks = st->st_blocks; 2218 ost->st_flags = st->st_flags; 2219 ost->st_gen = st->st_gen; 2220 } 2221 #endif /* COMPAT_43 */ 2222 2223 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2224 int ino64_trunc_error; 2225 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2226 &ino64_trunc_error, 0, 2227 "Error on truncation of device, file or inode number, or link count"); 2228 2229 int 2230 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2231 { 2232 2233 ost->st_dev = st->st_dev; 2234 if (ost->st_dev != st->st_dev) { 2235 switch (ino64_trunc_error) { 2236 default: 2237 /* 2238 * Since dev_t is almost raw, don't clamp to the 2239 * maximum for case 2, but ignore the error. 
2240 */ 2241 break; 2242 case 1: 2243 return (EOVERFLOW); 2244 } 2245 } 2246 ost->st_ino = st->st_ino; 2247 if (ost->st_ino != st->st_ino) { 2248 switch (ino64_trunc_error) { 2249 default: 2250 case 0: 2251 break; 2252 case 1: 2253 return (EOVERFLOW); 2254 case 2: 2255 ost->st_ino = UINT32_MAX; 2256 break; 2257 } 2258 } 2259 ost->st_mode = st->st_mode; 2260 ost->st_nlink = st->st_nlink; 2261 if (ost->st_nlink != st->st_nlink) { 2262 switch (ino64_trunc_error) { 2263 default: 2264 case 0: 2265 break; 2266 case 1: 2267 return (EOVERFLOW); 2268 case 2: 2269 ost->st_nlink = UINT16_MAX; 2270 break; 2271 } 2272 } 2273 ost->st_uid = st->st_uid; 2274 ost->st_gid = st->st_gid; 2275 ost->st_rdev = st->st_rdev; 2276 if (ost->st_rdev != st->st_rdev) { 2277 switch (ino64_trunc_error) { 2278 default: 2279 break; 2280 case 1: 2281 return (EOVERFLOW); 2282 } 2283 } 2284 ost->st_atim = st->st_atim; 2285 ost->st_mtim = st->st_mtim; 2286 ost->st_ctim = st->st_ctim; 2287 ost->st_size = st->st_size; 2288 ost->st_blocks = st->st_blocks; 2289 ost->st_blksize = st->st_blksize; 2290 ost->st_flags = st->st_flags; 2291 ost->st_gen = st->st_gen; 2292 ost->st_lspare = 0; 2293 ost->st_birthtim = st->st_birthtim; 2294 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2295 sizeof(*ost) - offsetof(struct freebsd11_stat, 2296 st_birthtim) - sizeof(ost->st_birthtim)); 2297 return (0); 2298 } 2299 2300 int 2301 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2302 { 2303 struct stat sb; 2304 struct freebsd11_stat osb; 2305 int error; 2306 2307 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2308 &sb, NULL); 2309 if (error != 0) 2310 return (error); 2311 error = freebsd11_cvtstat(&sb, &osb); 2312 if (error == 0) 2313 error = copyout(&osb, uap->ub, sizeof(osb)); 2314 return (error); 2315 } 2316 2317 int 2318 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2319 { 2320 struct stat sb; 2321 struct freebsd11_stat osb; 2322 int error; 2323 2324 
error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2325 UIO_USERSPACE, &sb, NULL); 2326 if (error != 0) 2327 return (error); 2328 error = freebsd11_cvtstat(&sb, &osb); 2329 if (error == 0) 2330 error = copyout(&osb, uap->ub, sizeof(osb)); 2331 return (error); 2332 } 2333 2334 int 2335 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2336 { 2337 struct fhandle fh; 2338 struct stat sb; 2339 struct freebsd11_stat osb; 2340 int error; 2341 2342 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2343 if (error != 0) 2344 return (error); 2345 error = kern_fhstat(td, fh, &sb); 2346 if (error != 0) 2347 return (error); 2348 error = freebsd11_cvtstat(&sb, &osb); 2349 if (error == 0) 2350 error = copyout(&osb, uap->sb, sizeof(osb)); 2351 return (error); 2352 } 2353 2354 int 2355 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2356 { 2357 struct stat sb; 2358 struct freebsd11_stat osb; 2359 int error; 2360 2361 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2362 UIO_USERSPACE, &sb, NULL); 2363 if (error != 0) 2364 return (error); 2365 error = freebsd11_cvtstat(&sb, &osb); 2366 if (error == 0) 2367 error = copyout(&osb, uap->buf, sizeof(osb)); 2368 return (error); 2369 } 2370 #endif /* COMPAT_FREEBSD11 */ 2371 2372 /* 2373 * Get file status 2374 */ 2375 #ifndef _SYS_SYSPROTO_H_ 2376 struct fstatat_args { 2377 int fd; 2378 char *path; 2379 struct stat *buf; 2380 int flag; 2381 } 2382 #endif 2383 int 2384 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2385 { 2386 struct stat sb; 2387 int error; 2388 2389 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2390 UIO_USERSPACE, &sb, NULL); 2391 if (error == 0) 2392 error = copyout(&sb, uap->buf, sizeof (sb)); 2393 return (error); 2394 } 2395 2396 int 2397 kern_statat(struct thread *td, int flag, int fd, const char *path, 2398 enum uio_seg pathseg, struct stat *sbp, 2399 void (*hook)(struct vnode *vp, struct stat *sbp)) 2400 { 2401 struct nameidata 
nd; 2402 int error; 2403 2404 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH | 2405 AT_RESOLVE_BENEATH)) != 0) 2406 return (EINVAL); 2407 2408 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_BENEATH | 2409 AT_RESOLVE_BENEATH | AT_SYMLINK_NOFOLLOW) | LOCKSHARED | LOCKLEAF | 2410 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights, td); 2411 2412 if ((error = namei(&nd)) != 0) 2413 return (error); 2414 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED, td); 2415 if (error == 0) { 2416 if (__predict_false(hook != NULL)) 2417 hook(nd.ni_vp, sbp); 2418 } 2419 NDFREE(&nd, NDF_ONLY_PNBUF); 2420 vput(nd.ni_vp); 2421 #ifdef __STAT_TIME_T_EXT 2422 sbp->st_atim_ext = 0; 2423 sbp->st_mtim_ext = 0; 2424 sbp->st_ctim_ext = 0; 2425 sbp->st_btim_ext = 0; 2426 #endif 2427 #ifdef KTRACE 2428 if (KTRPOINT(td, KTR_STRUCT)) 2429 ktrstat_error(sbp, error); 2430 #endif 2431 return (error); 2432 } 2433 2434 #if defined(COMPAT_FREEBSD11) 2435 /* 2436 * Implementation of the NetBSD [l]stat() functions. 2437 */ 2438 void 2439 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2440 { 2441 2442 bzero(nsb, sizeof(*nsb)); 2443 nsb->st_dev = sb->st_dev; 2444 nsb->st_ino = sb->st_ino; 2445 nsb->st_mode = sb->st_mode; 2446 nsb->st_nlink = sb->st_nlink; 2447 nsb->st_uid = sb->st_uid; 2448 nsb->st_gid = sb->st_gid; 2449 nsb->st_rdev = sb->st_rdev; 2450 nsb->st_atim = sb->st_atim; 2451 nsb->st_mtim = sb->st_mtim; 2452 nsb->st_ctim = sb->st_ctim; 2453 nsb->st_size = sb->st_size; 2454 nsb->st_blocks = sb->st_blocks; 2455 nsb->st_blksize = sb->st_blksize; 2456 nsb->st_flags = sb->st_flags; 2457 nsb->st_gen = sb->st_gen; 2458 nsb->st_birthtim = sb->st_birthtim; 2459 } 2460 2461 #ifndef _SYS_SYSPROTO_H_ 2462 struct freebsd11_nstat_args { 2463 char *path; 2464 struct nstat *ub; 2465 }; 2466 #endif 2467 int 2468 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2469 { 2470 struct stat sb; 2471 struct nstat nsb; 2472 int error; 2473 2474 error = kern_statat(td, 0, AT_FDCWD, 
uap->path, UIO_USERSPACE, 2475 &sb, NULL); 2476 if (error != 0) 2477 return (error); 2478 freebsd11_cvtnstat(&sb, &nsb); 2479 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2480 } 2481 2482 /* 2483 * NetBSD lstat. Get file status; this version does not follow links. 2484 */ 2485 #ifndef _SYS_SYSPROTO_H_ 2486 struct freebsd11_nlstat_args { 2487 char *path; 2488 struct nstat *ub; 2489 }; 2490 #endif 2491 int 2492 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2493 { 2494 struct stat sb; 2495 struct nstat nsb; 2496 int error; 2497 2498 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2499 UIO_USERSPACE, &sb, NULL); 2500 if (error != 0) 2501 return (error); 2502 freebsd11_cvtnstat(&sb, &nsb); 2503 return (copyout(&nsb, uap->ub, sizeof (nsb))); 2504 } 2505 #endif /* COMPAT_FREEBSD11 */ 2506 2507 /* 2508 * Get configurable pathname variables. 2509 */ 2510 #ifndef _SYS_SYSPROTO_H_ 2511 struct pathconf_args { 2512 char *path; 2513 int name; 2514 }; 2515 #endif 2516 int 2517 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2518 { 2519 long value; 2520 int error; 2521 2522 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2523 &value); 2524 if (error == 0) 2525 td->td_retval[0] = value; 2526 return (error); 2527 } 2528 2529 #ifndef _SYS_SYSPROTO_H_ 2530 struct lpathconf_args { 2531 char *path; 2532 int name; 2533 }; 2534 #endif 2535 int 2536 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2537 { 2538 long value; 2539 int error; 2540 2541 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2542 NOFOLLOW, &value); 2543 if (error == 0) 2544 td->td_retval[0] = value; 2545 return (error); 2546 } 2547 2548 int 2549 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2550 int name, u_long flags, long *valuep) 2551 { 2552 struct nameidata nd; 2553 int error; 2554 2555 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2556 pathseg, path, td); 2557 if 
((error = namei(&nd)) != 0) 2558 return (error); 2559 NDFREE(&nd, NDF_ONLY_PNBUF); 2560 2561 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2562 vput(nd.ni_vp); 2563 return (error); 2564 } 2565 2566 /* 2567 * Return target name of a symbolic link. 2568 */ 2569 #ifndef _SYS_SYSPROTO_H_ 2570 struct readlink_args { 2571 char *path; 2572 char *buf; 2573 size_t count; 2574 }; 2575 #endif 2576 int 2577 sys_readlink(struct thread *td, struct readlink_args *uap) 2578 { 2579 2580 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2581 uap->buf, UIO_USERSPACE, uap->count)); 2582 } 2583 #ifndef _SYS_SYSPROTO_H_ 2584 struct readlinkat_args { 2585 int fd; 2586 char *path; 2587 char *buf; 2588 size_t bufsize; 2589 }; 2590 #endif 2591 int 2592 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2593 { 2594 2595 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2596 uap->buf, UIO_USERSPACE, uap->bufsize)); 2597 } 2598 2599 int 2600 kern_readlinkat(struct thread *td, int fd, const char *path, 2601 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2602 { 2603 struct vnode *vp; 2604 struct nameidata nd; 2605 int error; 2606 2607 if (count > IOSIZE_MAX) 2608 return (EINVAL); 2609 2610 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 2611 pathseg, path, fd, td); 2612 2613 if ((error = namei(&nd)) != 0) 2614 return (error); 2615 NDFREE(&nd, NDF_ONLY_PNBUF); 2616 vp = nd.ni_vp; 2617 2618 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2619 vput(vp); 2620 2621 return (error); 2622 } 2623 2624 /* 2625 * Helper function to readlink from a vnode 2626 */ 2627 static int 2628 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2629 struct thread *td) 2630 { 2631 struct iovec aiov; 2632 struct uio auio; 2633 int error; 2634 2635 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2636 #ifdef MAC 2637 error = mac_vnode_check_readlink(td->td_ucred, vp); 2638 if (error != 0) 2639 
return (error); 2640 #endif 2641 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2642 return (EINVAL); 2643 2644 aiov.iov_base = buf; 2645 aiov.iov_len = count; 2646 auio.uio_iov = &aiov; 2647 auio.uio_iovcnt = 1; 2648 auio.uio_offset = 0; 2649 auio.uio_rw = UIO_READ; 2650 auio.uio_segflg = bufseg; 2651 auio.uio_td = td; 2652 auio.uio_resid = count; 2653 error = VOP_READLINK(vp, &auio, td->td_ucred); 2654 td->td_retval[0] = count - auio.uio_resid; 2655 return (error); 2656 } 2657 2658 /* 2659 * Common implementation code for chflags() and fchflags(). 2660 */ 2661 static int 2662 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2663 { 2664 struct mount *mp; 2665 struct vattr vattr; 2666 int error; 2667 2668 /* We can't support the value matching VNOVAL. */ 2669 if (flags == VNOVAL) 2670 return (EOPNOTSUPP); 2671 2672 /* 2673 * Prevent non-root users from setting flags on devices. When 2674 * a device is reused, users can retain ownership of the device 2675 * if they are allowed to set flags and programs assume that 2676 * chown can't fail when done as root. 2677 */ 2678 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2679 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2680 if (error != 0) 2681 return (error); 2682 } 2683 2684 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2685 return (error); 2686 VATTR_NULL(&vattr); 2687 vattr.va_flags = flags; 2688 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2689 #ifdef MAC 2690 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2691 if (error == 0) 2692 #endif 2693 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2694 VOP_UNLOCK(vp); 2695 vn_finished_write(mp); 2696 return (error); 2697 } 2698 2699 /* 2700 * Change flags of a file given a path name. 
2701 */ 2702 #ifndef _SYS_SYSPROTO_H_ 2703 struct chflags_args { 2704 const char *path; 2705 u_long flags; 2706 }; 2707 #endif 2708 int 2709 sys_chflags(struct thread *td, struct chflags_args *uap) 2710 { 2711 2712 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2713 uap->flags, 0)); 2714 } 2715 2716 #ifndef _SYS_SYSPROTO_H_ 2717 struct chflagsat_args { 2718 int fd; 2719 const char *path; 2720 u_long flags; 2721 int atflag; 2722 } 2723 #endif 2724 int 2725 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2726 { 2727 2728 if ((uap->atflag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH | 2729 AT_RESOLVE_BENEATH)) != 0) 2730 return (EINVAL); 2731 2732 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2733 uap->flags, uap->atflag)); 2734 } 2735 2736 /* 2737 * Same as chflags() but doesn't follow symlinks. 2738 */ 2739 #ifndef _SYS_SYSPROTO_H_ 2740 struct lchflags_args { 2741 const char *path; 2742 u_long flags; 2743 }; 2744 #endif 2745 int 2746 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2747 { 2748 2749 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2750 uap->flags, AT_SYMLINK_NOFOLLOW)); 2751 } 2752 2753 static int 2754 kern_chflagsat(struct thread *td, int fd, const char *path, 2755 enum uio_seg pathseg, u_long flags, int atflag) 2756 { 2757 struct nameidata nd; 2758 int error; 2759 2760 AUDIT_ARG_FFLAGS(flags); 2761 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2762 AT_BENEATH | AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, 2763 &cap_fchflags_rights, td); 2764 if ((error = namei(&nd)) != 0) 2765 return (error); 2766 NDFREE(&nd, NDF_ONLY_PNBUF); 2767 error = setfflags(td, nd.ni_vp, flags); 2768 vrele(nd.ni_vp); 2769 return (error); 2770 } 2771 2772 /* 2773 * Change flags of a file given a file descriptor. 
2774 */ 2775 #ifndef _SYS_SYSPROTO_H_ 2776 struct fchflags_args { 2777 int fd; 2778 u_long flags; 2779 }; 2780 #endif 2781 int 2782 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2783 { 2784 struct file *fp; 2785 int error; 2786 2787 AUDIT_ARG_FD(uap->fd); 2788 AUDIT_ARG_FFLAGS(uap->flags); 2789 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2790 &fp); 2791 if (error != 0) 2792 return (error); 2793 #ifdef AUDIT 2794 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2795 AUDIT_ARG_VNODE1(fp->f_vnode); 2796 VOP_UNLOCK(fp->f_vnode); 2797 #endif 2798 error = setfflags(td, fp->f_vnode, uap->flags); 2799 fdrop(fp, td); 2800 return (error); 2801 } 2802 2803 /* 2804 * Common implementation code for chmod(), lchmod() and fchmod(). 2805 */ 2806 int 2807 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2808 { 2809 struct mount *mp; 2810 struct vattr vattr; 2811 int error; 2812 2813 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2814 return (error); 2815 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2816 VATTR_NULL(&vattr); 2817 vattr.va_mode = mode & ALLPERMS; 2818 #ifdef MAC 2819 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2820 if (error == 0) 2821 #endif 2822 error = VOP_SETATTR(vp, &vattr, cred); 2823 VOP_UNLOCK(vp); 2824 vn_finished_write(mp); 2825 return (error); 2826 } 2827 2828 /* 2829 * Change mode of a file given path name. 
2830 */ 2831 #ifndef _SYS_SYSPROTO_H_ 2832 struct chmod_args { 2833 char *path; 2834 int mode; 2835 }; 2836 #endif 2837 int 2838 sys_chmod(struct thread *td, struct chmod_args *uap) 2839 { 2840 2841 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2842 uap->mode, 0)); 2843 } 2844 2845 #ifndef _SYS_SYSPROTO_H_ 2846 struct fchmodat_args { 2847 int dirfd; 2848 char *path; 2849 mode_t mode; 2850 int flag; 2851 } 2852 #endif 2853 int 2854 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2855 { 2856 2857 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH | 2858 AT_RESOLVE_BENEATH)) != 0) 2859 return (EINVAL); 2860 2861 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2862 uap->mode, uap->flag)); 2863 } 2864 2865 /* 2866 * Change mode of a file given path name (don't follow links.) 2867 */ 2868 #ifndef _SYS_SYSPROTO_H_ 2869 struct lchmod_args { 2870 char *path; 2871 int mode; 2872 }; 2873 #endif 2874 int 2875 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2876 { 2877 2878 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2879 uap->mode, AT_SYMLINK_NOFOLLOW)); 2880 } 2881 2882 int 2883 kern_fchmodat(struct thread *td, int fd, const char *path, 2884 enum uio_seg pathseg, mode_t mode, int flag) 2885 { 2886 struct nameidata nd; 2887 int error; 2888 2889 AUDIT_ARG_MODE(mode); 2890 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2891 AT_BENEATH | AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, 2892 &cap_fchmod_rights, td); 2893 if ((error = namei(&nd)) != 0) 2894 return (error); 2895 NDFREE(&nd, NDF_ONLY_PNBUF); 2896 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2897 vrele(nd.ni_vp); 2898 return (error); 2899 } 2900 2901 /* 2902 * Change mode of a file given a file descriptor. 
2903 */ 2904 #ifndef _SYS_SYSPROTO_H_ 2905 struct fchmod_args { 2906 int fd; 2907 int mode; 2908 }; 2909 #endif 2910 int 2911 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2912 { 2913 struct file *fp; 2914 int error; 2915 2916 AUDIT_ARG_FD(uap->fd); 2917 AUDIT_ARG_MODE(uap->mode); 2918 2919 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2920 if (error != 0) 2921 return (error); 2922 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2923 fdrop(fp, td); 2924 return (error); 2925 } 2926 2927 /* 2928 * Common implementation for chown(), lchown(), and fchown() 2929 */ 2930 int 2931 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2932 gid_t gid) 2933 { 2934 struct mount *mp; 2935 struct vattr vattr; 2936 int error; 2937 2938 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2939 return (error); 2940 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2941 VATTR_NULL(&vattr); 2942 vattr.va_uid = uid; 2943 vattr.va_gid = gid; 2944 #ifdef MAC 2945 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2946 vattr.va_gid); 2947 if (error == 0) 2948 #endif 2949 error = VOP_SETATTR(vp, &vattr, cred); 2950 VOP_UNLOCK(vp); 2951 vn_finished_write(mp); 2952 return (error); 2953 } 2954 2955 /* 2956 * Set ownership given a path name. 
2957 */ 2958 #ifndef _SYS_SYSPROTO_H_ 2959 struct chown_args { 2960 char *path; 2961 int uid; 2962 int gid; 2963 }; 2964 #endif 2965 int 2966 sys_chown(struct thread *td, struct chown_args *uap) 2967 { 2968 2969 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 2970 uap->gid, 0)); 2971 } 2972 2973 #ifndef _SYS_SYSPROTO_H_ 2974 struct fchownat_args { 2975 int fd; 2976 const char * path; 2977 uid_t uid; 2978 gid_t gid; 2979 int flag; 2980 }; 2981 #endif 2982 int 2983 sys_fchownat(struct thread *td, struct fchownat_args *uap) 2984 { 2985 2986 if ((uap->flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH | 2987 AT_RESOLVE_BENEATH)) != 0) 2988 return (EINVAL); 2989 2990 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 2991 uap->gid, uap->flag)); 2992 } 2993 2994 int 2995 kern_fchownat(struct thread *td, int fd, const char *path, 2996 enum uio_seg pathseg, int uid, int gid, int flag) 2997 { 2998 struct nameidata nd; 2999 int error; 3000 3001 AUDIT_ARG_OWNER(uid, gid); 3002 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3003 AT_BENEATH | AT_RESOLVE_BENEATH) | AUDITVNODE1, pathseg, path, fd, 3004 &cap_fchown_rights, td); 3005 3006 if ((error = namei(&nd)) != 0) 3007 return (error); 3008 NDFREE(&nd, NDF_ONLY_PNBUF); 3009 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3010 vrele(nd.ni_vp); 3011 return (error); 3012 } 3013 3014 /* 3015 * Set ownership given a path name, do not cross symlinks. 3016 */ 3017 #ifndef _SYS_SYSPROTO_H_ 3018 struct lchown_args { 3019 char *path; 3020 int uid; 3021 int gid; 3022 }; 3023 #endif 3024 int 3025 sys_lchown(struct thread *td, struct lchown_args *uap) 3026 { 3027 3028 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3029 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3030 } 3031 3032 /* 3033 * Set ownership given a file descriptor. 
3034 */ 3035 #ifndef _SYS_SYSPROTO_H_ 3036 struct fchown_args { 3037 int fd; 3038 int uid; 3039 int gid; 3040 }; 3041 #endif 3042 int 3043 sys_fchown(struct thread *td, struct fchown_args *uap) 3044 { 3045 struct file *fp; 3046 int error; 3047 3048 AUDIT_ARG_FD(uap->fd); 3049 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3050 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3051 if (error != 0) 3052 return (error); 3053 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3054 fdrop(fp, td); 3055 return (error); 3056 } 3057 3058 /* 3059 * Common implementation code for utimes(), lutimes(), and futimes(). 3060 */ 3061 static int 3062 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3063 struct timespec *tsp) 3064 { 3065 struct timeval tv[2]; 3066 const struct timeval *tvp; 3067 int error; 3068 3069 if (usrtvp == NULL) { 3070 vfs_timestamp(&tsp[0]); 3071 tsp[1] = tsp[0]; 3072 } else { 3073 if (tvpseg == UIO_SYSSPACE) { 3074 tvp = usrtvp; 3075 } else { 3076 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3077 return (error); 3078 tvp = tv; 3079 } 3080 3081 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3082 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3083 return (EINVAL); 3084 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3085 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3086 } 3087 return (0); 3088 } 3089 3090 /* 3091 * Common implementation code for futimens(), utimensat(). 
3092 */ 3093 #define UTIMENS_NULL 0x1 3094 #define UTIMENS_EXIT 0x2 3095 static int 3096 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3097 struct timespec *tsp, int *retflags) 3098 { 3099 struct timespec tsnow; 3100 int error; 3101 3102 vfs_timestamp(&tsnow); 3103 *retflags = 0; 3104 if (usrtsp == NULL) { 3105 tsp[0] = tsnow; 3106 tsp[1] = tsnow; 3107 *retflags |= UTIMENS_NULL; 3108 return (0); 3109 } 3110 if (tspseg == UIO_SYSSPACE) { 3111 tsp[0] = usrtsp[0]; 3112 tsp[1] = usrtsp[1]; 3113 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3114 return (error); 3115 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3116 *retflags |= UTIMENS_EXIT; 3117 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3118 *retflags |= UTIMENS_NULL; 3119 if (tsp[0].tv_nsec == UTIME_OMIT) 3120 tsp[0].tv_sec = VNOVAL; 3121 else if (tsp[0].tv_nsec == UTIME_NOW) 3122 tsp[0] = tsnow; 3123 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3124 return (EINVAL); 3125 if (tsp[1].tv_nsec == UTIME_OMIT) 3126 tsp[1].tv_sec = VNOVAL; 3127 else if (tsp[1].tv_nsec == UTIME_NOW) 3128 tsp[1] = tsnow; 3129 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3130 return (EINVAL); 3131 3132 return (0); 3133 } 3134 3135 /* 3136 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3137 * and utimensat(). 
3138 */ 3139 static int 3140 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3141 int numtimes, int nullflag) 3142 { 3143 struct mount *mp; 3144 struct vattr vattr; 3145 int error, setbirthtime; 3146 3147 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3148 return (error); 3149 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3150 setbirthtime = 0; 3151 if (numtimes < 3 && !VOP_GETATTR(vp, &vattr, td->td_ucred) && 3152 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3153 setbirthtime = 1; 3154 VATTR_NULL(&vattr); 3155 vattr.va_atime = ts[0]; 3156 vattr.va_mtime = ts[1]; 3157 if (setbirthtime) 3158 vattr.va_birthtime = ts[1]; 3159 if (numtimes > 2) 3160 vattr.va_birthtime = ts[2]; 3161 if (nullflag) 3162 vattr.va_vaflags |= VA_UTIMES_NULL; 3163 #ifdef MAC 3164 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3165 vattr.va_mtime); 3166 #endif 3167 if (error == 0) 3168 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3169 VOP_UNLOCK(vp); 3170 vn_finished_write(mp); 3171 return (error); 3172 } 3173 3174 /* 3175 * Set the access and modification times of a file. 
3176 */ 3177 #ifndef _SYS_SYSPROTO_H_ 3178 struct utimes_args { 3179 char *path; 3180 struct timeval *tptr; 3181 }; 3182 #endif 3183 int 3184 sys_utimes(struct thread *td, struct utimes_args *uap) 3185 { 3186 3187 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3188 uap->tptr, UIO_USERSPACE)); 3189 } 3190 3191 #ifndef _SYS_SYSPROTO_H_ 3192 struct futimesat_args { 3193 int fd; 3194 const char * path; 3195 const struct timeval * times; 3196 }; 3197 #endif 3198 int 3199 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3200 { 3201 3202 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3203 uap->times, UIO_USERSPACE)); 3204 } 3205 3206 int 3207 kern_utimesat(struct thread *td, int fd, const char *path, 3208 enum uio_seg pathseg, struct timeval *tptr, enum uio_seg tptrseg) 3209 { 3210 struct nameidata nd; 3211 struct timespec ts[2]; 3212 int error; 3213 3214 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3215 return (error); 3216 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3217 &cap_futimes_rights, td); 3218 3219 if ((error = namei(&nd)) != 0) 3220 return (error); 3221 NDFREE(&nd, NDF_ONLY_PNBUF); 3222 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3223 vrele(nd.ni_vp); 3224 return (error); 3225 } 3226 3227 /* 3228 * Set the access and modification times of a file. 
3229 */ 3230 #ifndef _SYS_SYSPROTO_H_ 3231 struct lutimes_args { 3232 char *path; 3233 struct timeval *tptr; 3234 }; 3235 #endif 3236 int 3237 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3238 { 3239 3240 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3241 UIO_USERSPACE)); 3242 } 3243 3244 int 3245 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3246 struct timeval *tptr, enum uio_seg tptrseg) 3247 { 3248 struct timespec ts[2]; 3249 struct nameidata nd; 3250 int error; 3251 3252 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3253 return (error); 3254 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path, td); 3255 if ((error = namei(&nd)) != 0) 3256 return (error); 3257 NDFREE(&nd, NDF_ONLY_PNBUF); 3258 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3259 vrele(nd.ni_vp); 3260 return (error); 3261 } 3262 3263 /* 3264 * Set the access and modification times of a file. 3265 */ 3266 #ifndef _SYS_SYSPROTO_H_ 3267 struct futimes_args { 3268 int fd; 3269 struct timeval *tptr; 3270 }; 3271 #endif 3272 int 3273 sys_futimes(struct thread *td, struct futimes_args *uap) 3274 { 3275 3276 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3277 } 3278 3279 int 3280 kern_futimes(struct thread *td, int fd, struct timeval *tptr, 3281 enum uio_seg tptrseg) 3282 { 3283 struct timespec ts[2]; 3284 struct file *fp; 3285 int error; 3286 3287 AUDIT_ARG_FD(fd); 3288 error = getutimes(tptr, tptrseg, ts); 3289 if (error != 0) 3290 return (error); 3291 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3292 if (error != 0) 3293 return (error); 3294 #ifdef AUDIT 3295 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3296 AUDIT_ARG_VNODE1(fp->f_vnode); 3297 VOP_UNLOCK(fp->f_vnode); 3298 #endif 3299 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3300 fdrop(fp, td); 3301 return (error); 3302 } 3303 3304 int 3305 sys_futimens(struct thread *td, struct futimens_args *uap) 3306 { 3307 3308 return (kern_futimens(td, 
uap->fd, uap->times, UIO_USERSPACE)); 3309 } 3310 3311 int 3312 kern_futimens(struct thread *td, int fd, struct timespec *tptr, 3313 enum uio_seg tptrseg) 3314 { 3315 struct timespec ts[2]; 3316 struct file *fp; 3317 int error, flags; 3318 3319 AUDIT_ARG_FD(fd); 3320 error = getutimens(tptr, tptrseg, ts, &flags); 3321 if (error != 0) 3322 return (error); 3323 if (flags & UTIMENS_EXIT) 3324 return (0); 3325 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3326 if (error != 0) 3327 return (error); 3328 #ifdef AUDIT 3329 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3330 AUDIT_ARG_VNODE1(fp->f_vnode); 3331 VOP_UNLOCK(fp->f_vnode); 3332 #endif 3333 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3334 fdrop(fp, td); 3335 return (error); 3336 } 3337 3338 int 3339 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3340 { 3341 3342 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3343 uap->times, UIO_USERSPACE, uap->flag)); 3344 } 3345 3346 int 3347 kern_utimensat(struct thread *td, int fd, const char *path, 3348 enum uio_seg pathseg, struct timespec *tptr, enum uio_seg tptrseg, 3349 int flag) 3350 { 3351 struct nameidata nd; 3352 struct timespec ts[2]; 3353 int error, flags; 3354 3355 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH | 3356 AT_RESOLVE_BENEATH)) != 0) 3357 return (EINVAL); 3358 3359 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3360 return (error); 3361 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3362 AT_BENEATH | AT_RESOLVE_BENEATH) | AUDITVNODE1, 3363 pathseg, path, fd, &cap_futimes_rights, td); 3364 if ((error = namei(&nd)) != 0) 3365 return (error); 3366 /* 3367 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3368 * POSIX states: 3369 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3370 * "Search permission is denied by a component of the path prefix." 
3371 */ 3372 NDFREE(&nd, NDF_ONLY_PNBUF); 3373 if ((flags & UTIMENS_EXIT) == 0) 3374 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3375 vrele(nd.ni_vp); 3376 return (error); 3377 } 3378 3379 /* 3380 * Truncate a file given its path name. 3381 */ 3382 #ifndef _SYS_SYSPROTO_H_ 3383 struct truncate_args { 3384 char *path; 3385 int pad; 3386 off_t length; 3387 }; 3388 #endif 3389 int 3390 sys_truncate(struct thread *td, struct truncate_args *uap) 3391 { 3392 3393 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3394 } 3395 3396 int 3397 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3398 off_t length) 3399 { 3400 struct mount *mp; 3401 struct vnode *vp; 3402 void *rl_cookie; 3403 struct vattr vattr; 3404 struct nameidata nd; 3405 int error; 3406 3407 if (length < 0) 3408 return (EINVAL); 3409 retry: 3410 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, td); 3411 if ((error = namei(&nd)) != 0) 3412 return (error); 3413 vp = nd.ni_vp; 3414 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3415 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3416 vn_rangelock_unlock(vp, rl_cookie); 3417 vrele(vp); 3418 return (error); 3419 } 3420 NDFREE(&nd, NDF_ONLY_PNBUF); 3421 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3422 if (vp->v_type == VDIR) 3423 error = EISDIR; 3424 #ifdef MAC 3425 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3426 } 3427 #endif 3428 else if ((error = vn_writechk(vp)) == 0 && 3429 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3430 VATTR_NULL(&vattr); 3431 vattr.va_size = length; 3432 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3433 } 3434 VOP_UNLOCK(vp); 3435 vn_finished_write(mp); 3436 vn_rangelock_unlock(vp, rl_cookie); 3437 vrele(vp); 3438 if (error == ERELOOKUP) 3439 goto retry; 3440 return (error); 3441 } 3442 3443 #if defined(COMPAT_43) 3444 /* 3445 * Truncate a file given its path name. 
3446 */ 3447 #ifndef _SYS_SYSPROTO_H_ 3448 struct otruncate_args { 3449 char *path; 3450 long length; 3451 }; 3452 #endif 3453 int 3454 otruncate(struct thread *td, struct otruncate_args *uap) 3455 { 3456 3457 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3458 } 3459 #endif /* COMPAT_43 */ 3460 3461 #if defined(COMPAT_FREEBSD6) 3462 /* Versions with the pad argument */ 3463 int 3464 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3465 { 3466 3467 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3468 } 3469 3470 int 3471 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3472 { 3473 3474 return (kern_ftruncate(td, uap->fd, uap->length)); 3475 } 3476 #endif 3477 3478 int 3479 kern_fsync(struct thread *td, int fd, bool fullsync) 3480 { 3481 struct vnode *vp; 3482 struct mount *mp; 3483 struct file *fp; 3484 int error, lock_flags; 3485 3486 AUDIT_ARG_FD(fd); 3487 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3488 if (error != 0) 3489 return (error); 3490 vp = fp->f_vnode; 3491 #if 0 3492 if (!fullsync) 3493 /* XXXKIB: compete outstanding aio writes */; 3494 #endif 3495 retry: 3496 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3497 if (error != 0) 3498 goto drop; 3499 if (MNT_SHARED_WRITES(mp) || 3500 ((mp == NULL) && MNT_SHARED_WRITES(vp->v_mount))) { 3501 lock_flags = LK_SHARED; 3502 } else { 3503 lock_flags = LK_EXCLUSIVE; 3504 } 3505 vn_lock(vp, lock_flags | LK_RETRY); 3506 AUDIT_ARG_VNODE1(vp); 3507 if (vp->v_object != NULL) { 3508 VM_OBJECT_WLOCK(vp->v_object); 3509 vm_object_page_clean(vp->v_object, 0, 0, 0); 3510 VM_OBJECT_WUNLOCK(vp->v_object); 3511 } 3512 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3513 VOP_UNLOCK(vp); 3514 vn_finished_write(mp); 3515 if (error == ERELOOKUP) 3516 goto retry; 3517 drop: 3518 fdrop(fp, td); 3519 return (error); 3520 } 3521 3522 /* 3523 * Sync an open file. 
*/
#ifndef _SYS_SYSPROTO_H_
struct fsync_args {
	int fd;
};
#endif
int
sys_fsync(struct thread *td, struct fsync_args *uap)
{

	/* Full sync: kern_fsync() is asked for VOP_FSYNC(). */
	return (kern_fsync(td, uap->fd, true));
}

int
sys_fdatasync(struct thread *td, struct fdatasync_args *uap)
{

	/* Data-only sync: kern_fsync() is asked for VOP_FDATASYNC(). */
	return (kern_fsync(td, uap->fd, false));
}

/*
 * Rename files.  Source and destination must either both be directories, or
 * both not be directories.  If target is a directory, it must be empty.
 */
#ifndef _SYS_SYSPROTO_H_
struct rename_args {
	char *from;
	char *to;
};
#endif
int
sys_rename(struct thread *td, struct rename_args *uap)
{

	/* Both names are resolved from the current directory. */
	return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD,
	    uap->to, UIO_USERSPACE));
}

#ifndef _SYS_SYSPROTO_H_
struct renameat_args {
	int oldfd;
	char *old;
	int newfd;
	char *new;
};
#endif
int
sys_renameat(struct thread *td, struct renameat_args *uap)
{

	return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new,
	    UIO_USERSPACE));
}

#ifdef MAC
/*
 * Source-side lookup for kern_renameat() used when a MAC rename-from
 * check is enabled.  The source is looked up with parent and leaf
 * locked so that mac_vnode_check_rename_from() can be run on them; both
 * are unlocked again before returning.
 *
 * On success the references obtained by namei() are left in *fromnd for
 * the caller.  On failure of the MAC check they are all released here.
 */
static int
kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd,
    const char *new, enum uio_seg pathseg, struct nameidata *fromnd)
{
	int error;

	NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART |
	    AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights, td);
	if ((error = namei(fromnd)) != 0)
		return (error);
	error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp,
	    fromnd->ni_vp, &fromnd->ni_cnd);
	VOP_UNLOCK(fromnd->ni_dvp);
	/* Parent and leaf may be the same vnode; unlock it only once. */
	if (fromnd->ni_dvp != fromnd->ni_vp)
		VOP_UNLOCK(fromnd->ni_vp);
	if (error != 0) {
		NDFREE(fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd->ni_dvp);
		vrele(fromnd->ni_vp);
		if (fromnd->ni_startdir)
			vrele(fromnd->ni_startdir);
	}
	return (error);
}
#endif

/*
 * Back end for rename() and renameat().
 *
 * Looks up the source (oldfd/old) and the target (newfd/new), performs
 * the type, capability and MAC checks below, and calls VOP_RENAME().
 *
 * Two internal conventions are visible in the code:
 *  - ERESTART is used as a private "source and target are the same
 *    vnode, nothing to do" marker and is turned into 0 before returning;
 *  - ERELOOKUP causes the whole operation to be restarted from `again'.
 */
int
kern_renameat(struct thread *td, int oldfd, const char *old, int newfd,
    const char *new, enum uio_seg pathseg)
{
	struct mount *mp = NULL;
	struct vnode *tvp, *fvp, *tdvp;
	struct nameidata fromnd, tond;
	int error;

again:
	bwillwrite();
#ifdef MAC
	if (mac_vnode_check_rename_from_enabled()) {
		error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg,
		    &fromnd);
		if (error != 0)
			return (error);
	} else {
#endif
	NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1,
	    pathseg, old, oldfd, &cap_renameat_source_rights, td);
	if ((error = namei(&fromnd)) != 0)
		return (error);
#ifdef MAC
	}
#endif
	fvp = fromnd.ni_vp;
	NDINIT_ATRIGHTS(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE |
	    SAVESTART | AUDITVNODE2, pathseg, new, newfd,
	    &cap_renameat_target_rights, td);
	if (fromnd.ni_vp->v_type == VDIR)
		tond.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&tond)) != 0) {
		/* Translate error code for rename("dir1", "dir2/."). */
		if (error == EISDIR && fvp->v_type == VDIR)
			error = EINVAL;
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		goto out1;
	}
	tdvp = tond.ni_dvp;
	tvp = tond.ni_vp;
	error = vn_start_write(fvp, &mp, V_NOWAIT);
	if (error != 0) {
		/*
		 * Could not start the write without sleeping: release
		 * everything obtained by both lookups, wait with
		 * V_XSLEEP, and start over.
		 */
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp != NULL)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
		vrele(tond.ni_startdir);
		if (fromnd.ni_startdir != NULL)
			vrele(fromnd.ni_startdir);
		error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH);
		if (error != 0)
			return (error);
		goto again;
	}
	if (tvp != NULL) {
		/* Source and an existing target must agree on "directory". */
		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
			error = ENOTDIR;
			goto out;
		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
			error = EISDIR;
			goto out;
		}
#ifdef CAPABILITIES
		if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) {
			/*
			 * If the target already exists we require CAP_UNLINKAT
			 * from 'newfd', when newfd was used for the lookup.
			 */
			error = cap_check(&tond.ni_filecaps.fc_rights,
			    &cap_unlinkat_rights);
			if (error != 0)
				goto out;
		}
#endif
	}
	/* The source may not be the directory that holds the target. */
	if (fvp == tdvp) {
		error = EINVAL;
		goto out;
	}
	/*
	 * If the source is the same as the destination (that is, if they
	 * are links to the same vnode), then there is nothing to do.
	 */
	if (fvp == tvp)
		error = ERESTART;
#ifdef MAC
	else
		error = mac_vnode_check_rename_to(td->td_ucred, tdvp,
		    tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd);
#endif
out:
	if (error == 0) {
		/*
		 * No vput()/vrele() of the four vnodes on this path: they
		 * are handed to VOP_RENAME().
		 */
		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
		    tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
	} else {
		NDFREE(&fromnd, NDF_ONLY_PNBUF);
		NDFREE(&tond, NDF_ONLY_PNBUF);
		if (tvp != NULL)
			vput(tvp);
		if (tdvp == tvp)
			vrele(tdvp);
		else
			vput(tdvp);
		vrele(fromnd.ni_dvp);
		vrele(fvp);
	}
	vrele(tond.ni_startdir);
	vn_finished_write(mp);
out1:
	if (fromnd.ni_startdir)
		vrele(fromnd.ni_startdir);
	/* ERESTART is the internal "nothing to do" marker set above. */
	if (error == ERESTART)
		return (0);
	if (error == ERELOOKUP)
		goto again;
	return (error);
}

/*
 * Make a directory file.
 */
#ifndef _SYS_SYSPROTO_H_
struct mkdir_args {
	char *path;
	int mode;
};
#endif
int
sys_mkdir(struct thread *td, struct mkdir_args *uap)
{

	return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct mkdirat_args {
	int fd;
	char *path;
	mode_t mode;
};
#endif
int
sys_mkdirat(struct thread *td, struct mkdirat_args *uap)
{

	return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode));
}

/*
 * Back end for mkdir() and mkdirat().
 *
 * Returns EEXIST when the lookup finds an existing vnode.  The new
 * directory's mode is the ACCESSPERMS bits of `mode' with the process
 * file creation mask (fd_cmask) removed.  An ERELOOKUP result restarts
 * the operation from `restart'.
 */
int
kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg,
    int mode)
{
	struct mount *mp;
	struct vnode *vp;
	struct vattr vattr;
	struct nameidata nd;
	int error;

	AUDIT_ARG_MODE(mode);
restart:
	bwillwrite();
	NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 |
	    NC_NOMAKEENTRY | NC_KEEPPOSENTRY, segflg, path, fd,
	    &cap_mkdirat_rights, td);
	nd.ni_cnd.cn_flags |= WILLBEDIR;
	if ((error = namei(&nd)) != 0)
		return (error);
	vp = nd.ni_vp;
	if (vp != NULL) {
		NDFREE(&nd, NDF_ONLY_PNBUF);
		/*
		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
		 * the strange behaviour of leaving the vnode unlocked
		 * if the target is the same vnode as the parent.
		 */
		if (vp == nd.ni_dvp)
			vrele(nd.ni_dvp);
		else
			vput(nd.ni_dvp);
		vrele(vp);
		return (EEXIST);
	}
	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
		/*
		 * Could not start the write without sleeping: release the
		 * parent, wait with V_XSLEEP, and redo the lookup.
		 */
		NDFREE(&nd, NDF_ONLY_PNBUF);
		vput(nd.ni_dvp);
		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
			return (error);
		goto restart;
	}
	VATTR_NULL(&vattr);
	vattr.va_type = VDIR;
	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;
#ifdef MAC
	error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd,
	    &vattr);
	if (error != 0)
		goto out;
#endif
	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
#ifdef MAC
out:
#endif
	NDFREE(&nd, NDF_ONLY_PNBUF);
	vput(nd.ni_dvp);
	/* The new vnode is only released when VOP_MKDIR() succeeded. */
	if (error == 0)
		vput(nd.ni_vp);
	vn_finished_write(mp);
	if (error == ERELOOKUP)
		goto restart;
	return (error);
}

/*
 * Remove a directory file.
3831 */ 3832 #ifndef _SYS_SYSPROTO_H_ 3833 struct rmdir_args { 3834 char *path; 3835 }; 3836 #endif 3837 int 3838 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3839 { 3840 3841 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3842 0)); 3843 } 3844 3845 int 3846 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3847 enum uio_seg pathseg, int flag) 3848 { 3849 struct mount *mp; 3850 struct vnode *vp; 3851 struct file *fp; 3852 struct nameidata nd; 3853 cap_rights_t rights; 3854 int error; 3855 3856 fp = NULL; 3857 if (fd != FD_NONE) { 3858 error = getvnode(td, fd, cap_rights_init_one(&rights, CAP_LOOKUP), 3859 &fp); 3860 if (error != 0) 3861 return (error); 3862 } 3863 3864 restart: 3865 bwillwrite(); 3866 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3867 at2cnpflags(flag, AT_BENEATH | AT_RESOLVE_BENEATH), 3868 pathseg, path, dfd, &cap_unlinkat_rights, td); 3869 if ((error = namei(&nd)) != 0) 3870 goto fdout; 3871 vp = nd.ni_vp; 3872 if (vp->v_type != VDIR) { 3873 error = ENOTDIR; 3874 goto out; 3875 } 3876 /* 3877 * No rmdir "." please. 3878 */ 3879 if (nd.ni_dvp == vp) { 3880 error = EINVAL; 3881 goto out; 3882 } 3883 /* 3884 * The root of a mounted filesystem cannot be deleted. 
3885 */ 3886 if (vp->v_vflag & VV_ROOT) { 3887 error = EBUSY; 3888 goto out; 3889 } 3890 3891 if (fp != NULL && fp->f_vnode != vp) { 3892 if (VN_IS_DOOMED(fp->f_vnode)) 3893 error = EBADF; 3894 else 3895 error = EDEADLK; 3896 goto out; 3897 } 3898 3899 #ifdef MAC 3900 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3901 &nd.ni_cnd); 3902 if (error != 0) 3903 goto out; 3904 #endif 3905 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3906 NDFREE(&nd, NDF_ONLY_PNBUF); 3907 vput(vp); 3908 if (nd.ni_dvp == vp) 3909 vrele(nd.ni_dvp); 3910 else 3911 vput(nd.ni_dvp); 3912 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3913 goto fdout; 3914 goto restart; 3915 } 3916 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3917 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3918 vn_finished_write(mp); 3919 out: 3920 NDFREE(&nd, NDF_ONLY_PNBUF); 3921 vput(vp); 3922 if (nd.ni_dvp == vp) 3923 vrele(nd.ni_dvp); 3924 else 3925 vput(nd.ni_dvp); 3926 if (error == ERELOOKUP) 3927 goto restart; 3928 fdout: 3929 if (fp != NULL) 3930 fdrop(fp, td); 3931 return (error); 3932 } 3933 3934 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3935 int 3936 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3937 long *basep, void (*func)(struct freebsd11_dirent *)) 3938 { 3939 struct freebsd11_dirent dstdp; 3940 struct dirent *dp, *edp; 3941 char *dirbuf; 3942 off_t base; 3943 ssize_t resid, ucount; 3944 int error; 3945 3946 /* XXX arbitrary sanity limit on `count'. 
*/ 3947 count = min(count, 64 * 1024); 3948 3949 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3950 3951 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3952 UIO_SYSSPACE); 3953 if (error != 0) 3954 goto done; 3955 if (basep != NULL) 3956 *basep = base; 3957 3958 ucount = 0; 3959 for (dp = (struct dirent *)dirbuf, 3960 edp = (struct dirent *)&dirbuf[count - resid]; 3961 ucount < count && dp < edp; ) { 3962 if (dp->d_reclen == 0) 3963 break; 3964 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 3965 if (dp->d_namlen >= sizeof(dstdp.d_name)) 3966 continue; 3967 dstdp.d_type = dp->d_type; 3968 dstdp.d_namlen = dp->d_namlen; 3969 dstdp.d_fileno = dp->d_fileno; /* truncate */ 3970 if (dstdp.d_fileno != dp->d_fileno) { 3971 switch (ino64_trunc_error) { 3972 default: 3973 case 0: 3974 break; 3975 case 1: 3976 error = EOVERFLOW; 3977 goto done; 3978 case 2: 3979 dstdp.d_fileno = UINT32_MAX; 3980 break; 3981 } 3982 } 3983 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 3984 ((dp->d_namlen + 1 + 3) &~ 3); 3985 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 3986 bzero(dstdp.d_name + dstdp.d_namlen, 3987 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 3988 dstdp.d_namlen); 3989 MPASS(dstdp.d_reclen <= dp->d_reclen); 3990 MPASS(ucount + dstdp.d_reclen <= count); 3991 if (func != NULL) 3992 func(&dstdp); 3993 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 3994 if (error != 0) 3995 break; 3996 dp = (struct dirent *)((char *)dp + dp->d_reclen); 3997 ucount += dstdp.d_reclen; 3998 } 3999 4000 done: 4001 free(dirbuf, M_TEMP); 4002 if (error == 0) 4003 td->td_retval[0] = ucount; 4004 return (error); 4005 } 4006 #endif /* COMPAT */ 4007 4008 #ifdef COMPAT_43 4009 static void 4010 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4011 { 4012 #if (BYTE_ORDER == LITTLE_ENDIAN) 4013 /* 4014 * The expected low byte of dp->d_namlen is our dp->d_type. 4015 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4016 */ 4017 dp->d_type = dp->d_namlen; 4018 dp->d_namlen = 0; 4019 #else 4020 /* 4021 * The dp->d_type is the high byte of the expected dp->d_namlen, 4022 * so must be zero'ed. 4023 */ 4024 dp->d_type = 0; 4025 #endif 4026 } 4027 4028 /* 4029 * Read a block of directory entries in a filesystem independent format. 4030 */ 4031 #ifndef _SYS_SYSPROTO_H_ 4032 struct ogetdirentries_args { 4033 int fd; 4034 char *buf; 4035 u_int count; 4036 long *basep; 4037 }; 4038 #endif 4039 int 4040 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4041 { 4042 long loff; 4043 int error; 4044 4045 error = kern_ogetdirentries(td, uap, &loff); 4046 if (error == 0) 4047 error = copyout(&loff, uap->basep, sizeof(long)); 4048 return (error); 4049 } 4050 4051 int 4052 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4053 long *ploff) 4054 { 4055 long base; 4056 int error; 4057 4058 /* XXX arbitrary sanity limit on `count'. */ 4059 if (uap->count > 64 * 1024) 4060 return (EINVAL); 4061 4062 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4063 &base, ogetdirentries_cvt); 4064 4065 if (error == 0 && uap->basep != NULL) 4066 error = copyout(&base, uap->basep, sizeof(long)); 4067 4068 return (error); 4069 } 4070 #endif /* COMPAT_43 */ 4071 4072 #if defined(COMPAT_FREEBSD11) 4073 #ifndef _SYS_SYSPROTO_H_ 4074 struct freebsd11_getdirentries_args { 4075 int fd; 4076 char *buf; 4077 u_int count; 4078 long *basep; 4079 }; 4080 #endif 4081 int 4082 freebsd11_getdirentries(struct thread *td, 4083 struct freebsd11_getdirentries_args *uap) 4084 { 4085 long base; 4086 int error; 4087 4088 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4089 &base, NULL); 4090 4091 if (error == 0 && uap->basep != NULL) 4092 error = copyout(&base, uap->basep, sizeof(long)); 4093 return (error); 4094 } 4095 4096 int 4097 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4098 { 4099 struct 
freebsd11_getdirentries_args ap; 4100 4101 ap.fd = uap->fd; 4102 ap.buf = uap->buf; 4103 ap.count = uap->count; 4104 ap.basep = NULL; 4105 return (freebsd11_getdirentries(td, &ap)); 4106 } 4107 #endif /* COMPAT_FREEBSD11 */ 4108 4109 /* 4110 * Read a block of directory entries in a filesystem independent format. 4111 */ 4112 int 4113 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4114 { 4115 off_t base; 4116 int error; 4117 4118 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4119 NULL, UIO_USERSPACE); 4120 if (error != 0) 4121 return (error); 4122 if (uap->basep != NULL) 4123 error = copyout(&base, uap->basep, sizeof(off_t)); 4124 return (error); 4125 } 4126 4127 int 4128 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4129 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4130 { 4131 struct vnode *vp; 4132 struct file *fp; 4133 struct uio auio; 4134 struct iovec aiov; 4135 off_t loff; 4136 int error, eofflag; 4137 off_t foffset; 4138 4139 AUDIT_ARG_FD(fd); 4140 if (count > IOSIZE_MAX) 4141 return (EINVAL); 4142 auio.uio_resid = count; 4143 error = getvnode(td, fd, &cap_read_rights, &fp); 4144 if (error != 0) 4145 return (error); 4146 if ((fp->f_flag & FREAD) == 0) { 4147 fdrop(fp, td); 4148 return (EBADF); 4149 } 4150 vp = fp->f_vnode; 4151 foffset = foffset_lock(fp, 0); 4152 unionread: 4153 if (vp->v_type != VDIR) { 4154 error = EINVAL; 4155 goto fail; 4156 } 4157 aiov.iov_base = buf; 4158 aiov.iov_len = count; 4159 auio.uio_iov = &aiov; 4160 auio.uio_iovcnt = 1; 4161 auio.uio_rw = UIO_READ; 4162 auio.uio_segflg = bufseg; 4163 auio.uio_td = td; 4164 vn_lock(vp, LK_SHARED | LK_RETRY); 4165 AUDIT_ARG_VNODE1(vp); 4166 loff = auio.uio_offset = foffset; 4167 #ifdef MAC 4168 error = mac_vnode_check_readdir(td->td_ucred, vp); 4169 if (error == 0) 4170 #endif 4171 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4172 NULL); 4173 foffset = auio.uio_offset; 4174 if (error != 0) { 4175 
VOP_UNLOCK(vp); 4176 goto fail; 4177 } 4178 if (count == auio.uio_resid && 4179 (vp->v_vflag & VV_ROOT) && 4180 (vp->v_mount->mnt_flag & MNT_UNION)) { 4181 struct vnode *tvp = vp; 4182 4183 vp = vp->v_mount->mnt_vnodecovered; 4184 VREF(vp); 4185 fp->f_vnode = vp; 4186 foffset = 0; 4187 vput(tvp); 4188 goto unionread; 4189 } 4190 VOP_UNLOCK(vp); 4191 *basep = loff; 4192 if (residp != NULL) 4193 *residp = auio.uio_resid; 4194 td->td_retval[0] = count - auio.uio_resid; 4195 fail: 4196 foffset_unlock(fp, foffset, 0); 4197 fdrop(fp, td); 4198 return (error); 4199 } 4200 4201 /* 4202 * Set the mode mask for creation of filesystem nodes. 4203 */ 4204 #ifndef _SYS_SYSPROTO_H_ 4205 struct umask_args { 4206 int newmask; 4207 }; 4208 #endif 4209 int 4210 sys_umask(struct thread *td, struct umask_args *uap) 4211 { 4212 struct filedesc *fdp; 4213 4214 fdp = td->td_proc->p_fd; 4215 FILEDESC_XLOCK(fdp); 4216 td->td_retval[0] = fdp->fd_cmask; 4217 fdp->fd_cmask = uap->newmask & ALLPERMS; 4218 FILEDESC_XUNLOCK(fdp); 4219 return (0); 4220 } 4221 4222 /* 4223 * Void all references to file by ripping underlying filesystem away from 4224 * vnode. 
4225 */ 4226 #ifndef _SYS_SYSPROTO_H_ 4227 struct revoke_args { 4228 char *path; 4229 }; 4230 #endif 4231 int 4232 sys_revoke(struct thread *td, struct revoke_args *uap) 4233 { 4234 struct vnode *vp; 4235 struct vattr vattr; 4236 struct nameidata nd; 4237 int error; 4238 4239 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4240 uap->path, td); 4241 if ((error = namei(&nd)) != 0) 4242 return (error); 4243 vp = nd.ni_vp; 4244 NDFREE(&nd, NDF_ONLY_PNBUF); 4245 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4246 error = EINVAL; 4247 goto out; 4248 } 4249 #ifdef MAC 4250 error = mac_vnode_check_revoke(td->td_ucred, vp); 4251 if (error != 0) 4252 goto out; 4253 #endif 4254 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4255 if (error != 0) 4256 goto out; 4257 if (td->td_ucred->cr_uid != vattr.va_uid) { 4258 error = priv_check(td, PRIV_VFS_ADMIN); 4259 if (error != 0) 4260 goto out; 4261 } 4262 if (devfs_usecount(vp) > 0) 4263 VOP_REVOKE(vp, REVOKEALL); 4264 out: 4265 vput(vp); 4266 return (error); 4267 } 4268 4269 /* 4270 * Convert a user file descriptor to a kernel file entry and check that, if it 4271 * is a capability, the correct rights are present. A reference on the file 4272 * entry is held upon returning. 4273 */ 4274 int 4275 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4276 { 4277 struct file *fp; 4278 int error; 4279 4280 error = fget_unlocked(td->td_proc->p_fd, fd, rightsp, &fp); 4281 if (error != 0) 4282 return (error); 4283 4284 /* 4285 * The file could be not of the vnode type, or it may be not 4286 * yet fully initialized, in which case the f_vnode pointer 4287 * may be set, but f_ops is still badfileops. E.g., 4288 * devfs_open() transiently create such situation to 4289 * facilitate csw d_fdopen(). 4290 * 4291 * Dupfdopen() handling in kern_openat() installs the 4292 * half-baked file into the process descriptor table, allowing 4293 * other thread to dereference it. 
Guard against the race by 4294 * checking f_ops. 4295 */ 4296 if (fp->f_vnode == NULL || fp->f_ops == &badfileops) { 4297 fdrop(fp, td); 4298 return (EINVAL); 4299 } 4300 *fpp = fp; 4301 return (0); 4302 } 4303 4304 /* 4305 * Get an (NFS) file handle. 4306 */ 4307 #ifndef _SYS_SYSPROTO_H_ 4308 struct lgetfh_args { 4309 char *fname; 4310 fhandle_t *fhp; 4311 }; 4312 #endif 4313 int 4314 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4315 { 4316 4317 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4318 UIO_USERSPACE, uap->fhp)); 4319 } 4320 4321 #ifndef _SYS_SYSPROTO_H_ 4322 struct getfh_args { 4323 char *fname; 4324 fhandle_t *fhp; 4325 }; 4326 #endif 4327 int 4328 sys_getfh(struct thread *td, struct getfh_args *uap) 4329 { 4330 4331 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4332 uap->fhp)); 4333 } 4334 4335 /* 4336 * syscall for the rpc.lockd to use to translate an open descriptor into 4337 * a NFS file handle. 4338 * 4339 * warning: do not remove the priv_check() call or this becomes one giant 4340 * security hole. 
4341 */ 4342 #ifndef _SYS_SYSPROTO_H_ 4343 struct getfhat_args { 4344 int fd; 4345 char *path; 4346 fhandle_t *fhp; 4347 int flags; 4348 }; 4349 #endif 4350 int 4351 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4352 { 4353 4354 if ((uap->flags & ~(AT_SYMLINK_NOFOLLOW | AT_BENEATH | 4355 AT_RESOLVE_BENEATH)) != 0) 4356 return (EINVAL); 4357 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4358 uap->fhp)); 4359 } 4360 4361 static int 4362 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4363 enum uio_seg pathseg, fhandle_t *fhp) 4364 { 4365 struct nameidata nd; 4366 fhandle_t fh; 4367 struct vnode *vp; 4368 int error; 4369 4370 error = priv_check(td, PRIV_VFS_GETFH); 4371 if (error != 0) 4372 return (error); 4373 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4374 AT_BENEATH | AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, 4375 pathseg, path, fd, td); 4376 error = namei(&nd); 4377 if (error != 0) 4378 return (error); 4379 NDFREE(&nd, NDF_ONLY_PNBUF); 4380 vp = nd.ni_vp; 4381 bzero(&fh, sizeof(fh)); 4382 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4383 error = VOP_VPTOFH(vp, &fh.fh_fid); 4384 vput(vp); 4385 if (error == 0) 4386 error = copyout(&fh, fhp, sizeof (fh)); 4387 return (error); 4388 } 4389 4390 #ifndef _SYS_SYSPROTO_H_ 4391 struct fhlink_args { 4392 fhandle_t *fhp; 4393 const char *to; 4394 }; 4395 #endif 4396 int 4397 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4398 { 4399 4400 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4401 } 4402 4403 #ifndef _SYS_SYSPROTO_H_ 4404 struct fhlinkat_args { 4405 fhandle_t *fhp; 4406 int tofd; 4407 const char *to; 4408 }; 4409 #endif 4410 int 4411 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4412 { 4413 4414 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4415 } 4416 4417 static int 4418 kern_fhlinkat(struct thread *td, int fd, const char *path, 4419 enum uio_seg pathseg, 
fhandle_t *fhp) 4420 { 4421 fhandle_t fh; 4422 struct mount *mp; 4423 struct vnode *vp; 4424 int error; 4425 4426 error = priv_check(td, PRIV_VFS_GETFH); 4427 if (error != 0) 4428 return (error); 4429 error = copyin(fhp, &fh, sizeof(fh)); 4430 if (error != 0) 4431 return (error); 4432 do { 4433 bwillwrite(); 4434 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4435 return (ESTALE); 4436 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4437 vfs_unbusy(mp); 4438 if (error != 0) 4439 return (error); 4440 VOP_UNLOCK(vp); 4441 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4442 } while (error == EAGAIN || error == ERELOOKUP); 4443 return (error); 4444 } 4445 4446 #ifndef _SYS_SYSPROTO_H_ 4447 struct fhreadlink_args { 4448 fhandle_t *fhp; 4449 char *buf; 4450 size_t bufsize; 4451 }; 4452 #endif 4453 int 4454 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4455 { 4456 fhandle_t fh; 4457 struct mount *mp; 4458 struct vnode *vp; 4459 int error; 4460 4461 error = priv_check(td, PRIV_VFS_GETFH); 4462 if (error != 0) 4463 return (error); 4464 if (uap->bufsize > IOSIZE_MAX) 4465 return (EINVAL); 4466 error = copyin(uap->fhp, &fh, sizeof(fh)); 4467 if (error != 0) 4468 return (error); 4469 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4470 return (ESTALE); 4471 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4472 vfs_unbusy(mp); 4473 if (error != 0) 4474 return (error); 4475 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4476 vput(vp); 4477 return (error); 4478 } 4479 4480 /* 4481 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4482 * open descriptor. 4483 * 4484 * warning: do not remove the priv_check() call or this becomes one giant 4485 * security hole. 
4486 */ 4487 #ifndef _SYS_SYSPROTO_H_ 4488 struct fhopen_args { 4489 const struct fhandle *u_fhp; 4490 int flags; 4491 }; 4492 #endif 4493 int 4494 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4495 { 4496 struct mount *mp; 4497 struct vnode *vp; 4498 struct fhandle fhp; 4499 struct file *fp; 4500 int fmode, error; 4501 int indx; 4502 4503 error = priv_check(td, PRIV_VFS_FHOPEN); 4504 if (error != 0) 4505 return (error); 4506 indx = -1; 4507 fmode = FFLAGS(uap->flags); 4508 /* why not allow a non-read/write open for our lockd? */ 4509 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4510 return (EINVAL); 4511 error = copyin(uap->u_fhp, &fhp, sizeof(fhp)); 4512 if (error != 0) 4513 return(error); 4514 /* find the mount point */ 4515 mp = vfs_busyfs(&fhp.fh_fsid); 4516 if (mp == NULL) 4517 return (ESTALE); 4518 /* now give me my vnode, it gets returned to me locked */ 4519 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4520 vfs_unbusy(mp); 4521 if (error != 0) 4522 return (error); 4523 4524 error = falloc_noinstall(td, &fp); 4525 if (error != 0) { 4526 vput(vp); 4527 return (error); 4528 } 4529 /* 4530 * An extra reference on `fp' has been held for us by 4531 * falloc_noinstall(). 
4532 */ 4533 4534 #ifdef INVARIANTS 4535 td->td_dupfd = -1; 4536 #endif 4537 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4538 if (error != 0) { 4539 KASSERT(fp->f_ops == &badfileops, 4540 ("VOP_OPEN in fhopen() set f_ops")); 4541 KASSERT(td->td_dupfd < 0, 4542 ("fhopen() encountered fdopen()")); 4543 4544 vput(vp); 4545 goto bad; 4546 } 4547 #ifdef INVARIANTS 4548 td->td_dupfd = 0; 4549 #endif 4550 fp->f_vnode = vp; 4551 finit_vnode(fp, fmode, NULL, &vnops); 4552 VOP_UNLOCK(vp); 4553 if ((fmode & O_TRUNC) != 0) { 4554 error = fo_truncate(fp, 0, td->td_ucred, td); 4555 if (error != 0) 4556 goto bad; 4557 } 4558 4559 error = finstall(td, fp, &indx, fmode, NULL); 4560 bad: 4561 fdrop(fp, td); 4562 td->td_retval[0] = indx; 4563 return (error); 4564 } 4565 4566 /* 4567 * Stat an (NFS) file handle. 4568 */ 4569 #ifndef _SYS_SYSPROTO_H_ 4570 struct fhstat_args { 4571 struct fhandle *u_fhp; 4572 struct stat *sb; 4573 }; 4574 #endif 4575 int 4576 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4577 { 4578 struct stat sb; 4579 struct fhandle fh; 4580 int error; 4581 4582 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4583 if (error != 0) 4584 return (error); 4585 error = kern_fhstat(td, fh, &sb); 4586 if (error == 0) 4587 error = copyout(&sb, uap->sb, sizeof(sb)); 4588 return (error); 4589 } 4590 4591 int 4592 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4593 { 4594 struct mount *mp; 4595 struct vnode *vp; 4596 int error; 4597 4598 error = priv_check(td, PRIV_VFS_FHSTAT); 4599 if (error != 0) 4600 return (error); 4601 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4602 return (ESTALE); 4603 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4604 vfs_unbusy(mp); 4605 if (error != 0) 4606 return (error); 4607 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED, td); 4608 vput(vp); 4609 return (error); 4610 } 4611 4612 /* 4613 * Implement fstatfs() for (NFS) file handles. 
4614 */ 4615 #ifndef _SYS_SYSPROTO_H_ 4616 struct fhstatfs_args { 4617 struct fhandle *u_fhp; 4618 struct statfs *buf; 4619 }; 4620 #endif 4621 int 4622 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4623 { 4624 struct statfs *sfp; 4625 fhandle_t fh; 4626 int error; 4627 4628 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4629 if (error != 0) 4630 return (error); 4631 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4632 error = kern_fhstatfs(td, fh, sfp); 4633 if (error == 0) 4634 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4635 free(sfp, M_STATFS); 4636 return (error); 4637 } 4638 4639 int 4640 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4641 { 4642 struct mount *mp; 4643 struct vnode *vp; 4644 int error; 4645 4646 error = priv_check(td, PRIV_VFS_FHSTATFS); 4647 if (error != 0) 4648 return (error); 4649 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4650 return (ESTALE); 4651 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4652 if (error != 0) { 4653 vfs_unbusy(mp); 4654 return (error); 4655 } 4656 vput(vp); 4657 error = prison_canseemount(td->td_ucred, mp); 4658 if (error != 0) 4659 goto out; 4660 #ifdef MAC 4661 error = mac_mount_check_stat(td->td_ucred, mp); 4662 if (error != 0) 4663 goto out; 4664 #endif 4665 error = VFS_STATFS(mp, buf); 4666 out: 4667 vfs_unbusy(mp); 4668 return (error); 4669 } 4670 4671 /* 4672 * Unlike madvise(2), we do not make a best effort to remember every 4673 * possible caching hint. Instead, we remember the last setting with 4674 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4675 * region of any current setting. 
4676 */ 4677 int 4678 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4679 int advice) 4680 { 4681 struct fadvise_info *fa, *new; 4682 struct file *fp; 4683 struct vnode *vp; 4684 off_t end; 4685 int error; 4686 4687 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4688 return (EINVAL); 4689 AUDIT_ARG_VALUE(advice); 4690 switch (advice) { 4691 case POSIX_FADV_SEQUENTIAL: 4692 case POSIX_FADV_RANDOM: 4693 case POSIX_FADV_NOREUSE: 4694 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4695 break; 4696 case POSIX_FADV_NORMAL: 4697 case POSIX_FADV_WILLNEED: 4698 case POSIX_FADV_DONTNEED: 4699 new = NULL; 4700 break; 4701 default: 4702 return (EINVAL); 4703 } 4704 /* XXX: CAP_POSIX_FADVISE? */ 4705 AUDIT_ARG_FD(fd); 4706 error = fget(td, fd, &cap_no_rights, &fp); 4707 if (error != 0) 4708 goto out; 4709 AUDIT_ARG_FILE(td->td_proc, fp); 4710 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4711 error = ESPIPE; 4712 goto out; 4713 } 4714 if (fp->f_type != DTYPE_VNODE) { 4715 error = ENODEV; 4716 goto out; 4717 } 4718 vp = fp->f_vnode; 4719 if (vp->v_type != VREG) { 4720 error = ENODEV; 4721 goto out; 4722 } 4723 if (len == 0) 4724 end = OFF_MAX; 4725 else 4726 end = offset + len - 1; 4727 switch (advice) { 4728 case POSIX_FADV_SEQUENTIAL: 4729 case POSIX_FADV_RANDOM: 4730 case POSIX_FADV_NOREUSE: 4731 /* 4732 * Try to merge any existing non-standard region with 4733 * this new region if possible, otherwise create a new 4734 * non-standard region for this request. 
4735 */ 4736 mtx_pool_lock(mtxpool_sleep, fp); 4737 fa = fp->f_advice; 4738 if (fa != NULL && fa->fa_advice == advice && 4739 ((fa->fa_start <= end && fa->fa_end >= offset) || 4740 (end != OFF_MAX && fa->fa_start == end + 1) || 4741 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4742 if (offset < fa->fa_start) 4743 fa->fa_start = offset; 4744 if (end > fa->fa_end) 4745 fa->fa_end = end; 4746 } else { 4747 new->fa_advice = advice; 4748 new->fa_start = offset; 4749 new->fa_end = end; 4750 fp->f_advice = new; 4751 new = fa; 4752 } 4753 mtx_pool_unlock(mtxpool_sleep, fp); 4754 break; 4755 case POSIX_FADV_NORMAL: 4756 /* 4757 * If a the "normal" region overlaps with an existing 4758 * non-standard region, trim or remove the 4759 * non-standard region. 4760 */ 4761 mtx_pool_lock(mtxpool_sleep, fp); 4762 fa = fp->f_advice; 4763 if (fa != NULL) { 4764 if (offset <= fa->fa_start && end >= fa->fa_end) { 4765 new = fa; 4766 fp->f_advice = NULL; 4767 } else if (offset <= fa->fa_start && 4768 end >= fa->fa_start) 4769 fa->fa_start = end + 1; 4770 else if (offset <= fa->fa_end && end >= fa->fa_end) 4771 fa->fa_end = offset - 1; 4772 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4773 /* 4774 * If the "normal" region is a middle 4775 * portion of the existing 4776 * non-standard region, just remove 4777 * the whole thing rather than picking 4778 * one side or the other to 4779 * preserve. 
4780 */ 4781 new = fa; 4782 fp->f_advice = NULL; 4783 } 4784 } 4785 mtx_pool_unlock(mtxpool_sleep, fp); 4786 break; 4787 case POSIX_FADV_WILLNEED: 4788 case POSIX_FADV_DONTNEED: 4789 error = VOP_ADVISE(vp, offset, end, advice); 4790 break; 4791 } 4792 out: 4793 if (fp != NULL) 4794 fdrop(fp, td); 4795 free(new, M_FADVISE); 4796 return (error); 4797 } 4798 4799 int 4800 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4801 { 4802 int error; 4803 4804 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4805 uap->advice); 4806 return (kern_posix_error(td, error)); 4807 } 4808 4809 int 4810 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4811 off_t *outoffp, size_t len, unsigned int flags) 4812 { 4813 struct file *infp, *outfp; 4814 struct vnode *invp, *outvp; 4815 int error; 4816 size_t retlen; 4817 void *rl_rcookie, *rl_wcookie; 4818 off_t savinoff, savoutoff; 4819 4820 infp = outfp = NULL; 4821 rl_rcookie = rl_wcookie = NULL; 4822 savinoff = -1; 4823 error = 0; 4824 retlen = 0; 4825 4826 if (flags != 0) { 4827 error = EINVAL; 4828 goto out; 4829 } 4830 if (len > SSIZE_MAX) 4831 /* 4832 * Although the len argument is size_t, the return argument 4833 * is ssize_t (which is signed). Therefore a size that won't 4834 * fit in ssize_t can't be returned. 4835 */ 4836 len = SSIZE_MAX; 4837 4838 /* Get the file structures for the file descriptors. */ 4839 error = fget_read(td, infd, &cap_read_rights, &infp); 4840 if (error != 0) 4841 goto out; 4842 if (infp->f_ops == &badfileops) { 4843 error = EBADF; 4844 goto out; 4845 } 4846 if (infp->f_vnode == NULL) { 4847 error = EINVAL; 4848 goto out; 4849 } 4850 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4851 if (error != 0) 4852 goto out; 4853 if (outfp->f_ops == &badfileops) { 4854 error = EBADF; 4855 goto out; 4856 } 4857 if (outfp->f_vnode == NULL) { 4858 error = EINVAL; 4859 goto out; 4860 } 4861 4862 /* Set the offset pointers to the correct place. 
*/ 4863 if (inoffp == NULL) 4864 inoffp = &infp->f_offset; 4865 if (outoffp == NULL) 4866 outoffp = &outfp->f_offset; 4867 savinoff = *inoffp; 4868 savoutoff = *outoffp; 4869 4870 invp = infp->f_vnode; 4871 outvp = outfp->f_vnode; 4872 /* Sanity check the f_flag bits. */ 4873 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4874 (infp->f_flag & FREAD) == 0) { 4875 error = EBADF; 4876 goto out; 4877 } 4878 4879 /* If len == 0, just return 0. */ 4880 if (len == 0) 4881 goto out; 4882 4883 /* 4884 * If infp and outfp refer to the same file, the byte ranges cannot 4885 * overlap. 4886 */ 4887 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4888 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4889 savinoff))) { 4890 error = EINVAL; 4891 goto out; 4892 } 4893 4894 /* Range lock the byte ranges for both invp and outvp. */ 4895 for (;;) { 4896 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4897 len); 4898 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4899 len); 4900 if (rl_rcookie != NULL) 4901 break; 4902 vn_rangelock_unlock(outvp, rl_wcookie); 4903 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4904 vn_rangelock_unlock(invp, rl_rcookie); 4905 } 4906 4907 retlen = len; 4908 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4909 flags, infp->f_cred, outfp->f_cred, td); 4910 out: 4911 if (rl_rcookie != NULL) 4912 vn_rangelock_unlock(invp, rl_rcookie); 4913 if (rl_wcookie != NULL) 4914 vn_rangelock_unlock(outvp, rl_wcookie); 4915 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4916 *inoffp = savinoff; 4917 *outoffp = savoutoff; 4918 } 4919 if (outfp != NULL) 4920 fdrop(outfp, td); 4921 if (infp != NULL) 4922 fdrop(infp, td); 4923 td->td_retval[0] = retlen; 4924 return (error); 4925 } 4926 4927 int 4928 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 4929 { 4930 off_t inoff, outoff, *inoffp, *outoffp; 4931 int error; 4932 4933 inoffp = outoffp = 
NULL; 4934 if (uap->inoffp != NULL) { 4935 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 4936 if (error != 0) 4937 return (error); 4938 inoffp = &inoff; 4939 } 4940 if (uap->outoffp != NULL) { 4941 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 4942 if (error != 0) 4943 return (error); 4944 outoffp = &outoff; 4945 } 4946 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 4947 outoffp, uap->len, uap->flags); 4948 if (error == 0 && uap->inoffp != NULL) 4949 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 4950 if (error == 0 && uap->outoffp != NULL) 4951 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 4952 return (error); 4953 } 4954