1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 
35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #ifdef COMPAT_FREEBSD11 48 #include <sys/abi_compat.h> 49 #endif 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/capsicum.h> 53 #include <sys/disk.h> 54 #include <sys/sysent.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mutex.h> 58 #include <sys/sysproto.h> 59 #include <sys/namei.h> 60 #include <sys/filedesc.h> 61 #include <sys/kernel.h> 62 #include <sys/fcntl.h> 63 #include <sys/file.h> 64 #include <sys/filio.h> 65 #include <sys/limits.h> 66 #include <sys/linker.h> 67 #include <sys/rwlock.h> 68 #include <sys/sdt.h> 69 #include <sys/stat.h> 70 #include <sys/sx.h> 71 #include <sys/unistd.h> 72 #include <sys/vnode.h> 73 #include <sys/priv.h> 74 #include <sys/proc.h> 75 #include <sys/dirent.h> 76 #include <sys/jail.h> 77 #include <sys/syscallsubr.h> 78 #include <sys/sysctl.h> 79 #ifdef KTRACE 80 #include <sys/ktrace.h> 81 #endif 82 83 #include <machine/stdarg.h> 84 85 #include <security/audit/audit.h> 86 #include <security/mac/mac_framework.h> 87 88 #include <vm/vm.h> 89 #include <vm/vm_object.h> 90 #include <vm/vm_page.h> 91 #include <vm/uma.h> 92 93 #include <fs/devfs/devfs.h> 94 95 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 
106 struct thread *td); 107 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 108 enum uio_seg pathseg, fhandle_t *fhp); 109 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 110 size_t count, struct thread *td); 111 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 112 const char *path, enum uio_seg segflag); 113 114 static uint64_t 115 at2cnpflags(u_int at_flags, u_int mask) 116 { 117 uint64_t res; 118 119 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 120 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 121 122 res = 0; 123 at_flags &= mask; 124 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 125 res |= RBENEATH; 126 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 127 res |= FOLLOW; 128 /* NOFOLLOW is pseudo flag */ 129 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 130 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 131 FOLLOW; 132 } 133 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 134 res |= EMPTYPATH; 135 return (res); 136 } 137 138 int 139 kern_sync(struct thread *td) 140 { 141 struct mount *mp, *nmp; 142 int save; 143 144 mtx_lock(&mountlist_mtx); 145 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 146 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 continue; 149 } 150 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 151 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 152 save = curthread_pflags_set(TDP_SYNCIO); 153 vfs_periodic(mp, MNT_NOWAIT); 154 VFS_SYNC(mp, MNT_NOWAIT); 155 curthread_pflags_restore(save); 156 vn_finished_write(mp); 157 } 158 mtx_lock(&mountlist_mtx); 159 nmp = TAILQ_NEXT(mp, mnt_list); 160 vfs_unbusy(mp); 161 } 162 mtx_unlock(&mountlist_mtx); 163 return (0); 164 } 165 166 /* 167 * Sync each mounted filesystem. 
168 */ 169 #ifndef _SYS_SYSPROTO_H_ 170 struct sync_args { 171 int dummy; 172 }; 173 #endif 174 /* ARGSUSED */ 175 int 176 sys_sync(struct thread *td, struct sync_args *uap) 177 { 178 179 return (kern_sync(td)); 180 } 181 182 /* 183 * Change filesystem quotas. 184 */ 185 #ifndef _SYS_SYSPROTO_H_ 186 struct quotactl_args { 187 char *path; 188 int cmd; 189 int uid; 190 caddr_t arg; 191 }; 192 #endif 193 int 194 sys_quotactl(struct thread *td, struct quotactl_args *uap) 195 { 196 struct mount *mp; 197 struct nameidata nd; 198 int error; 199 bool mp_busy; 200 201 AUDIT_ARG_CMD(uap->cmd); 202 AUDIT_ARG_UID(uap->uid); 203 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 204 return (EPERM); 205 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 206 uap->path); 207 if ((error = namei(&nd)) != 0) 208 return (error); 209 NDFREE_PNBUF(&nd); 210 mp = nd.ni_vp->v_mount; 211 vfs_ref(mp); 212 vput(nd.ni_vp); 213 error = vfs_busy(mp, 0); 214 if (error != 0) { 215 vfs_rel(mp); 216 return (error); 217 } 218 mp_busy = true; 219 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 220 221 /* 222 * Since quota on/off operations typically need to open quota 223 * files, the implementation may need to unbusy the mount point 224 * before calling into namei. Otherwise, unmount might be 225 * started between two vfs_busy() invocations (first is ours, 226 * second is from mount point cross-walk code in lookup()), 227 * causing deadlock. 228 * 229 * Avoid unbusying mp if the implementation indicates it has 230 * already done so. 231 */ 232 if (mp_busy) 233 vfs_unbusy(mp); 234 vfs_rel(mp); 235 return (error); 236 } 237 238 /* 239 * Used by statfs conversion routines to scale the block size up if 240 * necessary so that all of the block counts are <= 'max_size'. Note 241 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 242 * value of 'n'. 
243 */ 244 void 245 statfs_scale_blocks(struct statfs *sf, long max_size) 246 { 247 uint64_t count; 248 int shift; 249 250 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 251 252 /* 253 * Attempt to scale the block counts to give a more accurate 254 * overview to userland of the ratio of free space to used 255 * space. To do this, find the largest block count and compute 256 * a divisor that lets it fit into a signed integer <= max_size. 257 */ 258 if (sf->f_bavail < 0) 259 count = -sf->f_bavail; 260 else 261 count = sf->f_bavail; 262 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 263 if (count <= max_size) 264 return; 265 266 count >>= flsl(max_size); 267 shift = 0; 268 while (count > 0) { 269 shift++; 270 count >>=1; 271 } 272 273 sf->f_bsize <<= shift; 274 sf->f_blocks >>= shift; 275 sf->f_bfree >>= shift; 276 sf->f_bavail >>= shift; 277 } 278 279 static int 280 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 281 { 282 int error; 283 284 if (mp == NULL) 285 return (EBADF); 286 error = vfs_busy(mp, 0); 287 vfs_rel(mp); 288 if (error != 0) 289 return (error); 290 #ifdef MAC 291 error = mac_mount_check_stat(td->td_ucred, mp); 292 if (error != 0) 293 goto out; 294 #endif 295 error = VFS_STATFS(mp, buf); 296 if (error != 0) 297 goto out; 298 if (priv_check_cred_vfs_generation(td->td_ucred)) { 299 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 300 prison_enforce_statfs(td->td_ucred, mp, buf); 301 } 302 out: 303 vfs_unbusy(mp); 304 return (error); 305 } 306 307 /* 308 * Get filesystem statistics. 
309 */ 310 #ifndef _SYS_SYSPROTO_H_ 311 struct statfs_args { 312 char *path; 313 struct statfs *buf; 314 }; 315 #endif 316 int 317 sys_statfs(struct thread *td, struct statfs_args *uap) 318 { 319 struct statfs *sfp; 320 int error; 321 322 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 323 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 324 if (error == 0) 325 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 326 free(sfp, M_STATFS); 327 return (error); 328 } 329 330 int 331 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 332 struct statfs *buf) 333 { 334 struct mount *mp; 335 struct nameidata nd; 336 int error; 337 338 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 339 error = namei(&nd); 340 if (error != 0) 341 return (error); 342 mp = vfs_ref_from_vp(nd.ni_vp); 343 NDFREE_NOTHING(&nd); 344 vrele(nd.ni_vp); 345 return (kern_do_statfs(td, mp, buf)); 346 } 347 348 /* 349 * Get filesystem statistics. 350 */ 351 #ifndef _SYS_SYSPROTO_H_ 352 struct fstatfs_args { 353 int fd; 354 struct statfs *buf; 355 }; 356 #endif 357 int 358 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 359 { 360 struct statfs *sfp; 361 int error; 362 363 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 364 error = kern_fstatfs(td, uap->fd, sfp); 365 if (error == 0) 366 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 367 free(sfp, M_STATFS); 368 return (error); 369 } 370 371 int 372 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 373 { 374 struct file *fp; 375 struct mount *mp; 376 struct vnode *vp; 377 int error; 378 379 AUDIT_ARG_FD(fd); 380 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 381 if (error != 0) 382 return (error); 383 vp = fp->f_vnode; 384 #ifdef AUDIT 385 if (AUDITING_TD(td)) { 386 vn_lock(vp, LK_SHARED | LK_RETRY); 387 AUDIT_ARG_VNODE1(vp); 388 VOP_UNLOCK(vp); 389 } 390 #endif 391 mp = vfs_ref_from_vp(vp); 392 fdrop(fp, td); 393 return (kern_do_statfs(td, mp, buf)); 394 } 395 396 
/*
 * Get statistics on all filesystems.
 */
#ifndef _SYS_SYSPROTO_H_
struct getfsstat_args {
	struct statfs *buf;
	long bufsize;
	int mode;
};
#endif
/*
 * getfsstat(2) entry point: validate the buffer size, let
 * kern_getfsstat() write directly into the user buffer, and report the
 * resulting entry count as the syscall return value.
 */
int
sys_getfsstat(struct thread *td, struct getfsstat_args *uap)
{
	size_t count;
	int error;

	if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX)
		return (EINVAL);
	error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count,
	    UIO_USERSPACE, uap->mode);
	if (error == 0)
		td->td_retval[0] = count;
	return (error);
}

/*
 * Collect statistics for the mounted filesystems visible to 'td'.
 *
 * 'mode' must be MNT_WAIT or MNT_NOWAIT, otherwise EINVAL is returned
 * (and '*buf' is set to NULL for UIO_SYSSPACE).  With MNT_NOWAIT the
 * statistics cached in the mount are used without calling VFS_STATFS().
 *
 * If 'bufsize' is 0 only the number of visible mounts is stored in
 * '*countp'.  Otherwise at most bufsize / sizeof(struct statfs) entries
 * are produced and '*countp' receives the number actually written.  For
 * UIO_USERSPACE '*buf' is the destination address handed to copyout().
 *
 * If (bufsize > 0 && bufseg == UIO_SYSSPACE)
 *	The caller is responsible for freeing memory which will be allocated
 *	in '*buf'.
 */
int
kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize,
    size_t *countp, enum uio_seg bufseg, int mode)
{
	struct mount *mp, *nmp;
	struct statfs *sfsp, *sp, *sptmp, *tofree;
	size_t count, maxcount;
	int error;

	switch (mode) {
	case MNT_WAIT:
	case MNT_NOWAIT:
		break;
	default:
		if (bufseg == UIO_SYSSPACE)
			*buf = NULL;
		return (EINVAL);
	}
restart:
	maxcount = bufsize / sizeof(struct statfs);
	if (bufsize == 0) {
		sfsp = NULL;
		tofree = NULL;
	} else if (bufseg == UIO_USERSPACE) {
		sfsp = *buf;
		tofree = NULL;
	} else /* if (bufseg == UIO_SYSSPACE) */ {
		/*
		 * Size the kernel buffer by the current number of mounts
		 * so that a huge 'bufsize' does not turn into a huge
		 * allocation.
		 */
		count = 0;
		mtx_lock(&mountlist_mtx);
		TAILQ_FOREACH(mp, &mountlist, mnt_list) {
			count++;
		}
		mtx_unlock(&mountlist_mtx);
		if (maxcount > count)
			maxcount = count;
		tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs),
		    M_STATFS, M_WAITOK);
	}

	count = 0;

	/*
	 * If there is no target buffer they only want the count.
	 *
	 * This could be TAILQ_FOREACH but it is open-coded to match the original
	 * code below.
	 */
	if (sfsp == NULL) {
		mtx_lock(&mountlist_mtx);
		for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
			if (prison_canseemount(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#ifdef MAC
			if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
#endif
			count++;
			nmp = TAILQ_NEXT(mp, mnt_list);
		}
		mtx_unlock(&mountlist_mtx);
		*countp = count;
		return (0);
	}

	/*
	 * They want the entire thing.
	 *
	 * Short-circuit the corner case of no room for anything, avoids
	 * relocking below.
	 */
	if (maxcount < 1) {
		goto out;
	}

	mtx_lock(&mountlist_mtx);
	for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) {
		/* Mounts the caller may not see are silently skipped. */
		if (prison_canseemount(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#ifdef MAC
		if (mac_mount_check_stat(td->td_ucred, mp) != 0) {
			nmp = TAILQ_NEXT(mp, mnt_list);
			continue;
		}
#endif
		if (mode == MNT_WAIT) {
			if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) {
				/*
				 * If vfs_busy() failed, and MBF_NOWAIT
				 * wasn't passed, then the mp is gone.
				 * Furthermore, because of MBF_MNTLSTLOCK,
				 * the mountlist_mtx was dropped.  We have
				 * no other choice than to start over.
				 */
				mtx_unlock(&mountlist_mtx);
				free(tofree, M_STATFS);
				goto restart;
			}
		} else {
			if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) {
				nmp = TAILQ_NEXT(mp, mnt_list);
				continue;
			}
		}
		sp = &mp->mnt_stat;
		/*
		 * If MNT_NOWAIT is specified, do not refresh
		 * the fsstat cache.
		 */
		if (mode != MNT_NOWAIT) {
			error = VFS_STATFS(mp, sp);
			if (error != 0) {
				/* Skip this mount, do not fail the call. */
				mtx_lock(&mountlist_mtx);
				nmp = TAILQ_NEXT(mp, mnt_list);
				vfs_unbusy(mp);
				continue;
			}
		}
		if (priv_check_cred_vfs_generation(td->td_ucred)) {
			/*
			 * Work on a private copy so that hiding the fsid
			 * and applying the jail view does not modify the
			 * statistics stored in the mount itself.
			 */
			sptmp = malloc(sizeof(struct statfs), M_STATFS,
			    M_WAITOK);
			*sptmp = *sp;
			sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0;
			prison_enforce_statfs(td->td_ucred, mp, sptmp);
			sp = sptmp;
		} else
			sptmp = NULL;
		if (bufseg == UIO_SYSSPACE) {
			bcopy(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
		} else /* if (bufseg == UIO_USERSPACE) */ {
			error = copyout(sp, sfsp, sizeof(*sp));
			free(sptmp, M_STATFS);
			if (error != 0) {
				vfs_unbusy(mp);
				return (error);
			}
		}
		sfsp++;
		count++;

		/* Buffer is full: stop without re-taking the list lock. */
		if (count == maxcount) {
			vfs_unbusy(mp);
			goto out;
		}

		/*
		 * Re-take the list lock and fetch the successor before
		 * the busy reference on this mount is released.
		 */
		mtx_lock(&mountlist_mtx);
		nmp = TAILQ_NEXT(mp, mnt_list);
		vfs_unbusy(mp);
	}
	mtx_unlock(&mountlist_mtx);
out:
	*countp = count;
	return (0);
}

#ifdef COMPAT_FREEBSD4
/*
 * Get old format filesystem statistics.
 */
static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *);

#ifndef _SYS_SYSPROTO_H_
struct freebsd4_statfs_args {
	char *path;
	struct ostatfs *buf;
};
#endif
/*
 * FreeBSD 4 compatible statfs(2): fetch the current-format statistics
 * for 'path', convert them to struct ostatfs and copy that out.
 */
int
freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap)
{
	struct ostatfs osb;
	struct statfs *sfp;
	int error;

	sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK);
	error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp);
	if (error == 0) {
		freebsd4_cvtstatfs(sfp, &osb);
		error = copyout(&osb, uap->buf, sizeof(osb));
	}
	free(sfp, M_STATFS);
	return (error);
}

/*
 * Get filesystem statistics.
618 */ 619 #ifndef _SYS_SYSPROTO_H_ 620 struct freebsd4_fstatfs_args { 621 int fd; 622 struct ostatfs *buf; 623 }; 624 #endif 625 int 626 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 627 { 628 struct ostatfs osb; 629 struct statfs *sfp; 630 int error; 631 632 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 633 error = kern_fstatfs(td, uap->fd, sfp); 634 if (error == 0) { 635 freebsd4_cvtstatfs(sfp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 } 638 free(sfp, M_STATFS); 639 return (error); 640 } 641 642 /* 643 * Get statistics on all filesystems. 644 */ 645 #ifndef _SYS_SYSPROTO_H_ 646 struct freebsd4_getfsstat_args { 647 struct ostatfs *buf; 648 long bufsize; 649 int mode; 650 }; 651 #endif 652 int 653 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 654 { 655 struct statfs *buf, *sp; 656 struct ostatfs osb; 657 size_t count, size; 658 int error; 659 660 if (uap->bufsize < 0) 661 return (EINVAL); 662 count = uap->bufsize / sizeof(struct ostatfs); 663 if (count > SIZE_MAX / sizeof(struct statfs)) 664 return (EINVAL); 665 size = count * sizeof(struct statfs); 666 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 667 uap->mode); 668 if (error == 0) 669 td->td_retval[0] = count; 670 if (size != 0) { 671 sp = buf; 672 while (count != 0 && error == 0) { 673 freebsd4_cvtstatfs(sp, &osb); 674 error = copyout(&osb, uap->buf, sizeof(osb)); 675 sp++; 676 uap->buf++; 677 count--; 678 } 679 free(buf, M_STATFS); 680 } 681 return (error); 682 } 683 684 /* 685 * Implement fstatfs() for (NFS) file handles. 
686 */ 687 #ifndef _SYS_SYSPROTO_H_ 688 struct freebsd4_fhstatfs_args { 689 struct fhandle *u_fhp; 690 struct ostatfs *buf; 691 }; 692 #endif 693 int 694 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 695 { 696 struct ostatfs osb; 697 struct statfs *sfp; 698 fhandle_t fh; 699 int error; 700 701 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 702 if (error != 0) 703 return (error); 704 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 705 error = kern_fhstatfs(td, fh, sfp); 706 if (error == 0) { 707 freebsd4_cvtstatfs(sfp, &osb); 708 error = copyout(&osb, uap->buf, sizeof(osb)); 709 } 710 free(sfp, M_STATFS); 711 return (error); 712 } 713 714 /* 715 * Convert a new format statfs structure to an old format statfs structure. 716 */ 717 static void 718 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 719 { 720 721 statfs_scale_blocks(nsp, LONG_MAX); 722 bzero(osp, sizeof(*osp)); 723 osp->f_bsize = nsp->f_bsize; 724 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 725 osp->f_blocks = nsp->f_blocks; 726 osp->f_bfree = nsp->f_bfree; 727 osp->f_bavail = nsp->f_bavail; 728 osp->f_files = MIN(nsp->f_files, LONG_MAX); 729 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 730 osp->f_owner = nsp->f_owner; 731 osp->f_type = nsp->f_type; 732 osp->f_flags = nsp->f_flags; 733 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 734 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 735 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 736 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 737 strlcpy(osp->f_fstypename, nsp->f_fstypename, 738 MIN(MFSNAMELEN, OMFSNAMELEN)); 739 strlcpy(osp->f_mntonname, nsp->f_mntonname, 740 MIN(MNAMELEN, OMNAMELEN)); 741 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 742 MIN(MNAMELEN, OMNAMELEN)); 743 osp->f_fsid = nsp->f_fsid; 744 } 745 #endif /* COMPAT_FREEBSD4 */ 746 747 #if defined(COMPAT_FREEBSD11) 748 /* 749 * Get old format filesystem statistics. 
750 */ 751 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 752 753 int 754 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 755 { 756 struct freebsd11_statfs osb; 757 struct statfs *sfp; 758 int error; 759 760 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 761 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 762 if (error == 0) { 763 freebsd11_cvtstatfs(sfp, &osb); 764 error = copyout(&osb, uap->buf, sizeof(osb)); 765 } 766 free(sfp, M_STATFS); 767 return (error); 768 } 769 770 /* 771 * Get filesystem statistics. 772 */ 773 int 774 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 775 { 776 struct freebsd11_statfs osb; 777 struct statfs *sfp; 778 int error; 779 780 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 781 error = kern_fstatfs(td, uap->fd, sfp); 782 if (error == 0) { 783 freebsd11_cvtstatfs(sfp, &osb); 784 error = copyout(&osb, uap->buf, sizeof(osb)); 785 } 786 free(sfp, M_STATFS); 787 return (error); 788 } 789 790 /* 791 * Get statistics on all filesystems. 
792 */ 793 int 794 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 795 { 796 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 797 } 798 799 int 800 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 801 long bufsize, int mode) 802 { 803 struct freebsd11_statfs osb; 804 struct statfs *buf, *sp; 805 size_t count, size; 806 int error; 807 808 if (bufsize < 0) 809 return (EINVAL); 810 811 count = bufsize / sizeof(struct ostatfs); 812 size = count * sizeof(struct statfs); 813 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 814 if (error == 0) 815 td->td_retval[0] = count; 816 if (size > 0) { 817 sp = buf; 818 while (count > 0 && error == 0) { 819 freebsd11_cvtstatfs(sp, &osb); 820 error = copyout(&osb, ubuf, sizeof(osb)); 821 sp++; 822 ubuf++; 823 count--; 824 } 825 free(buf, M_STATFS); 826 } 827 return (error); 828 } 829 830 /* 831 * Implement fstatfs() for (NFS) file handles. 832 */ 833 int 834 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 835 { 836 struct freebsd11_statfs osb; 837 struct statfs *sfp; 838 fhandle_t fh; 839 int error; 840 841 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 842 if (error) 843 return (error); 844 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 845 error = kern_fhstatfs(td, fh, sfp); 846 if (error == 0) { 847 freebsd11_cvtstatfs(sfp, &osb); 848 error = copyout(&osb, uap->buf, sizeof(osb)); 849 } 850 free(sfp, M_STATFS); 851 return (error); 852 } 853 854 /* 855 * Convert a new format statfs structure to an old format statfs structure. 
856 */ 857 static void 858 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 859 { 860 861 bzero(osp, sizeof(*osp)); 862 osp->f_version = FREEBSD11_STATFS_VERSION; 863 osp->f_type = nsp->f_type; 864 osp->f_flags = nsp->f_flags; 865 osp->f_bsize = nsp->f_bsize; 866 osp->f_iosize = nsp->f_iosize; 867 osp->f_blocks = nsp->f_blocks; 868 osp->f_bfree = nsp->f_bfree; 869 osp->f_bavail = nsp->f_bavail; 870 osp->f_files = nsp->f_files; 871 osp->f_ffree = nsp->f_ffree; 872 osp->f_syncwrites = nsp->f_syncwrites; 873 osp->f_asyncwrites = nsp->f_asyncwrites; 874 osp->f_syncreads = nsp->f_syncreads; 875 osp->f_asyncreads = nsp->f_asyncreads; 876 osp->f_namemax = nsp->f_namemax; 877 osp->f_owner = nsp->f_owner; 878 osp->f_fsid = nsp->f_fsid; 879 strlcpy(osp->f_fstypename, nsp->f_fstypename, 880 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 881 strlcpy(osp->f_mntonname, nsp->f_mntonname, 882 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 883 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 884 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 885 } 886 #endif /* COMPAT_FREEBSD11 */ 887 888 /* 889 * Change current working directory to a given file descriptor. 
890 */ 891 #ifndef _SYS_SYSPROTO_H_ 892 struct fchdir_args { 893 int fd; 894 }; 895 #endif 896 int 897 sys_fchdir(struct thread *td, struct fchdir_args *uap) 898 { 899 struct vnode *vp, *tdp; 900 struct mount *mp; 901 struct file *fp; 902 int error; 903 904 AUDIT_ARG_FD(uap->fd); 905 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 906 &fp); 907 if (error != 0) 908 return (error); 909 vp = fp->f_vnode; 910 vref(vp); 911 fdrop(fp, td); 912 vn_lock(vp, LK_SHARED | LK_RETRY); 913 AUDIT_ARG_VNODE1(vp); 914 error = change_dir(vp, td); 915 while (!error && (mp = vp->v_mountedhere) != NULL) { 916 if (vfs_busy(mp, 0)) 917 continue; 918 error = VFS_ROOT(mp, LK_SHARED, &tdp); 919 vfs_unbusy(mp); 920 if (error != 0) 921 break; 922 vput(vp); 923 vp = tdp; 924 } 925 if (error != 0) { 926 vput(vp); 927 return (error); 928 } 929 VOP_UNLOCK(vp); 930 pwd_chdir(td, vp); 931 return (0); 932 } 933 934 /* 935 * Change current working directory (``.''). 936 */ 937 #ifndef _SYS_SYSPROTO_H_ 938 struct chdir_args { 939 char *path; 940 }; 941 #endif 942 int 943 sys_chdir(struct thread *td, struct chdir_args *uap) 944 { 945 946 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 947 } 948 949 int 950 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 951 { 952 struct nameidata nd; 953 int error; 954 955 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 956 pathseg, path); 957 if ((error = namei(&nd)) != 0) 958 return (error); 959 if ((error = change_dir(nd.ni_vp, td)) != 0) { 960 vput(nd.ni_vp); 961 NDFREE_NOTHING(&nd); 962 return (error); 963 } 964 VOP_UNLOCK(nd.ni_vp); 965 NDFREE_NOTHING(&nd); 966 pwd_chdir(td, nd.ni_vp); 967 return (0); 968 } 969 970 static int unprivileged_chroot = 0; 971 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 972 &unprivileged_chroot, 0, 973 "Unprivileged processes can use chroot(2)"); 974 /* 975 * Change notion of root (``/'') directory. 
976 */ 977 #ifndef _SYS_SYSPROTO_H_ 978 struct chroot_args { 979 char *path; 980 }; 981 #endif 982 int 983 sys_chroot(struct thread *td, struct chroot_args *uap) 984 { 985 struct nameidata nd; 986 struct proc *p; 987 int error; 988 989 error = priv_check(td, PRIV_VFS_CHROOT); 990 if (error != 0) { 991 p = td->td_proc; 992 PROC_LOCK(p); 993 if (unprivileged_chroot == 0 || 994 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 995 PROC_UNLOCK(p); 996 return (error); 997 } 998 PROC_UNLOCK(p); 999 } 1000 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1001 UIO_USERSPACE, uap->path); 1002 error = namei(&nd); 1003 if (error != 0) 1004 goto error; 1005 error = change_dir(nd.ni_vp, td); 1006 if (error != 0) 1007 goto e_vunlock; 1008 #ifdef MAC 1009 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 1010 if (error != 0) 1011 goto e_vunlock; 1012 #endif 1013 VOP_UNLOCK(nd.ni_vp); 1014 error = pwd_chroot(td, nd.ni_vp); 1015 vrele(nd.ni_vp); 1016 NDFREE_NOTHING(&nd); 1017 return (error); 1018 e_vunlock: 1019 vput(nd.ni_vp); 1020 error: 1021 NDFREE_NOTHING(&nd); 1022 return (error); 1023 } 1024 1025 /* 1026 * Common routine for chroot and chdir. Callers must provide a locked vnode 1027 * instance. 
1028 */ 1029 int 1030 change_dir(struct vnode *vp, struct thread *td) 1031 { 1032 #ifdef MAC 1033 int error; 1034 #endif 1035 1036 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1037 if (vp->v_type != VDIR) 1038 return (ENOTDIR); 1039 #ifdef MAC 1040 error = mac_vnode_check_chdir(td->td_ucred, vp); 1041 if (error != 0) 1042 return (error); 1043 #endif 1044 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1045 } 1046 1047 static __inline void 1048 flags_to_rights(int flags, cap_rights_t *rightsp) 1049 { 1050 if (flags & O_EXEC) { 1051 cap_rights_set_one(rightsp, CAP_FEXECVE); 1052 if (flags & O_PATH) 1053 return; 1054 } else { 1055 switch ((flags & O_ACCMODE)) { 1056 case O_RDONLY: 1057 cap_rights_set_one(rightsp, CAP_READ); 1058 break; 1059 case O_RDWR: 1060 cap_rights_set_one(rightsp, CAP_READ); 1061 /* FALLTHROUGH */ 1062 case O_WRONLY: 1063 cap_rights_set_one(rightsp, CAP_WRITE); 1064 if (!(flags & (O_APPEND | O_TRUNC))) 1065 cap_rights_set_one(rightsp, CAP_SEEK); 1066 break; 1067 } 1068 } 1069 1070 if (flags & O_CREAT) 1071 cap_rights_set_one(rightsp, CAP_CREATE); 1072 1073 if (flags & O_TRUNC) 1074 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1075 1076 if (flags & (O_SYNC | O_FSYNC)) 1077 cap_rights_set_one(rightsp, CAP_FSYNC); 1078 1079 if (flags & (O_EXLOCK | O_SHLOCK)) 1080 cap_rights_set_one(rightsp, CAP_FLOCK); 1081 } 1082 1083 /* 1084 * Check permissions, allocate an open file structure, and call the device 1085 * open routine if any. 
*/
#ifndef _SYS_SYSPROTO_H_
struct open_args {
	char *path;
	int flags;
	int mode;
};
#endif
/*
 * open(2): open 'path' relative to the current working directory.
 */
int
sys_open(struct thread *td, struct open_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    uap->flags, uap->mode));
}

#ifndef _SYS_SYSPROTO_H_
struct openat_args {
	int fd;
	char *path;
	int flag;
	int mode;
};
#endif
/*
 * openat(2): open 'path' relative to the directory descriptor 'fd'.
 */
int
sys_openat(struct thread *td, struct openat_args *uap)
{

	AUDIT_ARG_FD(uap->fd);
	return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag,
	    uap->mode));
}

/*
 * Common implementation of open(2) and openat(2).
 *
 * Opens 'path' (interpreted relative to 'fd', and living in the address
 * space named by 'pathseg') with the open(2) 'flags'.  'mode' supplies
 * the permission bits; it is filtered through the process umask, limited
 * to ALLPERMS and stripped of S_ISTXT before being passed to
 * vn_open_cred().
 *
 * On success the new descriptor number is stored in td->td_retval[0] and
 * 0 is returned.  Returns EINVAL for an invalid access mode combination,
 * otherwise the error from file allocation, the open itself, truncation
 * or descriptor installation.
 */
int
kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg,
    int flags, int mode)
{
	struct proc *p = td->td_proc;
	struct filedesc *fdp;
	struct pwddesc *pdp;
	struct file *fp;
	struct vnode *vp;
	struct nameidata nd;
	cap_rights_t rights;
	int cmode, error, indx;

	/* -1 means "no descriptor installed yet"; see success: and bad:. */
	indx = -1;
	fdp = p->p_fd;
	pdp = p->p_pd;

	AUDIT_ARG_FFLAGS(flags);
	AUDIT_ARG_MODE(mode);
	/* Rights required on 'fd' are derived from the caller's raw flags. */
	cap_rights_init_one(&rights, CAP_LOOKUP);
	flags_to_rights(flags, &rights);

	/*
	 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags
	 * may be specified.  On the other hand, for O_PATH any mode
	 * except O_EXEC is ignored.
	 */
	if ((flags & O_PATH) != 0) {
		flags &= ~(O_CREAT | O_ACCMODE);
	} else if ((flags & O_EXEC) != 0) {
		if (flags & O_ACCMODE)
			return (EINVAL);
	} else if ((flags & O_ACCMODE) == O_ACCMODE) {
		return (EINVAL);
	} else {
		flags = FFLAGS(flags);
	}

	/*
	 * Allocate a file structure. The descriptor to reference it
	 * is allocated and used by finstall_refed() below.
	 */
	error = falloc_noinstall(td, &fp);
	if (error != 0)
		return (error);
	/* Set the flags early so the finit in devfs can pick them up. */
	fp->f_flag = flags & FMASK;
	cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT;
	NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1 | WANTIOCTLCAPS,
	    pathseg, path, fd, &rights);
	td->td_dupfd = -1;		/* XXX check for fdopen */
	error = vn_open_cred(&nd, &flags, cmode, VN_OPEN_WANTIOCTLCAPS,
	    td->td_ucred, fp);
	if (error != 0) {
		/*
		 * If the vn_open replaced the method vector, something
		 * wonderous happened deep below and we just pass it up
		 * pretending we know what we do.
		 */
		if (error == ENXIO && fp->f_ops != &badfileops) {
			MPASS((flags & O_PATH) == 0);
			goto success;
		}

		/*
		 * Handle special fdopen() case. bleh.
		 *
		 * Don't do this for relative (capability) lookups; we don't
		 * understand exactly what would happen, and we don't think
		 * that it ever should.
		 */
		if ((nd.ni_resflags & NIRES_STRICTREL) == 0 &&
		    (error == ENODEV || error == ENXIO) &&
		    td->td_dupfd >= 0) {
			/* On success dupfdopen() sets indx to the new fd. */
			error = dupfdopen(td, fdp, td->td_dupfd, flags, error,
			    &indx);
			if (error == 0)
				goto success;
		}

		goto bad;
	}
	td->td_dupfd = 0;
	NDFREE_PNBUF(&nd);
	vp = nd.ni_vp;

	/*
	 * Store the vnode, for any f_type. Typically, the vnode use
	 * count is decremented by direct call to vn_closefile() for
	 * files that switched type in the cdevsw fdopen() method.
	 */
	fp->f_vnode = vp;

	/*
	 * If the file wasn't claimed by devfs bind it to the normal
	 * vnode operations here.
	 */
	if (fp->f_ops == &badfileops) {
		KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0,
		    ("Unexpected fifo fp %p vp %p", fp, vp));
		if ((flags & O_PATH) != 0) {
			/*
			 * O_PATH descriptors use path_fileops and trade
			 * the use reference on the vnode for a hold.
			 */
			finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED),
			    DTYPE_VNODE, NULL, &path_fileops);
			vhold(vp);
			vunref(vp);
		} else {
			finit_vnode(fp, flags, NULL, &vnops);
		}
	}

	VOP_UNLOCK(vp);
	if (flags & O_TRUNC) {
		error = fo_truncate(fp, 0, td->td_ucred, td);
		if (error != 0)
			goto bad;
	}
success:
	/*
	 * If we haven't already installed the FD (for dupfdopen), do so now.
	 */
	if (indx == -1) {
		struct filecaps *fcaps;

#ifdef CAPABILITIES
		if ((nd.ni_resflags & NIRES_STRICTREL) != 0)
			fcaps = &nd.ni_filecaps;
		else
#endif
			fcaps = NULL;
		error = finstall_refed(td, fp, &indx, flags, fcaps);
		/* On success finstall_refed() consumes fcaps. */
		if (error != 0) {
			goto bad;
		}
	} else {
		/*
		 * dupfdopen() already installed a descriptor, so the file
		 * allocated above is not needed and is released.
		 */
		NDFREE_IOCTLCAPS(&nd);
		falloc_abort(td, fp);
	}

	td->td_retval[0] = indx;
	return (0);
bad:
	KASSERT(indx == -1, ("indx=%d, should be -1", indx));
	NDFREE_IOCTLCAPS(&nd);
	falloc_abort(td, fp);
	return (error);
}

#ifdef COMPAT_43
/*
 * Create a file.
 */
#ifndef _SYS_SYSPROTO_H_
struct ocreat_args {
	char *path;
	int mode;
};
#endif
/*
 * Old creat(2): equivalent to open(2) with O_WRONLY | O_CREAT | O_TRUNC.
 */
int
ocreat(struct thread *td, struct ocreat_args *uap)
{

	return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE,
	    O_WRONLY | O_CREAT | O_TRUNC, uap->mode));
}
#endif /* COMPAT_43 */

/*
 * Create a special file.
1288 */ 1289 #ifndef _SYS_SYSPROTO_H_ 1290 struct mknodat_args { 1291 int fd; 1292 char *path; 1293 mode_t mode; 1294 dev_t dev; 1295 }; 1296 #endif 1297 int 1298 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1299 { 1300 1301 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1302 uap->dev)); 1303 } 1304 1305 #if defined(COMPAT_FREEBSD11) 1306 int 1307 freebsd11_mknod(struct thread *td, 1308 struct freebsd11_mknod_args *uap) 1309 { 1310 1311 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1312 uap->mode, uap->dev)); 1313 } 1314 1315 int 1316 freebsd11_mknodat(struct thread *td, 1317 struct freebsd11_mknodat_args *uap) 1318 { 1319 1320 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1321 uap->dev)); 1322 } 1323 #endif /* COMPAT_FREEBSD11 */ 1324 1325 int 1326 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1327 int mode, dev_t dev) 1328 { 1329 struct vnode *vp; 1330 struct mount *mp; 1331 struct vattr vattr; 1332 struct nameidata nd; 1333 int error, whiteout = 0; 1334 1335 AUDIT_ARG_MODE(mode); 1336 AUDIT_ARG_DEV(dev); 1337 switch (mode & S_IFMT) { 1338 case S_IFCHR: 1339 case S_IFBLK: 1340 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1341 if (error == 0 && dev == VNOVAL) 1342 error = EINVAL; 1343 break; 1344 case S_IFWHT: 1345 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1346 break; 1347 case S_IFIFO: 1348 if (dev == 0) 1349 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1350 /* FALLTHROUGH */ 1351 default: 1352 error = EINVAL; 1353 break; 1354 } 1355 if (error != 0) 1356 return (error); 1357 NDPREINIT(&nd); 1358 restart: 1359 bwillwrite(); 1360 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1361 NOCACHE, pathseg, path, fd, &cap_mknodat_rights); 1362 if ((error = namei(&nd)) != 0) 1363 return (error); 1364 vp = nd.ni_vp; 1365 if (vp != NULL) { 1366 NDFREE_PNBUF(&nd); 1367 if (vp == nd.ni_dvp) 1368 vrele(nd.ni_dvp); 1369 else 1370 
vput(nd.ni_dvp); 1371 vrele(vp); 1372 return (EEXIST); 1373 } else { 1374 VATTR_NULL(&vattr); 1375 vattr.va_mode = (mode & ALLPERMS) & 1376 ~td->td_proc->p_pd->pd_cmask; 1377 vattr.va_rdev = dev; 1378 whiteout = 0; 1379 1380 switch (mode & S_IFMT) { 1381 case S_IFCHR: 1382 vattr.va_type = VCHR; 1383 break; 1384 case S_IFBLK: 1385 vattr.va_type = VBLK; 1386 break; 1387 case S_IFWHT: 1388 whiteout = 1; 1389 break; 1390 default: 1391 panic("kern_mknod: invalid mode"); 1392 } 1393 } 1394 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1395 NDFREE_PNBUF(&nd); 1396 vput(nd.ni_dvp); 1397 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1398 return (error); 1399 goto restart; 1400 } 1401 #ifdef MAC 1402 if (error == 0 && !whiteout) 1403 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1404 &nd.ni_cnd, &vattr); 1405 #endif 1406 if (error == 0) { 1407 if (whiteout) 1408 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1409 else { 1410 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1411 &nd.ni_cnd, &vattr); 1412 } 1413 } 1414 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1415 true); 1416 vn_finished_write(mp); 1417 NDFREE_PNBUF(&nd); 1418 if (error == ERELOOKUP) 1419 goto restart; 1420 return (error); 1421 } 1422 1423 /* 1424 * Create a named pipe. 
1425 */ 1426 #ifndef _SYS_SYSPROTO_H_ 1427 struct mkfifo_args { 1428 char *path; 1429 int mode; 1430 }; 1431 #endif 1432 int 1433 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1434 { 1435 1436 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1437 uap->mode)); 1438 } 1439 1440 #ifndef _SYS_SYSPROTO_H_ 1441 struct mkfifoat_args { 1442 int fd; 1443 char *path; 1444 mode_t mode; 1445 }; 1446 #endif 1447 int 1448 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1449 { 1450 1451 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1452 uap->mode)); 1453 } 1454 1455 int 1456 kern_mkfifoat(struct thread *td, int fd, const char *path, 1457 enum uio_seg pathseg, int mode) 1458 { 1459 struct mount *mp; 1460 struct vattr vattr; 1461 struct nameidata nd; 1462 int error; 1463 1464 AUDIT_ARG_MODE(mode); 1465 NDPREINIT(&nd); 1466 restart: 1467 bwillwrite(); 1468 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1469 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights); 1470 if ((error = namei(&nd)) != 0) 1471 return (error); 1472 if (nd.ni_vp != NULL) { 1473 NDFREE_PNBUF(&nd); 1474 if (nd.ni_vp == nd.ni_dvp) 1475 vrele(nd.ni_dvp); 1476 else 1477 vput(nd.ni_dvp); 1478 vrele(nd.ni_vp); 1479 return (EEXIST); 1480 } 1481 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1482 NDFREE_PNBUF(&nd); 1483 vput(nd.ni_dvp); 1484 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1485 return (error); 1486 goto restart; 1487 } 1488 VATTR_NULL(&vattr); 1489 vattr.va_type = VFIFO; 1490 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1491 #ifdef MAC 1492 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1493 &vattr); 1494 if (error != 0) 1495 goto out; 1496 #endif 1497 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1498 #ifdef MAC 1499 out: 1500 #endif 1501 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? 
&nd.ni_vp : NULL, true); 1502 vn_finished_write(mp); 1503 NDFREE_PNBUF(&nd); 1504 if (error == ERELOOKUP) 1505 goto restart; 1506 return (error); 1507 } 1508 1509 /* 1510 * Make a hard file link. 1511 */ 1512 #ifndef _SYS_SYSPROTO_H_ 1513 struct link_args { 1514 char *path; 1515 char *link; 1516 }; 1517 #endif 1518 int 1519 sys_link(struct thread *td, struct link_args *uap) 1520 { 1521 1522 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1523 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1524 } 1525 1526 #ifndef _SYS_SYSPROTO_H_ 1527 struct linkat_args { 1528 int fd1; 1529 char *path1; 1530 int fd2; 1531 char *path2; 1532 int flag; 1533 }; 1534 #endif 1535 int 1536 sys_linkat(struct thread *td, struct linkat_args *uap) 1537 { 1538 1539 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1540 UIO_USERSPACE, uap->flag)); 1541 } 1542 1543 int hardlink_check_uid = 0; 1544 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1545 &hardlink_check_uid, 0, 1546 "Unprivileged processes cannot create hard links to files owned by other " 1547 "users"); 1548 static int hardlink_check_gid = 0; 1549 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1550 &hardlink_check_gid, 0, 1551 "Unprivileged processes cannot create hard links to files owned by other " 1552 "groups"); 1553 1554 static int 1555 can_hardlink(struct vnode *vp, struct ucred *cred) 1556 { 1557 struct vattr va; 1558 int error; 1559 1560 if (!hardlink_check_uid && !hardlink_check_gid) 1561 return (0); 1562 1563 error = VOP_GETATTR(vp, &va, cred); 1564 if (error != 0) 1565 return (error); 1566 1567 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1568 error = priv_check_cred(cred, PRIV_VFS_LINK); 1569 if (error != 0) 1570 return (error); 1571 } 1572 1573 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1574 error = priv_check_cred(cred, PRIV_VFS_LINK); 1575 if (error != 0) 1576 return (error); 1577 } 1578 1579 return (0); 1580 } 1581 1582 int 
1583 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1584 const char *path2, enum uio_seg segflag, int flag) 1585 { 1586 struct nameidata nd; 1587 int error; 1588 1589 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1590 AT_EMPTY_PATH)) != 0) 1591 return (EINVAL); 1592 1593 NDPREINIT(&nd); 1594 do { 1595 bwillwrite(); 1596 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1597 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1598 segflag, path1, fd1, &cap_linkat_source_rights); 1599 if ((error = namei(&nd)) != 0) 1600 return (error); 1601 NDFREE_PNBUF(&nd); 1602 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1603 error = priv_check(td, PRIV_VFS_FHOPEN); 1604 if (error != 0) { 1605 vrele(nd.ni_vp); 1606 return (error); 1607 } 1608 } 1609 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1610 } while (error == EAGAIN || error == ERELOOKUP); 1611 return (error); 1612 } 1613 1614 static int 1615 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1616 enum uio_seg segflag) 1617 { 1618 struct nameidata nd; 1619 struct mount *mp; 1620 int error; 1621 1622 if (vp->v_type == VDIR) { 1623 vrele(vp); 1624 return (EPERM); /* POSIX */ 1625 } 1626 NDINIT_ATRIGHTS(&nd, CREATE, 1627 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1628 &cap_linkat_target_rights); 1629 if ((error = namei(&nd)) == 0) { 1630 if (nd.ni_vp != NULL) { 1631 NDFREE_PNBUF(&nd); 1632 if (nd.ni_dvp == nd.ni_vp) 1633 vrele(nd.ni_dvp); 1634 else 1635 vput(nd.ni_dvp); 1636 vrele(nd.ni_vp); 1637 vrele(vp); 1638 return (EEXIST); 1639 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1640 /* 1641 * Cross-device link. No need to recheck 1642 * vp->v_type, since it cannot change, except 1643 * to VBAD. 
1644 */ 1645 NDFREE_PNBUF(&nd); 1646 vput(nd.ni_dvp); 1647 vrele(vp); 1648 return (EXDEV); 1649 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1650 error = can_hardlink(vp, td->td_ucred); 1651 #ifdef MAC 1652 if (error == 0) 1653 error = mac_vnode_check_link(td->td_ucred, 1654 nd.ni_dvp, vp, &nd.ni_cnd); 1655 #endif 1656 if (error != 0) { 1657 vput(vp); 1658 vput(nd.ni_dvp); 1659 NDFREE_PNBUF(&nd); 1660 return (error); 1661 } 1662 error = vn_start_write(vp, &mp, V_NOWAIT); 1663 if (error != 0) { 1664 vput(vp); 1665 vput(nd.ni_dvp); 1666 NDFREE_PNBUF(&nd); 1667 error = vn_start_write(NULL, &mp, 1668 V_XSLEEP | PCATCH); 1669 if (error != 0) 1670 return (error); 1671 return (EAGAIN); 1672 } 1673 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1674 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1675 vn_finished_write(mp); 1676 NDFREE_PNBUF(&nd); 1677 vp = NULL; 1678 } else { 1679 vput(nd.ni_dvp); 1680 NDFREE_PNBUF(&nd); 1681 vrele(vp); 1682 return (EAGAIN); 1683 } 1684 } 1685 if (vp != NULL) 1686 vrele(vp); 1687 return (error); 1688 } 1689 1690 /* 1691 * Make a symbolic link. 
1692 */ 1693 #ifndef _SYS_SYSPROTO_H_ 1694 struct symlink_args { 1695 char *path; 1696 char *link; 1697 }; 1698 #endif 1699 int 1700 sys_symlink(struct thread *td, struct symlink_args *uap) 1701 { 1702 1703 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1704 UIO_USERSPACE)); 1705 } 1706 1707 #ifndef _SYS_SYSPROTO_H_ 1708 struct symlinkat_args { 1709 char *path; 1710 int fd; 1711 char *path2; 1712 }; 1713 #endif 1714 int 1715 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1716 { 1717 1718 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1719 UIO_USERSPACE)); 1720 } 1721 1722 int 1723 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1724 enum uio_seg segflg) 1725 { 1726 struct mount *mp; 1727 struct vattr vattr; 1728 const char *syspath; 1729 char *tmppath; 1730 struct nameidata nd; 1731 int error; 1732 1733 if (segflg == UIO_SYSSPACE) { 1734 syspath = path1; 1735 } else { 1736 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1737 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1738 goto out; 1739 syspath = tmppath; 1740 } 1741 AUDIT_ARG_TEXT(syspath); 1742 NDPREINIT(&nd); 1743 restart: 1744 bwillwrite(); 1745 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1746 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights); 1747 if ((error = namei(&nd)) != 0) 1748 goto out; 1749 if (nd.ni_vp) { 1750 NDFREE_PNBUF(&nd); 1751 if (nd.ni_vp == nd.ni_dvp) 1752 vrele(nd.ni_dvp); 1753 else 1754 vput(nd.ni_dvp); 1755 vrele(nd.ni_vp); 1756 nd.ni_vp = NULL; 1757 error = EEXIST; 1758 goto out; 1759 } 1760 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1761 NDFREE_PNBUF(&nd); 1762 vput(nd.ni_dvp); 1763 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1764 goto out; 1765 goto restart; 1766 } 1767 VATTR_NULL(&vattr); 1768 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1769 #ifdef MAC 1770 vattr.va_type = VLNK; 1771 error = mac_vnode_check_create(td->td_ucred, 
nd.ni_dvp, &nd.ni_cnd, 1772 &vattr); 1773 if (error != 0) 1774 goto out2; 1775 #endif 1776 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1777 #ifdef MAC 1778 out2: 1779 #endif 1780 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1781 vn_finished_write(mp); 1782 NDFREE_PNBUF(&nd); 1783 if (error == ERELOOKUP) 1784 goto restart; 1785 out: 1786 if (segflg != UIO_SYSSPACE) 1787 uma_zfree(namei_zone, tmppath); 1788 return (error); 1789 } 1790 1791 /* 1792 * Delete a whiteout from the filesystem. 1793 */ 1794 #ifndef _SYS_SYSPROTO_H_ 1795 struct undelete_args { 1796 char *path; 1797 }; 1798 #endif 1799 int 1800 sys_undelete(struct thread *td, struct undelete_args *uap) 1801 { 1802 struct mount *mp; 1803 struct nameidata nd; 1804 int error; 1805 1806 NDPREINIT(&nd); 1807 restart: 1808 bwillwrite(); 1809 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1810 UIO_USERSPACE, uap->path); 1811 error = namei(&nd); 1812 if (error != 0) 1813 return (error); 1814 1815 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1816 NDFREE_PNBUF(&nd); 1817 if (nd.ni_vp == nd.ni_dvp) 1818 vrele(nd.ni_dvp); 1819 else 1820 vput(nd.ni_dvp); 1821 if (nd.ni_vp) 1822 vrele(nd.ni_vp); 1823 return (EEXIST); 1824 } 1825 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1826 NDFREE_PNBUF(&nd); 1827 vput(nd.ni_dvp); 1828 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1829 return (error); 1830 goto restart; 1831 } 1832 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1833 NDFREE_PNBUF(&nd); 1834 vput(nd.ni_dvp); 1835 vn_finished_write(mp); 1836 if (error == ERELOOKUP) 1837 goto restart; 1838 return (error); 1839 } 1840 1841 /* 1842 * Delete a name from the filesystem. 
1843 */ 1844 #ifndef _SYS_SYSPROTO_H_ 1845 struct unlink_args { 1846 char *path; 1847 }; 1848 #endif 1849 int 1850 sys_unlink(struct thread *td, struct unlink_args *uap) 1851 { 1852 1853 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1854 0, 0)); 1855 } 1856 1857 static int 1858 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1859 int flag, enum uio_seg pathseg, ino_t oldinum) 1860 { 1861 1862 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1863 return (EINVAL); 1864 1865 if ((flag & AT_REMOVEDIR) != 0) 1866 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1867 1868 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1869 } 1870 1871 #ifndef _SYS_SYSPROTO_H_ 1872 struct unlinkat_args { 1873 int fd; 1874 char *path; 1875 int flag; 1876 }; 1877 #endif 1878 int 1879 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1880 { 1881 1882 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1883 UIO_USERSPACE, 0)); 1884 } 1885 1886 #ifndef _SYS_SYSPROTO_H_ 1887 struct funlinkat_args { 1888 int dfd; 1889 const char *path; 1890 int fd; 1891 int flag; 1892 }; 1893 #endif 1894 int 1895 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1896 { 1897 1898 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1899 UIO_USERSPACE, 0)); 1900 } 1901 1902 int 1903 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1904 enum uio_seg pathseg, int flag, ino_t oldinum) 1905 { 1906 struct mount *mp; 1907 struct file *fp; 1908 struct vnode *vp; 1909 struct nameidata nd; 1910 struct stat sb; 1911 int error; 1912 1913 fp = NULL; 1914 if (fd != FD_NONE) { 1915 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1916 if (error != 0) 1917 return (error); 1918 } 1919 1920 NDPREINIT(&nd); 1921 restart: 1922 bwillwrite(); 1923 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1924 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1925 pathseg, 
path, dfd, &cap_unlinkat_rights); 1926 if ((error = namei(&nd)) != 0) { 1927 if (error == EINVAL) 1928 error = EPERM; 1929 goto fdout; 1930 } 1931 vp = nd.ni_vp; 1932 if (vp->v_type == VDIR && oldinum == 0) { 1933 error = EPERM; /* POSIX */ 1934 } else if (oldinum != 0 && 1935 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 1936 sb.st_ino != oldinum) { 1937 error = EIDRM; /* Identifier removed */ 1938 } else if (fp != NULL && fp->f_vnode != vp) { 1939 if (VN_IS_DOOMED(fp->f_vnode)) 1940 error = EBADF; 1941 else 1942 error = EDEADLK; 1943 } else { 1944 /* 1945 * The root of a mounted filesystem cannot be deleted. 1946 * 1947 * XXX: can this only be a VDIR case? 1948 */ 1949 if (vp->v_vflag & VV_ROOT) 1950 error = EBUSY; 1951 } 1952 if (error == 0) { 1953 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1954 NDFREE_PNBUF(&nd); 1955 vput(nd.ni_dvp); 1956 if (vp == nd.ni_dvp) 1957 vrele(vp); 1958 else 1959 vput(vp); 1960 if ((error = vn_start_write(NULL, &mp, 1961 V_XSLEEP | PCATCH)) != 0) { 1962 goto fdout; 1963 } 1964 goto restart; 1965 } 1966 #ifdef MAC 1967 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1968 &nd.ni_cnd); 1969 if (error != 0) 1970 goto out; 1971 #endif 1972 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1973 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1974 #ifdef MAC 1975 out: 1976 #endif 1977 vn_finished_write(mp); 1978 } 1979 NDFREE_PNBUF(&nd); 1980 vput(nd.ni_dvp); 1981 if (vp == nd.ni_dvp) 1982 vrele(vp); 1983 else 1984 vput(vp); 1985 if (error == ERELOOKUP) 1986 goto restart; 1987 fdout: 1988 if (fp != NULL) 1989 fdrop(fp, td); 1990 return (error); 1991 } 1992 1993 /* 1994 * Reposition read/write file offset. 
1995 */ 1996 #ifndef _SYS_SYSPROTO_H_ 1997 struct lseek_args { 1998 int fd; 1999 int pad; 2000 off_t offset; 2001 int whence; 2002 }; 2003 #endif 2004 int 2005 sys_lseek(struct thread *td, struct lseek_args *uap) 2006 { 2007 2008 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2009 } 2010 2011 int 2012 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2013 { 2014 struct file *fp; 2015 int error; 2016 2017 AUDIT_ARG_FD(fd); 2018 error = fget(td, fd, &cap_seek_rights, &fp); 2019 if (error != 0) 2020 return (error); 2021 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2022 fo_seek(fp, offset, whence, td) : ESPIPE; 2023 fdrop(fp, td); 2024 return (error); 2025 } 2026 2027 #if defined(COMPAT_43) 2028 /* 2029 * Reposition read/write file offset. 2030 */ 2031 #ifndef _SYS_SYSPROTO_H_ 2032 struct olseek_args { 2033 int fd; 2034 long offset; 2035 int whence; 2036 }; 2037 #endif 2038 int 2039 olseek(struct thread *td, struct olseek_args *uap) 2040 { 2041 2042 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2043 } 2044 #endif /* COMPAT_43 */ 2045 2046 #if defined(COMPAT_FREEBSD6) 2047 /* Version with the 'pad' argument */ 2048 int 2049 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2050 { 2051 2052 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2053 } 2054 #endif 2055 2056 /* 2057 * Check access permissions using passed credentials. 2058 */ 2059 static int 2060 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2061 struct thread *td) 2062 { 2063 accmode_t accmode; 2064 int error; 2065 2066 /* Flags == 0 means only check for existence. 
*/ 2067 if (user_flags == 0) 2068 return (0); 2069 2070 accmode = 0; 2071 if (user_flags & R_OK) 2072 accmode |= VREAD; 2073 if (user_flags & W_OK) 2074 accmode |= VWRITE; 2075 if (user_flags & X_OK) 2076 accmode |= VEXEC; 2077 #ifdef MAC 2078 error = mac_vnode_check_access(cred, vp, accmode); 2079 if (error != 0) 2080 return (error); 2081 #endif 2082 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2083 error = VOP_ACCESS(vp, accmode, cred, td); 2084 return (error); 2085 } 2086 2087 /* 2088 * Check access permissions using "real" credentials. 2089 */ 2090 #ifndef _SYS_SYSPROTO_H_ 2091 struct access_args { 2092 char *path; 2093 int amode; 2094 }; 2095 #endif 2096 int 2097 sys_access(struct thread *td, struct access_args *uap) 2098 { 2099 2100 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2101 0, uap->amode)); 2102 } 2103 2104 #ifndef _SYS_SYSPROTO_H_ 2105 struct faccessat_args { 2106 int dirfd; 2107 char *path; 2108 int amode; 2109 int flag; 2110 } 2111 #endif 2112 int 2113 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2114 { 2115 2116 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2117 uap->amode)); 2118 } 2119 2120 int 2121 kern_accessat(struct thread *td, int fd, const char *path, 2122 enum uio_seg pathseg, int flag, int amode) 2123 { 2124 struct ucred *cred, *usecred; 2125 struct vnode *vp; 2126 struct nameidata nd; 2127 int error; 2128 2129 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0) 2130 return (EINVAL); 2131 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2132 return (EINVAL); 2133 2134 /* 2135 * Create and modify a temporary credential instead of one that 2136 * is potentially shared (if we need one). 
2137 */ 2138 cred = td->td_ucred; 2139 if ((flag & AT_EACCESS) == 0 && 2140 ((cred->cr_uid != cred->cr_ruid || 2141 cred->cr_rgid != cred->cr_groups[0]))) { 2142 usecred = crdup(cred); 2143 usecred->cr_uid = cred->cr_ruid; 2144 usecred->cr_groups[0] = cred->cr_rgid; 2145 td->td_ucred = usecred; 2146 } else 2147 usecred = cred; 2148 AUDIT_ARG_VALUE(amode); 2149 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2150 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | 2151 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2152 if ((error = namei(&nd)) != 0) 2153 goto out; 2154 vp = nd.ni_vp; 2155 2156 error = vn_access(vp, amode, usecred, td); 2157 NDFREE_NOTHING(&nd); 2158 vput(vp); 2159 out: 2160 if (usecred != cred) { 2161 td->td_ucred = cred; 2162 crfree(usecred); 2163 } 2164 return (error); 2165 } 2166 2167 /* 2168 * Check access permissions using "effective" credentials. 2169 */ 2170 #ifndef _SYS_SYSPROTO_H_ 2171 struct eaccess_args { 2172 char *path; 2173 int amode; 2174 }; 2175 #endif 2176 int 2177 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2178 { 2179 2180 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2181 AT_EACCESS, uap->amode)); 2182 } 2183 2184 #if defined(COMPAT_43) 2185 /* 2186 * Get file status; this version follows links. 2187 */ 2188 #ifndef _SYS_SYSPROTO_H_ 2189 struct ostat_args { 2190 char *path; 2191 struct ostat *ub; 2192 }; 2193 #endif 2194 int 2195 ostat(struct thread *td, struct ostat_args *uap) 2196 { 2197 struct stat sb; 2198 struct ostat osb; 2199 int error; 2200 2201 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2202 &sb, NULL); 2203 if (error != 0) 2204 return (error); 2205 cvtstat(&sb, &osb); 2206 return (copyout(&osb, uap->ub, sizeof (osb))); 2207 } 2208 2209 /* 2210 * Get file status; this version does not follow links. 
2211 */ 2212 #ifndef _SYS_SYSPROTO_H_ 2213 struct olstat_args { 2214 char *path; 2215 struct ostat *ub; 2216 }; 2217 #endif 2218 int 2219 olstat(struct thread *td, struct olstat_args *uap) 2220 { 2221 struct stat sb; 2222 struct ostat osb; 2223 int error; 2224 2225 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2226 UIO_USERSPACE, &sb, NULL); 2227 if (error != 0) 2228 return (error); 2229 cvtstat(&sb, &osb); 2230 return (copyout(&osb, uap->ub, sizeof (osb))); 2231 } 2232 2233 /* 2234 * Convert from an old to a new stat structure. 2235 * XXX: many values are blindly truncated. 2236 */ 2237 void 2238 cvtstat(struct stat *st, struct ostat *ost) 2239 { 2240 2241 bzero(ost, sizeof(*ost)); 2242 ost->st_dev = st->st_dev; 2243 ost->st_ino = st->st_ino; 2244 ost->st_mode = st->st_mode; 2245 ost->st_nlink = st->st_nlink; 2246 ost->st_uid = st->st_uid; 2247 ost->st_gid = st->st_gid; 2248 ost->st_rdev = st->st_rdev; 2249 ost->st_size = MIN(st->st_size, INT32_MAX); 2250 ost->st_atim = st->st_atim; 2251 ost->st_mtim = st->st_mtim; 2252 ost->st_ctim = st->st_ctim; 2253 ost->st_blksize = st->st_blksize; 2254 ost->st_blocks = st->st_blocks; 2255 ost->st_flags = st->st_flags; 2256 ost->st_gen = st->st_gen; 2257 } 2258 #endif /* COMPAT_43 */ 2259 2260 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2261 int ino64_trunc_error; 2262 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2263 &ino64_trunc_error, 0, 2264 "Error on truncation of device, file or inode number, or link count"); 2265 2266 int 2267 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2268 { 2269 2270 ost->st_dev = st->st_dev; 2271 if (ost->st_dev != st->st_dev) { 2272 switch (ino64_trunc_error) { 2273 default: 2274 /* 2275 * Since dev_t is almost raw, don't clamp to the 2276 * maximum for case 2, but ignore the error. 
2277 */ 2278 break; 2279 case 1: 2280 return (EOVERFLOW); 2281 } 2282 } 2283 ost->st_ino = st->st_ino; 2284 if (ost->st_ino != st->st_ino) { 2285 switch (ino64_trunc_error) { 2286 default: 2287 case 0: 2288 break; 2289 case 1: 2290 return (EOVERFLOW); 2291 case 2: 2292 ost->st_ino = UINT32_MAX; 2293 break; 2294 } 2295 } 2296 ost->st_mode = st->st_mode; 2297 ost->st_nlink = st->st_nlink; 2298 if (ost->st_nlink != st->st_nlink) { 2299 switch (ino64_trunc_error) { 2300 default: 2301 case 0: 2302 break; 2303 case 1: 2304 return (EOVERFLOW); 2305 case 2: 2306 ost->st_nlink = UINT16_MAX; 2307 break; 2308 } 2309 } 2310 ost->st_uid = st->st_uid; 2311 ost->st_gid = st->st_gid; 2312 ost->st_rdev = st->st_rdev; 2313 if (ost->st_rdev != st->st_rdev) { 2314 switch (ino64_trunc_error) { 2315 default: 2316 break; 2317 case 1: 2318 return (EOVERFLOW); 2319 } 2320 } 2321 ost->st_atim = st->st_atim; 2322 ost->st_mtim = st->st_mtim; 2323 ost->st_ctim = st->st_ctim; 2324 ost->st_size = st->st_size; 2325 ost->st_blocks = st->st_blocks; 2326 ost->st_blksize = st->st_blksize; 2327 ost->st_flags = st->st_flags; 2328 ost->st_gen = st->st_gen; 2329 ost->st_lspare = 0; 2330 ost->st_birthtim = st->st_birthtim; 2331 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2332 sizeof(*ost) - offsetof(struct freebsd11_stat, 2333 st_birthtim) - sizeof(ost->st_birthtim)); 2334 return (0); 2335 } 2336 2337 int 2338 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2339 { 2340 struct stat sb; 2341 struct freebsd11_stat osb; 2342 int error; 2343 2344 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2345 &sb, NULL); 2346 if (error != 0) 2347 return (error); 2348 error = freebsd11_cvtstat(&sb, &osb); 2349 if (error == 0) 2350 error = copyout(&osb, uap->ub, sizeof(osb)); 2351 return (error); 2352 } 2353 2354 int 2355 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2356 { 2357 struct stat sb; 2358 struct freebsd11_stat osb; 2359 int error; 2360 2361 
error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2362 UIO_USERSPACE, &sb, NULL); 2363 if (error != 0) 2364 return (error); 2365 error = freebsd11_cvtstat(&sb, &osb); 2366 if (error == 0) 2367 error = copyout(&osb, uap->ub, sizeof(osb)); 2368 return (error); 2369 } 2370 2371 int 2372 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2373 { 2374 struct fhandle fh; 2375 struct stat sb; 2376 struct freebsd11_stat osb; 2377 int error; 2378 2379 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2380 if (error != 0) 2381 return (error); 2382 error = kern_fhstat(td, fh, &sb); 2383 if (error != 0) 2384 return (error); 2385 error = freebsd11_cvtstat(&sb, &osb); 2386 if (error == 0) 2387 error = copyout(&osb, uap->sb, sizeof(osb)); 2388 return (error); 2389 } 2390 2391 int 2392 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2393 { 2394 struct stat sb; 2395 struct freebsd11_stat osb; 2396 int error; 2397 2398 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2399 UIO_USERSPACE, &sb, NULL); 2400 if (error != 0) 2401 return (error); 2402 error = freebsd11_cvtstat(&sb, &osb); 2403 if (error == 0) 2404 error = copyout(&osb, uap->buf, sizeof(osb)); 2405 return (error); 2406 } 2407 #endif /* COMPAT_FREEBSD11 */ 2408 2409 /* 2410 * Get file status 2411 */ 2412 #ifndef _SYS_SYSPROTO_H_ 2413 struct fstatat_args { 2414 int fd; 2415 char *path; 2416 struct stat *buf; 2417 int flag; 2418 } 2419 #endif 2420 int 2421 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2422 { 2423 struct stat sb; 2424 int error; 2425 2426 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2427 UIO_USERSPACE, &sb, NULL); 2428 if (error == 0) 2429 error = copyout(&sb, uap->buf, sizeof (sb)); 2430 return (error); 2431 } 2432 2433 int 2434 kern_statat(struct thread *td, int flag, int fd, const char *path, 2435 enum uio_seg pathseg, struct stat *sbp, 2436 void (*hook)(struct vnode *vp, struct stat *sbp)) 2437 { 2438 struct nameidata 
nd; 2439 int error; 2440 2441 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2442 AT_EMPTY_PATH)) != 0) 2443 return (EINVAL); 2444 2445 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2446 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2447 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2448 2449 if ((error = namei(&nd)) != 0) { 2450 if (error == ENOTDIR && 2451 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2452 error = kern_fstat(td, fd, sbp); 2453 return (error); 2454 } 2455 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2456 if (error == 0) { 2457 if (__predict_false(hook != NULL)) 2458 hook(nd.ni_vp, sbp); 2459 } 2460 NDFREE_NOTHING(&nd); 2461 vput(nd.ni_vp); 2462 #ifdef __STAT_TIME_T_EXT 2463 sbp->st_atim_ext = 0; 2464 sbp->st_mtim_ext = 0; 2465 sbp->st_ctim_ext = 0; 2466 sbp->st_btim_ext = 0; 2467 #endif 2468 #ifdef KTRACE 2469 if (KTRPOINT(td, KTR_STRUCT)) 2470 ktrstat_error(sbp, error); 2471 #endif 2472 return (error); 2473 } 2474 2475 #if defined(COMPAT_FREEBSD11) 2476 /* 2477 * Implementation of the NetBSD [l]stat() functions. 
2478 */ 2479 int 2480 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2481 { 2482 struct freebsd11_stat sb11; 2483 int error; 2484 2485 error = freebsd11_cvtstat(sb, &sb11); 2486 if (error != 0) 2487 return (error); 2488 2489 bzero(nsb, sizeof(*nsb)); 2490 CP(sb11, *nsb, st_dev); 2491 CP(sb11, *nsb, st_ino); 2492 CP(sb11, *nsb, st_mode); 2493 CP(sb11, *nsb, st_nlink); 2494 CP(sb11, *nsb, st_uid); 2495 CP(sb11, *nsb, st_gid); 2496 CP(sb11, *nsb, st_rdev); 2497 CP(sb11, *nsb, st_atim); 2498 CP(sb11, *nsb, st_mtim); 2499 CP(sb11, *nsb, st_ctim); 2500 CP(sb11, *nsb, st_size); 2501 CP(sb11, *nsb, st_blocks); 2502 CP(sb11, *nsb, st_blksize); 2503 CP(sb11, *nsb, st_flags); 2504 CP(sb11, *nsb, st_gen); 2505 CP(sb11, *nsb, st_birthtim); 2506 return (0); 2507 } 2508 2509 #ifndef _SYS_SYSPROTO_H_ 2510 struct freebsd11_nstat_args { 2511 char *path; 2512 struct nstat *ub; 2513 }; 2514 #endif 2515 int 2516 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2517 { 2518 struct stat sb; 2519 struct nstat nsb; 2520 int error; 2521 2522 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2523 &sb, NULL); 2524 if (error != 0) 2525 return (error); 2526 error = freebsd11_cvtnstat(&sb, &nsb); 2527 if (error == 0) 2528 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2529 return (error); 2530 } 2531 2532 /* 2533 * NetBSD lstat. Get file status; this version does not follow links. 
2534 */ 2535 #ifndef _SYS_SYSPROTO_H_ 2536 struct freebsd11_nlstat_args { 2537 char *path; 2538 struct nstat *ub; 2539 }; 2540 #endif 2541 int 2542 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2543 { 2544 struct stat sb; 2545 struct nstat nsb; 2546 int error; 2547 2548 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2549 UIO_USERSPACE, &sb, NULL); 2550 if (error != 0) 2551 return (error); 2552 error = freebsd11_cvtnstat(&sb, &nsb); 2553 if (error == 0) 2554 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2555 return (error); 2556 } 2557 #endif /* COMPAT_FREEBSD11 */ 2558 2559 /* 2560 * Get configurable pathname variables. 2561 */ 2562 #ifndef _SYS_SYSPROTO_H_ 2563 struct pathconf_args { 2564 char *path; 2565 int name; 2566 }; 2567 #endif 2568 int 2569 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2570 { 2571 long value; 2572 int error; 2573 2574 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2575 &value); 2576 if (error == 0) 2577 td->td_retval[0] = value; 2578 return (error); 2579 } 2580 2581 #ifndef _SYS_SYSPROTO_H_ 2582 struct lpathconf_args { 2583 char *path; 2584 int name; 2585 }; 2586 #endif 2587 int 2588 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2589 { 2590 long value; 2591 int error; 2592 2593 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2594 NOFOLLOW, &value); 2595 if (error == 0) 2596 td->td_retval[0] = value; 2597 return (error); 2598 } 2599 2600 int 2601 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2602 int name, u_long flags, long *valuep) 2603 { 2604 struct nameidata nd; 2605 int error; 2606 2607 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2608 pathseg, path); 2609 if ((error = namei(&nd)) != 0) 2610 return (error); 2611 NDFREE_NOTHING(&nd); 2612 2613 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2614 vput(nd.ni_vp); 2615 return (error); 2616 } 2617 2618 /* 2619 * Return target name of a 
symbolic link. 2620 */ 2621 #ifndef _SYS_SYSPROTO_H_ 2622 struct readlink_args { 2623 char *path; 2624 char *buf; 2625 size_t count; 2626 }; 2627 #endif 2628 int 2629 sys_readlink(struct thread *td, struct readlink_args *uap) 2630 { 2631 2632 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2633 uap->buf, UIO_USERSPACE, uap->count)); 2634 } 2635 #ifndef _SYS_SYSPROTO_H_ 2636 struct readlinkat_args { 2637 int fd; 2638 char *path; 2639 char *buf; 2640 size_t bufsize; 2641 }; 2642 #endif 2643 int 2644 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2645 { 2646 2647 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2648 uap->buf, UIO_USERSPACE, uap->bufsize)); 2649 } 2650 2651 int 2652 kern_readlinkat(struct thread *td, int fd, const char *path, 2653 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2654 { 2655 struct vnode *vp; 2656 struct nameidata nd; 2657 int error; 2658 2659 if (count > IOSIZE_MAX) 2660 return (EINVAL); 2661 2662 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2663 EMPTYPATH, pathseg, path, fd); 2664 2665 if ((error = namei(&nd)) != 0) 2666 return (error); 2667 NDFREE_NOTHING(&nd); 2668 vp = nd.ni_vp; 2669 2670 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2671 vput(vp); 2672 2673 return (error); 2674 } 2675 2676 /* 2677 * Helper function to readlink from a vnode 2678 */ 2679 static int 2680 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2681 struct thread *td) 2682 { 2683 struct iovec aiov; 2684 struct uio auio; 2685 int error; 2686 2687 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2688 #ifdef MAC 2689 error = mac_vnode_check_readlink(td->td_ucred, vp); 2690 if (error != 0) 2691 return (error); 2692 #endif 2693 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2694 return (EINVAL); 2695 2696 aiov.iov_base = buf; 2697 aiov.iov_len = count; 2698 auio.uio_iov = &aiov; 2699 auio.uio_iovcnt = 1; 2700 
auio.uio_offset = 0; 2701 auio.uio_rw = UIO_READ; 2702 auio.uio_segflg = bufseg; 2703 auio.uio_td = td; 2704 auio.uio_resid = count; 2705 error = VOP_READLINK(vp, &auio, td->td_ucred); 2706 td->td_retval[0] = count - auio.uio_resid; 2707 return (error); 2708 } 2709 2710 /* 2711 * Common implementation code for chflags() and fchflags(). 2712 */ 2713 static int 2714 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2715 { 2716 struct mount *mp; 2717 struct vattr vattr; 2718 int error; 2719 2720 /* We can't support the value matching VNOVAL. */ 2721 if (flags == VNOVAL) 2722 return (EOPNOTSUPP); 2723 2724 /* 2725 * Prevent non-root users from setting flags on devices. When 2726 * a device is reused, users can retain ownership of the device 2727 * if they are allowed to set flags and programs assume that 2728 * chown can't fail when done as root. 2729 */ 2730 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2731 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2732 if (error != 0) 2733 return (error); 2734 } 2735 2736 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2737 return (error); 2738 VATTR_NULL(&vattr); 2739 vattr.va_flags = flags; 2740 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2741 #ifdef MAC 2742 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2743 if (error == 0) 2744 #endif 2745 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2746 VOP_UNLOCK(vp); 2747 vn_finished_write(mp); 2748 return (error); 2749 } 2750 2751 /* 2752 * Change flags of a file given a path name. 
2753 */ 2754 #ifndef _SYS_SYSPROTO_H_ 2755 struct chflags_args { 2756 const char *path; 2757 u_long flags; 2758 }; 2759 #endif 2760 int 2761 sys_chflags(struct thread *td, struct chflags_args *uap) 2762 { 2763 2764 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2765 uap->flags, 0)); 2766 } 2767 2768 #ifndef _SYS_SYSPROTO_H_ 2769 struct chflagsat_args { 2770 int fd; 2771 const char *path; 2772 u_long flags; 2773 int atflag; 2774 } 2775 #endif 2776 int 2777 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2778 { 2779 2780 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2781 uap->flags, uap->atflag)); 2782 } 2783 2784 /* 2785 * Same as chflags() but doesn't follow symlinks. 2786 */ 2787 #ifndef _SYS_SYSPROTO_H_ 2788 struct lchflags_args { 2789 const char *path; 2790 u_long flags; 2791 }; 2792 #endif 2793 int 2794 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2795 { 2796 2797 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2798 uap->flags, AT_SYMLINK_NOFOLLOW)); 2799 } 2800 2801 static int 2802 kern_chflagsat(struct thread *td, int fd, const char *path, 2803 enum uio_seg pathseg, u_long flags, int atflag) 2804 { 2805 struct nameidata nd; 2806 int error; 2807 2808 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2809 AT_EMPTY_PATH)) != 0) 2810 return (EINVAL); 2811 2812 AUDIT_ARG_FFLAGS(flags); 2813 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2814 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2815 fd, &cap_fchflags_rights); 2816 if ((error = namei(&nd)) != 0) 2817 return (error); 2818 NDFREE_NOTHING(&nd); 2819 error = setfflags(td, nd.ni_vp, flags); 2820 vrele(nd.ni_vp); 2821 return (error); 2822 } 2823 2824 /* 2825 * Change flags of a file given a file descriptor. 
2826 */ 2827 #ifndef _SYS_SYSPROTO_H_ 2828 struct fchflags_args { 2829 int fd; 2830 u_long flags; 2831 }; 2832 #endif 2833 int 2834 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2835 { 2836 struct file *fp; 2837 int error; 2838 2839 AUDIT_ARG_FD(uap->fd); 2840 AUDIT_ARG_FFLAGS(uap->flags); 2841 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2842 &fp); 2843 if (error != 0) 2844 return (error); 2845 #ifdef AUDIT 2846 if (AUDITING_TD(td)) { 2847 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2848 AUDIT_ARG_VNODE1(fp->f_vnode); 2849 VOP_UNLOCK(fp->f_vnode); 2850 } 2851 #endif 2852 error = setfflags(td, fp->f_vnode, uap->flags); 2853 fdrop(fp, td); 2854 return (error); 2855 } 2856 2857 /* 2858 * Common implementation code for chmod(), lchmod() and fchmod(). 2859 */ 2860 int 2861 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2862 { 2863 struct mount *mp; 2864 struct vattr vattr; 2865 int error; 2866 2867 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2868 return (error); 2869 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2870 VATTR_NULL(&vattr); 2871 vattr.va_mode = mode & ALLPERMS; 2872 #ifdef MAC 2873 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2874 if (error == 0) 2875 #endif 2876 error = VOP_SETATTR(vp, &vattr, cred); 2877 VOP_UNLOCK(vp); 2878 vn_finished_write(mp); 2879 return (error); 2880 } 2881 2882 /* 2883 * Change mode of a file given path name. 
2884 */ 2885 #ifndef _SYS_SYSPROTO_H_ 2886 struct chmod_args { 2887 char *path; 2888 int mode; 2889 }; 2890 #endif 2891 int 2892 sys_chmod(struct thread *td, struct chmod_args *uap) 2893 { 2894 2895 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2896 uap->mode, 0)); 2897 } 2898 2899 #ifndef _SYS_SYSPROTO_H_ 2900 struct fchmodat_args { 2901 int dirfd; 2902 char *path; 2903 mode_t mode; 2904 int flag; 2905 } 2906 #endif 2907 int 2908 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2909 { 2910 2911 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2912 uap->mode, uap->flag)); 2913 } 2914 2915 /* 2916 * Change mode of a file given path name (don't follow links.) 2917 */ 2918 #ifndef _SYS_SYSPROTO_H_ 2919 struct lchmod_args { 2920 char *path; 2921 int mode; 2922 }; 2923 #endif 2924 int 2925 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2926 { 2927 2928 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2929 uap->mode, AT_SYMLINK_NOFOLLOW)); 2930 } 2931 2932 int 2933 kern_fchmodat(struct thread *td, int fd, const char *path, 2934 enum uio_seg pathseg, mode_t mode, int flag) 2935 { 2936 struct nameidata nd; 2937 int error; 2938 2939 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2940 AT_EMPTY_PATH)) != 0) 2941 return (EINVAL); 2942 2943 AUDIT_ARG_MODE(mode); 2944 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2945 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2946 fd, &cap_fchmod_rights); 2947 if ((error = namei(&nd)) != 0) 2948 return (error); 2949 NDFREE_NOTHING(&nd); 2950 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2951 vrele(nd.ni_vp); 2952 return (error); 2953 } 2954 2955 /* 2956 * Change mode of a file given a file descriptor. 
2957 */ 2958 #ifndef _SYS_SYSPROTO_H_ 2959 struct fchmod_args { 2960 int fd; 2961 int mode; 2962 }; 2963 #endif 2964 int 2965 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2966 { 2967 struct file *fp; 2968 int error; 2969 2970 AUDIT_ARG_FD(uap->fd); 2971 AUDIT_ARG_MODE(uap->mode); 2972 2973 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2974 if (error != 0) 2975 return (error); 2976 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2977 fdrop(fp, td); 2978 return (error); 2979 } 2980 2981 /* 2982 * Common implementation for chown(), lchown(), and fchown() 2983 */ 2984 int 2985 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2986 gid_t gid) 2987 { 2988 struct mount *mp; 2989 struct vattr vattr; 2990 int error; 2991 2992 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2993 return (error); 2994 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2995 VATTR_NULL(&vattr); 2996 vattr.va_uid = uid; 2997 vattr.va_gid = gid; 2998 #ifdef MAC 2999 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 3000 vattr.va_gid); 3001 if (error == 0) 3002 #endif 3003 error = VOP_SETATTR(vp, &vattr, cred); 3004 VOP_UNLOCK(vp); 3005 vn_finished_write(mp); 3006 return (error); 3007 } 3008 3009 /* 3010 * Set ownership given a path name. 
3011 */ 3012 #ifndef _SYS_SYSPROTO_H_ 3013 struct chown_args { 3014 char *path; 3015 int uid; 3016 int gid; 3017 }; 3018 #endif 3019 int 3020 sys_chown(struct thread *td, struct chown_args *uap) 3021 { 3022 3023 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3024 uap->gid, 0)); 3025 } 3026 3027 #ifndef _SYS_SYSPROTO_H_ 3028 struct fchownat_args { 3029 int fd; 3030 const char * path; 3031 uid_t uid; 3032 gid_t gid; 3033 int flag; 3034 }; 3035 #endif 3036 int 3037 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3038 { 3039 3040 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3041 uap->gid, uap->flag)); 3042 } 3043 3044 int 3045 kern_fchownat(struct thread *td, int fd, const char *path, 3046 enum uio_seg pathseg, int uid, int gid, int flag) 3047 { 3048 struct nameidata nd; 3049 int error; 3050 3051 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3052 AT_EMPTY_PATH)) != 0) 3053 return (EINVAL); 3054 3055 AUDIT_ARG_OWNER(uid, gid); 3056 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3057 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3058 fd, &cap_fchown_rights); 3059 3060 if ((error = namei(&nd)) != 0) 3061 return (error); 3062 NDFREE_NOTHING(&nd); 3063 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3064 vrele(nd.ni_vp); 3065 return (error); 3066 } 3067 3068 /* 3069 * Set ownership given a path name, do not cross symlinks. 3070 */ 3071 #ifndef _SYS_SYSPROTO_H_ 3072 struct lchown_args { 3073 char *path; 3074 int uid; 3075 int gid; 3076 }; 3077 #endif 3078 int 3079 sys_lchown(struct thread *td, struct lchown_args *uap) 3080 { 3081 3082 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3083 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3084 } 3085 3086 /* 3087 * Set ownership given a file descriptor. 
3088 */ 3089 #ifndef _SYS_SYSPROTO_H_ 3090 struct fchown_args { 3091 int fd; 3092 int uid; 3093 int gid; 3094 }; 3095 #endif 3096 int 3097 sys_fchown(struct thread *td, struct fchown_args *uap) 3098 { 3099 struct file *fp; 3100 int error; 3101 3102 AUDIT_ARG_FD(uap->fd); 3103 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3104 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3105 if (error != 0) 3106 return (error); 3107 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3108 fdrop(fp, td); 3109 return (error); 3110 } 3111 3112 /* 3113 * Common implementation code for utimes(), lutimes(), and futimes(). 3114 */ 3115 static int 3116 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3117 struct timespec *tsp) 3118 { 3119 struct timeval tv[2]; 3120 const struct timeval *tvp; 3121 int error; 3122 3123 if (usrtvp == NULL) { 3124 vfs_timestamp(&tsp[0]); 3125 tsp[1] = tsp[0]; 3126 } else { 3127 if (tvpseg == UIO_SYSSPACE) { 3128 tvp = usrtvp; 3129 } else { 3130 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3131 return (error); 3132 tvp = tv; 3133 } 3134 3135 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3136 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3137 return (EINVAL); 3138 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3139 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3140 } 3141 return (0); 3142 } 3143 3144 /* 3145 * Common implementation code for futimens(), utimensat(). 
3146 */ 3147 #define UTIMENS_NULL 0x1 3148 #define UTIMENS_EXIT 0x2 3149 static int 3150 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3151 struct timespec *tsp, int *retflags) 3152 { 3153 struct timespec tsnow; 3154 int error; 3155 3156 vfs_timestamp(&tsnow); 3157 *retflags = 0; 3158 if (usrtsp == NULL) { 3159 tsp[0] = tsnow; 3160 tsp[1] = tsnow; 3161 *retflags |= UTIMENS_NULL; 3162 return (0); 3163 } 3164 if (tspseg == UIO_SYSSPACE) { 3165 tsp[0] = usrtsp[0]; 3166 tsp[1] = usrtsp[1]; 3167 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3168 return (error); 3169 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3170 *retflags |= UTIMENS_EXIT; 3171 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3172 *retflags |= UTIMENS_NULL; 3173 if (tsp[0].tv_nsec == UTIME_OMIT) 3174 tsp[0].tv_sec = VNOVAL; 3175 else if (tsp[0].tv_nsec == UTIME_NOW) 3176 tsp[0] = tsnow; 3177 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3178 return (EINVAL); 3179 if (tsp[1].tv_nsec == UTIME_OMIT) 3180 tsp[1].tv_sec = VNOVAL; 3181 else if (tsp[1].tv_nsec == UTIME_NOW) 3182 tsp[1] = tsnow; 3183 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3184 return (EINVAL); 3185 3186 return (0); 3187 } 3188 3189 /* 3190 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3191 * and utimensat(). 
3192 */ 3193 static int 3194 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3195 int numtimes, int nullflag) 3196 { 3197 struct mount *mp; 3198 struct vattr vattr; 3199 int error; 3200 bool setbirthtime; 3201 3202 setbirthtime = false; 3203 vattr.va_birthtime.tv_sec = VNOVAL; 3204 vattr.va_birthtime.tv_nsec = 0; 3205 3206 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3207 return (error); 3208 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3209 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3210 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3211 setbirthtime = true; 3212 VATTR_NULL(&vattr); 3213 vattr.va_atime = ts[0]; 3214 vattr.va_mtime = ts[1]; 3215 if (setbirthtime) 3216 vattr.va_birthtime = ts[1]; 3217 if (numtimes > 2) 3218 vattr.va_birthtime = ts[2]; 3219 if (nullflag) 3220 vattr.va_vaflags |= VA_UTIMES_NULL; 3221 #ifdef MAC 3222 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3223 vattr.va_mtime); 3224 #endif 3225 if (error == 0) 3226 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3227 VOP_UNLOCK(vp); 3228 vn_finished_write(mp); 3229 return (error); 3230 } 3231 3232 /* 3233 * Set the access and modification times of a file. 
3234 */ 3235 #ifndef _SYS_SYSPROTO_H_ 3236 struct utimes_args { 3237 char *path; 3238 struct timeval *tptr; 3239 }; 3240 #endif 3241 int 3242 sys_utimes(struct thread *td, struct utimes_args *uap) 3243 { 3244 3245 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3246 uap->tptr, UIO_USERSPACE)); 3247 } 3248 3249 #ifndef _SYS_SYSPROTO_H_ 3250 struct futimesat_args { 3251 int fd; 3252 const char * path; 3253 const struct timeval * times; 3254 }; 3255 #endif 3256 int 3257 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3258 { 3259 3260 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3261 uap->times, UIO_USERSPACE)); 3262 } 3263 3264 int 3265 kern_utimesat(struct thread *td, int fd, const char *path, 3266 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3267 { 3268 struct nameidata nd; 3269 struct timespec ts[2]; 3270 int error; 3271 3272 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3273 return (error); 3274 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3275 &cap_futimes_rights); 3276 3277 if ((error = namei(&nd)) != 0) 3278 return (error); 3279 NDFREE_NOTHING(&nd); 3280 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3281 vrele(nd.ni_vp); 3282 return (error); 3283 } 3284 3285 /* 3286 * Set the access and modification times of a file. 
3287 */ 3288 #ifndef _SYS_SYSPROTO_H_ 3289 struct lutimes_args { 3290 char *path; 3291 struct timeval *tptr; 3292 }; 3293 #endif 3294 int 3295 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3296 { 3297 3298 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3299 UIO_USERSPACE)); 3300 } 3301 3302 int 3303 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3304 const struct timeval *tptr, enum uio_seg tptrseg) 3305 { 3306 struct timespec ts[2]; 3307 struct nameidata nd; 3308 int error; 3309 3310 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3311 return (error); 3312 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3313 if ((error = namei(&nd)) != 0) 3314 return (error); 3315 NDFREE_NOTHING(&nd); 3316 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3317 vrele(nd.ni_vp); 3318 return (error); 3319 } 3320 3321 /* 3322 * Set the access and modification times of a file. 3323 */ 3324 #ifndef _SYS_SYSPROTO_H_ 3325 struct futimes_args { 3326 int fd; 3327 struct timeval *tptr; 3328 }; 3329 #endif 3330 int 3331 sys_futimes(struct thread *td, struct futimes_args *uap) 3332 { 3333 3334 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3335 } 3336 3337 int 3338 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3339 enum uio_seg tptrseg) 3340 { 3341 struct timespec ts[2]; 3342 struct file *fp; 3343 int error; 3344 3345 AUDIT_ARG_FD(fd); 3346 error = getutimes(tptr, tptrseg, ts); 3347 if (error != 0) 3348 return (error); 3349 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3350 if (error != 0) 3351 return (error); 3352 #ifdef AUDIT 3353 if (AUDITING_TD(td)) { 3354 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3355 AUDIT_ARG_VNODE1(fp->f_vnode); 3356 VOP_UNLOCK(fp->f_vnode); 3357 } 3358 #endif 3359 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3360 fdrop(fp, td); 3361 return (error); 3362 } 3363 3364 int 3365 sys_futimens(struct thread *td, struct futimens_args *uap) 3366 { 3367 
3368 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3369 } 3370 3371 int 3372 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3373 enum uio_seg tptrseg) 3374 { 3375 struct timespec ts[2]; 3376 struct file *fp; 3377 int error, flags; 3378 3379 AUDIT_ARG_FD(fd); 3380 error = getutimens(tptr, tptrseg, ts, &flags); 3381 if (error != 0) 3382 return (error); 3383 if (flags & UTIMENS_EXIT) 3384 return (0); 3385 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3386 if (error != 0) 3387 return (error); 3388 #ifdef AUDIT 3389 if (AUDITING_TD(td)) { 3390 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3391 AUDIT_ARG_VNODE1(fp->f_vnode); 3392 VOP_UNLOCK(fp->f_vnode); 3393 } 3394 #endif 3395 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3396 fdrop(fp, td); 3397 return (error); 3398 } 3399 3400 int 3401 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3402 { 3403 3404 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3405 uap->times, UIO_USERSPACE, uap->flag)); 3406 } 3407 3408 int 3409 kern_utimensat(struct thread *td, int fd, const char *path, 3410 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3411 int flag) 3412 { 3413 struct nameidata nd; 3414 struct timespec ts[2]; 3415 int error, flags; 3416 3417 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3418 AT_EMPTY_PATH)) != 0) 3419 return (EINVAL); 3420 3421 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3422 return (error); 3423 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3424 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3425 pathseg, path, fd, &cap_futimes_rights); 3426 if ((error = namei(&nd)) != 0) 3427 return (error); 3428 /* 3429 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3430 * POSIX states: 3431 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3432 * "Search permission is denied by a component of the path prefix." 
3433 */ 3434 NDFREE_NOTHING(&nd); 3435 if ((flags & UTIMENS_EXIT) == 0) 3436 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3437 vrele(nd.ni_vp); 3438 return (error); 3439 } 3440 3441 /* 3442 * Truncate a file given its path name. 3443 */ 3444 #ifndef _SYS_SYSPROTO_H_ 3445 struct truncate_args { 3446 char *path; 3447 int pad; 3448 off_t length; 3449 }; 3450 #endif 3451 int 3452 sys_truncate(struct thread *td, struct truncate_args *uap) 3453 { 3454 3455 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3456 } 3457 3458 int 3459 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3460 off_t length) 3461 { 3462 struct mount *mp; 3463 struct vnode *vp; 3464 void *rl_cookie; 3465 struct vattr vattr; 3466 struct nameidata nd; 3467 int error; 3468 3469 if (length < 0) 3470 return (EINVAL); 3471 NDPREINIT(&nd); 3472 retry: 3473 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3474 if ((error = namei(&nd)) != 0) 3475 return (error); 3476 vp = nd.ni_vp; 3477 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3478 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3479 vn_rangelock_unlock(vp, rl_cookie); 3480 vrele(vp); 3481 return (error); 3482 } 3483 NDFREE_PNBUF(&nd); 3484 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3485 if (vp->v_type == VDIR) 3486 error = EISDIR; 3487 #ifdef MAC 3488 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3489 } 3490 #endif 3491 else if ((error = vn_writechk(vp)) == 0 && 3492 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3493 VATTR_NULL(&vattr); 3494 vattr.va_size = length; 3495 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3496 } 3497 VOP_UNLOCK(vp); 3498 vn_finished_write(mp); 3499 vn_rangelock_unlock(vp, rl_cookie); 3500 vrele(vp); 3501 if (error == ERELOOKUP) 3502 goto retry; 3503 return (error); 3504 } 3505 3506 #if defined(COMPAT_43) 3507 /* 3508 * Truncate a file given its path name. 
3509 */ 3510 #ifndef _SYS_SYSPROTO_H_ 3511 struct otruncate_args { 3512 char *path; 3513 long length; 3514 }; 3515 #endif 3516 int 3517 otruncate(struct thread *td, struct otruncate_args *uap) 3518 { 3519 3520 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3521 } 3522 #endif /* COMPAT_43 */ 3523 3524 #if defined(COMPAT_FREEBSD6) 3525 /* Versions with the pad argument */ 3526 int 3527 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3528 { 3529 3530 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3531 } 3532 3533 int 3534 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3535 { 3536 3537 return (kern_ftruncate(td, uap->fd, uap->length)); 3538 } 3539 #endif 3540 3541 int 3542 kern_fsync(struct thread *td, int fd, bool fullsync) 3543 { 3544 struct vnode *vp; 3545 struct mount *mp; 3546 struct file *fp; 3547 int error; 3548 3549 AUDIT_ARG_FD(fd); 3550 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3551 if (error != 0) 3552 return (error); 3553 vp = fp->f_vnode; 3554 #if 0 3555 if (!fullsync) 3556 /* XXXKIB: compete outstanding aio writes */; 3557 #endif 3558 retry: 3559 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3560 if (error != 0) 3561 goto drop; 3562 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3563 AUDIT_ARG_VNODE1(vp); 3564 if (vp->v_object != NULL) { 3565 VM_OBJECT_WLOCK(vp->v_object); 3566 vm_object_page_clean(vp->v_object, 0, 0, 0); 3567 VM_OBJECT_WUNLOCK(vp->v_object); 3568 } 3569 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3570 VOP_UNLOCK(vp); 3571 vn_finished_write(mp); 3572 if (error == ERELOOKUP) 3573 goto retry; 3574 drop: 3575 fdrop(fp, td); 3576 return (error); 3577 } 3578 3579 /* 3580 * Sync an open file. 
3581 */ 3582 #ifndef _SYS_SYSPROTO_H_ 3583 struct fsync_args { 3584 int fd; 3585 }; 3586 #endif 3587 int 3588 sys_fsync(struct thread *td, struct fsync_args *uap) 3589 { 3590 3591 return (kern_fsync(td, uap->fd, true)); 3592 } 3593 3594 int 3595 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3596 { 3597 3598 return (kern_fsync(td, uap->fd, false)); 3599 } 3600 3601 /* 3602 * Rename files. Source and destination must either both be directories, or 3603 * both not be directories. If target is a directory, it must be empty. 3604 */ 3605 #ifndef _SYS_SYSPROTO_H_ 3606 struct rename_args { 3607 char *from; 3608 char *to; 3609 }; 3610 #endif 3611 int 3612 sys_rename(struct thread *td, struct rename_args *uap) 3613 { 3614 3615 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3616 uap->to, UIO_USERSPACE)); 3617 } 3618 3619 #ifndef _SYS_SYSPROTO_H_ 3620 struct renameat_args { 3621 int oldfd; 3622 char *old; 3623 int newfd; 3624 char *new; 3625 }; 3626 #endif 3627 int 3628 sys_renameat(struct thread *td, struct renameat_args *uap) 3629 { 3630 3631 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3632 UIO_USERSPACE)); 3633 } 3634 3635 #ifdef MAC 3636 static int 3637 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3638 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3639 { 3640 int error; 3641 3642 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3643 AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights); 3644 if ((error = namei(fromnd)) != 0) 3645 return (error); 3646 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3647 fromnd->ni_vp, &fromnd->ni_cnd); 3648 VOP_UNLOCK(fromnd->ni_dvp); 3649 if (fromnd->ni_dvp != fromnd->ni_vp) 3650 VOP_UNLOCK(fromnd->ni_vp); 3651 if (error != 0) { 3652 NDFREE_PNBUF(fromnd); 3653 vrele(fromnd->ni_dvp); 3654 vrele(fromnd->ni_vp); 3655 if (fromnd->ni_startdir) 3656 vrele(fromnd->ni_startdir); 3657 } 3658 return 
(error); 3659 } 3660 #endif 3661 3662 int 3663 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3664 const char *new, enum uio_seg pathseg) 3665 { 3666 struct mount *mp = NULL; 3667 struct vnode *tvp, *fvp, *tdvp; 3668 struct nameidata fromnd, tond; 3669 uint64_t tondflags; 3670 int error; 3671 3672 again: 3673 bwillwrite(); 3674 #ifdef MAC 3675 if (mac_vnode_check_rename_from_enabled()) { 3676 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3677 &fromnd); 3678 if (error != 0) 3679 return (error); 3680 } else { 3681 #endif 3682 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3683 pathseg, old, oldfd, &cap_renameat_source_rights); 3684 if ((error = namei(&fromnd)) != 0) 3685 return (error); 3686 #ifdef MAC 3687 } 3688 #endif 3689 fvp = fromnd.ni_vp; 3690 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2; 3691 if (fromnd.ni_vp->v_type == VDIR) 3692 tondflags |= WILLBEDIR; 3693 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3694 &cap_renameat_target_rights); 3695 if ((error = namei(&tond)) != 0) { 3696 /* Translate error code for rename("dir1", "dir2/."). 
*/ 3697 if (error == EISDIR && fvp->v_type == VDIR) 3698 error = EINVAL; 3699 NDFREE_PNBUF(&fromnd); 3700 vrele(fromnd.ni_dvp); 3701 vrele(fvp); 3702 goto out1; 3703 } 3704 tdvp = tond.ni_dvp; 3705 tvp = tond.ni_vp; 3706 error = vn_start_write(fvp, &mp, V_NOWAIT); 3707 if (error != 0) { 3708 NDFREE_PNBUF(&fromnd); 3709 NDFREE_PNBUF(&tond); 3710 if (tvp != NULL) 3711 vput(tvp); 3712 if (tdvp == tvp) 3713 vrele(tdvp); 3714 else 3715 vput(tdvp); 3716 vrele(fromnd.ni_dvp); 3717 vrele(fvp); 3718 vrele(tond.ni_startdir); 3719 if (fromnd.ni_startdir != NULL) 3720 vrele(fromnd.ni_startdir); 3721 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3722 if (error != 0) 3723 return (error); 3724 goto again; 3725 } 3726 if (tvp != NULL) { 3727 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3728 error = ENOTDIR; 3729 goto out; 3730 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3731 error = EISDIR; 3732 goto out; 3733 } 3734 #ifdef CAPABILITIES 3735 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3736 /* 3737 * If the target already exists we require CAP_UNLINKAT 3738 * from 'newfd', when newfd was used for the lookup. 3739 */ 3740 error = cap_check(&tond.ni_filecaps.fc_rights, 3741 &cap_unlinkat_rights); 3742 if (error != 0) 3743 goto out; 3744 } 3745 #endif 3746 } 3747 if (fvp == tdvp) { 3748 error = EINVAL; 3749 goto out; 3750 } 3751 /* 3752 * If the source is the same as the destination (that is, if they 3753 * are links to the same vnode), then there is nothing to do. 
3754 */ 3755 if (fvp == tvp) 3756 error = ERESTART; 3757 #ifdef MAC 3758 else 3759 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3760 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3761 #endif 3762 out: 3763 if (error == 0) { 3764 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3765 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3766 NDFREE_PNBUF(&fromnd); 3767 NDFREE_PNBUF(&tond); 3768 } else { 3769 NDFREE_PNBUF(&fromnd); 3770 NDFREE_PNBUF(&tond); 3771 if (tvp != NULL) 3772 vput(tvp); 3773 if (tdvp == tvp) 3774 vrele(tdvp); 3775 else 3776 vput(tdvp); 3777 vrele(fromnd.ni_dvp); 3778 vrele(fvp); 3779 } 3780 vrele(tond.ni_startdir); 3781 vn_finished_write(mp); 3782 out1: 3783 if (fromnd.ni_startdir) 3784 vrele(fromnd.ni_startdir); 3785 if (error == ERESTART) 3786 return (0); 3787 if (error == ERELOOKUP) 3788 goto again; 3789 return (error); 3790 } 3791 3792 /* 3793 * Make a directory file. 3794 */ 3795 #ifndef _SYS_SYSPROTO_H_ 3796 struct mkdir_args { 3797 char *path; 3798 int mode; 3799 }; 3800 #endif 3801 int 3802 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3803 { 3804 3805 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3806 uap->mode)); 3807 } 3808 3809 #ifndef _SYS_SYSPROTO_H_ 3810 struct mkdirat_args { 3811 int fd; 3812 char *path; 3813 mode_t mode; 3814 }; 3815 #endif 3816 int 3817 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3818 { 3819 3820 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3821 } 3822 3823 int 3824 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3825 int mode) 3826 { 3827 struct mount *mp; 3828 struct vattr vattr; 3829 struct nameidata nd; 3830 int error; 3831 3832 AUDIT_ARG_MODE(mode); 3833 NDPREINIT(&nd); 3834 restart: 3835 bwillwrite(); 3836 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3837 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3838 segflg, path, fd, &cap_mkdirat_rights); 3839 if ((error 
= namei(&nd)) != 0) 3840 return (error); 3841 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3842 NDFREE_PNBUF(&nd); 3843 vput(nd.ni_dvp); 3844 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3845 return (error); 3846 goto restart; 3847 } 3848 VATTR_NULL(&vattr); 3849 vattr.va_type = VDIR; 3850 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3851 #ifdef MAC 3852 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3853 &vattr); 3854 if (error != 0) 3855 goto out; 3856 #endif 3857 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3858 #ifdef MAC 3859 out: 3860 #endif 3861 NDFREE_PNBUF(&nd); 3862 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3863 vn_finished_write(mp); 3864 if (error == ERELOOKUP) 3865 goto restart; 3866 return (error); 3867 } 3868 3869 /* 3870 * Remove a directory file. 3871 */ 3872 #ifndef _SYS_SYSPROTO_H_ 3873 struct rmdir_args { 3874 char *path; 3875 }; 3876 #endif 3877 int 3878 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3879 { 3880 3881 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3882 0)); 3883 } 3884 3885 int 3886 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3887 enum uio_seg pathseg, int flag) 3888 { 3889 struct mount *mp; 3890 struct vnode *vp; 3891 struct file *fp; 3892 struct nameidata nd; 3893 cap_rights_t rights; 3894 int error; 3895 3896 fp = NULL; 3897 if (fd != FD_NONE) { 3898 error = getvnode(td, fd, cap_rights_init_one(&rights, 3899 CAP_LOOKUP), &fp); 3900 if (error != 0) 3901 return (error); 3902 } 3903 3904 NDPREINIT(&nd); 3905 restart: 3906 bwillwrite(); 3907 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3908 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3909 pathseg, path, dfd, &cap_unlinkat_rights); 3910 if ((error = namei(&nd)) != 0) 3911 goto fdout; 3912 vp = nd.ni_vp; 3913 if (vp->v_type != VDIR) { 3914 error = ENOTDIR; 3915 goto out; 3916 } 3917 /* 3918 * No rmdir 
"." please. 3919 */ 3920 if (nd.ni_dvp == vp) { 3921 error = EINVAL; 3922 goto out; 3923 } 3924 /* 3925 * The root of a mounted filesystem cannot be deleted. 3926 */ 3927 if (vp->v_vflag & VV_ROOT) { 3928 error = EBUSY; 3929 goto out; 3930 } 3931 3932 if (fp != NULL && fp->f_vnode != vp) { 3933 if (VN_IS_DOOMED(fp->f_vnode)) 3934 error = EBADF; 3935 else 3936 error = EDEADLK; 3937 goto out; 3938 } 3939 3940 #ifdef MAC 3941 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3942 &nd.ni_cnd); 3943 if (error != 0) 3944 goto out; 3945 #endif 3946 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3947 NDFREE_PNBUF(&nd); 3948 vput(vp); 3949 if (nd.ni_dvp == vp) 3950 vrele(nd.ni_dvp); 3951 else 3952 vput(nd.ni_dvp); 3953 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3954 goto fdout; 3955 goto restart; 3956 } 3957 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3958 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3959 vn_finished_write(mp); 3960 out: 3961 NDFREE_PNBUF(&nd); 3962 vput(vp); 3963 if (nd.ni_dvp == vp) 3964 vrele(nd.ni_dvp); 3965 else 3966 vput(nd.ni_dvp); 3967 if (error == ERELOOKUP) 3968 goto restart; 3969 fdout: 3970 if (fp != NULL) 3971 fdrop(fp, td); 3972 return (error); 3973 } 3974 3975 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3976 int 3977 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3978 long *basep, void (*func)(struct freebsd11_dirent *)) 3979 { 3980 struct freebsd11_dirent dstdp; 3981 struct dirent *dp, *edp; 3982 char *dirbuf; 3983 off_t base; 3984 ssize_t resid, ucount; 3985 int error; 3986 3987 /* XXX arbitrary sanity limit on `count'. 
*/ 3988 count = min(count, 64 * 1024); 3989 3990 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3991 3992 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3993 UIO_SYSSPACE); 3994 if (error != 0) 3995 goto done; 3996 if (basep != NULL) 3997 *basep = base; 3998 3999 ucount = 0; 4000 for (dp = (struct dirent *)dirbuf, 4001 edp = (struct dirent *)&dirbuf[count - resid]; 4002 ucount < count && dp < edp; ) { 4003 if (dp->d_reclen == 0) 4004 break; 4005 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4006 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4007 continue; 4008 dstdp.d_type = dp->d_type; 4009 dstdp.d_namlen = dp->d_namlen; 4010 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4011 if (dstdp.d_fileno != dp->d_fileno) { 4012 switch (ino64_trunc_error) { 4013 default: 4014 case 0: 4015 break; 4016 case 1: 4017 error = EOVERFLOW; 4018 goto done; 4019 case 2: 4020 dstdp.d_fileno = UINT32_MAX; 4021 break; 4022 } 4023 } 4024 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4025 ((dp->d_namlen + 1 + 3) &~ 3); 4026 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4027 bzero(dstdp.d_name + dstdp.d_namlen, 4028 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4029 dstdp.d_namlen); 4030 MPASS(dstdp.d_reclen <= dp->d_reclen); 4031 MPASS(ucount + dstdp.d_reclen <= count); 4032 if (func != NULL) 4033 func(&dstdp); 4034 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4035 if (error != 0) 4036 break; 4037 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4038 ucount += dstdp.d_reclen; 4039 } 4040 4041 done: 4042 free(dirbuf, M_TEMP); 4043 if (error == 0) 4044 td->td_retval[0] = ucount; 4045 return (error); 4046 } 4047 #endif /* COMPAT */ 4048 4049 #ifdef COMPAT_43 4050 static void 4051 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4052 { 4053 #if (BYTE_ORDER == LITTLE_ENDIAN) 4054 /* 4055 * The expected low byte of dp->d_namlen is our dp->d_type. 4056 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 
4057 */ 4058 dp->d_type = dp->d_namlen; 4059 dp->d_namlen = 0; 4060 #else 4061 /* 4062 * The dp->d_type is the high byte of the expected dp->d_namlen, 4063 * so must be zero'ed. 4064 */ 4065 dp->d_type = 0; 4066 #endif 4067 } 4068 4069 /* 4070 * Read a block of directory entries in a filesystem independent format. 4071 */ 4072 #ifndef _SYS_SYSPROTO_H_ 4073 struct ogetdirentries_args { 4074 int fd; 4075 char *buf; 4076 u_int count; 4077 long *basep; 4078 }; 4079 #endif 4080 int 4081 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4082 { 4083 long loff; 4084 int error; 4085 4086 error = kern_ogetdirentries(td, uap, &loff); 4087 if (error == 0) 4088 error = copyout(&loff, uap->basep, sizeof(long)); 4089 return (error); 4090 } 4091 4092 int 4093 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4094 long *ploff) 4095 { 4096 long base; 4097 int error; 4098 4099 /* XXX arbitrary sanity limit on `count'. */ 4100 if (uap->count > 64 * 1024) 4101 return (EINVAL); 4102 4103 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4104 &base, ogetdirentries_cvt); 4105 4106 if (error == 0 && uap->basep != NULL) 4107 error = copyout(&base, uap->basep, sizeof(long)); 4108 4109 return (error); 4110 } 4111 #endif /* COMPAT_43 */ 4112 4113 #if defined(COMPAT_FREEBSD11) 4114 #ifndef _SYS_SYSPROTO_H_ 4115 struct freebsd11_getdirentries_args { 4116 int fd; 4117 char *buf; 4118 u_int count; 4119 long *basep; 4120 }; 4121 #endif 4122 int 4123 freebsd11_getdirentries(struct thread *td, 4124 struct freebsd11_getdirentries_args *uap) 4125 { 4126 long base; 4127 int error; 4128 4129 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4130 &base, NULL); 4131 4132 if (error == 0 && uap->basep != NULL) 4133 error = copyout(&base, uap->basep, sizeof(long)); 4134 return (error); 4135 } 4136 4137 int 4138 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4139 { 4140 struct 
freebsd11_getdirentries_args ap; 4141 4142 ap.fd = uap->fd; 4143 ap.buf = uap->buf; 4144 ap.count = uap->count; 4145 ap.basep = NULL; 4146 return (freebsd11_getdirentries(td, &ap)); 4147 } 4148 #endif /* COMPAT_FREEBSD11 */ 4149 4150 /* 4151 * Read a block of directory entries in a filesystem independent format. 4152 */ 4153 int 4154 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4155 { 4156 off_t base; 4157 int error; 4158 4159 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4160 NULL, UIO_USERSPACE); 4161 if (error != 0) 4162 return (error); 4163 if (uap->basep != NULL) 4164 error = copyout(&base, uap->basep, sizeof(off_t)); 4165 return (error); 4166 } 4167 4168 int 4169 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4170 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4171 { 4172 struct vnode *vp; 4173 struct file *fp; 4174 struct uio auio; 4175 struct iovec aiov; 4176 off_t loff; 4177 int error, eofflag; 4178 off_t foffset; 4179 4180 AUDIT_ARG_FD(fd); 4181 if (count > IOSIZE_MAX) 4182 return (EINVAL); 4183 auio.uio_resid = count; 4184 error = getvnode(td, fd, &cap_read_rights, &fp); 4185 if (error != 0) 4186 return (error); 4187 if ((fp->f_flag & FREAD) == 0) { 4188 fdrop(fp, td); 4189 return (EBADF); 4190 } 4191 vp = fp->f_vnode; 4192 foffset = foffset_lock(fp, 0); 4193 unionread: 4194 if (vp->v_type != VDIR) { 4195 error = EINVAL; 4196 goto fail; 4197 } 4198 if (__predict_false((vp->v_vflag & VV_UNLINKED) != 0)) { 4199 error = ENOENT; 4200 goto fail; 4201 } 4202 aiov.iov_base = buf; 4203 aiov.iov_len = count; 4204 auio.uio_iov = &aiov; 4205 auio.uio_iovcnt = 1; 4206 auio.uio_rw = UIO_READ; 4207 auio.uio_segflg = bufseg; 4208 auio.uio_td = td; 4209 vn_lock(vp, LK_SHARED | LK_RETRY); 4210 AUDIT_ARG_VNODE1(vp); 4211 loff = auio.uio_offset = foffset; 4212 #ifdef MAC 4213 error = mac_vnode_check_readdir(td->td_ucred, vp); 4214 if (error == 0) 4215 #endif 4216 error = VOP_READDIR(vp, &auio, 
fp->f_cred, &eofflag, NULL, 4217 NULL); 4218 foffset = auio.uio_offset; 4219 if (error != 0) { 4220 VOP_UNLOCK(vp); 4221 goto fail; 4222 } 4223 if (count == auio.uio_resid && 4224 (vp->v_vflag & VV_ROOT) && 4225 (vp->v_mount->mnt_flag & MNT_UNION)) { 4226 struct vnode *tvp = vp; 4227 4228 vp = vp->v_mount->mnt_vnodecovered; 4229 VREF(vp); 4230 fp->f_vnode = vp; 4231 foffset = 0; 4232 vput(tvp); 4233 goto unionread; 4234 } 4235 VOP_UNLOCK(vp); 4236 *basep = loff; 4237 if (residp != NULL) 4238 *residp = auio.uio_resid; 4239 td->td_retval[0] = count - auio.uio_resid; 4240 fail: 4241 foffset_unlock(fp, foffset, 0); 4242 fdrop(fp, td); 4243 return (error); 4244 } 4245 4246 /* 4247 * Set the mode mask for creation of filesystem nodes. 4248 */ 4249 #ifndef _SYS_SYSPROTO_H_ 4250 struct umask_args { 4251 int newmask; 4252 }; 4253 #endif 4254 int 4255 sys_umask(struct thread *td, struct umask_args *uap) 4256 { 4257 struct pwddesc *pdp; 4258 4259 pdp = td->td_proc->p_pd; 4260 PWDDESC_XLOCK(pdp); 4261 td->td_retval[0] = pdp->pd_cmask; 4262 pdp->pd_cmask = uap->newmask & ALLPERMS; 4263 PWDDESC_XUNLOCK(pdp); 4264 return (0); 4265 } 4266 4267 /* 4268 * Void all references to file by ripping underlying filesystem away from 4269 * vnode. 
4270 */ 4271 #ifndef _SYS_SYSPROTO_H_ 4272 struct revoke_args { 4273 char *path; 4274 }; 4275 #endif 4276 int 4277 sys_revoke(struct thread *td, struct revoke_args *uap) 4278 { 4279 struct vnode *vp; 4280 struct vattr vattr; 4281 struct nameidata nd; 4282 int error; 4283 4284 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4285 uap->path); 4286 if ((error = namei(&nd)) != 0) 4287 return (error); 4288 vp = nd.ni_vp; 4289 NDFREE_NOTHING(&nd); 4290 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4291 error = EINVAL; 4292 goto out; 4293 } 4294 #ifdef MAC 4295 error = mac_vnode_check_revoke(td->td_ucred, vp); 4296 if (error != 0) 4297 goto out; 4298 #endif 4299 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4300 if (error != 0) 4301 goto out; 4302 if (td->td_ucred->cr_uid != vattr.va_uid) { 4303 error = priv_check(td, PRIV_VFS_ADMIN); 4304 if (error != 0) 4305 goto out; 4306 } 4307 if (devfs_usecount(vp) > 0) 4308 VOP_REVOKE(vp, REVOKEALL); 4309 out: 4310 vput(vp); 4311 return (error); 4312 } 4313 4314 /* 4315 * This variant of getvnode() allows O_PATH files. Caller should 4316 * ensure that returned file and vnode are only used for compatible 4317 * semantics. 4318 */ 4319 int 4320 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4321 struct file **fpp) 4322 { 4323 struct file *fp; 4324 int error; 4325 4326 error = fget_unlocked(td, fd, rightsp, &fp); 4327 if (error != 0) 4328 return (error); 4329 4330 /* 4331 * The file could be not of the vnode type, or it may be not 4332 * yet fully initialized, in which case the f_vnode pointer 4333 * may be set, but f_ops is still badfileops. E.g., 4334 * devfs_open() transiently create such situation to 4335 * facilitate csw d_fdopen(). 4336 * 4337 * Dupfdopen() handling in kern_openat() installs the 4338 * half-baked file into the process descriptor table, allowing 4339 * other thread to dereference it. Guard against the race by 4340 * checking f_ops. 
4341 */ 4342 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4343 fdrop(fp, td); 4344 *fpp = NULL; 4345 return (EINVAL); 4346 } 4347 4348 *fpp = fp; 4349 return (0); 4350 } 4351 4352 /* 4353 * Convert a user file descriptor to a kernel file entry and check 4354 * that, if it is a capability, the correct rights are present. 4355 * A reference on the file entry is held upon returning. 4356 */ 4357 int 4358 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4359 { 4360 int error; 4361 4362 error = getvnode_path(td, fd, rightsp, fpp); 4363 if (__predict_false(error != 0)) 4364 return (error); 4365 4366 /* 4367 * Filter out O_PATH file descriptors, most getvnode() callers 4368 * do not call fo_ methods. 4369 */ 4370 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4371 fdrop(*fpp, td); 4372 *fpp = NULL; 4373 error = EBADF; 4374 } 4375 4376 return (error); 4377 } 4378 4379 /* 4380 * Get an (NFS) file handle. 4381 */ 4382 #ifndef _SYS_SYSPROTO_H_ 4383 struct lgetfh_args { 4384 char *fname; 4385 fhandle_t *fhp; 4386 }; 4387 #endif 4388 int 4389 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4390 { 4391 4392 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4393 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4394 } 4395 4396 #ifndef _SYS_SYSPROTO_H_ 4397 struct getfh_args { 4398 char *fname; 4399 fhandle_t *fhp; 4400 }; 4401 #endif 4402 int 4403 sys_getfh(struct thread *td, struct getfh_args *uap) 4404 { 4405 4406 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4407 uap->fhp, UIO_USERSPACE)); 4408 } 4409 4410 /* 4411 * syscall for the rpc.lockd to use to translate an open descriptor into 4412 * a NFS file handle. 4413 * 4414 * warning: do not remove the priv_check() call or this becomes one giant 4415 * security hole. 
4416 */ 4417 #ifndef _SYS_SYSPROTO_H_ 4418 struct getfhat_args { 4419 int fd; 4420 char *path; 4421 fhandle_t *fhp; 4422 int flags; 4423 }; 4424 #endif 4425 int 4426 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4427 { 4428 4429 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4430 uap->fhp, UIO_USERSPACE)); 4431 } 4432 4433 int 4434 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4435 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4436 { 4437 struct nameidata nd; 4438 fhandle_t fh; 4439 struct vnode *vp; 4440 int error; 4441 4442 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4443 return (EINVAL); 4444 error = priv_check(td, PRIV_VFS_GETFH); 4445 if (error != 0) 4446 return (error); 4447 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4448 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4449 fd); 4450 error = namei(&nd); 4451 if (error != 0) 4452 return (error); 4453 NDFREE_NOTHING(&nd); 4454 vp = nd.ni_vp; 4455 bzero(&fh, sizeof(fh)); 4456 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4457 error = VOP_VPTOFH(vp, &fh.fh_fid); 4458 vput(vp); 4459 if (error == 0) { 4460 if (fhseg == UIO_USERSPACE) 4461 error = copyout(&fh, fhp, sizeof (fh)); 4462 else 4463 memcpy(fhp, &fh, sizeof(fh)); 4464 } 4465 return (error); 4466 } 4467 4468 #ifndef _SYS_SYSPROTO_H_ 4469 struct fhlink_args { 4470 fhandle_t *fhp; 4471 const char *to; 4472 }; 4473 #endif 4474 int 4475 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4476 { 4477 4478 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4479 } 4480 4481 #ifndef _SYS_SYSPROTO_H_ 4482 struct fhlinkat_args { 4483 fhandle_t *fhp; 4484 int tofd; 4485 const char *to; 4486 }; 4487 #endif 4488 int 4489 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4490 { 4491 4492 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4493 } 4494 4495 static int 4496 kern_fhlinkat(struct thread 
*td, int fd, const char *path, 4497 enum uio_seg pathseg, fhandle_t *fhp) 4498 { 4499 fhandle_t fh; 4500 struct mount *mp; 4501 struct vnode *vp; 4502 int error; 4503 4504 error = priv_check(td, PRIV_VFS_GETFH); 4505 if (error != 0) 4506 return (error); 4507 error = copyin(fhp, &fh, sizeof(fh)); 4508 if (error != 0) 4509 return (error); 4510 do { 4511 bwillwrite(); 4512 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4513 return (ESTALE); 4514 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4515 vfs_unbusy(mp); 4516 if (error != 0) 4517 return (error); 4518 VOP_UNLOCK(vp); 4519 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4520 } while (error == EAGAIN || error == ERELOOKUP); 4521 return (error); 4522 } 4523 4524 #ifndef _SYS_SYSPROTO_H_ 4525 struct fhreadlink_args { 4526 fhandle_t *fhp; 4527 char *buf; 4528 size_t bufsize; 4529 }; 4530 #endif 4531 int 4532 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4533 { 4534 fhandle_t fh; 4535 struct mount *mp; 4536 struct vnode *vp; 4537 int error; 4538 4539 error = priv_check(td, PRIV_VFS_GETFH); 4540 if (error != 0) 4541 return (error); 4542 if (uap->bufsize > IOSIZE_MAX) 4543 return (EINVAL); 4544 error = copyin(uap->fhp, &fh, sizeof(fh)); 4545 if (error != 0) 4546 return (error); 4547 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4548 return (ESTALE); 4549 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4550 vfs_unbusy(mp); 4551 if (error != 0) 4552 return (error); 4553 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4554 vput(vp); 4555 return (error); 4556 } 4557 4558 /* 4559 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4560 * open descriptor. 4561 * 4562 * warning: do not remove the priv_check() call or this becomes one giant 4563 * security hole. 
4564 */ 4565 #ifndef _SYS_SYSPROTO_H_ 4566 struct fhopen_args { 4567 const struct fhandle *u_fhp; 4568 int flags; 4569 }; 4570 #endif 4571 int 4572 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4573 { 4574 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4575 } 4576 4577 int 4578 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4579 { 4580 struct mount *mp; 4581 struct vnode *vp; 4582 struct fhandle fhp; 4583 struct file *fp; 4584 int fmode, error; 4585 int indx; 4586 4587 error = priv_check(td, PRIV_VFS_FHOPEN); 4588 if (error != 0) 4589 return (error); 4590 indx = -1; 4591 fmode = FFLAGS(flags); 4592 /* why not allow a non-read/write open for our lockd? */ 4593 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4594 return (EINVAL); 4595 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4596 if (error != 0) 4597 return(error); 4598 /* find the mount point */ 4599 mp = vfs_busyfs(&fhp.fh_fsid); 4600 if (mp == NULL) 4601 return (ESTALE); 4602 /* now give me my vnode, it gets returned to me locked */ 4603 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4604 vfs_unbusy(mp); 4605 if (error != 0) 4606 return (error); 4607 4608 error = falloc_noinstall(td, &fp); 4609 if (error != 0) { 4610 vput(vp); 4611 return (error); 4612 } 4613 /* 4614 * An extra reference on `fp' has been held for us by 4615 * falloc_noinstall(). 
4616 */ 4617 4618 #ifdef INVARIANTS 4619 td->td_dupfd = -1; 4620 #endif 4621 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4622 if (error != 0) { 4623 KASSERT(fp->f_ops == &badfileops, 4624 ("VOP_OPEN in fhopen() set f_ops")); 4625 KASSERT(td->td_dupfd < 0, 4626 ("fhopen() encountered fdopen()")); 4627 4628 vput(vp); 4629 goto bad; 4630 } 4631 #ifdef INVARIANTS 4632 td->td_dupfd = 0; 4633 #endif 4634 fp->f_vnode = vp; 4635 finit_vnode(fp, fmode, NULL, &vnops); 4636 VOP_UNLOCK(vp); 4637 if ((fmode & O_TRUNC) != 0) { 4638 error = fo_truncate(fp, 0, td->td_ucred, td); 4639 if (error != 0) 4640 goto bad; 4641 } 4642 4643 error = finstall(td, fp, &indx, fmode, NULL); 4644 bad: 4645 fdrop(fp, td); 4646 td->td_retval[0] = indx; 4647 return (error); 4648 } 4649 4650 /* 4651 * Stat an (NFS) file handle. 4652 */ 4653 #ifndef _SYS_SYSPROTO_H_ 4654 struct fhstat_args { 4655 struct fhandle *u_fhp; 4656 struct stat *sb; 4657 }; 4658 #endif 4659 int 4660 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4661 { 4662 struct stat sb; 4663 struct fhandle fh; 4664 int error; 4665 4666 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4667 if (error != 0) 4668 return (error); 4669 error = kern_fhstat(td, fh, &sb); 4670 if (error == 0) 4671 error = copyout(&sb, uap->sb, sizeof(sb)); 4672 return (error); 4673 } 4674 4675 int 4676 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4677 { 4678 struct mount *mp; 4679 struct vnode *vp; 4680 int error; 4681 4682 error = priv_check(td, PRIV_VFS_FHSTAT); 4683 if (error != 0) 4684 return (error); 4685 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4686 return (ESTALE); 4687 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4688 vfs_unbusy(mp); 4689 if (error != 0) 4690 return (error); 4691 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4692 vput(vp); 4693 return (error); 4694 } 4695 4696 /* 4697 * Implement fstatfs() for (NFS) file handles. 
4698 */ 4699 #ifndef _SYS_SYSPROTO_H_ 4700 struct fhstatfs_args { 4701 struct fhandle *u_fhp; 4702 struct statfs *buf; 4703 }; 4704 #endif 4705 int 4706 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4707 { 4708 struct statfs *sfp; 4709 fhandle_t fh; 4710 int error; 4711 4712 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4713 if (error != 0) 4714 return (error); 4715 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4716 error = kern_fhstatfs(td, fh, sfp); 4717 if (error == 0) 4718 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4719 free(sfp, M_STATFS); 4720 return (error); 4721 } 4722 4723 int 4724 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4725 { 4726 struct mount *mp; 4727 struct vnode *vp; 4728 int error; 4729 4730 error = priv_check(td, PRIV_VFS_FHSTATFS); 4731 if (error != 0) 4732 return (error); 4733 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4734 return (ESTALE); 4735 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4736 if (error != 0) { 4737 vfs_unbusy(mp); 4738 return (error); 4739 } 4740 vput(vp); 4741 error = prison_canseemount(td->td_ucred, mp); 4742 if (error != 0) 4743 goto out; 4744 #ifdef MAC 4745 error = mac_mount_check_stat(td->td_ucred, mp); 4746 if (error != 0) 4747 goto out; 4748 #endif 4749 error = VFS_STATFS(mp, buf); 4750 out: 4751 vfs_unbusy(mp); 4752 return (error); 4753 } 4754 4755 /* 4756 * Unlike madvise(2), we do not make a best effort to remember every 4757 * possible caching hint. Instead, we remember the last setting with 4758 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4759 * region of any current setting. 
4760 */ 4761 int 4762 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4763 int advice) 4764 { 4765 struct fadvise_info *fa, *new; 4766 struct file *fp; 4767 struct vnode *vp; 4768 off_t end; 4769 int error; 4770 4771 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4772 return (EINVAL); 4773 AUDIT_ARG_VALUE(advice); 4774 switch (advice) { 4775 case POSIX_FADV_SEQUENTIAL: 4776 case POSIX_FADV_RANDOM: 4777 case POSIX_FADV_NOREUSE: 4778 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4779 break; 4780 case POSIX_FADV_NORMAL: 4781 case POSIX_FADV_WILLNEED: 4782 case POSIX_FADV_DONTNEED: 4783 new = NULL; 4784 break; 4785 default: 4786 return (EINVAL); 4787 } 4788 /* XXX: CAP_POSIX_FADVISE? */ 4789 AUDIT_ARG_FD(fd); 4790 error = fget(td, fd, &cap_no_rights, &fp); 4791 if (error != 0) 4792 goto out; 4793 AUDIT_ARG_FILE(td->td_proc, fp); 4794 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4795 error = ESPIPE; 4796 goto out; 4797 } 4798 if (fp->f_type != DTYPE_VNODE) { 4799 error = ENODEV; 4800 goto out; 4801 } 4802 vp = fp->f_vnode; 4803 if (vp->v_type != VREG) { 4804 error = ENODEV; 4805 goto out; 4806 } 4807 if (len == 0) 4808 end = OFF_MAX; 4809 else 4810 end = offset + len - 1; 4811 switch (advice) { 4812 case POSIX_FADV_SEQUENTIAL: 4813 case POSIX_FADV_RANDOM: 4814 case POSIX_FADV_NOREUSE: 4815 /* 4816 * Try to merge any existing non-standard region with 4817 * this new region if possible, otherwise create a new 4818 * non-standard region for this request. 
4819 */ 4820 mtx_pool_lock(mtxpool_sleep, fp); 4821 fa = fp->f_advice; 4822 if (fa != NULL && fa->fa_advice == advice && 4823 ((fa->fa_start <= end && fa->fa_end >= offset) || 4824 (end != OFF_MAX && fa->fa_start == end + 1) || 4825 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4826 if (offset < fa->fa_start) 4827 fa->fa_start = offset; 4828 if (end > fa->fa_end) 4829 fa->fa_end = end; 4830 } else { 4831 new->fa_advice = advice; 4832 new->fa_start = offset; 4833 new->fa_end = end; 4834 fp->f_advice = new; 4835 new = fa; 4836 } 4837 mtx_pool_unlock(mtxpool_sleep, fp); 4838 break; 4839 case POSIX_FADV_NORMAL: 4840 /* 4841 * If a the "normal" region overlaps with an existing 4842 * non-standard region, trim or remove the 4843 * non-standard region. 4844 */ 4845 mtx_pool_lock(mtxpool_sleep, fp); 4846 fa = fp->f_advice; 4847 if (fa != NULL) { 4848 if (offset <= fa->fa_start && end >= fa->fa_end) { 4849 new = fa; 4850 fp->f_advice = NULL; 4851 } else if (offset <= fa->fa_start && 4852 end >= fa->fa_start) 4853 fa->fa_start = end + 1; 4854 else if (offset <= fa->fa_end && end >= fa->fa_end) 4855 fa->fa_end = offset - 1; 4856 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4857 /* 4858 * If the "normal" region is a middle 4859 * portion of the existing 4860 * non-standard region, just remove 4861 * the whole thing rather than picking 4862 * one side or the other to 4863 * preserve. 
4864 */ 4865 new = fa; 4866 fp->f_advice = NULL; 4867 } 4868 } 4869 mtx_pool_unlock(mtxpool_sleep, fp); 4870 break; 4871 case POSIX_FADV_WILLNEED: 4872 case POSIX_FADV_DONTNEED: 4873 error = VOP_ADVISE(vp, offset, end, advice); 4874 break; 4875 } 4876 out: 4877 if (fp != NULL) 4878 fdrop(fp, td); 4879 free(new, M_FADVISE); 4880 return (error); 4881 } 4882 4883 int 4884 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4885 { 4886 int error; 4887 4888 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4889 uap->advice); 4890 return (kern_posix_error(td, error)); 4891 } 4892 4893 int 4894 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4895 off_t *outoffp, size_t len, unsigned int flags) 4896 { 4897 struct file *infp, *outfp; 4898 struct vnode *invp, *outvp; 4899 int error; 4900 size_t retlen; 4901 void *rl_rcookie, *rl_wcookie; 4902 off_t savinoff, savoutoff; 4903 4904 infp = outfp = NULL; 4905 rl_rcookie = rl_wcookie = NULL; 4906 savinoff = -1; 4907 error = 0; 4908 retlen = 0; 4909 4910 if (flags != 0) { 4911 error = EINVAL; 4912 goto out; 4913 } 4914 if (len > SSIZE_MAX) 4915 /* 4916 * Although the len argument is size_t, the return argument 4917 * is ssize_t (which is signed). Therefore a size that won't 4918 * fit in ssize_t can't be returned. 4919 */ 4920 len = SSIZE_MAX; 4921 4922 /* Get the file structures for the file descriptors. */ 4923 error = fget_read(td, infd, &cap_read_rights, &infp); 4924 if (error != 0) 4925 goto out; 4926 if (infp->f_ops == &badfileops) { 4927 error = EBADF; 4928 goto out; 4929 } 4930 if (infp->f_vnode == NULL) { 4931 error = EINVAL; 4932 goto out; 4933 } 4934 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4935 if (error != 0) 4936 goto out; 4937 if (outfp->f_ops == &badfileops) { 4938 error = EBADF; 4939 goto out; 4940 } 4941 if (outfp->f_vnode == NULL) { 4942 error = EINVAL; 4943 goto out; 4944 } 4945 4946 /* Set the offset pointers to the correct place. 
*/ 4947 if (inoffp == NULL) 4948 inoffp = &infp->f_offset; 4949 if (outoffp == NULL) 4950 outoffp = &outfp->f_offset; 4951 savinoff = *inoffp; 4952 savoutoff = *outoffp; 4953 4954 invp = infp->f_vnode; 4955 outvp = outfp->f_vnode; 4956 /* Sanity check the f_flag bits. */ 4957 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4958 (infp->f_flag & FREAD) == 0) { 4959 error = EBADF; 4960 goto out; 4961 } 4962 4963 /* If len == 0, just return 0. */ 4964 if (len == 0) 4965 goto out; 4966 4967 /* 4968 * If infp and outfp refer to the same file, the byte ranges cannot 4969 * overlap. 4970 */ 4971 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4972 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4973 savinoff))) { 4974 error = EINVAL; 4975 goto out; 4976 } 4977 4978 /* Range lock the byte ranges for both invp and outvp. */ 4979 for (;;) { 4980 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4981 len); 4982 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4983 len); 4984 if (rl_rcookie != NULL) 4985 break; 4986 vn_rangelock_unlock(outvp, rl_wcookie); 4987 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4988 vn_rangelock_unlock(invp, rl_rcookie); 4989 } 4990 4991 retlen = len; 4992 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4993 flags, infp->f_cred, outfp->f_cred, td); 4994 out: 4995 if (rl_rcookie != NULL) 4996 vn_rangelock_unlock(invp, rl_rcookie); 4997 if (rl_wcookie != NULL) 4998 vn_rangelock_unlock(outvp, rl_wcookie); 4999 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 5000 *inoffp = savinoff; 5001 *outoffp = savoutoff; 5002 } 5003 if (outfp != NULL) 5004 fdrop(outfp, td); 5005 if (infp != NULL) 5006 fdrop(infp, td); 5007 td->td_retval[0] = retlen; 5008 return (error); 5009 } 5010 5011 int 5012 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5013 { 5014 off_t inoff, outoff, *inoffp, *outoffp; 5015 int error; 5016 5017 inoffp = outoffp = 
NULL; 5018 if (uap->inoffp != NULL) { 5019 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5020 if (error != 0) 5021 return (error); 5022 inoffp = &inoff; 5023 } 5024 if (uap->outoffp != NULL) { 5025 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5026 if (error != 0) 5027 return (error); 5028 outoffp = &outoff; 5029 } 5030 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5031 outoffp, uap->len, uap->flags); 5032 if (error == 0 && uap->inoffp != NULL) 5033 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5034 if (error == 0 && uap->outoffp != NULL) 5035 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5036 return (error); 5037 } 5038