1 /*- 2 * SPDX-License-Identifier: BSD-3-Clause 3 * 4 * Copyright (c) 1989, 1993 5 * The Regents of the University of California. All rights reserved. 6 * (c) UNIX System Laboratories, Inc. 7 * All or some portions of this file are derived from material licensed 8 * to the University of California by American Telephone and Telegraph 9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with 10 * the permission of UNIX System Laboratories, Inc. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 3. Neither the name of the University nor the names of its contributors 21 * may be used to endorse or promote products derived from this software 22 * without specific prior written permission. 23 * 24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND 25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE 28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 34 * SUCH DAMAGE. 35 * 36 * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 37 */ 38 39 #include <sys/cdefs.h> 40 __FBSDID("$FreeBSD$"); 41 42 #include "opt_capsicum.h" 43 #include "opt_ktrace.h" 44 45 #include <sys/param.h> 46 #include <sys/systm.h> 47 #ifdef COMPAT_FREEBSD11 48 #include <sys/abi_compat.h> 49 #endif 50 #include <sys/bio.h> 51 #include <sys/buf.h> 52 #include <sys/capsicum.h> 53 #include <sys/disk.h> 54 #include <sys/sysent.h> 55 #include <sys/malloc.h> 56 #include <sys/mount.h> 57 #include <sys/mutex.h> 58 #include <sys/sysproto.h> 59 #include <sys/namei.h> 60 #include <sys/filedesc.h> 61 #include <sys/kernel.h> 62 #include <sys/fcntl.h> 63 #include <sys/file.h> 64 #include <sys/filio.h> 65 #include <sys/limits.h> 66 #include <sys/linker.h> 67 #include <sys/rwlock.h> 68 #include <sys/sdt.h> 69 #include <sys/stat.h> 70 #include <sys/sx.h> 71 #include <sys/unistd.h> 72 #include <sys/vnode.h> 73 #include <sys/priv.h> 74 #include <sys/proc.h> 75 #include <sys/dirent.h> 76 #include <sys/jail.h> 77 #include <sys/syscallsubr.h> 78 #include <sys/sysctl.h> 79 #ifdef KTRACE 80 #include <sys/ktrace.h> 81 #endif 82 83 #include <machine/stdarg.h> 84 85 #include <security/audit/audit.h> 86 #include <security/mac/mac_framework.h> 87 88 #include <vm/vm.h> 89 #include <vm/vm_object.h> 90 #include <vm/vm_page.h> 91 #include <vm/uma.h> 92 93 #include <fs/devfs/devfs.h> 94 95 MALLOC_DEFINE(M_FADVISE, "fadvise", "posix_fadvise(2) information"); 96 97 static int kern_chflagsat(struct thread *td, int fd, const char *path, 98 enum uio_seg pathseg, u_long flags, int atflag); 99 static int setfflags(struct thread *td, struct vnode *, u_long); 100 static int getutimes(const struct timeval *, enum uio_seg, struct timespec *); 101 static int getutimens(const struct timespec *, enum uio_seg, 102 struct timespec *, int *); 103 static int setutimes(struct thread *td, struct vnode *, 104 const struct timespec *, int, int); 105 static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 106 struct thread *td); 107 static int kern_fhlinkat(struct thread *td, int fd, const char *path, 108 enum uio_seg pathseg, fhandle_t *fhp); 109 static int kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, 110 size_t count, struct thread *td); 111 static int kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, 112 const char *path, enum uio_seg segflag); 113 114 static uint64_t 115 at2cnpflags(u_int at_flags, u_int mask) 116 { 117 uint64_t res; 118 119 MPASS((at_flags & (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)) != 120 (AT_SYMLINK_FOLLOW | AT_SYMLINK_NOFOLLOW)); 121 122 res = 0; 123 at_flags &= mask; 124 if ((at_flags & AT_RESOLVE_BENEATH) != 0) 125 res |= RBENEATH; 126 if ((at_flags & AT_SYMLINK_FOLLOW) != 0) 127 res |= FOLLOW; 128 /* NOFOLLOW is pseudo flag */ 129 if ((mask & AT_SYMLINK_NOFOLLOW) != 0) { 130 res |= (at_flags & AT_SYMLINK_NOFOLLOW) != 0 ? NOFOLLOW : 131 FOLLOW; 132 } 133 if ((mask & AT_EMPTY_PATH) != 0 && (at_flags & AT_EMPTY_PATH) != 0) 134 res |= EMPTYPATH; 135 return (res); 136 } 137 138 int 139 kern_sync(struct thread *td) 140 { 141 struct mount *mp, *nmp; 142 int save; 143 144 mtx_lock(&mountlist_mtx); 145 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 146 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK)) { 147 nmp = TAILQ_NEXT(mp, mnt_list); 148 continue; 149 } 150 if ((mp->mnt_flag & MNT_RDONLY) == 0 && 151 vn_start_write(NULL, &mp, V_NOWAIT) == 0) { 152 save = curthread_pflags_set(TDP_SYNCIO); 153 vfs_periodic(mp, MNT_NOWAIT); 154 VFS_SYNC(mp, MNT_NOWAIT); 155 curthread_pflags_restore(save); 156 vn_finished_write(mp); 157 } 158 mtx_lock(&mountlist_mtx); 159 nmp = TAILQ_NEXT(mp, mnt_list); 160 vfs_unbusy(mp); 161 } 162 mtx_unlock(&mountlist_mtx); 163 return (0); 164 } 165 166 /* 167 * Sync each mounted filesystem. 168 */ 169 #ifndef _SYS_SYSPROTO_H_ 170 struct sync_args { 171 int dummy; 172 }; 173 #endif 174 /* ARGSUSED */ 175 int 176 sys_sync(struct thread *td, struct sync_args *uap) 177 { 178 179 return (kern_sync(td)); 180 } 181 182 /* 183 * Change filesystem quotas. 184 */ 185 #ifndef _SYS_SYSPROTO_H_ 186 struct quotactl_args { 187 char *path; 188 int cmd; 189 int uid; 190 caddr_t arg; 191 }; 192 #endif 193 int 194 sys_quotactl(struct thread *td, struct quotactl_args *uap) 195 { 196 struct mount *mp; 197 struct nameidata nd; 198 int error; 199 bool mp_busy; 200 201 AUDIT_ARG_CMD(uap->cmd); 202 AUDIT_ARG_UID(uap->uid); 203 if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS)) 204 return (EPERM); 205 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 206 uap->path); 207 if ((error = namei(&nd)) != 0) 208 return (error); 209 NDFREE_PNBUF(&nd); 210 mp = nd.ni_vp->v_mount; 211 vfs_ref(mp); 212 vput(nd.ni_vp); 213 error = vfs_busy(mp, 0); 214 if (error != 0) { 215 vfs_rel(mp); 216 return (error); 217 } 218 mp_busy = true; 219 error = VFS_QUOTACTL(mp, uap->cmd, uap->uid, uap->arg, &mp_busy); 220 221 /* 222 * Since quota on/off operations typically need to open quota 223 * files, the implementation may need to unbusy the mount point 224 * before calling into namei. Otherwise, unmount might be 225 * started between two vfs_busy() invocations (first is ours, 226 * second is from mount point cross-walk code in lookup()), 227 * causing deadlock. 228 * 229 * Avoid unbusying mp if the implementation indicates it has 230 * already done so. 231 */ 232 if (mp_busy) 233 vfs_unbusy(mp); 234 vfs_rel(mp); 235 return (error); 236 } 237 238 /* 239 * Used by statfs conversion routines to scale the block size up if 240 * necessary so that all of the block counts are <= 'max_size'. Note 241 * that 'max_size' should be a bitmask, i.e. 2^n - 1 for some non-zero 242 * value of 'n'. 243 */ 244 void 245 statfs_scale_blocks(struct statfs *sf, long max_size) 246 { 247 uint64_t count; 248 int shift; 249 250 KASSERT(powerof2(max_size + 1), ("%s: invalid max_size", __func__)); 251 252 /* 253 * Attempt to scale the block counts to give a more accurate 254 * overview to userland of the ratio of free space to used 255 * space. To do this, find the largest block count and compute 256 * a divisor that lets it fit into a signed integer <= max_size. 257 */ 258 if (sf->f_bavail < 0) 259 count = -sf->f_bavail; 260 else 261 count = sf->f_bavail; 262 count = MAX(sf->f_blocks, MAX(sf->f_bfree, count)); 263 if (count <= max_size) 264 return; 265 266 count >>= flsl(max_size); 267 shift = 0; 268 while (count > 0) { 269 shift++; 270 count >>=1; 271 } 272 273 sf->f_bsize <<= shift; 274 sf->f_blocks >>= shift; 275 sf->f_bfree >>= shift; 276 sf->f_bavail >>= shift; 277 } 278 279 static int 280 kern_do_statfs(struct thread *td, struct mount *mp, struct statfs *buf) 281 { 282 int error; 283 284 if (mp == NULL) 285 return (EBADF); 286 error = vfs_busy(mp, 0); 287 vfs_rel(mp); 288 if (error != 0) 289 return (error); 290 #ifdef MAC 291 error = mac_mount_check_stat(td->td_ucred, mp); 292 if (error != 0) 293 goto out; 294 #endif 295 error = VFS_STATFS(mp, buf); 296 if (error != 0) 297 goto out; 298 if (priv_check_cred_vfs_generation(td->td_ucred)) { 299 buf->f_fsid.val[0] = buf->f_fsid.val[1] = 0; 300 prison_enforce_statfs(td->td_ucred, mp, buf); 301 } 302 out: 303 vfs_unbusy(mp); 304 return (error); 305 } 306 307 /* 308 * Get filesystem statistics. 309 */ 310 #ifndef _SYS_SYSPROTO_H_ 311 struct statfs_args { 312 char *path; 313 struct statfs *buf; 314 }; 315 #endif 316 int 317 sys_statfs(struct thread *td, struct statfs_args *uap) 318 { 319 struct statfs *sfp; 320 int error; 321 322 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 323 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 324 if (error == 0) 325 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 326 free(sfp, M_STATFS); 327 return (error); 328 } 329 330 int 331 kern_statfs(struct thread *td, const char *path, enum uio_seg pathseg, 332 struct statfs *buf) 333 { 334 struct mount *mp; 335 struct nameidata nd; 336 int error; 337 338 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 339 error = namei(&nd); 340 if (error != 0) 341 return (error); 342 mp = vfs_ref_from_vp(nd.ni_vp); 343 NDFREE_NOTHING(&nd); 344 vrele(nd.ni_vp); 345 return (kern_do_statfs(td, mp, buf)); 346 } 347 348 /* 349 * Get filesystem statistics. 350 */ 351 #ifndef _SYS_SYSPROTO_H_ 352 struct fstatfs_args { 353 int fd; 354 struct statfs *buf; 355 }; 356 #endif 357 int 358 sys_fstatfs(struct thread *td, struct fstatfs_args *uap) 359 { 360 struct statfs *sfp; 361 int error; 362 363 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 364 error = kern_fstatfs(td, uap->fd, sfp); 365 if (error == 0) 366 error = copyout(sfp, uap->buf, sizeof(struct statfs)); 367 free(sfp, M_STATFS); 368 return (error); 369 } 370 371 int 372 kern_fstatfs(struct thread *td, int fd, struct statfs *buf) 373 { 374 struct file *fp; 375 struct mount *mp; 376 struct vnode *vp; 377 int error; 378 379 AUDIT_ARG_FD(fd); 380 error = getvnode_path(td, fd, &cap_fstatfs_rights, &fp); 381 if (error != 0) 382 return (error); 383 vp = fp->f_vnode; 384 #ifdef AUDIT 385 if (AUDITING_TD(td)) { 386 vn_lock(vp, LK_SHARED | LK_RETRY); 387 AUDIT_ARG_VNODE1(vp); 388 VOP_UNLOCK(vp); 389 } 390 #endif 391 mp = vfs_ref_from_vp(vp); 392 fdrop(fp, td); 393 return (kern_do_statfs(td, mp, buf)); 394 } 395 396 /* 397 * Get statistics on all filesystems. 398 */ 399 #ifndef _SYS_SYSPROTO_H_ 400 struct getfsstat_args { 401 struct statfs *buf; 402 long bufsize; 403 int mode; 404 }; 405 #endif 406 int 407 sys_getfsstat(struct thread *td, struct getfsstat_args *uap) 408 { 409 size_t count; 410 int error; 411 412 if (uap->bufsize < 0 || uap->bufsize > SIZE_MAX) 413 return (EINVAL); 414 error = kern_getfsstat(td, &uap->buf, uap->bufsize, &count, 415 UIO_USERSPACE, uap->mode); 416 if (error == 0) 417 td->td_retval[0] = count; 418 return (error); 419 } 420 421 /* 422 * If (bufsize > 0 && bufseg == UIO_SYSSPACE) 423 * The caller is responsible for freeing memory which will be allocated 424 * in '*buf'. 425 */ 426 int 427 kern_getfsstat(struct thread *td, struct statfs **buf, size_t bufsize, 428 size_t *countp, enum uio_seg bufseg, int mode) 429 { 430 struct mount *mp, *nmp; 431 struct statfs *sfsp, *sp, *sptmp, *tofree; 432 size_t count, maxcount; 433 int error; 434 435 switch (mode) { 436 case MNT_WAIT: 437 case MNT_NOWAIT: 438 break; 439 default: 440 if (bufseg == UIO_SYSSPACE) 441 *buf = NULL; 442 return (EINVAL); 443 } 444 restart: 445 maxcount = bufsize / sizeof(struct statfs); 446 if (bufsize == 0) { 447 sfsp = NULL; 448 tofree = NULL; 449 } else if (bufseg == UIO_USERSPACE) { 450 sfsp = *buf; 451 tofree = NULL; 452 } else /* if (bufseg == UIO_SYSSPACE) */ { 453 count = 0; 454 mtx_lock(&mountlist_mtx); 455 TAILQ_FOREACH(mp, &mountlist, mnt_list) { 456 count++; 457 } 458 mtx_unlock(&mountlist_mtx); 459 if (maxcount > count) 460 maxcount = count; 461 tofree = sfsp = *buf = malloc(maxcount * sizeof(struct statfs), 462 M_STATFS, M_WAITOK); 463 } 464 465 count = 0; 466 467 /* 468 * If there is no target buffer they only want the count. 469 * 470 * This could be TAILQ_FOREACH but it is open-coded to match the original 471 * code below. 472 */ 473 if (sfsp == NULL) { 474 mtx_lock(&mountlist_mtx); 475 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 476 if (prison_canseemount(td->td_ucred, mp) != 0) { 477 nmp = TAILQ_NEXT(mp, mnt_list); 478 continue; 479 } 480 #ifdef MAC 481 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 482 nmp = TAILQ_NEXT(mp, mnt_list); 483 continue; 484 } 485 #endif 486 count++; 487 nmp = TAILQ_NEXT(mp, mnt_list); 488 } 489 mtx_unlock(&mountlist_mtx); 490 *countp = count; 491 return (0); 492 } 493 494 /* 495 * They want the entire thing. 496 * 497 * Short-circuit the corner case of no room for anything, avoids 498 * relocking below. 499 */ 500 if (maxcount < 1) { 501 goto out; 502 } 503 504 mtx_lock(&mountlist_mtx); 505 for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { 506 if (prison_canseemount(td->td_ucred, mp) != 0) { 507 nmp = TAILQ_NEXT(mp, mnt_list); 508 continue; 509 } 510 #ifdef MAC 511 if (mac_mount_check_stat(td->td_ucred, mp) != 0) { 512 nmp = TAILQ_NEXT(mp, mnt_list); 513 continue; 514 } 515 #endif 516 if (mode == MNT_WAIT) { 517 if (vfs_busy(mp, MBF_MNTLSTLOCK) != 0) { 518 /* 519 * If vfs_busy() failed, and MBF_NOWAIT 520 * wasn't passed, then the mp is gone. 521 * Furthermore, because of MBF_MNTLSTLOCK, 522 * the mountlist_mtx was dropped. We have 523 * no other choice than to start over. 524 */ 525 mtx_unlock(&mountlist_mtx); 526 free(tofree, M_STATFS); 527 goto restart; 528 } 529 } else { 530 if (vfs_busy(mp, MBF_NOWAIT | MBF_MNTLSTLOCK) != 0) { 531 nmp = TAILQ_NEXT(mp, mnt_list); 532 continue; 533 } 534 } 535 sp = &mp->mnt_stat; 536 /* 537 * If MNT_NOWAIT is specified, do not refresh 538 * the fsstat cache. 539 */ 540 if (mode != MNT_NOWAIT) { 541 error = VFS_STATFS(mp, sp); 542 if (error != 0) { 543 mtx_lock(&mountlist_mtx); 544 nmp = TAILQ_NEXT(mp, mnt_list); 545 vfs_unbusy(mp); 546 continue; 547 } 548 } 549 if (priv_check_cred_vfs_generation(td->td_ucred)) { 550 sptmp = malloc(sizeof(struct statfs), M_STATFS, 551 M_WAITOK); 552 *sptmp = *sp; 553 sptmp->f_fsid.val[0] = sptmp->f_fsid.val[1] = 0; 554 prison_enforce_statfs(td->td_ucred, mp, sptmp); 555 sp = sptmp; 556 } else 557 sptmp = NULL; 558 if (bufseg == UIO_SYSSPACE) { 559 bcopy(sp, sfsp, sizeof(*sp)); 560 free(sptmp, M_STATFS); 561 } else /* if (bufseg == UIO_USERSPACE) */ { 562 error = copyout(sp, sfsp, sizeof(*sp)); 563 free(sptmp, M_STATFS); 564 if (error != 0) { 565 vfs_unbusy(mp); 566 return (error); 567 } 568 } 569 sfsp++; 570 count++; 571 572 if (count == maxcount) { 573 vfs_unbusy(mp); 574 goto out; 575 } 576 577 mtx_lock(&mountlist_mtx); 578 nmp = TAILQ_NEXT(mp, mnt_list); 579 vfs_unbusy(mp); 580 } 581 mtx_unlock(&mountlist_mtx); 582 out: 583 *countp = count; 584 return (0); 585 } 586 587 #ifdef COMPAT_FREEBSD4 588 /* 589 * Get old format filesystem statistics. 590 */ 591 static void freebsd4_cvtstatfs(struct statfs *, struct ostatfs *); 592 593 #ifndef _SYS_SYSPROTO_H_ 594 struct freebsd4_statfs_args { 595 char *path; 596 struct ostatfs *buf; 597 }; 598 #endif 599 int 600 freebsd4_statfs(struct thread *td, struct freebsd4_statfs_args *uap) 601 { 602 struct ostatfs osb; 603 struct statfs *sfp; 604 int error; 605 606 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 607 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 608 if (error == 0) { 609 freebsd4_cvtstatfs(sfp, &osb); 610 error = copyout(&osb, uap->buf, sizeof(osb)); 611 } 612 free(sfp, M_STATFS); 613 return (error); 614 } 615 616 /* 617 * Get filesystem statistics. 618 */ 619 #ifndef _SYS_SYSPROTO_H_ 620 struct freebsd4_fstatfs_args { 621 int fd; 622 struct ostatfs *buf; 623 }; 624 #endif 625 int 626 freebsd4_fstatfs(struct thread *td, struct freebsd4_fstatfs_args *uap) 627 { 628 struct ostatfs osb; 629 struct statfs *sfp; 630 int error; 631 632 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 633 error = kern_fstatfs(td, uap->fd, sfp); 634 if (error == 0) { 635 freebsd4_cvtstatfs(sfp, &osb); 636 error = copyout(&osb, uap->buf, sizeof(osb)); 637 } 638 free(sfp, M_STATFS); 639 return (error); 640 } 641 642 /* 643 * Get statistics on all filesystems. 644 */ 645 #ifndef _SYS_SYSPROTO_H_ 646 struct freebsd4_getfsstat_args { 647 struct ostatfs *buf; 648 long bufsize; 649 int mode; 650 }; 651 #endif 652 int 653 freebsd4_getfsstat(struct thread *td, struct freebsd4_getfsstat_args *uap) 654 { 655 struct statfs *buf, *sp; 656 struct ostatfs osb; 657 size_t count, size; 658 int error; 659 660 if (uap->bufsize < 0) 661 return (EINVAL); 662 count = uap->bufsize / sizeof(struct ostatfs); 663 if (count > SIZE_MAX / sizeof(struct statfs)) 664 return (EINVAL); 665 size = count * sizeof(struct statfs); 666 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, 667 uap->mode); 668 if (error == 0) 669 td->td_retval[0] = count; 670 if (size != 0) { 671 sp = buf; 672 while (count != 0 && error == 0) { 673 freebsd4_cvtstatfs(sp, &osb); 674 error = copyout(&osb, uap->buf, sizeof(osb)); 675 sp++; 676 uap->buf++; 677 count--; 678 } 679 free(buf, M_STATFS); 680 } 681 return (error); 682 } 683 684 /* 685 * Implement fstatfs() for (NFS) file handles. 686 */ 687 #ifndef _SYS_SYSPROTO_H_ 688 struct freebsd4_fhstatfs_args { 689 struct fhandle *u_fhp; 690 struct ostatfs *buf; 691 }; 692 #endif 693 int 694 freebsd4_fhstatfs(struct thread *td, struct freebsd4_fhstatfs_args *uap) 695 { 696 struct ostatfs osb; 697 struct statfs *sfp; 698 fhandle_t fh; 699 int error; 700 701 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 702 if (error != 0) 703 return (error); 704 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 705 error = kern_fhstatfs(td, fh, sfp); 706 if (error == 0) { 707 freebsd4_cvtstatfs(sfp, &osb); 708 error = copyout(&osb, uap->buf, sizeof(osb)); 709 } 710 free(sfp, M_STATFS); 711 return (error); 712 } 713 714 /* 715 * Convert a new format statfs structure to an old format statfs structure. 716 */ 717 static void 718 freebsd4_cvtstatfs(struct statfs *nsp, struct ostatfs *osp) 719 { 720 721 statfs_scale_blocks(nsp, LONG_MAX); 722 bzero(osp, sizeof(*osp)); 723 osp->f_bsize = nsp->f_bsize; 724 osp->f_iosize = MIN(nsp->f_iosize, LONG_MAX); 725 osp->f_blocks = nsp->f_blocks; 726 osp->f_bfree = nsp->f_bfree; 727 osp->f_bavail = nsp->f_bavail; 728 osp->f_files = MIN(nsp->f_files, LONG_MAX); 729 osp->f_ffree = MIN(nsp->f_ffree, LONG_MAX); 730 osp->f_owner = nsp->f_owner; 731 osp->f_type = nsp->f_type; 732 osp->f_flags = nsp->f_flags; 733 osp->f_syncwrites = MIN(nsp->f_syncwrites, LONG_MAX); 734 osp->f_asyncwrites = MIN(nsp->f_asyncwrites, LONG_MAX); 735 osp->f_syncreads = MIN(nsp->f_syncreads, LONG_MAX); 736 osp->f_asyncreads = MIN(nsp->f_asyncreads, LONG_MAX); 737 strlcpy(osp->f_fstypename, nsp->f_fstypename, 738 MIN(MFSNAMELEN, OMFSNAMELEN)); 739 strlcpy(osp->f_mntonname, nsp->f_mntonname, 740 MIN(MNAMELEN, OMNAMELEN)); 741 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 742 MIN(MNAMELEN, OMNAMELEN)); 743 osp->f_fsid = nsp->f_fsid; 744 } 745 #endif /* COMPAT_FREEBSD4 */ 746 747 #if defined(COMPAT_FREEBSD11) 748 /* 749 * Get old format filesystem statistics. 750 */ 751 static void freebsd11_cvtstatfs(struct statfs *, struct freebsd11_statfs *); 752 753 int 754 freebsd11_statfs(struct thread *td, struct freebsd11_statfs_args *uap) 755 { 756 struct freebsd11_statfs osb; 757 struct statfs *sfp; 758 int error; 759 760 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 761 error = kern_statfs(td, uap->path, UIO_USERSPACE, sfp); 762 if (error == 0) { 763 freebsd11_cvtstatfs(sfp, &osb); 764 error = copyout(&osb, uap->buf, sizeof(osb)); 765 } 766 free(sfp, M_STATFS); 767 return (error); 768 } 769 770 /* 771 * Get filesystem statistics. 772 */ 773 int 774 freebsd11_fstatfs(struct thread *td, struct freebsd11_fstatfs_args *uap) 775 { 776 struct freebsd11_statfs osb; 777 struct statfs *sfp; 778 int error; 779 780 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 781 error = kern_fstatfs(td, uap->fd, sfp); 782 if (error == 0) { 783 freebsd11_cvtstatfs(sfp, &osb); 784 error = copyout(&osb, uap->buf, sizeof(osb)); 785 } 786 free(sfp, M_STATFS); 787 return (error); 788 } 789 790 /* 791 * Get statistics on all filesystems. 792 */ 793 int 794 freebsd11_getfsstat(struct thread *td, struct freebsd11_getfsstat_args *uap) 795 { 796 return (kern_freebsd11_getfsstat(td, uap->buf, uap->bufsize, uap->mode)); 797 } 798 799 int 800 kern_freebsd11_getfsstat(struct thread *td, struct freebsd11_statfs * ubuf, 801 long bufsize, int mode) 802 { 803 struct freebsd11_statfs osb; 804 struct statfs *buf, *sp; 805 size_t count, size; 806 int error; 807 808 if (bufsize < 0) 809 return (EINVAL); 810 811 count = bufsize / sizeof(struct ostatfs); 812 size = count * sizeof(struct statfs); 813 error = kern_getfsstat(td, &buf, size, &count, UIO_SYSSPACE, mode); 814 if (error == 0) 815 td->td_retval[0] = count; 816 if (size > 0) { 817 sp = buf; 818 while (count > 0 && error == 0) { 819 freebsd11_cvtstatfs(sp, &osb); 820 error = copyout(&osb, ubuf, sizeof(osb)); 821 sp++; 822 ubuf++; 823 count--; 824 } 825 free(buf, M_STATFS); 826 } 827 return (error); 828 } 829 830 /* 831 * Implement fstatfs() for (NFS) file handles. 832 */ 833 int 834 freebsd11_fhstatfs(struct thread *td, struct freebsd11_fhstatfs_args *uap) 835 { 836 struct freebsd11_statfs osb; 837 struct statfs *sfp; 838 fhandle_t fh; 839 int error; 840 841 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 842 if (error) 843 return (error); 844 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 845 error = kern_fhstatfs(td, fh, sfp); 846 if (error == 0) { 847 freebsd11_cvtstatfs(sfp, &osb); 848 error = copyout(&osb, uap->buf, sizeof(osb)); 849 } 850 free(sfp, M_STATFS); 851 return (error); 852 } 853 854 /* 855 * Convert a new format statfs structure to an old format statfs structure. 856 */ 857 static void 858 freebsd11_cvtstatfs(struct statfs *nsp, struct freebsd11_statfs *osp) 859 { 860 861 bzero(osp, sizeof(*osp)); 862 osp->f_version = FREEBSD11_STATFS_VERSION; 863 osp->f_type = nsp->f_type; 864 osp->f_flags = nsp->f_flags; 865 osp->f_bsize = nsp->f_bsize; 866 osp->f_iosize = nsp->f_iosize; 867 osp->f_blocks = nsp->f_blocks; 868 osp->f_bfree = nsp->f_bfree; 869 osp->f_bavail = nsp->f_bavail; 870 osp->f_files = nsp->f_files; 871 osp->f_ffree = nsp->f_ffree; 872 osp->f_syncwrites = nsp->f_syncwrites; 873 osp->f_asyncwrites = nsp->f_asyncwrites; 874 osp->f_syncreads = nsp->f_syncreads; 875 osp->f_asyncreads = nsp->f_asyncreads; 876 osp->f_namemax = nsp->f_namemax; 877 osp->f_owner = nsp->f_owner; 878 osp->f_fsid = nsp->f_fsid; 879 strlcpy(osp->f_fstypename, nsp->f_fstypename, 880 MIN(MFSNAMELEN, sizeof(osp->f_fstypename))); 881 strlcpy(osp->f_mntonname, nsp->f_mntonname, 882 MIN(MNAMELEN, sizeof(osp->f_mntonname))); 883 strlcpy(osp->f_mntfromname, nsp->f_mntfromname, 884 MIN(MNAMELEN, sizeof(osp->f_mntfromname))); 885 } 886 #endif /* COMPAT_FREEBSD11 */ 887 888 /* 889 * Change current working directory to a given file descriptor. 890 */ 891 #ifndef _SYS_SYSPROTO_H_ 892 struct fchdir_args { 893 int fd; 894 }; 895 #endif 896 int 897 sys_fchdir(struct thread *td, struct fchdir_args *uap) 898 { 899 struct vnode *vp, *tdp; 900 struct mount *mp; 901 struct file *fp; 902 int error; 903 904 AUDIT_ARG_FD(uap->fd); 905 error = getvnode_path(td, uap->fd, &cap_fchdir_rights, 906 &fp); 907 if (error != 0) 908 return (error); 909 vp = fp->f_vnode; 910 vref(vp); 911 fdrop(fp, td); 912 vn_lock(vp, LK_SHARED | LK_RETRY); 913 AUDIT_ARG_VNODE1(vp); 914 error = change_dir(vp, td); 915 while (!error && (mp = vp->v_mountedhere) != NULL) { 916 if (vfs_busy(mp, 0)) 917 continue; 918 error = VFS_ROOT(mp, LK_SHARED, &tdp); 919 vfs_unbusy(mp); 920 if (error != 0) 921 break; 922 vput(vp); 923 vp = tdp; 924 } 925 if (error != 0) { 926 vput(vp); 927 return (error); 928 } 929 VOP_UNLOCK(vp); 930 pwd_chdir(td, vp); 931 return (0); 932 } 933 934 /* 935 * Change current working directory (``.''). 936 */ 937 #ifndef _SYS_SYSPROTO_H_ 938 struct chdir_args { 939 char *path; 940 }; 941 #endif 942 int 943 sys_chdir(struct thread *td, struct chdir_args *uap) 944 { 945 946 return (kern_chdir(td, uap->path, UIO_USERSPACE)); 947 } 948 949 int 950 kern_chdir(struct thread *td, const char *path, enum uio_seg pathseg) 951 { 952 struct nameidata nd; 953 int error; 954 955 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 956 pathseg, path); 957 if ((error = namei(&nd)) != 0) 958 return (error); 959 if ((error = change_dir(nd.ni_vp, td)) != 0) { 960 vput(nd.ni_vp); 961 NDFREE_NOTHING(&nd); 962 return (error); 963 } 964 VOP_UNLOCK(nd.ni_vp); 965 NDFREE_NOTHING(&nd); 966 pwd_chdir(td, nd.ni_vp); 967 return (0); 968 } 969 970 static int unprivileged_chroot = 0; 971 SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_chroot, CTLFLAG_RW, 972 &unprivileged_chroot, 0, 973 "Unprivileged processes can use chroot(2)"); 974 /* 975 * Change notion of root (``/'') directory. 976 */ 977 #ifndef _SYS_SYSPROTO_H_ 978 struct chroot_args { 979 char *path; 980 }; 981 #endif 982 int 983 sys_chroot(struct thread *td, struct chroot_args *uap) 984 { 985 struct nameidata nd; 986 struct proc *p; 987 int error; 988 989 error = priv_check(td, PRIV_VFS_CHROOT); 990 if (error != 0) { 991 p = td->td_proc; 992 PROC_LOCK(p); 993 if (unprivileged_chroot == 0 || 994 (p->p_flag2 & P2_NO_NEW_PRIVS) == 0) { 995 PROC_UNLOCK(p); 996 return (error); 997 } 998 PROC_UNLOCK(p); 999 } 1000 NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1, 1001 UIO_USERSPACE, uap->path); 1002 error = namei(&nd); 1003 if (error != 0) 1004 goto error; 1005 error = change_dir(nd.ni_vp, td); 1006 if (error != 0) 1007 goto e_vunlock; 1008 #ifdef MAC 1009 error = mac_vnode_check_chroot(td->td_ucred, nd.ni_vp); 1010 if (error != 0) 1011 goto e_vunlock; 1012 #endif 1013 VOP_UNLOCK(nd.ni_vp); 1014 error = pwd_chroot(td, nd.ni_vp); 1015 vrele(nd.ni_vp); 1016 NDFREE_NOTHING(&nd); 1017 return (error); 1018 e_vunlock: 1019 vput(nd.ni_vp); 1020 error: 1021 NDFREE_NOTHING(&nd); 1022 return (error); 1023 } 1024 1025 /* 1026 * Common routine for chroot and chdir. Callers must provide a locked vnode 1027 * instance. 1028 */ 1029 int 1030 change_dir(struct vnode *vp, struct thread *td) 1031 { 1032 #ifdef MAC 1033 int error; 1034 #endif 1035 1036 ASSERT_VOP_LOCKED(vp, "change_dir(): vp not locked"); 1037 if (vp->v_type != VDIR) 1038 return (ENOTDIR); 1039 #ifdef MAC 1040 error = mac_vnode_check_chdir(td->td_ucred, vp); 1041 if (error != 0) 1042 return (error); 1043 #endif 1044 return (VOP_ACCESS(vp, VEXEC, td->td_ucred, td)); 1045 } 1046 1047 static __inline void 1048 flags_to_rights(int flags, cap_rights_t *rightsp) 1049 { 1050 if (flags & O_EXEC) { 1051 cap_rights_set_one(rightsp, CAP_FEXECVE); 1052 if (flags & O_PATH) 1053 return; 1054 } else { 1055 switch ((flags & O_ACCMODE)) { 1056 case O_RDONLY: 1057 cap_rights_set_one(rightsp, CAP_READ); 1058 break; 1059 case O_RDWR: 1060 cap_rights_set_one(rightsp, CAP_READ); 1061 /* FALLTHROUGH */ 1062 case O_WRONLY: 1063 cap_rights_set_one(rightsp, CAP_WRITE); 1064 if (!(flags & (O_APPEND | O_TRUNC))) 1065 cap_rights_set_one(rightsp, CAP_SEEK); 1066 break; 1067 } 1068 } 1069 1070 if (flags & O_CREAT) 1071 cap_rights_set_one(rightsp, CAP_CREATE); 1072 1073 if (flags & O_TRUNC) 1074 cap_rights_set_one(rightsp, CAP_FTRUNCATE); 1075 1076 if (flags & (O_SYNC | O_FSYNC)) 1077 cap_rights_set_one(rightsp, CAP_FSYNC); 1078 1079 if (flags & (O_EXLOCK | O_SHLOCK)) 1080 cap_rights_set_one(rightsp, CAP_FLOCK); 1081 } 1082 1083 /* 1084 * Check permissions, allocate an open file structure, and call the device 1085 * open routine if any. 1086 */ 1087 #ifndef _SYS_SYSPROTO_H_ 1088 struct open_args { 1089 char *path; 1090 int flags; 1091 int mode; 1092 }; 1093 #endif 1094 int 1095 sys_open(struct thread *td, struct open_args *uap) 1096 { 1097 1098 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1099 uap->flags, uap->mode)); 1100 } 1101 1102 #ifndef _SYS_SYSPROTO_H_ 1103 struct openat_args { 1104 int fd; 1105 char *path; 1106 int flag; 1107 int mode; 1108 }; 1109 #endif 1110 int 1111 sys_openat(struct thread *td, struct openat_args *uap) 1112 { 1113 1114 AUDIT_ARG_FD(uap->fd); 1115 return (kern_openat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 1116 uap->mode)); 1117 } 1118 1119 int 1120 kern_openat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1121 int flags, int mode) 1122 { 1123 struct proc *p = td->td_proc; 1124 struct filedesc *fdp; 1125 struct pwddesc *pdp; 1126 struct file *fp; 1127 struct vnode *vp; 1128 struct nameidata nd; 1129 cap_rights_t rights; 1130 int cmode, error, indx; 1131 1132 indx = -1; 1133 fdp = p->p_fd; 1134 pdp = p->p_pd; 1135 1136 AUDIT_ARG_FFLAGS(flags); 1137 AUDIT_ARG_MODE(mode); 1138 cap_rights_init_one(&rights, CAP_LOOKUP); 1139 flags_to_rights(flags, &rights); 1140 1141 /* 1142 * Only one of the O_EXEC, O_RDONLY, O_WRONLY and O_RDWR flags 1143 * may be specified. On the other hand, for O_PATH any mode 1144 * except O_EXEC is ignored. 1145 */ 1146 if ((flags & O_PATH) != 0) { 1147 flags &= ~(O_CREAT | O_ACCMODE); 1148 } else if ((flags & O_EXEC) != 0) { 1149 if (flags & O_ACCMODE) 1150 return (EINVAL); 1151 } else if ((flags & O_ACCMODE) == O_ACCMODE) { 1152 return (EINVAL); 1153 } else { 1154 flags = FFLAGS(flags); 1155 } 1156 1157 /* 1158 * Allocate a file structure. The descriptor to reference it 1159 * is allocated and used by finstall_refed() below. 1160 */ 1161 error = falloc_noinstall(td, &fp); 1162 if (error != 0) 1163 return (error); 1164 /* Set the flags early so the finit in devfs can pick them up. */ 1165 fp->f_flag = flags & FMASK; 1166 cmode = ((mode & ~pdp->pd_cmask) & ALLPERMS) & ~S_ISTXT; 1167 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 1168 &rights); 1169 td->td_dupfd = -1; /* XXX check for fdopen */ 1170 error = vn_open(&nd, &flags, cmode, fp); 1171 if (error != 0) { 1172 /* 1173 * If the vn_open replaced the method vector, something 1174 * wonderous happened deep below and we just pass it up 1175 * pretending we know what we do. 1176 */ 1177 if (error == ENXIO && fp->f_ops != &badfileops) { 1178 MPASS((flags & O_PATH) == 0); 1179 goto success; 1180 } 1181 1182 /* 1183 * Handle special fdopen() case. bleh. 1184 * 1185 * Don't do this for relative (capability) lookups; we don't 1186 * understand exactly what would happen, and we don't think 1187 * that it ever should. 1188 */ 1189 if ((nd.ni_resflags & NIRES_STRICTREL) == 0 && 1190 (error == ENODEV || error == ENXIO) && 1191 td->td_dupfd >= 0) { 1192 error = dupfdopen(td, fdp, td->td_dupfd, flags, error, 1193 &indx); 1194 if (error == 0) 1195 goto success; 1196 } 1197 1198 goto bad; 1199 } 1200 td->td_dupfd = 0; 1201 NDFREE_PNBUF(&nd); 1202 vp = nd.ni_vp; 1203 1204 /* 1205 * Store the vnode, for any f_type. Typically, the vnode use 1206 * count is decremented by direct call to vn_closefile() for 1207 * files that switched type in the cdevsw fdopen() method. 1208 */ 1209 fp->f_vnode = vp; 1210 1211 /* 1212 * If the file wasn't claimed by devfs bind it to the normal 1213 * vnode operations here. 1214 */ 1215 if (fp->f_ops == &badfileops) { 1216 KASSERT(vp->v_type != VFIFO || (flags & O_PATH) != 0, 1217 ("Unexpected fifo fp %p vp %p", fp, vp)); 1218 if ((flags & O_PATH) != 0) { 1219 finit(fp, (flags & FMASK) | (fp->f_flag & FKQALLOWED), 1220 DTYPE_VNODE, NULL, &path_fileops); 1221 vhold(vp); 1222 vunref(vp); 1223 } else { 1224 finit_vnode(fp, flags, NULL, &vnops); 1225 } 1226 } 1227 1228 VOP_UNLOCK(vp); 1229 if (flags & O_TRUNC) { 1230 error = fo_truncate(fp, 0, td->td_ucred, td); 1231 if (error != 0) 1232 goto bad; 1233 } 1234 success: 1235 /* 1236 * If we haven't already installed the FD (for dupfdopen), do so now. 1237 */ 1238 if (indx == -1) { 1239 struct filecaps *fcaps; 1240 1241 #ifdef CAPABILITIES 1242 if ((nd.ni_resflags & NIRES_STRICTREL) != 0) 1243 fcaps = &nd.ni_filecaps; 1244 else 1245 #endif 1246 fcaps = NULL; 1247 error = finstall_refed(td, fp, &indx, flags, fcaps); 1248 /* On success finstall_refed() consumes fcaps. */ 1249 if (error != 0) { 1250 filecaps_free(&nd.ni_filecaps); 1251 goto bad; 1252 } 1253 } else { 1254 filecaps_free(&nd.ni_filecaps); 1255 falloc_abort(td, fp); 1256 } 1257 1258 td->td_retval[0] = indx; 1259 return (0); 1260 bad: 1261 KASSERT(indx == -1, ("indx=%d, should be -1", indx)); 1262 falloc_abort(td, fp); 1263 return (error); 1264 } 1265 1266 #ifdef COMPAT_43 1267 /* 1268 * Create a file. 1269 */ 1270 #ifndef _SYS_SYSPROTO_H_ 1271 struct ocreat_args { 1272 char *path; 1273 int mode; 1274 }; 1275 #endif 1276 int 1277 ocreat(struct thread *td, struct ocreat_args *uap) 1278 { 1279 1280 return (kern_openat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1281 O_WRONLY | O_CREAT | O_TRUNC, uap->mode)); 1282 } 1283 #endif /* COMPAT_43 */ 1284 1285 /* 1286 * Create a special file. 1287 */ 1288 #ifndef _SYS_SYSPROTO_H_ 1289 struct mknodat_args { 1290 int fd; 1291 char *path; 1292 mode_t mode; 1293 dev_t dev; 1294 }; 1295 #endif 1296 int 1297 sys_mknodat(struct thread *td, struct mknodat_args *uap) 1298 { 1299 1300 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1301 uap->dev)); 1302 } 1303 1304 #if defined(COMPAT_FREEBSD11) 1305 int 1306 freebsd11_mknod(struct thread *td, 1307 struct freebsd11_mknod_args *uap) 1308 { 1309 1310 return (kern_mknodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1311 uap->mode, uap->dev)); 1312 } 1313 1314 int 1315 freebsd11_mknodat(struct thread *td, 1316 struct freebsd11_mknodat_args *uap) 1317 { 1318 1319 return (kern_mknodat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode, 1320 uap->dev)); 1321 } 1322 #endif /* COMPAT_FREEBSD11 */ 1323 1324 int 1325 kern_mknodat(struct thread *td, int fd, const char *path, enum uio_seg pathseg, 1326 int mode, dev_t dev) 1327 { 1328 struct vnode *vp; 1329 struct mount *mp; 1330 struct vattr vattr; 1331 struct nameidata nd; 1332 int error, whiteout = 0; 1333 1334 AUDIT_ARG_MODE(mode); 1335 AUDIT_ARG_DEV(dev); 1336 switch (mode & S_IFMT) { 1337 case S_IFCHR: 1338 case S_IFBLK: 1339 error = priv_check(td, PRIV_VFS_MKNOD_DEV); 1340 if (error == 0 && dev == VNOVAL) 1341 error = EINVAL; 1342 break; 1343 case S_IFWHT: 1344 error = priv_check(td, PRIV_VFS_MKNOD_WHT); 1345 break; 1346 case S_IFIFO: 1347 if (dev == 0) 1348 return (kern_mkfifoat(td, fd, path, pathseg, mode)); 1349 /* FALLTHROUGH */ 1350 default: 1351 error = EINVAL; 1352 break; 1353 } 1354 if (error != 0) 1355 return (error); 1356 NDPREINIT(&nd); 1357 restart: 1358 bwillwrite(); 1359 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1360 NOCACHE, pathseg, path, fd, &cap_mknodat_rights); 1361 if ((error = namei(&nd)) != 0) 1362 return (error); 1363 vp = nd.ni_vp; 1364 if (vp != NULL) { 1365 NDFREE_PNBUF(&nd); 1366 if (vp == nd.ni_dvp) 1367 vrele(nd.ni_dvp); 1368 else 1369 vput(nd.ni_dvp); 1370 vrele(vp); 1371 return (EEXIST); 1372 } else { 1373 VATTR_NULL(&vattr); 1374 vattr.va_mode = (mode & ALLPERMS) & 1375 ~td->td_proc->p_pd->pd_cmask; 1376 vattr.va_rdev = dev; 1377 whiteout = 0; 1378 1379 switch (mode & S_IFMT) { 1380 case S_IFCHR: 1381 vattr.va_type = VCHR; 1382 break; 1383 case S_IFBLK: 1384 vattr.va_type = VBLK; 1385 break; 1386 case S_IFWHT: 1387 whiteout = 1; 1388 break; 1389 default: 1390 panic("kern_mknod: invalid mode"); 1391 } 1392 } 1393 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1394 NDFREE_PNBUF(&nd); 1395 vput(nd.ni_dvp); 1396 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1397 return (error); 1398 goto restart; 1399 } 1400 #ifdef MAC 1401 if (error == 0 && !whiteout) 1402 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, 1403 &nd.ni_cnd, &vattr); 1404 #endif 1405 if (error == 0) { 1406 if (whiteout) 1407 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); 1408 else { 1409 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, 1410 &nd.ni_cnd, &vattr); 1411 } 1412 } 1413 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 && !whiteout ? &nd.ni_vp : NULL, 1414 true); 1415 vn_finished_write(mp); 1416 NDFREE_PNBUF(&nd); 1417 if (error == ERELOOKUP) 1418 goto restart; 1419 return (error); 1420 } 1421 1422 /* 1423 * Create a named pipe. 1424 */ 1425 #ifndef _SYS_SYSPROTO_H_ 1426 struct mkfifo_args { 1427 char *path; 1428 int mode; 1429 }; 1430 #endif 1431 int 1432 sys_mkfifo(struct thread *td, struct mkfifo_args *uap) 1433 { 1434 1435 return (kern_mkfifoat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 1436 uap->mode)); 1437 } 1438 1439 #ifndef _SYS_SYSPROTO_H_ 1440 struct mkfifoat_args { 1441 int fd; 1442 char *path; 1443 mode_t mode; 1444 }; 1445 #endif 1446 int 1447 sys_mkfifoat(struct thread *td, struct mkfifoat_args *uap) 1448 { 1449 1450 return (kern_mkfifoat(td, uap->fd, uap->path, UIO_USERSPACE, 1451 uap->mode)); 1452 } 1453 1454 int 1455 kern_mkfifoat(struct thread *td, int fd, const char *path, 1456 enum uio_seg pathseg, int mode) 1457 { 1458 struct mount *mp; 1459 struct vattr vattr; 1460 struct nameidata nd; 1461 int error; 1462 1463 AUDIT_ARG_MODE(mode); 1464 NDPREINIT(&nd); 1465 restart: 1466 bwillwrite(); 1467 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1468 NOCACHE, pathseg, path, fd, &cap_mkfifoat_rights); 1469 if ((error = namei(&nd)) != 0) 1470 return (error); 1471 if (nd.ni_vp != NULL) { 1472 NDFREE_PNBUF(&nd); 1473 if (nd.ni_vp == nd.ni_dvp) 1474 vrele(nd.ni_dvp); 1475 else 1476 vput(nd.ni_dvp); 1477 vrele(nd.ni_vp); 1478 return (EEXIST); 1479 } 1480 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1481 NDFREE_PNBUF(&nd); 1482 vput(nd.ni_dvp); 1483 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1484 return (error); 1485 goto restart; 1486 } 1487 VATTR_NULL(&vattr); 1488 vattr.va_type = VFIFO; 1489 vattr.va_mode = (mode & ALLPERMS) & ~td->td_proc->p_pd->pd_cmask; 1490 #ifdef MAC 1491 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1492 &vattr); 1493 if (error != 0) 1494 goto out; 1495 #endif 1496 error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 1497 #ifdef MAC 1498 out: 1499 #endif 1500 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1501 vn_finished_write(mp); 1502 NDFREE_PNBUF(&nd); 1503 if (error == ERELOOKUP) 1504 goto restart; 1505 return (error); 1506 } 1507 1508 /* 1509 * Make a hard file link. 1510 */ 1511 #ifndef _SYS_SYSPROTO_H_ 1512 struct link_args { 1513 char *path; 1514 char *link; 1515 }; 1516 #endif 1517 int 1518 sys_link(struct thread *td, struct link_args *uap) 1519 { 1520 1521 return (kern_linkat(td, AT_FDCWD, AT_FDCWD, uap->path, uap->link, 1522 UIO_USERSPACE, AT_SYMLINK_FOLLOW)); 1523 } 1524 1525 #ifndef _SYS_SYSPROTO_H_ 1526 struct linkat_args { 1527 int fd1; 1528 char *path1; 1529 int fd2; 1530 char *path2; 1531 int flag; 1532 }; 1533 #endif 1534 int 1535 sys_linkat(struct thread *td, struct linkat_args *uap) 1536 { 1537 1538 return (kern_linkat(td, uap->fd1, uap->fd2, uap->path1, uap->path2, 1539 UIO_USERSPACE, uap->flag)); 1540 } 1541 1542 int hardlink_check_uid = 0; 1543 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_uid, CTLFLAG_RW, 1544 &hardlink_check_uid, 0, 1545 "Unprivileged processes cannot create hard links to files owned by other " 1546 "users"); 1547 static int hardlink_check_gid = 0; 1548 SYSCTL_INT(_security_bsd, OID_AUTO, hardlink_check_gid, CTLFLAG_RW, 1549 &hardlink_check_gid, 0, 1550 "Unprivileged processes cannot create hard links to files owned by other " 1551 "groups"); 1552 1553 static int 1554 can_hardlink(struct vnode *vp, struct ucred *cred) 1555 { 1556 struct vattr va; 1557 int error; 1558 1559 if (!hardlink_check_uid && !hardlink_check_gid) 1560 return (0); 1561 1562 error = VOP_GETATTR(vp, &va, cred); 1563 if (error != 0) 1564 return (error); 1565 1566 if (hardlink_check_uid && cred->cr_uid != va.va_uid) { 1567 error = priv_check_cred(cred, PRIV_VFS_LINK); 1568 if (error != 0) 1569 return (error); 1570 } 1571 1572 if (hardlink_check_gid && !groupmember(va.va_gid, cred)) { 1573 error = priv_check_cred(cred, PRIV_VFS_LINK); 1574 if (error != 0) 1575 return (error); 1576 } 1577 1578 return (0); 1579 } 1580 1581 int 1582 kern_linkat(struct thread *td, int fd1, int fd2, const char *path1, 1583 const char *path2, enum uio_seg segflag, int flag) 1584 { 1585 struct nameidata nd; 1586 int error; 1587 1588 if ((flag & ~(AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | 1589 AT_EMPTY_PATH)) != 0) 1590 return (EINVAL); 1591 1592 NDPREINIT(&nd); 1593 do { 1594 bwillwrite(); 1595 NDINIT_ATRIGHTS(&nd, LOOKUP, AUDITVNODE1 | at2cnpflags(flag, 1596 AT_SYMLINK_FOLLOW | AT_RESOLVE_BENEATH | AT_EMPTY_PATH), 1597 segflag, path1, fd1, &cap_linkat_source_rights); 1598 if ((error = namei(&nd)) != 0) 1599 return (error); 1600 NDFREE_PNBUF(&nd); 1601 if ((nd.ni_resflags & NIRES_EMPTYPATH) != 0) { 1602 error = priv_check(td, PRIV_VFS_FHOPEN); 1603 if (error != 0) { 1604 vrele(nd.ni_vp); 1605 return (error); 1606 } 1607 } 1608 error = kern_linkat_vp(td, nd.ni_vp, fd2, path2, segflag); 1609 } while (error == EAGAIN || error == ERELOOKUP); 1610 return (error); 1611 } 1612 1613 static int 1614 kern_linkat_vp(struct thread *td, struct vnode *vp, int fd, const char *path, 1615 enum uio_seg segflag) 1616 { 1617 struct nameidata nd; 1618 struct mount *mp; 1619 int error; 1620 1621 if (vp->v_type == VDIR) { 1622 vrele(vp); 1623 return (EPERM); /* POSIX */ 1624 } 1625 NDINIT_ATRIGHTS(&nd, CREATE, 1626 LOCKPARENT | SAVENAME | AUDITVNODE2 | NOCACHE, segflag, path, fd, 1627 &cap_linkat_target_rights); 1628 if ((error = namei(&nd)) == 0) { 1629 if (nd.ni_vp != NULL) { 1630 NDFREE_PNBUF(&nd); 1631 if (nd.ni_dvp == nd.ni_vp) 1632 vrele(nd.ni_dvp); 1633 else 1634 vput(nd.ni_dvp); 1635 vrele(nd.ni_vp); 1636 vrele(vp); 1637 return (EEXIST); 1638 } else if (nd.ni_dvp->v_mount != vp->v_mount) { 1639 /* 1640 * Cross-device link. No need to recheck 1641 * vp->v_type, since it cannot change, except 1642 * to VBAD. 1643 */ 1644 NDFREE_PNBUF(&nd); 1645 vput(nd.ni_dvp); 1646 vrele(vp); 1647 return (EXDEV); 1648 } else if ((error = vn_lock(vp, LK_EXCLUSIVE)) == 0) { 1649 error = can_hardlink(vp, td->td_ucred); 1650 #ifdef MAC 1651 if (error == 0) 1652 error = mac_vnode_check_link(td->td_ucred, 1653 nd.ni_dvp, vp, &nd.ni_cnd); 1654 #endif 1655 if (error != 0) { 1656 vput(vp); 1657 vput(nd.ni_dvp); 1658 NDFREE_PNBUF(&nd); 1659 return (error); 1660 } 1661 error = vn_start_write(vp, &mp, V_NOWAIT); 1662 if (error != 0) { 1663 vput(vp); 1664 vput(nd.ni_dvp); 1665 NDFREE_PNBUF(&nd); 1666 error = vn_start_write(NULL, &mp, 1667 V_XSLEEP | PCATCH); 1668 if (error != 0) 1669 return (error); 1670 return (EAGAIN); 1671 } 1672 error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); 1673 VOP_VPUT_PAIR(nd.ni_dvp, &vp, true); 1674 vn_finished_write(mp); 1675 NDFREE_PNBUF(&nd); 1676 vp = NULL; 1677 } else { 1678 vput(nd.ni_dvp); 1679 NDFREE_PNBUF(&nd); 1680 vrele(vp); 1681 return (EAGAIN); 1682 } 1683 } 1684 if (vp != NULL) 1685 vrele(vp); 1686 return (error); 1687 } 1688 1689 /* 1690 * Make a symbolic link. 1691 */ 1692 #ifndef _SYS_SYSPROTO_H_ 1693 struct symlink_args { 1694 char *path; 1695 char *link; 1696 }; 1697 #endif 1698 int 1699 sys_symlink(struct thread *td, struct symlink_args *uap) 1700 { 1701 1702 return (kern_symlinkat(td, uap->path, AT_FDCWD, uap->link, 1703 UIO_USERSPACE)); 1704 } 1705 1706 #ifndef _SYS_SYSPROTO_H_ 1707 struct symlinkat_args { 1708 char *path; 1709 int fd; 1710 char *path2; 1711 }; 1712 #endif 1713 int 1714 sys_symlinkat(struct thread *td, struct symlinkat_args *uap) 1715 { 1716 1717 return (kern_symlinkat(td, uap->path1, uap->fd, uap->path2, 1718 UIO_USERSPACE)); 1719 } 1720 1721 int 1722 kern_symlinkat(struct thread *td, const char *path1, int fd, const char *path2, 1723 enum uio_seg segflg) 1724 { 1725 struct mount *mp; 1726 struct vattr vattr; 1727 const char *syspath; 1728 char *tmppath; 1729 struct nameidata nd; 1730 int error; 1731 1732 if (segflg == UIO_SYSSPACE) { 1733 syspath = path1; 1734 } else { 1735 tmppath = uma_zalloc(namei_zone, M_WAITOK); 1736 if ((error = copyinstr(path1, tmppath, MAXPATHLEN, NULL)) != 0) 1737 goto out; 1738 syspath = tmppath; 1739 } 1740 AUDIT_ARG_TEXT(syspath); 1741 NDPREINIT(&nd); 1742 restart: 1743 bwillwrite(); 1744 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 1745 NOCACHE, segflg, path2, fd, &cap_symlinkat_rights); 1746 if ((error = namei(&nd)) != 0) 1747 goto out; 1748 if (nd.ni_vp) { 1749 NDFREE_PNBUF(&nd); 1750 if (nd.ni_vp == nd.ni_dvp) 1751 vrele(nd.ni_dvp); 1752 else 1753 vput(nd.ni_dvp); 1754 vrele(nd.ni_vp); 1755 nd.ni_vp = NULL; 1756 error = EEXIST; 1757 goto out; 1758 } 1759 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1760 NDFREE_PNBUF(&nd); 1761 vput(nd.ni_dvp); 1762 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1763 goto out; 1764 goto restart; 1765 } 1766 VATTR_NULL(&vattr); 1767 vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_pd->pd_cmask; 1768 #ifdef MAC 1769 vattr.va_type = VLNK; 1770 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 1771 &vattr); 1772 if (error != 0) 1773 goto out2; 1774 #endif 1775 error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, syspath); 1776 #ifdef MAC 1777 out2: 1778 #endif 1779 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 1780 vn_finished_write(mp); 1781 NDFREE_PNBUF(&nd); 1782 if (error == ERELOOKUP) 1783 goto restart; 1784 out: 1785 if (segflg != UIO_SYSSPACE) 1786 uma_zfree(namei_zone, tmppath); 1787 return (error); 1788 } 1789 1790 /* 1791 * Delete a whiteout from the filesystem. 1792 */ 1793 #ifndef _SYS_SYSPROTO_H_ 1794 struct undelete_args { 1795 char *path; 1796 }; 1797 #endif 1798 int 1799 sys_undelete(struct thread *td, struct undelete_args *uap) 1800 { 1801 struct mount *mp; 1802 struct nameidata nd; 1803 int error; 1804 1805 NDPREINIT(&nd); 1806 restart: 1807 bwillwrite(); 1808 NDINIT(&nd, DELETE, LOCKPARENT | DOWHITEOUT | AUDITVNODE1, 1809 UIO_USERSPACE, uap->path); 1810 error = namei(&nd); 1811 if (error != 0) 1812 return (error); 1813 1814 if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { 1815 NDFREE_PNBUF(&nd); 1816 if (nd.ni_vp == nd.ni_dvp) 1817 vrele(nd.ni_dvp); 1818 else 1819 vput(nd.ni_dvp); 1820 if (nd.ni_vp) 1821 vrele(nd.ni_vp); 1822 return (EEXIST); 1823 } 1824 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1825 NDFREE_PNBUF(&nd); 1826 vput(nd.ni_dvp); 1827 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 1828 return (error); 1829 goto restart; 1830 } 1831 error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); 1832 NDFREE_PNBUF(&nd); 1833 vput(nd.ni_dvp); 1834 vn_finished_write(mp); 1835 if (error == ERELOOKUP) 1836 goto restart; 1837 return (error); 1838 } 1839 1840 /* 1841 * Delete a name from the filesystem. 1842 */ 1843 #ifndef _SYS_SYSPROTO_H_ 1844 struct unlink_args { 1845 char *path; 1846 }; 1847 #endif 1848 int 1849 sys_unlink(struct thread *td, struct unlink_args *uap) 1850 { 1851 1852 return (kern_funlinkat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 1853 0, 0)); 1854 } 1855 1856 static int 1857 kern_funlinkat_ex(struct thread *td, int dfd, const char *path, int fd, 1858 int flag, enum uio_seg pathseg, ino_t oldinum) 1859 { 1860 1861 if ((flag & ~(AT_REMOVEDIR | AT_RESOLVE_BENEATH)) != 0) 1862 return (EINVAL); 1863 1864 if ((flag & AT_REMOVEDIR) != 0) 1865 return (kern_frmdirat(td, dfd, path, fd, UIO_USERSPACE, 0)); 1866 1867 return (kern_funlinkat(td, dfd, path, fd, UIO_USERSPACE, 0, 0)); 1868 } 1869 1870 #ifndef _SYS_SYSPROTO_H_ 1871 struct unlinkat_args { 1872 int fd; 1873 char *path; 1874 int flag; 1875 }; 1876 #endif 1877 int 1878 sys_unlinkat(struct thread *td, struct unlinkat_args *uap) 1879 { 1880 1881 return (kern_funlinkat_ex(td, uap->fd, uap->path, FD_NONE, uap->flag, 1882 UIO_USERSPACE, 0)); 1883 } 1884 1885 #ifndef _SYS_SYSPROTO_H_ 1886 struct funlinkat_args { 1887 int dfd; 1888 const char *path; 1889 int fd; 1890 int flag; 1891 }; 1892 #endif 1893 int 1894 sys_funlinkat(struct thread *td, struct funlinkat_args *uap) 1895 { 1896 1897 return (kern_funlinkat_ex(td, uap->dfd, uap->path, uap->fd, uap->flag, 1898 UIO_USERSPACE, 0)); 1899 } 1900 1901 int 1902 kern_funlinkat(struct thread *td, int dfd, const char *path, int fd, 1903 enum uio_seg pathseg, int flag, ino_t oldinum) 1904 { 1905 struct mount *mp; 1906 struct file *fp; 1907 struct vnode *vp; 1908 struct nameidata nd; 1909 struct stat sb; 1910 int error; 1911 1912 fp = NULL; 1913 if (fd != FD_NONE) { 1914 error = getvnode_path(td, fd, &cap_no_rights, &fp); 1915 if (error != 0) 1916 return (error); 1917 } 1918 1919 NDPREINIT(&nd); 1920 restart: 1921 bwillwrite(); 1922 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 1923 at2cnpflags(flag, AT_RESOLVE_BENEATH), 1924 pathseg, path, dfd, &cap_unlinkat_rights); 1925 if ((error = namei(&nd)) != 0) { 1926 if (error == EINVAL) 1927 error = EPERM; 1928 goto fdout; 1929 } 1930 vp = nd.ni_vp; 1931 if (vp->v_type == VDIR && oldinum == 0) { 1932 error = EPERM; /* POSIX */ 1933 } else if (oldinum != 0 && 1934 ((error = VOP_STAT(vp, &sb, td->td_ucred, NOCRED)) == 0) && 1935 sb.st_ino != oldinum) { 1936 error = EIDRM; /* Identifier removed */ 1937 } else if (fp != NULL && fp->f_vnode != vp) { 1938 if (VN_IS_DOOMED(fp->f_vnode)) 1939 error = EBADF; 1940 else 1941 error = EDEADLK; 1942 } else { 1943 /* 1944 * The root of a mounted filesystem cannot be deleted. 1945 * 1946 * XXX: can this only be a VDIR case? 1947 */ 1948 if (vp->v_vflag & VV_ROOT) 1949 error = EBUSY; 1950 } 1951 if (error == 0) { 1952 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 1953 NDFREE_PNBUF(&nd); 1954 vput(nd.ni_dvp); 1955 if (vp == nd.ni_dvp) 1956 vrele(vp); 1957 else 1958 vput(vp); 1959 if ((error = vn_start_write(NULL, &mp, 1960 V_XSLEEP | PCATCH)) != 0) { 1961 goto fdout; 1962 } 1963 goto restart; 1964 } 1965 #ifdef MAC 1966 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 1967 &nd.ni_cnd); 1968 if (error != 0) 1969 goto out; 1970 #endif 1971 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 1972 error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); 1973 #ifdef MAC 1974 out: 1975 #endif 1976 vn_finished_write(mp); 1977 } 1978 NDFREE_PNBUF(&nd); 1979 vput(nd.ni_dvp); 1980 if (vp == nd.ni_dvp) 1981 vrele(vp); 1982 else 1983 vput(vp); 1984 if (error == ERELOOKUP) 1985 goto restart; 1986 fdout: 1987 if (fp != NULL) 1988 fdrop(fp, td); 1989 return (error); 1990 } 1991 1992 /* 1993 * Reposition read/write file offset. 1994 */ 1995 #ifndef _SYS_SYSPROTO_H_ 1996 struct lseek_args { 1997 int fd; 1998 int pad; 1999 off_t offset; 2000 int whence; 2001 }; 2002 #endif 2003 int 2004 sys_lseek(struct thread *td, struct lseek_args *uap) 2005 { 2006 2007 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2008 } 2009 2010 int 2011 kern_lseek(struct thread *td, int fd, off_t offset, int whence) 2012 { 2013 struct file *fp; 2014 int error; 2015 2016 AUDIT_ARG_FD(fd); 2017 error = fget(td, fd, &cap_seek_rights, &fp); 2018 if (error != 0) 2019 return (error); 2020 error = (fp->f_ops->fo_flags & DFLAG_SEEKABLE) != 0 ? 2021 fo_seek(fp, offset, whence, td) : ESPIPE; 2022 fdrop(fp, td); 2023 return (error); 2024 } 2025 2026 #if defined(COMPAT_43) 2027 /* 2028 * Reposition read/write file offset. 2029 */ 2030 #ifndef _SYS_SYSPROTO_H_ 2031 struct olseek_args { 2032 int fd; 2033 long offset; 2034 int whence; 2035 }; 2036 #endif 2037 int 2038 olseek(struct thread *td, struct olseek_args *uap) 2039 { 2040 2041 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2042 } 2043 #endif /* COMPAT_43 */ 2044 2045 #if defined(COMPAT_FREEBSD6) 2046 /* Version with the 'pad' argument */ 2047 int 2048 freebsd6_lseek(struct thread *td, struct freebsd6_lseek_args *uap) 2049 { 2050 2051 return (kern_lseek(td, uap->fd, uap->offset, uap->whence)); 2052 } 2053 #endif 2054 2055 /* 2056 * Check access permissions using passed credentials. 2057 */ 2058 static int 2059 vn_access(struct vnode *vp, int user_flags, struct ucred *cred, 2060 struct thread *td) 2061 { 2062 accmode_t accmode; 2063 int error; 2064 2065 /* Flags == 0 means only check for existence. */ 2066 if (user_flags == 0) 2067 return (0); 2068 2069 accmode = 0; 2070 if (user_flags & R_OK) 2071 accmode |= VREAD; 2072 if (user_flags & W_OK) 2073 accmode |= VWRITE; 2074 if (user_flags & X_OK) 2075 accmode |= VEXEC; 2076 #ifdef MAC 2077 error = mac_vnode_check_access(cred, vp, accmode); 2078 if (error != 0) 2079 return (error); 2080 #endif 2081 if ((accmode & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) 2082 error = VOP_ACCESS(vp, accmode, cred, td); 2083 return (error); 2084 } 2085 2086 /* 2087 * Check access permissions using "real" credentials. 2088 */ 2089 #ifndef _SYS_SYSPROTO_H_ 2090 struct access_args { 2091 char *path; 2092 int amode; 2093 }; 2094 #endif 2095 int 2096 sys_access(struct thread *td, struct access_args *uap) 2097 { 2098 2099 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2100 0, uap->amode)); 2101 } 2102 2103 #ifndef _SYS_SYSPROTO_H_ 2104 struct faccessat_args { 2105 int dirfd; 2106 char *path; 2107 int amode; 2108 int flag; 2109 } 2110 #endif 2111 int 2112 sys_faccessat(struct thread *td, struct faccessat_args *uap) 2113 { 2114 2115 return (kern_accessat(td, uap->fd, uap->path, UIO_USERSPACE, uap->flag, 2116 uap->amode)); 2117 } 2118 2119 int 2120 kern_accessat(struct thread *td, int fd, const char *path, 2121 enum uio_seg pathseg, int flag, int amode) 2122 { 2123 struct ucred *cred, *usecred; 2124 struct vnode *vp; 2125 struct nameidata nd; 2126 int error; 2127 2128 if ((flag & ~(AT_EACCESS | AT_RESOLVE_BENEATH | AT_EMPTY_PATH)) != 0) 2129 return (EINVAL); 2130 if (amode != F_OK && (amode & ~(R_OK | W_OK | X_OK)) != 0) 2131 return (EINVAL); 2132 2133 /* 2134 * Create and modify a temporary credential instead of one that 2135 * is potentially shared (if we need one). 2136 */ 2137 cred = td->td_ucred; 2138 if ((flag & AT_EACCESS) == 0 && 2139 ((cred->cr_uid != cred->cr_ruid || 2140 cred->cr_rgid != cred->cr_groups[0]))) { 2141 usecred = crdup(cred); 2142 usecred->cr_uid = cred->cr_ruid; 2143 usecred->cr_groups[0] = cred->cr_rgid; 2144 td->td_ucred = usecred; 2145 } else 2146 usecred = cred; 2147 AUDIT_ARG_VALUE(amode); 2148 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | 2149 AUDITVNODE1 | at2cnpflags(flag, AT_RESOLVE_BENEATH | 2150 AT_EMPTY_PATH), pathseg, path, fd, &cap_fstat_rights); 2151 if ((error = namei(&nd)) != 0) 2152 goto out; 2153 vp = nd.ni_vp; 2154 2155 error = vn_access(vp, amode, usecred, td); 2156 NDFREE_NOTHING(&nd); 2157 vput(vp); 2158 out: 2159 if (usecred != cred) { 2160 td->td_ucred = cred; 2161 crfree(usecred); 2162 } 2163 return (error); 2164 } 2165 2166 /* 2167 * Check access permissions using "effective" credentials. 2168 */ 2169 #ifndef _SYS_SYSPROTO_H_ 2170 struct eaccess_args { 2171 char *path; 2172 int amode; 2173 }; 2174 #endif 2175 int 2176 sys_eaccess(struct thread *td, struct eaccess_args *uap) 2177 { 2178 2179 return (kern_accessat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2180 AT_EACCESS, uap->amode)); 2181 } 2182 2183 #if defined(COMPAT_43) 2184 /* 2185 * Get file status; this version follows links. 2186 */ 2187 #ifndef _SYS_SYSPROTO_H_ 2188 struct ostat_args { 2189 char *path; 2190 struct ostat *ub; 2191 }; 2192 #endif 2193 int 2194 ostat(struct thread *td, struct ostat_args *uap) 2195 { 2196 struct stat sb; 2197 struct ostat osb; 2198 int error; 2199 2200 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2201 &sb, NULL); 2202 if (error != 0) 2203 return (error); 2204 cvtstat(&sb, &osb); 2205 return (copyout(&osb, uap->ub, sizeof (osb))); 2206 } 2207 2208 /* 2209 * Get file status; this version does not follow links. 2210 */ 2211 #ifndef _SYS_SYSPROTO_H_ 2212 struct olstat_args { 2213 char *path; 2214 struct ostat *ub; 2215 }; 2216 #endif 2217 int 2218 olstat(struct thread *td, struct olstat_args *uap) 2219 { 2220 struct stat sb; 2221 struct ostat osb; 2222 int error; 2223 2224 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2225 UIO_USERSPACE, &sb, NULL); 2226 if (error != 0) 2227 return (error); 2228 cvtstat(&sb, &osb); 2229 return (copyout(&osb, uap->ub, sizeof (osb))); 2230 } 2231 2232 /* 2233 * Convert from an old to a new stat structure. 2234 * XXX: many values are blindly truncated. 2235 */ 2236 void 2237 cvtstat(struct stat *st, struct ostat *ost) 2238 { 2239 2240 bzero(ost, sizeof(*ost)); 2241 ost->st_dev = st->st_dev; 2242 ost->st_ino = st->st_ino; 2243 ost->st_mode = st->st_mode; 2244 ost->st_nlink = st->st_nlink; 2245 ost->st_uid = st->st_uid; 2246 ost->st_gid = st->st_gid; 2247 ost->st_rdev = st->st_rdev; 2248 ost->st_size = MIN(st->st_size, INT32_MAX); 2249 ost->st_atim = st->st_atim; 2250 ost->st_mtim = st->st_mtim; 2251 ost->st_ctim = st->st_ctim; 2252 ost->st_blksize = st->st_blksize; 2253 ost->st_blocks = st->st_blocks; 2254 ost->st_flags = st->st_flags; 2255 ost->st_gen = st->st_gen; 2256 } 2257 #endif /* COMPAT_43 */ 2258 2259 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 2260 int ino64_trunc_error; 2261 SYSCTL_INT(_vfs, OID_AUTO, ino64_trunc_error, CTLFLAG_RW, 2262 &ino64_trunc_error, 0, 2263 "Error on truncation of device, file or inode number, or link count"); 2264 2265 int 2266 freebsd11_cvtstat(struct stat *st, struct freebsd11_stat *ost) 2267 { 2268 2269 ost->st_dev = st->st_dev; 2270 if (ost->st_dev != st->st_dev) { 2271 switch (ino64_trunc_error) { 2272 default: 2273 /* 2274 * Since dev_t is almost raw, don't clamp to the 2275 * maximum for case 2, but ignore the error. 2276 */ 2277 break; 2278 case 1: 2279 return (EOVERFLOW); 2280 } 2281 } 2282 ost->st_ino = st->st_ino; 2283 if (ost->st_ino != st->st_ino) { 2284 switch (ino64_trunc_error) { 2285 default: 2286 case 0: 2287 break; 2288 case 1: 2289 return (EOVERFLOW); 2290 case 2: 2291 ost->st_ino = UINT32_MAX; 2292 break; 2293 } 2294 } 2295 ost->st_mode = st->st_mode; 2296 ost->st_nlink = st->st_nlink; 2297 if (ost->st_nlink != st->st_nlink) { 2298 switch (ino64_trunc_error) { 2299 default: 2300 case 0: 2301 break; 2302 case 1: 2303 return (EOVERFLOW); 2304 case 2: 2305 ost->st_nlink = UINT16_MAX; 2306 break; 2307 } 2308 } 2309 ost->st_uid = st->st_uid; 2310 ost->st_gid = st->st_gid; 2311 ost->st_rdev = st->st_rdev; 2312 if (ost->st_rdev != st->st_rdev) { 2313 switch (ino64_trunc_error) { 2314 default: 2315 break; 2316 case 1: 2317 return (EOVERFLOW); 2318 } 2319 } 2320 ost->st_atim = st->st_atim; 2321 ost->st_mtim = st->st_mtim; 2322 ost->st_ctim = st->st_ctim; 2323 ost->st_size = st->st_size; 2324 ost->st_blocks = st->st_blocks; 2325 ost->st_blksize = st->st_blksize; 2326 ost->st_flags = st->st_flags; 2327 ost->st_gen = st->st_gen; 2328 ost->st_lspare = 0; 2329 ost->st_birthtim = st->st_birthtim; 2330 bzero((char *)&ost->st_birthtim + sizeof(ost->st_birthtim), 2331 sizeof(*ost) - offsetof(struct freebsd11_stat, 2332 st_birthtim) - sizeof(ost->st_birthtim)); 2333 return (0); 2334 } 2335 2336 int 2337 freebsd11_stat(struct thread *td, struct freebsd11_stat_args* uap) 2338 { 2339 struct stat sb; 2340 struct freebsd11_stat osb; 2341 int error; 2342 2343 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2344 &sb, NULL); 2345 if (error != 0) 2346 return (error); 2347 error = freebsd11_cvtstat(&sb, &osb); 2348 if (error == 0) 2349 error = copyout(&osb, uap->ub, sizeof(osb)); 2350 return (error); 2351 } 2352 2353 int 2354 freebsd11_lstat(struct thread *td, struct freebsd11_lstat_args* uap) 2355 { 2356 struct stat sb; 2357 struct freebsd11_stat osb; 2358 int error; 2359 2360 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2361 UIO_USERSPACE, &sb, NULL); 2362 if (error != 0) 2363 return (error); 2364 error = freebsd11_cvtstat(&sb, &osb); 2365 if (error == 0) 2366 error = copyout(&osb, uap->ub, sizeof(osb)); 2367 return (error); 2368 } 2369 2370 int 2371 freebsd11_fhstat(struct thread *td, struct freebsd11_fhstat_args* uap) 2372 { 2373 struct fhandle fh; 2374 struct stat sb; 2375 struct freebsd11_stat osb; 2376 int error; 2377 2378 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 2379 if (error != 0) 2380 return (error); 2381 error = kern_fhstat(td, fh, &sb); 2382 if (error != 0) 2383 return (error); 2384 error = freebsd11_cvtstat(&sb, &osb); 2385 if (error == 0) 2386 error = copyout(&osb, uap->sb, sizeof(osb)); 2387 return (error); 2388 } 2389 2390 int 2391 freebsd11_fstatat(struct thread *td, struct freebsd11_fstatat_args* uap) 2392 { 2393 struct stat sb; 2394 struct freebsd11_stat osb; 2395 int error; 2396 2397 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2398 UIO_USERSPACE, &sb, NULL); 2399 if (error != 0) 2400 return (error); 2401 error = freebsd11_cvtstat(&sb, &osb); 2402 if (error == 0) 2403 error = copyout(&osb, uap->buf, sizeof(osb)); 2404 return (error); 2405 } 2406 #endif /* COMPAT_FREEBSD11 */ 2407 2408 /* 2409 * Get file status 2410 */ 2411 #ifndef _SYS_SYSPROTO_H_ 2412 struct fstatat_args { 2413 int fd; 2414 char *path; 2415 struct stat *buf; 2416 int flag; 2417 } 2418 #endif 2419 int 2420 sys_fstatat(struct thread *td, struct fstatat_args *uap) 2421 { 2422 struct stat sb; 2423 int error; 2424 2425 error = kern_statat(td, uap->flag, uap->fd, uap->path, 2426 UIO_USERSPACE, &sb, NULL); 2427 if (error == 0) 2428 error = copyout(&sb, uap->buf, sizeof (sb)); 2429 return (error); 2430 } 2431 2432 int 2433 kern_statat(struct thread *td, int flag, int fd, const char *path, 2434 enum uio_seg pathseg, struct stat *sbp, 2435 void (*hook)(struct vnode *vp, struct stat *sbp)) 2436 { 2437 struct nameidata nd; 2438 int error; 2439 2440 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2441 AT_EMPTY_PATH)) != 0) 2442 return (EINVAL); 2443 2444 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_RESOLVE_BENEATH | 2445 AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH) | LOCKSHARED | LOCKLEAF | 2446 AUDITVNODE1, pathseg, path, fd, &cap_fstat_rights); 2447 2448 if ((error = namei(&nd)) != 0) { 2449 if (error == ENOTDIR && 2450 (nd.ni_resflags & NIRES_EMPTYPATH) != 0) 2451 error = kern_fstat(td, fd, sbp); 2452 return (error); 2453 } 2454 error = VOP_STAT(nd.ni_vp, sbp, td->td_ucred, NOCRED); 2455 if (error == 0) { 2456 if (__predict_false(hook != NULL)) 2457 hook(nd.ni_vp, sbp); 2458 } 2459 NDFREE_NOTHING(&nd); 2460 vput(nd.ni_vp); 2461 #ifdef __STAT_TIME_T_EXT 2462 sbp->st_atim_ext = 0; 2463 sbp->st_mtim_ext = 0; 2464 sbp->st_ctim_ext = 0; 2465 sbp->st_btim_ext = 0; 2466 #endif 2467 #ifdef KTRACE 2468 if (KTRPOINT(td, KTR_STRUCT)) 2469 ktrstat_error(sbp, error); 2470 #endif 2471 return (error); 2472 } 2473 2474 #if defined(COMPAT_FREEBSD11) 2475 /* 2476 * Implementation of the NetBSD [l]stat() functions. 2477 */ 2478 int 2479 freebsd11_cvtnstat(struct stat *sb, struct nstat *nsb) 2480 { 2481 struct freebsd11_stat sb11; 2482 int error; 2483 2484 error = freebsd11_cvtstat(sb, &sb11); 2485 if (error != 0) 2486 return (error); 2487 2488 bzero(nsb, sizeof(*nsb)); 2489 CP(sb11, *nsb, st_dev); 2490 CP(sb11, *nsb, st_ino); 2491 CP(sb11, *nsb, st_mode); 2492 CP(sb11, *nsb, st_nlink); 2493 CP(sb11, *nsb, st_uid); 2494 CP(sb11, *nsb, st_gid); 2495 CP(sb11, *nsb, st_rdev); 2496 CP(sb11, *nsb, st_atim); 2497 CP(sb11, *nsb, st_mtim); 2498 CP(sb11, *nsb, st_ctim); 2499 CP(sb11, *nsb, st_size); 2500 CP(sb11, *nsb, st_blocks); 2501 CP(sb11, *nsb, st_blksize); 2502 CP(sb11, *nsb, st_flags); 2503 CP(sb11, *nsb, st_gen); 2504 CP(sb11, *nsb, st_birthtim); 2505 return (0); 2506 } 2507 2508 #ifndef _SYS_SYSPROTO_H_ 2509 struct freebsd11_nstat_args { 2510 char *path; 2511 struct nstat *ub; 2512 }; 2513 #endif 2514 int 2515 freebsd11_nstat(struct thread *td, struct freebsd11_nstat_args *uap) 2516 { 2517 struct stat sb; 2518 struct nstat nsb; 2519 int error; 2520 2521 error = kern_statat(td, 0, AT_FDCWD, uap->path, UIO_USERSPACE, 2522 &sb, NULL); 2523 if (error != 0) 2524 return (error); 2525 error = freebsd11_cvtnstat(&sb, &nsb); 2526 if (error == 0) 2527 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2528 return (error); 2529 } 2530 2531 /* 2532 * NetBSD lstat. Get file status; this version does not follow links. 2533 */ 2534 #ifndef _SYS_SYSPROTO_H_ 2535 struct freebsd11_nlstat_args { 2536 char *path; 2537 struct nstat *ub; 2538 }; 2539 #endif 2540 int 2541 freebsd11_nlstat(struct thread *td, struct freebsd11_nlstat_args *uap) 2542 { 2543 struct stat sb; 2544 struct nstat nsb; 2545 int error; 2546 2547 error = kern_statat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->path, 2548 UIO_USERSPACE, &sb, NULL); 2549 if (error != 0) 2550 return (error); 2551 error = freebsd11_cvtnstat(&sb, &nsb); 2552 if (error == 0) 2553 error = copyout(&nsb, uap->ub, sizeof (nsb)); 2554 return (error); 2555 } 2556 #endif /* COMPAT_FREEBSD11 */ 2557 2558 /* 2559 * Get configurable pathname variables. 2560 */ 2561 #ifndef _SYS_SYSPROTO_H_ 2562 struct pathconf_args { 2563 char *path; 2564 int name; 2565 }; 2566 #endif 2567 int 2568 sys_pathconf(struct thread *td, struct pathconf_args *uap) 2569 { 2570 long value; 2571 int error; 2572 2573 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, FOLLOW, 2574 &value); 2575 if (error == 0) 2576 td->td_retval[0] = value; 2577 return (error); 2578 } 2579 2580 #ifndef _SYS_SYSPROTO_H_ 2581 struct lpathconf_args { 2582 char *path; 2583 int name; 2584 }; 2585 #endif 2586 int 2587 sys_lpathconf(struct thread *td, struct lpathconf_args *uap) 2588 { 2589 long value; 2590 int error; 2591 2592 error = kern_pathconf(td, uap->path, UIO_USERSPACE, uap->name, 2593 NOFOLLOW, &value); 2594 if (error == 0) 2595 td->td_retval[0] = value; 2596 return (error); 2597 } 2598 2599 int 2600 kern_pathconf(struct thread *td, const char *path, enum uio_seg pathseg, 2601 int name, u_long flags, long *valuep) 2602 { 2603 struct nameidata nd; 2604 int error; 2605 2606 NDINIT(&nd, LOOKUP, LOCKSHARED | LOCKLEAF | AUDITVNODE1 | flags, 2607 pathseg, path); 2608 if ((error = namei(&nd)) != 0) 2609 return (error); 2610 NDFREE_NOTHING(&nd); 2611 2612 error = VOP_PATHCONF(nd.ni_vp, name, valuep); 2613 vput(nd.ni_vp); 2614 return (error); 2615 } 2616 2617 /* 2618 * Return target name of a symbolic link. 2619 */ 2620 #ifndef _SYS_SYSPROTO_H_ 2621 struct readlink_args { 2622 char *path; 2623 char *buf; 2624 size_t count; 2625 }; 2626 #endif 2627 int 2628 sys_readlink(struct thread *td, struct readlink_args *uap) 2629 { 2630 2631 return (kern_readlinkat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2632 uap->buf, UIO_USERSPACE, uap->count)); 2633 } 2634 #ifndef _SYS_SYSPROTO_H_ 2635 struct readlinkat_args { 2636 int fd; 2637 char *path; 2638 char *buf; 2639 size_t bufsize; 2640 }; 2641 #endif 2642 int 2643 sys_readlinkat(struct thread *td, struct readlinkat_args *uap) 2644 { 2645 2646 return (kern_readlinkat(td, uap->fd, uap->path, UIO_USERSPACE, 2647 uap->buf, UIO_USERSPACE, uap->bufsize)); 2648 } 2649 2650 int 2651 kern_readlinkat(struct thread *td, int fd, const char *path, 2652 enum uio_seg pathseg, char *buf, enum uio_seg bufseg, size_t count) 2653 { 2654 struct vnode *vp; 2655 struct nameidata nd; 2656 int error; 2657 2658 if (count > IOSIZE_MAX) 2659 return (EINVAL); 2660 2661 NDINIT_AT(&nd, LOOKUP, NOFOLLOW | LOCKSHARED | LOCKLEAF | AUDITVNODE1 | 2662 EMPTYPATH, pathseg, path, fd); 2663 2664 if ((error = namei(&nd)) != 0) 2665 return (error); 2666 NDFREE_NOTHING(&nd); 2667 vp = nd.ni_vp; 2668 2669 error = kern_readlink_vp(vp, buf, bufseg, count, td); 2670 vput(vp); 2671 2672 return (error); 2673 } 2674 2675 /* 2676 * Helper function to readlink from a vnode 2677 */ 2678 static int 2679 kern_readlink_vp(struct vnode *vp, char *buf, enum uio_seg bufseg, size_t count, 2680 struct thread *td) 2681 { 2682 struct iovec aiov; 2683 struct uio auio; 2684 int error; 2685 2686 ASSERT_VOP_LOCKED(vp, "kern_readlink_vp(): vp not locked"); 2687 #ifdef MAC 2688 error = mac_vnode_check_readlink(td->td_ucred, vp); 2689 if (error != 0) 2690 return (error); 2691 #endif 2692 if (vp->v_type != VLNK && (vp->v_vflag & VV_READLINK) == 0) 2693 return (EINVAL); 2694 2695 aiov.iov_base = buf; 2696 aiov.iov_len = count; 2697 auio.uio_iov = &aiov; 2698 auio.uio_iovcnt = 1; 2699 auio.uio_offset = 0; 2700 auio.uio_rw = UIO_READ; 2701 auio.uio_segflg = bufseg; 2702 auio.uio_td = td; 2703 auio.uio_resid = count; 2704 error = VOP_READLINK(vp, &auio, td->td_ucred); 2705 td->td_retval[0] = count - auio.uio_resid; 2706 return (error); 2707 } 2708 2709 /* 2710 * Common implementation code for chflags() and fchflags(). 2711 */ 2712 static int 2713 setfflags(struct thread *td, struct vnode *vp, u_long flags) 2714 { 2715 struct mount *mp; 2716 struct vattr vattr; 2717 int error; 2718 2719 /* We can't support the value matching VNOVAL. */ 2720 if (flags == VNOVAL) 2721 return (EOPNOTSUPP); 2722 2723 /* 2724 * Prevent non-root users from setting flags on devices. When 2725 * a device is reused, users can retain ownership of the device 2726 * if they are allowed to set flags and programs assume that 2727 * chown can't fail when done as root. 2728 */ 2729 if (vp->v_type == VCHR || vp->v_type == VBLK) { 2730 error = priv_check(td, PRIV_VFS_CHFLAGS_DEV); 2731 if (error != 0) 2732 return (error); 2733 } 2734 2735 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2736 return (error); 2737 VATTR_NULL(&vattr); 2738 vattr.va_flags = flags; 2739 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2740 #ifdef MAC 2741 error = mac_vnode_check_setflags(td->td_ucred, vp, vattr.va_flags); 2742 if (error == 0) 2743 #endif 2744 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 2745 VOP_UNLOCK(vp); 2746 vn_finished_write(mp); 2747 return (error); 2748 } 2749 2750 /* 2751 * Change flags of a file given a path name. 2752 */ 2753 #ifndef _SYS_SYSPROTO_H_ 2754 struct chflags_args { 2755 const char *path; 2756 u_long flags; 2757 }; 2758 #endif 2759 int 2760 sys_chflags(struct thread *td, struct chflags_args *uap) 2761 { 2762 2763 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2764 uap->flags, 0)); 2765 } 2766 2767 #ifndef _SYS_SYSPROTO_H_ 2768 struct chflagsat_args { 2769 int fd; 2770 const char *path; 2771 u_long flags; 2772 int atflag; 2773 } 2774 #endif 2775 int 2776 sys_chflagsat(struct thread *td, struct chflagsat_args *uap) 2777 { 2778 2779 return (kern_chflagsat(td, uap->fd, uap->path, UIO_USERSPACE, 2780 uap->flags, uap->atflag)); 2781 } 2782 2783 /* 2784 * Same as chflags() but doesn't follow symlinks. 2785 */ 2786 #ifndef _SYS_SYSPROTO_H_ 2787 struct lchflags_args { 2788 const char *path; 2789 u_long flags; 2790 }; 2791 #endif 2792 int 2793 sys_lchflags(struct thread *td, struct lchflags_args *uap) 2794 { 2795 2796 return (kern_chflagsat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2797 uap->flags, AT_SYMLINK_NOFOLLOW)); 2798 } 2799 2800 static int 2801 kern_chflagsat(struct thread *td, int fd, const char *path, 2802 enum uio_seg pathseg, u_long flags, int atflag) 2803 { 2804 struct nameidata nd; 2805 int error; 2806 2807 if ((atflag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2808 AT_EMPTY_PATH)) != 0) 2809 return (EINVAL); 2810 2811 AUDIT_ARG_FFLAGS(flags); 2812 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(atflag, AT_SYMLINK_NOFOLLOW | 2813 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2814 fd, &cap_fchflags_rights); 2815 if ((error = namei(&nd)) != 0) 2816 return (error); 2817 NDFREE_NOTHING(&nd); 2818 error = setfflags(td, nd.ni_vp, flags); 2819 vrele(nd.ni_vp); 2820 return (error); 2821 } 2822 2823 /* 2824 * Change flags of a file given a file descriptor. 2825 */ 2826 #ifndef _SYS_SYSPROTO_H_ 2827 struct fchflags_args { 2828 int fd; 2829 u_long flags; 2830 }; 2831 #endif 2832 int 2833 sys_fchflags(struct thread *td, struct fchflags_args *uap) 2834 { 2835 struct file *fp; 2836 int error; 2837 2838 AUDIT_ARG_FD(uap->fd); 2839 AUDIT_ARG_FFLAGS(uap->flags); 2840 error = getvnode(td, uap->fd, &cap_fchflags_rights, 2841 &fp); 2842 if (error != 0) 2843 return (error); 2844 #ifdef AUDIT 2845 if (AUDITING_TD(td)) { 2846 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 2847 AUDIT_ARG_VNODE1(fp->f_vnode); 2848 VOP_UNLOCK(fp->f_vnode); 2849 } 2850 #endif 2851 error = setfflags(td, fp->f_vnode, uap->flags); 2852 fdrop(fp, td); 2853 return (error); 2854 } 2855 2856 /* 2857 * Common implementation code for chmod(), lchmod() and fchmod(). 2858 */ 2859 int 2860 setfmode(struct thread *td, struct ucred *cred, struct vnode *vp, int mode) 2861 { 2862 struct mount *mp; 2863 struct vattr vattr; 2864 int error; 2865 2866 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2867 return (error); 2868 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2869 VATTR_NULL(&vattr); 2870 vattr.va_mode = mode & ALLPERMS; 2871 #ifdef MAC 2872 error = mac_vnode_check_setmode(cred, vp, vattr.va_mode); 2873 if (error == 0) 2874 #endif 2875 error = VOP_SETATTR(vp, &vattr, cred); 2876 VOP_UNLOCK(vp); 2877 vn_finished_write(mp); 2878 return (error); 2879 } 2880 2881 /* 2882 * Change mode of a file given path name. 2883 */ 2884 #ifndef _SYS_SYSPROTO_H_ 2885 struct chmod_args { 2886 char *path; 2887 int mode; 2888 }; 2889 #endif 2890 int 2891 sys_chmod(struct thread *td, struct chmod_args *uap) 2892 { 2893 2894 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2895 uap->mode, 0)); 2896 } 2897 2898 #ifndef _SYS_SYSPROTO_H_ 2899 struct fchmodat_args { 2900 int dirfd; 2901 char *path; 2902 mode_t mode; 2903 int flag; 2904 } 2905 #endif 2906 int 2907 sys_fchmodat(struct thread *td, struct fchmodat_args *uap) 2908 { 2909 2910 return (kern_fchmodat(td, uap->fd, uap->path, UIO_USERSPACE, 2911 uap->mode, uap->flag)); 2912 } 2913 2914 /* 2915 * Change mode of a file given path name (don't follow links.) 2916 */ 2917 #ifndef _SYS_SYSPROTO_H_ 2918 struct lchmod_args { 2919 char *path; 2920 int mode; 2921 }; 2922 #endif 2923 int 2924 sys_lchmod(struct thread *td, struct lchmod_args *uap) 2925 { 2926 2927 return (kern_fchmodat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 2928 uap->mode, AT_SYMLINK_NOFOLLOW)); 2929 } 2930 2931 int 2932 kern_fchmodat(struct thread *td, int fd, const char *path, 2933 enum uio_seg pathseg, mode_t mode, int flag) 2934 { 2935 struct nameidata nd; 2936 int error; 2937 2938 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 2939 AT_EMPTY_PATH)) != 0) 2940 return (EINVAL); 2941 2942 AUDIT_ARG_MODE(mode); 2943 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 2944 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 2945 fd, &cap_fchmod_rights); 2946 if ((error = namei(&nd)) != 0) 2947 return (error); 2948 NDFREE_NOTHING(&nd); 2949 error = setfmode(td, td->td_ucred, nd.ni_vp, mode); 2950 vrele(nd.ni_vp); 2951 return (error); 2952 } 2953 2954 /* 2955 * Change mode of a file given a file descriptor. 2956 */ 2957 #ifndef _SYS_SYSPROTO_H_ 2958 struct fchmod_args { 2959 int fd; 2960 int mode; 2961 }; 2962 #endif 2963 int 2964 sys_fchmod(struct thread *td, struct fchmod_args *uap) 2965 { 2966 struct file *fp; 2967 int error; 2968 2969 AUDIT_ARG_FD(uap->fd); 2970 AUDIT_ARG_MODE(uap->mode); 2971 2972 error = fget(td, uap->fd, &cap_fchmod_rights, &fp); 2973 if (error != 0) 2974 return (error); 2975 error = fo_chmod(fp, uap->mode, td->td_ucred, td); 2976 fdrop(fp, td); 2977 return (error); 2978 } 2979 2980 /* 2981 * Common implementation for chown(), lchown(), and fchown() 2982 */ 2983 int 2984 setfown(struct thread *td, struct ucred *cred, struct vnode *vp, uid_t uid, 2985 gid_t gid) 2986 { 2987 struct mount *mp; 2988 struct vattr vattr; 2989 int error; 2990 2991 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 2992 return (error); 2993 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 2994 VATTR_NULL(&vattr); 2995 vattr.va_uid = uid; 2996 vattr.va_gid = gid; 2997 #ifdef MAC 2998 error = mac_vnode_check_setowner(cred, vp, vattr.va_uid, 2999 vattr.va_gid); 3000 if (error == 0) 3001 #endif 3002 error = VOP_SETATTR(vp, &vattr, cred); 3003 VOP_UNLOCK(vp); 3004 vn_finished_write(mp); 3005 return (error); 3006 } 3007 3008 /* 3009 * Set ownership given a path name. 3010 */ 3011 #ifndef _SYS_SYSPROTO_H_ 3012 struct chown_args { 3013 char *path; 3014 int uid; 3015 int gid; 3016 }; 3017 #endif 3018 int 3019 sys_chown(struct thread *td, struct chown_args *uap) 3020 { 3021 3022 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, uap->uid, 3023 uap->gid, 0)); 3024 } 3025 3026 #ifndef _SYS_SYSPROTO_H_ 3027 struct fchownat_args { 3028 int fd; 3029 const char * path; 3030 uid_t uid; 3031 gid_t gid; 3032 int flag; 3033 }; 3034 #endif 3035 int 3036 sys_fchownat(struct thread *td, struct fchownat_args *uap) 3037 { 3038 3039 return (kern_fchownat(td, uap->fd, uap->path, UIO_USERSPACE, uap->uid, 3040 uap->gid, uap->flag)); 3041 } 3042 3043 int 3044 kern_fchownat(struct thread *td, int fd, const char *path, 3045 enum uio_seg pathseg, int uid, int gid, int flag) 3046 { 3047 struct nameidata nd; 3048 int error; 3049 3050 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3051 AT_EMPTY_PATH)) != 0) 3052 return (EINVAL); 3053 3054 AUDIT_ARG_OWNER(uid, gid); 3055 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3056 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, pathseg, path, 3057 fd, &cap_fchown_rights); 3058 3059 if ((error = namei(&nd)) != 0) 3060 return (error); 3061 NDFREE_NOTHING(&nd); 3062 error = setfown(td, td->td_ucred, nd.ni_vp, uid, gid); 3063 vrele(nd.ni_vp); 3064 return (error); 3065 } 3066 3067 /* 3068 * Set ownership given a path name, do not cross symlinks. 3069 */ 3070 #ifndef _SYS_SYSPROTO_H_ 3071 struct lchown_args { 3072 char *path; 3073 int uid; 3074 int gid; 3075 }; 3076 #endif 3077 int 3078 sys_lchown(struct thread *td, struct lchown_args *uap) 3079 { 3080 3081 return (kern_fchownat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3082 uap->uid, uap->gid, AT_SYMLINK_NOFOLLOW)); 3083 } 3084 3085 /* 3086 * Set ownership given a file descriptor. 3087 */ 3088 #ifndef _SYS_SYSPROTO_H_ 3089 struct fchown_args { 3090 int fd; 3091 int uid; 3092 int gid; 3093 }; 3094 #endif 3095 int 3096 sys_fchown(struct thread *td, struct fchown_args *uap) 3097 { 3098 struct file *fp; 3099 int error; 3100 3101 AUDIT_ARG_FD(uap->fd); 3102 AUDIT_ARG_OWNER(uap->uid, uap->gid); 3103 error = fget(td, uap->fd, &cap_fchown_rights, &fp); 3104 if (error != 0) 3105 return (error); 3106 error = fo_chown(fp, uap->uid, uap->gid, td->td_ucred, td); 3107 fdrop(fp, td); 3108 return (error); 3109 } 3110 3111 /* 3112 * Common implementation code for utimes(), lutimes(), and futimes(). 3113 */ 3114 static int 3115 getutimes(const struct timeval *usrtvp, enum uio_seg tvpseg, 3116 struct timespec *tsp) 3117 { 3118 struct timeval tv[2]; 3119 const struct timeval *tvp; 3120 int error; 3121 3122 if (usrtvp == NULL) { 3123 vfs_timestamp(&tsp[0]); 3124 tsp[1] = tsp[0]; 3125 } else { 3126 if (tvpseg == UIO_SYSSPACE) { 3127 tvp = usrtvp; 3128 } else { 3129 if ((error = copyin(usrtvp, tv, sizeof(tv))) != 0) 3130 return (error); 3131 tvp = tv; 3132 } 3133 3134 if (tvp[0].tv_usec < 0 || tvp[0].tv_usec >= 1000000 || 3135 tvp[1].tv_usec < 0 || tvp[1].tv_usec >= 1000000) 3136 return (EINVAL); 3137 TIMEVAL_TO_TIMESPEC(&tvp[0], &tsp[0]); 3138 TIMEVAL_TO_TIMESPEC(&tvp[1], &tsp[1]); 3139 } 3140 return (0); 3141 } 3142 3143 /* 3144 * Common implementation code for futimens(), utimensat(). 3145 */ 3146 #define UTIMENS_NULL 0x1 3147 #define UTIMENS_EXIT 0x2 3148 static int 3149 getutimens(const struct timespec *usrtsp, enum uio_seg tspseg, 3150 struct timespec *tsp, int *retflags) 3151 { 3152 struct timespec tsnow; 3153 int error; 3154 3155 vfs_timestamp(&tsnow); 3156 *retflags = 0; 3157 if (usrtsp == NULL) { 3158 tsp[0] = tsnow; 3159 tsp[1] = tsnow; 3160 *retflags |= UTIMENS_NULL; 3161 return (0); 3162 } 3163 if (tspseg == UIO_SYSSPACE) { 3164 tsp[0] = usrtsp[0]; 3165 tsp[1] = usrtsp[1]; 3166 } else if ((error = copyin(usrtsp, tsp, sizeof(*tsp) * 2)) != 0) 3167 return (error); 3168 if (tsp[0].tv_nsec == UTIME_OMIT && tsp[1].tv_nsec == UTIME_OMIT) 3169 *retflags |= UTIMENS_EXIT; 3170 if (tsp[0].tv_nsec == UTIME_NOW && tsp[1].tv_nsec == UTIME_NOW) 3171 *retflags |= UTIMENS_NULL; 3172 if (tsp[0].tv_nsec == UTIME_OMIT) 3173 tsp[0].tv_sec = VNOVAL; 3174 else if (tsp[0].tv_nsec == UTIME_NOW) 3175 tsp[0] = tsnow; 3176 else if (tsp[0].tv_nsec < 0 || tsp[0].tv_nsec >= 1000000000L) 3177 return (EINVAL); 3178 if (tsp[1].tv_nsec == UTIME_OMIT) 3179 tsp[1].tv_sec = VNOVAL; 3180 else if (tsp[1].tv_nsec == UTIME_NOW) 3181 tsp[1] = tsnow; 3182 else if (tsp[1].tv_nsec < 0 || tsp[1].tv_nsec >= 1000000000L) 3183 return (EINVAL); 3184 3185 return (0); 3186 } 3187 3188 /* 3189 * Common implementation code for utimes(), lutimes(), futimes(), futimens(), 3190 * and utimensat(). 3191 */ 3192 static int 3193 setutimes(struct thread *td, struct vnode *vp, const struct timespec *ts, 3194 int numtimes, int nullflag) 3195 { 3196 struct mount *mp; 3197 struct vattr vattr; 3198 int error; 3199 bool setbirthtime; 3200 3201 setbirthtime = false; 3202 vattr.va_birthtime.tv_sec = VNOVAL; 3203 vattr.va_birthtime.tv_nsec = 0; 3204 3205 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) 3206 return (error); 3207 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3208 if (numtimes < 3 && VOP_GETATTR(vp, &vattr, td->td_ucred) == 0 && 3209 timespeccmp(&ts[1], &vattr.va_birthtime, < )) 3210 setbirthtime = true; 3211 VATTR_NULL(&vattr); 3212 vattr.va_atime = ts[0]; 3213 vattr.va_mtime = ts[1]; 3214 if (setbirthtime) 3215 vattr.va_birthtime = ts[1]; 3216 if (numtimes > 2) 3217 vattr.va_birthtime = ts[2]; 3218 if (nullflag) 3219 vattr.va_vaflags |= VA_UTIMES_NULL; 3220 #ifdef MAC 3221 error = mac_vnode_check_setutimes(td->td_ucred, vp, vattr.va_atime, 3222 vattr.va_mtime); 3223 #endif 3224 if (error == 0) 3225 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3226 VOP_UNLOCK(vp); 3227 vn_finished_write(mp); 3228 return (error); 3229 } 3230 3231 /* 3232 * Set the access and modification times of a file. 3233 */ 3234 #ifndef _SYS_SYSPROTO_H_ 3235 struct utimes_args { 3236 char *path; 3237 struct timeval *tptr; 3238 }; 3239 #endif 3240 int 3241 sys_utimes(struct thread *td, struct utimes_args *uap) 3242 { 3243 3244 return (kern_utimesat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3245 uap->tptr, UIO_USERSPACE)); 3246 } 3247 3248 #ifndef _SYS_SYSPROTO_H_ 3249 struct futimesat_args { 3250 int fd; 3251 const char * path; 3252 const struct timeval * times; 3253 }; 3254 #endif 3255 int 3256 sys_futimesat(struct thread *td, struct futimesat_args *uap) 3257 { 3258 3259 return (kern_utimesat(td, uap->fd, uap->path, UIO_USERSPACE, 3260 uap->times, UIO_USERSPACE)); 3261 } 3262 3263 int 3264 kern_utimesat(struct thread *td, int fd, const char *path, 3265 enum uio_seg pathseg, const struct timeval *tptr, enum uio_seg tptrseg) 3266 { 3267 struct nameidata nd; 3268 struct timespec ts[2]; 3269 int error; 3270 3271 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3272 return (error); 3273 NDINIT_ATRIGHTS(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path, fd, 3274 &cap_futimes_rights); 3275 3276 if ((error = namei(&nd)) != 0) 3277 return (error); 3278 NDFREE_NOTHING(&nd); 3279 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3280 vrele(nd.ni_vp); 3281 return (error); 3282 } 3283 3284 /* 3285 * Set the access and modification times of a file. 3286 */ 3287 #ifndef _SYS_SYSPROTO_H_ 3288 struct lutimes_args { 3289 char *path; 3290 struct timeval *tptr; 3291 }; 3292 #endif 3293 int 3294 sys_lutimes(struct thread *td, struct lutimes_args *uap) 3295 { 3296 3297 return (kern_lutimes(td, uap->path, UIO_USERSPACE, uap->tptr, 3298 UIO_USERSPACE)); 3299 } 3300 3301 int 3302 kern_lutimes(struct thread *td, const char *path, enum uio_seg pathseg, 3303 const struct timeval *tptr, enum uio_seg tptrseg) 3304 { 3305 struct timespec ts[2]; 3306 struct nameidata nd; 3307 int error; 3308 3309 if ((error = getutimes(tptr, tptrseg, ts)) != 0) 3310 return (error); 3311 NDINIT(&nd, LOOKUP, NOFOLLOW | AUDITVNODE1, pathseg, path); 3312 if ((error = namei(&nd)) != 0) 3313 return (error); 3314 NDFREE_NOTHING(&nd); 3315 error = setutimes(td, nd.ni_vp, ts, 2, tptr == NULL); 3316 vrele(nd.ni_vp); 3317 return (error); 3318 } 3319 3320 /* 3321 * Set the access and modification times of a file. 3322 */ 3323 #ifndef _SYS_SYSPROTO_H_ 3324 struct futimes_args { 3325 int fd; 3326 struct timeval *tptr; 3327 }; 3328 #endif 3329 int 3330 sys_futimes(struct thread *td, struct futimes_args *uap) 3331 { 3332 3333 return (kern_futimes(td, uap->fd, uap->tptr, UIO_USERSPACE)); 3334 } 3335 3336 int 3337 kern_futimes(struct thread *td, int fd, const struct timeval *tptr, 3338 enum uio_seg tptrseg) 3339 { 3340 struct timespec ts[2]; 3341 struct file *fp; 3342 int error; 3343 3344 AUDIT_ARG_FD(fd); 3345 error = getutimes(tptr, tptrseg, ts); 3346 if (error != 0) 3347 return (error); 3348 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3349 if (error != 0) 3350 return (error); 3351 #ifdef AUDIT 3352 if (AUDITING_TD(td)) { 3353 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3354 AUDIT_ARG_VNODE1(fp->f_vnode); 3355 VOP_UNLOCK(fp->f_vnode); 3356 } 3357 #endif 3358 error = setutimes(td, fp->f_vnode, ts, 2, tptr == NULL); 3359 fdrop(fp, td); 3360 return (error); 3361 } 3362 3363 int 3364 sys_futimens(struct thread *td, struct futimens_args *uap) 3365 { 3366 3367 return (kern_futimens(td, uap->fd, uap->times, UIO_USERSPACE)); 3368 } 3369 3370 int 3371 kern_futimens(struct thread *td, int fd, const struct timespec *tptr, 3372 enum uio_seg tptrseg) 3373 { 3374 struct timespec ts[2]; 3375 struct file *fp; 3376 int error, flags; 3377 3378 AUDIT_ARG_FD(fd); 3379 error = getutimens(tptr, tptrseg, ts, &flags); 3380 if (error != 0) 3381 return (error); 3382 if (flags & UTIMENS_EXIT) 3383 return (0); 3384 error = getvnode(td, fd, &cap_futimes_rights, &fp); 3385 if (error != 0) 3386 return (error); 3387 #ifdef AUDIT 3388 if (AUDITING_TD(td)) { 3389 vn_lock(fp->f_vnode, LK_SHARED | LK_RETRY); 3390 AUDIT_ARG_VNODE1(fp->f_vnode); 3391 VOP_UNLOCK(fp->f_vnode); 3392 } 3393 #endif 3394 error = setutimes(td, fp->f_vnode, ts, 2, flags & UTIMENS_NULL); 3395 fdrop(fp, td); 3396 return (error); 3397 } 3398 3399 int 3400 sys_utimensat(struct thread *td, struct utimensat_args *uap) 3401 { 3402 3403 return (kern_utimensat(td, uap->fd, uap->path, UIO_USERSPACE, 3404 uap->times, UIO_USERSPACE, uap->flag)); 3405 } 3406 3407 int 3408 kern_utimensat(struct thread *td, int fd, const char *path, 3409 enum uio_seg pathseg, const struct timespec *tptr, enum uio_seg tptrseg, 3410 int flag) 3411 { 3412 struct nameidata nd; 3413 struct timespec ts[2]; 3414 int error, flags; 3415 3416 if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH | 3417 AT_EMPTY_PATH)) != 0) 3418 return (EINVAL); 3419 3420 if ((error = getutimens(tptr, tptrseg, ts, &flags)) != 0) 3421 return (error); 3422 NDINIT_ATRIGHTS(&nd, LOOKUP, at2cnpflags(flag, AT_SYMLINK_NOFOLLOW | 3423 AT_RESOLVE_BENEATH | AT_EMPTY_PATH) | AUDITVNODE1, 3424 pathseg, path, fd, &cap_futimes_rights); 3425 if ((error = namei(&nd)) != 0) 3426 return (error); 3427 /* 3428 * We are allowed to call namei() regardless of 2xUTIME_OMIT. 3429 * POSIX states: 3430 * "If both tv_nsec fields are UTIME_OMIT... EACCESS may be detected." 3431 * "Search permission is denied by a component of the path prefix." 3432 */ 3433 NDFREE_NOTHING(&nd); 3434 if ((flags & UTIMENS_EXIT) == 0) 3435 error = setutimes(td, nd.ni_vp, ts, 2, flags & UTIMENS_NULL); 3436 vrele(nd.ni_vp); 3437 return (error); 3438 } 3439 3440 /* 3441 * Truncate a file given its path name. 3442 */ 3443 #ifndef _SYS_SYSPROTO_H_ 3444 struct truncate_args { 3445 char *path; 3446 int pad; 3447 off_t length; 3448 }; 3449 #endif 3450 int 3451 sys_truncate(struct thread *td, struct truncate_args *uap) 3452 { 3453 3454 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3455 } 3456 3457 int 3458 kern_truncate(struct thread *td, const char *path, enum uio_seg pathseg, 3459 off_t length) 3460 { 3461 struct mount *mp; 3462 struct vnode *vp; 3463 void *rl_cookie; 3464 struct vattr vattr; 3465 struct nameidata nd; 3466 int error; 3467 3468 if (length < 0) 3469 return (EINVAL); 3470 NDPREINIT(&nd); 3471 retry: 3472 NDINIT(&nd, LOOKUP, FOLLOW | AUDITVNODE1, pathseg, path); 3473 if ((error = namei(&nd)) != 0) 3474 return (error); 3475 vp = nd.ni_vp; 3476 rl_cookie = vn_rangelock_wlock(vp, 0, OFF_MAX); 3477 if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { 3478 vn_rangelock_unlock(vp, rl_cookie); 3479 vrele(vp); 3480 return (error); 3481 } 3482 NDFREE_PNBUF(&nd); 3483 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); 3484 if (vp->v_type == VDIR) 3485 error = EISDIR; 3486 #ifdef MAC 3487 else if ((error = mac_vnode_check_write(td->td_ucred, NOCRED, vp))) { 3488 } 3489 #endif 3490 else if ((error = vn_writechk(vp)) == 0 && 3491 (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { 3492 VATTR_NULL(&vattr); 3493 vattr.va_size = length; 3494 error = VOP_SETATTR(vp, &vattr, td->td_ucred); 3495 } 3496 VOP_UNLOCK(vp); 3497 vn_finished_write(mp); 3498 vn_rangelock_unlock(vp, rl_cookie); 3499 vrele(vp); 3500 if (error == ERELOOKUP) 3501 goto retry; 3502 return (error); 3503 } 3504 3505 #if defined(COMPAT_43) 3506 /* 3507 * Truncate a file given its path name. 3508 */ 3509 #ifndef _SYS_SYSPROTO_H_ 3510 struct otruncate_args { 3511 char *path; 3512 long length; 3513 }; 3514 #endif 3515 int 3516 otruncate(struct thread *td, struct otruncate_args *uap) 3517 { 3518 3519 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3520 } 3521 #endif /* COMPAT_43 */ 3522 3523 #if defined(COMPAT_FREEBSD6) 3524 /* Versions with the pad argument */ 3525 int 3526 freebsd6_truncate(struct thread *td, struct freebsd6_truncate_args *uap) 3527 { 3528 3529 return (kern_truncate(td, uap->path, UIO_USERSPACE, uap->length)); 3530 } 3531 3532 int 3533 freebsd6_ftruncate(struct thread *td, struct freebsd6_ftruncate_args *uap) 3534 { 3535 3536 return (kern_ftruncate(td, uap->fd, uap->length)); 3537 } 3538 #endif 3539 3540 int 3541 kern_fsync(struct thread *td, int fd, bool fullsync) 3542 { 3543 struct vnode *vp; 3544 struct mount *mp; 3545 struct file *fp; 3546 int error; 3547 3548 AUDIT_ARG_FD(fd); 3549 error = getvnode(td, fd, &cap_fsync_rights, &fp); 3550 if (error != 0) 3551 return (error); 3552 vp = fp->f_vnode; 3553 #if 0 3554 if (!fullsync) 3555 /* XXXKIB: compete outstanding aio writes */; 3556 #endif 3557 retry: 3558 error = vn_start_write(vp, &mp, V_WAIT | PCATCH); 3559 if (error != 0) 3560 goto drop; 3561 vn_lock(vp, vn_lktype_write(mp, vp) | LK_RETRY); 3562 AUDIT_ARG_VNODE1(vp); 3563 if (vp->v_object != NULL) { 3564 VM_OBJECT_WLOCK(vp->v_object); 3565 vm_object_page_clean(vp->v_object, 0, 0, 0); 3566 VM_OBJECT_WUNLOCK(vp->v_object); 3567 } 3568 error = fullsync ? VOP_FSYNC(vp, MNT_WAIT, td) : VOP_FDATASYNC(vp, td); 3569 VOP_UNLOCK(vp); 3570 vn_finished_write(mp); 3571 if (error == ERELOOKUP) 3572 goto retry; 3573 drop: 3574 fdrop(fp, td); 3575 return (error); 3576 } 3577 3578 /* 3579 * Sync an open file. 3580 */ 3581 #ifndef _SYS_SYSPROTO_H_ 3582 struct fsync_args { 3583 int fd; 3584 }; 3585 #endif 3586 int 3587 sys_fsync(struct thread *td, struct fsync_args *uap) 3588 { 3589 3590 return (kern_fsync(td, uap->fd, true)); 3591 } 3592 3593 int 3594 sys_fdatasync(struct thread *td, struct fdatasync_args *uap) 3595 { 3596 3597 return (kern_fsync(td, uap->fd, false)); 3598 } 3599 3600 /* 3601 * Rename files. Source and destination must either both be directories, or 3602 * both not be directories. If target is a directory, it must be empty. 3603 */ 3604 #ifndef _SYS_SYSPROTO_H_ 3605 struct rename_args { 3606 char *from; 3607 char *to; 3608 }; 3609 #endif 3610 int 3611 sys_rename(struct thread *td, struct rename_args *uap) 3612 { 3613 3614 return (kern_renameat(td, AT_FDCWD, uap->from, AT_FDCWD, 3615 uap->to, UIO_USERSPACE)); 3616 } 3617 3618 #ifndef _SYS_SYSPROTO_H_ 3619 struct renameat_args { 3620 int oldfd; 3621 char *old; 3622 int newfd; 3623 char *new; 3624 }; 3625 #endif 3626 int 3627 sys_renameat(struct thread *td, struct renameat_args *uap) 3628 { 3629 3630 return (kern_renameat(td, uap->oldfd, uap->old, uap->newfd, uap->new, 3631 UIO_USERSPACE)); 3632 } 3633 3634 #ifdef MAC 3635 static int 3636 kern_renameat_mac(struct thread *td, int oldfd, const char *old, int newfd, 3637 const char *new, enum uio_seg pathseg, struct nameidata *fromnd) 3638 { 3639 int error; 3640 3641 NDINIT_ATRIGHTS(fromnd, DELETE, LOCKPARENT | LOCKLEAF | SAVESTART | 3642 AUDITVNODE1, pathseg, old, oldfd, &cap_renameat_source_rights); 3643 if ((error = namei(fromnd)) != 0) 3644 return (error); 3645 error = mac_vnode_check_rename_from(td->td_ucred, fromnd->ni_dvp, 3646 fromnd->ni_vp, &fromnd->ni_cnd); 3647 VOP_UNLOCK(fromnd->ni_dvp); 3648 if (fromnd->ni_dvp != fromnd->ni_vp) 3649 VOP_UNLOCK(fromnd->ni_vp); 3650 if (error != 0) { 3651 NDFREE_PNBUF(fromnd); 3652 vrele(fromnd->ni_dvp); 3653 vrele(fromnd->ni_vp); 3654 if (fromnd->ni_startdir) 3655 vrele(fromnd->ni_startdir); 3656 } 3657 return (error); 3658 } 3659 #endif 3660 3661 int 3662 kern_renameat(struct thread *td, int oldfd, const char *old, int newfd, 3663 const char *new, enum uio_seg pathseg) 3664 { 3665 struct mount *mp = NULL; 3666 struct vnode *tvp, *fvp, *tdvp; 3667 struct nameidata fromnd, tond; 3668 uint64_t tondflags; 3669 int error; 3670 3671 again: 3672 bwillwrite(); 3673 #ifdef MAC 3674 if (mac_vnode_check_rename_from_enabled()) { 3675 error = kern_renameat_mac(td, oldfd, old, newfd, new, pathseg, 3676 &fromnd); 3677 if (error != 0) 3678 return (error); 3679 } else { 3680 #endif 3681 NDINIT_ATRIGHTS(&fromnd, DELETE, WANTPARENT | SAVESTART | AUDITVNODE1, 3682 pathseg, old, oldfd, &cap_renameat_source_rights); 3683 if ((error = namei(&fromnd)) != 0) 3684 return (error); 3685 #ifdef MAC 3686 } 3687 #endif 3688 fvp = fromnd.ni_vp; 3689 tondflags = LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | AUDITVNODE2; 3690 if (fromnd.ni_vp->v_type == VDIR) 3691 tondflags |= WILLBEDIR; 3692 NDINIT_ATRIGHTS(&tond, RENAME, tondflags, pathseg, new, newfd, 3693 &cap_renameat_target_rights); 3694 if ((error = namei(&tond)) != 0) { 3695 /* Translate error code for rename("dir1", "dir2/."). */ 3696 if (error == EISDIR && fvp->v_type == VDIR) 3697 error = EINVAL; 3698 NDFREE_PNBUF(&fromnd); 3699 vrele(fromnd.ni_dvp); 3700 vrele(fvp); 3701 goto out1; 3702 } 3703 tdvp = tond.ni_dvp; 3704 tvp = tond.ni_vp; 3705 error = vn_start_write(fvp, &mp, V_NOWAIT); 3706 if (error != 0) { 3707 NDFREE_PNBUF(&fromnd); 3708 NDFREE_PNBUF(&tond); 3709 if (tvp != NULL) 3710 vput(tvp); 3711 if (tdvp == tvp) 3712 vrele(tdvp); 3713 else 3714 vput(tdvp); 3715 vrele(fromnd.ni_dvp); 3716 vrele(fvp); 3717 vrele(tond.ni_startdir); 3718 if (fromnd.ni_startdir != NULL) 3719 vrele(fromnd.ni_startdir); 3720 error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH); 3721 if (error != 0) 3722 return (error); 3723 goto again; 3724 } 3725 if (tvp != NULL) { 3726 if (fvp->v_type == VDIR && tvp->v_type != VDIR) { 3727 error = ENOTDIR; 3728 goto out; 3729 } else if (fvp->v_type != VDIR && tvp->v_type == VDIR) { 3730 error = EISDIR; 3731 goto out; 3732 } 3733 #ifdef CAPABILITIES 3734 if (newfd != AT_FDCWD && (tond.ni_resflags & NIRES_ABS) == 0) { 3735 /* 3736 * If the target already exists we require CAP_UNLINKAT 3737 * from 'newfd', when newfd was used for the lookup. 3738 */ 3739 error = cap_check(&tond.ni_filecaps.fc_rights, 3740 &cap_unlinkat_rights); 3741 if (error != 0) 3742 goto out; 3743 } 3744 #endif 3745 } 3746 if (fvp == tdvp) { 3747 error = EINVAL; 3748 goto out; 3749 } 3750 /* 3751 * If the source is the same as the destination (that is, if they 3752 * are links to the same vnode), then there is nothing to do. 3753 */ 3754 if (fvp == tvp) 3755 error = ERESTART; 3756 #ifdef MAC 3757 else 3758 error = mac_vnode_check_rename_to(td->td_ucred, tdvp, 3759 tond.ni_vp, fromnd.ni_dvp == tdvp, &tond.ni_cnd); 3760 #endif 3761 out: 3762 if (error == 0) { 3763 error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd, 3764 tond.ni_dvp, tond.ni_vp, &tond.ni_cnd); 3765 NDFREE_PNBUF(&fromnd); 3766 NDFREE_PNBUF(&tond); 3767 } else { 3768 NDFREE_PNBUF(&fromnd); 3769 NDFREE_PNBUF(&tond); 3770 if (tvp != NULL) 3771 vput(tvp); 3772 if (tdvp == tvp) 3773 vrele(tdvp); 3774 else 3775 vput(tdvp); 3776 vrele(fromnd.ni_dvp); 3777 vrele(fvp); 3778 } 3779 vrele(tond.ni_startdir); 3780 vn_finished_write(mp); 3781 out1: 3782 if (fromnd.ni_startdir) 3783 vrele(fromnd.ni_startdir); 3784 if (error == ERESTART) 3785 return (0); 3786 if (error == ERELOOKUP) 3787 goto again; 3788 return (error); 3789 } 3790 3791 /* 3792 * Make a directory file. 3793 */ 3794 #ifndef _SYS_SYSPROTO_H_ 3795 struct mkdir_args { 3796 char *path; 3797 int mode; 3798 }; 3799 #endif 3800 int 3801 sys_mkdir(struct thread *td, struct mkdir_args *uap) 3802 { 3803 3804 return (kern_mkdirat(td, AT_FDCWD, uap->path, UIO_USERSPACE, 3805 uap->mode)); 3806 } 3807 3808 #ifndef _SYS_SYSPROTO_H_ 3809 struct mkdirat_args { 3810 int fd; 3811 char *path; 3812 mode_t mode; 3813 }; 3814 #endif 3815 int 3816 sys_mkdirat(struct thread *td, struct mkdirat_args *uap) 3817 { 3818 3819 return (kern_mkdirat(td, uap->fd, uap->path, UIO_USERSPACE, uap->mode)); 3820 } 3821 3822 int 3823 kern_mkdirat(struct thread *td, int fd, const char *path, enum uio_seg segflg, 3824 int mode) 3825 { 3826 struct mount *mp; 3827 struct vattr vattr; 3828 struct nameidata nd; 3829 int error; 3830 3831 AUDIT_ARG_MODE(mode); 3832 NDPREINIT(&nd); 3833 restart: 3834 bwillwrite(); 3835 NDINIT_ATRIGHTS(&nd, CREATE, LOCKPARENT | SAVENAME | AUDITVNODE1 | 3836 NC_NOMAKEENTRY | NC_KEEPPOSENTRY | FAILIFEXISTS | WILLBEDIR, 3837 segflg, path, fd, &cap_mkdirat_rights); 3838 if ((error = namei(&nd)) != 0) 3839 return (error); 3840 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3841 NDFREE_PNBUF(&nd); 3842 vput(nd.ni_dvp); 3843 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3844 return (error); 3845 goto restart; 3846 } 3847 VATTR_NULL(&vattr); 3848 vattr.va_type = VDIR; 3849 vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_pd->pd_cmask; 3850 #ifdef MAC 3851 error = mac_vnode_check_create(td->td_ucred, nd.ni_dvp, &nd.ni_cnd, 3852 &vattr); 3853 if (error != 0) 3854 goto out; 3855 #endif 3856 error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); 3857 #ifdef MAC 3858 out: 3859 #endif 3860 NDFREE_PNBUF(&nd); 3861 VOP_VPUT_PAIR(nd.ni_dvp, error == 0 ? &nd.ni_vp : NULL, true); 3862 vn_finished_write(mp); 3863 if (error == ERELOOKUP) 3864 goto restart; 3865 return (error); 3866 } 3867 3868 /* 3869 * Remove a directory file. 3870 */ 3871 #ifndef _SYS_SYSPROTO_H_ 3872 struct rmdir_args { 3873 char *path; 3874 }; 3875 #endif 3876 int 3877 sys_rmdir(struct thread *td, struct rmdir_args *uap) 3878 { 3879 3880 return (kern_frmdirat(td, AT_FDCWD, uap->path, FD_NONE, UIO_USERSPACE, 3881 0)); 3882 } 3883 3884 int 3885 kern_frmdirat(struct thread *td, int dfd, const char *path, int fd, 3886 enum uio_seg pathseg, int flag) 3887 { 3888 struct mount *mp; 3889 struct vnode *vp; 3890 struct file *fp; 3891 struct nameidata nd; 3892 cap_rights_t rights; 3893 int error; 3894 3895 fp = NULL; 3896 if (fd != FD_NONE) { 3897 error = getvnode(td, fd, cap_rights_init_one(&rights, 3898 CAP_LOOKUP), &fp); 3899 if (error != 0) 3900 return (error); 3901 } 3902 3903 NDPREINIT(&nd); 3904 restart: 3905 bwillwrite(); 3906 NDINIT_ATRIGHTS(&nd, DELETE, LOCKPARENT | LOCKLEAF | AUDITVNODE1 | 3907 at2cnpflags(flag, AT_RESOLVE_BENEATH), 3908 pathseg, path, dfd, &cap_unlinkat_rights); 3909 if ((error = namei(&nd)) != 0) 3910 goto fdout; 3911 vp = nd.ni_vp; 3912 if (vp->v_type != VDIR) { 3913 error = ENOTDIR; 3914 goto out; 3915 } 3916 /* 3917 * No rmdir "." please. 3918 */ 3919 if (nd.ni_dvp == vp) { 3920 error = EINVAL; 3921 goto out; 3922 } 3923 /* 3924 * The root of a mounted filesystem cannot be deleted. 3925 */ 3926 if (vp->v_vflag & VV_ROOT) { 3927 error = EBUSY; 3928 goto out; 3929 } 3930 3931 if (fp != NULL && fp->f_vnode != vp) { 3932 if (VN_IS_DOOMED(fp->f_vnode)) 3933 error = EBADF; 3934 else 3935 error = EDEADLK; 3936 goto out; 3937 } 3938 3939 #ifdef MAC 3940 error = mac_vnode_check_unlink(td->td_ucred, nd.ni_dvp, vp, 3941 &nd.ni_cnd); 3942 if (error != 0) 3943 goto out; 3944 #endif 3945 if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { 3946 NDFREE_PNBUF(&nd); 3947 vput(vp); 3948 if (nd.ni_dvp == vp) 3949 vrele(nd.ni_dvp); 3950 else 3951 vput(nd.ni_dvp); 3952 if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) 3953 goto fdout; 3954 goto restart; 3955 } 3956 vfs_notify_upper(vp, VFS_NOTIFY_UPPER_UNLINK); 3957 error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd); 3958 vn_finished_write(mp); 3959 out: 3960 NDFREE_PNBUF(&nd); 3961 vput(vp); 3962 if (nd.ni_dvp == vp) 3963 vrele(nd.ni_dvp); 3964 else 3965 vput(nd.ni_dvp); 3966 if (error == ERELOOKUP) 3967 goto restart; 3968 fdout: 3969 if (fp != NULL) 3970 fdrop(fp, td); 3971 return (error); 3972 } 3973 3974 #if defined(COMPAT_43) || defined(COMPAT_FREEBSD11) 3975 int 3976 freebsd11_kern_getdirentries(struct thread *td, int fd, char *ubuf, u_int count, 3977 long *basep, void (*func)(struct freebsd11_dirent *)) 3978 { 3979 struct freebsd11_dirent dstdp; 3980 struct dirent *dp, *edp; 3981 char *dirbuf; 3982 off_t base; 3983 ssize_t resid, ucount; 3984 int error; 3985 3986 /* XXX arbitrary sanity limit on `count'. */ 3987 count = min(count, 64 * 1024); 3988 3989 dirbuf = malloc(count, M_TEMP, M_WAITOK); 3990 3991 error = kern_getdirentries(td, fd, dirbuf, count, &base, &resid, 3992 UIO_SYSSPACE); 3993 if (error != 0) 3994 goto done; 3995 if (basep != NULL) 3996 *basep = base; 3997 3998 ucount = 0; 3999 for (dp = (struct dirent *)dirbuf, 4000 edp = (struct dirent *)&dirbuf[count - resid]; 4001 ucount < count && dp < edp; ) { 4002 if (dp->d_reclen == 0) 4003 break; 4004 MPASS(dp->d_reclen >= _GENERIC_DIRLEN(0)); 4005 if (dp->d_namlen >= sizeof(dstdp.d_name)) 4006 continue; 4007 dstdp.d_type = dp->d_type; 4008 dstdp.d_namlen = dp->d_namlen; 4009 dstdp.d_fileno = dp->d_fileno; /* truncate */ 4010 if (dstdp.d_fileno != dp->d_fileno) { 4011 switch (ino64_trunc_error) { 4012 default: 4013 case 0: 4014 break; 4015 case 1: 4016 error = EOVERFLOW; 4017 goto done; 4018 case 2: 4019 dstdp.d_fileno = UINT32_MAX; 4020 break; 4021 } 4022 } 4023 dstdp.d_reclen = sizeof(dstdp) - sizeof(dstdp.d_name) + 4024 ((dp->d_namlen + 1 + 3) &~ 3); 4025 bcopy(dp->d_name, dstdp.d_name, dstdp.d_namlen); 4026 bzero(dstdp.d_name + dstdp.d_namlen, 4027 dstdp.d_reclen - offsetof(struct freebsd11_dirent, d_name) - 4028 dstdp.d_namlen); 4029 MPASS(dstdp.d_reclen <= dp->d_reclen); 4030 MPASS(ucount + dstdp.d_reclen <= count); 4031 if (func != NULL) 4032 func(&dstdp); 4033 error = copyout(&dstdp, ubuf + ucount, dstdp.d_reclen); 4034 if (error != 0) 4035 break; 4036 dp = (struct dirent *)((char *)dp + dp->d_reclen); 4037 ucount += dstdp.d_reclen; 4038 } 4039 4040 done: 4041 free(dirbuf, M_TEMP); 4042 if (error == 0) 4043 td->td_retval[0] = ucount; 4044 return (error); 4045 } 4046 #endif /* COMPAT */ 4047 4048 #ifdef COMPAT_43 4049 static void 4050 ogetdirentries_cvt(struct freebsd11_dirent *dp) 4051 { 4052 #if (BYTE_ORDER == LITTLE_ENDIAN) 4053 /* 4054 * The expected low byte of dp->d_namlen is our dp->d_type. 4055 * The high MBZ byte of dp->d_namlen is our dp->d_namlen. 4056 */ 4057 dp->d_type = dp->d_namlen; 4058 dp->d_namlen = 0; 4059 #else 4060 /* 4061 * The dp->d_type is the high byte of the expected dp->d_namlen, 4062 * so must be zero'ed. 4063 */ 4064 dp->d_type = 0; 4065 #endif 4066 } 4067 4068 /* 4069 * Read a block of directory entries in a filesystem independent format. 4070 */ 4071 #ifndef _SYS_SYSPROTO_H_ 4072 struct ogetdirentries_args { 4073 int fd; 4074 char *buf; 4075 u_int count; 4076 long *basep; 4077 }; 4078 #endif 4079 int 4080 ogetdirentries(struct thread *td, struct ogetdirentries_args *uap) 4081 { 4082 long loff; 4083 int error; 4084 4085 error = kern_ogetdirentries(td, uap, &loff); 4086 if (error == 0) 4087 error = copyout(&loff, uap->basep, sizeof(long)); 4088 return (error); 4089 } 4090 4091 int 4092 kern_ogetdirentries(struct thread *td, struct ogetdirentries_args *uap, 4093 long *ploff) 4094 { 4095 long base; 4096 int error; 4097 4098 /* XXX arbitrary sanity limit on `count'. */ 4099 if (uap->count > 64 * 1024) 4100 return (EINVAL); 4101 4102 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4103 &base, ogetdirentries_cvt); 4104 4105 if (error == 0 && uap->basep != NULL) 4106 error = copyout(&base, uap->basep, sizeof(long)); 4107 4108 return (error); 4109 } 4110 #endif /* COMPAT_43 */ 4111 4112 #if defined(COMPAT_FREEBSD11) 4113 #ifndef _SYS_SYSPROTO_H_ 4114 struct freebsd11_getdirentries_args { 4115 int fd; 4116 char *buf; 4117 u_int count; 4118 long *basep; 4119 }; 4120 #endif 4121 int 4122 freebsd11_getdirentries(struct thread *td, 4123 struct freebsd11_getdirentries_args *uap) 4124 { 4125 long base; 4126 int error; 4127 4128 error = freebsd11_kern_getdirentries(td, uap->fd, uap->buf, uap->count, 4129 &base, NULL); 4130 4131 if (error == 0 && uap->basep != NULL) 4132 error = copyout(&base, uap->basep, sizeof(long)); 4133 return (error); 4134 } 4135 4136 int 4137 freebsd11_getdents(struct thread *td, struct freebsd11_getdents_args *uap) 4138 { 4139 struct freebsd11_getdirentries_args ap; 4140 4141 ap.fd = uap->fd; 4142 ap.buf = uap->buf; 4143 ap.count = uap->count; 4144 ap.basep = NULL; 4145 return (freebsd11_getdirentries(td, &ap)); 4146 } 4147 #endif /* COMPAT_FREEBSD11 */ 4148 4149 /* 4150 * Read a block of directory entries in a filesystem independent format. 4151 */ 4152 int 4153 sys_getdirentries(struct thread *td, struct getdirentries_args *uap) 4154 { 4155 off_t base; 4156 int error; 4157 4158 error = kern_getdirentries(td, uap->fd, uap->buf, uap->count, &base, 4159 NULL, UIO_USERSPACE); 4160 if (error != 0) 4161 return (error); 4162 if (uap->basep != NULL) 4163 error = copyout(&base, uap->basep, sizeof(off_t)); 4164 return (error); 4165 } 4166 4167 int 4168 kern_getdirentries(struct thread *td, int fd, char *buf, size_t count, 4169 off_t *basep, ssize_t *residp, enum uio_seg bufseg) 4170 { 4171 struct vnode *vp; 4172 struct file *fp; 4173 struct uio auio; 4174 struct iovec aiov; 4175 off_t loff; 4176 int error, eofflag; 4177 off_t foffset; 4178 4179 AUDIT_ARG_FD(fd); 4180 if (count > IOSIZE_MAX) 4181 return (EINVAL); 4182 auio.uio_resid = count; 4183 error = getvnode(td, fd, &cap_read_rights, &fp); 4184 if (error != 0) 4185 return (error); 4186 if ((fp->f_flag & FREAD) == 0) { 4187 fdrop(fp, td); 4188 return (EBADF); 4189 } 4190 vp = fp->f_vnode; 4191 foffset = foffset_lock(fp, 0); 4192 unionread: 4193 if (vp->v_type != VDIR) { 4194 error = EINVAL; 4195 goto fail; 4196 } 4197 aiov.iov_base = buf; 4198 aiov.iov_len = count; 4199 auio.uio_iov = &aiov; 4200 auio.uio_iovcnt = 1; 4201 auio.uio_rw = UIO_READ; 4202 auio.uio_segflg = bufseg; 4203 auio.uio_td = td; 4204 vn_lock(vp, LK_SHARED | LK_RETRY); 4205 AUDIT_ARG_VNODE1(vp); 4206 loff = auio.uio_offset = foffset; 4207 #ifdef MAC 4208 error = mac_vnode_check_readdir(td->td_ucred, vp); 4209 if (error == 0) 4210 #endif 4211 error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, 4212 NULL); 4213 foffset = auio.uio_offset; 4214 if (error != 0) { 4215 VOP_UNLOCK(vp); 4216 goto fail; 4217 } 4218 if (count == auio.uio_resid && 4219 (vp->v_vflag & VV_ROOT) && 4220 (vp->v_mount->mnt_flag & MNT_UNION)) { 4221 struct vnode *tvp = vp; 4222 4223 vp = vp->v_mount->mnt_vnodecovered; 4224 VREF(vp); 4225 fp->f_vnode = vp; 4226 foffset = 0; 4227 vput(tvp); 4228 goto unionread; 4229 } 4230 VOP_UNLOCK(vp); 4231 *basep = loff; 4232 if (residp != NULL) 4233 *residp = auio.uio_resid; 4234 td->td_retval[0] = count - auio.uio_resid; 4235 fail: 4236 foffset_unlock(fp, foffset, 0); 4237 fdrop(fp, td); 4238 return (error); 4239 } 4240 4241 /* 4242 * Set the mode mask for creation of filesystem nodes. 4243 */ 4244 #ifndef _SYS_SYSPROTO_H_ 4245 struct umask_args { 4246 int newmask; 4247 }; 4248 #endif 4249 int 4250 sys_umask(struct thread *td, struct umask_args *uap) 4251 { 4252 struct pwddesc *pdp; 4253 4254 pdp = td->td_proc->p_pd; 4255 PWDDESC_XLOCK(pdp); 4256 td->td_retval[0] = pdp->pd_cmask; 4257 pdp->pd_cmask = uap->newmask & ALLPERMS; 4258 PWDDESC_XUNLOCK(pdp); 4259 return (0); 4260 } 4261 4262 /* 4263 * Void all references to file by ripping underlying filesystem away from 4264 * vnode. 4265 */ 4266 #ifndef _SYS_SYSPROTO_H_ 4267 struct revoke_args { 4268 char *path; 4269 }; 4270 #endif 4271 int 4272 sys_revoke(struct thread *td, struct revoke_args *uap) 4273 { 4274 struct vnode *vp; 4275 struct vattr vattr; 4276 struct nameidata nd; 4277 int error; 4278 4279 NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | AUDITVNODE1, UIO_USERSPACE, 4280 uap->path); 4281 if ((error = namei(&nd)) != 0) 4282 return (error); 4283 vp = nd.ni_vp; 4284 NDFREE_NOTHING(&nd); 4285 if (vp->v_type != VCHR || vp->v_rdev == NULL) { 4286 error = EINVAL; 4287 goto out; 4288 } 4289 #ifdef MAC 4290 error = mac_vnode_check_revoke(td->td_ucred, vp); 4291 if (error != 0) 4292 goto out; 4293 #endif 4294 error = VOP_GETATTR(vp, &vattr, td->td_ucred); 4295 if (error != 0) 4296 goto out; 4297 if (td->td_ucred->cr_uid != vattr.va_uid) { 4298 error = priv_check(td, PRIV_VFS_ADMIN); 4299 if (error != 0) 4300 goto out; 4301 } 4302 if (devfs_usecount(vp) > 0) 4303 VOP_REVOKE(vp, REVOKEALL); 4304 out: 4305 vput(vp); 4306 return (error); 4307 } 4308 4309 /* 4310 * This variant of getvnode() allows O_PATH files. Caller should 4311 * ensure that returned file and vnode are only used for compatible 4312 * semantics. 4313 */ 4314 int 4315 getvnode_path(struct thread *td, int fd, cap_rights_t *rightsp, 4316 struct file **fpp) 4317 { 4318 struct file *fp; 4319 int error; 4320 4321 error = fget_unlocked(td, fd, rightsp, &fp); 4322 if (error != 0) 4323 return (error); 4324 4325 /* 4326 * The file could be not of the vnode type, or it may be not 4327 * yet fully initialized, in which case the f_vnode pointer 4328 * may be set, but f_ops is still badfileops. E.g., 4329 * devfs_open() transiently create such situation to 4330 * facilitate csw d_fdopen(). 4331 * 4332 * Dupfdopen() handling in kern_openat() installs the 4333 * half-baked file into the process descriptor table, allowing 4334 * other thread to dereference it. Guard against the race by 4335 * checking f_ops. 4336 */ 4337 if (__predict_false(fp->f_vnode == NULL || fp->f_ops == &badfileops)) { 4338 fdrop(fp, td); 4339 *fpp = NULL; 4340 return (EINVAL); 4341 } 4342 4343 *fpp = fp; 4344 return (0); 4345 } 4346 4347 /* 4348 * Convert a user file descriptor to a kernel file entry and check 4349 * that, if it is a capability, the correct rights are present. 4350 * A reference on the file entry is held upon returning. 4351 */ 4352 int 4353 getvnode(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp) 4354 { 4355 int error; 4356 4357 error = getvnode_path(td, fd, rightsp, fpp); 4358 if (__predict_false(error != 0)) 4359 return (error); 4360 4361 /* 4362 * Filter out O_PATH file descriptors, most getvnode() callers 4363 * do not call fo_ methods. 4364 */ 4365 if (__predict_false((*fpp)->f_ops == &path_fileops)) { 4366 fdrop(*fpp, td); 4367 *fpp = NULL; 4368 error = EBADF; 4369 } 4370 4371 return (error); 4372 } 4373 4374 /* 4375 * Get an (NFS) file handle. 4376 */ 4377 #ifndef _SYS_SYSPROTO_H_ 4378 struct lgetfh_args { 4379 char *fname; 4380 fhandle_t *fhp; 4381 }; 4382 #endif 4383 int 4384 sys_lgetfh(struct thread *td, struct lgetfh_args *uap) 4385 { 4386 4387 return (kern_getfhat(td, AT_SYMLINK_NOFOLLOW, AT_FDCWD, uap->fname, 4388 UIO_USERSPACE, uap->fhp, UIO_USERSPACE)); 4389 } 4390 4391 #ifndef _SYS_SYSPROTO_H_ 4392 struct getfh_args { 4393 char *fname; 4394 fhandle_t *fhp; 4395 }; 4396 #endif 4397 int 4398 sys_getfh(struct thread *td, struct getfh_args *uap) 4399 { 4400 4401 return (kern_getfhat(td, 0, AT_FDCWD, uap->fname, UIO_USERSPACE, 4402 uap->fhp, UIO_USERSPACE)); 4403 } 4404 4405 /* 4406 * syscall for the rpc.lockd to use to translate an open descriptor into 4407 * a NFS file handle. 4408 * 4409 * warning: do not remove the priv_check() call or this becomes one giant 4410 * security hole. 4411 */ 4412 #ifndef _SYS_SYSPROTO_H_ 4413 struct getfhat_args { 4414 int fd; 4415 char *path; 4416 fhandle_t *fhp; 4417 int flags; 4418 }; 4419 #endif 4420 int 4421 sys_getfhat(struct thread *td, struct getfhat_args *uap) 4422 { 4423 4424 return (kern_getfhat(td, uap->flags, uap->fd, uap->path, UIO_USERSPACE, 4425 uap->fhp, UIO_USERSPACE)); 4426 } 4427 4428 int 4429 kern_getfhat(struct thread *td, int flags, int fd, const char *path, 4430 enum uio_seg pathseg, fhandle_t *fhp, enum uio_seg fhseg) 4431 { 4432 struct nameidata nd; 4433 fhandle_t fh; 4434 struct vnode *vp; 4435 int error; 4436 4437 if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_RESOLVE_BENEATH)) != 0) 4438 return (EINVAL); 4439 error = priv_check(td, PRIV_VFS_GETFH); 4440 if (error != 0) 4441 return (error); 4442 NDINIT_AT(&nd, LOOKUP, at2cnpflags(flags, AT_SYMLINK_NOFOLLOW | 4443 AT_RESOLVE_BENEATH) | LOCKLEAF | AUDITVNODE1, pathseg, path, 4444 fd); 4445 error = namei(&nd); 4446 if (error != 0) 4447 return (error); 4448 NDFREE_NOTHING(&nd); 4449 vp = nd.ni_vp; 4450 bzero(&fh, sizeof(fh)); 4451 fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; 4452 error = VOP_VPTOFH(vp, &fh.fh_fid); 4453 vput(vp); 4454 if (error == 0) { 4455 if (fhseg == UIO_USERSPACE) 4456 error = copyout(&fh, fhp, sizeof (fh)); 4457 else 4458 memcpy(fhp, &fh, sizeof(fh)); 4459 } 4460 return (error); 4461 } 4462 4463 #ifndef _SYS_SYSPROTO_H_ 4464 struct fhlink_args { 4465 fhandle_t *fhp; 4466 const char *to; 4467 }; 4468 #endif 4469 int 4470 sys_fhlink(struct thread *td, struct fhlink_args *uap) 4471 { 4472 4473 return (kern_fhlinkat(td, AT_FDCWD, uap->to, UIO_USERSPACE, uap->fhp)); 4474 } 4475 4476 #ifndef _SYS_SYSPROTO_H_ 4477 struct fhlinkat_args { 4478 fhandle_t *fhp; 4479 int tofd; 4480 const char *to; 4481 }; 4482 #endif 4483 int 4484 sys_fhlinkat(struct thread *td, struct fhlinkat_args *uap) 4485 { 4486 4487 return (kern_fhlinkat(td, uap->tofd, uap->to, UIO_USERSPACE, uap->fhp)); 4488 } 4489 4490 static int 4491 kern_fhlinkat(struct thread *td, int fd, const char *path, 4492 enum uio_seg pathseg, fhandle_t *fhp) 4493 { 4494 fhandle_t fh; 4495 struct mount *mp; 4496 struct vnode *vp; 4497 int error; 4498 4499 error = priv_check(td, PRIV_VFS_GETFH); 4500 if (error != 0) 4501 return (error); 4502 error = copyin(fhp, &fh, sizeof(fh)); 4503 if (error != 0) 4504 return (error); 4505 do { 4506 bwillwrite(); 4507 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4508 return (ESTALE); 4509 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4510 vfs_unbusy(mp); 4511 if (error != 0) 4512 return (error); 4513 VOP_UNLOCK(vp); 4514 error = kern_linkat_vp(td, vp, fd, path, pathseg); 4515 } while (error == EAGAIN || error == ERELOOKUP); 4516 return (error); 4517 } 4518 4519 #ifndef _SYS_SYSPROTO_H_ 4520 struct fhreadlink_args { 4521 fhandle_t *fhp; 4522 char *buf; 4523 size_t bufsize; 4524 }; 4525 #endif 4526 int 4527 sys_fhreadlink(struct thread *td, struct fhreadlink_args *uap) 4528 { 4529 fhandle_t fh; 4530 struct mount *mp; 4531 struct vnode *vp; 4532 int error; 4533 4534 error = priv_check(td, PRIV_VFS_GETFH); 4535 if (error != 0) 4536 return (error); 4537 if (uap->bufsize > IOSIZE_MAX) 4538 return (EINVAL); 4539 error = copyin(uap->fhp, &fh, sizeof(fh)); 4540 if (error != 0) 4541 return (error); 4542 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4543 return (ESTALE); 4544 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_SHARED, &vp); 4545 vfs_unbusy(mp); 4546 if (error != 0) 4547 return (error); 4548 error = kern_readlink_vp(vp, uap->buf, UIO_USERSPACE, uap->bufsize, td); 4549 vput(vp); 4550 return (error); 4551 } 4552 4553 /* 4554 * syscall for the rpc.lockd to use to translate a NFS file handle into an 4555 * open descriptor. 4556 * 4557 * warning: do not remove the priv_check() call or this becomes one giant 4558 * security hole. 4559 */ 4560 #ifndef _SYS_SYSPROTO_H_ 4561 struct fhopen_args { 4562 const struct fhandle *u_fhp; 4563 int flags; 4564 }; 4565 #endif 4566 int 4567 sys_fhopen(struct thread *td, struct fhopen_args *uap) 4568 { 4569 return (kern_fhopen(td, uap->u_fhp, uap->flags)); 4570 } 4571 4572 int 4573 kern_fhopen(struct thread *td, const struct fhandle *u_fhp, int flags) 4574 { 4575 struct mount *mp; 4576 struct vnode *vp; 4577 struct fhandle fhp; 4578 struct file *fp; 4579 int fmode, error; 4580 int indx; 4581 4582 error = priv_check(td, PRIV_VFS_FHOPEN); 4583 if (error != 0) 4584 return (error); 4585 indx = -1; 4586 fmode = FFLAGS(flags); 4587 /* why not allow a non-read/write open for our lockd? */ 4588 if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT)) 4589 return (EINVAL); 4590 error = copyin(u_fhp, &fhp, sizeof(fhp)); 4591 if (error != 0) 4592 return(error); 4593 /* find the mount point */ 4594 mp = vfs_busyfs(&fhp.fh_fsid); 4595 if (mp == NULL) 4596 return (ESTALE); 4597 /* now give me my vnode, it gets returned to me locked */ 4598 error = VFS_FHTOVP(mp, &fhp.fh_fid, LK_EXCLUSIVE, &vp); 4599 vfs_unbusy(mp); 4600 if (error != 0) 4601 return (error); 4602 4603 error = falloc_noinstall(td, &fp); 4604 if (error != 0) { 4605 vput(vp); 4606 return (error); 4607 } 4608 /* 4609 * An extra reference on `fp' has been held for us by 4610 * falloc_noinstall(). 4611 */ 4612 4613 #ifdef INVARIANTS 4614 td->td_dupfd = -1; 4615 #endif 4616 error = vn_open_vnode(vp, fmode, td->td_ucred, td, fp); 4617 if (error != 0) { 4618 KASSERT(fp->f_ops == &badfileops, 4619 ("VOP_OPEN in fhopen() set f_ops")); 4620 KASSERT(td->td_dupfd < 0, 4621 ("fhopen() encountered fdopen()")); 4622 4623 vput(vp); 4624 goto bad; 4625 } 4626 #ifdef INVARIANTS 4627 td->td_dupfd = 0; 4628 #endif 4629 fp->f_vnode = vp; 4630 finit_vnode(fp, fmode, NULL, &vnops); 4631 VOP_UNLOCK(vp); 4632 if ((fmode & O_TRUNC) != 0) { 4633 error = fo_truncate(fp, 0, td->td_ucred, td); 4634 if (error != 0) 4635 goto bad; 4636 } 4637 4638 error = finstall(td, fp, &indx, fmode, NULL); 4639 bad: 4640 fdrop(fp, td); 4641 td->td_retval[0] = indx; 4642 return (error); 4643 } 4644 4645 /* 4646 * Stat an (NFS) file handle. 4647 */ 4648 #ifndef _SYS_SYSPROTO_H_ 4649 struct fhstat_args { 4650 struct fhandle *u_fhp; 4651 struct stat *sb; 4652 }; 4653 #endif 4654 int 4655 sys_fhstat(struct thread *td, struct fhstat_args *uap) 4656 { 4657 struct stat sb; 4658 struct fhandle fh; 4659 int error; 4660 4661 error = copyin(uap->u_fhp, &fh, sizeof(fh)); 4662 if (error != 0) 4663 return (error); 4664 error = kern_fhstat(td, fh, &sb); 4665 if (error == 0) 4666 error = copyout(&sb, uap->sb, sizeof(sb)); 4667 return (error); 4668 } 4669 4670 int 4671 kern_fhstat(struct thread *td, struct fhandle fh, struct stat *sb) 4672 { 4673 struct mount *mp; 4674 struct vnode *vp; 4675 int error; 4676 4677 error = priv_check(td, PRIV_VFS_FHSTAT); 4678 if (error != 0) 4679 return (error); 4680 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4681 return (ESTALE); 4682 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4683 vfs_unbusy(mp); 4684 if (error != 0) 4685 return (error); 4686 error = VOP_STAT(vp, sb, td->td_ucred, NOCRED); 4687 vput(vp); 4688 return (error); 4689 } 4690 4691 /* 4692 * Implement fstatfs() for (NFS) file handles. 4693 */ 4694 #ifndef _SYS_SYSPROTO_H_ 4695 struct fhstatfs_args { 4696 struct fhandle *u_fhp; 4697 struct statfs *buf; 4698 }; 4699 #endif 4700 int 4701 sys_fhstatfs(struct thread *td, struct fhstatfs_args *uap) 4702 { 4703 struct statfs *sfp; 4704 fhandle_t fh; 4705 int error; 4706 4707 error = copyin(uap->u_fhp, &fh, sizeof(fhandle_t)); 4708 if (error != 0) 4709 return (error); 4710 sfp = malloc(sizeof(struct statfs), M_STATFS, M_WAITOK); 4711 error = kern_fhstatfs(td, fh, sfp); 4712 if (error == 0) 4713 error = copyout(sfp, uap->buf, sizeof(*sfp)); 4714 free(sfp, M_STATFS); 4715 return (error); 4716 } 4717 4718 int 4719 kern_fhstatfs(struct thread *td, fhandle_t fh, struct statfs *buf) 4720 { 4721 struct mount *mp; 4722 struct vnode *vp; 4723 int error; 4724 4725 error = priv_check(td, PRIV_VFS_FHSTATFS); 4726 if (error != 0) 4727 return (error); 4728 if ((mp = vfs_busyfs(&fh.fh_fsid)) == NULL) 4729 return (ESTALE); 4730 error = VFS_FHTOVP(mp, &fh.fh_fid, LK_EXCLUSIVE, &vp); 4731 if (error != 0) { 4732 vfs_unbusy(mp); 4733 return (error); 4734 } 4735 vput(vp); 4736 error = prison_canseemount(td->td_ucred, mp); 4737 if (error != 0) 4738 goto out; 4739 #ifdef MAC 4740 error = mac_mount_check_stat(td->td_ucred, mp); 4741 if (error != 0) 4742 goto out; 4743 #endif 4744 error = VFS_STATFS(mp, buf); 4745 out: 4746 vfs_unbusy(mp); 4747 return (error); 4748 } 4749 4750 /* 4751 * Unlike madvise(2), we do not make a best effort to remember every 4752 * possible caching hint. Instead, we remember the last setting with 4753 * the exception that we will allow POSIX_FADV_NORMAL to adjust the 4754 * region of any current setting. 4755 */ 4756 int 4757 kern_posix_fadvise(struct thread *td, int fd, off_t offset, off_t len, 4758 int advice) 4759 { 4760 struct fadvise_info *fa, *new; 4761 struct file *fp; 4762 struct vnode *vp; 4763 off_t end; 4764 int error; 4765 4766 if (offset < 0 || len < 0 || offset > OFF_MAX - len) 4767 return (EINVAL); 4768 AUDIT_ARG_VALUE(advice); 4769 switch (advice) { 4770 case POSIX_FADV_SEQUENTIAL: 4771 case POSIX_FADV_RANDOM: 4772 case POSIX_FADV_NOREUSE: 4773 new = malloc(sizeof(*fa), M_FADVISE, M_WAITOK); 4774 break; 4775 case POSIX_FADV_NORMAL: 4776 case POSIX_FADV_WILLNEED: 4777 case POSIX_FADV_DONTNEED: 4778 new = NULL; 4779 break; 4780 default: 4781 return (EINVAL); 4782 } 4783 /* XXX: CAP_POSIX_FADVISE? */ 4784 AUDIT_ARG_FD(fd); 4785 error = fget(td, fd, &cap_no_rights, &fp); 4786 if (error != 0) 4787 goto out; 4788 AUDIT_ARG_FILE(td->td_proc, fp); 4789 if ((fp->f_ops->fo_flags & DFLAG_SEEKABLE) == 0) { 4790 error = ESPIPE; 4791 goto out; 4792 } 4793 if (fp->f_type != DTYPE_VNODE) { 4794 error = ENODEV; 4795 goto out; 4796 } 4797 vp = fp->f_vnode; 4798 if (vp->v_type != VREG) { 4799 error = ENODEV; 4800 goto out; 4801 } 4802 if (len == 0) 4803 end = OFF_MAX; 4804 else 4805 end = offset + len - 1; 4806 switch (advice) { 4807 case POSIX_FADV_SEQUENTIAL: 4808 case POSIX_FADV_RANDOM: 4809 case POSIX_FADV_NOREUSE: 4810 /* 4811 * Try to merge any existing non-standard region with 4812 * this new region if possible, otherwise create a new 4813 * non-standard region for this request. 4814 */ 4815 mtx_pool_lock(mtxpool_sleep, fp); 4816 fa = fp->f_advice; 4817 if (fa != NULL && fa->fa_advice == advice && 4818 ((fa->fa_start <= end && fa->fa_end >= offset) || 4819 (end != OFF_MAX && fa->fa_start == end + 1) || 4820 (fa->fa_end != OFF_MAX && fa->fa_end + 1 == offset))) { 4821 if (offset < fa->fa_start) 4822 fa->fa_start = offset; 4823 if (end > fa->fa_end) 4824 fa->fa_end = end; 4825 } else { 4826 new->fa_advice = advice; 4827 new->fa_start = offset; 4828 new->fa_end = end; 4829 fp->f_advice = new; 4830 new = fa; 4831 } 4832 mtx_pool_unlock(mtxpool_sleep, fp); 4833 break; 4834 case POSIX_FADV_NORMAL: 4835 /* 4836 * If a the "normal" region overlaps with an existing 4837 * non-standard region, trim or remove the 4838 * non-standard region. 4839 */ 4840 mtx_pool_lock(mtxpool_sleep, fp); 4841 fa = fp->f_advice; 4842 if (fa != NULL) { 4843 if (offset <= fa->fa_start && end >= fa->fa_end) { 4844 new = fa; 4845 fp->f_advice = NULL; 4846 } else if (offset <= fa->fa_start && 4847 end >= fa->fa_start) 4848 fa->fa_start = end + 1; 4849 else if (offset <= fa->fa_end && end >= fa->fa_end) 4850 fa->fa_end = offset - 1; 4851 else if (offset >= fa->fa_start && end <= fa->fa_end) { 4852 /* 4853 * If the "normal" region is a middle 4854 * portion of the existing 4855 * non-standard region, just remove 4856 * the whole thing rather than picking 4857 * one side or the other to 4858 * preserve. 4859 */ 4860 new = fa; 4861 fp->f_advice = NULL; 4862 } 4863 } 4864 mtx_pool_unlock(mtxpool_sleep, fp); 4865 break; 4866 case POSIX_FADV_WILLNEED: 4867 case POSIX_FADV_DONTNEED: 4868 error = VOP_ADVISE(vp, offset, end, advice); 4869 break; 4870 } 4871 out: 4872 if (fp != NULL) 4873 fdrop(fp, td); 4874 free(new, M_FADVISE); 4875 return (error); 4876 } 4877 4878 int 4879 sys_posix_fadvise(struct thread *td, struct posix_fadvise_args *uap) 4880 { 4881 int error; 4882 4883 error = kern_posix_fadvise(td, uap->fd, uap->offset, uap->len, 4884 uap->advice); 4885 return (kern_posix_error(td, error)); 4886 } 4887 4888 int 4889 kern_copy_file_range(struct thread *td, int infd, off_t *inoffp, int outfd, 4890 off_t *outoffp, size_t len, unsigned int flags) 4891 { 4892 struct file *infp, *outfp; 4893 struct vnode *invp, *outvp; 4894 int error; 4895 size_t retlen; 4896 void *rl_rcookie, *rl_wcookie; 4897 off_t savinoff, savoutoff; 4898 4899 infp = outfp = NULL; 4900 rl_rcookie = rl_wcookie = NULL; 4901 savinoff = -1; 4902 error = 0; 4903 retlen = 0; 4904 4905 if (flags != 0) { 4906 error = EINVAL; 4907 goto out; 4908 } 4909 if (len > SSIZE_MAX) 4910 /* 4911 * Although the len argument is size_t, the return argument 4912 * is ssize_t (which is signed). Therefore a size that won't 4913 * fit in ssize_t can't be returned. 4914 */ 4915 len = SSIZE_MAX; 4916 4917 /* Get the file structures for the file descriptors. */ 4918 error = fget_read(td, infd, &cap_read_rights, &infp); 4919 if (error != 0) 4920 goto out; 4921 if (infp->f_ops == &badfileops) { 4922 error = EBADF; 4923 goto out; 4924 } 4925 if (infp->f_vnode == NULL) { 4926 error = EINVAL; 4927 goto out; 4928 } 4929 error = fget_write(td, outfd, &cap_write_rights, &outfp); 4930 if (error != 0) 4931 goto out; 4932 if (outfp->f_ops == &badfileops) { 4933 error = EBADF; 4934 goto out; 4935 } 4936 if (outfp->f_vnode == NULL) { 4937 error = EINVAL; 4938 goto out; 4939 } 4940 4941 /* Set the offset pointers to the correct place. */ 4942 if (inoffp == NULL) 4943 inoffp = &infp->f_offset; 4944 if (outoffp == NULL) 4945 outoffp = &outfp->f_offset; 4946 savinoff = *inoffp; 4947 savoutoff = *outoffp; 4948 4949 invp = infp->f_vnode; 4950 outvp = outfp->f_vnode; 4951 /* Sanity check the f_flag bits. */ 4952 if ((outfp->f_flag & (FWRITE | FAPPEND)) != FWRITE || 4953 (infp->f_flag & FREAD) == 0) { 4954 error = EBADF; 4955 goto out; 4956 } 4957 4958 /* If len == 0, just return 0. */ 4959 if (len == 0) 4960 goto out; 4961 4962 /* 4963 * If infp and outfp refer to the same file, the byte ranges cannot 4964 * overlap. 4965 */ 4966 if (invp == outvp && ((savinoff <= savoutoff && savinoff + len > 4967 savoutoff) || (savinoff > savoutoff && savoutoff + len > 4968 savinoff))) { 4969 error = EINVAL; 4970 goto out; 4971 } 4972 4973 /* Range lock the byte ranges for both invp and outvp. */ 4974 for (;;) { 4975 rl_wcookie = vn_rangelock_wlock(outvp, *outoffp, *outoffp + 4976 len); 4977 rl_rcookie = vn_rangelock_tryrlock(invp, *inoffp, *inoffp + 4978 len); 4979 if (rl_rcookie != NULL) 4980 break; 4981 vn_rangelock_unlock(outvp, rl_wcookie); 4982 rl_rcookie = vn_rangelock_rlock(invp, *inoffp, *inoffp + len); 4983 vn_rangelock_unlock(invp, rl_rcookie); 4984 } 4985 4986 retlen = len; 4987 error = vn_copy_file_range(invp, inoffp, outvp, outoffp, &retlen, 4988 flags, infp->f_cred, outfp->f_cred, td); 4989 out: 4990 if (rl_rcookie != NULL) 4991 vn_rangelock_unlock(invp, rl_rcookie); 4992 if (rl_wcookie != NULL) 4993 vn_rangelock_unlock(outvp, rl_wcookie); 4994 if (savinoff != -1 && (error == EINTR || error == ERESTART)) { 4995 *inoffp = savinoff; 4996 *outoffp = savoutoff; 4997 } 4998 if (outfp != NULL) 4999 fdrop(outfp, td); 5000 if (infp != NULL) 5001 fdrop(infp, td); 5002 td->td_retval[0] = retlen; 5003 return (error); 5004 } 5005 5006 int 5007 sys_copy_file_range(struct thread *td, struct copy_file_range_args *uap) 5008 { 5009 off_t inoff, outoff, *inoffp, *outoffp; 5010 int error; 5011 5012 inoffp = outoffp = NULL; 5013 if (uap->inoffp != NULL) { 5014 error = copyin(uap->inoffp, &inoff, sizeof(off_t)); 5015 if (error != 0) 5016 return (error); 5017 inoffp = &inoff; 5018 } 5019 if (uap->outoffp != NULL) { 5020 error = copyin(uap->outoffp, &outoff, sizeof(off_t)); 5021 if (error != 0) 5022 return (error); 5023 outoffp = &outoff; 5024 } 5025 error = kern_copy_file_range(td, uap->infd, inoffp, uap->outfd, 5026 outoffp, uap->len, uap->flags); 5027 if (error == 0 && uap->inoffp != NULL) 5028 error = copyout(inoffp, uap->inoffp, sizeof(off_t)); 5029 if (error == 0 && uap->outoffp != NULL) 5030 error = copyout(outoffp, uap->outoffp, sizeof(off_t)); 5031 return (error); 5032 } 5033